Skip to content

Commit

Permalink
Added new data plot, added board input format
Browse files Browse the repository at this point in the history
  • Loading branch information
fshcat committed Nov 23, 2021
1 parent 9869cac commit 7f5ec92
Show file tree
Hide file tree
Showing 21 changed files with 132 additions and 41 deletions.
2 changes: 1 addition & 1 deletion agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def random_action(self, board):
legal_moves = board.legal_moves()
return legal_moves[random.randint(0, len(legal_moves) - 1)]

def action(self, board, training, epsilon=0):
def action(self, board, training=False, epsilon=0):
legal_moves = board.legal_moves()
assert len(legal_moves) > 0, "No legal moves can be played."

Expand Down
9 changes: 9 additions & 0 deletions good-models/modelXO/keras_metadata.pb
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@

�,root"_tf_keras_sequential*�,{"name": "sequential", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "must_restore_from_config": false, "class_name": "Sequential", "config": {"name": "sequential", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 3, 3, 2]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "conv2d_input"}}, {"class_name": "Conv2D", "config": {"name": "conv2d", "trainable": true, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 3, 3, 2]}, "dtype": "float32", "filters": 48, "kernel_size": {"class_name": "__tuple__", "items": [3, 3]}, "strides": {"class_name": "__tuple__", "items": [1, 1]}, "padding": "valid", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1, 1]}, "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Flatten", "config": {"name": "flatten", "trainable": true, "dtype": "float32", "data_format": "channels_last"}}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 1, 18]}, "dtype": "float32", "units": 27, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "RandomNormal", "config": {"mean": 0.0, "stddev": 0.05, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 1, "activation": "tanh", "use_bias": true, 
"kernel_initializer": {"class_name": "RandomNormal", "config": {"mean": 0.0, "stddev": 0.05, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}]}, "shared_object_id": 11, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 4, "axes": {"-1": 2}}, "shared_object_id": 12}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 3, 3, 2]}, "is_graph_network": true, "full_save_spec": {"class_name": "__tuple__", "items": [[{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 3, 3, 2]}, "float32", "conv2d_input"]}], {}]}, "save_spec": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 3, 3, 2]}, "float32", "conv2d_input"]}, "keras_version": "2.6.0", "backend": "tensorflow", "model_config": {"class_name": "Sequential", "config": {"name": "sequential", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 3, 3, 2]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "conv2d_input"}, "shared_object_id": 0}, {"class_name": "Conv2D", "config": {"name": "conv2d", "trainable": true, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 3, 3, 2]}, "dtype": "float32", "filters": 48, "kernel_size": {"class_name": "__tuple__", "items": [3, 3]}, "strides": {"class_name": "__tuple__", "items": [1, 1]}, "padding": "valid", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1, 1]}, "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "bias_initializer": {"class_name": "Zeros", "config": {}, 
"shared_object_id": 2}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 3}, {"class_name": "Flatten", "config": {"name": "flatten", "trainable": true, "dtype": "float32", "data_format": "channels_last"}, "shared_object_id": 4}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 1, 18]}, "dtype": "float32", "units": 27, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "RandomNormal", "config": {"mean": 0.0, "stddev": 0.05, "seed": null}, "shared_object_id": 5}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 6}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 7}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 1, "activation": "tanh", "use_bias": true, "kernel_initializer": {"class_name": "RandomNormal", "config": {"mean": 0.0, "stddev": 0.05, "seed": null}, "shared_object_id": 8}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 9}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 10}]}}, "training_config": {"loss": "mean_squared_error", "metrics": null, "weighted_metrics": null, "loss_weights": null, "optimizer_config": {"class_name": "SGD", "config": {"name": "SGD", "learning_rate": 0.009999999776482582, "decay": 0.0, "momentum": 0.0, "nesterov": false}}}}2
root.layer_with_weights-0"_tf_keras_layer*�
{"name": "conv2d", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": {"class_name": "__tuple__", "items": [null, 3, 3, 2]}, "stateful": false, "must_restore_from_config": false, "class_name": "Conv2D", "config": {"name": "conv2d", "trainable": true, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 3, 3, 2]}, "dtype": "float32", "filters": 48, "kernel_size": {"class_name": "__tuple__", "items": [3, 3]}, "strides": {"class_name": "__tuple__", "items": [1, 1]}, "padding": "valid", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1, 1]}, "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 2}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 3, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 4, "axes": {"-1": 2}}, "shared_object_id": 12}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 3, 3, 2]}}2
� root.layer-1"_tf_keras_layer*�{"name": "flatten", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Flatten", "config": {"name": "flatten", "trainable": true, "dtype": "float32", "data_format": "channels_last"}, "shared_object_id": 4, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 1, "axes": {}}, "shared_object_id": 13}}2
�root.layer_with_weights-1"_tf_keras_layer*�{"name": "dense", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": {"class_name": "__tuple__", "items": [null, 1, 18]}, "stateful": false, "must_restore_from_config": false, "class_name": "Dense", "config": {"name": "dense", "trainable": true, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 1, 18]}, "dtype": "float32", "units": 27, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "RandomNormal", "config": {"mean": 0.0, "stddev": 0.05, "seed": null}, "shared_object_id": 5}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 6}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 7, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 48}}, "shared_object_id": 14}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 48]}}2
�root.layer_with_weights-2"_tf_keras_layer*�{"name": "dense_1", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 1, "activation": "tanh", "use_bias": true, "kernel_initializer": {"class_name": "RandomNormal", "config": {"mean": 0.0, "stddev": 0.05, "seed": null}, "shared_object_id": 8}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 9}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 10, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 27}}, "shared_object_id": 15}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 27]}}2
�>root.keras_api.metrics.0"_tf_keras_metric*�{"class_name": "Mean", "name": "loss", "dtype": "float32", "config": {"name": "loss", "dtype": "float32"}, "shared_object_id": 16}2
Binary file added good-models/modelXO/saved_model.pb
Binary file not shown.
Binary file not shown.
Binary file added good-models/modelXO/variables/variables.index
Binary file not shown.
36 changes: 35 additions & 1 deletion hof.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@
import os
from math import floor
from model import Model
from agent import Agent
from mnk import Board


class HOF:
def __init__(self, folder):
def __init__(self, mnk, folder):
self.hof = []
self.folder = folder
self.mnk = mnk
self.sample_history = []
self.pop_size = 0
self.basel = 0 # Used in limit-uniform sampling
Expand Down Expand Up @@ -41,6 +44,8 @@ def sample(self, method='uniform'):
break
elif method == 'uniform':
ind = floor(random.random()*self.pop_size)
elif method == 'naive':
ind = self.pop_size-1

self.sample_history.append(ind)

Expand All @@ -53,3 +58,32 @@ def sample_histogram(self, num=100):
pyplot.title("Sampling of Model Indices from HOF")
pyplot.show()

# Plots a matrix of head-to-head game results between sampled HOF models
def winrate_matrix(self, iterations):
    """Plot the outcome of one game for every pair of sampled HOF models.

    Models are sampled from ``self.hof`` at indices
    ``0, iterations, 2 * iterations, ...`` below ``self.pop_size``; despite
    its name, ``iterations`` is the stride between sampled indices, not a
    number of games. Row ``r`` / column ``c`` of the matrix holds the value
    returned by ``run_game`` when the r-th sampled model plays as player 1
    and the c-th sampled model plays as player -1. Each cell is therefore
    the result of a single game rather than an averaged win rate.

    The matrix is drawn with ``pyplot.imshow`` using the "bwr" colormap.
    ``pyplot.show()`` is not called here.

    Raises:
        ValueError: if ``iterations`` is 0 (raised by ``range``).
    """
    sampled = range(0, self.pop_size, iterations)

    matrix = []
    for i in sampled:
        # The row's model does not depend on the column, so load it once per
        # row instead of once per game. Games are played with the agents'
        # default (non-training) action, so the model is only read.
        model_i = Model("{}/{}".format(self.folder, self.hof[i]))

        row = []
        for j in sampled:
            model_j = Model("{}/{}".format(self.folder, self.hof[j]))
            row.append(self.run_game(Agent(model_i, 1), Agent(model_j, -1)))
        matrix.append(row)

    pyplot.imshow(matrix, cmap="bwr")

def run_game(self, agent1, agent2):
    """Play one game between two agents and return ``board.who_won()``.

    A new ``Board`` is built from ``self.mnk`` in the "multiplanar-2" form
    with ``hist_length=-1``. While the game is ongoing, whichever agent's
    ``player`` attribute matches ``board.player`` is asked to act; any other
    turn goes to ``agent2``.

    NOTE(review): the loop relies on ``agent.action(board)`` applying the
    chosen move to the board -- confirm in agent.py, otherwise the game
    never advances.
    """
    board = Board(*self.mnk, form="multiplanar-2", hist_length=-1)

    while board.game_ongoing():
        mover = agent1 if board.player == agent1.player else agent2
        mover.action(board)

    return board.who_won()






10 changes: 10 additions & 0 deletions mnk.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,16 @@ def get_board(self):
elif self.board[i][j] == -1:
board_planes[i][j][1] = 1
return np.copy(board_planes.reshape(1, self.m, self.n, 2))
elif self.form == "multiplanar-2":
board_planes = np.zeros((self.m, self.n, 2), dtype=int)
for i in range(self.m):
for j in range(self.n):
if self.board[i][j] == self.player:
board_planes[i][j][0] = 1
elif self.board[i][j] == -1*self.player:
board_planes[i][j][1] = 1
return np.copy(board_planes.reshape(1, self.m, self.n, 2))


def game_ongoing(self):
return not ( self.player_has_lost() or (self.num_legal_moves() == 0) )
Expand Down
48 changes: 36 additions & 12 deletions model.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import SGD

class Model:

Expand All @@ -13,11 +13,12 @@ def __init__(self, location=False):
self.model = self.retrieve(location)
return

opt = Adam(learning_rate=0.1, beta_1=0.9, beta_2=0.999)
opt = SGD(learning_rate=0.02, momentum=0.0)

self.model = Sequential()
self.model.add(Dense(27, input_shape=(1, 9), kernel_initializer='normal', activation='tanh'))
self.model.add(Dense(27, kernel_initializer='normal', activation='tanh'))
self.model.add(Conv2D(48, 3, activation='relu', input_shape=(3,3,2)))
self.model.add(Flatten())
self.model.add(Dense(27, kernel_initializer='normal', activation='relu', input_shape=(1,18)))
self.model.add(Dense(1, kernel_initializer='normal', activation='tanh'))

self.model.compile(loss='mean_squared_error', optimizer=opt)
Expand All @@ -33,28 +34,51 @@ def raw_value(self, board):
if board.who_won() != 2:
return tf.constant(board.who_won(), dtype="float32", shape=(1, 1))
else:
return self.model(board.get_board())
return board.player*self.model(board.get_board())

def raw_action_value(self, board, move):
    """Return ``self.raw_value`` of the position reached by playing ``move``.

    The move is applied to ``board`` in place, the resulting position is
    evaluated, and the move is undone again so the caller's board is left
    as it was passed in.

    Args:
        board: game board providing ``move(*move)`` and ``undo_move(*move)``.
        move: sequence of arguments unpacked into ``board.move``.

    Returns:
        Whatever ``self.raw_value(board)`` returns for the resulting position.
    """
    board.move(*move)
    try:
        return self.raw_value(board)
    finally:
        # Undo even when evaluation raises, so a failed evaluation cannot
        # leave the shared board with a phantom move on it.
        board.undo_move(*move)

# Values the state from the perspective of board.player: 1 means that player has won, -1 that the opponent has, 0 a draw
def state_value(self, board, player):
return player * self.raw_value(board)
def state_value(self, board):
if board.who_won() == 0:
return tf.constant(0, dtype="float32", shape=(1, 1))
elif board.who_won() == board.player:
return tf.constant(1, dtype="float32", shape=(1, 1))
elif board.who_won() == -1*board.player:
return tf.constant(-1, dtype="float32", shape=(1, 1))
else:
return self.model(board.get_board())

# Returns the value of taking a move from the given board state
def action_value(self, board, move):
player = board.player

board.move(*move)
val = self.state_value(board, player)
val = self.state_value(board)
board.undo_move(*move)

return val

def scheduler(self, epoch, lr):
    """Return the learning rate to use for the given epoch index.

    Step schedule: 0.02 below epoch 5000, 0.01 below 15000, 0.002 below
    25000, and 0.001 from then on. The current rate ``lr`` is accepted
    but ignored.

    NOTE(review): Keras passes the epoch index of the current ``fit()``
    call. ``td_update`` calls ``fit`` without an ``epochs`` argument, so
    this may always receive 0 and always return 0.02 -- confirm.
    """
    steps = ((5000, 0.02), (15000, 0.01), (25000, 0.002))
    for upper_bound, rate in steps:
        if epoch < upper_bound:
            return rate
    return 0.001

# Performs a temporal difference update of the model
def td_update(self, board, greedy_move=(), terminal=False):
callback = tf.keras.callbacks.LearningRateScheduler(self.scheduler)
if terminal:
assert board.who_won() != 2
assert greedy_move == ()
self.model.fit(board.history()[-2], self.raw_value(board), batch_size=1, verbose=0)
self.model.fit(board.history()[-2], self.state_value(board), batch_size=1, verbose=0, callbacks=[callback])
else:
self.model.fit(board.history()[-2], self.action_value(board, greedy_move), batch_size=1, verbose=0)
self.model.fit(board.history()[-2], self.action_value(board, greedy_move), batch_size=1, verbose=0, callbacks=[callback])

Loading

0 comments on commit 7f5ec92

Please sign in to comment.