Added option to separate board into two planes, new hall of fame
sampling option, bugs with action-taking fixed, added diagnostic games
to the loop
fshcat committed Nov 10, 2021
1 parent ff983d9 commit dc9e97d
Showing 11 changed files with 168 additions and 113 deletions.
64 changes: 52 additions & 12 deletions agent.py
@@ -1,35 +1,75 @@
 import mnk
 import keras.models
 import tensorflow as tf
 import random


 class Agent:

-    def __init__(self, board, model, player):
+    def __init__(self, board, model, player, training):
         self.board = board
         self.model = model
         self.player = player
+        self.training = training

-    def action(self, epsilon=0.01):
+    def greedy_action(self):
         legal_moves = self.board.legal_moves()
         assert len(legal_moves) > 0, "No legal moves can be played."

-        # Exploration
-        if (random.random() < epsilon):
-            print("Played epsilon move ({:.5f})".format(epsilon))
-            self.board.move(*legal_moves[random.randint(0, len(legal_moves) - 1)])
-            return
-
         best_move = legal_moves[-1]
         max_evaluation = -1

         for move in legal_moves:
             self.board.move(*move)
-            evaluation = self.player * self.model(self.board.get_board())
-            if evaluation > max_evaluation:
+
+            val = self.value()
+            if val > max_evaluation:
                 best_move = move
-                max_evaluation = evaluation
+                max_evaluation = val
+
             self.board.undo_move(*move)
-        self.board.move(*best_move)
+
+        return best_move
+
+    def random_action(self):
+        legal_moves = self.board.legal_moves()
+        return legal_moves[random.randint(0, len(legal_moves) - 1)]
+
+    def value(self):
+        if self.board.who_won() == self.player:
+            return tf.constant(1, dtype="float32", shape=(1, 1))
+        elif self.board.who_won() == -1*self.player:
+            return tf.constant(-1, dtype="float32", shape=(1, 1))
+        elif self.board.who_won() == 0:
+            return tf.constant(0, dtype="float32", shape=(1, 1))
+        else:
+            return self.player*self.model(self.board.get_board())
+
+    def action(self, epsilon=0):
+        legal_moves = self.board.legal_moves()
+        assert len(legal_moves) > 0, "No legal moves can be played."
+
+        greedy = self.greedy_action()
+        if self.training and len(self.board.history()) >= (2 + (self.player == -1)):
+            self.update_model(greedy)
+
+        # Exploration
+        if random.random() < epsilon:
+            print("Played epsilon move ({:.5f})".format(epsilon))
+            move = self.random_action()
+        else:
+            move = greedy
+
+        self.board.move(*move)
+
+    def update_model(self, greedy_move=()):
+        if greedy_move == ():
+            assert self.board.who_won() != 2 and self.board.who_won() != self.player
+            self.model.fit(self.board.history()[-2], self.value(), batch_size=1, verbose=0)
+        else:
+            self.board.move(*greedy_move)
+            self.model.fit(self.board.history()[-3], self.value(), batch_size=1, verbose=0)
+            self.board.undo_move(*greedy_move)
+
+

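For context, a minimal self-play sketch of how the reworked Agent API fits together. This is an illustration only, not part of the commit; it assumes mnk.Board(3, 3, 3), that who_won() returns 2 while the game is undecided, and that board.history() includes the initial position, as the asserts and history-length check above suggest.

    import mnk
    from agent import Agent
    from model import modelXO

    board = mnk.Board(3, 3, 3, form="flatten")
    agent_x = Agent(board, modelXO, player=1, training=True)
    agent_o = Agent(board, modelXO, player=-1, training=True)

    while board.who_won() == 2:
        # Whose turn it is is inferred from history length, for illustration only.
        agent = agent_x if len(board.history()) % 2 == 1 else agent_o
        # action() makes the epsilon-greedy choice and, in training mode,
        # fits the model toward value() of the greedy continuation.
        agent.action(epsilon=0.1)
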
32 changes: 28 additions & 4 deletions hof.py
@@ -1,23 +1,47 @@
 import random
 import tensorflow as tf
-from math import floor
+from matplotlib import pyplot
 import os
+from math import floor


 class HOF:
     def __init__(self, folder):
         self.hof = []
         self.folder = folder
+        self.sample_history = []
+        self.pop_size = 0
+        self.basel = 0  # used in limit-uniform sampling
         if not os.path.isdir(folder):
             os.makedirs(folder)

     def store(self, model, name):
         model.save("{}/{}".format(self.folder, name))
         self.hof.append(name)
+        self.pop_size += 1
+        self.basel += 1/self.pop_size**2

-    def sample_hof(self):
-        pop_size = len(self.hof)
-        ind = floor(pop_size*random.random())
+    def sample_hof(self, method='uniform'):
+        if method == 'limit-uniform':
+            threshold = random.random()*self.basel
+
+            cum_prob = 0
+            ind = self.pop_size-1
+            for i in range(self.pop_size):
+                cum_prob += 1/(self.pop_size-i)**2
+                if cum_prob > threshold:
+                    ind = i
+                    break
+        elif method == 'uniform':
+            ind = floor(random.random()*self.pop_size)
+
+        self.sample_history.append(ind)
+
         name = self.hof[ind]
         return tf.keras.models.load_model("{}/{}".format(self.folder, name))
+
+    def sample_hist(self, num=100):
+        pyplot.hist(self.sample_history, num)
+        pyplot.title("Sampling of Model Indices from HOF")
+        pyplot.show()

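The new 'limit-uniform' method weights index i by 1/(pop_size-i)**2, normalized by the running basel sum of 1/j**2 (which converges to pi^2/6 as the hall of fame grows, hence the name), so recently stored models are sampled far more often than early ones. A quick sketch of the resulting distribution, for illustration only (not in the diff):

    n = 5
    basel = sum(1/j**2 for j in range(1, n + 1))
    probs = [(1/(n - i)**2) / basel for i in range(n)]
    print([round(p, 3) for p in probs])
    # [0.027, 0.043, 0.076, 0.171, 0.683] -- the newest model dominates
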
20 changes: 14 additions & 6 deletions mnk.py
@@ -5,11 +5,11 @@


 class Board:
-    def __init__(self, m, n, k, flatten=True, hist_length=-1):
+    def __init__(self, m, n, k, form="flatten", hist_length=-1):
         self.m = m
         self.n = n
         self.k = k
-        self.flatten = flatten
+        self.form = form
         self.hist_length = hist_length
         self.board = np.zeros((m, n), dtype=int)
         self.empty = 0
@@ -39,7 +39,6 @@ def del_history(self):
         self.board_history[0] = self.undo_buffer
         self.undo_buffer = np.zeros((self.m, self.n), dtype=int)

-
     def flip_players(self):
         self.player, self.opponent = self.opponent, self.player

@@ -78,10 +77,19 @@ def legal_moves(self):

     # reshapes board into 1-dimensional array for feeding as input to model if flatten is True
     def get_board(self):
-        if self.flatten:
-            return np.copy(self.board.reshape(1, self.m * self.n))
-        else:
-            return np.copy(self.board)
+        if self.form == "flatten":
+            return np.copy(self.board.reshape(1, 1, self.m * self.n))
+        elif self.form == "planar":
+            return np.copy(self.board.reshape(1, 3, 3, 1))
+        elif self.form == "multiplanar":
+            board_planes = np.zeros((self.m, self.n, 2), dtype=int)
+            for i in range(self.m):
+                for j in range(self.n):
+                    if self.board[i][j] == 1:
+                        board_planes[i][j][0] = 1
+                    elif self.board[i][j] == -1:
+                        board_planes[i][j][1] = 1
+            return np.copy(board_planes.reshape(1, 3, 3, 2))

     # converting numbers to their respective game values
     @staticmethod
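The new "multiplanar" form is the two-plane board split named in the commit message: one 0/1 plane per player. A vectorized sketch of the same encoding the nested loop produces (illustration only, not part of the commit):

    import numpy as np

    board = np.array([[ 1, -1,  0],
                      [ 0,  1,  0],
                      [ 0,  0, -1]])

    planes = np.zeros((3, 3, 2), dtype=int)
    planes[..., 0] = (board == 1)   # plane 0: X's stones
    planes[..., 1] = (board == -1)  # plane 1: O's stones
    print(planes.reshape(1, 3, 3, 2).shape)  # (1, 3, 3, 2), the model input shape
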
10 changes: 3 additions & 7 deletions model.py
@@ -5,16 +5,12 @@
 from tensorflow.keras.optimizers import Adadelta

 learning_rate = 1.0
-rho = 0.995
+rho = 0.7
 epsilon = 1e-07
-sgd = Adadelta(lr=learning_rate, rho=rho, epsilon=epsilon)
+sgd = Adadelta(learning_rate=learning_rate, rho=rho, epsilon=epsilon)

 modelXO = Sequential()
-modelXO.add(Conv2D(12, 3, padding="same", input_shape=(3, 3, 1), activation='tanh', kernel_initializer="he_normal"))
-modelXO.add(Dropout(0.1))
-modelXO.add(Conv2D(9, 2, padding="valid", input_shape=(3, 3, 1), activation='tanh', kernel_initializer="he_normal"))
-modelXO.add(Dropout(0.1))
-modelXO.add(Flatten())
+modelXO.add(Dense(27, input_shape=(1,9), kernel_initializer='normal', activation='tanh'))
 modelXO.add(Dense(18, kernel_initializer='normal', activation='tanh'))
 modelXO.add(Dense(1, kernel_initializer='normal', activation='tanh'))

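The optimizer change also tracks the TF2 Keras API, where Adadelta's lr argument was renamed learning_rate. A sketch of how the network might be compiled for the value-regression fits in agent.py (the real compile call is not shown in this diff, and mean squared error is an assumption here):

    # Hypothetical compile step; the loss choice is assumed, not from the commit.
    modelXO.compile(optimizer=sgd, loss='mean_squared_error')
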
(Diffs for the remaining 7 changed files were not loaded.)
