Added Hall of Fame functionality
fshcat committed Nov 7, 2021
1 parent 23644e8 commit ff983d9
Showing 12 changed files with 143 additions and 51 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -1,2 +1,3 @@
__pycache__/
.idea/
.idea/
menagerie/
14 changes: 7 additions & 7 deletions agent.py
@@ -2,12 +2,13 @@
import keras.models
import random

class Agent():

class Agent:

def __init__(self, board, model, player):
self.board = board;
self.model = model;
self.player = player;
self.board = board
self.model = model
self.player = player

def action(self, epsilon=0.01):
legal_moves = self.board.legal_moves()
@@ -19,17 +20,16 @@ def action(self, epsilon=0.01):
self.board.move(*legal_moves[random.randint(0, len(legal_moves) - 1)])
return

best_move = legal_moves[0]
best_move = legal_moves[-1]
max_evaluation = -1

for move in legal_moves:
self.board.move(*move)
evaluation = self.player * self.model(self.board.get_board())
if evaluation > max_evaluation:
best_move = move
max_evaluation = evaluation
self.board.undo_move(*move)

self.board.undo_move(*move)
self.board.move(*best_move)
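For context, the cleaned-up Agent is driven the same way play.py drives it: build a board, load a saved model, and ask the agent for a move. A minimal sketch, assuming the saved model at models/modelXO from this commit:

import mnk
import tensorflow as tf
from agent import Agent

# mirrors play.py: an unflattened board is fed to the model as a (1, 3, 3, 1) tensor
board = mnk.Board(3, 3, 3, flatten=False)
model = tf.keras.models.load_model('models/modelXO')

agent = Agent(board, model, 1)  # the agent plays as X (player = 1)
agent.action(epsilon=0)         # epsilon=0: always pick the highest-valued legal move
print(board)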


23 changes: 23 additions & 0 deletions hof.py
@@ -0,0 +1,23 @@
import random
import tensorflow as tf
from math import floor
import os


class HOF:
def __init__(self, folder):
self.hof = []
self.folder = folder
if not os.path.isdir(folder):
os.makedirs(folder)

def store(self, model, name):
model.save("{}/{}".format(self.folder, name))
self.hof.append(name)

def sample_hof(self):
pop_size = len(self.hof)
ind = floor(pop_size*random.random())
name = self.hof[ind]
return tf.keras.models.load_model("{}/{}".format(self.folder, name))
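Usage mirrors train.py: a model is saved to the hall-of-fame folder under a name, and a uniformly random past snapshot can be loaded back as an opponent. A minimal sketch:

from hof import HOF
from model import modelXO

hof = HOF("menagerie")        # creates the menagerie/ folder if it is missing
hof.store(modelXO, "init")    # saves to menagerie/init and records the name
opponent = hof.sample_hof()   # loads a uniformly random stored model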

8 changes: 4 additions & 4 deletions mnk.py
@@ -2,7 +2,7 @@
# Future games should share a similar interface to make conversion of the AI
# to play different games as seamless as possible.
import numpy as np
import random


class Board:
def __init__(self, m, n, k, flatten=True, hist_length=-1):
@@ -34,7 +34,7 @@ def del_history(self):
if self.hist_length == -1 or len(self.board_history) < self.hist_length:
self.board_history.pop()
else:
for i in range(0,len(self.board_history)-1,-1):
for i in range(0,len(self.board_history)-1, -1):
self.board_history[i+1] = self.board_history[i]
self.board_history[0] = self.undo_buffer
self.undo_buffer = np.zeros((self.m, self.n), dtype=int)
@@ -79,9 +79,9 @@ def legal_moves(self):
# reshapes board into 1-dimensional array for feeding as input to model if flatten is True
def get_board(self):
if self.flatten:
return self.board.reshape(1, self.m * self.n)
return np.copy(self.board.reshape(1, self.m * self.n))
else:
return self.board
return np.copy(self.board.reshape(1, 3, 3, 1))

# converting numbers to their respective game values
@staticmethod
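The reworked get_board now returns a copy of the position, reshaped to (1, 3, 3, 1) when flatten=False so it can be fed straight to the convolutional model, or to (1, 9) for the flattened dense input. A quick sketch of the two shapes, assuming a 3,3,3 board as used everywhere in this commit:

import mnk

conv_board = mnk.Board(3, 3, 3, flatten=False)
print(conv_board.get_board().shape)   # (1, 3, 3, 1), ready for the Conv2D model

flat_board = mnk.Board(3, 3, 3)       # flatten defaults to True
print(flat_board.get_board().shape)   # (1, 9)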
21 changes: 13 additions & 8 deletions model.py
@@ -1,17 +1,22 @@
import mnk
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from keras.layers import Dense, Conv2D, Flatten, Dropout
from tensorflow.keras.optimizers import Adadelta

learning_rate = 0.005
momentum = 0.8
sgd = SGD(lr=learning_rate, momentum=momentum, nesterov=False)
learning_rate = 1.0
rho = 0.995
epsilon = 1e-07
sgd = Adadelta(lr=learning_rate, rho=rho, epsilon=epsilon)

modelXO = Sequential()
modelXO.add(Dense(6, input_dim=9, kernel_initializer='normal', activation='relu'))
modelXO.add(Dense(6, input_dim=9, kernel_initializer='normal', activation='relu'))
modelXO.add(Conv2D(12, 3, padding="same", input_shape=(3, 3, 1), activation='tanh', kernel_initializer="he_normal"))
modelXO.add(Dropout(0.1))
modelXO.add(Conv2D(9, 2, padding="valid", input_shape=(3, 3, 1), activation='tanh', kernel_initializer="he_normal"))
modelXO.add(Dropout(0.1))
modelXO.add(Flatten())
modelXO.add(Dense(18, kernel_initializer='normal', activation='tanh'))
modelXO.add(Dense(1, kernel_initializer='normal', activation='tanh'))

modelXO.compile(loss='mean_squared_error', optimizer = sgd)
modelXO.compile(loss='mean_squared_error', optimizer=sgd)
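The new network is a small convolutional evaluator: it consumes the (1, 3, 3, 1) tensors produced by Board.get_board(flatten=False) and emits a single tanh score in [-1, 1]. A quick shape check, as a sketch:

import numpy as np
from model import modelXO

empty = np.zeros((1, 3, 3, 1), dtype="float32")  # an empty board
print(modelXO(empty).shape)                      # (1, 1): one position evaluation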

18 changes: 13 additions & 5 deletions models/modelXO/keras_metadata.pb

Large diffs are not rendered by default.

Binary file modified models/modelXO/saved_model.pb
Binary file modified models/modelXO/variables/variables.data-00000-of-00001
Binary file modified models/modelXO/variables/variables.index
7 changes: 2 additions & 5 deletions play.py
@@ -2,17 +2,15 @@
import mnk
import tensorflow as tf

board = mnk.Board(3, 3, 3)
board = mnk.Board(3, 3, 3, flatten=False)
model = tf.keras.models.load_model('models/modelXO')

print("\n\n" + str(board))
current_player = input("\nWho plays first (Me/AI)? ")

ai_side = [-1, 1][current_player == "AI"]
agent = Agent(board, model, ai_side)

while board.who_won() == 2:

if current_player == 'Me':
played = False
while not played:
@@ -25,12 +23,11 @@
played = True
except:
print("Invalid move! Try again")

current_player = "AI"

else:
agent.action(0)
current_player = "Me"

print(board)

if board.who_won() == 0:
28 changes: 28 additions & 0 deletions plot.py
@@ -0,0 +1,28 @@
from matplotlib import pyplot


def plot_wins(win_states, num, labels=['X', 'O']):
xs = []
os = []
ties = []

values = [0, 0, 0]

for i, w in enumerate(win_states):
if i < num:
values[w] += 1
else:
xs.append(values[1])
os.append(values[-1])
ties.append(values[0])

values[w] += 1
values[win_states[i-num]] -= 1

game = range(num, len(xs)+num)
pyplot.plot(game, xs, label="{} wins".format(labels[0]))
pyplot.plot(game, os, label="{} wins".format(labels[1]))
pyplot.plot(game, ties, label="Ties")
pyplot.legend()
pyplot.title("Number of Each End State for Previous {} Games".format(num))
pyplot.show()
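plot_wins expects a sequence of end states coded as 1, -1, or 0 and, for each game from index num onward, plots how many of the previous num games ended in each state. train.py calls it with the HOF-relative results; a small sketch with made-up outcomes:

import random
from plot import plot_wins

# hypothetical results: 1 = first label wins, -1 = second label wins, 0 = tie
results = [random.choice([1, -1, 0]) for _ in range(200)]
plot_wins(results, 50, labels=["Best", "HOF"])   # rolling counts over the last 50 games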
72 changes: 51 additions & 21 deletions train.py
@@ -1,47 +1,77 @@
# TODO: PLOT LOSS CURVES
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
import numpy as np
import mnk
import random
from agent import Agent
from model import modelXO
from plot import plot_wins
from hof import HOF


games = 1010
m, n, k = 3, 3, 3
epsilon = 1
numEpochs = 1
batchSize = 1
verbose = 0
hof = HOF("menagerie")
hof.store(modelXO, "init")
modelHOF = hof.sample_hof()

hof_freq = 10 # how often to save the model to the HOF
hof_duration = 2 # how long to keep using the same HOF model before loading a new one

games = 1000
epsilon = 0.1 # exploration constant
decay_freq = 10 # how often to decrease epsilon
decay_factor = 0.00099 # how much to decrease by

end_states = []
victories = []
stored_games = []

for game in range(games):
board = mnk.Board(m, n, k, hist_length=2)
board = mnk.Board(m, n, k, flatten=False, hist_length=-1)

# decrease exploration over time
if game % decay_freq == 0 and game != 0:
epsilon -= decay_factor

agentX = Agent(board, modelXO, 1)
agentO = Agent(board, modelXO, -1)
# initialize the agents
if game % hof_duration == 0 and game != 0:
modelHOF = hof.sample_hof()
sideT = [-1, 1][random.random() > 0.5]
sideHOF = [None, -1, 1][sideT]
agentT = Agent(board, modelXO, sideT)
agentHOF = Agent(board, modelHOF, sideHOF)

move = 1
while not board.player_has_lost() and len(board.legal_moves()) != 0:
# have the appropriate agent select a move
if board.player == sideHOF:
agentHOF.action(epsilon)
else:
agentT.action(epsilon)

# back up the current board evaluation to the last action chosen by the current agent
if move > 2:
evaluation = modelXO(board.get_board())
modelXO.fit(board.history()[-2], evaluation, epochs=numEpochs, batch_size=batchSize, verbose=0)

if board.player == 1:
agentX.action(epsilon)
else:
agentO.action(epsilon)
modelXO.fit(board.history()[-3], evaluation, batch_size=1, verbose=0)
move += 1

if game % 50 == 0:
print(board)
move += 1

# back up the terminal state value to the last actions chosen by either agent
terminal_eval = tf.constant(board.who_won(), dtype="float32", shape=(1, 1))
modelXO.fit(board.history()[-2], terminal_eval, epochs=numEpochs, batch_size=batchSize, verbose=0)
modelXO.fit(board.history()[-1], terminal_eval, epochs=numEpochs, batch_size=batchSize, verbose=0)
modelXO.fit(board.history()[-3], terminal_eval, batch_size=1, verbose=0)
modelXO.fit(board.history()[-2], terminal_eval, batch_size=1, verbose=0)

if game % 300 == 0:
epsilon /= 10
# occasionally save new model to hall of fame
if game % hof_freq == 0 and game != 0:
hof.store(modelXO, game)

end_states.append(board.who_won())
victories.append(board.who_won()*sideT)
if game % 10 == 0:
print("Game " + str(game) + " goes to " + ['Tie', 'X', 'O'][board.who_won()])
print("Game {} goes to {} ({})".format(str(game), ["tie", "best", "hof"][board.who_won()*sideT], ['Tie', 'X', 'O'][board.who_won()]))

plot_wins(end_states, 50)
plot_wins(victories, 50, ["Best", "HOF"])
modelXO.save('models/modelXO')
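One consequence of the new schedule worth noting: epsilon now decays linearly (by decay_factor every decay_freq games) instead of dropping by a factor of 10 every 300 games, so over the 1000-game run it slides from 0.1 to roughly 0.002. A quick check of that arithmetic:

epsilon, decay_freq, decay_factor = 0.1, 10, 0.00099
decays = sum(1 for game in range(1000) if game % decay_freq == 0 and game != 0)
print(decays, epsilon - decays * decay_factor)   # 99 decays -> about 0.00199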
