Commit

Merge remote-tracking branch 'origin/testing' into main
PedroContipelli committed Nov 16, 2021
2 parents ef34b08 + 9869cac commit 4ebe47f
Showing 15 changed files with 256 additions and 137 deletions.
46 changes: 29 additions & 17 deletions agent.py
@@ -1,35 +1,47 @@
 import mnk
 import keras.models
 import tensorflow as tf
 import random
 
 
 class Agent:
 
-    def __init__(self, board, model, player):
-        self.board = board
+    def __init__(self, model, player):
         self.model = model
         self.player = player
 
-    def action(self, epsilon=0.01):
-        legal_moves = self.board.legal_moves()
+    def greedy_action(self, board):
+        legal_moves = board.legal_moves()
         assert len(legal_moves) > 0, "No legal moves can be played."
 
-        # Exploration
-        if (random.random() < epsilon):
-            print("Played epsilon move ({:.5f})".format(epsilon))
-            self.board.move(*legal_moves[random.randint(0, len(legal_moves) - 1)])
-            return
-
-        best_move = legal_moves[-1]
+        best_move = legal_moves[0]
         max_evaluation = -1
 
         for move in legal_moves:
-            self.board.move(*move)
-            evaluation = self.player * self.model(self.board.get_board())
-            if evaluation > max_evaluation:
+            val = self.model.action_value(board, move)
+            if val > max_evaluation:
                 best_move = move
-                max_evaluation = evaluation
+                max_evaluation = val
+
+        return best_move
+
+    def random_action(self, board):
+        legal_moves = board.legal_moves()
+        return legal_moves[random.randint(0, len(legal_moves) - 1)]
 
-            self.board.undo_move(*move)
-        self.board.move(*best_move)
+    def action(self, board, training, epsilon=0):
+        legal_moves = board.legal_moves()
+        assert len(legal_moves) > 0, "No legal moves can be played."
+
+        greedy_move = self.greedy_action(board)
+        if training and len(board.history()) >= (2 + (self.player == -1)):
+            self.model.td_update(board, greedy_move)
+
+        # Exploration
+        if random.random() < epsilon:
+            move = self.random_action(board)
+        else:
+            move = greedy_move
+
+        board.move(*move)

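For context on the new interface: the caller now owns the board and passes it to the agent each turn, and a training flag controls whether TD updates are issued. A minimal self-play sketch, assuming mnk.Board, model.Model, and Agent behave as shown in this diff (the loop itself is illustrative and not part of the commit):

# Illustrative only: one self-play game of 3,3,3 tic-tac-toe using the
# interfaces shown in this commit (Board, Model, Agent).
import mnk
from agent import Agent
from model import Model

board = mnk.Board(3, 3, 3, form="flatten")
model = Model()
agents = {1: Agent(model, 1), -1: Agent(model, -1)}

while board.game_ongoing():
    # action() performs a TD update (when training=True), picks an
    # epsilon-greedy move, and plays it on the board in place.
    agents[board.player].action(board, training=True, epsilon=0.1)

print(board)
print("Result:", board.who_won())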
48 changes: 40 additions & 8 deletions hof.py
@@ -1,23 +1,55 @@
 import random
-import tensorflow as tf
-from math import floor
 from matplotlib import pyplot
 import os
+from math import floor
+from model import Model
 
 
 class HOF:
     def __init__(self, folder):
         self.hof = []
         self.folder = folder
+        self.sample_history = []
+        self.pop_size = 0
+        self.basel = 0  # Used in limit-uniform sampling
         if not os.path.isdir(folder):
             os.makedirs(folder)
 
-    def store(self, model, name):
-        model.save("{}/{}".format(self.folder, name))
-        self.hof.append(name)
+    def store(self, model):
+        model.save_to("{}/{}".format(self.folder, self.pop_size))
+        self.hof.append(self.pop_size)
+        self.pop_size += 1
+        self.basel += 1/self.pop_size**2
+
+    # Gating method decides whether to add the model to the hall of fame
+    def gate(self, model):
+        # Simple gating method, stores model after every training episode
+        self.store(model)
+
+    # Samples from the hall of fame with the provided method
+    def sample(self, method='uniform'):
+        if method == 'limit-uniform':  # Performs poorly. Do not use.
+            threshold = random.random()*self.basel
+
+            cum_prob = 0
+            ind = self.pop_size-1
+            for i in range(self.pop_size):
+                cum_prob += 1/(self.pop_size-i)**2
+                if cum_prob > threshold:
+                    ind = i
+                    break
+        elif method == 'uniform':
+            ind = floor(random.random()*self.pop_size)
+
+        self.sample_history.append(ind)
 
     def sample_hof(self):
         pop_size = len(self.hof)
         ind = floor(pop_size*random.random())
         name = self.hof[ind]
-        return tf.keras.models.load_model("{}/{}".format(self.folder, name))
+        return Model("{}/{}".format(self.folder, name))
+
+    # Displays a histogram of the model iterations sampled from the hall of fame
+    def sample_histogram(self, num=100):
+        pyplot.hist(self.sample_history, num)
+        pyplot.title("Sampling of Model Indices from HOF")
+        pyplot.show()

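For reference on the sampling code above: basel accumulates the partial sum of 1/k**2 (the Basel series), so 'limit-uniform' draws index i with probability proportional to 1/(pop_size - i)**2, which heavily favours the most recently stored models. A standalone sketch of that distribution (illustrative, not part of the commit):

# Illustrative: probability of sampling index i under 'limit-uniform'
# for pop_size stored models, normalised by the partial Basel sum.
def limit_uniform_probs(pop_size):
    basel = sum(1 / k**2 for k in range(1, pop_size + 1))
    return [1 / (pop_size - i)**2 / basel for i in range(pop_size)]

print(limit_uniform_probs(5))
# approximately [0.027, 0.043, 0.076, 0.171, 0.683]: the newest model dominates,
# which is likely why the code marks this method "Performs poorly. Do not use."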
40 changes: 27 additions & 13 deletions mnk.py
@@ -5,11 +5,11 @@
 
 
 class Board:
-    def __init__(self, m, n, k, flatten=True, hist_length=-1):
+    def __init__(self, m, n, k, form="flatten", hist_length=-1):
         self.m = m
         self.n = n
         self.k = k
-        self.flatten = flatten
+        self.form = form
         self.hist_length = hist_length
         self.board = np.zeros((m, n), dtype=int)
         self.empty = 0
@@ -39,10 +39,12 @@ def del_history(self):
         self.board_history[0] = self.undo_buffer
         self.undo_buffer = np.zeros((self.m, self.n), dtype=int)
 
-
     def flip_players(self):
         self.player, self.opponent = self.opponent, self.player
 
+    def num_legal_moves(self):
+        return len(self.legal_moves())
+
     def who_won(self):
         if self.player_has_lost():
             return 1 if self.player == -1 else -1
@@ -53,21 +55,21 @@ def who_won(self):
         # draw
         return 0
 
-    # does a move by changing the board and current player
+    # Does a move by changing the board and current player
     def move(self, x, y):
         assert 0 <= x < self.m and 0 <= y < self.n, "Illegal move - Out of bounds"
         assert self.board[x][y] == self.empty, "Illegal move - Spot already taken"
         self.board[x][y] = self.player
         self.add_history()
         self.flip_players()
 
-    # undoes everything done in the move method
+    # Undoes everything done in the move method
     def undo_move(self, x, y):
         self.board[x][y] = self.empty
         self.del_history()
         self.flip_players()
 
-    # generates and returns a list of all legal moves
+    # Generates and returns a list of all legal moves
     def legal_moves(self):
         moves = []
         for x, column in enumerate(self.board):
@@ -79,19 +81,31 @@ def legal_moves(self):
-    def num_legal_moves(self):
-        return len(self.legal_moves())
-
-    # reshapes board into 1-dimensional array for feeding as input to model if flatten is True
+    # Reshapes board into the form needed for the model
     def get_board(self):
-        if self.flatten:
-            return np.copy(self.board.reshape(1, self.m * self.n))
-        else:
-            return np.copy(self.board.reshape(1, 3, 3, 1))
+        if self.form == "flatten":
+            return np.copy(self.board.reshape(1, 1, self.m * self.n))
+        elif self.form == "planar":
+            return np.copy(self.board.reshape(1, self.m, self.n, 1))
+        elif self.form == "multiplanar":
+            board_planes = np.zeros((self.m, self.n, 2), dtype=int)
+            for i in range(self.m):
+                for j in range(self.n):
+                    if self.board[i][j] == 1:
+                        board_planes[i][j][0] = 1
+                    elif self.board[i][j] == -1:
+                        board_planes[i][j][1] = 1
+            return np.copy(board_planes.reshape(1, self.m, self.n, 2))
 
     def game_ongoing(self):
         return not ( self.player_has_lost() or (self.num_legal_moves() == 0) )
 
-    # converting numbers to their respective game values
+    # Converting numbers to their respective game values
     @staticmethod
     def print_cast(move):
         return 'O_X'[move + 1]
 
-    # allows for printing of the current board state
+    # Allows for printing of the current board state
     def __str__(self):
         string = ''
         for i, row in enumerate(reversed(list(zip(*self.board)))):
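To see what each of the three board representations feeds the network, here is a small shape check (illustrative; it only assumes the Board shown in this diff):

# Illustrative: output shape of get_board() for each `form` on a 3,3,3 board.
import mnk

for form in ("flatten", "planar", "multiplanar"):
    board = mnk.Board(3, 3, 3, form=form)
    board.move(0, 0)  # X takes a corner
    board.move(1, 1)  # O replies in the centre
    print(form, board.get_board().shape)
# flatten     -> (1, 1, 9)    one row vector, matching the Dense input_shape=(1, 9)
# planar      -> (1, 3, 3, 1) single 2D plane with entries -1/0/1
# multiplanar -> (1, 3, 3, 2) one 0/1 plane per player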
77 changes: 58 additions & 19 deletions model.py
@@ -1,22 +1,61 @@
 import mnk
-import pandas as pd
 import tensorflow as tf
 from keras.models import Sequential
-from keras.layers import Dense, Conv2D, Flatten, Dropout
-from tensorflow.keras.optimizers import Adadelta
-
-learning_rate = 1.0
-rho = 0.995
-epsilon = 1e-07
-sgd = Adadelta(lr=learning_rate, rho=rho, epsilon=epsilon)
-
-modelXO = Sequential()
-modelXO.add(Conv2D(12, 3, padding="same", input_shape=(3, 3, 1), activation='tanh', kernel_initializer="he_normal"))
-modelXO.add(Dropout(0.1))
-modelXO.add(Conv2D(9, 2, padding="valid", input_shape=(3, 3, 1), activation='tanh', kernel_initializer="he_normal"))
-modelXO.add(Dropout(0.1))
-modelXO.add(Flatten())
-modelXO.add(Dense(18, kernel_initializer='normal', activation='tanh'))
-modelXO.add(Dense(1, kernel_initializer='normal', activation='tanh'))
-
-modelXO.compile(loss='mean_squared_error', optimizer=sgd)
+from keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
+from tensorflow.keras.optimizers import Adam
+
+class Model:
+
+    def __init__(self, location=False):
+
+        # If a location is provided, retrieve the model stored at that location
+        if location != False:
+            self.model = self.retrieve(location)
+            return
+
+        opt = Adam(learning_rate=0.1, beta_1=0.9, beta_2=0.999)
+
+        self.model = Sequential()
+        self.model.add(Dense(27, input_shape=(1, 9), kernel_initializer='normal', activation='tanh'))
+        self.model.add(Dense(27, kernel_initializer='normal', activation='tanh'))
+        self.model.add(Dense(1, kernel_initializer='normal', activation='tanh'))
+
+        self.model.compile(loss='mean_squared_error', optimizer=opt)
+
+    def retrieve(self, location):
+        return tf.keras.models.load_model(location)
+
+    def save_to(self, location):
+        self.model.save(location)
+
+    # Values closer to 1 mean X advantage, -1 means O advantage
+    def raw_value(self, board):
+        if board.who_won() != 2:
+            return tf.constant(board.who_won(), dtype="float32", shape=(1, 1))
+        else:
+            return self.model(board.get_board())
+
+    # Changes 1 to mean the supplied player is at advantage, -1 disadvantage
+    def state_value(self, board, player):
+        return player * self.raw_value(board)
+
+    # Returns the value of taking a move from the given board state
+    def action_value(self, board, move):
+        player = board.player
+
+        board.move(*move)
+        val = self.state_value(board, player)
+        board.undo_move(*move)
+
+        return val
+
+    # Performs a temporal difference update of the model
+    # Q-learning: trains model based on move it would take, even if an exploratory path is chosen
+    def td_update(self, board, greedy_move=(), terminal=False):
+        if terminal:
+            assert board.who_won() != 2
+            assert greedy_move == ()
+            self.model.fit(board.history()[-2], self.raw_value(board), batch_size=1, verbose=0)
+        else:
+            self.model.fit(board.history()[-2], self.action_value(board, greedy_move), batch_size=1, verbose=0)

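Note on td_update: non-terminal updates are issued from Agent.action with the greedy move as the bootstrapped target, but the terminal update has to come from the training loop once the game is over, since only then does board.who_won() return the actual result. A hypothetical end-of-game call, continuing the self-play sketch after agent.py above (illustrative, not part of this commit):

# Illustrative: final TD update after a finished game. board.who_won() is now
# -1, 0, or 1, so raw_value() returns the exact outcome and the last stored
# position is fit towards the true result instead of a bootstrapped estimate.
if not board.game_ongoing():
    model.td_update(board, terminal=True)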
(The remaining 11 changed files in this commit are not shown here.)
