Merge remote-tracking branch 'origin/testing' into main
Showing 15 changed files with 256 additions and 137 deletions.
Changed file (Agent class):

@@ -1,35 +1,47 @@
 import mnk
-import keras.models
-import tensorflow as tf
 import random
 
 
 class Agent:
 
-    def __init__(self, board, model, player):
-        self.board = board
+    def __init__(self, model, player):
         self.model = model
         self.player = player
 
-    def action(self, epsilon=0.01):
-        legal_moves = self.board.legal_moves()
+    def greedy_action(self, board):
+        legal_moves = board.legal_moves()
         assert len(legal_moves) > 0, "No legal moves can be played."
 
-        # Exploration
-        if (random.random() < epsilon):
-            print("Played epsilon move ({:.5f})".format(epsilon))
-            self.board.move(*legal_moves[random.randint(0, len(legal_moves) - 1)])
-            return
-
-        best_move = legal_moves[-1]
+        best_move = legal_moves[0]
         max_evaluation = -1
 
         for move in legal_moves:
-            self.board.move(*move)
-            evaluation = self.player * self.model(self.board.get_board())
-            if evaluation > max_evaluation:
+            val = self.model.action_value(board, move)
+            if val > max_evaluation:
                 best_move = move
-                max_evaluation = evaluation
-            self.board.undo_move(*move)
-        self.board.move(*best_move)
+                max_evaluation = val
+
+        return best_move
+
+    def random_action(self, board):
+        legal_moves = board.legal_moves()
+        return legal_moves[random.randint(0, len(legal_moves) - 1)]
+
+    def action(self, board, training, epsilon=0):
+        legal_moves = board.legal_moves()
+        assert len(legal_moves) > 0, "No legal moves can be played."
+
+        greedy_move = self.greedy_action(board)
+        if training and len(board.history()) >= (2 + (self.player == -1)):
+            self.model.td_update(board, greedy_move)
+
+        # Exploration
+        if random.random() < epsilon:
+            move = self.random_action(board)
+        else:
+            move = greedy_move
+
+        board.move(*move)
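For context, here is how the reworked interface might be driven: the Agent no longer owns a board, so a caller constructs one, alternates turns, and controls exploration via epsilon. This driver is a hypothetical sketch, not part of the commit; the mnk.Board constructor arguments, the agent module name, the decision to update only model_x at the terminal state, and the convention that who_won() returns 2 while the game is in progress are assumptions read off the calls visible in this diff.

```python
# Hypothetical self-play driver for the new Agent API (not in this commit).
import mnk               # Board API assumed from the calls in the diff
from agent import Agent  # module name assumed


def play_episode(model_x, model_o, training=True, epsilon=0.1):
    board = mnk.Board(3, 3, 3)  # assumed (m, n, k) constructor
    agents = {1: Agent(model_x, 1), -1: Agent(model_o, -1)}

    # Model.raw_value() treats who_won() == 2 as "game still in progress"
    while board.who_won() == 2:
        agents[board.player].action(board, training, epsilon)

    if training:
        # Terminal update anchors the value net to the actual result
        model_x.td_update(board, terminal=True)
    return board.who_won()
```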
Changed file (HOF class):

@@ -1,23 +1,55 @@
 import random
-import tensorflow as tf
-from math import floor
 from matplotlib import pyplot
 import os
+from math import floor
+from model import Model
 
 
 class HOF:
     def __init__(self, folder):
         self.hof = []
         self.folder = folder
+        self.sample_history = []
+        self.pop_size = 0
+        self.basel = 0  # Used in limit-uniform sampling
         if not os.path.isdir(folder):
             os.makedirs(folder)
 
-    def store(self, model, name):
-        model.save("{}/{}".format(self.folder, name))
-        self.hof.append(name)
+    def store(self, model):
+        model.save_to("{}/{}".format(self.folder, self.pop_size))
+        self.hof.append(self.pop_size)
+        self.pop_size += 1
+        self.basel += 1/self.pop_size**2
+
+    # Gating method decides whether to add the model to the hall of fame
+    def gate(self, model):
+        # Simple gating method, stores model after every training episode
+        self.store(model)
+
+    # Samples from the hall of fame with the provided method
+    def sample(self, method='uniform'):
+        if method == 'limit-uniform':  # Performs poorly. Do not use.
+            threshold = random.random()*self.basel
+
+            cum_prob = 0
+            ind = self.pop_size-1
+            for i in range(self.pop_size):
+                cum_prob += 1/(self.pop_size-i)**2
+                if cum_prob > threshold:
+                    ind = i
+                    break
+        elif method == 'uniform':
+            ind = floor(random.random()*self.pop_size)
+
+        self.sample_history.append(ind)
 
-    def sample_hof(self):
-        pop_size = len(self.hof)
-        ind = floor(pop_size*random.random())
         name = self.hof[ind]
-        return tf.keras.models.load_model("{}/{}".format(self.folder, name))
+        return Model("{}/{}".format(self.folder, name))
+
+    # Displays a histogram of the model iterations sampled from the hall of fame
+    def sample_histogram(self, num=100):
+        pyplot.hist(self.sample_history, num)
+        pyplot.title("Sampling of Model Indices from HOF")
+        pyplot.show()
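The 'limit-uniform' branch (which the comment warns performs poorly) gives hall-of-fame entry i of n the weight 1/(n-i)², normalized by self.basel, which store() maintains as the running sum 1 + 1/4 + ... + 1/n². With n = 4 the weights are 1/16, 1/9, 1/4, and 1 against a total of about 1.424, so the newest model is drawn roughly 70% of the time. Below is that logic extracted into a standalone function for clarity; the function name and signature are illustrative, not from the repository.

```python
import random
from math import floor


def sample_index(pop_size, basel, method="uniform"):
    """Pick a hall-of-fame index using the same math as HOF.sample()."""
    if method == "limit-uniform":
        # basel == 1 + 1/4 + ... + 1/pop_size**2, kept current by store()
        threshold = random.random() * basel
        cum_prob = 0
        for i in range(pop_size):
            # weight 1/(pop_size - i)**2: recent entries get most of the mass
            cum_prob += 1 / (pop_size - i) ** 2
            if cum_prob > threshold:
                return i
        return pop_size - 1  # fallback, mirroring ind's initial value
    # 'uniform': every stored model is equally likely
    return floor(random.random() * pop_size)
```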
Changed file (model.py, Model class):

@@ -1,22 +1,61 @@
 import mnk
 import pandas as pd
 import tensorflow as tf
 from keras.models import Sequential
-from keras.layers import Dense, Conv2D, Flatten, Dropout
-from tensorflow.keras.optimizers import Adadelta
-
-learning_rate = 1.0
-rho = 0.995
-epsilon = 1e-07
-sgd = Adadelta(lr=learning_rate, rho=rho, epsilon=epsilon)
-
-modelXO = Sequential()
-modelXO.add(Conv2D(12, 3, padding="same", input_shape=(3, 3, 1), activation='tanh', kernel_initializer="he_normal"))
-modelXO.add(Dropout(0.1))
-modelXO.add(Conv2D(9, 2, padding="valid", input_shape=(3, 3, 1), activation='tanh', kernel_initializer="he_normal"))
-modelXO.add(Dropout(0.1))
-modelXO.add(Flatten())
-modelXO.add(Dense(18, kernel_initializer='normal', activation='tanh'))
-modelXO.add(Dense(1, kernel_initializer='normal', activation='tanh'))
-
-modelXO.compile(loss='mean_squared_error', optimizer=sgd)
+from keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
+from tensorflow.keras.optimizers import Adam
+
+
+class Model:
+
+    def __init__(self, location=False):
+
+        # If a location is provided, retrieve the model stored at that location
+        if location != False:
+            self.model = self.retrieve(location)
+            return
+
+        opt = Adam(learning_rate=0.1, beta_1=0.9, beta_2=0.999)
+
+        self.model = Sequential()
+        self.model.add(Dense(27, input_shape=(1, 9), kernel_initializer='normal', activation='tanh'))
+        self.model.add(Dense(27, kernel_initializer='normal', activation='tanh'))
+        self.model.add(Dense(1, kernel_initializer='normal', activation='tanh'))
+
+        self.model.compile(loss='mean_squared_error', optimizer=opt)
+
+    def retrieve(self, location):
+        return tf.keras.models.load_model(location)
+
+    def save_to(self, location):
+        self.model.save(location)
+
+    # Values closer to 1 mean X advantage, -1 means O advantage
+    def raw_value(self, board):
+        if board.who_won() != 2:
+            return tf.constant(board.who_won(), dtype="float32", shape=(1, 1))
+        else:
+            return self.model(board.get_board())
+
+    # Changes 1 to mean the supplied player is at advantage, -1 disadvantage
+    def state_value(self, board, player):
+        return player * self.raw_value(board)
+
+    # Returns the value of taking a move from the given board state
+    def action_value(self, board, move):
+        player = board.player
+
+        board.move(*move)
+        val = self.state_value(board, player)
+        board.undo_move(*move)
+
+        return val
+
+    # Performs a temporal difference update of the model
+    # Q-learning: trains model based on move it would take, even if an exploratory path is chosen
+    def td_update(self, board, greedy_move=(), terminal=False):
+        if terminal:
+            assert board.who_won() != 2
+            assert greedy_move == ()
+            self.model.fit(board.history()[-2], self.raw_value(board), batch_size=1, verbose=0)
+        else:
+            self.model.fit(board.history()[-2], self.action_value(board, greedy_move), batch_size=1, verbose=0)
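td_update is one-step Q-learning: the training input is board.history()[-2], the position the updating player created on its previous turn, and the label is either the true result (terminal case, where raw_value returns who_won()) or the bootstrapped value of the current greedy move. The guard in Agent.action, len(board.history()) >= (2 + (self.player == -1)), appears to exist so that history()[-2] is already one of the updating player's own states. The sketch below restates the non-terminal branch with illustrative names; model stands for a Model instance from this file.

```python
# Illustrative restatement of Model.td_update's non-terminal branch.
#
#   history()[-2] -> state after this player's previous move (network input)
#   history()[-1] -> state after the opponent's reply (current state)
#   target        -> action_value(board, greedy_move): value of the best
#                    position reachable from here, signed for the mover
def td_update_sketch(model, board, greedy_move):
    prev_state = board.history()[-2]                 # training input
    target = model.action_value(board, greedy_move)  # bootstrapped TD target
    # One-sample regression step toward the target under MSE loss
    model.model.fit(prev_state, target, batch_size=1, verbose=0)
```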