
Commit ad027ee
fshcat committed Feb 9, 2022
1 parent ad1eab4 commit ad027ee
Showing 6 changed files with 44 additions and 50 deletions.
agent.py (2 changes: 1 addition & 1 deletion)
@@ -34,7 +34,7 @@ def action(self, board, training=False, epsilon=0):
        assert len(legal_moves) > 0, "No legal moves can be played."

        greedy_move = self.greedy_action(board)
-        if training and len(board.history()) >= (2 + (self.player == -1)):
+        if training:
            self.model.td_update(board, greedy_move)

        # Exploration
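Taken together with the model.py change below, this hunk means the agent now requests a TD update on every training move; the guard that at least two states have been seen moves into Model.td_update itself. For orientation, a minimal sketch of the epsilon-greedy selection the surrounding method performs, assuming Board exposes a legal_moves() helper as the assert above implies (illustrative only, not the repository's exact code):

import random

def choose_move(agent, board, epsilon, training):
    # Illustrative epsilon-greedy selection: usually play the greedy move,
    # but with probability epsilon explore a random legal move while training.
    greedy_move = agent.greedy_action(board)
    if training and random.random() < epsilon:
        return random.choice(board.legal_moves())  # assumed Board helper
    return greedy_move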
hof.py (14 changes: 2 additions & 12 deletions)
@@ -6,6 +6,7 @@
from model import Model
from agent import Agent
from mnk import Board
+from utils import run_game


class HOF:
@@ -67,21 +68,10 @@ def winrate_matrix(self, iterations):
                model_i = Model("{}/{}".format(self.folder, self.hof[i]))
                model_j = Model("{}/{}".format(self.folder, self.hof[j]))

-                value = self.run_game(Agent(model_i, 1), Agent(model_j, -1))
+                value = run_game(Agent(model_i, 1), Agent(model_j, -1))
                matrix[-1].append(value)
        pyplot.imshow(matrix, cmap="bwr")

-    def run_game(self, agent1, agent2):
-        board = Board(*self.mnk, form="multiplanar-2", hist_length=-1)
-
-        while board.game_ongoing():
-            if board.player == agent1.player:
-                agent1.action(board)
-            else:
-                agent2.action(board)
-
-        return board.who_won()




mnk.py (7 changes: 3 additions & 4 deletions)
@@ -34,7 +34,7 @@ def del_history(self):
        if self.hist_length == -1 or len(self.board_history) < self.hist_length:
            self.board_history.pop()
        else:
-            for i in range(0,len(self.board_history)-1, -1):
+            for i in range(0, len(self.board_history)-1, -1):
                self.board_history[i+1] = self.board_history[i]
            self.board_history[0] = self.undo_buffer
        self.undo_buffer = np.zeros((self.m, self.n), dtype=int)
@@ -93,7 +93,7 @@ def get_board(self):
                    elif self.board[i][j] == -1:
                        board_planes[i][j][1] = 1
            return np.copy(board_planes.reshape(1, self.m, self.n, 2))
-        elif self.form == "multiplanar-2":
+        elif self.form == "multiplanar-turnflipped":
            board_planes = np.zeros((self.m, self.n, 2), dtype=int)
            for i in range(self.m):
                for j in range(self.n):
@@ -103,9 +103,8 @@ def get_board(self):
                        board_planes[i][j][1] = 1
            return np.copy(board_planes.reshape(1, self.m, self.n, 2))

-
    def game_ongoing(self):
-        return not ( self.player_has_lost() or (self.num_legal_moves() == 0) )
+        return not (self.player_has_lost() or (self.num_legal_moves() == 0))

    # Converting numbers to their respective game values
    @staticmethod
model.py (17 changes: 11 additions & 6 deletions)
@@ -4,21 +4,22 @@
from keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.optimizers import SGD

+
class Model:

-    def __init__(self, location=False):
+    def __init__(self, location=None):

        # If a location is provided, retrieve the model stored at that location
-        if location != False:
+        if location is not None:
            self.model = self.retrieve(location)
            return

        opt = SGD(learning_rate=0.02, momentum=0.0)

        self.model = Sequential()
-        self.model.add(Conv2D(48, 3, activation='relu', input_shape=(3,3,2)))
+        self.model.add(Conv2D(48, 3, activation='relu', input_shape=(3, 3, 2)))
        self.model.add(Flatten())
-        self.model.add(Dense(27, kernel_initializer='normal', activation='relu', input_shape=(1,18)))
+        self.model.add(Dense(27, kernel_initializer='normal', activation='relu', input_shape=(1, 18)))
        self.model.add(Dense(1, kernel_initializer='normal', activation='tanh'))

        self.model.compile(loss='mean_squared_error', optimizer=opt)
@@ -73,11 +74,15 @@ def scheduler(self, epoch, lr):
            return 0.001

    # Performs a temporal difference update of the model
-    def td_update(self, board, greedy_move=(), terminal=False):
+    def td_update(self, board, greedy_move=None, terminal=False):
+        # Ensures td_update is possible (agent has experienced 2 states)
+        if len(board.history()) < 3:
+            return
+
        callback = tf.keras.callbacks.LearningRateScheduler(self.scheduler)
        if terminal:
            assert board.who_won() != 2
-            assert greedy_move == ()
+            assert greedy_move is None
            self.model.fit(board.history()[-2], self.state_value(board), batch_size=1, verbose=0, callbacks=[callback])
        else:
            self.model.fit(board.history()[-2], self.action_value(board, greedy_move), batch_size=1, verbose=0, callbacks=[callback])
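For readers unfamiliar with the update above: it is a TD(0)-style backup. The network's output for the state the agent acted from (board.history()[-2]) is regressed toward a bootstrapped target, namely the terminal outcome once the game has ended and the predicted value after the greedy move otherwise. A self-contained sketch of that target selection, with plain numbers standing in for the repository's state_value/action_value calls (assumption: gamma = 1 and rewards only at the end of the game):

def td_target(value_after_greedy, terminal_outcome, terminal):
    # Terminal games regress toward the actual game result;
    # ongoing games bootstrap from the value predicted after the greedy move.
    return terminal_outcome if terminal else value_after_greedy

# Example: mid-game, the greedy successor is valued at 0.3, so the previous
# state's value is nudged toward 0.3; after a loss it is nudged toward -1.
print(td_target(0.3, None, terminal=False))   # 0.3
print(td_target(None, -1, terminal=True))     # -1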
train.py (30 changes: 3 additions & 27 deletions)
@@ -6,32 +6,10 @@
from model import Model
from plot import plot_wins
from hof import HOF
+from utils import run_game

mnk = (3, 3, 3)

-# Runs a game from start to end
-def run_game(agent_train, agent_versing, epsilon, training):
-    board = Board(*mnk, form="multiplanar-2", hist_length=-1)
-    game = []
-
-    while board.game_ongoing():
-        # Select a move
-        if board.player == agent_versing.player:
-            agent_versing.action(board)
-        else:
-            agent_train.action(board, training, epsilon)
-
-        # Store game for later analysis
-        game.append(board.__str__())
-
-    winner = board.who_won()
-
-    # Back up the terminal state value to the last action chosen by training agent
-    if winner != agent_train.player and training:
-        agent_train.model.td_update(board, terminal=True)
-
-    return winner, game


def train(hof, loops, loop_length, epsilon, model):
    end_states = []
@@ -43,7 +21,7 @@ def train(hof, loops, loop_length, epsilon, model):
    model_hof = hof.sample()

    for loop in range(loops):
-        print("\n loop: ",loop)
+        print("\n loop: ", loop)

        side_best = [-1, 1][random.random() > 0.5]
        side_hof = side_best * -1
@@ -71,14 +49,13 @@ def train(hof, loops, loop_length, epsilon, model):
        agent_hof = Agent(model_hof, side_hof)

        # Run a diagnostic (non-training, no exploration) game to collect data
-        diagnostic_winner, game_data = run_game(agent_best, agent_hof, 0, training=False)
+        diagnostic_winner, game_data = run_game(agent_best, agent_hof, 0, training=False, mnk=mnk)

        # Store data from loop
        games.append(game_data)
        end_states.append(diagnostic_winner)
        victories.append(diagnostic_winner*side_best)
-

    return model, end_states, victories, games


@@ -113,7 +90,6 @@ def train(hof, loops, loop_length, epsilon, model):
hof.winrate_matrix(150)
plt.show()
-

ind = 0
while ind != -1:
    ind = int(input("Query a game: "))
utils.py (24 changes: 24 additions & 0 deletions)
@@ -0,0 +1,24 @@
from mnk import Board


def run_game(agent_train, agent_versing, epsilon=0, training=False, mnk=(3, 3, 3)):
    board = Board(*mnk, form="multiplanar-turnflipped", hist_length=-1)
    game = []

    while board.game_ongoing():
        # Select a move
        if board.player == agent_versing.player:
            agent_versing.action(board)
        else:
            agent_train.action(board, training, epsilon)

        # Store game for later analysis
        game.append(board.__str__())

    winner = board.who_won()

    # Back up the terminal state value to the last action chosen by training agent
    if winner != agent_train.player and training:
        agent_train.model.td_update(board, terminal=True)

    return winner, game
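This new helper consolidates the two run_game copies deleted from hof.py and train.py above. A usage sketch matching the call sites elsewhere in this diff (the saved-model paths below are hypothetical; Model(location) loads a stored model and Agent pairs it with a side of +1 or -1):

from agent import Agent
from model import Model
from utils import run_game

agent_best = Agent(Model("models/best_model"), 1)   # hypothetical saved-model path
agent_hof = Agent(Model("models/old_model"), -1)    # hypothetical saved-model path

# Diagnostic game: no exploration, no training updates, default 3x3x3 board
winner, game_data = run_game(agent_best, agent_hof, epsilon=0, training=False)
print(winner)          # result reported by Board.who_won()
print(len(game_data))  # number of board positions recorded during the game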
