Skip to content

Commit

Permalink
See last commit message (woops)
Browse files Browse the repository at this point in the history
  • Loading branch information
fshcat committed Feb 9, 2022
1 parent b93bd9e commit 986f310
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 78 deletions.
23 changes: 1 addition & 22 deletions mnk.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,28 +83,7 @@ def num_legal_moves(self):

# Reshapes board into the form needed for the model
def get_board(self):
    """Return the board in the representation selected by self.form.

    NOTE(review): this hunk is a diff overlay — the named-form branches below
    were removed by this commit and the bare tuple return was added, with the
    reshaping moved to state_representation.get_input_rep. Documented here as
    the combined text reads; confirm against the post-commit file.
    """
    if self.form == "flatten":
        # 1 x 1 x (m*n) row vector
        return np.copy(self.board.reshape(1, 1, self.m * self.n))
    elif self.form == "planar":
        # 1 x m x n x 1 single-channel plane
        return np.copy(self.board.reshape(1, self.m, self.n, 1))
    elif self.form == "multiplanar":
        # Two binary planes: channel 0 marks +1 stones, channel 1 marks -1 stones
        board_planes = np.zeros((self.m, self.n, 2), dtype=int)
        for i in range(self.m):
            for j in range(self.n):
                if self.board[i][j] == 1:
                    board_planes[i][j][0] = 1
                elif self.board[i][j] == -1:
                    board_planes[i][j][1] = 1
        return np.copy(board_planes.reshape(1, self.m, self.n, 2))
    elif self.form == "multiplanar-turnflipped":
        # Same two planes, but channel 0 is the player to move, channel 1 the opponent
        board_planes = np.zeros((self.m, self.n, 2), dtype=int)
        for i in range(self.m):
            for j in range(self.n):
                if self.board[i][j] == self.player:
                    board_planes[i][j][0] = 1
                elif self.board[i][j] == -1*self.player:
                    board_planes[i][j][1] = 1
        return np.copy(board_planes.reshape(1, self.m, self.n, 2))
    # Fall-through: raw (board, player) tuple, as consumed by get_input_rep
    # in model.py (see the get_input_rep(board.get_board()) call sites).
    return (self.board, self.player)

def game_ongoing(self):
return not (self.player_has_lost() or (self.num_legal_moves() == 0))
Expand Down
9 changes: 5 additions & 4 deletions model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.optimizers import SGD
from state_representation import get_input_rep


class Model:
Expand Down Expand Up @@ -35,7 +36,7 @@ def raw_value(self, board):
if board.who_won() != 2:
return tf.constant(board.who_won(), dtype="float32", shape=(1, 1))
else:
return board.player*self.model(board.get_board())
return board.player*self.model(get_input_rep(board.get_board()))

def raw_action_value(self, board, move):
board.move(*move)
Expand All @@ -53,7 +54,7 @@ def state_value(self, board):
elif board.who_won() == -1*board.player:
return tf.constant(-1, dtype="float32", shape=(1, 1))
else:
return self.model(board.get_board())
return self.model(get_input_rep(board.get_board()))

# Returns the value of taking a move from the given board state
def action_value(self, board, move):
Expand Down Expand Up @@ -83,7 +84,7 @@ def td_update(self, board, greedy_move=None, terminal=False):
if terminal:
assert board.who_won() != 2
assert greedy_move is None
self.model.fit(board.history()[-2], self.state_value(board), batch_size=1, verbose=0, callbacks=[callback])
self.model.fit(get_input_rep(board.history()[-2]), self.state_value(board), batch_size=1, verbose=0, callbacks=[callback])
else:
self.model.fit(board.history()[-2], self.action_value(board, greedy_move), batch_size=1, verbose=0, callbacks=[callback])
self.model.fit(get_input_rep(board.history()[-2]), self.action_value(board, greedy_move), batch_size=1, verbose=0, callbacks=[callback])

9 changes: 6 additions & 3 deletions state_representation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@


# Reshapes board into the form needed for the model
def get_input_rep(self, board, form):
def get_input_rep(board, form="multiplanar-turnflipped"):
player = board[1]
board = board[0]
m, n = np.shape(board)

if form == "flatten":
Expand All @@ -22,11 +24,12 @@ def get_input_rep(self, board, form):
return np.copy(board_planes.reshape(1, m, n, 2))

elif form == "multiplanar-turnflipped":
assert player is not None
board_planes = np.zeros((m, n, 2), dtype=int)
for i in range(m):
for j in range(n):
if self.board[i][j] == self.player:
if board[i][j] == player:
board_planes[i][j][0] = 1
elif self.board[i][j] == -1 * self.player:
elif board[i][j] == -1 * player:
board_planes[i][j][1] = 1
return np.copy(board_planes.reshape(1, m, n, 2))
105 changes: 56 additions & 49 deletions train.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,55 +11,7 @@
mnk = (3, 3, 3)


def train(hof, loops, loop_length, epsilon, model):
    """Train `model` by self-play against hall-of-fame opponents.

    NOTE(review): this is the pre-commit (removed) version of train(); the
    commit replaces it with a variant wrapped in a KeyboardInterrupt handler.

    Args:
        hof: Hall-of-fame object (provides store/sample/gate) holding past models.
        loops: Number of outer training loops.
        loop_length: Number of training games played per loop.
        epsilon: Exploration rate passed to run_game for training games.
        model: The model being trained.

    Returns:
        (model, end_states, victories, games): the trained model plus, per
        loop, the diagnostic game's winner, that winner multiplied by the
        trained model's side, and the diagnostic game's move data.
    """
    end_states = []
    victories = []
    games = []

    # Initialize values: seed the hall of fame with the current model
    # and draw the first opponent snapshot from it.
    hof.store(model)
    model_hof = hof.sample()

    for loop in range(loops):
        print("\n loop: ", loop)

        # Randomly assign +1/-1 sides; the opponent takes the other side.
        side_best = [-1, 1][random.random() > 0.5]
        side_hof = side_best * -1

        for game in range(loop_length):
            # Initialize the agents
            agent_best = Agent(model, side_best)
            agent_hof = Agent(model_hof, side_hof)

            run_game(agent_best, agent_hof, epsilon, training=True)

            # Re-randomize sides for the next game (not a strict alternation).
            side_best = [-1, 1][random.random() > 0.5]
            side_hof = side_best * -1

            model_hof = hof.sample("uniform")

        # Update hall of fame and sample from it for the next loop
        hof.gate(model)

        # Flip sides once more for the diagnostic game.
        side_best *= -1
        side_hof = side_best * -1

        agent_best = Agent(model, side_best)
        agent_hof = Agent(model_hof, side_hof)

        # Run a diagnostic (non-training, no exploration) game to collect data
        diagnostic_winner, game_data = run_game(agent_best, agent_hof, 0, training=False, mnk=mnk)

        # Store data from loop
        games.append(game_data)
        end_states.append(diagnostic_winner)
        victories.append(diagnostic_winner*side_best)

    return model, end_states, victories, games


if __name__ == "__main__":
def main():
# Initialize hall of fame
hof = HOF(mnk, "menagerie")

Expand Down Expand Up @@ -96,3 +48,58 @@ def train(hof, loops, loop_length, epsilon, model):
for move in games[ind]:
print(move)
pass


def train(hof, loops, loop_length, epsilon, model):
    """Train `model` by self-play against hall-of-fame opponents.

    Args:
        hof: Hall-of-fame object (provides store/sample/gate) holding past models.
        loops: Number of outer training loops.
        loop_length: Number of training games played per loop.
        epsilon: Exploration rate passed to run_game for training games.
        model: The model being trained.

    Returns:
        (model, end_states, victories, games): the trained model plus, per
        loop, the diagnostic game's winner, that winner multiplied by the
        trained model's side, and the diagnostic game's move data. A
        KeyboardInterrupt stops training early and returns the data
        collected so far.
    """
    end_states = []
    victories = []
    games = []

    # Initialize values: seed the hall of fame with the current model
    # and draw the first opponent snapshot from it.
    hof.store(model)
    model_hof = hof.sample()

    try:
        for loop in range(loops):
            print("\n loop: ", loop)

            # Randomly assign +1/-1 sides; the opponent takes the other side.
            # (random.choice is the idiomatic form of [-1, 1][random.random() > 0.5].)
            side_best = random.choice((-1, 1))
            side_hof = -side_best

            for game in range(loop_length):
                # Initialize the agents
                agent_best = Agent(model, side_best)
                agent_hof = Agent(model_hof, side_hof)

                run_game(agent_best, agent_hof, epsilon, training=True)

                # Re-randomize sides for the next game (not a strict alternation).
                side_best = random.choice((-1, 1))
                side_hof = -side_best

                model_hof = hof.sample("uniform")

            # Update hall of fame and sample from it for the next loop
            hof.gate(model)

            # Flip sides once more for the diagnostic game.
            side_best = -side_best
            side_hof = -side_best

            agent_best = Agent(model, side_best)
            agent_hof = Agent(model_hof, side_hof)

            # Run a diagnostic (non-training, no exploration) game to collect data
            diagnostic_winner, game_data = run_game(agent_best, agent_hof, 0, training=False, mnk=mnk)

            # Store data from loop
            games.append(game_data)
            end_states.append(diagnostic_winner)
            victories.append(diagnostic_winner * side_best)
    except KeyboardInterrupt:
        # Allow Ctrl-C to end training gracefully, keeping data gathered so far.
        print("Training interrupted")

    return model, end_states, victories, games


# Script entry point: run training only when executed directly, not on import.
if __name__ == "__main__":
    main()

0 comments on commit 986f310

Please sign in to comment.