From 8cd09fe3a8be2dedd95686c786b27961b73f59e3 Mon Sep 17 00:00:00 2001 From: "Thouis (Ray) Jones" Date: Thu, 22 Sep 2016 09:29:04 -0400 Subject: [PATCH] global replace of tab with 4-spaces --- AlphaGo/ai.py | 158 ++-- AlphaGo/go.py | 848 +++++++++--------- AlphaGo/mcts.py | 402 ++++----- AlphaGo/models/nn_util.py | 224 ++--- AlphaGo/models/policy.py | 488 +++++----- AlphaGo/preprocessing/game_converter.py | 392 ++++---- AlphaGo/preprocessing/preprocessing.py | 452 +++++----- .../training/reinforcement_policy_trainer.py | 526 +++++------ AlphaGo/training/supervised_policy_trainer.py | 388 ++++---- AlphaGo/util.py | 190 ++-- benchmarks/preprocessing_benchmark.py | 4 +- ...reinforcement_policy_training_benchmark.py | 4 +- .../supervised_policy_training_benchmark.py | 2 +- interface/Play.py | 54 +- interface/gtp_wrapper.py | 270 +++--- tests/test_game_converter.py | 24 +- tests/test_gamestate.py | 298 +++--- tests/test_gtp_wrapper.py | 30 +- tests/test_liberties.py | 70 +- tests/test_mcts.py | 208 ++--- tests/test_policy.py | 222 ++--- tests/test_preprocessing.py | 636 ++++++------- tests/test_reinforcement_policy_trainer.py | 228 ++--- tests/test_supervised_policy_trainer.py | 22 +- 24 files changed, 3070 insertions(+), 3070 deletions(-) diff --git a/AlphaGo/ai.py b/AlphaGo/ai.py index 593e856da..437759960 100644 --- a/AlphaGo/ai.py +++ b/AlphaGo/ai.py @@ -5,95 +5,95 @@ class GreedyPolicyPlayer(object): - """A player that uses a greedy policy (i.e. chooses the highest probability - move each turn) - """ + """A player that uses a greedy policy (i.e. chooses the highest probability + move each turn) + """ - def __init__(self, policy_function, pass_when_offered=False, move_limit=None): - self.policy = policy_function - self.pass_when_offered = pass_when_offered - self.move_limit = move_limit + def __init__(self, policy_function, pass_when_offered=False, move_limit=None): + self.policy = policy_function + self.pass_when_offered = pass_when_offered + self.move_limit = move_limit - def get_move(self, state): - if self.move_limit is not None and len(state.history) > self.move_limit: - return go.PASS_MOVE - if self.pass_when_offered: - if len(state.history) > 100 and state.history[-1] == go.PASS_MOVE: - return go.PASS_MOVE - sensible_moves = [move for move in state.get_legal_moves(include_eyes=False)] - if len(sensible_moves) > 0: - move_probs = self.policy.eval_state(state, sensible_moves) - max_prob = max(move_probs, key=lambda (a, p): p) - return max_prob[0] - # No 'sensible' moves available, so do pass move - return go.PASS_MOVE + def get_move(self, state): + if self.move_limit is not None and len(state.history) > self.move_limit: + return go.PASS_MOVE + if self.pass_when_offered: + if len(state.history) > 100 and state.history[-1] == go.PASS_MOVE: + return go.PASS_MOVE + sensible_moves = [move for move in state.get_legal_moves(include_eyes=False)] + if len(sensible_moves) > 0: + move_probs = self.policy.eval_state(state, sensible_moves) + max_prob = max(move_probs, key=lambda (a, p): p) + return max_prob[0] + # No 'sensible' moves available, so do pass move + return go.PASS_MOVE class ProbabilisticPolicyPlayer(object): - """A player that samples a move in proportion to the probability given by the - policy. + """A player that samples a move in proportion to the probability given by the + policy. 
- By manipulating the 'temperature', moves can be pushed towards totally random - (high temperature) or towards greedy play (low temperature) - """ + By manipulating the 'temperature', moves can be pushed towards totally random + (high temperature) or towards greedy play (low temperature) + """ - def __init__(self, policy_function, temperature=1.0, pass_when_offered=False, move_limit=None): - assert(temperature > 0.0) - self.policy = policy_function - self.move_limit = move_limit - self.beta = 1.0 / temperature - self.pass_when_offered = pass_when_offered - self.move_limit = move_limit + def __init__(self, policy_function, temperature=1.0, pass_when_offered=False, move_limit=None): + assert(temperature > 0.0) + self.policy = policy_function + self.move_limit = move_limit + self.beta = 1.0 / temperature + self.pass_when_offered = pass_when_offered + self.move_limit = move_limit - def get_move(self, state): - if self.move_limit is not None and len(state.history) > self.move_limit: - return go.PASS_MOVE - if self.pass_when_offered: - if len(state.history) > 100 and state.history[-1] == go.PASS_MOVE: - return go.PASS_MOVE - sensible_moves = [move for move in state.get_legal_moves(include_eyes=False)] - if len(sensible_moves) > 0: - move_probs = self.policy.eval_state(state, sensible_moves) - # zip(*list) is like the 'transpose' of zip; zip(*zip([1,2,3], [4,5,6])) is [(1,2,3), (4,5,6)] - moves, probabilities = zip(*move_probs) - probabilities = np.array(probabilities) - probabilities = probabilities ** self.beta - probabilities = probabilities / probabilities.sum() - # numpy interprets a list of tuples as 2D, so we must choose an _index_ of moves then apply it in 2 steps - choice_idx = np.random.choice(len(moves), p=probabilities) - return moves[choice_idx] - return go.PASS_MOVE + def get_move(self, state): + if self.move_limit is not None and len(state.history) > self.move_limit: + return go.PASS_MOVE + if self.pass_when_offered: + if len(state.history) > 100 and state.history[-1] == go.PASS_MOVE: + return go.PASS_MOVE + sensible_moves = [move for move in state.get_legal_moves(include_eyes=False)] + if len(sensible_moves) > 0: + move_probs = self.policy.eval_state(state, sensible_moves) + # zip(*list) is like the 'transpose' of zip; zip(*zip([1,2,3], [4,5,6])) is [(1,2,3), (4,5,6)] + moves, probabilities = zip(*move_probs) + probabilities = np.array(probabilities) + probabilities = probabilities ** self.beta + probabilities = probabilities / probabilities.sum() + # numpy interprets a list of tuples as 2D, so we must choose an _index_ of moves then apply it in 2 steps + choice_idx = np.random.choice(len(moves), p=probabilities) + return moves[choice_idx] + return go.PASS_MOVE - def get_moves(self, states): - """Batch version of get_move. 
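A minimal standalone sketch of the temperature re-weighting that ProbabilisticPolicyPlayer.get_move applies before sampling; the move list and probabilities below are hypothetical and serve only to illustrate the effect of beta = 1/temperature:

import numpy as np

moves = [(3, 3), (15, 15), (2, 16)]        # hypothetical legal moves
probs = np.array([0.5, 0.3, 0.2])          # hypothetical policy output

def sample_move(moves, probs, temperature=1.0):
    # Raising to beta = 1/temperature sharpens (T < 1) or flattens (T > 1)
    # the distribution; renormalize afterwards so it still sums to 1.
    beta = 1.0 / temperature
    p = probs ** beta
    p = p / p.sum()
    # numpy would treat a list of (x, y) tuples as a 2-D array, so sample an
    # index and look the move up afterwards, as get_move does.
    idx = np.random.choice(len(moves), p=p)
    return moves[idx]

print(sample_move(moves, probs, temperature=0.5))   # biased towards (3, 3)
print(sample_move(moves, probs, temperature=2.0))   # closer to uniform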
A list of moves is returned (one per state) - """ - sensible_move_lists = [[move for move in st.get_legal_moves(include_eyes=False)] for st in states] - all_moves_distributions = self.policy.batch_eval_state(states, sensible_move_lists) - move_list = [None] * len(states) - for i, move_probs in enumerate(all_moves_distributions): - if len(move_probs) == 0 or len(states[i].history) > self.move_limit: - move_list[i] = go.PASS_MOVE - else: - # this 'else' clause is identical to ProbabilisticPolicyPlayer.get_move - moves, probabilities = zip(*move_probs) - probabilities = np.array(probabilities) - probabilities = probabilities ** self.beta - probabilities = probabilities / probabilities.sum() - choice_idx = np.random.choice(len(moves), p=probabilities) - move_list[i] = moves[choice_idx] - return move_list + def get_moves(self, states): + """Batch version of get_move. A list of moves is returned (one per state) + """ + sensible_move_lists = [[move for move in st.get_legal_moves(include_eyes=False)] for st in states] + all_moves_distributions = self.policy.batch_eval_state(states, sensible_move_lists) + move_list = [None] * len(states) + for i, move_probs in enumerate(all_moves_distributions): + if len(move_probs) == 0 or len(states[i].history) > self.move_limit: + move_list[i] = go.PASS_MOVE + else: + # this 'else' clause is identical to ProbabilisticPolicyPlayer.get_move + moves, probabilities = zip(*move_probs) + probabilities = np.array(probabilities) + probabilities = probabilities ** self.beta + probabilities = probabilities / probabilities.sum() + choice_idx = np.random.choice(len(moves), p=probabilities) + move_list[i] = moves[choice_idx] + return move_list class MCTSPlayer(object): - def __init__(self, value_function, policy_function, rollout_function, lmbda=.5, c_puct=5, rollout_limit=500, playout_depth=40, n_playout=100): - self.mcts = mcts.MCTS(value_function, policy_function, rollout_function, lmbda, c_puct, - rollout_limit, playout_depth, n_playout) + def __init__(self, value_function, policy_function, rollout_function, lmbda=.5, c_puct=5, rollout_limit=500, playout_depth=40, n_playout=100): + self.mcts = mcts.MCTS(value_function, policy_function, rollout_function, lmbda, c_puct, + rollout_limit, playout_depth, n_playout) - def get_move(self, state): - sensible_moves = [move for move in state.get_legal_moves(include_eyes=False)] - if len(sensible_moves) > 0: - move = self.mcts.get_move(state) - self.mcts.update_with_move(move) - return move - # No 'sensible' moves available, so do pass move - return go.PASS_MOVE + def get_move(self, state): + sensible_moves = [move for move in state.get_legal_moves(include_eyes=False)] + if len(sensible_moves) > 0: + move = self.mcts.get_move(state) + self.mcts.update_with_move(move) + return move + # No 'sensible' moves available, so do pass move + return go.PASS_MOVE diff --git a/AlphaGo/go.py b/AlphaGo/go.py index 67b6c7269..48b696fda 100644 --- a/AlphaGo/go.py +++ b/AlphaGo/go.py @@ -7,430 +7,430 @@ class GameState(object): - """State of a game of Go and some basic functions to interact with it - """ - - # Looking up positions adjacent to a given position takes a surprising - # amount of time, hence this shared lookup table {boardsize: {position: [neighbors]}} - __NEIGHBORS_CACHE = {} - - def __init__(self, size=19, komi=7.5, enforce_superko=False): - self.board = np.zeros((size, size)) - self.board.fill(EMPTY) - self.size = size - self.current_player = BLACK - self.ko = None - self.komi = komi # Komi is number of extra points WHITE gets for 
going 2nd - self.handicaps = [] - self.history = [] - self.num_black_prisoners = 0 - self.num_white_prisoners = 0 - self.is_end_of_game = False - # Each pass move by a player subtracts a point - self.passes_white = 0 - self.passes_black = 0 - # `self.liberty_sets` is a 2D array with the same indexes as `board` - # each entry points to a set of tuples - the liberties of a stone's - # connected block. By caching liberties in this way, we can directly - # optimize update functions (e.g. do_move) and in doing so indirectly - # speed up any function that queries liberties - self._create_neighbors_cache() - self.liberty_sets = [[set() for _ in range(size)] for _ in range(size)] - for x in range(size): - for y in range(size): - self.liberty_sets[x][y] = set(self._neighbors((x, y))) - # separately cache the 2D numpy array of the _size_ of liberty sets - # at each board position - self.liberty_counts = np.zeros((size, size), dtype=np.int) - self.liberty_counts.fill(-1) - # initialize liberty_sets of empty board: the set of neighbors of each position - # similarly to `liberty_sets`, `group_sets[x][y]` points to a set of tuples - # containing all (x',y') pairs in the group connected to (x,y) - self.group_sets = [[set() for _ in range(size)] for _ in range(size)] - # cache of list of legal moves (actually 'sensible' moves, with a separate list for eye-moves on request) - self.__legal_move_cache = None - self.__legal_eyes_cache = None - # on-the-fly record of 'age' of each stone - self.stone_ages = np.zeros((size, size), dtype=np.int) - 1 - - # setup Zobrist hash to keep track of board state - self.enforce_superko = enforce_superko - rng = np.random.RandomState(0) - self.hash_lookup = { - WHITE: rng.randint(np.iinfo(np.uint64).max, size=(size, size), dtype='uint64'), - BLACK: rng.randint(np.iinfo(np.uint64).max, size=(size, size), dtype='uint64')} - self.current_hash = np.uint64(0) - self.previous_hashes = set() - - def get_group(self, position): - """Get the group of connected same-color stones to the given position - Keyword arguments: - position -- a tuple of (x, y) - x being the column index of the starting position of the search - y being the row index of the starting position of the search - Return: - a set of tuples consist of (x, y)s which are the same-color cluster - which contains the input single position. len(group) is size of the cluster, can be large. - """ - (x, y) = position - # given that this is already cached, it is a fast lookup - return self.group_sets[x][y] - - def get_groups_around(self, position): - """returns a list of the unique groups adjacent to position - 'unique' means that, for example in this position: - . . . . . - . B W . . - . W W . . - . . . . . - . . . . . 
- only the one white group would be returned on get_groups_around((1,1)) - """ - groups = [] - for (nx, ny) in self._neighbors(position): - group = self.group_sets[nx][ny] - if len(group) > 0 and group not in groups: - groups.append(self.group_sets[nx][ny]) - return groups - - def _on_board(self, position): - """simply return True iff position is within the bounds of [0, self.size) - """ - (x, y) = position - return x >= 0 and y >= 0 and x < self.size and y < self.size - - def _create_neighbors_cache(self): - if self.size not in GameState.__NEIGHBORS_CACHE: - GameState.__NEIGHBORS_CACHE[self.size] = {} - for x in xrange(self.size): - for y in xrange(self.size): - neighbors = [xy for xy in [(x - 1, y), (x + 1, y), (x, y - 1), (x, y + 1)] if self._on_board(xy)] - GameState.__NEIGHBORS_CACHE[self.size][(x, y)] = neighbors - - def _neighbors(self, position): - """A private helper function that simply returns a list of positions neighboring - the given (x,y) position. Basically it handles edges and corners. - """ - return GameState.__NEIGHBORS_CACHE[self.size][position] - - def _diagonals(self, position): - """Like _neighbors but for diagonal positions - """ - (x, y) = position - return filter(self._on_board, [(x - 1, y - 1), (x + 1, y + 1), (x + 1, y - 1), (x - 1, y + 1)]) - - def _update_neighbors(self, position): - """A private helper function to update self.group_sets and self.liberty_sets - given that a stone was just played at `position` - """ - (x, y) = position - - merged_group = set() - merged_group.add(position) - merged_libs = self.liberty_sets[x][y] - for (nx, ny) in self._neighbors(position): - # remove (x,y) from liberties of neighboring positions - self.liberty_sets[nx][ny] -= set([position]) - # if neighbor was opponent, update group's liberties count - # (current_player's groups will be updated below regardless) - if self.board[nx][ny] == -self.current_player: - new_liberty_count = len(self.liberty_sets[nx][ny]) - for (gx, gy) in self.group_sets[nx][ny]: - self.liberty_counts[gx][gy] = new_liberty_count - # MERGE group/liberty sets if neighbor is the same color - # note: this automatically takes care of merging two separate - # groups that just became connected through (x,y) - elif self.board[x][y] == self.board[nx][ny]: - merged_group |= self.group_sets[nx][ny] - merged_libs |= self.liberty_sets[nx][ny] - - # now that we have one big 'merged' set for groups and liberties, loop - # over every member of the same-color group to update them - # Note: neighboring opponent groups are already updated in the previous loop - count_merged_libs = len(merged_libs) - for (gx, gy) in merged_group: - self.group_sets[gx][gy] = merged_group - self.liberty_sets[gx][gy] = merged_libs - self.liberty_counts[gx][gy] = count_merged_libs - - def _update_hash(self, action, color): - (x, y) = action - self.current_hash = np.bitwise_xor(self.current_hash, self.hash_lookup[color][x][y]) - - def _remove_group(self, group): - """A private helper function to take a group off the board (due to capture), - updating group sets and liberties along the way - """ - for (x, y) in group: - self._update_hash((x, y), self.board[x, y]) - self.board[x, y] = EMPTY - for (x, y) in group: - # clear group_sets for all positions in 'group' - self.group_sets[x][y] = set() - self.liberty_sets[x][y] = set() - self.liberty_counts[x][y] = -1 - self.stone_ages[x][y] = -1 - for (nx, ny) in self._neighbors((x, y)): - if self.board[nx, ny] == EMPTY: - # add empty neighbors of (x,y) to its liberties - self.liberty_sets[x][y].add((nx, 
ny)) - else: - # add (x,y) to the liberties of its nonempty neighbors - self.liberty_sets[nx][ny].add((x, y)) - for (gx, gy) in self.group_sets[nx][ny]: - self.liberty_counts[gx][gy] = len(self.liberty_sets[nx][ny]) - - def copy(self): - """get a copy of this Game state - """ - other = GameState(self.size, self.komi) - other.board = self.board.copy() - other.current_player = self.current_player - other.ko = self.ko - other.handicaps = list(self.handicaps) - other.history = list(self.history) - other.num_black_prisoners = self.num_black_prisoners - other.num_white_prisoners = self.num_white_prisoners - other.enforce_superko = self.enforce_superko - other.current_hash = self.current_hash.copy() - other.previous_hashes = self.previous_hashes.copy() - - # update liberty and group sets. Note: calling set(a) on another set - # copies the entries (any iterable as an argument would work so - # set(list(a)) is unnecessary) - for x in range(self.size): - for y in range(self.size): - other.group_sets[x][y] = set(self.group_sets[x][y]) - other.liberty_sets[x][y] = set(self.liberty_sets[x][y]) - other.liberty_counts = self.liberty_counts.copy() - return other - - def is_suicide(self, action): - """return true if having current_player play at would be suicide - """ - (x, y) = action - num_liberties_here = len(self.liberty_sets[x][y]) - if num_liberties_here == 0: - # no liberties here 'immediately' - # but this may still connect to another group of the same color - for (nx, ny) in self._neighbors(action): - # check if we're saved by attaching to a friendly group that has - # liberties elsewhere - is_friendly_group = self.board[nx, ny] == self.current_player - group_has_other_liberties = len(self.liberty_sets[nx][ny] - set([action])) > 0 - if is_friendly_group and group_has_other_liberties: - return False - # check if we're killing an unfriendly group - is_enemy_group = self.board[nx, ny] == -self.current_player - if is_enemy_group and (not group_has_other_liberties): - return False - # checked all the neighbors, and it doesn't look good. - return True - return False - - def is_positional_superko(self, action): - """Find all actions that the current_player has done in the past, taking into account the fact that - history starts with BLACK when there are no handicaps or with WHITE when there are. - """ - if len(self.handicaps) == 0 and self.current_player == BLACK: - player_history = self.history[0::2] - elif len(self.handicaps) > 0 and self.current_player == WHITE: - player_history = self.history[0::2] - else: - player_history = self.history[1::2] - - if action not in self.handicaps and action not in player_history: - return False - - state_copy = self.copy() - state_copy.enforce_superko = False - state_copy.do_move(action) - - if state_copy.current_hash in self.previous_hashes: - return True - else: - return False - - def is_legal(self, action): - """determine if the given action (x,y tuple) is a legal move - note: we only check ko, not superko at this point (TODO?) 
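The superko test above leans on an incrementally maintained Zobrist hash: placing or removing a stone XORs one pre-drawn 64-bit value into current_hash, so repeating a whole-board position reproduces an earlier hash value. A small self-contained sketch of that bookkeeping; the board size and helper name here are illustrative, not taken from the patch:

import numpy as np

SIZE = 5
BLACK, WHITE = 1, -1
rng = np.random.RandomState(0)
hash_lookup = {
    color: rng.randint(np.iinfo(np.uint64).max, size=(SIZE, SIZE), dtype='uint64')
    for color in (BLACK, WHITE)
}
current_hash = np.uint64(0)

def toggle(position, color):
    # XOR the stone's random value in or out; applying it twice cancels out.
    global current_hash
    (x, y) = position
    current_hash = np.bitwise_xor(current_hash, hash_lookup[color][x][y])

toggle((2, 2), BLACK)                 # play a black stone
hash_with_stone = current_hash
toggle((2, 2), BLACK)                 # the stone is captured again
assert current_hash == np.uint64(0)   # back to the empty-board hash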
- """ - # passing is always legal - if action is PASS_MOVE: - return True - (x, y) = action - if not self._on_board(action): - return False - if self.board[x][y] != EMPTY: - return False - if self.is_suicide(action): - return False - if action == self.ko: - return False - if self.enforce_superko and self.is_positional_superko(action): - return False - return True - - def is_eyeish(self, position, owner): - """returns whether the position is empty and is surrounded by all stones of 'owner' - """ - (x, y) = position - if self.board[x, y] != EMPTY: - return False - - for (nx, ny) in self._neighbors(position): - if self.board[nx, ny] != owner: - return False - return True - - def is_eye(self, position, owner, stack=[]): - """returns whether the position is a true eye of 'owner' - Requires a recursive call; empty spaces diagonal to 'position' are fine - as long as they themselves are eyes - """ - if not self.is_eyeish(position, owner): - return False - # (as in Fuego/Michi/etc) ensure that num "bad" diagonals is 0 (edges) or 1 - # where a bad diagonal is an opponent stone or an empty non-eye space - num_bad_diagonal = 0 - # if in middle of board, 1 bad neighbor is allowable; zero for edges and corners - allowable_bad_diagonal = 1 if len(self._neighbors(position)) == 4 else 0 - - for d in self._diagonals(position): - # opponent stones count against this being eye - if self.board[d] == -owner: - num_bad_diagonal += 1 - # empty spaces (that aren't themselves eyes) count against it too - # the 'stack' keeps track of where we've already been to prevent - # infinite loops of recursion - elif self.board[d] == EMPTY and d not in stack: - stack.append(position) - if not self.is_eye(d, owner, stack): - num_bad_diagonal += 1 - stack.pop() - # at any point, if we've surpassed # allowable, we can stop - if num_bad_diagonal > allowable_bad_diagonal: - return False - return True - - def get_legal_moves(self, include_eyes=True): - if self.__legal_move_cache is not None: - if include_eyes: - return self.__legal_move_cache + self.__legal_eyes_cache - else: - return self.__legal_move_cache - self.__legal_move_cache = [] - self.__legal_eyes_cache = [] - for x in range(self.size): - for y in range(self.size): - if self.is_legal((x, y)): - if not self.is_eye((x, y), self.current_player): - self.__legal_move_cache.append((x, y)) - else: - self.__legal_eyes_cache.append((x, y)) - return self.get_legal_moves(include_eyes) - - def get_winner(self): - """Calculate score of board state and return player ID (1, -1, or 0 for tie) - corresponding to winner. Uses 'Area scoring'. 
- """ - # Count number of positions filled by each player, plus 1 for each eye-ish space owned - score_white = np.sum(self.board == WHITE) - score_black = np.sum(self.board == BLACK) - empties = zip(*np.where(self.board == EMPTY)) - for empty in empties: - # Check that all surrounding points are of one color - if self.is_eyeish(empty, BLACK): - score_black += 1 - elif self.is_eyeish(empty, WHITE): - score_white += 1 - score_white += self.komi - score_white -= self.passes_white - score_black -= self.passes_black - if score_black > score_white: - winner = BLACK - elif score_white > score_black: - winner = WHITE - else: - # Tie - winner = 0 - return winner - - def place_handicaps(self, actions): - if len(self.history) > 0: - raise IllegalMove("Cannot place handicap on a started game") - self.handicaps.extend(actions) - for action in actions: - self.do_move(action, BLACK) - self.history = [] - - def get_current_player(self): - """Returns the color of the player who will make the next move. - """ - return self.current_player - - def do_move(self, action, color=None): - """Play stone at action=(x,y). If color is not specified, current_player is used - If it is a legal move, current_player switches to the opposite color - If not, an IllegalMove exception is raised - """ - color = color or self.current_player - reset_player = self.current_player - self.current_player = color - if self.is_legal(action): - # reset ko - self.ko = None - # increment age of stones by 1 - self.stone_ages[self.stone_ages >= 0] += 1 - if action is not PASS_MOVE: - (x, y) = action - self.board[x][y] = color - self._update_hash(action, color) - self._update_neighbors(action) - self.stone_ages[x][y] = 0 - - # check neighboring groups' liberties for captures - for (nx, ny) in self._neighbors(action): - if self.board[nx, ny] == -color and len(self.liberty_sets[nx][ny]) == 0: - # capture occurred! 
- captured_group = self.group_sets[nx][ny] - num_captured = len(captured_group) - self._remove_group(captured_group) - if color == BLACK: - self.num_white_prisoners += num_captured - else: - self.num_black_prisoners += num_captured - # check for ko - if num_captured == 1: - # it is a ko iff, were the opponent to play at the captured position, - # it would recapture (x,y) only - # (a bigger group containing xy may be captured - this is 'snapback') - would_recapture = len(self.liberty_sets[x][y]) == 1 - recapture_size_is_1 = len(self.group_sets[x][y]) == 1 - if would_recapture and recapture_size_is_1: - # note: (nx,ny) is the stone that was captured - self.ko = (nx, ny) - # _remove_group has finished updating the hash - self.previous_hashes.add(self.current_hash) - else: - if color == BLACK: - self.passes_black += 1 - if color == WHITE: - self.passes_white += 1 - # next turn - self.current_player = -color - self.history.append(action) - self.__legal_move_cache = None - else: - self.current_player = reset_player - raise IllegalMove(str(action)) - # Check for end of game - if len(self.history) > 1: - if self.history[-1] is PASS_MOVE and self.history[-2] is PASS_MOVE \ - and self.current_player == WHITE: - self.is_end_of_game = True - return self.is_end_of_game + """State of a game of Go and some basic functions to interact with it + """ + + # Looking up positions adjacent to a given position takes a surprising + # amount of time, hence this shared lookup table {boardsize: {position: [neighbors]}} + __NEIGHBORS_CACHE = {} + + def __init__(self, size=19, komi=7.5, enforce_superko=False): + self.board = np.zeros((size, size)) + self.board.fill(EMPTY) + self.size = size + self.current_player = BLACK + self.ko = None + self.komi = komi # Komi is number of extra points WHITE gets for going 2nd + self.handicaps = [] + self.history = [] + self.num_black_prisoners = 0 + self.num_white_prisoners = 0 + self.is_end_of_game = False + # Each pass move by a player subtracts a point + self.passes_white = 0 + self.passes_black = 0 + # `self.liberty_sets` is a 2D array with the same indexes as `board` + # each entry points to a set of tuples - the liberties of a stone's + # connected block. By caching liberties in this way, we can directly + # optimize update functions (e.g. 
do_move) and in doing so indirectly + # speed up any function that queries liberties + self._create_neighbors_cache() + self.liberty_sets = [[set() for _ in range(size)] for _ in range(size)] + for x in range(size): + for y in range(size): + self.liberty_sets[x][y] = set(self._neighbors((x, y))) + # separately cache the 2D numpy array of the _size_ of liberty sets + # at each board position + self.liberty_counts = np.zeros((size, size), dtype=np.int) + self.liberty_counts.fill(-1) + # initialize liberty_sets of empty board: the set of neighbors of each position + # similarly to `liberty_sets`, `group_sets[x][y]` points to a set of tuples + # containing all (x',y') pairs in the group connected to (x,y) + self.group_sets = [[set() for _ in range(size)] for _ in range(size)] + # cache of list of legal moves (actually 'sensible' moves, with a separate list for eye-moves on request) + self.__legal_move_cache = None + self.__legal_eyes_cache = None + # on-the-fly record of 'age' of each stone + self.stone_ages = np.zeros((size, size), dtype=np.int) - 1 + + # setup Zobrist hash to keep track of board state + self.enforce_superko = enforce_superko + rng = np.random.RandomState(0) + self.hash_lookup = { + WHITE: rng.randint(np.iinfo(np.uint64).max, size=(size, size), dtype='uint64'), + BLACK: rng.randint(np.iinfo(np.uint64).max, size=(size, size), dtype='uint64')} + self.current_hash = np.uint64(0) + self.previous_hashes = set() + + def get_group(self, position): + """Get the group of connected same-color stones to the given position + Keyword arguments: + position -- a tuple of (x, y) + x being the column index of the starting position of the search + y being the row index of the starting position of the search + Return: + a set of tuples consist of (x, y)s which are the same-color cluster + which contains the input single position. len(group) is size of the cluster, can be large. + """ + (x, y) = position + # given that this is already cached, it is a fast lookup + return self.group_sets[x][y] + + def get_groups_around(self, position): + """returns a list of the unique groups adjacent to position + 'unique' means that, for example in this position: + . . . . . + . B W . . + . W W . . + . . . . . + . . . . . + only the one white group would be returned on get_groups_around((1,1)) + """ + groups = [] + for (nx, ny) in self._neighbors(position): + group = self.group_sets[nx][ny] + if len(group) > 0 and group not in groups: + groups.append(self.group_sets[nx][ny]) + return groups + + def _on_board(self, position): + """simply return True iff position is within the bounds of [0, self.size) + """ + (x, y) = position + return x >= 0 and y >= 0 and x < self.size and y < self.size + + def _create_neighbors_cache(self): + if self.size not in GameState.__NEIGHBORS_CACHE: + GameState.__NEIGHBORS_CACHE[self.size] = {} + for x in xrange(self.size): + for y in xrange(self.size): + neighbors = [xy for xy in [(x - 1, y), (x + 1, y), (x, y - 1), (x, y + 1)] if self._on_board(xy)] + GameState.__NEIGHBORS_CACHE[self.size][(x, y)] = neighbors + + def _neighbors(self, position): + """A private helper function that simply returns a list of positions neighboring + the given (x,y) position. Basically it handles edges and corners. 
+ """ + return GameState.__NEIGHBORS_CACHE[self.size][position] + + def _diagonals(self, position): + """Like _neighbors but for diagonal positions + """ + (x, y) = position + return filter(self._on_board, [(x - 1, y - 1), (x + 1, y + 1), (x + 1, y - 1), (x - 1, y + 1)]) + + def _update_neighbors(self, position): + """A private helper function to update self.group_sets and self.liberty_sets + given that a stone was just played at `position` + """ + (x, y) = position + + merged_group = set() + merged_group.add(position) + merged_libs = self.liberty_sets[x][y] + for (nx, ny) in self._neighbors(position): + # remove (x,y) from liberties of neighboring positions + self.liberty_sets[nx][ny] -= set([position]) + # if neighbor was opponent, update group's liberties count + # (current_player's groups will be updated below regardless) + if self.board[nx][ny] == -self.current_player: + new_liberty_count = len(self.liberty_sets[nx][ny]) + for (gx, gy) in self.group_sets[nx][ny]: + self.liberty_counts[gx][gy] = new_liberty_count + # MERGE group/liberty sets if neighbor is the same color + # note: this automatically takes care of merging two separate + # groups that just became connected through (x,y) + elif self.board[x][y] == self.board[nx][ny]: + merged_group |= self.group_sets[nx][ny] + merged_libs |= self.liberty_sets[nx][ny] + + # now that we have one big 'merged' set for groups and liberties, loop + # over every member of the same-color group to update them + # Note: neighboring opponent groups are already updated in the previous loop + count_merged_libs = len(merged_libs) + for (gx, gy) in merged_group: + self.group_sets[gx][gy] = merged_group + self.liberty_sets[gx][gy] = merged_libs + self.liberty_counts[gx][gy] = count_merged_libs + + def _update_hash(self, action, color): + (x, y) = action + self.current_hash = np.bitwise_xor(self.current_hash, self.hash_lookup[color][x][y]) + + def _remove_group(self, group): + """A private helper function to take a group off the board (due to capture), + updating group sets and liberties along the way + """ + for (x, y) in group: + self._update_hash((x, y), self.board[x, y]) + self.board[x, y] = EMPTY + for (x, y) in group: + # clear group_sets for all positions in 'group' + self.group_sets[x][y] = set() + self.liberty_sets[x][y] = set() + self.liberty_counts[x][y] = -1 + self.stone_ages[x][y] = -1 + for (nx, ny) in self._neighbors((x, y)): + if self.board[nx, ny] == EMPTY: + # add empty neighbors of (x,y) to its liberties + self.liberty_sets[x][y].add((nx, ny)) + else: + # add (x,y) to the liberties of its nonempty neighbors + self.liberty_sets[nx][ny].add((x, y)) + for (gx, gy) in self.group_sets[nx][ny]: + self.liberty_counts[gx][gy] = len(self.liberty_sets[nx][ny]) + + def copy(self): + """get a copy of this Game state + """ + other = GameState(self.size, self.komi) + other.board = self.board.copy() + other.current_player = self.current_player + other.ko = self.ko + other.handicaps = list(self.handicaps) + other.history = list(self.history) + other.num_black_prisoners = self.num_black_prisoners + other.num_white_prisoners = self.num_white_prisoners + other.enforce_superko = self.enforce_superko + other.current_hash = self.current_hash.copy() + other.previous_hashes = self.previous_hashes.copy() + + # update liberty and group sets. 
Note: calling set(a) on another set + # copies the entries (any iterable as an argument would work so + # set(list(a)) is unnecessary) + for x in range(self.size): + for y in range(self.size): + other.group_sets[x][y] = set(self.group_sets[x][y]) + other.liberty_sets[x][y] = set(self.liberty_sets[x][y]) + other.liberty_counts = self.liberty_counts.copy() + return other + + def is_suicide(self, action): + """return true if having current_player play at would be suicide + """ + (x, y) = action + num_liberties_here = len(self.liberty_sets[x][y]) + if num_liberties_here == 0: + # no liberties here 'immediately' + # but this may still connect to another group of the same color + for (nx, ny) in self._neighbors(action): + # check if we're saved by attaching to a friendly group that has + # liberties elsewhere + is_friendly_group = self.board[nx, ny] == self.current_player + group_has_other_liberties = len(self.liberty_sets[nx][ny] - set([action])) > 0 + if is_friendly_group and group_has_other_liberties: + return False + # check if we're killing an unfriendly group + is_enemy_group = self.board[nx, ny] == -self.current_player + if is_enemy_group and (not group_has_other_liberties): + return False + # checked all the neighbors, and it doesn't look good. + return True + return False + + def is_positional_superko(self, action): + """Find all actions that the current_player has done in the past, taking into account the fact that + history starts with BLACK when there are no handicaps or with WHITE when there are. + """ + if len(self.handicaps) == 0 and self.current_player == BLACK: + player_history = self.history[0::2] + elif len(self.handicaps) > 0 and self.current_player == WHITE: + player_history = self.history[0::2] + else: + player_history = self.history[1::2] + + if action not in self.handicaps and action not in player_history: + return False + + state_copy = self.copy() + state_copy.enforce_superko = False + state_copy.do_move(action) + + if state_copy.current_hash in self.previous_hashes: + return True + else: + return False + + def is_legal(self, action): + """determine if the given action (x,y tuple) is a legal move + note: we only check ko, not superko at this point (TODO?) 
+ """ + # passing is always legal + if action is PASS_MOVE: + return True + (x, y) = action + if not self._on_board(action): + return False + if self.board[x][y] != EMPTY: + return False + if self.is_suicide(action): + return False + if action == self.ko: + return False + if self.enforce_superko and self.is_positional_superko(action): + return False + return True + + def is_eyeish(self, position, owner): + """returns whether the position is empty and is surrounded by all stones of 'owner' + """ + (x, y) = position + if self.board[x, y] != EMPTY: + return False + + for (nx, ny) in self._neighbors(position): + if self.board[nx, ny] != owner: + return False + return True + + def is_eye(self, position, owner, stack=[]): + """returns whether the position is a true eye of 'owner' + Requires a recursive call; empty spaces diagonal to 'position' are fine + as long as they themselves are eyes + """ + if not self.is_eyeish(position, owner): + return False + # (as in Fuego/Michi/etc) ensure that num "bad" diagonals is 0 (edges) or 1 + # where a bad diagonal is an opponent stone or an empty non-eye space + num_bad_diagonal = 0 + # if in middle of board, 1 bad neighbor is allowable; zero for edges and corners + allowable_bad_diagonal = 1 if len(self._neighbors(position)) == 4 else 0 + + for d in self._diagonals(position): + # opponent stones count against this being eye + if self.board[d] == -owner: + num_bad_diagonal += 1 + # empty spaces (that aren't themselves eyes) count against it too + # the 'stack' keeps track of where we've already been to prevent + # infinite loops of recursion + elif self.board[d] == EMPTY and d not in stack: + stack.append(position) + if not self.is_eye(d, owner, stack): + num_bad_diagonal += 1 + stack.pop() + # at any point, if we've surpassed # allowable, we can stop + if num_bad_diagonal > allowable_bad_diagonal: + return False + return True + + def get_legal_moves(self, include_eyes=True): + if self.__legal_move_cache is not None: + if include_eyes: + return self.__legal_move_cache + self.__legal_eyes_cache + else: + return self.__legal_move_cache + self.__legal_move_cache = [] + self.__legal_eyes_cache = [] + for x in range(self.size): + for y in range(self.size): + if self.is_legal((x, y)): + if not self.is_eye((x, y), self.current_player): + self.__legal_move_cache.append((x, y)) + else: + self.__legal_eyes_cache.append((x, y)) + return self.get_legal_moves(include_eyes) + + def get_winner(self): + """Calculate score of board state and return player ID (1, -1, or 0 for tie) + corresponding to winner. Uses 'Area scoring'. 
+ """ + # Count number of positions filled by each player, plus 1 for each eye-ish space owned + score_white = np.sum(self.board == WHITE) + score_black = np.sum(self.board == BLACK) + empties = zip(*np.where(self.board == EMPTY)) + for empty in empties: + # Check that all surrounding points are of one color + if self.is_eyeish(empty, BLACK): + score_black += 1 + elif self.is_eyeish(empty, WHITE): + score_white += 1 + score_white += self.komi + score_white -= self.passes_white + score_black -= self.passes_black + if score_black > score_white: + winner = BLACK + elif score_white > score_black: + winner = WHITE + else: + # Tie + winner = 0 + return winner + + def place_handicaps(self, actions): + if len(self.history) > 0: + raise IllegalMove("Cannot place handicap on a started game") + self.handicaps.extend(actions) + for action in actions: + self.do_move(action, BLACK) + self.history = [] + + def get_current_player(self): + """Returns the color of the player who will make the next move. + """ + return self.current_player + + def do_move(self, action, color=None): + """Play stone at action=(x,y). If color is not specified, current_player is used + If it is a legal move, current_player switches to the opposite color + If not, an IllegalMove exception is raised + """ + color = color or self.current_player + reset_player = self.current_player + self.current_player = color + if self.is_legal(action): + # reset ko + self.ko = None + # increment age of stones by 1 + self.stone_ages[self.stone_ages >= 0] += 1 + if action is not PASS_MOVE: + (x, y) = action + self.board[x][y] = color + self._update_hash(action, color) + self._update_neighbors(action) + self.stone_ages[x][y] = 0 + + # check neighboring groups' liberties for captures + for (nx, ny) in self._neighbors(action): + if self.board[nx, ny] == -color and len(self.liberty_sets[nx][ny]) == 0: + # capture occurred! + captured_group = self.group_sets[nx][ny] + num_captured = len(captured_group) + self._remove_group(captured_group) + if color == BLACK: + self.num_white_prisoners += num_captured + else: + self.num_black_prisoners += num_captured + # check for ko + if num_captured == 1: + # it is a ko iff, were the opponent to play at the captured position, + # it would recapture (x,y) only + # (a bigger group containing xy may be captured - this is 'snapback') + would_recapture = len(self.liberty_sets[x][y]) == 1 + recapture_size_is_1 = len(self.group_sets[x][y]) == 1 + if would_recapture and recapture_size_is_1: + # note: (nx,ny) is the stone that was captured + self.ko = (nx, ny) + # _remove_group has finished updating the hash + self.previous_hashes.add(self.current_hash) + else: + if color == BLACK: + self.passes_black += 1 + if color == WHITE: + self.passes_white += 1 + # next turn + self.current_player = -color + self.history.append(action) + self.__legal_move_cache = None + else: + self.current_player = reset_player + raise IllegalMove(str(action)) + # Check for end of game + if len(self.history) > 1: + if self.history[-1] is PASS_MOVE and self.history[-2] is PASS_MOVE \ + and self.current_player == WHITE: + self.is_end_of_game = True + return self.is_end_of_game class IllegalMove(Exception): - pass + pass diff --git a/AlphaGo/mcts.py b/AlphaGo/mcts.py index d2d572e2a..962b4aabe 100644 --- a/AlphaGo/mcts.py +++ b/AlphaGo/mcts.py @@ -8,210 +8,210 @@ class TreeNode(object): - """A node in the MCTS tree. Each node keeps track of its own value Q, prior probability P, and - its visit-count-adjusted prior score u. 
- """ - def __init__(self, parent, prior_p): - self._parent = parent - self._children = {} # a map from action to TreeNode - self._n_visits = 0 - self._Q = 0 - # This value for u will be overwritten in the first call to update(), but is useful for - # choosing the first action from this node. - self._u = prior_p - self._P = prior_p - - def expand(self, action_priors): - """Expand tree by creating new children. - - Arguments: - action_priors -- output from policy function - a list of tuples of actions and their prior - probability according to the policy function. - - Returns: - None - """ - for action, prob in action_priors: - if action not in self._children: - self._children[action] = TreeNode(self, prob) - - def select(self): - """Select action among children that gives maximum action value, Q plus bonus u(P). - - Returns: - A tuple of (action, next_node) - """ - return max(self._children.iteritems(), key=lambda (action, node): node.get_value()) - - def update(self, leaf_value, c_puct): - """Update node values from leaf evaluation. - - Arguments: - leaf_value -- the value of subtree evaluation from the current player's perspective. - c_puct -- a number in (0, inf) controlling the relative impact of values, Q, and - prior probability, P, on this node's score. - - Returns: - None - """ - # Count visit. - self._n_visits += 1 - # Update Q, a running average of values for all visits. - self._Q += (leaf_value - self._Q) / self._n_visits - # Update u, the prior weighted by an exploration hyperparameter c_puct and the number of - # visits. Note that u is not normalized to be a distribution. - if not self.is_root(): - self._u = c_puct * self._P * np.sqrt(self._parent._n_visits) / (1 + self._n_visits) - - def update_recursive(self, leaf_value, c_puct): - """Like a call to update(), but applied recursively for all ancestors. - - Note: it is important that this happens from the root downward so that 'parent' visit - counts are correct. - """ - # If it is not root, this node's parent should be updated first. - if self._parent: - self._parent.update_recursive(leaf_value, c_puct) - self.update(leaf_value, c_puct) - - def get_value(self): - """Calculate and return the value for this node: a combination of leaf evaluations, Q, and - this node's prior adjusted for its visit count, u - """ - return self._Q + self._u - - def is_leaf(self): - """Check if leaf node (i.e. no nodes below this have been expanded). - """ - return self._children == {} - - def is_root(self): - return self._parent is None + """A node in the MCTS tree. Each node keeps track of its own value Q, prior probability P, and + its visit-count-adjusted prior score u. + """ + def __init__(self, parent, prior_p): + self._parent = parent + self._children = {} # a map from action to TreeNode + self._n_visits = 0 + self._Q = 0 + # This value for u will be overwritten in the first call to update(), but is useful for + # choosing the first action from this node. + self._u = prior_p + self._P = prior_p + + def expand(self, action_priors): + """Expand tree by creating new children. + + Arguments: + action_priors -- output from policy function - a list of tuples of actions and their prior + probability according to the policy function. + + Returns: + None + """ + for action, prob in action_priors: + if action not in self._children: + self._children[action] = TreeNode(self, prob) + + def select(self): + """Select action among children that gives maximum action value, Q plus bonus u(P). 
+ + Returns: + A tuple of (action, next_node) + """ + return max(self._children.iteritems(), key=lambda (action, node): node.get_value()) + + def update(self, leaf_value, c_puct): + """Update node values from leaf evaluation. + + Arguments: + leaf_value -- the value of subtree evaluation from the current player's perspective. + c_puct -- a number in (0, inf) controlling the relative impact of values, Q, and + prior probability, P, on this node's score. + + Returns: + None + """ + # Count visit. + self._n_visits += 1 + # Update Q, a running average of values for all visits. + self._Q += (leaf_value - self._Q) / self._n_visits + # Update u, the prior weighted by an exploration hyperparameter c_puct and the number of + # visits. Note that u is not normalized to be a distribution. + if not self.is_root(): + self._u = c_puct * self._P * np.sqrt(self._parent._n_visits) / (1 + self._n_visits) + + def update_recursive(self, leaf_value, c_puct): + """Like a call to update(), but applied recursively for all ancestors. + + Note: it is important that this happens from the root downward so that 'parent' visit + counts are correct. + """ + # If it is not root, this node's parent should be updated first. + if self._parent: + self._parent.update_recursive(leaf_value, c_puct) + self.update(leaf_value, c_puct) + + def get_value(self): + """Calculate and return the value for this node: a combination of leaf evaluations, Q, and + this node's prior adjusted for its visit count, u + """ + return self._Q + self._u + + def is_leaf(self): + """Check if leaf node (i.e. no nodes below this have been expanded). + """ + return self._children == {} + + def is_root(self): + return self._parent is None class MCTS(object): - """A simple (and slow) single-threaded implementation of Monte Carlo Tree Search. - - Search works by exploring moves randomly according to the given policy up to a certain - depth, which is relatively small given the search space. "Leaves" at this depth are assigned a - value comprising a weighted combination of (1) the value function evaluated at that leaf, and - (2) the result of finishing the game from that leaf according to the 'rollout' policy. The - probability of revisiting a node changes over the course of the many playouts according to its - estimated value. Ultimately the most visited node is returned as the next action, not the most - valued node. - - The term "playout" refers to a single search from the root, whereas "rollout" refers to the - fast evaluation from leaf nodes to the end of the game. - """ - - def __init__(self, value_fn, policy_fn, rollout_policy_fn, lmbda=0.5, c_puct=5, rollout_limit=500, playout_depth=20, n_playout=10000): - """Arguments: - value_fn -- a function that takes in a state and ouputs a score in [-1, 1], i.e. the - expected value of the end game score from the current player's perspective. - policy_fn -- a function that takes in a state and outputs a list of (action, probability) - tuples for the current player. - rollout_policy_fn -- a coarse, fast version of policy_fn used in the rollout phase. - lmbda -- controls the relative weight of the value network and fast rollout policy result - in determining the value of a leaf node. lmbda must be in [0, 1], where 0 means use only - the value network and 1 means use only the result from the rollout. 
- c_puct -- a number in (0, inf) that controls how quickly exploration converges to the maximum- - value policy, where a higher value means relying on the prior more, and should be used only - in conjunction with a large value for n_playout. - """ - self._root = TreeNode(None, 1.0) - self._value = value_fn - self._policy = policy_fn - self._rollout = rollout_policy_fn - self._lmbda = lmbda - self._c_puct = c_puct - self._rollout_limit = rollout_limit - self._L = playout_depth - self._n_playout = n_playout - - def _playout(self, state, leaf_depth): - """Run a single playout from the root to the given depth, getting a value at the leaf and - propagating it back through its parents. State is modified in-place, so a copy must be - provided. - - Arguments: - state -- a copy of the state. - leaf_depth -- after this many moves, leaves are evaluated. - - Returns: - None - """ - node = self._root - for i in range(leaf_depth): - # Only expand node if it has not already been done. Existing nodes already know their - # prior. - if node.is_leaf(): - action_probs = self._policy(state) - # Check for end of game. - if len(action_probs) == 0: - break - node.expand(action_probs) - # Greedily select next move. - action, node = node.select() - state.do_move(action) - - # Evaluate the leaf using a weighted combination of the value network, v, and the game's - # winner, z, according to the rollout policy. If lmbda is equal to 0 or 1, only one of - # these contributes and the other may be skipped. Both v and z are from the perspective - # of the current player (+1 is good, -1 is bad). - v = self._value(state) if self._lmbda < 1 else 0 - z = self._evaluate_rollout(state, self._rollout_limit) if self._lmbda > 0 else 0 - leaf_value = (1 - self._lmbda) * v + self._lmbda * z - - # Update value and visit count of nodes in this traversal. - node.update_recursive(leaf_value, self._c_puct) - - def _evaluate_rollout(self, state, limit): - """Use the rollout policy to play until the end of the game, returning +1 if the current - player wins, -1 if the opponent wins, and 0 if it is a tie. - """ - player = state.get_current_player() - for i in range(limit): - action_probs = self._rollout(state) - if len(action_probs) == 0: - break - max_action = max(action_probs, key=lambda (a, p): p)[0] - state.do_move(max_action) - else: - # If no break from the loop, issue a warning. - print "WARNING: rollout reached move limit" - winner = state.get_winner() - if winner == 0: - return 0 - else: - return 1 if winner == player else -1 - - def get_move(self, state): - """Runs all playouts sequentially and returns the most visited action. - - Arguments: - state -- the current state, including both game state and the current player. - - Returns: - the selected action - """ - for n in range(self._n_playout): - state_copy = state.copy() - self._playout(state_copy, self._L) - - # chosen action is the *most visited child*, not the highest-value one - # (they are the same as self._n_playout gets large). - return max(self._root._children.iteritems(), key=lambda (a, n): n._n_visits)[0] - - def update_with_move(self, last_move): - """Step forward in the tree, keeping everything we already know about the subtree, assuming - that get_move() has been called already. Siblings of the new root will be garbage-collected. 
- """ - if last_move in self._root._children: - self._root = self._root._children[last_move] - self._root._parent = None - else: - self._root = TreeNode(None, 1.0) + """A simple (and slow) single-threaded implementation of Monte Carlo Tree Search. + + Search works by exploring moves randomly according to the given policy up to a certain + depth, which is relatively small given the search space. "Leaves" at this depth are assigned a + value comprising a weighted combination of (1) the value function evaluated at that leaf, and + (2) the result of finishing the game from that leaf according to the 'rollout' policy. The + probability of revisiting a node changes over the course of the many playouts according to its + estimated value. Ultimately the most visited node is returned as the next action, not the most + valued node. + + The term "playout" refers to a single search from the root, whereas "rollout" refers to the + fast evaluation from leaf nodes to the end of the game. + """ + + def __init__(self, value_fn, policy_fn, rollout_policy_fn, lmbda=0.5, c_puct=5, rollout_limit=500, playout_depth=20, n_playout=10000): + """Arguments: + value_fn -- a function that takes in a state and ouputs a score in [-1, 1], i.e. the + expected value of the end game score from the current player's perspective. + policy_fn -- a function that takes in a state and outputs a list of (action, probability) + tuples for the current player. + rollout_policy_fn -- a coarse, fast version of policy_fn used in the rollout phase. + lmbda -- controls the relative weight of the value network and fast rollout policy result + in determining the value of a leaf node. lmbda must be in [0, 1], where 0 means use only + the value network and 1 means use only the result from the rollout. + c_puct -- a number in (0, inf) that controls how quickly exploration converges to the maximum- + value policy, where a higher value means relying on the prior more, and should be used only + in conjunction with a large value for n_playout. + """ + self._root = TreeNode(None, 1.0) + self._value = value_fn + self._policy = policy_fn + self._rollout = rollout_policy_fn + self._lmbda = lmbda + self._c_puct = c_puct + self._rollout_limit = rollout_limit + self._L = playout_depth + self._n_playout = n_playout + + def _playout(self, state, leaf_depth): + """Run a single playout from the root to the given depth, getting a value at the leaf and + propagating it back through its parents. State is modified in-place, so a copy must be + provided. + + Arguments: + state -- a copy of the state. + leaf_depth -- after this many moves, leaves are evaluated. + + Returns: + None + """ + node = self._root + for i in range(leaf_depth): + # Only expand node if it has not already been done. Existing nodes already know their + # prior. + if node.is_leaf(): + action_probs = self._policy(state) + # Check for end of game. + if len(action_probs) == 0: + break + node.expand(action_probs) + # Greedily select next move. + action, node = node.select() + state.do_move(action) + + # Evaluate the leaf using a weighted combination of the value network, v, and the game's + # winner, z, according to the rollout policy. If lmbda is equal to 0 or 1, only one of + # these contributes and the other may be skipped. Both v and z are from the perspective + # of the current player (+1 is good, -1 is bad). 
+ v = self._value(state) if self._lmbda < 1 else 0 + z = self._evaluate_rollout(state, self._rollout_limit) if self._lmbda > 0 else 0 + leaf_value = (1 - self._lmbda) * v + self._lmbda * z + + # Update value and visit count of nodes in this traversal. + node.update_recursive(leaf_value, self._c_puct) + + def _evaluate_rollout(self, state, limit): + """Use the rollout policy to play until the end of the game, returning +1 if the current + player wins, -1 if the opponent wins, and 0 if it is a tie. + """ + player = state.get_current_player() + for i in range(limit): + action_probs = self._rollout(state) + if len(action_probs) == 0: + break + max_action = max(action_probs, key=lambda (a, p): p)[0] + state.do_move(max_action) + else: + # If no break from the loop, issue a warning. + print "WARNING: rollout reached move limit" + winner = state.get_winner() + if winner == 0: + return 0 + else: + return 1 if winner == player else -1 + + def get_move(self, state): + """Runs all playouts sequentially and returns the most visited action. + + Arguments: + state -- the current state, including both game state and the current player. + + Returns: + the selected action + """ + for n in range(self._n_playout): + state_copy = state.copy() + self._playout(state_copy, self._L) + + # chosen action is the *most visited child*, not the highest-value one + # (they are the same as self._n_playout gets large). + return max(self._root._children.iteritems(), key=lambda (a, n): n._n_visits)[0] + + def update_with_move(self, last_move): + """Step forward in the tree, keeping everything we already know about the subtree, assuming + that get_move() has been called already. Siblings of the new root will be garbage-collected. + """ + if last_move in self._root._children: + self._root = self._root._children[last_move] + self._root._parent = None + else: + self._root = TreeNode(None, 1.0) class ParallelMCTS(MCTS): - pass + pass diff --git a/AlphaGo/models/nn_util.py b/AlphaGo/models/nn_util.py index 62696acd2..2f29211ae 100644 --- a/AlphaGo/models/nn_util.py +++ b/AlphaGo/models/nn_util.py @@ -6,124 +6,124 @@ class NeuralNetBase(object): - """Base class for neural network classes handling feature processing, construction - of a 'forward' function, etc. - """ - - # keep track of subclasses to make generic saving/loading cleaner. - # subclasses can be 'registered' with the @neuralnet decorator - subclasses = {} - - def __init__(self, feature_list, **kwargs): - """create a neural net object that preprocesses according to feature_list and uses - a neural network specified by keyword arguments (using subclass' create_network()) - - optional argument: init_network (boolean). If set to False, skips initializing - self.model and self.forward and the calling function should set them. - """ - self.preprocessor = Preprocess(feature_list) - kwargs["input_dim"] = self.preprocessor.output_dim - - if kwargs.get('init_network', True): - # self.__class__ refers to the subclass so that subclasses only - # need to override create_network() - self.model = self.__class__.create_network(**kwargs) - # self.forward is a lambda function wrapping a Keras function - self.forward = self._model_forward() - - def _model_forward(self): - """Construct a function using the current keras backend that, when given a batch - of inputs, simply processes them forward and returns the output - - This is as opposed to model.compile(), which takes a loss function - and training method. - - c.f. 
https://github.com/fchollet/keras/issues/1426 - """ - # The uses_learning_phase property is True if the model contains layers that behave - # differently during training and testing, e.g. Dropout or BatchNormalization. - # In these cases, K.learning_phase() is a reference to a backend variable that should - # be set to 0 when using the network in prediction mode and is automatically set to 1 - # during training. - if self.model.uses_learning_phase: - forward_function = K.function([self.model.input, K.learning_phase()], [self.model.output]) - - # the forward_function returns a list of tensors - # the first [0] gets the front tensor. - return lambda inpt: forward_function([inpt, 0])[0] - else: - # identical but without a second input argument for the learning phase - forward_function = K.function([self.model.input], [self.model.output]) - return lambda inpt: forward_function([inpt])[0] - - @staticmethod - def load_model(json_file): - """create a new neural net object from the architecture specified in json_file - """ - with open(json_file, 'r') as f: - object_specs = json.load(f) - - # Create object; may be a subclass of networks saved in specs['class'] - class_name = object_specs.get('class', 'CNNPolicy') - try: - network_class = NeuralNetBase.subclasses[class_name] - except KeyError: - raise ValueError("Unknown neural network type in json file: {}\n(was it registered with the @neuralnet decorator?)".format(class_name)) - - # create new object - new_net = network_class(object_specs['feature_list'], init_network=False) - - new_net.model = model_from_json(object_specs['keras_model'], custom_objects={'Bias': Bias}) - if 'weights_file' in object_specs: - new_net.model.load_weights(object_specs['weights_file']) - new_net.forward = new_net._model_forward() - return new_net - - def save_model(self, json_file, weights_file=None): - """write the network model and preprocessing features to the specified file - - If a weights_file (.hdf5 extension) is also specified, model weights are also - saved to that file and will be reloaded automatically in a call to load_model - """ - # this looks odd because we are serializing a model with json as a string - # then making that the value of an object which is then serialized as - # json again. - # It's not as crazy as it looks. A Network has 2 moving parts - the - # feature preprocessing and the neural net, each of which gets a top-level - # entry in the saved file. Keras just happens to serialize models with JSON - # as well. Note how this format makes load_model fairly clean as well. - object_specs = { - 'class': self.__class__.__name__, - 'keras_model': self.model.to_json(), - 'feature_list': self.preprocessor.feature_list - } - if weights_file is not None: - self.model.save_weights(weights_file) - object_specs['weights_file'] = weights_file - # use the json module to write object_specs to file - with open(json_file, 'w') as f: - json.dump(object_specs, f) + """Base class for neural network classes handling feature processing, construction + of a 'forward' function, etc. + """ + + # keep track of subclasses to make generic saving/loading cleaner. + # subclasses can be 'registered' with the @neuralnet decorator + subclasses = {} + + def __init__(self, feature_list, **kwargs): + """create a neural net object that preprocesses according to feature_list and uses + a neural network specified by keyword arguments (using subclass' create_network()) + + optional argument: init_network (boolean). 
If set to False, skips initializing + self.model and self.forward and the calling function should set them. + """ + self.preprocessor = Preprocess(feature_list) + kwargs["input_dim"] = self.preprocessor.output_dim + + if kwargs.get('init_network', True): + # self.__class__ refers to the subclass so that subclasses only + # need to override create_network() + self.model = self.__class__.create_network(**kwargs) + # self.forward is a lambda function wrapping a Keras function + self.forward = self._model_forward() + + def _model_forward(self): + """Construct a function using the current keras backend that, when given a batch + of inputs, simply processes them forward and returns the output + + This is as opposed to model.compile(), which takes a loss function + and training method. + + c.f. https://github.com/fchollet/keras/issues/1426 + """ + # The uses_learning_phase property is True if the model contains layers that behave + # differently during training and testing, e.g. Dropout or BatchNormalization. + # In these cases, K.learning_phase() is a reference to a backend variable that should + # be set to 0 when using the network in prediction mode and is automatically set to 1 + # during training. + if self.model.uses_learning_phase: + forward_function = K.function([self.model.input, K.learning_phase()], [self.model.output]) + + # the forward_function returns a list of tensors + # the first [0] gets the front tensor. + return lambda inpt: forward_function([inpt, 0])[0] + else: + # identical but without a second input argument for the learning phase + forward_function = K.function([self.model.input], [self.model.output]) + return lambda inpt: forward_function([inpt])[0] + + @staticmethod + def load_model(json_file): + """create a new neural net object from the architecture specified in json_file + """ + with open(json_file, 'r') as f: + object_specs = json.load(f) + + # Create object; may be a subclass of networks saved in specs['class'] + class_name = object_specs.get('class', 'CNNPolicy') + try: + network_class = NeuralNetBase.subclasses[class_name] + except KeyError: + raise ValueError("Unknown neural network type in json file: {}\n(was it registered with the @neuralnet decorator?)".format(class_name)) + + # create new object + new_net = network_class(object_specs['feature_list'], init_network=False) + + new_net.model = model_from_json(object_specs['keras_model'], custom_objects={'Bias': Bias}) + if 'weights_file' in object_specs: + new_net.model.load_weights(object_specs['weights_file']) + new_net.forward = new_net._model_forward() + return new_net + + def save_model(self, json_file, weights_file=None): + """write the network model and preprocessing features to the specified file + + If a weights_file (.hdf5 extension) is also specified, model weights are also + saved to that file and will be reloaded automatically in a call to load_model + """ + # this looks odd because we are serializing a model with json as a string + # then making that the value of an object which is then serialized as + # json again. + # It's not as crazy as it looks. A Network has 2 moving parts - the + # feature preprocessing and the neural net, each of which gets a top-level + # entry in the saved file. Keras just happens to serialize models with JSON + # as well. Note how this format makes load_model fairly clean as well. 
+ object_specs = { + 'class': self.__class__.__name__, + 'keras_model': self.model.to_json(), + 'feature_list': self.preprocessor.feature_list + } + if weights_file is not None: + self.model.save_weights(weights_file) + object_specs['weights_file'] = weights_file + # use the json module to write object_specs to file + with open(json_file, 'w') as f: + json.dump(object_specs, f) def neuralnet(cls): - """Class decorator for registering subclasses of NeuralNetBase - """ - NeuralNetBase.subclasses[cls.__name__] = cls - return cls + """Class decorator for registering subclasses of NeuralNetBase + """ + NeuralNetBase.subclasses[cls.__name__] = cls + return cls class Bias(Layer): - """Custom keras layer that simply adds a scalar bias to each location in the input + """Custom keras layer that simply adds a scalar bias to each location in the input - Largely copied from the keras docs: - http://keras.io/layers/writing-your-own-keras-layers/#writing-your-own-keras-layers - """ - def __init__(self, **kwargs): - super(Bias, self).__init__(**kwargs) + Largely copied from the keras docs: + http://keras.io/layers/writing-your-own-keras-layers/#writing-your-own-keras-layers + """ + def __init__(self, **kwargs): + super(Bias, self).__init__(**kwargs) - def build(self, input_shape): - self.W = K.zeros(input_shape[1:]) - self.trainable_weights = [self.W] + def build(self, input_shape): + self.W = K.zeros(input_shape[1:]) + self.trainable_weights = [self.W] - def call(self, x, mask=None): - return x + self.W + def call(self, x, mask=None): + return x + self.W diff --git a/AlphaGo/models/policy.py b/AlphaGo/models/policy.py index bf65200be..c16058cc2 100644 --- a/AlphaGo/models/policy.py +++ b/AlphaGo/models/policy.py @@ -8,251 +8,251 @@ @neuralnet class CNNPolicy(NeuralNetBase): - """uses a convolutional neural network to evaluate the state of the game - and compute a probability distribution over the next action - """ - - def _select_moves_and_normalize(self, nn_output, moves, size): - """helper function to normalize a distribution over the given list of moves - and return a list of (move, prob) tuples - """ - if len(moves) == 0: - return [] - move_indices = [flatten_idx(m, size) for m in moves] - # get network activations at legal move locations - distribution = nn_output[move_indices] - distribution = distribution / distribution.sum() - return zip(moves, distribution) - - def batch_eval_state(self, states, moves_lists=None): - """Given a list of states, evaluates them all at once to make best use of GPU - batching capabilities. 
- - Analogous to [eval_state(s) for s in states] - - Returns: a parallel list of move distributions as in eval_state - """ - n_states = len(states) - if n_states == 0: - return [] - state_size = states[0].size - if not all([st.size == state_size for st in states]): - raise ValueError("all states must have the same size") - # concatenate together all one-hot encoded states along the 'batch' dimension - nn_input = np.concatenate([self.preprocessor.state_to_tensor(s) for s in states], axis=0) - # pass all input through the network at once (backend makes use of batches if len(states) is large) - network_output = self.forward(nn_input) - # default move lists to all legal moves - moves_lists = moves_lists or [st.get_legal_moves() for st in states] - results = [None] * n_states - for i in range(n_states): - results[i] = self._select_moves_and_normalize(network_output[i], moves_lists[i], state_size) - return results - - def eval_state(self, state, moves=None): - """Given a GameState object, returns a list of (action, probability) pairs - according to the network outputs - - If a list of moves is specified, only those moves are kept in the distribution - """ - tensor = self.preprocessor.state_to_tensor(state) - # run the tensor through the network - network_output = self.forward(tensor) - moves = moves or state.get_legal_moves() - return self._select_moves_and_normalize(network_output[0], moves, state.size) - - @staticmethod - def create_network(**kwargs): - """construct a convolutional neural network. - - Keword Arguments: - - input_dim: depth of features to be processed by first layer (no default) - - board: width of the go board to be processed (default 19) - - filters_per_layer: number of filters used on every layer (default 128) - - layers: number of convolutional steps (default 12) - - filter_width_K: (where K is between 1 and ) width of filter on - layer K (default 3 except 1st layer which defaults to 5). - Must be odd. 
- """ - defaults = { - "board": 19, - "filters_per_layer": 128, - "layers": 12, - "filter_width_1": 5 - } - # copy defaults, but override with anything in kwargs - params = defaults - params.update(kwargs) - - # create the network: - # a series of zero-paddings followed by convolutions - # such that the output dimensions are also board x board - network = Sequential() - - # create first layer - network.add(convolutional.Convolution2D( - input_shape=(params["input_dim"], params["board"], params["board"]), - nb_filter=params["filters_per_layer"], - nb_row=params["filter_width_1"], - nb_col=params["filter_width_1"], - init='uniform', - activation='relu', - border_mode='same')) - - # create all other layers - for i in range(2, params["layers"] + 1): - # use filter_width_K if it is there, otherwise use 3 - filter_key = "filter_width_%d" % i - filter_width = params.get(filter_key, 3) - network.add(convolutional.Convolution2D( - nb_filter=params["filters_per_layer"], - nb_row=filter_width, - nb_col=filter_width, - init='uniform', - activation='relu', - border_mode='same')) - - # the last layer maps each feature to a number - network.add(convolutional.Convolution2D( - nb_filter=1, - nb_row=1, - nb_col=1, - init='uniform', - border_mode='same')) - # reshape output to be board x board - network.add(Flatten()) - # add a bias to each board location - network.add(Bias()) - # softmax makes it into a probability distribution - network.add(Activation('softmax')) - - return network + """uses a convolutional neural network to evaluate the state of the game + and compute a probability distribution over the next action + """ + + def _select_moves_and_normalize(self, nn_output, moves, size): + """helper function to normalize a distribution over the given list of moves + and return a list of (move, prob) tuples + """ + if len(moves) == 0: + return [] + move_indices = [flatten_idx(m, size) for m in moves] + # get network activations at legal move locations + distribution = nn_output[move_indices] + distribution = distribution / distribution.sum() + return zip(moves, distribution) + + def batch_eval_state(self, states, moves_lists=None): + """Given a list of states, evaluates them all at once to make best use of GPU + batching capabilities. 
+ + Analogous to [eval_state(s) for s in states] + + Returns: a parallel list of move distributions as in eval_state + """ + n_states = len(states) + if n_states == 0: + return [] + state_size = states[0].size + if not all([st.size == state_size for st in states]): + raise ValueError("all states must have the same size") + # concatenate together all one-hot encoded states along the 'batch' dimension + nn_input = np.concatenate([self.preprocessor.state_to_tensor(s) for s in states], axis=0) + # pass all input through the network at once (backend makes use of batches if len(states) is large) + network_output = self.forward(nn_input) + # default move lists to all legal moves + moves_lists = moves_lists or [st.get_legal_moves() for st in states] + results = [None] * n_states + for i in range(n_states): + results[i] = self._select_moves_and_normalize(network_output[i], moves_lists[i], state_size) + return results + + def eval_state(self, state, moves=None): + """Given a GameState object, returns a list of (action, probability) pairs + according to the network outputs + + If a list of moves is specified, only those moves are kept in the distribution + """ + tensor = self.preprocessor.state_to_tensor(state) + # run the tensor through the network + network_output = self.forward(tensor) + moves = moves or state.get_legal_moves() + return self._select_moves_and_normalize(network_output[0], moves, state.size) + + @staticmethod + def create_network(**kwargs): + """construct a convolutional neural network. + + Keword Arguments: + - input_dim: depth of features to be processed by first layer (no default) + - board: width of the go board to be processed (default 19) + - filters_per_layer: number of filters used on every layer (default 128) + - layers: number of convolutional steps (default 12) + - filter_width_K: (where K is between 1 and ) width of filter on + layer K (default 3 except 1st layer which defaults to 5). + Must be odd. + """ + defaults = { + "board": 19, + "filters_per_layer": 128, + "layers": 12, + "filter_width_1": 5 + } + # copy defaults, but override with anything in kwargs + params = defaults + params.update(kwargs) + + # create the network: + # a series of zero-paddings followed by convolutions + # such that the output dimensions are also board x board + network = Sequential() + + # create first layer + network.add(convolutional.Convolution2D( + input_shape=(params["input_dim"], params["board"], params["board"]), + nb_filter=params["filters_per_layer"], + nb_row=params["filter_width_1"], + nb_col=params["filter_width_1"], + init='uniform', + activation='relu', + border_mode='same')) + + # create all other layers + for i in range(2, params["layers"] + 1): + # use filter_width_K if it is there, otherwise use 3 + filter_key = "filter_width_%d" % i + filter_width = params.get(filter_key, 3) + network.add(convolutional.Convolution2D( + nb_filter=params["filters_per_layer"], + nb_row=filter_width, + nb_col=filter_width, + init='uniform', + activation='relu', + border_mode='same')) + + # the last layer maps each feature to a number + network.add(convolutional.Convolution2D( + nb_filter=1, + nb_row=1, + nb_col=1, + init='uniform', + border_mode='same')) + # reshape output to be board x board + network.add(Flatten()) + # add a bias to each board location + network.add(Bias()) + # softmax makes it into a probability distribution + network.add(Activation('softmax')) + + return network @neuralnet class ResnetPolicy(CNNPolicy): - """Residual network architecture as per He at al. 
2015 - """ - @staticmethod - def create_network(**kwargs): - """construct a convolutional neural network with Resnet-style skip connections. - Arguments are the same as with the default CNNPolicy network, except the default - number of layers is 20 plus a new n_skip parameter - - Keword Arguments: - - input_dim: depth of features to be processed by first layer (no default) - - board: width of the go board to be processed (default 19) - - filters_per_layer: number of filters used on every layer (default 128) - - layers: number of convolutional steps (default 20) - - filter_width_K: (where K is between 1 and ) width of filter on - layer K (default 3 except 1st layer which defaults to 5). - Must be odd. - - n_skip_K: (where K is as in filter_width_K) number of convolutional - layers to skip with the linear path starting at K. Only valid - at K >= 1. (Each layer defaults to 1) - - Note that n_skip_1=s means that the next valid value of n_skip_* is 3 - - A diagram may help explain (numbers indicate layer): - - 1 2 3 4 5 6 - I--C -- B -- R -- C -- B -- R -- C -- M -- B -- R -- C -- B -- R -- C -- B -- R -- C -- M ... M -- R -- F -- O - \___________________________/ \____________________________________________________/ \ ... / - [n_skip_1 = 2] [n_skip_3 = 3] - - I - input - B - BatchNormalization - R - ReLU - C - Conv2D - F - Flatten - O - output - M - merge - - The input is always passed through a Conv2D layer, the output of which layer is counted as '1'. - Each subsequent [R -- C] block is counted as one 'layer'. The 'merge' layer isn't counted; hence - if n_skip_1 is 2, the next valid skip parameter is n_skip_3, which will start at the output - of the merge - """ - defaults = { - "board": 19, - "filters_per_layer": 128, - "layers": 20, - "filter_width_1": 5 - } - # copy defaults, but override with anything in kwargs - params = defaults - params.update(kwargs) - - # create the network using Keras' functional API, - # since this isn't 'Sequential' - model_input = Input(shape=(params["input_dim"], params["board"], params["board"])) - - # create first layer - convolution_path = convolutional.Convolution2D( - input_shape=(), - nb_filter=params["filters_per_layer"], - nb_row=params["filter_width_1"], - nb_col=params["filter_width_1"], - init='uniform', - activation='linear', # relu activations done inside resnet modules - border_mode='same')(model_input) - - def add_resnet_unit(path, K, **params): - """Add a resnet unit to path starting at layer 'K', - adding as many (ReLU + Conv2D) modules as specified by n_skip_K - - Returns new path and next layer index, i.e. 
K + n_skip_K, in a tuple - """ - # loosely based on https://github.com/keunwoochoi/residual_block_keras - # (see also keras docs here: http://keras.io/getting-started/functional-api-guide/#all-models-are-callable-just-like-layers) - - block_input = path - # use n_skip_K if it is there, default to 1 - skip_key = "n_skip_%d" % K - n_skip = params.get(skip_key, 1) - for i in range(n_skip): - layer = K + i - # add BatchNorm - path = BatchNormalization()(path) - # add ReLU - path = Activation('relu')(path) - # use filter_width_K if it is there, otherwise use 3 - filter_key = "filter_width_%d" % layer - filter_width = params.get(filter_key, 3) - # add Conv2D - path = convolutional.Convolution2D( - nb_filter=params["filters_per_layer"], - nb_row=filter_width, - nb_col=filter_width, - init='uniform', - activation='linear', - border_mode='same')(path) - # Merge 'input layer' with the path - path = merge([block_input, path], mode='sum') - return path, K + n_skip - - # create all other layers - layer = 1 - while layer < params['layers']: - convolution_path, layer = add_resnet_unit(convolution_path, layer, **params) - if layer > params['layers']: - print "Due to skipping, ended with {} layers instead of {}".format(layer, params['layers']) - - # since each layer's activation was linear, need one more ReLu - convolution_path = Activation('relu')(convolution_path) - - # the last layer maps each featuer to a number - convolution_path = convolutional.Convolution2D( - nb_filter=1, - nb_row=1, - nb_col=1, - init='uniform', - border_mode='same')(convolution_path) - # flatten output - network_output = Flatten()(convolution_path) - # add a bias to each board location - network_output = Bias()(network_output) - # softmax makes it into a probability distribution - network_output = Activation('softmax')(network_output) - - return Model(input=[model_input], output=[network_output]) + """Residual network architecture as per He at al. 2015 + """ + @staticmethod + def create_network(**kwargs): + """construct a convolutional neural network with Resnet-style skip connections. + Arguments are the same as with the default CNNPolicy network, except the default + number of layers is 20 plus a new n_skip parameter + + Keword Arguments: + - input_dim: depth of features to be processed by first layer (no default) + - board: width of the go board to be processed (default 19) + - filters_per_layer: number of filters used on every layer (default 128) + - layers: number of convolutional steps (default 20) + - filter_width_K: (where K is between 1 and ) width of filter on + layer K (default 3 except 1st layer which defaults to 5). + Must be odd. + - n_skip_K: (where K is as in filter_width_K) number of convolutional + layers to skip with the linear path starting at K. Only valid + at K >= 1. (Each layer defaults to 1) + + Note that n_skip_1=s means that the next valid value of n_skip_* is 3 + + A diagram may help explain (numbers indicate layer): + + 1 2 3 4 5 6 + I--C -- B -- R -- C -- B -- R -- C -- M -- B -- R -- C -- B -- R -- C -- B -- R -- C -- M ... M -- R -- F -- O + \___________________________/ \____________________________________________________/ \ ... / + [n_skip_1 = 2] [n_skip_3 = 3] + + I - input + B - BatchNormalization + R - ReLU + C - Conv2D + F - Flatten + O - output + M - merge + + The input is always passed through a Conv2D layer, the output of which layer is counted as '1'. + Each subsequent [R -- C] block is counted as one 'layer'. 
The 'merge' layer isn't counted; hence + if n_skip_1 is 2, the next valid skip parameter is n_skip_3, which will start at the output + of the merge + """ + defaults = { + "board": 19, + "filters_per_layer": 128, + "layers": 20, + "filter_width_1": 5 + } + # copy defaults, but override with anything in kwargs + params = defaults + params.update(kwargs) + + # create the network using Keras' functional API, + # since this isn't 'Sequential' + model_input = Input(shape=(params["input_dim"], params["board"], params["board"])) + + # create first layer + convolution_path = convolutional.Convolution2D( + input_shape=(), + nb_filter=params["filters_per_layer"], + nb_row=params["filter_width_1"], + nb_col=params["filter_width_1"], + init='uniform', + activation='linear', # relu activations done inside resnet modules + border_mode='same')(model_input) + + def add_resnet_unit(path, K, **params): + """Add a resnet unit to path starting at layer 'K', + adding as many (ReLU + Conv2D) modules as specified by n_skip_K + + Returns new path and next layer index, i.e. K + n_skip_K, in a tuple + """ + # loosely based on https://github.com/keunwoochoi/residual_block_keras + # (see also keras docs here: http://keras.io/getting-started/functional-api-guide/#all-models-are-callable-just-like-layers) + + block_input = path + # use n_skip_K if it is there, default to 1 + skip_key = "n_skip_%d" % K + n_skip = params.get(skip_key, 1) + for i in range(n_skip): + layer = K + i + # add BatchNorm + path = BatchNormalization()(path) + # add ReLU + path = Activation('relu')(path) + # use filter_width_K if it is there, otherwise use 3 + filter_key = "filter_width_%d" % layer + filter_width = params.get(filter_key, 3) + # add Conv2D + path = convolutional.Convolution2D( + nb_filter=params["filters_per_layer"], + nb_row=filter_width, + nb_col=filter_width, + init='uniform', + activation='linear', + border_mode='same')(path) + # Merge 'input layer' with the path + path = merge([block_input, path], mode='sum') + return path, K + n_skip + + # create all other layers + layer = 1 + while layer < params['layers']: + convolution_path, layer = add_resnet_unit(convolution_path, layer, **params) + if layer > params['layers']: + print "Due to skipping, ended with {} layers instead of {}".format(layer, params['layers']) + + # since each layer's activation was linear, need one more ReLu + convolution_path = Activation('relu')(convolution_path) + + # the last layer maps each featuer to a number + convolution_path = convolutional.Convolution2D( + nb_filter=1, + nb_row=1, + nb_col=1, + init='uniform', + border_mode='same')(convolution_path) + # flatten output + network_output = Flatten()(convolution_path) + # add a bias to each board location + network_output = Bias()(network_output) + # softmax makes it into a probability distribution + network_output = Activation('softmax')(network_output) + + return Model(input=[model_input], output=[network_output]) diff --git a/AlphaGo/preprocessing/game_converter.py b/AlphaGo/preprocessing/game_converter.py index de606aabd..6324d254b 100644 --- a/AlphaGo/preprocessing/game_converter.py +++ b/AlphaGo/preprocessing/game_converter.py @@ -10,209 +10,209 @@ class SizeMismatchError(Exception): - pass + pass class game_converter: - def __init__(self, features): - self.feature_processor = Preprocess(features) - self.n_features = self.feature_processor.output_dim - - def convert_game(self, file_name, bd_size): - """Read the given SGF file into an iterable of (input,output) pairs - for neural network training 
- - Each input is a GameState converted into one-hot neural net features - Each output is an action as an (x,y) pair (passes are skipped) - - If this game's size does not match bd_size, a SizeMismatchError is raised - """ - - with open(file_name, 'r') as file_object: - state_action_iterator = sgf_iter_states(file_object.read(), include_end=False) - - for (state, move, player) in state_action_iterator: - if state.size != bd_size: - raise SizeMismatchError() - if move != go.PASS_MOVE: - nn_input = self.feature_processor.state_to_tensor(state) - yield (nn_input, move) - - def sgfs_to_hdf5(self, sgf_files, hdf5_file, bd_size=19, ignore_errors=True, verbose=False): - """Convert all files in the iterable sgf_files into an hdf5 group to be stored in hdf5_file - - Arguments: - - sgf_files : an iterable of relative or absolute paths to SGF files - - hdf5_file : the name of the HDF5 where features will be saved - - bd_size : side length of board of games that are loaded - - ignore_errors : if True, issues a Warning when there is an unknown exception rather than halting. Note - that sgf.ParseException and go.IllegalMove exceptions are always skipped - - The resulting file has the following properties: - states : dataset with shape (n_data, n_features, board width, board height) - actions : dataset with shape (n_data, 2) (actions are stored as x,y tuples of where the move was played) - file_offsets : group mapping from filenames to tuples of (index, length) - - For example, to find what positions in the dataset come from 'test.sgf': - index, length = file_offsets['test.sgf'] - test_states = states[index:index+length] - test_actions = actions[index:index+length] - """ - # TODO - also save feature list - - # make a hidden temporary file in case of a crash. - # on success, this is renamed to hdf5_file - tmp_file = os.path.join(os.path.dirname(hdf5_file), ".tmp." 
+ os.path.basename(hdf5_file)) - h5f = h5.File(tmp_file, 'w') - - try: - # see http://docs.h5py.org/en/latest/high/group.html#Group.create_dataset - states = h5f.require_dataset( - 'states', - dtype=np.uint8, - shape=(1, self.n_features, bd_size, bd_size), - maxshape=(None, self.n_features, bd_size, bd_size), # 'None' dimension allows it to grow arbitrarily - exact=False, # allow non-uint8 datasets to be loaded, coerced to uint8 - chunks=(64, self.n_features, bd_size, bd_size), # approximately 1MB chunks - compression="lzf") - actions = h5f.require_dataset( - 'actions', - dtype=np.uint8, - shape=(1, 2), - maxshape=(None, 2), - exact=False, - chunks=(1024, 2), - compression="lzf") - # 'file_offsets' is an HDF5 group so that 'file_name in file_offsets' is fast - file_offsets = h5f.require_group('file_offsets') - - if verbose: - print("created HDF5 dataset in {}".format(tmp_file)) - - next_idx = 0 - for file_name in sgf_files: - if verbose: - print(file_name) - # count number of state/action pairs yielded by this game - n_pairs = 0 - file_start_idx = next_idx - try: - for state, move in self.convert_game(file_name, bd_size): - if next_idx >= len(states): - states.resize((next_idx + 1, self.n_features, bd_size, bd_size)) - actions.resize((next_idx + 1, 2)) - states[next_idx] = state - actions[next_idx] = move - n_pairs += 1 - next_idx += 1 - except go.IllegalMove: - warnings.warn("Illegal Move encountered in %s\n\tdropping the remainder of the game" % file_name) - except sgf.ParseException: - warnings.warn("Could not parse %s\n\tdropping game" % file_name) - except SizeMismatchError: - warnings.warn("Skipping %s; wrong board size" % file_name) - except Exception as e: - # catch everything else - if ignore_errors: - warnings.warn("Unkown exception with file %s\n\t%s" % (file_name, e), stacklevel=2) - else: - raise e - finally: - if n_pairs > 0: - # '/' has special meaning in HDF5 key names, so they are replaced with ':' here - file_name_key = file_name.replace('/', ':') - file_offsets[file_name_key] = [file_start_idx, n_pairs] - if verbose: - print("\t%d state/action pairs extracted" % n_pairs) - elif verbose: - print("\t-no usable data-") - except Exception as e: - print("sgfs_to_hdf5 failed") - os.remove(tmp_file) - raise e - - if verbose: - print("finished. 
renaming %s to %s" % (tmp_file, hdf5_file)) - - # processing complete; rename tmp_file to hdf5_file - h5f.close() - os.rename(tmp_file, hdf5_file) + def __init__(self, features): + self.feature_processor = Preprocess(features) + self.n_features = self.feature_processor.output_dim + + def convert_game(self, file_name, bd_size): + """Read the given SGF file into an iterable of (input,output) pairs + for neural network training + + Each input is a GameState converted into one-hot neural net features + Each output is an action as an (x,y) pair (passes are skipped) + + If this game's size does not match bd_size, a SizeMismatchError is raised + """ + + with open(file_name, 'r') as file_object: + state_action_iterator = sgf_iter_states(file_object.read(), include_end=False) + + for (state, move, player) in state_action_iterator: + if state.size != bd_size: + raise SizeMismatchError() + if move != go.PASS_MOVE: + nn_input = self.feature_processor.state_to_tensor(state) + yield (nn_input, move) + + def sgfs_to_hdf5(self, sgf_files, hdf5_file, bd_size=19, ignore_errors=True, verbose=False): + """Convert all files in the iterable sgf_files into an hdf5 group to be stored in hdf5_file + + Arguments: + - sgf_files : an iterable of relative or absolute paths to SGF files + - hdf5_file : the name of the HDF5 where features will be saved + - bd_size : side length of board of games that are loaded + - ignore_errors : if True, issues a Warning when there is an unknown exception rather than halting. Note + that sgf.ParseException and go.IllegalMove exceptions are always skipped + + The resulting file has the following properties: + states : dataset with shape (n_data, n_features, board width, board height) + actions : dataset with shape (n_data, 2) (actions are stored as x,y tuples of where the move was played) + file_offsets : group mapping from filenames to tuples of (index, length) + + For example, to find what positions in the dataset come from 'test.sgf': + index, length = file_offsets['test.sgf'] + test_states = states[index:index+length] + test_actions = actions[index:index+length] + """ + # TODO - also save feature list + + # make a hidden temporary file in case of a crash. + # on success, this is renamed to hdf5_file + tmp_file = os.path.join(os.path.dirname(hdf5_file), ".tmp." 
+ os.path.basename(hdf5_file)) + h5f = h5.File(tmp_file, 'w') + + try: + # see http://docs.h5py.org/en/latest/high/group.html#Group.create_dataset + states = h5f.require_dataset( + 'states', + dtype=np.uint8, + shape=(1, self.n_features, bd_size, bd_size), + maxshape=(None, self.n_features, bd_size, bd_size), # 'None' dimension allows it to grow arbitrarily + exact=False, # allow non-uint8 datasets to be loaded, coerced to uint8 + chunks=(64, self.n_features, bd_size, bd_size), # approximately 1MB chunks + compression="lzf") + actions = h5f.require_dataset( + 'actions', + dtype=np.uint8, + shape=(1, 2), + maxshape=(None, 2), + exact=False, + chunks=(1024, 2), + compression="lzf") + # 'file_offsets' is an HDF5 group so that 'file_name in file_offsets' is fast + file_offsets = h5f.require_group('file_offsets') + + if verbose: + print("created HDF5 dataset in {}".format(tmp_file)) + + next_idx = 0 + for file_name in sgf_files: + if verbose: + print(file_name) + # count number of state/action pairs yielded by this game + n_pairs = 0 + file_start_idx = next_idx + try: + for state, move in self.convert_game(file_name, bd_size): + if next_idx >= len(states): + states.resize((next_idx + 1, self.n_features, bd_size, bd_size)) + actions.resize((next_idx + 1, 2)) + states[next_idx] = state + actions[next_idx] = move + n_pairs += 1 + next_idx += 1 + except go.IllegalMove: + warnings.warn("Illegal Move encountered in %s\n\tdropping the remainder of the game" % file_name) + except sgf.ParseException: + warnings.warn("Could not parse %s\n\tdropping game" % file_name) + except SizeMismatchError: + warnings.warn("Skipping %s; wrong board size" % file_name) + except Exception as e: + # catch everything else + if ignore_errors: + warnings.warn("Unkown exception with file %s\n\t%s" % (file_name, e), stacklevel=2) + else: + raise e + finally: + if n_pairs > 0: + # '/' has special meaning in HDF5 key names, so they are replaced with ':' here + file_name_key = file_name.replace('/', ':') + file_offsets[file_name_key] = [file_start_idx, n_pairs] + if verbose: + print("\t%d state/action pairs extracted" % n_pairs) + elif verbose: + print("\t-no usable data-") + except Exception as e: + print("sgfs_to_hdf5 failed") + os.remove(tmp_file) + raise e + + if verbose: + print("finished. renaming %s to %s" % (tmp_file, hdf5_file)) + + # processing complete; rename tmp_file to hdf5_file + h5f.close() + os.rename(tmp_file, hdf5_file) def run_game_converter(cmd_line_args=None): - """Run conversions. command-line args may be passed in as a list - """ - import argparse - import sys - - parser = argparse.ArgumentParser( - description='Prepare SGF Go game files for training the neural network model.', - epilog="Available features are: board, ones, turns_since, liberties,\ - capture_size, self_atari_size, liberties_after, sensibleness, and zeros.\ - Ladder features are not currently implemented") - parser.add_argument("--features", "-f", help="Comma-separated list of features to compute and store or 'all'", default='all') - parser.add_argument("--outfile", "-o", help="Destination to write data (hdf5 file)", required=True) - parser.add_argument("--recurse", "-R", help="Set to recurse through directories searching for SGF files", default=False, action="store_true") - parser.add_argument("--directory", "-d", help="Directory containing SGF files to process. if not present, expects files from stdin", default=None) - parser.add_argument("--size", "-s", help="Size of the game board. 
SGFs not matching this are discarded with a warning", type=int, default=19) - parser.add_argument("--verbose", "-v", help="Turn on verbose mode", default=False, action="store_true") - - if cmd_line_args is None: - args = parser.parse_args() - else: - args = parser.parse_args(cmd_line_args) - - if args.features.lower() == 'all': - feature_list = [ - "board", - "ones", - "turns_since", - "liberties", - "capture_size", - "self_atari_size", - "liberties_after", - # "ladder_capture", - # "ladder_escape", - "sensibleness", - "zeros"] - else: - feature_list = args.features.split(",") - - if args.verbose: - print("using features", feature_list) - - converter = game_converter(feature_list) - - def _is_sgf(fname): - return fname.strip()[-4:] == ".sgf" - - def _walk_all_sgfs(root): - """a helper function/generator to get all SGF files in subdirectories of root - """ - for (dirpath, dirname, files) in os.walk(root): - for filename in files: - if _is_sgf(filename): - # yield the full (relative) path to the file - yield os.path.join(dirpath, filename) - - def _list_sgfs(path): - """helper function to get all SGF files in a directory (does not recurse) - """ - files = os.listdir(path) - return (os.path.join(path, f) for f in files if _is_sgf(f)) - - # get an iterator of SGF files according to command line args - if args.directory: - if args.recurse: - files = _walk_all_sgfs(args.directory) - else: - files = _list_sgfs(args.directory) - else: - files = (f.strip() for f in sys.stdin if _is_sgf(f)) - - converter.sgfs_to_hdf5(files, args.outfile, bd_size=args.size, verbose=args.verbose) + """Run conversions. command-line args may be passed in as a list + """ + import argparse + import sys + + parser = argparse.ArgumentParser( + description='Prepare SGF Go game files for training the neural network model.', + epilog="Available features are: board, ones, turns_since, liberties,\ + capture_size, self_atari_size, liberties_after, sensibleness, and zeros.\ + Ladder features are not currently implemented") + parser.add_argument("--features", "-f", help="Comma-separated list of features to compute and store or 'all'", default='all') + parser.add_argument("--outfile", "-o", help="Destination to write data (hdf5 file)", required=True) + parser.add_argument("--recurse", "-R", help="Set to recurse through directories searching for SGF files", default=False, action="store_true") + parser.add_argument("--directory", "-d", help="Directory containing SGF files to process. if not present, expects files from stdin", default=None) + parser.add_argument("--size", "-s", help="Size of the game board. 
SGFs not matching this are discarded with a warning", type=int, default=19) + parser.add_argument("--verbose", "-v", help="Turn on verbose mode", default=False, action="store_true") + + if cmd_line_args is None: + args = parser.parse_args() + else: + args = parser.parse_args(cmd_line_args) + + if args.features.lower() == 'all': + feature_list = [ + "board", + "ones", + "turns_since", + "liberties", + "capture_size", + "self_atari_size", + "liberties_after", + # "ladder_capture", + # "ladder_escape", + "sensibleness", + "zeros"] + else: + feature_list = args.features.split(",") + + if args.verbose: + print("using features", feature_list) + + converter = game_converter(feature_list) + + def _is_sgf(fname): + return fname.strip()[-4:] == ".sgf" + + def _walk_all_sgfs(root): + """a helper function/generator to get all SGF files in subdirectories of root + """ + for (dirpath, dirname, files) in os.walk(root): + for filename in files: + if _is_sgf(filename): + # yield the full (relative) path to the file + yield os.path.join(dirpath, filename) + + def _list_sgfs(path): + """helper function to get all SGF files in a directory (does not recurse) + """ + files = os.listdir(path) + return (os.path.join(path, f) for f in files if _is_sgf(f)) + + # get an iterator of SGF files according to command line args + if args.directory: + if args.recurse: + files = _walk_all_sgfs(args.directory) + else: + files = _list_sgfs(args.directory) + else: + files = (f.strip() for f in sys.stdin if _is_sgf(f)) + + converter.sgfs_to_hdf5(files, args.outfile, bd_size=args.size, verbose=args.verbose) if __name__ == '__main__': - run_game_converter() + run_game_converter() diff --git a/AlphaGo/preprocessing/preprocessing.py b/AlphaGo/preprocessing/preprocessing.py index 60732e3f5..12f9269ea 100644 --- a/AlphaGo/preprocessing/preprocessing.py +++ b/AlphaGo/preprocessing/preprocessing.py @@ -7,267 +7,267 @@ def get_board(state): - """A feature encoding WHITE BLACK and EMPTY on separate planes, but plane 0 - always refers to the current player and plane 1 to the opponent - """ - planes = np.zeros((3, state.size, state.size)) - planes[0, :, :] = state.board == state.current_player # own stone - planes[1, :, :] = state.board == -state.current_player # opponent stone - planes[2, :, :] = state.board == go.EMPTY # empty space - return planes + """A feature encoding WHITE BLACK and EMPTY on separate planes, but plane 0 + always refers to the current player and plane 1 to the opponent + """ + planes = np.zeros((3, state.size, state.size)) + planes[0, :, :] = state.board == state.current_player # own stone + planes[1, :, :] = state.board == -state.current_player # opponent stone + planes[2, :, :] = state.board == go.EMPTY # empty space + return planes def get_turns_since(state, maximum=8): - """A feature encoding the age of the stone at each location up to 'maximum' + """A feature encoding the age of the stone at each location up to 'maximum' - Note: - - the [maximum-1] plane is used for any stone with age greater than or equal to maximum - - EMPTY locations are all-zero features - """ - planes = np.zeros((maximum, state.size, state.size)) - for x in range(state.size): - for y in range(state.size): - if state.stone_ages[x][y] >= 0: - planes[min(state.stone_ages[x][y], maximum - 1), x, y] = 1 - return planes + Note: + - the [maximum-1] plane is used for any stone with age greater than or equal to maximum + - EMPTY locations are all-zero features + """ + planes = np.zeros((maximum, state.size, state.size)) + for x in range(state.size): 
+ for y in range(state.size): + if state.stone_ages[x][y] >= 0: + planes[min(state.stone_ages[x][y], maximum - 1), x, y] = 1 + return planes def get_liberties(state, maximum=8): - """A feature encoding the number of liberties of the group connected to the stone at - each location - - Note: - - there is no zero-liberties plane; the 0th plane indicates groups in atari - - the [maximum-1] plane is used for any stone with liberties greater than or equal to maximum - - EMPTY locations are all-zero features - """ - planes = np.zeros((maximum, state.size, state.size)) - for i in range(maximum): - # single liberties in plane zero (groups won't have zero), double liberties in plane one, etc - planes[i, state.liberty_counts == i + 1] = 1 - # the "maximum-or-more" case on the backmost plane - planes[maximum - 1, state.liberty_counts >= maximum] = 1 - return planes + """A feature encoding the number of liberties of the group connected to the stone at + each location + + Note: + - there is no zero-liberties plane; the 0th plane indicates groups in atari + - the [maximum-1] plane is used for any stone with liberties greater than or equal to maximum + - EMPTY locations are all-zero features + """ + planes = np.zeros((maximum, state.size, state.size)) + for i in range(maximum): + # single liberties in plane zero (groups won't have zero), double liberties in plane one, etc + planes[i, state.liberty_counts == i + 1] = 1 + # the "maximum-or-more" case on the backmost plane + planes[maximum - 1, state.liberty_counts >= maximum] = 1 + return planes def get_capture_size(state, maximum=8): - """A feature encoding the number of opponent stones that would be captured by playing at each location, - up to 'maximum' - - Note: - - we currently *do* treat the 0th plane as "capturing zero stones" - - the [maximum-1] plane is used for any capturable group of size greater than or equal to maximum-1 - - the 0th plane is used for legal moves that would not result in capture - - illegal move locations are all-zero features - """ - planes = np.zeros((maximum, state.size, state.size)) - for (x, y) in state.get_legal_moves(): - # multiple disconnected groups may be captured. hence we loop over - # groups and count sizes if captured. - n_captured = 0 - for neighbor_group in state.get_groups_around((x, y)): - # if the neighboring group is opponent stones and they have - # one liberty, it must be (x,y) and we are capturing them - # (note suicide and ko are not an issue because they are not - # legal moves) - (gx, gy) = next(iter(neighbor_group)) - if (state.liberty_counts[gx][gy] == 1) and (state.board[gx, gy] != state.current_player): - n_captured += len(state.group_sets[gx][gy]) - planes[min(n_captured, maximum - 1), x, y] = 1 - return planes + """A feature encoding the number of opponent stones that would be captured by playing at each location, + up to 'maximum' + + Note: + - we currently *do* treat the 0th plane as "capturing zero stones" + - the [maximum-1] plane is used for any capturable group of size greater than or equal to maximum-1 + - the 0th plane is used for legal moves that would not result in capture + - illegal move locations are all-zero features + """ + planes = np.zeros((maximum, state.size, state.size)) + for (x, y) in state.get_legal_moves(): + # multiple disconnected groups may be captured. hence we loop over + # groups and count sizes if captured. 
+ n_captured = 0 + for neighbor_group in state.get_groups_around((x, y)): + # if the neighboring group is opponent stones and they have + # one liberty, it must be (x,y) and we are capturing them + # (note suicide and ko are not an issue because they are not + # legal moves) + (gx, gy) = next(iter(neighbor_group)) + if (state.liberty_counts[gx][gy] == 1) and (state.board[gx, gy] != state.current_player): + n_captured += len(state.group_sets[gx][gy]) + planes[min(n_captured, maximum - 1), x, y] = 1 + return planes def get_self_atari_size(state, maximum=8): - """A feature encoding the size of the own-stone group that is put into atari by playing at a location - """ - planes = np.zeros((maximum, state.size, state.size)) - - for (x, y) in state.get_legal_moves(): - # make a copy of the liberty/group sets at (x,y) so we can manipulate them - lib_set_after = set(state.liberty_sets[x][y]) - group_set_after = set() - group_set_after.add((x, y)) - captured_stones = set() - for neighbor_group in state.get_groups_around((x, y)): - # if the neighboring group is of the same color as the current player - # then playing here will connect this stone to that group - (gx, gy) = next(iter(neighbor_group)) - if state.board[gx, gy] == state.current_player: - lib_set_after |= state.liberty_sets[gx][gy] - group_set_after |= state.group_sets[gx][gy] - # if instead neighboring group is opponent *and about to be captured* - # then we might gain new liberties - elif state.liberty_counts[gx][gy] == 1: - captured_stones |= state.group_sets[gx][gy] - # add captured stones to liberties if they are neighboring the 'group_set_after' - # i.e. if they will become liberties once capture is resolved - if len(captured_stones) > 0: - for (gx, gy) in group_set_after: - # intersection of group's neighbors and captured stones will become liberties - lib_set_after |= set(state._neighbors((gx, gy))) & captured_stones - if (x, y) in lib_set_after: - lib_set_after.remove((x, y)) - # check if this move resulted in atari - if len(lib_set_after) == 1: - group_size = len(group_set_after) - # 0th plane used for size=1, so group_size-1 is the index - planes[min(group_size - 1, maximum - 1), x, y] = 1 - return planes + """A feature encoding the size of the own-stone group that is put into atari by playing at a location + """ + planes = np.zeros((maximum, state.size, state.size)) + + for (x, y) in state.get_legal_moves(): + # make a copy of the liberty/group sets at (x,y) so we can manipulate them + lib_set_after = set(state.liberty_sets[x][y]) + group_set_after = set() + group_set_after.add((x, y)) + captured_stones = set() + for neighbor_group in state.get_groups_around((x, y)): + # if the neighboring group is of the same color as the current player + # then playing here will connect this stone to that group + (gx, gy) = next(iter(neighbor_group)) + if state.board[gx, gy] == state.current_player: + lib_set_after |= state.liberty_sets[gx][gy] + group_set_after |= state.group_sets[gx][gy] + # if instead neighboring group is opponent *and about to be captured* + # then we might gain new liberties + elif state.liberty_counts[gx][gy] == 1: + captured_stones |= state.group_sets[gx][gy] + # add captured stones to liberties if they are neighboring the 'group_set_after' + # i.e. 
if they will become liberties once capture is resolved + if len(captured_stones) > 0: + for (gx, gy) in group_set_after: + # intersection of group's neighbors and captured stones will become liberties + lib_set_after |= set(state._neighbors((gx, gy))) & captured_stones + if (x, y) in lib_set_after: + lib_set_after.remove((x, y)) + # check if this move resulted in atari + if len(lib_set_after) == 1: + group_size = len(group_set_after) + # 0th plane used for size=1, so group_size-1 is the index + planes[min(group_size - 1, maximum - 1), x, y] = 1 + return planes def get_liberties_after(state, maximum=8): - """A feature encoding what the number of liberties *would be* of the group connected to - the stone *if* played at a location - - Note: - - there is no zero-liberties plane; the 0th plane indicates groups in atari - - the [maximum-1] plane is used for any stone with liberties greater than or equal to maximum - - illegal move locations are all-zero features - """ - planes = np.zeros((maximum, state.size, state.size)) - # note - left as all zeros if not a legal move - for (x, y) in state.get_legal_moves(): - # make a copy of the set of liberties at (x,y) so we can add to it - lib_set_after = set(state.liberty_sets[x][y]) - group_set_after = set() - group_set_after.add((x, y)) - captured_stones = set() - for neighbor_group in state.get_groups_around((x, y)): - # if the neighboring group is of the same color as the current player - # then playing here will connect this stone to that group and - # therefore add in all that group's liberties - (gx, gy) = next(iter(neighbor_group)) - if state.board[gx, gy] == state.current_player: - lib_set_after |= state.liberty_sets[gx][gy] - group_set_after |= state.group_sets[gx][gy] - # if instead neighboring group is opponent *and about to be captured* - # then we might gain new liberties - elif state.liberty_counts[gx][gy] == 1: - captured_stones |= state.group_sets[gx][gy] - # add captured stones to liberties if they are neighboring the 'group_set_after' - # i.e. 
if they will become liberties once capture is resolved - if len(captured_stones) > 0: - for (gx, gy) in group_set_after: - # intersection of group's neighbors and captured stones will become liberties - lib_set_after |= set(state._neighbors((gx, gy))) & captured_stones - # (x,y) itself may have made its way back in, but shouldn't count - # since it's clearly not a liberty after playing there - if (x, y) in lib_set_after: - lib_set_after.remove((x, y)) - planes[min(maximum - 1, len(lib_set_after) - 1), x, y] = 1 - return planes + """A feature encoding what the number of liberties *would be* of the group connected to + the stone *if* played at a location + + Note: + - there is no zero-liberties plane; the 0th plane indicates groups in atari + - the [maximum-1] plane is used for any stone with liberties greater than or equal to maximum + - illegal move locations are all-zero features + """ + planes = np.zeros((maximum, state.size, state.size)) + # note - left as all zeros if not a legal move + for (x, y) in state.get_legal_moves(): + # make a copy of the set of liberties at (x,y) so we can add to it + lib_set_after = set(state.liberty_sets[x][y]) + group_set_after = set() + group_set_after.add((x, y)) + captured_stones = set() + for neighbor_group in state.get_groups_around((x, y)): + # if the neighboring group is of the same color as the current player + # then playing here will connect this stone to that group and + # therefore add in all that group's liberties + (gx, gy) = next(iter(neighbor_group)) + if state.board[gx, gy] == state.current_player: + lib_set_after |= state.liberty_sets[gx][gy] + group_set_after |= state.group_sets[gx][gy] + # if instead neighboring group is opponent *and about to be captured* + # then we might gain new liberties + elif state.liberty_counts[gx][gy] == 1: + captured_stones |= state.group_sets[gx][gy] + # add captured stones to liberties if they are neighboring the 'group_set_after' + # i.e. if they will become liberties once capture is resolved + if len(captured_stones) > 0: + for (gx, gy) in group_set_after: + # intersection of group's neighbors and captured stones will become liberties + lib_set_after |= set(state._neighbors((gx, gy))) & captured_stones + # (x,y) itself may have made its way back in, but shouldn't count + # since it's clearly not a liberty after playing there + if (x, y) in lib_set_after: + lib_set_after.remove((x, y)) + planes[min(maximum - 1, len(lib_set_after) - 1), x, y] = 1 + return planes def get_ladder_capture(state): - raise NotImplementedError() + raise NotImplementedError() def get_ladder_escape(state): - raise NotImplementedError() + raise NotImplementedError() def get_sensibleness(state): - """A move is 'sensible' if it is legal and if it does not fill the current_player's own eye - """ - feature = np.zeros((1, state.size, state.size)) - for (x, y) in state.get_legal_moves(include_eyes=False): - feature[0, x, y] = 1 - return feature + """A move is 'sensible' if it is legal and if it does not fill the current_player's own eye + """ + feature = np.zeros((1, state.size, state.size)) + for (x, y) in state.get_legal_moves(include_eyes=False): + feature[0, x, y] = 1 + return feature def get_legal(state): - """Zero at all illegal moves, one at all legal moves. Unlike sensibleness, no eye check is done - """ - feature = np.zeros((1, state.size, state.size)) - for (x, y) in state.get_legal_moves(): - feature[0, x, y] = 1 - return feature + """Zero at all illegal moves, one at all legal moves. 
Unlike sensibleness, no eye check is done + """ + feature = np.zeros((1, state.size, state.size)) + for (x, y) in state.get_legal_moves(): + feature[0, x, y] = 1 + return feature # named features and their sizes are defined here FEATURES = { - "board": { - "size": 3, - "function": get_board - }, - "ones": { - "size": 1, - "function": lambda state: np.ones((1, state.size, state.size)) - }, - "turns_since": { - "size": 8, - "function": get_turns_since - }, - "liberties": { - "size": 8, - "function": get_liberties - }, - "capture_size": { - "size": 8, - "function": get_capture_size - }, - "self_atari_size": { - "size": 8, - "function": get_self_atari_size - }, - "liberties_after": { - "size": 8, - "function": get_liberties_after - }, - "ladder_capture": { - "size": 1, - "function": get_ladder_capture - }, - "ladder_escape": { - "size": 1, - "function": get_ladder_escape - }, - "sensibleness": { - "size": 1, - "function": get_sensibleness - }, - "zeros": { - "size": 1, - "function": lambda state: np.zeros((1, state.size, state.size)) - }, - "legal": { - "size": 1, - "function": get_legal - } + "board": { + "size": 3, + "function": get_board + }, + "ones": { + "size": 1, + "function": lambda state: np.ones((1, state.size, state.size)) + }, + "turns_since": { + "size": 8, + "function": get_turns_since + }, + "liberties": { + "size": 8, + "function": get_liberties + }, + "capture_size": { + "size": 8, + "function": get_capture_size + }, + "self_atari_size": { + "size": 8, + "function": get_self_atari_size + }, + "liberties_after": { + "size": 8, + "function": get_liberties_after + }, + "ladder_capture": { + "size": 1, + "function": get_ladder_capture + }, + "ladder_escape": { + "size": 1, + "function": get_ladder_escape + }, + "sensibleness": { + "size": 1, + "function": get_sensibleness + }, + "zeros": { + "size": 1, + "function": lambda state: np.zeros((1, state.size, state.size)) + }, + "legal": { + "size": 1, + "function": get_legal + } } DEFAULT_FEATURES = [ - "board", "ones", "turns_since", "liberties", "capture_size", - "self_atari_size", "liberties_after", "ladder_capture", "ladder_escape", - "sensibleness", "zeros"] + "board", "ones", "turns_since", "liberties", "capture_size", + "self_atari_size", "liberties_after", "ladder_capture", "ladder_escape", + "sensibleness", "zeros"] class Preprocess(object): - """a class to convert from AlphaGo GameState objects to tensors of one-hot - features for NN inputs - """ - - def __init__(self, feature_list=DEFAULT_FEATURES): - """create a preprocessor object that will concatenate together the - given list of features - """ - - self.output_dim = 0 - self.feature_list = feature_list - self.processors = [None] * len(feature_list) - for i in range(len(feature_list)): - feat = feature_list[i].lower() - if feat in FEATURES: - self.processors[i] = FEATURES[feat]["function"] - self.output_dim += FEATURES[feat]["size"] - else: - raise ValueError("uknown feature: %s" % feat) - - def state_to_tensor(self, state): - """Convert a GameState to a Theano-compatible tensor - """ - feat_tensors = [proc(state) for proc in self.processors] - - # concatenate along feature dimension then add in a singleton 'batch' dimension - f, s = self.output_dim, state.size - return np.concatenate(feat_tensors).reshape((1, f, s, s)) + """a class to convert from AlphaGo GameState objects to tensors of one-hot + features for NN inputs + """ + + def __init__(self, feature_list=DEFAULT_FEATURES): + """create a preprocessor object that will concatenate together the + given list of features 
+ """ + + self.output_dim = 0 + self.feature_list = feature_list + self.processors = [None] * len(feature_list) + for i in range(len(feature_list)): + feat = feature_list[i].lower() + if feat in FEATURES: + self.processors[i] = FEATURES[feat]["function"] + self.output_dim += FEATURES[feat]["size"] + else: + raise ValueError("uknown feature: %s" % feat) + + def state_to_tensor(self, state): + """Convert a GameState to a Theano-compatible tensor + """ + feat_tensors = [proc(state) for proc in self.processors] + + # concatenate along feature dimension then add in a singleton 'batch' dimension + f, s = self.output_dim, state.size + return np.concatenate(feat_tensors).reshape((1, f, s, s)) diff --git a/AlphaGo/training/reinforcement_policy_trainer.py b/AlphaGo/training/reinforcement_policy_trainer.py index b4ab67771..8bf36cc94 100644 --- a/AlphaGo/training/reinforcement_policy_trainer.py +++ b/AlphaGo/training/reinforcement_policy_trainer.py @@ -12,277 +12,277 @@ class BatchedReinforcementLearningSGD(Optimizer): - '''A Keras Optimizer that sums gradients together for each game, applying them only once the - winner is known. - - It is the responsibility of the calling code to call set_current_game() before each example to - tell the optimizer for which game gradients should be accumulated, and to call set_result() to - tell the optimizer what the sign of the gradient for each game should be and when all games are - over. - - Arguments - lr: float >= 0. Learning rate. - ng: int > 0. Number of games played in parallel. Each one has its own cumulative gradient. - ''' - def __init__(self, lr=0.01, ng=20, **kwargs): - super(BatchedReinforcementLearningSGD, self).__init__(**kwargs) - self.__dict__.update(locals()) - self.lr = K.variable(lr) - self.cumulative_gradients = [] - self.num_games = ng - self.game_idx = K.variable(0) # which gradient to accumulate in the next batch. - self.gradient_sign = [K.variable(0) for _ in range(ng)] - self.running_games = K.variable(self.num_games) - - def set_current_game(self, game_idx): - K.set_value(self.game_idx, game_idx) - - def set_result(self, game_idx, won_game): - '''Mark the outcome of the game at index game_idx. Once all games are complete, updates - are automatically triggered in the next call to a keras fit function. - ''' - K.set_value(self.gradient_sign[game_idx], +1 if won_game else -1) - # Note: using '-= 1' would create a new variable, which would invalidate the dependencies - # in get_updates(). - K.set_value(self.running_games, K.get_value(self.running_games) - 1) - - def get_updates(self, params, constraints, loss): - # Note: get_updates is called *once* by keras. Its job is to return a set of 'update - # operations' to any K.variable (e.g. model weights or self.num_games). Updates are applied - # whenever Keras' train_function is evaluated, i.e. in every batch. Model.fit_on_batch() - # will trigger exactly one update. All updates use the 'old' value of parameters - there is - # no dependency on the order of the list of updates. - self.updates = [] - # Get expressions for gradients of model parameters. - grads = self.get_gradients(loss, params) - # Create a set of accumulated gradients, one for each game. - shapes = [K.get_variable_shape(p) for p in params] - self.cumulative_gradients = [[K.zeros(shape) for shape in shapes] for _ in range(self.num_games)] - - def conditional_update(cond, variable, new_value): - '''Helper function to create updates that only happen when cond is True. Writes to - self.updates and returns the new variable. 
- - Note: K.update(x, x) is cheap, but K.update_add(x, K.zeros_like(x)) can be expensive. - ''' - maybe_new_value = K.switch(cond, new_value, variable) - self.updates.append(K.update(variable, maybe_new_value)) - return maybe_new_value - - # Update cumulative gradient at index game_idx. This is done by returning an update for all - # gradients that is a no-op everywhere except for the game_idx'th one. When game_idx is - # changed by a call to set_current_game(), it will change the gradient that is getting - # accumulated. - # new_cumulative_gradients keeps references to the updated variables for use below in - # updating parameters with the freshly-accumulated gradients. - new_cumulative_gradients = [[None] * len(cgs) for cgs in self.cumulative_gradients] - for i, cgs in enumerate(self.cumulative_gradients): - for j, (g, cg) in enumerate(zip(grads, cgs)): - new_gradient = conditional_update(K.equal(self.game_idx, i), cg, cg + g) - new_cumulative_gradients[i][j] = new_gradient - - # Compute the net update to parameters, taking into account the sign of each cumulative - # gradient. - net_grads = [K.zeros_like(g) for g in grads] - for i, cgs in enumerate(new_cumulative_gradients): - for j, cg in enumerate(cgs): - net_grads[j] += self.gradient_sign[i] * cg - - # Trigger a full update when all games have finished. - self.trigger_update = K.lesser_equal(self.running_games, 0) - - # Update model parameters conditional on trigger_update. - for p, g in zip(params, net_grads): - new_p = p + g * self.lr - if p in constraints: - c = constraints[p] - new_p = c(new_p) - conditional_update(self.trigger_update, p, new_p) - - # 'reset' game counter and gradient signs when parameters are updated. - for sign in self.gradient_sign: - conditional_update(self.trigger_update, sign, K.variable(0)) - conditional_update(self.trigger_update, self.running_games, K.variable(self.num_games)) - return self.updates - - def get_config(self): - config = { - 'lr': float(K.get_value(self.lr)), - 'ng': self.num_games} - base_config = super(BatchedReinforcementLearningSGD, self).get_config() - return dict(list(base_config.items()) + list(config.items())) + '''A Keras Optimizer that sums gradients together for each game, applying them only once the + winner is known. + + It is the responsibility of the calling code to call set_current_game() before each example to + tell the optimizer for which game gradients should be accumulated, and to call set_result() to + tell the optimizer what the sign of the gradient for each game should be and when all games are + over. + + Arguments + lr: float >= 0. Learning rate. + ng: int > 0. Number of games played in parallel. Each one has its own cumulative gradient. + ''' + def __init__(self, lr=0.01, ng=20, **kwargs): + super(BatchedReinforcementLearningSGD, self).__init__(**kwargs) + self.__dict__.update(locals()) + self.lr = K.variable(lr) + self.cumulative_gradients = [] + self.num_games = ng + self.game_idx = K.variable(0) # which gradient to accumulate in the next batch. + self.gradient_sign = [K.variable(0) for _ in range(ng)] + self.running_games = K.variable(self.num_games) + + def set_current_game(self, game_idx): + K.set_value(self.game_idx, game_idx) + + def set_result(self, game_idx, won_game): + '''Mark the outcome of the game at index game_idx. Once all games are complete, updates + are automatically triggered in the next call to a keras fit function. 
+ ''' + K.set_value(self.gradient_sign[game_idx], +1 if won_game else -1) + # Note: using '-= 1' would create a new variable, which would invalidate the dependencies + # in get_updates(). + K.set_value(self.running_games, K.get_value(self.running_games) - 1) + + def get_updates(self, params, constraints, loss): + # Note: get_updates is called *once* by keras. Its job is to return a set of 'update + # operations' to any K.variable (e.g. model weights or self.num_games). Updates are applied + # whenever Keras' train_function is evaluated, i.e. in every batch. Model.fit_on_batch() + # will trigger exactly one update. All updates use the 'old' value of parameters - there is + # no dependency on the order of the list of updates. + self.updates = [] + # Get expressions for gradients of model parameters. + grads = self.get_gradients(loss, params) + # Create a set of accumulated gradients, one for each game. + shapes = [K.get_variable_shape(p) for p in params] + self.cumulative_gradients = [[K.zeros(shape) for shape in shapes] for _ in range(self.num_games)] + + def conditional_update(cond, variable, new_value): + '''Helper function to create updates that only happen when cond is True. Writes to + self.updates and returns the new variable. + + Note: K.update(x, x) is cheap, but K.update_add(x, K.zeros_like(x)) can be expensive. + ''' + maybe_new_value = K.switch(cond, new_value, variable) + self.updates.append(K.update(variable, maybe_new_value)) + return maybe_new_value + + # Update cumulative gradient at index game_idx. This is done by returning an update for all + # gradients that is a no-op everywhere except for the game_idx'th one. When game_idx is + # changed by a call to set_current_game(), it will change the gradient that is getting + # accumulated. + # new_cumulative_gradients keeps references to the updated variables for use below in + # updating parameters with the freshly-accumulated gradients. + new_cumulative_gradients = [[None] * len(cgs) for cgs in self.cumulative_gradients] + for i, cgs in enumerate(self.cumulative_gradients): + for j, (g, cg) in enumerate(zip(grads, cgs)): + new_gradient = conditional_update(K.equal(self.game_idx, i), cg, cg + g) + new_cumulative_gradients[i][j] = new_gradient + + # Compute the net update to parameters, taking into account the sign of each cumulative + # gradient. + net_grads = [K.zeros_like(g) for g in grads] + for i, cgs in enumerate(new_cumulative_gradients): + for j, cg in enumerate(cgs): + net_grads[j] += self.gradient_sign[i] * cg + + # Trigger a full update when all games have finished. + self.trigger_update = K.lesser_equal(self.running_games, 0) + + # Update model parameters conditional on trigger_update. + for p, g in zip(params, net_grads): + new_p = p + g * self.lr + if p in constraints: + c = constraints[p] + new_p = c(new_p) + conditional_update(self.trigger_update, p, new_p) + + # 'reset' game counter and gradient signs when parameters are updated. 
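The optimizer above keeps one accumulated gradient per parallel game and only touches the model weights once every game has reported a result; the applied update is the sum of each game's accumulated gradient multiplied by +1 for a win and -1 for a loss. A minimal NumPy sketch of that bookkeeping, detached from Keras (the class and method names here are illustrative and not part of this patch):

import numpy as np

class PerGameGradientBuffer(object):
    """Toy analogue of the accumulate-then-apply scheme used by the optimizer above."""

    def __init__(self, param_shape, num_games):
        self.cumulative = [np.zeros(param_shape) for _ in range(num_games)]
        self.signs = [0] * num_games  # +1 win, -1 loss, 0 result not yet known

    def accumulate(self, game_idx, grad):
        # called once per training example, for the game the example came from
        self.cumulative[game_idx] += grad

    def set_result(self, game_idx, won_game):
        self.signs[game_idx] = 1 if won_game else -1

    def apply(self, params, lr):
        # only meaningful once every game has a result
        assert all(s != 0 for s in self.signs)
        net = sum(s * g for s, g in zip(self.signs, self.cumulative))
        # gradient ascent, mirroring 'new_p = p + g * self.lr' in get_updates() above
        return params + lr * net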
+ for sign in self.gradient_sign: + conditional_update(self.trigger_update, sign, K.variable(0)) + conditional_update(self.trigger_update, self.running_games, K.variable(self.num_games)) + return self.updates + + def get_config(self): + config = { + 'lr': float(K.get_value(self.lr)), + 'ng': self.num_games} + base_config = super(BatchedReinforcementLearningSGD, self).get_config() + return dict(list(base_config.items()) + list(config.items())) def _make_training_pair(st, mv, preprocessor): - # Convert move to one-hot - st_tensor = preprocessor.state_to_tensor(st) - mv_tensor = np.zeros((1, st.size * st.size)) - mv_tensor[(0, flatten_idx(mv, st.size))] = 1 - return (st_tensor, mv_tensor) + # Convert move to one-hot + st_tensor = preprocessor.state_to_tensor(st) + mv_tensor = np.zeros((1, st.size * st.size)) + mv_tensor[(0, flatten_idx(mv, st.size))] = 1 + return (st_tensor, mv_tensor) def run_n_games(optimizer, learner, opponent, num_games): - '''Run num_games games to completion, calling train_batch() on each position the learner sees. - - (Note: optimizer only accumulates gradients in its update function until all games have finished) - ''' - board_size = learner.policy.model.input_shape[-1] - states = [GameState(size=board_size) for _ in range(num_games)] - learner_net = learner.policy.model - - # Start all odd games with moves by 'opponent'. Even games will have 'learner' black. - learner_color = [go.BLACK if i % 2 == 0 else go.WHITE for i in range(num_games)] - odd_states = states[1::2] - moves = opponent.get_moves(odd_states) - for st, mv in zip(odd_states, moves): - st.do_move(mv) - - current = learner - other = opponent - # Need to keep track of the index of unfinished states so that we can communicate which one is - # being updated to the optimizer. - idxs_to_unfinished_states = {i: states[i] for i in range(num_games)} - while len(idxs_to_unfinished_states) > 0: - # Get next moves by current player for all unfinished states. - moves = current.get_moves(idxs_to_unfinished_states.values()) - just_finished = [] - # Do each move to each state in order. - for (idx, state), mv in zip(idxs_to_unfinished_states.iteritems(), moves): - # Order is important here. We must first get the training pair on the unmodified state. - # Next, the state is updated and checked to see if the game is over. If it is over, the - # optimizer is notified via set_result. Finally, train_on_batch is called, which - # will trigger an update of all parameters only if set_result() has been called - # for all games already (so set_result must come before train_on_batch). - is_learnable = current is learner and mv is not go.PASS_MOVE - if is_learnable: - (X, y) = _make_training_pair(state, mv, learner.policy.preprocessor) - state.do_move(mv) - if state.is_end_of_game: - learner_is_winner = state.get_winner() == learner_color[idx] - optimizer.set_result(idx, learner_is_winner) - just_finished.append(idx) - if is_learnable: - optimizer.set_current_game(idx) - learner_net.train_on_batch(X, y) - - # Remove games that have finished from dict. - for idx in just_finished: - del idxs_to_unfinished_states[idx] - - # Swap 'current' and 'other' for next turn. - current, other = other, current - - # Return the win ratio. - wins = sum(state.get_winner() == pc for (state, pc) in zip(states, learner_color)) - return float(wins) / num_games + '''Run num_games games to completion, calling train_batch() on each position the learner sees. 
+ + (Note: optimizer only accumulates gradients in its update function until all games have finished) + ''' + board_size = learner.policy.model.input_shape[-1] + states = [GameState(size=board_size) for _ in range(num_games)] + learner_net = learner.policy.model + + # Start all odd games with moves by 'opponent'. Even games will have 'learner' black. + learner_color = [go.BLACK if i % 2 == 0 else go.WHITE for i in range(num_games)] + odd_states = states[1::2] + moves = opponent.get_moves(odd_states) + for st, mv in zip(odd_states, moves): + st.do_move(mv) + + current = learner + other = opponent + # Need to keep track of the index of unfinished states so that we can communicate which one is + # being updated to the optimizer. + idxs_to_unfinished_states = {i: states[i] for i in range(num_games)} + while len(idxs_to_unfinished_states) > 0: + # Get next moves by current player for all unfinished states. + moves = current.get_moves(idxs_to_unfinished_states.values()) + just_finished = [] + # Do each move to each state in order. + for (idx, state), mv in zip(idxs_to_unfinished_states.iteritems(), moves): + # Order is important here. We must first get the training pair on the unmodified state. + # Next, the state is updated and checked to see if the game is over. If it is over, the + # optimizer is notified via set_result. Finally, train_on_batch is called, which + # will trigger an update of all parameters only if set_result() has been called + # for all games already (so set_result must come before train_on_batch). + is_learnable = current is learner and mv is not go.PASS_MOVE + if is_learnable: + (X, y) = _make_training_pair(state, mv, learner.policy.preprocessor) + state.do_move(mv) + if state.is_end_of_game: + learner_is_winner = state.get_winner() == learner_color[idx] + optimizer.set_result(idx, learner_is_winner) + just_finished.append(idx) + if is_learnable: + optimizer.set_current_game(idx) + learner_net.train_on_batch(X, y) + + # Remove games that have finished from dict. + for idx in just_finished: + del idxs_to_unfinished_states[idx] + + # Swap 'current' and 'other' for next turn. + current, other = other, current + + # Return the win ratio. + wins = sum(state.get_winner() == pc for (state, pc) in zip(states, learner_color)) + return float(wins) / num_games def run_training(cmd_line_args=None): - import argparse - parser = argparse.ArgumentParser(description='Perform reinforcement learning to improve given policy network. Second phase of pipeline.') - parser.add_argument("model_json", help="Path to policy model JSON.") - parser.add_argument("initial_weights", help="Path to HDF5 file with inital weights (i.e. 
result of supervised training).") - parser.add_argument("out_directory", help="Path to folder where the model params and metadata will be saved after each epoch.") - parser.add_argument("--learning-rate", help="Keras learning rate (Default: 0.001)", type=float, default=0.001) - parser.add_argument("--policy-temp", help="Distribution temperature of players using policies (Default: 0.67)", type=float, default=0.67) - parser.add_argument("--save-every", help="Save policy as a new opponent every n batches (Default: 500)", type=int, default=500) - parser.add_argument("--game-batch", help="Number of games per mini-batch (Default: 20)", type=int, default=20) - parser.add_argument("--move-limit", help="Maximum number of moves per game", type=int, default=500) - parser.add_argument("--iterations", help="Number of training batches/iterations (Default: 10000)", type=int, default=10000) - parser.add_argument("--resume", help="Load latest weights in out_directory and resume", default=False, action="store_true") - parser.add_argument("--verbose", "-v", help="Turn on verbose mode", default=False, action="store_true") - # Baseline function (TODO) default lambda state: 0 (receives either file - # paths to JSON and weights or None, in which case it uses default baseline 0) - if cmd_line_args is None: - args = parser.parse_args() - else: - args = parser.parse_args(cmd_line_args) - - ZEROTH_FILE = "weights.00000.hdf5" - - if args.resume: - if not os.path.exists(os.path.join(args.out_directory, "metadata.json")): - raise ValueError("Cannot resume without existing output directory") - - if not os.path.exists(args.out_directory): - if args.verbose: - print "creating output directory {}".format(args.out_directory) - os.makedirs(args.out_directory) - - if not args.resume: - # make a copy of weights file, "weights.00000.hdf5" in the output directory - copyfile(args.initial_weights, os.path.join(args.out_directory, ZEROTH_FILE)) - if args.verbose: - print "copied {} to {}".format(args.initial_weights, os.path.join(args.out_directory, ZEROTH_FILE)) - player_weights = ZEROTH_FILE - else: - # if resuming, we expect initial_weights to be just a "weights.#####.hdf5" file, not a full path - args.initial_weights = os.path.join(args.out_directory, os.path.basename(args.initial_weights)) - if not os.path.exists(args.initial_weights): - raise ValueError("Cannot resume; weights {} do not exist".format(args.initial_weights)) - elif args.verbose: - print "Resuming with weights {}".format(args.initial_weights) - player_weights = os.path.basename(args.initial_weights) - - # Set initial conditions - policy = CNNPolicy.load_model(args.model_json) - policy.model.load_weights(args.initial_weights) - player = ProbabilisticPolicyPlayer(policy, temperature=args.policy_temp, move_limit=args.move_limit) - - # different opponents come from simply changing the weights of 'opponent.policy.model'. That - # is, only 'opp_policy' needs to be changed, and 'opponent' will change. 
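As the comment above notes, a new opponent is simply new weights loaded into the existing 'opponent' player; the player object itself never changes. Condensed into a sketch of what each iteration of run_training does when it samples from the opponent pool (the calls and the metadata layout are taken from the code below):

import os
import numpy as np

def sample_opponent(opponent, metadata, out_directory):
    # pick a previously saved checkpoint (possibly the current player itself) ...
    opp_weights = np.random.choice(metadata["opponents"])
    # ... and load it into the same opponent network; only the weights change
    opponent.policy.model.load_weights(os.path.join(out_directory, opp_weights))
    return opp_weights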
- opp_policy = CNNPolicy.load_model(args.model_json) - opponent = ProbabilisticPolicyPlayer(opp_policy, temperature=args.policy_temp, move_limit=args.move_limit) - - if args.verbose: - print "created player and opponent with temperature {}".format(args.policy_temp) - - if not args.resume: - metadata = { - "model_file": args.model_json, - "init_weights": args.initial_weights, - "learning_rate": args.learning_rate, - "temperature": args.policy_temp, - "game_batch": args.game_batch, - "opponents": [ZEROTH_FILE], # which weights from which to sample an opponent each batch - "win_ratio": {} # map from player to tuple of (opponent, win ratio) Useful for validating in lieu of 'accuracy/loss' - } - else: - with open(os.path.join(args.out_directory, "metadata.json"), "r") as f: - metadata = json.load(f) - - # Append args of current run to history of full command args. - metadata["cmd_line_args"] = metadata.get("cmd_line_args", []).append(vars(args)) - - def save_metadata(): - with open(os.path.join(args.out_directory, "metadata.json"), "w") as f: - json.dump(metadata, f, sort_keys=True, indent=2) - - optimizer = BatchedReinforcementLearningSGD(lr=args.learning_rate, ng=args.game_batch) - player.policy.model.compile(loss='categorical_crossentropy', optimizer=optimizer) - for i_iter in xrange(1, args.iterations + 1): - # Randomly choose opponent from pool (possibly self), and playing game_batch games against - # them. - opp_weights = np.random.choice(metadata["opponents"]) - opp_path = os.path.join(args.out_directory, opp_weights) - - # Load new weights into opponent's network, but keep the same opponent object. - opponent.policy.model.load_weights(opp_path) - if args.verbose: - print "Batch {}\tsampled opponent is {}".format(i_iter, opp_weights) - - # Run games (and learn from results). Keep track of the win ratio vs each opponent over time. - win_ratio = run_n_games(optimizer, player, opponent, args.game_batch) - metadata["win_ratio"][player_weights] = (opp_weights, win_ratio) - - # Save all intermediate models. - player_weights = "weights.%05d.hdf5" % i_iter - player.policy.model.save_weights(os.path.join(args.out_directory, player_weights)) - - # Add player to batch of oppenents once in a while. - if i_iter % args.save_every == 0: - metadata["opponents"].append(player_weights) - save_metadata() + import argparse + parser = argparse.ArgumentParser(description='Perform reinforcement learning to improve given policy network. Second phase of pipeline.') + parser.add_argument("model_json", help="Path to policy model JSON.") + parser.add_argument("initial_weights", help="Path to HDF5 file with inital weights (i.e. 
result of supervised training).") + parser.add_argument("out_directory", help="Path to folder where the model params and metadata will be saved after each epoch.") + parser.add_argument("--learning-rate", help="Keras learning rate (Default: 0.001)", type=float, default=0.001) + parser.add_argument("--policy-temp", help="Distribution temperature of players using policies (Default: 0.67)", type=float, default=0.67) + parser.add_argument("--save-every", help="Save policy as a new opponent every n batches (Default: 500)", type=int, default=500) + parser.add_argument("--game-batch", help="Number of games per mini-batch (Default: 20)", type=int, default=20) + parser.add_argument("--move-limit", help="Maximum number of moves per game", type=int, default=500) + parser.add_argument("--iterations", help="Number of training batches/iterations (Default: 10000)", type=int, default=10000) + parser.add_argument("--resume", help="Load latest weights in out_directory and resume", default=False, action="store_true") + parser.add_argument("--verbose", "-v", help="Turn on verbose mode", default=False, action="store_true") + # Baseline function (TODO) default lambda state: 0 (receives either file + # paths to JSON and weights or None, in which case it uses default baseline 0) + if cmd_line_args is None: + args = parser.parse_args() + else: + args = parser.parse_args(cmd_line_args) + + ZEROTH_FILE = "weights.00000.hdf5" + + if args.resume: + if not os.path.exists(os.path.join(args.out_directory, "metadata.json")): + raise ValueError("Cannot resume without existing output directory") + + if not os.path.exists(args.out_directory): + if args.verbose: + print "creating output directory {}".format(args.out_directory) + os.makedirs(args.out_directory) + + if not args.resume: + # make a copy of weights file, "weights.00000.hdf5" in the output directory + copyfile(args.initial_weights, os.path.join(args.out_directory, ZEROTH_FILE)) + if args.verbose: + print "copied {} to {}".format(args.initial_weights, os.path.join(args.out_directory, ZEROTH_FILE)) + player_weights = ZEROTH_FILE + else: + # if resuming, we expect initial_weights to be just a "weights.#####.hdf5" file, not a full path + args.initial_weights = os.path.join(args.out_directory, os.path.basename(args.initial_weights)) + if not os.path.exists(args.initial_weights): + raise ValueError("Cannot resume; weights {} do not exist".format(args.initial_weights)) + elif args.verbose: + print "Resuming with weights {}".format(args.initial_weights) + player_weights = os.path.basename(args.initial_weights) + + # Set initial conditions + policy = CNNPolicy.load_model(args.model_json) + policy.model.load_weights(args.initial_weights) + player = ProbabilisticPolicyPlayer(policy, temperature=args.policy_temp, move_limit=args.move_limit) + + # different opponents come from simply changing the weights of 'opponent.policy.model'. That + # is, only 'opp_policy' needs to be changed, and 'opponent' will change. 
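One behavioural note on the metadata bookkeeping in run_training below (the supervised trainer further down has the same line): metadata.get("cmd_line_args", []).append(vars(args)) evaluates to None, because list.append mutates in place and returns None, so the stored value ends up None rather than a growing history of runs. If the intent is to accumulate the arguments of every run, a small helper along these lines would do it (a sketch, not part of this patch):

def record_run_args(metadata, args_dict):
    """Append this run's args to the recorded history instead of overwriting it with None."""
    history = metadata.get("cmd_line_args", [])
    history.append(args_dict)
    metadata["cmd_line_args"] = history
    return metadata

# equivalently: metadata.setdefault("cmd_line_args", []).append(vars(args))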
+ opp_policy = CNNPolicy.load_model(args.model_json) + opponent = ProbabilisticPolicyPlayer(opp_policy, temperature=args.policy_temp, move_limit=args.move_limit) + + if args.verbose: + print "created player and opponent with temperature {}".format(args.policy_temp) + + if not args.resume: + metadata = { + "model_file": args.model_json, + "init_weights": args.initial_weights, + "learning_rate": args.learning_rate, + "temperature": args.policy_temp, + "game_batch": args.game_batch, + "opponents": [ZEROTH_FILE], # which weights from which to sample an opponent each batch + "win_ratio": {} # map from player to tuple of (opponent, win ratio) Useful for validating in lieu of 'accuracy/loss' + } + else: + with open(os.path.join(args.out_directory, "metadata.json"), "r") as f: + metadata = json.load(f) + + # Append args of current run to history of full command args. + metadata["cmd_line_args"] = metadata.get("cmd_line_args", []).append(vars(args)) + + def save_metadata(): + with open(os.path.join(args.out_directory, "metadata.json"), "w") as f: + json.dump(metadata, f, sort_keys=True, indent=2) + + optimizer = BatchedReinforcementLearningSGD(lr=args.learning_rate, ng=args.game_batch) + player.policy.model.compile(loss='categorical_crossentropy', optimizer=optimizer) + for i_iter in xrange(1, args.iterations + 1): + # Randomly choose opponent from pool (possibly self), and playing game_batch games against + # them. + opp_weights = np.random.choice(metadata["opponents"]) + opp_path = os.path.join(args.out_directory, opp_weights) + + # Load new weights into opponent's network, but keep the same opponent object. + opponent.policy.model.load_weights(opp_path) + if args.verbose: + print "Batch {}\tsampled opponent is {}".format(i_iter, opp_weights) + + # Run games (and learn from results). Keep track of the win ratio vs each opponent over time. + win_ratio = run_n_games(optimizer, player, opponent, args.game_batch) + metadata["win_ratio"][player_weights] = (opp_weights, win_ratio) + + # Save all intermediate models. + player_weights = "weights.%05d.hdf5" % i_iter + player.policy.model.save_weights(os.path.join(args.out_directory, player_weights)) + + # Add player to batch of oppenents once in a while. + if i_iter % args.save_every == 0: + metadata["opponents"].append(player_weights) + save_metadata() if __name__ == '__main__': - run_training() + run_training() diff --git a/AlphaGo/training/supervised_policy_trainer.py b/AlphaGo/training/supervised_policy_trainer.py index 7e04dd6e5..ec247812c 100644 --- a/AlphaGo/training/supervised_policy_trainer.py +++ b/AlphaGo/training/supervised_policy_trainer.py @@ -8,218 +8,218 @@ def one_hot_action(action, size=19): - """Convert an (x,y) action into a size x size array of zeros with a 1 at x,y - """ - categorical = np.zeros((size, size)) - categorical[action] = 1 - return categorical + """Convert an (x,y) action into a size x size array of zeros with a 1 at x,y + """ + categorical = np.zeros((size, size)) + categorical[action] = 1 + return categorical def shuffled_hdf5_batch_generator(state_dataset, action_dataset, indices, batch_size, transforms=[]): - """A generator of batches of training data for use with the fit_generator function - of Keras. Data is accessed in the order of the given indices for shuffling. 
- """ - state_batch_shape = (batch_size,) + state_dataset.shape[1:] - game_size = state_batch_shape[-1] - Xbatch = np.zeros(state_batch_shape) - Ybatch = np.zeros((batch_size, game_size * game_size)) - batch_idx = 0 - while True: - for data_idx in indices: - # choose a random transformation of the data (rotations/reflections of the board) - transform = np.random.choice(transforms) - # get state from dataset and transform it. - # loop comprehension is used so that the transformation acts on the 3rd and 4th dimensions - state = np.array([transform(plane) for plane in state_dataset[data_idx]]) - # must be cast to a tuple so that it is interpreted as (x,y) not [(x,:), (y,:)] - action_xy = tuple(action_dataset[data_idx]) - action = transform(one_hot_action(action_xy, game_size)) - Xbatch[batch_idx] = state - Ybatch[batch_idx] = action.flatten() - batch_idx += 1 - if batch_idx == batch_size: - batch_idx = 0 - yield (Xbatch, Ybatch) + """A generator of batches of training data for use with the fit_generator function + of Keras. Data is accessed in the order of the given indices for shuffling. + """ + state_batch_shape = (batch_size,) + state_dataset.shape[1:] + game_size = state_batch_shape[-1] + Xbatch = np.zeros(state_batch_shape) + Ybatch = np.zeros((batch_size, game_size * game_size)) + batch_idx = 0 + while True: + for data_idx in indices: + # choose a random transformation of the data (rotations/reflections of the board) + transform = np.random.choice(transforms) + # get state from dataset and transform it. + # loop comprehension is used so that the transformation acts on the 3rd and 4th dimensions + state = np.array([transform(plane) for plane in state_dataset[data_idx]]) + # must be cast to a tuple so that it is interpreted as (x,y) not [(x,:), (y,:)] + action_xy = tuple(action_dataset[data_idx]) + action = transform(one_hot_action(action_xy, game_size)) + Xbatch[batch_idx] = state + Ybatch[batch_idx] = action.flatten() + batch_idx += 1 + if batch_idx == batch_size: + batch_idx = 0 + yield (Xbatch, Ybatch) class MetadataWriterCallback(Callback): - def __init__(self, path): - self.file = path - self.metadata = { - "epochs": [], - "best_epoch": 0 - } + def __init__(self, path): + self.file = path + self.metadata = { + "epochs": [], + "best_epoch": 0 + } - def on_epoch_end(self, epoch, logs={}): - # in case appending to logs (resuming training), get epoch number ourselves - epoch = len(self.metadata["epochs"]) + def on_epoch_end(self, epoch, logs={}): + # in case appending to logs (resuming training), get epoch number ourselves + epoch = len(self.metadata["epochs"]) - self.metadata["epochs"].append(logs) + self.metadata["epochs"].append(logs) - if "val_loss" in logs: - key = "val_loss" - else: - key = "loss" + if "val_loss" in logs: + key = "val_loss" + else: + key = "loss" - best_loss = self.metadata["epochs"][self.metadata["best_epoch"]][key] - if logs.get(key) < best_loss: - self.metadata["best_epoch"] = epoch + best_loss = self.metadata["epochs"][self.metadata["best_epoch"]][key] + if logs.get(key) < best_loss: + self.metadata["best_epoch"] = epoch - with open(self.file, "w") as f: - json.dump(self.metadata, f, indent=2) + with open(self.file, "w") as f: + json.dump(self.metadata, f, indent=2) BOARD_TRANSFORMATIONS = { - "noop": lambda feature: feature, - "rot90": lambda feature: np.rot90(feature, 1), - "rot180": lambda feature: np.rot90(feature, 2), - "rot270": lambda feature: np.rot90(feature, 3), - "fliplr": lambda feature: np.fliplr(feature), - "flipud": lambda feature: 
np.flipud(feature), - "diag1": lambda feature: np.transpose(feature), - "diag2": lambda feature: np.fliplr(np.rot90(feature, 1)) + "noop": lambda feature: feature, + "rot90": lambda feature: np.rot90(feature, 1), + "rot180": lambda feature: np.rot90(feature, 2), + "rot270": lambda feature: np.rot90(feature, 3), + "fliplr": lambda feature: np.fliplr(feature), + "flipud": lambda feature: np.flipud(feature), + "diag1": lambda feature: np.transpose(feature), + "diag2": lambda feature: np.fliplr(np.rot90(feature, 1)) } def run_training(cmd_line_args=None): - """Run training. command-line args may be passed in as a list - """ - import argparse - parser = argparse.ArgumentParser(description='Perform supervised training on a policy network.') - # required args - parser.add_argument("model", help="Path to a JSON model file (i.e. from CNNPolicy.save_model())") - parser.add_argument("train_data", help="A .h5 file of training data") - parser.add_argument("out_directory", help="directory where metadata and weights will be saved") - # frequently used args - parser.add_argument("--minibatch", "-B", help="Size of training data minibatches. Default: 16", type=int, default=16) - parser.add_argument("--epochs", "-E", help="Total number of iterations on the data. Default: 10", type=int, default=10) - parser.add_argument("--epoch-length", "-l", help="Number of training examples considered 'one epoch'. Default: # training data", type=int, default=None) - parser.add_argument("--learning-rate", "-r", help="Learning rate - how quickly the model learns at first. Default: .03", type=float, default=.03) - parser.add_argument("--decay", "-d", help="The rate at which learning decreases. Default: .0001", type=float, default=.0001) - parser.add_argument("--verbose", "-v", help="Turn on verbose mode", default=False, action="store_true") - # slightly fancier args - parser.add_argument("--weights", help="Name of a .h5 weights file (in the output directory) to load to resume training", default=None) - parser.add_argument("--train-val-test", help="Fraction of data to use for training/val/test. Must sum to 1. Invalid if restarting training", nargs=3, type=float, default=[0.93, .05, .02]) - parser.add_argument("--symmetries", help="Comma-separated list of transforms, subset of noop,rot90,rot180,rot270,fliplr,flipud,diag1,diag2", default='noop,rot90,rot180,rot270,fliplr,flipud,diag1,diag2') - # TODO - an argument to specify which transformations to use, put it in metadata - - if cmd_line_args is None: - args = parser.parse_args() - else: - args = parser.parse_args(cmd_line_args) - - # TODO - what follows here should be refactored into a series of small functions - - resume = args.weights is not None - - if args.verbose: - if resume: - print "trying to resume from %s with weights %s" % (args.out_directory, os.path.join(args.out_directory, args.weights)) - else: - if os.path.exists(args.out_directory): - print "directory %s exists. 
any previous data will be overwritten" % args.out_directory - else: - print "starting fresh output directory %s" % args.out_directory - - # load model from json spec - model = CNNPolicy.load_model(args.model).model - if resume: - model.load_weights(os.path.join(args.out_directory, args.weights)) - - # TODO - (waiting on game_converter) verify that features of model match features of training data - dataset = h5.File(args.train_data) - n_total_data = len(dataset["states"]) - n_train_data = int(args.train_val_test[0] * n_total_data) - # Need to make sure training data is divisible by minibatch size or get warning mentioning accuracy from keras - n_train_data = n_train_data - (n_train_data % args.minibatch) - n_val_data = n_total_data - n_train_data - # n_test_data = n_total_data - (n_train_data + n_val_data) - - if args.verbose: - print "datset loaded" - print "\t%d total samples" % n_total_data - print "\t%d training samples" % n_train_data - print "\t%d validaion samples" % n_val_data - - # ensure output directory is available - if not os.path.exists(args.out_directory): - os.makedirs(args.out_directory) - - # create metadata file and the callback object that will write to it - meta_file = os.path.join(args.out_directory, "metadata.json") - meta_writer = MetadataWriterCallback(meta_file) - # load prior data if it already exists - if os.path.exists(meta_file) and resume: - with open(meta_file, "r") as f: - meta_writer.metadata = json.load(f) - if args.verbose: - print "previous metadata loaded: %d epochs. new epochs will be appended." % len(meta_writer.metadata["epochs"]) - elif args.verbose: - print "starting with empty metadata" - # the MetadataWriterCallback only sets 'epoch' and 'best_epoch'. We can add in anything else we like here - # TODO - model and train_data are saved in meta_file; check that they match (and make args optional when restarting?) - meta_writer.metadata["training_data"] = args.train_data - meta_writer.metadata["model_file"] = args.model - # Record all command line args in a list so that all args are recorded even when training is stopped and resumed. 
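A sanity check on the batch generator above: the same board symmetry has to be applied to the feature planes and to the one-hot action map, otherwise the label no longer points at the move that was actually played. A small, hypothetical check using one_hot_action and BOARD_TRANSFORMATIONS as defined in this file:

import numpy as np
from AlphaGo.training.supervised_policy_trainer import one_hot_action, BOARD_TRANSFORMATIONS

def transformed_label(action_xy, transform_name, size=19):
    """Apply one of the eight board symmetries to a one-hot action map and
    recover the (x, y) coordinate it now encodes."""
    transform = BOARD_TRANSFORMATIONS[transform_name]
    action_map = transform(one_hot_action(action_xy, size))
    return tuple(np.unravel_index(np.argmax(action_map), action_map.shape))

# rotating the label with the board keeps it aligned with the rotated planes,
# e.g. transformed_label((3, 15), "rot90") == (3, 3) on a 19x19 board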
- meta_writer.metadata["cmd_line_args"] = meta_writer.metadata.get("cmd_line_args", []).append(vars(args)) - - # create ModelCheckpoint to save weights every epoch - checkpoint_template = os.path.join(args.out_directory, "weights.{epoch:05d}.hdf5") - checkpointer = ModelCheckpoint(checkpoint_template) - - # load precomputed random-shuffle indices or create them - # TODO - save each train/val/test indices separately so there's no danger of - # changing args.train_val_test when resuming - shuffle_file = os.path.join(args.out_directory, "shuffle.npz") - if os.path.exists(shuffle_file) and resume: - with open(shuffle_file, "r") as f: - shuffle_indices = np.load(f) - if args.verbose: - print "loading previous data shuffling indices" - else: - # create shuffled indices - shuffle_indices = np.random.permutation(n_total_data) - with open(shuffle_file, "w") as f: - np.save(f, shuffle_indices) - if args.verbose: - print "created new data shuffling indices" - # training indices are the first consecutive set of shuffled indices, val next, then test gets the remainder - train_indices = shuffle_indices[0:n_train_data] - val_indices = shuffle_indices[n_train_data:n_train_data + n_val_data] - # test_indices = shuffle_indices[n_train_data + n_val_data:] - - symmetries = [BOARD_TRANSFORMATIONS[name] for name in args.symmetries.strip().split(",")] - - # create dataset generators - train_data_generator = shuffled_hdf5_batch_generator( - dataset["states"], - dataset["actions"], - train_indices, - args.minibatch, - symmetries) - val_data_generator = shuffled_hdf5_batch_generator( - dataset["states"], - dataset["actions"], - val_indices, - args.minibatch, - symmetries) - - sgd = SGD(lr=args.learning_rate, decay=args.decay) - model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=["accuracy"]) - - samples_per_epoch = args.epoch_length or n_train_data - - if args.verbose: - print "STARTING TRAINING" - - model.fit_generator( - generator=train_data_generator, - samples_per_epoch=samples_per_epoch, - nb_epoch=args.epochs, - callbacks=[checkpointer, meta_writer], - validation_data=val_data_generator, - nb_val_samples=n_val_data) + """Run training. command-line args may be passed in as a list + """ + import argparse + parser = argparse.ArgumentParser(description='Perform supervised training on a policy network.') + # required args + parser.add_argument("model", help="Path to a JSON model file (i.e. from CNNPolicy.save_model())") + parser.add_argument("train_data", help="A .h5 file of training data") + parser.add_argument("out_directory", help="directory where metadata and weights will be saved") + # frequently used args + parser.add_argument("--minibatch", "-B", help="Size of training data minibatches. Default: 16", type=int, default=16) + parser.add_argument("--epochs", "-E", help="Total number of iterations on the data. Default: 10", type=int, default=10) + parser.add_argument("--epoch-length", "-l", help="Number of training examples considered 'one epoch'. Default: # training data", type=int, default=None) + parser.add_argument("--learning-rate", "-r", help="Learning rate - how quickly the model learns at first. Default: .03", type=float, default=.03) + parser.add_argument("--decay", "-d", help="The rate at which learning decreases. 
Default: .0001", type=float, default=.0001) + parser.add_argument("--verbose", "-v", help="Turn on verbose mode", default=False, action="store_true") + # slightly fancier args + parser.add_argument("--weights", help="Name of a .h5 weights file (in the output directory) to load to resume training", default=None) + parser.add_argument("--train-val-test", help="Fraction of data to use for training/val/test. Must sum to 1. Invalid if restarting training", nargs=3, type=float, default=[0.93, .05, .02]) + parser.add_argument("--symmetries", help="Comma-separated list of transforms, subset of noop,rot90,rot180,rot270,fliplr,flipud,diag1,diag2", default='noop,rot90,rot180,rot270,fliplr,flipud,diag1,diag2') + # TODO - an argument to specify which transformations to use, put it in metadata + + if cmd_line_args is None: + args = parser.parse_args() + else: + args = parser.parse_args(cmd_line_args) + + # TODO - what follows here should be refactored into a series of small functions + + resume = args.weights is not None + + if args.verbose: + if resume: + print "trying to resume from %s with weights %s" % (args.out_directory, os.path.join(args.out_directory, args.weights)) + else: + if os.path.exists(args.out_directory): + print "directory %s exists. any previous data will be overwritten" % args.out_directory + else: + print "starting fresh output directory %s" % args.out_directory + + # load model from json spec + model = CNNPolicy.load_model(args.model).model + if resume: + model.load_weights(os.path.join(args.out_directory, args.weights)) + + # TODO - (waiting on game_converter) verify that features of model match features of training data + dataset = h5.File(args.train_data) + n_total_data = len(dataset["states"]) + n_train_data = int(args.train_val_test[0] * n_total_data) + # Need to make sure training data is divisible by minibatch size or get warning mentioning accuracy from keras + n_train_data = n_train_data - (n_train_data % args.minibatch) + n_val_data = n_total_data - n_train_data + # n_test_data = n_total_data - (n_train_data + n_val_data) + + if args.verbose: + print "datset loaded" + print "\t%d total samples" % n_total_data + print "\t%d training samples" % n_train_data + print "\t%d validaion samples" % n_val_data + + # ensure output directory is available + if not os.path.exists(args.out_directory): + os.makedirs(args.out_directory) + + # create metadata file and the callback object that will write to it + meta_file = os.path.join(args.out_directory, "metadata.json") + meta_writer = MetadataWriterCallback(meta_file) + # load prior data if it already exists + if os.path.exists(meta_file) and resume: + with open(meta_file, "r") as f: + meta_writer.metadata = json.load(f) + if args.verbose: + print "previous metadata loaded: %d epochs. new epochs will be appended." % len(meta_writer.metadata["epochs"]) + elif args.verbose: + print "starting with empty metadata" + # the MetadataWriterCallback only sets 'epoch' and 'best_epoch'. We can add in anything else we like here + # TODO - model and train_data are saved in meta_file; check that they match (and make args optional when restarting?) + meta_writer.metadata["training_data"] = args.train_data + meta_writer.metadata["model_file"] = args.model + # Record all command line args in a list so that all args are recorded even when training is stopped and resumed. 
+ meta_writer.metadata["cmd_line_args"] = meta_writer.metadata.get("cmd_line_args", []).append(vars(args)) + + # create ModelCheckpoint to save weights every epoch + checkpoint_template = os.path.join(args.out_directory, "weights.{epoch:05d}.hdf5") + checkpointer = ModelCheckpoint(checkpoint_template) + + # load precomputed random-shuffle indices or create them + # TODO - save each train/val/test indices separately so there's no danger of + # changing args.train_val_test when resuming + shuffle_file = os.path.join(args.out_directory, "shuffle.npz") + if os.path.exists(shuffle_file) and resume: + with open(shuffle_file, "r") as f: + shuffle_indices = np.load(f) + if args.verbose: + print "loading previous data shuffling indices" + else: + # create shuffled indices + shuffle_indices = np.random.permutation(n_total_data) + with open(shuffle_file, "w") as f: + np.save(f, shuffle_indices) + if args.verbose: + print "created new data shuffling indices" + # training indices are the first consecutive set of shuffled indices, val next, then test gets the remainder + train_indices = shuffle_indices[0:n_train_data] + val_indices = shuffle_indices[n_train_data:n_train_data + n_val_data] + # test_indices = shuffle_indices[n_train_data + n_val_data:] + + symmetries = [BOARD_TRANSFORMATIONS[name] for name in args.symmetries.strip().split(",")] + + # create dataset generators + train_data_generator = shuffled_hdf5_batch_generator( + dataset["states"], + dataset["actions"], + train_indices, + args.minibatch, + symmetries) + val_data_generator = shuffled_hdf5_batch_generator( + dataset["states"], + dataset["actions"], + val_indices, + args.minibatch, + symmetries) + + sgd = SGD(lr=args.learning_rate, decay=args.decay) + model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=["accuracy"]) + + samples_per_epoch = args.epoch_length or n_train_data + + if args.verbose: + print "STARTING TRAINING" + + model.fit_generator( + generator=train_data_generator, + samples_per_epoch=samples_per_epoch, + nb_epoch=args.epochs, + callbacks=[checkpointer, meta_writer], + validation_data=val_data_generator, + nb_val_samples=n_val_data) if __name__ == '__main__': - run_training() + run_training() diff --git a/AlphaGo/util.py b/AlphaGo/util.py index b801a397f..6acb5062d 100644 --- a/AlphaGo/util.py +++ b/AlphaGo/util.py @@ -8,116 +8,116 @@ def flatten_idx(position, size): - (x, y) = position - return x * size + y + (x, y) = position + return x * size + y def unflatten_idx(idx, size): - x, y = divmod(idx, size) - return (x, y) + x, y = divmod(idx, size) + return (x, y) def _parse_sgf_move(node_value): - """Given a well-formed move string, return either PASS_MOVE or the (x, y) position - """ - if node_value == '' or node_value == 'tt': - return go.PASS_MOVE - else: - # GameState expects (x, y) where x is column and y is row - col = LETTERS.index(node_value[0].upper()) - row = LETTERS.index(node_value[1].upper()) - return (col, row) + """Given a well-formed move string, return either PASS_MOVE or the (x, y) position + """ + if node_value == '' or node_value == 'tt': + return go.PASS_MOVE + else: + # GameState expects (x, y) where x is column and y is row + col = LETTERS.index(node_value[0].upper()) + row = LETTERS.index(node_value[1].upper()) + return (col, row) def _sgf_init_gamestate(sgf_root): - """Helper function to set up a GameState object from the root node - of an SGF file - """ - props = sgf_root.properties - s_size = props.get('SZ', ['19'])[0] - s_player = props.get('PL', ['B'])[0] - # init board with 
specified size - gs = go.GameState(int(s_size)) - # handle 'add black' property - if 'AB' in props: - for stone in props['AB']: - gs.do_move(_parse_sgf_move(stone), go.BLACK) - # handle 'add white' property - if 'AW' in props: - for stone in props['AW']: - gs.do_move(_parse_sgf_move(stone), go.WHITE) - # setup done; set player according to 'PL' property - gs.current_player = go.BLACK if s_player == 'B' else go.WHITE - return gs + """Helper function to set up a GameState object from the root node + of an SGF file + """ + props = sgf_root.properties + s_size = props.get('SZ', ['19'])[0] + s_player = props.get('PL', ['B'])[0] + # init board with specified size + gs = go.GameState(int(s_size)) + # handle 'add black' property + if 'AB' in props: + for stone in props['AB']: + gs.do_move(_parse_sgf_move(stone), go.BLACK) + # handle 'add white' property + if 'AW' in props: + for stone in props['AW']: + gs.do_move(_parse_sgf_move(stone), go.WHITE) + # setup done; set player according to 'PL' property + gs.current_player = go.BLACK if s_player == 'B' else go.WHITE + return gs def sgf_to_gamestate(sgf_string): - """Creates a GameState object from the first game in the given collection - """ - # Don't Repeat Yourself; parsing handled by sgf_iter_states - for (gs, move, player) in sgf_iter_states(sgf_string, True): - pass - # gs has been updated in-place to the final state by the time - # sgf_iter_states returns - return gs + """Creates a GameState object from the first game in the given collection + """ + # Don't Repeat Yourself; parsing handled by sgf_iter_states + for (gs, move, player) in sgf_iter_states(sgf_string, True): + pass + # gs has been updated in-place to the final state by the time + # sgf_iter_states returns + return gs def save_gamestate_to_sgf(gamestate, path, filename, black_player_name='Unknown', white_player_name='Unknown', size=19, komi=7.5): - """Creates a simplified sgf for viewing playouts or positions - """ - str_list = [] - # Game info - str_list.append('(;GM[1]FF[4]CA[UTF-8]') - str_list.append('SZ[{}]'.format(size)) - str_list.append('KM[{}]'.format(komi)) - str_list.append('PB[{}]'.format(black_player_name)) - str_list.append('PW[{}]'.format(white_player_name)) - cycle_string = 'BW' - # Handle handicaps - if len(gamestate.handicaps) > 0: - cycle_string = 'WB' - str_list.append('HA[{}]'.format(len(gamestate.handicaps))) - str_list.append(';AB') - for handicap in gamestate.handicaps: - str_list.append('[{}{}]'.format(LETTERS[handicap[0]].lower(), LETTERS[handicap[1]].lower())) - # Move list - for move, color in zip(gamestate.history, itertools.cycle(cycle_string)): - # Move color prefix - str_list.append(';{}'.format(color)) - # Move coordinates - if move is None: - str_list.append('[tt]') - else: - str_list.append('[{}{}]'.format(LETTERS[move[0]].lower(), LETTERS[move[1]].lower())) - str_list.append(')') - with open(os.path.join(path, filename), "w") as f: - f.write(''.join(str_list)) + """Creates a simplified sgf for viewing playouts or positions + """ + str_list = [] + # Game info + str_list.append('(;GM[1]FF[4]CA[UTF-8]') + str_list.append('SZ[{}]'.format(size)) + str_list.append('KM[{}]'.format(komi)) + str_list.append('PB[{}]'.format(black_player_name)) + str_list.append('PW[{}]'.format(white_player_name)) + cycle_string = 'BW' + # Handle handicaps + if len(gamestate.handicaps) > 0: + cycle_string = 'WB' + str_list.append('HA[{}]'.format(len(gamestate.handicaps))) + str_list.append(';AB') + for handicap in gamestate.handicaps: + 
str_list.append('[{}{}]'.format(LETTERS[handicap[0]].lower(), LETTERS[handicap[1]].lower())) + # Move list + for move, color in zip(gamestate.history, itertools.cycle(cycle_string)): + # Move color prefix + str_list.append(';{}'.format(color)) + # Move coordinates + if move is None: + str_list.append('[tt]') + else: + str_list.append('[{}{}]'.format(LETTERS[move[0]].lower(), LETTERS[move[1]].lower())) + str_list.append(')') + with open(os.path.join(path, filename), "w") as f: + f.write(''.join(str_list)) def sgf_iter_states(sgf_string, include_end=True): - """Iterates over (GameState, move, player) tuples in the first game of the given SGF file. - - Ignores variations - only the main line is returned. - The state object is modified in-place, so don't try to, for example, keep track of it through time - - If include_end is False, the final tuple yielded is the penultimate state, but the state - will still be left in the final position at the end of iteration because 'gs' is modified - in-place the state. See sgf_to_gamestate - """ - collection = sgf.parse(sgf_string) - game = collection[0] - gs = _sgf_init_gamestate(game.root) - if game.rest is not None: - for node in game.rest: - props = node.properties - if 'W' in props: - move = _parse_sgf_move(props['W'][0]) - player = go.WHITE - elif 'B' in props: - move = _parse_sgf_move(props['B'][0]) - player = go.BLACK - yield (gs, move, player) - # update state to n+1 - gs.do_move(move, player) - if include_end: - yield (gs, None, None) + """Iterates over (GameState, move, player) tuples in the first game of the given SGF file. + + Ignores variations - only the main line is returned. + The state object is modified in-place, so don't try to, for example, keep track of it through time + + If include_end is False, the final tuple yielded is the penultimate state, but the state + will still be left in the final position at the end of iteration because 'gs' is modified + in-place the state. 
See sgf_to_gamestate + """ + collection = sgf.parse(sgf_string) + game = collection[0] + gs = _sgf_init_gamestate(game.root) + if game.rest is not None: + for node in game.rest: + props = node.properties + if 'W' in props: + move = _parse_sgf_move(props['W'][0]) + player = go.WHITE + elif 'B' in props: + move = _parse_sgf_move(props['B'][0]) + player = go.BLACK + yield (gs, move, player) + # update state to n+1 + gs.do_move(move, player) + if include_end: + yield (gs, None, None) diff --git a/benchmarks/preprocessing_benchmark.py b/benchmarks/preprocessing_benchmark.py index 922c681ce..635c0fd55 100644 --- a/benchmarks/preprocessing_benchmark.py +++ b/benchmarks/preprocessing_benchmark.py @@ -9,8 +9,8 @@ def run_convert_game(): - for traindata in gc.convert_game(*args): - pass + for traindata in gc.convert_game(*args): + pass prof.runcall(run_convert_game) prof.dump_stats('bench_results.prof') diff --git a/benchmarks/reinforcement_policy_training_benchmark.py b/benchmarks/reinforcement_policy_training_benchmark.py index c016a42ec..4fc66407c 100644 --- a/benchmarks/reinforcement_policy_training_benchmark.py +++ b/benchmarks/reinforcement_policy_training_benchmark.py @@ -15,9 +15,9 @@ stats_file = os.path.join(datadir, 'reinforcement_policy_trainer.prof') if not os.path.exists(datadir): - os.makedirs(datadir) + os.makedirs(datadir) if not os.path.exists(weights): - policy.model.save_weights(weights) + policy.model.save_weights(weights) policy.save_model(modelfile) profile = Profile() diff --git a/benchmarks/supervised_policy_training_benchmark.py b/benchmarks/supervised_policy_training_benchmark.py index 0293f7af9..d84676611 100644 --- a/benchmarks/supervised_policy_training_benchmark.py +++ b/benchmarks/supervised_policy_training_benchmark.py @@ -14,7 +14,7 @@ def run_supervised_policy_training(): - run_training(*arguments) + run_training(*arguments) profile.runcall(run_supervised_policy_training) profile.dump_stats('supervised_policy_training_bench_results.prof') diff --git a/interface/Play.py b/interface/Play.py index 57d750cc9..58e3fe56d 100644 --- a/interface/Play.py +++ b/interface/Play.py @@ -3,32 +3,32 @@ class play_match(object): - """Interface to handle play between two players.""" - def __init__(self, player1, player2, save_dir=None, size=19): - # super(ClassName, self).__init__() - self.player1 = player1 - self.player2 = player2 - self.state = GameState(size=size) - # I Propose that GameState should take a top-level save directory, - # then automatically generate the specific file name + """Interface to handle play between two players.""" + def __init__(self, player1, player2, save_dir=None, size=19): + # super(ClassName, self).__init__() + self.player1 = player1 + self.player2 = player2 + self.state = GameState(size=size) + # I Propose that GameState should take a top-level save directory, + # then automatically generate the specific file name - def _play(self, player): - move = player.get_move(self.state) - # TODO: Fix is_eye? - self.state.do_move(move) # Return max prob sensible legal move - # self.state.write_to_disk() - if len(self.state.history) > 1: - if self.state.history[-1] is None and self.state.history[-2] is None \ - and self.state.current_player == -1: - end_of_game = True - else: - end_of_game = False - else: - end_of_game = False - return end_of_game + def _play(self, player): + move = player.get_move(self.state) + # TODO: Fix is_eye? 
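For readers of util.py above, the intended way to consume sgf_iter_states is worth spelling out: each yielded state is the position before the accompanying move is played, and with include_end=True the last tuple carries the final position with move and player set to None. A short usage sketch (the SGF file name is hypothetical):

from AlphaGo.util import sgf_iter_states

with open("game.sgf") as f:  # hypothetical input file
    sgf_string = f.read()

moves = []
for state, move, player in sgf_iter_states(sgf_string, include_end=True):
    # 'state' is the position before 'move' is played; in the final tuple
    # (only yielded when include_end=True) move and player are both None
    if move is not None:
        moves.append((move, player))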
+ self.state.do_move(move) # Return max prob sensible legal move + # self.state.write_to_disk() + if len(self.state.history) > 1: + if self.state.history[-1] is None and self.state.history[-2] is None \ + and self.state.current_player == -1: + end_of_game = True + else: + end_of_game = False + else: + end_of_game = False + return end_of_game - def play(self): - """Play one turn, update game state, save to disk""" - end_of_game = self._play(self.player1) - # This is incorrect. - return end_of_game + def play(self): + """Play one turn, update game state, save to disk""" + end_of_game = self._play(self.player1) + # This is incorrect. + return end_of_game diff --git a/interface/gtp_wrapper.py b/interface/gtp_wrapper.py index 437aa4ff8..89d35f242 100644 --- a/interface/gtp_wrapper.py +++ b/interface/gtp_wrapper.py @@ -6,148 +6,148 @@ def run_gnugo(sgf_file_name, command): - from distutils import spawn - if spawn.find_executable('gnugo'): - from subprocess import Popen, PIPE - p = Popen(['gnugo', '--chinese-rules', '--mode', 'gtp', '-l', sgf_file_name], stdout=PIPE, stdin=PIPE, stderr=PIPE) - out_bytes = p.communicate(input=command)[0] - return out_bytes.decode('utf-8')[2:] - else: - return '' + from distutils import spawn + if spawn.find_executable('gnugo'): + from subprocess import Popen, PIPE + p = Popen(['gnugo', '--chinese-rules', '--mode', 'gtp', '-l', sgf_file_name], stdout=PIPE, stdin=PIPE, stderr=PIPE) + out_bytes = p.communicate(input=command)[0] + return out_bytes.decode('utf-8')[2:] + else: + return '' class ExtendedGtpEngine(gtp.Engine): - recommended_handicaps = { - 2: "D4 Q16", - 3: "D4 Q16 D16", - 4: "D4 Q16 D16 Q4", - 5: "D4 Q16 D16 Q4 K10", - 6: "D4 Q16 D16 Q4 D10 Q10", - 7: "D4 Q16 D16 Q4 D10 Q10 K10", - 8: "D4 Q16 D16 Q4 D10 Q10 K4 K16", - 9: "D4 Q16 D16 Q4 D10 Q10 K4 K16 K10" - } - - def call_gnugo(self, sgf_file_name, command): - try: - pool = multiprocessing.Pool(processes=1) - result = pool.apply_async(run_gnugo, (sgf_file_name, command)) - output = result.get(timeout=10) - pool.close() - return output - except multiprocessing.TimeoutError: - pool.terminate() - # if can't get answer from GnuGo, return no result - return '' - - def cmd_time_left(self, arguments): - pass - - def cmd_place_free_handicap(self, arguments): - try: - number_of_stones = int(arguments) - except Exception: - raise ValueError('Number of handicaps could not be parsed: {}'.format(arguments)) - if number_of_stones < 2 or number_of_stones > 9: - raise ValueError('Invalid number of handicap stones: {}'.format(number_of_stones)) - vertex_string = ExtendedGtpEngine.recommended_handicaps[number_of_stones] - self.cmd_set_free_handicap(vertex_string) - return vertex_string - - def cmd_set_free_handicap(self, arguments): - vertices = arguments.strip().split() - moves = [gtp.parse_vertex(vertex) for vertex in vertices] - self._game.place_handicaps(moves) - - def cmd_final_score(self, arguments): - sgf_file_name = self._game.get_current_state_as_sgf() - return self.call_gnugo(sgf_file_name, 'final_score\n') - - def cmd_final_status_list(self, arguments): - sgf_file_name = self._game.get_current_state_as_sgf() - return self.call_gnugo(sgf_file_name, 'final_status_list {}\n'.format(arguments)) - - def cmd_load_sgf(self, arguments): - pass - - def cmd_save_sgf(self, arguments): - pass - - # def cmd_kgs_genmove_cleanup(self, arguments): - # return self.cmd_genmove(arguments) + recommended_handicaps = { + 2: "D4 Q16", + 3: "D4 Q16 D16", + 4: "D4 Q16 D16 Q4", + 5: "D4 Q16 D16 Q4 K10", + 6: "D4 Q16 D16 Q4 D10 Q10", + 
7: "D4 Q16 D16 Q4 D10 Q10 K10", + 8: "D4 Q16 D16 Q4 D10 Q10 K4 K16", + 9: "D4 Q16 D16 Q4 D10 Q10 K4 K16 K10" + } + + def call_gnugo(self, sgf_file_name, command): + try: + pool = multiprocessing.Pool(processes=1) + result = pool.apply_async(run_gnugo, (sgf_file_name, command)) + output = result.get(timeout=10) + pool.close() + return output + except multiprocessing.TimeoutError: + pool.terminate() + # if can't get answer from GnuGo, return no result + return '' + + def cmd_time_left(self, arguments): + pass + + def cmd_place_free_handicap(self, arguments): + try: + number_of_stones = int(arguments) + except Exception: + raise ValueError('Number of handicaps could not be parsed: {}'.format(arguments)) + if number_of_stones < 2 or number_of_stones > 9: + raise ValueError('Invalid number of handicap stones: {}'.format(number_of_stones)) + vertex_string = ExtendedGtpEngine.recommended_handicaps[number_of_stones] + self.cmd_set_free_handicap(vertex_string) + return vertex_string + + def cmd_set_free_handicap(self, arguments): + vertices = arguments.strip().split() + moves = [gtp.parse_vertex(vertex) for vertex in vertices] + self._game.place_handicaps(moves) + + def cmd_final_score(self, arguments): + sgf_file_name = self._game.get_current_state_as_sgf() + return self.call_gnugo(sgf_file_name, 'final_score\n') + + def cmd_final_status_list(self, arguments): + sgf_file_name = self._game.get_current_state_as_sgf() + return self.call_gnugo(sgf_file_name, 'final_status_list {}\n'.format(arguments)) + + def cmd_load_sgf(self, arguments): + pass + + def cmd_save_sgf(self, arguments): + pass + + # def cmd_kgs_genmove_cleanup(self, arguments): + # return self.cmd_genmove(arguments) class GTPGameConnector(object): - """A class implementing the functions of a 'game' object required by the GTP - Engine by wrapping a GameState and Player instance - """ - - def __init__(self, player): - self._state = go.GameState(enforce_superko=True) - self._player = player - - def clear(self): - self._state = go.GameState(self._state.size, enforce_superko=True) - - def make_move(self, color, vertex): - # vertex in GTP language is 1-indexed, whereas GameState's are zero-indexed - try: - if vertex == gtp.PASS: - self._state.do_move(go.PASS_MOVE) - else: - (x, y) = vertex - self._state.do_move((x - 1, y - 1), color) - return True - except go.IllegalMove: - return False - - def set_size(self, n): - self._state = go.GameState(n, enforce_superko=True) - - def set_komi(self, k): - self._state.komi = k - - def get_move(self, color): - self._state.current_player = color - move = self._player.get_move(self._state) - if move == go.PASS_MOVE: - return gtp.PASS - else: - (x, y) = move - return (x + 1, y + 1) - - def get_current_state_as_sgf(self): - from tempfile import NamedTemporaryFile - temp_file = NamedTemporaryFile(delete=False) - save_gamestate_to_sgf(self._state, '', temp_file.name) - return temp_file.name - - def place_handicaps(self, vertices): - actions = [] - for vertex in vertices: - (x, y) = vertex - actions.append((x - 1, y - 1)) - self._state.place_handicaps(actions) + """A class implementing the functions of a 'game' object required by the GTP + Engine by wrapping a GameState and Player instance + """ + + def __init__(self, player): + self._state = go.GameState(enforce_superko=True) + self._player = player + + def clear(self): + self._state = go.GameState(self._state.size, enforce_superko=True) + + def make_move(self, color, vertex): + # vertex in GTP language is 1-indexed, whereas GameState's are zero-indexed + try: 
+ if vertex == gtp.PASS: + self._state.do_move(go.PASS_MOVE) + else: + (x, y) = vertex + self._state.do_move((x - 1, y - 1), color) + return True + except go.IllegalMove: + return False + + def set_size(self, n): + self._state = go.GameState(n, enforce_superko=True) + + def set_komi(self, k): + self._state.komi = k + + def get_move(self, color): + self._state.current_player = color + move = self._player.get_move(self._state) + if move == go.PASS_MOVE: + return gtp.PASS + else: + (x, y) = move + return (x + 1, y + 1) + + def get_current_state_as_sgf(self): + from tempfile import NamedTemporaryFile + temp_file = NamedTemporaryFile(delete=False) + save_gamestate_to_sgf(self._state, '', temp_file.name) + return temp_file.name + + def place_handicaps(self, vertices): + actions = [] + for vertex in vertices: + (x, y) = vertex + actions.append((x - 1, y - 1)) + self._state.place_handicaps(actions) def run_gtp(player_obj, inpt_fn=None, name="Gtp Player", version="0.0"): - gtp_game = GTPGameConnector(player_obj) - gtp_engine = ExtendedGtpEngine(gtp_game, name, version) - if inpt_fn is None: - inpt_fn = raw_input - - sys.stderr.write("GTP engine ready\n") - sys.stderr.flush() - while not gtp_engine.disconnect: - inpt = inpt_fn() - # handle either single lines at a time - # or multiple commands separated by '\n' - try: - cmd_list = inpt.split("\n") - except: - cmd_list = [inpt] - for cmd in cmd_list: - engine_reply = gtp_engine.send(cmd) - sys.stdout.write(engine_reply) - sys.stdout.flush() + gtp_game = GTPGameConnector(player_obj) + gtp_engine = ExtendedGtpEngine(gtp_game, name, version) + if inpt_fn is None: + inpt_fn = raw_input + + sys.stderr.write("GTP engine ready\n") + sys.stderr.flush() + while not gtp_engine.disconnect: + inpt = inpt_fn() + # handle either single lines at a time + # or multiple commands separated by '\n' + try: + cmd_list = inpt.split("\n") + except: + cmd_list = [inpt] + for cmd in cmd_list: + engine_reply = gtp_engine.send(cmd) + sys.stdout.write(engine_reply) + sys.stdout.flush() diff --git a/tests/test_game_converter.py b/tests/test_game_converter.py index 0d75c11e5..e0f4963f7 100644 --- a/tests/test_game_converter.py +++ b/tests/test_game_converter.py @@ -5,22 +5,22 @@ class TestSGFLoading(unittest.TestCase): - def test_ab_aw(self): - with open('tests/test_data/sgf/ab_aw.sgf', 'r') as f: - sgf_to_gamestate(f.read()) + def test_ab_aw(self): + with open('tests/test_data/sgf/ab_aw.sgf', 'r') as f: + sgf_to_gamestate(f.read()) class TestCmdlineConverter(unittest.TestCase): - def test_directory_conversion(self): - args = ['--features', 'board,ones,turns_since', '--outfile', '.tmp.testing.h5', '--directory', 'tests/test_data/sgf/'] - run_game_converter(args) - os.remove('.tmp.testing.h5') + def test_directory_conversion(self): + args = ['--features', 'board,ones,turns_since', '--outfile', '.tmp.testing.h5', '--directory', 'tests/test_data/sgf/'] + run_game_converter(args) + os.remove('.tmp.testing.h5') - def test_directory_walk(self): - args = ['--features', 'board,ones,turns_since', '--outfile', '.tmp.testing.h5', '--directory', 'tests/test_data', '--recurse'] - run_game_converter(args) - os.remove('.tmp.testing.h5') + def test_directory_walk(self): + args = ['--features', 'board,ones,turns_since', '--outfile', '.tmp.testing.h5', '--directory', 'tests/test_data', '--recurse'] + run_game_converter(args) + os.remove('.tmp.testing.h5') if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/tests/test_gamestate.py b/tests/test_gamestate.py index 
9f3e9a7f9..f1f28818b 100644 --- a/tests/test_gamestate.py +++ b/tests/test_gamestate.py @@ -6,159 +6,159 @@ class TestKo(unittest.TestCase): - def test_standard_ko(self): - gs = GameState(size=9) - gs.do_move((1, 0)) # B - gs.do_move((2, 0)) # W - gs.do_move((0, 1)) # B - gs.do_move((3, 1)) # W - gs.do_move((1, 2)) # B - gs.do_move((2, 2)) # W - gs.do_move((2, 1)) # B - - gs.do_move((1, 1)) # W trigger capture and ko - - self.assertEqual(gs.num_black_prisoners, 1) - self.assertEqual(gs.num_white_prisoners, 0) - - self.assertFalse(gs.is_legal((2, 1))) - - gs.do_move((5, 5)) - gs.do_move((5, 6)) - - self.assertTrue(gs.is_legal((2, 1))) - - def test_snapback_is_not_ko(self): - gs = GameState(size=5) - # B o W B . - # W W B . . - # . . . . . - # . . . . . - # . . . . . - # here, imagine black plays at 'o' capturing - # the white stone at (2, 0). White may play - # again at (2, 0) to capture the black stones - # at (0, 0), (1, 0). this is 'snapback' not 'ko' - # since it doesn't return the game to a - # previous position - B = [(0, 0), (2, 1), (3, 0)] - W = [(0, 1), (1, 1), (2, 0)] - for (b, w) in zip(B, W): - gs.do_move(b) - gs.do_move(w) - # do the capture of the single white stone - gs.do_move((1, 0)) - # there should be no ko - self.assertIsNone(gs.ko) - self.assertTrue(gs.is_legal((2, 0))) - # now play the snapback - gs.do_move((2, 0)) - # check that the numbers worked out - self.assertEqual(gs.num_black_prisoners, 2) - self.assertEqual(gs.num_white_prisoners, 1) - - def test_positional_superko(self): - move_list = [(0, 3), (0, 4), (1, 3), (1, 4), (2, 3), (2, 4), (2, 2), (3, 4), (2, 1), (3, 3), (3, 1), (3, 2), (3, 0), (4, 2), (1, 1), (4, 1), (8, 0), (4, 0), (8, 1), (0, 2), (8, 2), (0, 1), (8, 3), (1, 0), (8, 4), (2, 0), (0, 0)] - - gs = GameState(size=9) - for move in move_list: - gs.do_move(move) - self.assertTrue(gs.is_legal((1, 0))) - - gs = GameState(size=9, enforce_superko=True) - for move in move_list: - gs.do_move(move) - self.assertFalse(gs.is_legal((1, 0))) + def test_standard_ko(self): + gs = GameState(size=9) + gs.do_move((1, 0)) # B + gs.do_move((2, 0)) # W + gs.do_move((0, 1)) # B + gs.do_move((3, 1)) # W + gs.do_move((1, 2)) # B + gs.do_move((2, 2)) # W + gs.do_move((2, 1)) # B + + gs.do_move((1, 1)) # W trigger capture and ko + + self.assertEqual(gs.num_black_prisoners, 1) + self.assertEqual(gs.num_white_prisoners, 0) + + self.assertFalse(gs.is_legal((2, 1))) + + gs.do_move((5, 5)) + gs.do_move((5, 6)) + + self.assertTrue(gs.is_legal((2, 1))) + + def test_snapback_is_not_ko(self): + gs = GameState(size=5) + # B o W B . + # W W B . . + # . . . . . + # . . . . . + # . . . . . + # here, imagine black plays at 'o' capturing + # the white stone at (2, 0). White may play + # again at (2, 0) to capture the black stones + # at (0, 0), (1, 0). 
this is 'snapback' not 'ko' + # since it doesn't return the game to a + # previous position + B = [(0, 0), (2, 1), (3, 0)] + W = [(0, 1), (1, 1), (2, 0)] + for (b, w) in zip(B, W): + gs.do_move(b) + gs.do_move(w) + # do the capture of the single white stone + gs.do_move((1, 0)) + # there should be no ko + self.assertIsNone(gs.ko) + self.assertTrue(gs.is_legal((2, 0))) + # now play the snapback + gs.do_move((2, 0)) + # check that the numbers worked out + self.assertEqual(gs.num_black_prisoners, 2) + self.assertEqual(gs.num_white_prisoners, 1) + + def test_positional_superko(self): + move_list = [(0, 3), (0, 4), (1, 3), (1, 4), (2, 3), (2, 4), (2, 2), (3, 4), (2, 1), (3, 3), (3, 1), (3, 2), (3, 0), (4, 2), (1, 1), (4, 1), (8, 0), (4, 0), (8, 1), (0, 2), (8, 2), (0, 1), (8, 3), (1, 0), (8, 4), (2, 0), (0, 0)] + + gs = GameState(size=9) + for move in move_list: + gs.do_move(move) + self.assertTrue(gs.is_legal((1, 0))) + + gs = GameState(size=9, enforce_superko=True) + for move in move_list: + gs.do_move(move) + self.assertFalse(gs.is_legal((1, 0))) class TestEye(unittest.TestCase): - def test_simple_eye(self): - - # create a black eye in top left (1, 1), white in bottom right (5, 5) - - gs = GameState(size=7) - gs.do_move((1, 0)) # B - gs.do_move((5, 4)) # W - gs.do_move((2, 1)) # B - gs.do_move((6, 5)) # W - gs.do_move((1, 2)) # B - gs.do_move((5, 6)) # W - gs.do_move((0, 1)) # B - gs.do_move((4, 5)) # W - - # test black eye top left - self.assertTrue(gs.is_eyeish((1, 1), go.BLACK)) - self.assertFalse(gs.is_eyeish((1, 1), go.WHITE)) - - # test white eye bottom right - self.assertTrue(gs.is_eyeish((5, 5), go.WHITE)) - self.assertFalse(gs.is_eyeish((5, 5), go.BLACK)) - - # test no eye in other random positions - self.assertFalse(gs.is_eyeish((1, 0), go.BLACK)) - self.assertFalse(gs.is_eyeish((1, 0), go.WHITE)) - self.assertFalse(gs.is_eyeish((2, 2), go.BLACK)) - self.assertFalse(gs.is_eyeish((2, 2), go.WHITE)) - - def test_true_eye(self): - gs = GameState(size=7) - gs.do_move((1, 0), go.BLACK) - gs.do_move((0, 1), go.BLACK) - - # false eye at 0, 0 - self.assertTrue(gs.is_eyeish((0, 0), go.BLACK)) - self.assertFalse(gs.is_eye((0, 0), go.BLACK)) - - # make it a true eye by turning the corner (1, 1) into an eye itself - gs.do_move((1, 2), go.BLACK) - gs.do_move((2, 1), go.BLACK) - gs.do_move((2, 2), go.BLACK) - gs.do_move((0, 2), go.BLACK) - - self.assertTrue(gs.is_eyeish((0, 0), go.BLACK)) - self.assertTrue(gs.is_eye((0, 0), go.BLACK)) - self.assertTrue(gs.is_eye((1, 1), go.BLACK)) - - def test_eye_recursion(self): - # a checkerboard pattern of black is 'technically' all true eyes - # mutually supporting each other - gs = GameState(7) - for x in range(gs.size): - for y in range(gs.size): - if (x + y) % 2 == 1: - gs.do_move((x, y), go.BLACK) - self.assertTrue(gs.is_eye((0, 0), go.BLACK)) - - def test_liberties_after_capture(self): - # creates 3x3 black group in the middle, that is then all captured - # ...then an assertion is made that the resulting liberties after - # capture are the same as if the group had never been there - gs_capture = GameState(7) - gs_reference = GameState(7) - # add in 3x3 black stones - for x in range(2, 5): - for y in range(2, 5): - gs_capture.do_move((x, y), go.BLACK) - # surround the black group with white stones - # and set the same white stones in gs_reference - for x in range(2, 5): - gs_capture.do_move((x, 1), go.WHITE) - gs_capture.do_move((x, 5), go.WHITE) - gs_reference.do_move((x, 1), go.WHITE) - gs_reference.do_move((x, 5), go.WHITE) - gs_capture.do_move((1, 
1), go.WHITE) - gs_reference.do_move((1, 1), go.WHITE) - for y in range(2, 5): - gs_capture.do_move((1, y), go.WHITE) - gs_capture.do_move((5, y), go.WHITE) - gs_reference.do_move((1, y), go.WHITE) - gs_reference.do_move((5, y), go.WHITE) - - # board configuration and liberties of gs_capture and of gs_reference should be identical - self.assertTrue(np.all(gs_reference.board == gs_capture.board)) - self.assertTrue(np.all(gs_reference.liberty_counts == gs_capture.liberty_counts)) + def test_simple_eye(self): + + # create a black eye in top left (1, 1), white in bottom right (5, 5) + + gs = GameState(size=7) + gs.do_move((1, 0)) # B + gs.do_move((5, 4)) # W + gs.do_move((2, 1)) # B + gs.do_move((6, 5)) # W + gs.do_move((1, 2)) # B + gs.do_move((5, 6)) # W + gs.do_move((0, 1)) # B + gs.do_move((4, 5)) # W + + # test black eye top left + self.assertTrue(gs.is_eyeish((1, 1), go.BLACK)) + self.assertFalse(gs.is_eyeish((1, 1), go.WHITE)) + + # test white eye bottom right + self.assertTrue(gs.is_eyeish((5, 5), go.WHITE)) + self.assertFalse(gs.is_eyeish((5, 5), go.BLACK)) + + # test no eye in other random positions + self.assertFalse(gs.is_eyeish((1, 0), go.BLACK)) + self.assertFalse(gs.is_eyeish((1, 0), go.WHITE)) + self.assertFalse(gs.is_eyeish((2, 2), go.BLACK)) + self.assertFalse(gs.is_eyeish((2, 2), go.WHITE)) + + def test_true_eye(self): + gs = GameState(size=7) + gs.do_move((1, 0), go.BLACK) + gs.do_move((0, 1), go.BLACK) + + # false eye at 0, 0 + self.assertTrue(gs.is_eyeish((0, 0), go.BLACK)) + self.assertFalse(gs.is_eye((0, 0), go.BLACK)) + + # make it a true eye by turning the corner (1, 1) into an eye itself + gs.do_move((1, 2), go.BLACK) + gs.do_move((2, 1), go.BLACK) + gs.do_move((2, 2), go.BLACK) + gs.do_move((0, 2), go.BLACK) + + self.assertTrue(gs.is_eyeish((0, 0), go.BLACK)) + self.assertTrue(gs.is_eye((0, 0), go.BLACK)) + self.assertTrue(gs.is_eye((1, 1), go.BLACK)) + + def test_eye_recursion(self): + # a checkerboard pattern of black is 'technically' all true eyes + # mutually supporting each other + gs = GameState(7) + for x in range(gs.size): + for y in range(gs.size): + if (x + y) % 2 == 1: + gs.do_move((x, y), go.BLACK) + self.assertTrue(gs.is_eye((0, 0), go.BLACK)) + + def test_liberties_after_capture(self): + # creates 3x3 black group in the middle, that is then all captured + # ...then an assertion is made that the resulting liberties after + # capture are the same as if the group had never been there + gs_capture = GameState(7) + gs_reference = GameState(7) + # add in 3x3 black stones + for x in range(2, 5): + for y in range(2, 5): + gs_capture.do_move((x, y), go.BLACK) + # surround the black group with white stones + # and set the same white stones in gs_reference + for x in range(2, 5): + gs_capture.do_move((x, 1), go.WHITE) + gs_capture.do_move((x, 5), go.WHITE) + gs_reference.do_move((x, 1), go.WHITE) + gs_reference.do_move((x, 5), go.WHITE) + gs_capture.do_move((1, 1), go.WHITE) + gs_reference.do_move((1, 1), go.WHITE) + for y in range(2, 5): + gs_capture.do_move((1, y), go.WHITE) + gs_capture.do_move((5, y), go.WHITE) + gs_reference.do_move((1, y), go.WHITE) + gs_reference.do_move((5, y), go.WHITE) + + # board configuration and liberties of gs_capture and of gs_reference should be identical + self.assertTrue(np.all(gs_reference.board == gs_capture.board)) + self.assertTrue(np.all(gs_reference.liberty_counts == gs_capture.liberty_counts)) if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/tests/test_gtp_wrapper.py b/tests/test_gtp_wrapper.py 
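The positional-superko test above comes down to one flag: a plain GameState only forbids the immediate ko recapture, while enforce_superko=True also rejects any move that recreates an earlier whole-board position. A condensed, runnable sketch of that difference (import path assumed):

from AlphaGo.go import GameState   # import path assumed for this sketch

move_list = [(0, 3), (0, 4), (1, 3), (1, 4), (2, 3), (2, 4), (2, 2), (3, 4), (2, 1),
             (3, 3), (3, 1), (3, 2), (3, 0), (4, 2), (1, 1), (4, 1), (8, 0), (4, 0),
             (8, 1), (0, 2), (8, 2), (0, 1), (8, 3), (1, 0), (8, 4), (2, 0), (0, 0)]

simple_ko = GameState(size=9)
superko = GameState(size=9, enforce_superko=True)
for move in move_list:
    simple_ko.do_move(move)
    superko.do_move(move)

# playing (1, 0) would recreate an earlier whole-board position
assert simple_ko.is_legal((1, 0))      # allowed when only simple ko is enforced
assert not superko.is_legal((1, 0))    # rejected under positional superko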
index 5ab4ddb7f..345b09afa 100644 --- a/tests/test_gtp_wrapper.py +++ b/tests/test_gtp_wrapper.py @@ -5,26 +5,26 @@ class PassPlayer(object): - def get_move(self, state): - return go.PASS_MOVE + def get_move(self, state): + return go.PASS_MOVE class TestGTPProcess(unittest.TestCase): - def test_run_commands(self): - def stdin_simulator(): - return "\n".join([ - "1 name", - "2 boardsize 19", - "3 clear_board", - "4 genmove black", - "5 genmove white", - "99 quit"]) + def test_run_commands(self): + def stdin_simulator(): + return "\n".join([ + "1 name", + "2 boardsize 19", + "3 clear_board", + "4 genmove black", + "5 genmove white", + "99 quit"]) - gtp_proc = Process(target=run_gtp, args=(PassPlayer(), stdin_simulator)) - gtp_proc.start() - gtp_proc.join(timeout=1) + gtp_proc = Process(target=run_gtp, args=(PassPlayer(), stdin_simulator)) + gtp_proc.start() + gtp_proc.join(timeout=1) if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/tests/test_liberties.py b/tests/test_liberties.py index 4ea0bb728..3bcca7a6b 100644 --- a/tests/test_liberties.py +++ b/tests/test_liberties.py @@ -4,40 +4,40 @@ class TestLiberties(unittest.TestCase): - def setUp(self): - self.s = GameState() - self.s.do_move((4, 5)) - self.s.do_move((5, 5)) - self.s.do_move((5, 6)) - self.s.do_move((10, 10)) - self.s.do_move((4, 6)) - self.s.do_move((10, 11)) - self.s.do_move((6, 6)) - self.s.do_move((9, 10)) - - def test_curr_liberties(self): - self.assertEqual(self.s.liberty_counts[5][5], 2) - self.assertEqual(self.s.liberty_counts[4][5], 8) - self.assertEqual(self.s.liberty_counts[5][6], 8) - - def test_neighbors_edge_cases(self): - - st = GameState() - st.do_move((0, 0)) # B B . . . . . - st.do_move((5, 5)) # B W . . . . . - st.do_move((0, 1)) # . . . . . . . - st.do_move((6, 6)) # . . . . . . . - st.do_move((1, 0)) # . . . . . W . - st.do_move((1, 1)) # . . . . . . W - - # get_group in the corner - self.assertEqual(len(st.get_group((0, 0))), 3, "group size in corner") - - # get_group of an empty space - self.assertEqual(len(st.get_group((4, 4))), 0, "group size of empty space") - - # get_group of a single piece - self.assertEqual(len(st.get_group((5, 5))), 1, "group size of single piece") + def setUp(self): + self.s = GameState() + self.s.do_move((4, 5)) + self.s.do_move((5, 5)) + self.s.do_move((5, 6)) + self.s.do_move((10, 10)) + self.s.do_move((4, 6)) + self.s.do_move((10, 11)) + self.s.do_move((6, 6)) + self.s.do_move((9, 10)) + + def test_curr_liberties(self): + self.assertEqual(self.s.liberty_counts[5][5], 2) + self.assertEqual(self.s.liberty_counts[4][5], 8) + self.assertEqual(self.s.liberty_counts[5][6], 8) + + def test_neighbors_edge_cases(self): + + st = GameState() + st.do_move((0, 0)) # B B . . . . . + st.do_move((5, 5)) # B W . . . . . + st.do_move((0, 1)) # . . . . . . . + st.do_move((6, 6)) # . . . . . . . + st.do_move((1, 0)) # . . . . . W . + st.do_move((1, 1)) # . . . . . . 
W + + # get_group in the corner + self.assertEqual(len(st.get_group((0, 0))), 3, "group size in corner") + + # get_group of an empty space + self.assertEqual(len(st.get_group((4, 4))), 0, "group size of empty space") + + # get_group of a single piece + self.assertEqual(len(st.get_group((5, 5))), 1, "group size of single piece") if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/tests/test_mcts.py b/tests/test_mcts.py index c6ab6f803..3537515a9 100644 --- a/tests/test_mcts.py +++ b/tests/test_mcts.py @@ -6,109 +6,109 @@ class TestTreeNode(unittest.TestCase): - def setUp(self): - self.gs = GameState() - self.node = TreeNode(None, 1.0) - - def test_selection(self): - self.node.expand(dummy_policy(self.gs)) - action, next_node = self.node.select() - self.assertEqual(action, (18, 18)) # according to the dummy policy below - self.assertIsNotNone(next_node) - - def test_expansion(self): - self.assertEqual(0, len(self.node._children)) - self.node.expand(dummy_policy(self.gs)) - self.assertEqual(19 * 19, len(self.node._children)) - for a, p in dummy_policy(self.gs): - self.assertEqual(p, self.node._children[a]._P) - - def test_update(self): - self.node.expand(dummy_policy(self.gs)) - child = self.node._children[(18, 18)] - # Note: the root must be updated first for the visit count to work. - self.node.update(leaf_value=1.0, c_puct=5.0) - child.update(leaf_value=1.0, c_puct=5.0) - expected_score = 1.0 + 5.0 * dummy_distribution[-1] * 0.5 - self.assertEqual(expected_score, child.get_value()) - # After a second update, the Q value should be the average of the two, and the u value - # should be multiplied by sqrt(parent visits) / (node visits + 1) (which was simply equal - # to 0.5 before) - self.node.update(leaf_value=0.0, c_puct=5.0) - child.update(leaf_value=0.0, c_puct=5.0) - expected_score = 0.5 + 5.0 * dummy_distribution[-1] * np.sqrt(2.0) / 3.0 - self.assertEqual(expected_score, child.get_value()) - - def test_update_recursive(self): - # Assertions are identical to test_treenode_update. - self.node.expand(dummy_policy(self.gs)) - child = self.node._children[(18, 18)] - child.update_recursive(leaf_value=1.0, c_puct=5.0) - expected_score = 1.0 + 5.0 * dummy_distribution[-1] / 2.0 - self.assertEqual(expected_score, child.get_value()) - child.update_recursive(leaf_value=0.0, c_puct=5.0) - expected_score = 0.5 + 5.0 * dummy_distribution[-1] * np.sqrt(2.0) / 3.0 - self.assertEqual(expected_score, child.get_value()) + def setUp(self): + self.gs = GameState() + self.node = TreeNode(None, 1.0) + + def test_selection(self): + self.node.expand(dummy_policy(self.gs)) + action, next_node = self.node.select() + self.assertEqual(action, (18, 18)) # according to the dummy policy below + self.assertIsNotNone(next_node) + + def test_expansion(self): + self.assertEqual(0, len(self.node._children)) + self.node.expand(dummy_policy(self.gs)) + self.assertEqual(19 * 19, len(self.node._children)) + for a, p in dummy_policy(self.gs): + self.assertEqual(p, self.node._children[a]._P) + + def test_update(self): + self.node.expand(dummy_policy(self.gs)) + child = self.node._children[(18, 18)] + # Note: the root must be updated first for the visit count to work. 
+ self.node.update(leaf_value=1.0, c_puct=5.0) + child.update(leaf_value=1.0, c_puct=5.0) + expected_score = 1.0 + 5.0 * dummy_distribution[-1] * 0.5 + self.assertEqual(expected_score, child.get_value()) + # After a second update, the Q value should be the average of the two, and the u value + # should be multiplied by sqrt(parent visits) / (node visits + 1) (which was simply equal + # to 0.5 before) + self.node.update(leaf_value=0.0, c_puct=5.0) + child.update(leaf_value=0.0, c_puct=5.0) + expected_score = 0.5 + 5.0 * dummy_distribution[-1] * np.sqrt(2.0) / 3.0 + self.assertEqual(expected_score, child.get_value()) + + def test_update_recursive(self): + # Assertions are identical to test_treenode_update. + self.node.expand(dummy_policy(self.gs)) + child = self.node._children[(18, 18)] + child.update_recursive(leaf_value=1.0, c_puct=5.0) + expected_score = 1.0 + 5.0 * dummy_distribution[-1] / 2.0 + self.assertEqual(expected_score, child.get_value()) + child.update_recursive(leaf_value=0.0, c_puct=5.0) + expected_score = 0.5 + 5.0 * dummy_distribution[-1] * np.sqrt(2.0) / 3.0 + self.assertEqual(expected_score, child.get_value()) class TestMCTS(unittest.TestCase): - def setUp(self): - self.gs = GameState() - self.mcts = MCTS(dummy_value, dummy_policy, dummy_rollout, n_playout=2) - - def _count_expansions(self): - """Helper function to count the number of expansions past the root using the dummy policy - """ - node = self.mcts._root - expansions = 0 - # Loop over actions in decreasing probability. - for action, _ in sorted(dummy_policy(self.gs), key=lambda (a, p): p, reverse=True): - if action in node._children: - expansions += 1 - node = node._children[action] - else: - break - return expansions - - def test_playout(self): - self.mcts._playout(self.gs.copy(), 8) - # Assert that the most likely child was visited (according to the dummy policy below). - self.assertEqual(1, self.mcts._root._children[(18, 18)]._n_visits) - # Assert that the search depth expanded nodes 8 times. - self.assertEqual(8, self._count_expansions()) - - def test_playout_with_pass(self): - # Test that playout handles the end of the game (i.e. passing/no moves). Mock this by - # creating a policy that returns nothing after 4 moves. - def stop_early_policy(state): - if len(state.history) <= 4: - return dummy_policy(state) - else: - return [] - self.mcts = MCTS(dummy_value, stop_early_policy, stop_early_policy, n_playout=2) - self.mcts._playout(self.gs.copy(), 8) - # Assert that (18, 18) and (18, 17) are still only visited once. - self.assertEqual(1, self.mcts._root._children[(18, 18)]._n_visits) - # Assert that no expansions happened after reaching the "end" in 4 moves. - self.assertEqual(5, self._count_expansions()) - - def test_get_move(self): - move = self.mcts.get_move(self.gs) - self.mcts.update_with_move(move) - # success if no errors - - def test_update_with_move(self): - move = self.mcts.get_move(self.gs) - self.gs.do_move(move) - self.mcts.update_with_move(move) - # Assert that the new root still has children. - self.assertTrue(len(self.mcts._root._children) > 0) - # Assert that the new root has no parent (the rest of the tree will be garbage collected). - self.assertIsNone(self.mcts._root._parent) - # Assert that the next best move according to the root is (18, 17), according to the - # dummy policy below. 
- self.assertEqual((18, 17), self.mcts._root.select()[0]) + def setUp(self): + self.gs = GameState() + self.mcts = MCTS(dummy_value, dummy_policy, dummy_rollout, n_playout=2) + + def _count_expansions(self): + """Helper function to count the number of expansions past the root using the dummy policy + """ + node = self.mcts._root + expansions = 0 + # Loop over actions in decreasing probability. + for action, _ in sorted(dummy_policy(self.gs), key=lambda (a, p): p, reverse=True): + if action in node._children: + expansions += 1 + node = node._children[action] + else: + break + return expansions + + def test_playout(self): + self.mcts._playout(self.gs.copy(), 8) + # Assert that the most likely child was visited (according to the dummy policy below). + self.assertEqual(1, self.mcts._root._children[(18, 18)]._n_visits) + # Assert that the search depth expanded nodes 8 times. + self.assertEqual(8, self._count_expansions()) + + def test_playout_with_pass(self): + # Test that playout handles the end of the game (i.e. passing/no moves). Mock this by + # creating a policy that returns nothing after 4 moves. + def stop_early_policy(state): + if len(state.history) <= 4: + return dummy_policy(state) + else: + return [] + self.mcts = MCTS(dummy_value, stop_early_policy, stop_early_policy, n_playout=2) + self.mcts._playout(self.gs.copy(), 8) + # Assert that (18, 18) and (18, 17) are still only visited once. + self.assertEqual(1, self.mcts._root._children[(18, 18)]._n_visits) + # Assert that no expansions happened after reaching the "end" in 4 moves. + self.assertEqual(5, self._count_expansions()) + + def test_get_move(self): + move = self.mcts.get_move(self.gs) + self.mcts.update_with_move(move) + # success if no errors + + def test_update_with_move(self): + move = self.mcts.get_move(self.gs) + self.gs.do_move(move) + self.mcts.update_with_move(move) + # Assert that the new root still has children. + self.assertTrue(len(self.mcts._root._children) > 0) + # Assert that the new root has no parent (the rest of the tree will be garbage collected). + self.assertIsNone(self.mcts._root._parent) + # Assert that the next best move according to the root is (18, 17), according to the + # dummy policy below. + self.assertEqual((18, 17), self.mcts._root.select()[0]) # A distribution over positions that is smallest at (0,0) and largest at (18,18) @@ -117,17 +117,17 @@ def test_update_with_move(self): def dummy_policy(state): - moves = state.get_legal_moves(include_eyes=False) - return zip(moves, dummy_distribution) + moves = state.get_legal_moves(include_eyes=False) + return zip(moves, dummy_distribution) # Rollout is a clone of the policy function. 
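The expected_score values asserted in test_update and test_update_recursive follow the rule spelled out in their comments, value = Q + c_puct * P * sqrt(parent visits) / (1 + node visits). The TreeNode implementation itself is not part of this whitespace patch, so the following arithmetic is only a sketch of what those assertions imply.

import numpy as np

def node_value(Q, P, n_parent, n_child, c_puct=5.0):
    # mean action value plus the exploration bonus described in the test comments
    return Q + c_puct * P * np.sqrt(n_parent) / (1 + n_child)

P = 0.9   # stands in for dummy_distribution[-1], the prior of the (18, 18) child
# First update: one win through root and child, so Q = 1.0 and both visit counts are 1.
assert np.isclose(node_value(1.0, P, 1, 1), 1.0 + 5.0 * P * 0.5)
# Second update: a loss averages Q down to 0.5 and both visit counts become 2.
assert np.isclose(node_value(0.5, P, 2, 2), 0.5 + 5.0 * P * np.sqrt(2.0) / 3.0)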
dummy_rollout = dummy_policy def dummy_value(state): - # it's not very confident - return 0.0 + # it's not very confident + return 0.0 if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/tests/test_policy.py b/tests/test_policy.py index ddca1de6a..9494f4765 100644 --- a/tests/test_policy.py +++ b/tests/test_policy.py @@ -9,143 +9,143 @@ class TestCNNPolicy(unittest.TestCase): - def test_default_policy(self): - policy = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"]) - policy.eval_state(GameState()) - # just hope nothing breaks + def test_default_policy(self): + policy = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"]) + policy.eval_state(GameState()) + # just hope nothing breaks - def test_batch_eval_state(self): - policy = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"]) - results = policy.batch_eval_state([GameState(), GameState()]) - self.assertEqual(len(results), 2) # one result per GameState - self.assertEqual(len(results[0]), 361) # each one has 361 (move,prob) pairs + def test_batch_eval_state(self): + policy = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"]) + results = policy.batch_eval_state([GameState(), GameState()]) + self.assertEqual(len(results), 2) # one result per GameState + self.assertEqual(len(results[0]), 361) # each one has 361 (move,prob) pairs - def test_output_size(self): - policy19 = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"], board=19) - output = policy19.forward(policy19.preprocessor.state_to_tensor(GameState(19))) - self.assertEqual(output.shape, (1, 19 * 19)) + def test_output_size(self): + policy19 = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"], board=19) + output = policy19.forward(policy19.preprocessor.state_to_tensor(GameState(19))) + self.assertEqual(output.shape, (1, 19 * 19)) - policy13 = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"], board=13) - output = policy13.forward(policy13.preprocessor.state_to_tensor(GameState(13))) - self.assertEqual(output.shape, (1, 13 * 13)) + policy13 = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"], board=13) + output = policy13.forward(policy13.preprocessor.state_to_tensor(GameState(13))) + self.assertEqual(output.shape, (1, 13 * 13)) - def test_save_load(self): - policy = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"]) + def test_save_load(self): + policy = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"]) - model_file = 'TESTPOLICY.json' - weights_file = 'TESTWEIGHTS.h5' - model_file2 = 'TESTPOLICY2.json' - weights_file2 = 'TESTWEIGHTS2.h5' + model_file = 'TESTPOLICY.json' + weights_file = 'TESTWEIGHTS.h5' + model_file2 = 'TESTPOLICY2.json' + weights_file2 = 'TESTWEIGHTS2.h5' - # test saving model/weights separately - policy.save_model(model_file) - policy.model.save_weights(weights_file, overwrite=True) - # test saving them together - policy.save_model(model_file2, weights_file2) + # test saving model/weights separately + policy.save_model(model_file) + policy.model.save_weights(weights_file, overwrite=True) + # test saving them together + policy.save_model(model_file2, weights_file2) - copypolicy = CNNPolicy.load_model(model_file) - copypolicy.model.load_weights(weights_file) + copypolicy = CNNPolicy.load_model(model_file) + copypolicy.model.load_weights(weights_file) - copypolicy2 = CNNPolicy.load_model(model_file2) + copypolicy2 = CNNPolicy.load_model(model_file2) - for w1, w2 in 
zip(copypolicy.model.get_weights(), copypolicy2.model.get_weights()): - self.assertTrue(np.all(w1 == w2)) + for w1, w2 in zip(copypolicy.model.get_weights(), copypolicy2.model.get_weights()): + self.assertTrue(np.all(w1 == w2)) - os.remove(model_file) - os.remove(weights_file) - os.remove(model_file2) - os.remove(weights_file2) + os.remove(model_file) + os.remove(weights_file) + os.remove(model_file2) + os.remove(weights_file2) class TestResnetPolicy(unittest.TestCase): - def test_default_policy(self): - policy = ResnetPolicy(["board", "liberties", "sensibleness", "capture_size"]) - policy.eval_state(GameState()) - # just hope nothing breaks + def test_default_policy(self): + policy = ResnetPolicy(["board", "liberties", "sensibleness", "capture_size"]) + policy.eval_state(GameState()) + # just hope nothing breaks - def test_batch_eval_state(self): - policy = ResnetPolicy(["board", "liberties", "sensibleness", "capture_size"]) - results = policy.batch_eval_state([GameState(), GameState()]) - self.assertEqual(len(results), 2) # one result per GameState - self.assertEqual(len(results[0]), 361) # each one has 361 (move,prob) pairs + def test_batch_eval_state(self): + policy = ResnetPolicy(["board", "liberties", "sensibleness", "capture_size"]) + results = policy.batch_eval_state([GameState(), GameState()]) + self.assertEqual(len(results), 2) # one result per GameState + self.assertEqual(len(results[0]), 361) # each one has 361 (move,prob) pairs - def test_save_load(self): - """Identical to above test_save_load - """ - policy = ResnetPolicy(["board", "liberties", "sensibleness", "capture_size"]) + def test_save_load(self): + """Identical to above test_save_load + """ + policy = ResnetPolicy(["board", "liberties", "sensibleness", "capture_size"]) - model_file = 'TESTPOLICY.json' - weights_file = 'TESTWEIGHTS.h5' - model_file2 = 'TESTPOLICY2.json' - weights_file2 = 'TESTWEIGHTS2.h5' + model_file = 'TESTPOLICY.json' + weights_file = 'TESTWEIGHTS.h5' + model_file2 = 'TESTPOLICY2.json' + weights_file2 = 'TESTWEIGHTS2.h5' - # test saving model/weights separately - policy.save_model(model_file) - policy.model.save_weights(weights_file, overwrite=True) - # test saving them together - policy.save_model(model_file2, weights_file2) + # test saving model/weights separately + policy.save_model(model_file) + policy.model.save_weights(weights_file, overwrite=True) + # test saving them together + policy.save_model(model_file2, weights_file2) - copypolicy = ResnetPolicy.load_model(model_file) - copypolicy.model.load_weights(weights_file) + copypolicy = ResnetPolicy.load_model(model_file) + copypolicy.model.load_weights(weights_file) - copypolicy2 = ResnetPolicy.load_model(model_file2) + copypolicy2 = ResnetPolicy.load_model(model_file2) - for w1, w2 in zip(copypolicy.model.get_weights(), copypolicy2.model.get_weights()): - self.assertTrue(np.all(w1 == w2)) + for w1, w2 in zip(copypolicy.model.get_weights(), copypolicy2.model.get_weights()): + self.assertTrue(np.all(w1 == w2)) - # check that save/load keeps the ResnetPolicy class - self.assertTrue(type(policy) == type(copypolicy)) + # check that save/load keeps the ResnetPolicy class + self.assertTrue(type(policy) == type(copypolicy)) - os.remove(model_file) - os.remove(weights_file) - os.remove(model_file2) - os.remove(weights_file2) + os.remove(model_file) + os.remove(weights_file) + os.remove(model_file2) + os.remove(weights_file2) class TestPlayers(unittest.TestCase): - def test_greedy_player(self): - gs = GameState() - policy = CNNPolicy(["board", "ones", 
"turns_since"]) - player = GreedyPolicyPlayer(policy) - for i in range(20): - move = player.get_move(gs) - self.assertIsNotNone(move) - gs.do_move(move) - - def test_probabilistic_player(self): - gs = GameState() - policy = CNNPolicy(["board", "ones", "turns_since"]) - player = ProbabilisticPolicyPlayer(policy) - for i in range(20): - move = player.get_move(gs) - self.assertIsNotNone(move) - gs.do_move(move) - - def test_sensible_probabilistic(self): - gs = GameState() - policy = CNNPolicy(["board", "ones", "turns_since"]) - player = ProbabilisticPolicyPlayer(policy) - empty = (10, 10) - for x in range(19): - for y in range(19): - if (x, y) != empty: - gs.do_move((x, y), go.BLACK) - gs.current_player = go.BLACK - self.assertIsNone(player.get_move(gs)) - - def test_sensible_greedy(self): - gs = GameState() - policy = CNNPolicy(["board", "ones", "turns_since"]) - player = GreedyPolicyPlayer(policy) - empty = (10, 10) - for x in range(19): - for y in range(19): - if (x, y) != empty: - gs.do_move((x, y), go.BLACK) - gs.current_player = go.BLACK - self.assertIsNone(player.get_move(gs)) + def test_greedy_player(self): + gs = GameState() + policy = CNNPolicy(["board", "ones", "turns_since"]) + player = GreedyPolicyPlayer(policy) + for i in range(20): + move = player.get_move(gs) + self.assertIsNotNone(move) + gs.do_move(move) + + def test_probabilistic_player(self): + gs = GameState() + policy = CNNPolicy(["board", "ones", "turns_since"]) + player = ProbabilisticPolicyPlayer(policy) + for i in range(20): + move = player.get_move(gs) + self.assertIsNotNone(move) + gs.do_move(move) + + def test_sensible_probabilistic(self): + gs = GameState() + policy = CNNPolicy(["board", "ones", "turns_since"]) + player = ProbabilisticPolicyPlayer(policy) + empty = (10, 10) + for x in range(19): + for y in range(19): + if (x, y) != empty: + gs.do_move((x, y), go.BLACK) + gs.current_player = go.BLACK + self.assertIsNone(player.get_move(gs)) + + def test_sensible_greedy(self): + gs = GameState() + policy = CNNPolicy(["board", "ones", "turns_since"]) + player = GreedyPolicyPlayer(policy) + empty = (10, 10) + for x in range(19): + for y in range(19): + if (x, y) != empty: + gs.do_move((x, y), go.BLACK) + gs.current_player = go.BLACK + self.assertIsNone(player.get_move(gs)) if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py index 509abeb26..a21e1851d 100644 --- a/tests/test_preprocessing.py +++ b/tests/test_preprocessing.py @@ -5,332 +5,332 @@ def simple_board(): - # make a tiny board for the sake of testing and hand-coding expected results - # - # X - # 0 1 2 3 4 5 6 - # B W . . . . . 0 - # B W . . . . . 1 - # B . . . B . . 2 - # Y . . . B k B . 3 - # . . . W B W . 4 - # . . . . W . . 5 - # . . . . . . . 6 - # - # where k is a ko position (white was just captured) - - gs = go.GameState(size=7) - - # ladder-looking thing in the top-left - gs.do_move((0, 0)) # B - gs.do_move((1, 0)) # W - gs.do_move((0, 1)) # B - gs.do_move((1, 1)) # W - gs.do_move((0, 2)) # B - - # ko position in the middle - gs.do_move((3, 4)) # W - gs.do_move((3, 3)) # B - gs.do_move((4, 5)) # W - gs.do_move((4, 2)) # B - gs.do_move((5, 4)) # W - gs.do_move((5, 3)) # B - gs.do_move((4, 3)) # W - the ko position - gs.do_move((4, 4)) # B - does the capture - - return gs + # make a tiny board for the sake of testing and hand-coding expected results + # + # X + # 0 1 2 3 4 5 6 + # B W . . . . . 0 + # B W . . . . . 1 + # B . . . B . . 2 + # Y . . . B k B . 3 + # . . . 
W B W . 4 + # . . . . W . . 5 + # . . . . . . . 6 + # + # where k is a ko position (white was just captured) + + gs = go.GameState(size=7) + + # ladder-looking thing in the top-left + gs.do_move((0, 0)) # B + gs.do_move((1, 0)) # W + gs.do_move((0, 1)) # B + gs.do_move((1, 1)) # W + gs.do_move((0, 2)) # B + + # ko position in the middle + gs.do_move((3, 4)) # W + gs.do_move((3, 3)) # B + gs.do_move((4, 5)) # W + gs.do_move((4, 2)) # B + gs.do_move((5, 4)) # W + gs.do_move((5, 3)) # B + gs.do_move((4, 3)) # W - the ko position + gs.do_move((4, 4)) # B - does the capture + + return gs def self_atari_board(): - # another tiny board for testing self-atari specifically. - # positions marked with 'a' are self-atari for black - # - # X - # 0 1 2 3 4 5 6 - # a W . . . W B 0 - # . . . . . . . 1 - # . . . . . . . 2 - # Y . . W . W . . 3 - # . W B a B W . 4 - # . . W W W . . 5 - # . . . . . . . 6 - # - # current_player = black - gs = go.GameState(size=7) - - gs.do_move((2, 4), go.BLACK) - gs.do_move((4, 4), go.BLACK) - gs.do_move((6, 0), go.BLACK) - - gs.do_move((1, 0), go.WHITE) - gs.do_move((5, 0), go.WHITE) - gs.do_move((2, 3), go.WHITE) - gs.do_move((4, 3), go.WHITE) - gs.do_move((1, 4), go.WHITE) - gs.do_move((5, 4), go.WHITE) - gs.do_move((2, 5), go.WHITE) - gs.do_move((3, 5), go.WHITE) - gs.do_move((4, 5), go.WHITE) - - return gs + # another tiny board for testing self-atari specifically. + # positions marked with 'a' are self-atari for black + # + # X + # 0 1 2 3 4 5 6 + # a W . . . W B 0 + # . . . . . . . 1 + # . . . . . . . 2 + # Y . . W . W . . 3 + # . W B a B W . 4 + # . . W W W . . 5 + # . . . . . . . 6 + # + # current_player = black + gs = go.GameState(size=7) + + gs.do_move((2, 4), go.BLACK) + gs.do_move((4, 4), go.BLACK) + gs.do_move((6, 0), go.BLACK) + + gs.do_move((1, 0), go.WHITE) + gs.do_move((5, 0), go.WHITE) + gs.do_move((2, 3), go.WHITE) + gs.do_move((4, 3), go.WHITE) + gs.do_move((1, 4), go.WHITE) + gs.do_move((5, 4), go.WHITE) + gs.do_move((2, 5), go.WHITE) + gs.do_move((3, 5), go.WHITE) + gs.do_move((4, 5), go.WHITE) + + return gs def capture_board(): - # another small board, this one with imminent captures - # - # X - # 0 1 2 3 4 5 6 - # . . B B . . . 0 - # . B W W B . . 1 - # . B W . . . . 2 - # Y . . B . . . . 3 - # . . . . W B . 4 - # . . . W . W B 5 - # . . . . W B . 6 - # - # current_player = black - gs = go.GameState(size=7) - - black = [(2, 0), (3, 0), (1, 1), (4, 1), (1, 2), (2, 3), (5, 4), (6, 5), (5, 6)] - white = [(2, 1), (3, 1), (2, 2), (4, 4), (3, 5), (5, 5), (4, 6)] - - for B in black: - gs.do_move(B, go.BLACK) - for W in white: - gs.do_move(W, go.WHITE) - gs.current_player = go.BLACK - - return gs + # another small board, this one with imminent captures + # + # X + # 0 1 2 3 4 5 6 + # . . B B . . . 0 + # . B W W B . . 1 + # . B W . . . . 2 + # Y . . B . . . . 3 + # . . . . W B . 4 + # . . . W . W B 5 + # . . . . W B . 6 + # + # current_player = black + gs = go.GameState(size=7) + + black = [(2, 0), (3, 0), (1, 1), (4, 1), (1, 2), (2, 3), (5, 4), (6, 5), (5, 6)] + white = [(2, 1), (3, 1), (2, 2), (4, 4), (3, 5), (5, 5), (4, 6)] + + for B in black: + gs.do_move(B, go.BLACK) + for W in white: + gs.do_move(W, go.WHITE) + gs.current_player = go.BLACK + + return gs class TestPreprocessingFeatures(unittest.TestCase): - """Test the functions in preprocessing.py - - note that the hand-coded features look backwards from what is depicted - in simple_board() because of the x/y column/row transpose thing (i.e. 
- numpy is typically thought of as indexing rows first, but we use (x,y) - indexes, so a numpy row is like a go column and vice versa) - """ - - def test_get_board(self): - gs = simple_board() - pp = Preprocess(["board"]) - feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) - - white_pos = np.asarray([ - [0, 0, 0, 0, 0, 0, 0], - [1, 1, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 1, 0, 0], - [0, 0, 0, 0, 0, 1, 0], - [0, 0, 0, 0, 1, 0, 0], - [0, 0, 0, 0, 0, 0, 0]]) - black_pos = np.asarray([ - [1, 1, 1, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 1, 0, 0, 0], - [0, 0, 1, 0, 1, 0, 0], - [0, 0, 0, 1, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0]]) - empty_pos = np.ones((gs.size, gs.size)) - (white_pos + black_pos) - - # check number of planes - self.assertEqual(feature.shape, (gs.size, gs.size, 3)) - # check return value against hand-coded expectation - # (given that current_player is white) - self.assertTrue(np.all(feature == np.dstack((white_pos, black_pos, empty_pos)))) - - def test_get_turns_since(self): - gs = simple_board() - pp = Preprocess(["turns_since"]) - feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) - - one_hot_turns = np.zeros((gs.size, gs.size, 8)) - - rev_moves = gs.history[::-1] - - for x in range(gs.size): - for y in range(gs.size): - if gs.board[x, y] != go.EMPTY: - # find most recent move at x, y - age = rev_moves.index((x, y)) - one_hot_turns[x, y, min(age, 7)] = 1 - - self.assertTrue(np.all(feature == one_hot_turns)) - - def test_get_liberties(self): - gs = simple_board() - pp = Preprocess(["liberties"]) - feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) - - # todo - test liberties when > 8 - - one_hot_liberties = np.zeros((gs.size, gs.size, 8)) - # black piece at (4,4) has a single liberty: (4,3) - one_hot_liberties[4, 4, 0] = 1 - - # the black group in the top left corner has 2 liberties - one_hot_liberties[0, 0:3, 1] = 1 - # .. 
as do the white pieces on the left and right of the eye - one_hot_liberties[3, 4, 1] = 1 - one_hot_liberties[5, 4, 1] = 1 - - # the white group in the top left corner has 3 liberties - one_hot_liberties[1, 0:2, 2] = 1 - # ...as does the white piece at (4,5) - one_hot_liberties[4, 5, 2] = 1 - # ...and the black pieces on the sides of the eye - one_hot_liberties[3, 3, 2] = 1 - one_hot_liberties[5, 3, 2] = 1 - - # the black piece at (4,2) has 4 liberties - one_hot_liberties[4, 2, 3] = 1 - - for i in range(8): - self.assertTrue( - np.all(feature[:, :, i] == one_hot_liberties[:, :, i]), - "bad expectation: stones with %d liberties" % (i + 1)) - - def test_get_capture_size(self): - gs = capture_board() - pp = Preprocess(["capture_size"]) - feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) - - score_before = gs.num_white_prisoners - one_hot_capture = np.zeros((gs.size, gs.size, 8)) - # there is no capture available; all legal moves are zero-capture - for (x, y) in gs.get_legal_moves(): - copy = gs.copy() - copy.do_move((x, y)) - num_captured = copy.num_white_prisoners - score_before - one_hot_capture[x, y, min(7, num_captured)] = 1 - - for i in range(8): - self.assertTrue( - np.all(feature[:, :, i] == one_hot_capture[:, :, i]), - "bad expectation: capturing %d stones" % i) - - def test_get_self_atari_size(self): - gs = self_atari_board() - pp = Preprocess(["self_atari_size"]) - feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) - - one_hot_self_atari = np.zeros((gs.size, gs.size, 8)) - # self atari of size 1 at position 0,0 - one_hot_self_atari[0, 0, 0] = 1 - # self atari of size 3 at position 3,4 - one_hot_self_atari[3, 4, 2] = 1 - - self.assertTrue(np.all(feature == one_hot_self_atari)) - - def test_get_self_atari_size_cap(self): - gs = capture_board() - pp = Preprocess(["self_atari_size"]) - feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) - - one_hot_self_atari = np.zeros((gs.size, gs.size, 8)) - # self atari of size 1 at the ko position and just below it - one_hot_self_atari[4, 5, 0] = 1 - one_hot_self_atari[3, 6, 0] = 1 - # self atari of size 3 at bottom corner - one_hot_self_atari[6, 6, 2] = 1 - - self.assertTrue(np.all(feature == one_hot_self_atari)) - - def test_get_liberties_after(self): - gs = simple_board() - pp = Preprocess(["liberties_after"]) - feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) - - one_hot_liberties = np.zeros((gs.size, gs.size, 8)) - - # TODO (?) hand-code? 
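All of these feature tests rely on the same tensor layout: Preprocess.state_to_tensor returns a batch of shape (1, total_planes, size, size), which the tests transpose to (size, size, total_planes) before comparing against hand-coded planes. A small sketch of that layout, with import paths assumed and plane counts taken from the tests above:

from AlphaGo.go import GameState
from AlphaGo.preprocessing.preprocessing import Preprocess   # import paths assumed

gs = GameState(size=7)                     # same board size as the fixtures above
pp = Preprocess(["board", "liberties"])    # 3 board planes + 8 liberty planes
tensor = pp.state_to_tensor(gs)
assert tensor.shape == (1, 3 + 8, gs.size, gs.size)

feature = tensor[0].transpose((1, 2, 0))   # per-position view used by the tests
assert feature.shape == (gs.size, gs.size, 3 + 8)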
- for (x, y) in gs.get_legal_moves(): - copy = gs.copy() - copy.do_move((x, y)) - libs = copy.liberty_counts[x, y] - if libs < 7: - one_hot_liberties[x, y, libs - 1] = 1 - else: - one_hot_liberties[x, y, 7] = 1 - - for i in range(8): - self.assertTrue( - np.all(feature[:, :, i] == one_hot_liberties[:, :, i]), - "bad expectation: stones with %d liberties after move" % (i + 1)) - - def test_get_liberties_after_cap(self): - """A copy of test_get_liberties_after but where captures are imminent - """ - gs = capture_board() - pp = Preprocess(["liberties_after"]) - feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) - - one_hot_liberties = np.zeros((gs.size, gs.size, 8)) - - for (x, y) in gs.get_legal_moves(): - copy = gs.copy() - copy.do_move((x, y)) - libs = copy.liberty_counts[x, y] - one_hot_liberties[x, y, min(libs - 1, 7)] = 1 - - for i in range(8): - self.assertTrue( - np.all(feature[:, :, i] == one_hot_liberties[:, :, i]), - "bad expectation: stones with %d liberties after move" % (i + 1)) - - def test_get_ladder_capture(self): - pass - - def test_get_ladder_escape(self): - pass - - def test_get_sensibleness(self): - # TODO - there are no legal eyes at the moment - - gs = simple_board() - pp = Preprocess(["sensibleness"]) - feature = pp.state_to_tensor(gs)[0, 0] # 1D tensor; no need to transpose - - expectation = np.zeros((gs.size, gs.size)) - for (x, y) in gs.get_legal_moves(): - if not (gs.is_eye((x, y), go.WHITE)): - expectation[x, y] = 1 - self.assertTrue(np.all(expectation == feature)) - - def test_get_legal(self): - gs = simple_board() - pp = Preprocess(["legal"]) - feature = pp.state_to_tensor(gs)[0, 0] # 1D tensor; no need to transpose - - expectation = np.zeros((gs.size, gs.size)) - for (x, y) in gs.get_legal_moves(): - expectation[x, y] = 1 - self.assertTrue(np.all(expectation == feature)) - - def test_feature_concatenation(self): - gs = simple_board() - pp = Preprocess(["board", "sensibleness", "capture_size"]) - feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) - - expectation = np.zeros((gs.size, gs.size, 3 + 1 + 8)) - - # first three planes: board - expectation[:, :, 0] = (gs.board == go.WHITE) * 1 - expectation[:, :, 1] = (gs.board == go.BLACK) * 1 - expectation[:, :, 2] = (gs.board == go.EMPTY) * 1 - - # 4th plane: sensibleness (as in test_get_sensibleness) - for (x, y) in gs.get_legal_moves(): - if not (gs.is_eye((x, y), go.WHITE)): - expectation[x, y, 3] = 1 - - # 5th through 12th plane: capture size (all zero-capture) - for (x, y) in gs.get_legal_moves(): - expectation[x, y, 4] = 1 - - self.assertTrue(np.all(expectation == feature)) + """Test the functions in preprocessing.py + + note that the hand-coded features look backwards from what is depicted + in simple_board() because of the x/y column/row transpose thing (i.e. 
+ numpy is typically thought of as indexing rows first, but we use (x,y) + indexes, so a numpy row is like a go column and vice versa) + """ + + def test_get_board(self): + gs = simple_board() + pp = Preprocess(["board"]) + feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) + + white_pos = np.asarray([ + [0, 0, 0, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 0, 0]]) + black_pos = np.asarray([ + [1, 1, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0], + [0, 0, 1, 0, 1, 0, 0], + [0, 0, 0, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0]]) + empty_pos = np.ones((gs.size, gs.size)) - (white_pos + black_pos) + + # check number of planes + self.assertEqual(feature.shape, (gs.size, gs.size, 3)) + # check return value against hand-coded expectation + # (given that current_player is white) + self.assertTrue(np.all(feature == np.dstack((white_pos, black_pos, empty_pos)))) + + def test_get_turns_since(self): + gs = simple_board() + pp = Preprocess(["turns_since"]) + feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) + + one_hot_turns = np.zeros((gs.size, gs.size, 8)) + + rev_moves = gs.history[::-1] + + for x in range(gs.size): + for y in range(gs.size): + if gs.board[x, y] != go.EMPTY: + # find most recent move at x, y + age = rev_moves.index((x, y)) + one_hot_turns[x, y, min(age, 7)] = 1 + + self.assertTrue(np.all(feature == one_hot_turns)) + + def test_get_liberties(self): + gs = simple_board() + pp = Preprocess(["liberties"]) + feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) + + # todo - test liberties when > 8 + + one_hot_liberties = np.zeros((gs.size, gs.size, 8)) + # black piece at (4,4) has a single liberty: (4,3) + one_hot_liberties[4, 4, 0] = 1 + + # the black group in the top left corner has 2 liberties + one_hot_liberties[0, 0:3, 1] = 1 + # .. 
as do the white pieces on the left and right of the eye + one_hot_liberties[3, 4, 1] = 1 + one_hot_liberties[5, 4, 1] = 1 + + # the white group in the top left corner has 3 liberties + one_hot_liberties[1, 0:2, 2] = 1 + # ...as does the white piece at (4,5) + one_hot_liberties[4, 5, 2] = 1 + # ...and the black pieces on the sides of the eye + one_hot_liberties[3, 3, 2] = 1 + one_hot_liberties[5, 3, 2] = 1 + + # the black piece at (4,2) has 4 liberties + one_hot_liberties[4, 2, 3] = 1 + + for i in range(8): + self.assertTrue( + np.all(feature[:, :, i] == one_hot_liberties[:, :, i]), + "bad expectation: stones with %d liberties" % (i + 1)) + + def test_get_capture_size(self): + gs = capture_board() + pp = Preprocess(["capture_size"]) + feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) + + score_before = gs.num_white_prisoners + one_hot_capture = np.zeros((gs.size, gs.size, 8)) + # there is no capture available; all legal moves are zero-capture + for (x, y) in gs.get_legal_moves(): + copy = gs.copy() + copy.do_move((x, y)) + num_captured = copy.num_white_prisoners - score_before + one_hot_capture[x, y, min(7, num_captured)] = 1 + + for i in range(8): + self.assertTrue( + np.all(feature[:, :, i] == one_hot_capture[:, :, i]), + "bad expectation: capturing %d stones" % i) + + def test_get_self_atari_size(self): + gs = self_atari_board() + pp = Preprocess(["self_atari_size"]) + feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) + + one_hot_self_atari = np.zeros((gs.size, gs.size, 8)) + # self atari of size 1 at position 0,0 + one_hot_self_atari[0, 0, 0] = 1 + # self atari of size 3 at position 3,4 + one_hot_self_atari[3, 4, 2] = 1 + + self.assertTrue(np.all(feature == one_hot_self_atari)) + + def test_get_self_atari_size_cap(self): + gs = capture_board() + pp = Preprocess(["self_atari_size"]) + feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) + + one_hot_self_atari = np.zeros((gs.size, gs.size, 8)) + # self atari of size 1 at the ko position and just below it + one_hot_self_atari[4, 5, 0] = 1 + one_hot_self_atari[3, 6, 0] = 1 + # self atari of size 3 at bottom corner + one_hot_self_atari[6, 6, 2] = 1 + + self.assertTrue(np.all(feature == one_hot_self_atari)) + + def test_get_liberties_after(self): + gs = simple_board() + pp = Preprocess(["liberties_after"]) + feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) + + one_hot_liberties = np.zeros((gs.size, gs.size, 8)) + + # TODO (?) hand-code? 
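As a reminder of how the "sensibleness" and "legal" planes tested in this file differ: sensibleness keeps only those legal moves that do not fill the mover's own true eye. A tiny sketch of that relationship (import paths assumed; on an empty board the two sets coincide):

from AlphaGo import go
from AlphaGo.go import GameState   # import paths assumed for this sketch

gs = GameState(size=7)
legal = set(gs.get_legal_moves())
# "sensibleness" drops legal moves that would fill the mover's own true eye
sensible = {m for m in legal if not gs.is_eye(m, gs.current_player)}
assert sensible <= legal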
+ for (x, y) in gs.get_legal_moves(): + copy = gs.copy() + copy.do_move((x, y)) + libs = copy.liberty_counts[x, y] + if libs < 7: + one_hot_liberties[x, y, libs - 1] = 1 + else: + one_hot_liberties[x, y, 7] = 1 + + for i in range(8): + self.assertTrue( + np.all(feature[:, :, i] == one_hot_liberties[:, :, i]), + "bad expectation: stones with %d liberties after move" % (i + 1)) + + def test_get_liberties_after_cap(self): + """A copy of test_get_liberties_after but where captures are imminent + """ + gs = capture_board() + pp = Preprocess(["liberties_after"]) + feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) + + one_hot_liberties = np.zeros((gs.size, gs.size, 8)) + + for (x, y) in gs.get_legal_moves(): + copy = gs.copy() + copy.do_move((x, y)) + libs = copy.liberty_counts[x, y] + one_hot_liberties[x, y, min(libs - 1, 7)] = 1 + + for i in range(8): + self.assertTrue( + np.all(feature[:, :, i] == one_hot_liberties[:, :, i]), + "bad expectation: stones with %d liberties after move" % (i + 1)) + + def test_get_ladder_capture(self): + pass + + def test_get_ladder_escape(self): + pass + + def test_get_sensibleness(self): + # TODO - there are no legal eyes at the moment + + gs = simple_board() + pp = Preprocess(["sensibleness"]) + feature = pp.state_to_tensor(gs)[0, 0] # 1D tensor; no need to transpose + + expectation = np.zeros((gs.size, gs.size)) + for (x, y) in gs.get_legal_moves(): + if not (gs.is_eye((x, y), go.WHITE)): + expectation[x, y] = 1 + self.assertTrue(np.all(expectation == feature)) + + def test_get_legal(self): + gs = simple_board() + pp = Preprocess(["legal"]) + feature = pp.state_to_tensor(gs)[0, 0] # 1D tensor; no need to transpose + + expectation = np.zeros((gs.size, gs.size)) + for (x, y) in gs.get_legal_moves(): + expectation[x, y] = 1 + self.assertTrue(np.all(expectation == feature)) + + def test_feature_concatenation(self): + gs = simple_board() + pp = Preprocess(["board", "sensibleness", "capture_size"]) + feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) + + expectation = np.zeros((gs.size, gs.size, 3 + 1 + 8)) + + # first three planes: board + expectation[:, :, 0] = (gs.board == go.WHITE) * 1 + expectation[:, :, 1] = (gs.board == go.BLACK) * 1 + expectation[:, :, 2] = (gs.board == go.EMPTY) * 1 + + # 4th plane: sensibleness (as in test_get_sensibleness) + for (x, y) in gs.get_legal_moves(): + if not (gs.is_eye((x, y), go.WHITE)): + expectation[x, y, 3] = 1 + + # 5th through 12th plane: capture size (all zero-capture) + for (x, y) in gs.get_legal_moves(): + expectation[x, y, 4] = 1 + + self.assertTrue(np.all(expectation == feature)) if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/tests/test_reinforcement_policy_trainer.py b/tests/test_reinforcement_policy_trainer.py index 702d25332..03b041696 100644 --- a/tests/test_reinforcement_policy_trainer.py +++ b/tests/test_reinforcement_policy_trainer.py @@ -10,124 +10,124 @@ class TestReinforcementPolicyTrainer(unittest.TestCase): - def testTrain(self): - model = os.path.join('tests', 'test_data', 'minimodel.json') - init_weights = os.path.join('tests', 'test_data', 'hdf5', 'random_minimodel_weights.hdf5') - output = os.path.join('tests', 'test_data', '.tmp.rl.training/') - args = [model, init_weights, output, '--game-batch', '1', '--iterations', '1'] - run_training(args) + def testTrain(self): + model = os.path.join('tests', 'test_data', 'minimodel.json') + init_weights = os.path.join('tests', 'test_data', 'hdf5', 'random_minimodel_weights.hdf5') + output = os.path.join('tests', 
'test_data', '.tmp.rl.training/') + args = [model, init_weights, output, '--game-batch', '1', '--iterations', '1'] + run_training(args) - os.remove(os.path.join(output, 'metadata.json')) - os.remove(os.path.join(output, 'weights.00000.hdf5')) - os.remove(os.path.join(output, 'weights.00001.hdf5')) - os.rmdir(output) + os.remove(os.path.join(output, 'metadata.json')) + os.remove(os.path.join(output, 'weights.00000.hdf5')) + os.remove(os.path.join(output, 'weights.00001.hdf5')) + os.rmdir(output) class TestOptimizer(unittest.TestCase): - def testApplyAndResetOnGamesFinished(self): - policy = CNNPolicy.load_model(os.path.join('tests', 'test_data', 'minimodel.json')) - state = GameState(size=19) - optimizer = BatchedReinforcementLearningSGD(lr=0.01, ng=2) - policy.model.compile(loss='categorical_crossentropy', optimizer=optimizer) - - # Helper to check initial conditions of the optimizer. - def assertOptimizerInitialConditions(): - for v in optimizer.gradient_sign: - self.assertEqual(K.eval(v), 0) - self.assertEqual(K.eval(optimizer.running_games), 2) - - initial_parameters = policy.model.get_weights() - - def assertModelEffect(changed): - any_change = False - for cur, init in zip(policy.model.get_weights(), initial_parameters): - if not np.allclose(init, cur): - any_change = True - break - self.assertEqual(any_change, changed) - - assertOptimizerInitialConditions() - - # Make moves on the state and get trainable (state, action) pairs from them. - state_tensors = [] - action_tensors = [] - moves = [(2, 2), (16, 16), (3, 17), (16, 2), (4, 10), (10, 3)] - for m in moves: - (st_tensor, mv_tensor) = _make_training_pair(state, m, policy.preprocessor) - state_tensors.append(st_tensor) - action_tensors.append(mv_tensor) - state.do_move(m) - - for i, (s, a) in enumerate(zip(state_tensors, action_tensors)): - # Even moves in game 0, odd moves in game 1 - game_idx = i % 2 - optimizer.set_current_game(game_idx) - is_last_move = i + 2 >= len(moves) - if is_last_move: - # Mark game 0 as a win and game 1 as a loss. - optimizer.set_result(game_idx, game_idx == 0) - else: - # Games not finished yet; assert no change to optimizer state. - assertOptimizerInitialConditions() - # train_on_batch accumulates gradients, and should only cause a change to parameters - # on the first call after the final set_result() call - policy.model.train_on_batch(s, a) - if i + 1 < len(moves): - assertModelEffect(changed=False) - else: - assertModelEffect(changed=True) - # Once both games finished, the last call to train_on_batch() should have triggered a reset - # to the optimizer parameters back to initial conditions. - assertOptimizerInitialConditions() - - def testGradientDirectionChangesWithGameResult(self): - - def run_and_get_new_weights(init_weights, win0, win1): - state = GameState(size=19) - policy = CNNPolicy.load_model(os.path.join('tests', 'test_data', 'minimodel.json')) - policy.model.set_weights(init_weights) - optimizer = BatchedReinforcementLearningSGD(lr=0.01, ng=2) - policy.model.compile(loss='categorical_crossentropy', optimizer=optimizer) - - # Make moves on the state and get trainable (state, action) pairs from them. 
- moves = [(2, 2), (16, 16), (3, 17), (16, 2), (4, 10), (10, 3)] - state_tensors = [] - action_tensors = [] - for m in moves: - (st_tensor, mv_tensor) = _make_training_pair(state, m, policy.preprocessor) - state_tensors.append(st_tensor) - action_tensors.append(mv_tensor) - state.do_move(m) - - for i, (s, a) in enumerate(zip(state_tensors, action_tensors)): - # Put even state/action pairs in game 0, odd ones in game 1. - game_idx = i % 2 - optimizer.set_current_game(game_idx) - is_last_move = i + 2 >= len(moves) - if is_last_move: - if game_idx == 0: - optimizer.set_result(game_idx, win0) - else: - optimizer.set_result(game_idx, win1) - # train_on_batch accumulates gradients, and should only cause a change to parameters - # on the first call after the final set_result() call - policy.model.train_on_batch(s, a) - return policy.model.get_weights() - - policy = CNNPolicy.load_model(os.path.join('tests', 'test_data', 'minimodel.json')) - initial_parameters = policy.model.get_weights() - # Cases 1 and 2 have identical starting models and identical (state, action) pairs, - # but they differ in who won the games. - parameters1 = run_and_get_new_weights(initial_parameters, True, False) - parameters2 = run_and_get_new_weights(initial_parameters, False, True) - - # Changes in case 1 should be equal and opposite to changes in case 2. Allowing 0.1% - # difference in precision. - for (i, p1, p2) in zip(initial_parameters, parameters1, parameters2): - diff1 = p1 - i - diff2 = p2 - i - npt.assert_allclose(diff1, -diff2, rtol=1e-3) + def testApplyAndResetOnGamesFinished(self): + policy = CNNPolicy.load_model(os.path.join('tests', 'test_data', 'minimodel.json')) + state = GameState(size=19) + optimizer = BatchedReinforcementLearningSGD(lr=0.01, ng=2) + policy.model.compile(loss='categorical_crossentropy', optimizer=optimizer) + + # Helper to check initial conditions of the optimizer. + def assertOptimizerInitialConditions(): + for v in optimizer.gradient_sign: + self.assertEqual(K.eval(v), 0) + self.assertEqual(K.eval(optimizer.running_games), 2) + + initial_parameters = policy.model.get_weights() + + def assertModelEffect(changed): + any_change = False + for cur, init in zip(policy.model.get_weights(), initial_parameters): + if not np.allclose(init, cur): + any_change = True + break + self.assertEqual(any_change, changed) + + assertOptimizerInitialConditions() + + # Make moves on the state and get trainable (state, action) pairs from them. + state_tensors = [] + action_tensors = [] + moves = [(2, 2), (16, 16), (3, 17), (16, 2), (4, 10), (10, 3)] + for m in moves: + (st_tensor, mv_tensor) = _make_training_pair(state, m, policy.preprocessor) + state_tensors.append(st_tensor) + action_tensors.append(mv_tensor) + state.do_move(m) + + for i, (s, a) in enumerate(zip(state_tensors, action_tensors)): + # Even moves in game 0, odd moves in game 1 + game_idx = i % 2 + optimizer.set_current_game(game_idx) + is_last_move = i + 2 >= len(moves) + if is_last_move: + # Mark game 0 as a win and game 1 as a loss. + optimizer.set_result(game_idx, game_idx == 0) + else: + # Games not finished yet; assert no change to optimizer state. 
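+                # (i.e. the per-game gradient_sign accumulators are still zero
+                # and running_games is still 2)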
+ assertOptimizerInitialConditions() + # train_on_batch accumulates gradients, and should only cause a change to parameters + # on the first call after the final set_result() call + policy.model.train_on_batch(s, a) + if i + 1 < len(moves): + assertModelEffect(changed=False) + else: + assertModelEffect(changed=True) + # Once both games finished, the last call to train_on_batch() should have triggered a reset + # to the optimizer parameters back to initial conditions. + assertOptimizerInitialConditions() + + def testGradientDirectionChangesWithGameResult(self): + + def run_and_get_new_weights(init_weights, win0, win1): + state = GameState(size=19) + policy = CNNPolicy.load_model(os.path.join('tests', 'test_data', 'minimodel.json')) + policy.model.set_weights(init_weights) + optimizer = BatchedReinforcementLearningSGD(lr=0.01, ng=2) + policy.model.compile(loss='categorical_crossentropy', optimizer=optimizer) + + # Make moves on the state and get trainable (state, action) pairs from them. + moves = [(2, 2), (16, 16), (3, 17), (16, 2), (4, 10), (10, 3)] + state_tensors = [] + action_tensors = [] + for m in moves: + (st_tensor, mv_tensor) = _make_training_pair(state, m, policy.preprocessor) + state_tensors.append(st_tensor) + action_tensors.append(mv_tensor) + state.do_move(m) + + for i, (s, a) in enumerate(zip(state_tensors, action_tensors)): + # Put even state/action pairs in game 0, odd ones in game 1. + game_idx = i % 2 + optimizer.set_current_game(game_idx) + is_last_move = i + 2 >= len(moves) + if is_last_move: + if game_idx == 0: + optimizer.set_result(game_idx, win0) + else: + optimizer.set_result(game_idx, win1) + # train_on_batch accumulates gradients, and should only cause a change to parameters + # on the first call after the final set_result() call + policy.model.train_on_batch(s, a) + return policy.model.get_weights() + + policy = CNNPolicy.load_model(os.path.join('tests', 'test_data', 'minimodel.json')) + initial_parameters = policy.model.get_weights() + # Cases 1 and 2 have identical starting models and identical (state, action) pairs, + # but they differ in who won the games. + parameters1 = run_and_get_new_weights(initial_parameters, True, False) + parameters2 = run_and_get_new_weights(initial_parameters, False, True) + + # Changes in case 1 should be equal and opposite to changes in case 2. Allowing 0.1% + # difference in precision. 
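+        # (The two runs differ only in which game is marked as the winner, so each
+        # game's accumulated gradients are applied with the opposite sign and the
+        # net parameter update should simply negate.)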
+ for (i, p1, p2) in zip(initial_parameters, parameters1, parameters2): + diff1 = p1 - i + diff2 = p2 - i + npt.assert_allclose(diff1, -diff2, rtol=1e-3) if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/tests/test_supervised_policy_trainer.py b/tests/test_supervised_policy_trainer.py index fea59f361..6ff12baa0 100644 --- a/tests/test_supervised_policy_trainer.py +++ b/tests/test_supervised_policy_trainer.py @@ -4,17 +4,17 @@ class TestSupervisedPolicyTrainer(unittest.TestCase): - def testTrain(self): - model = 'tests/test_data/minimodel.json' - data = 'tests/test_data/hdf5/alphago-vs-lee-sedol-features.hdf5' - output = 'tests/test_data/.tmp.training/' - args = [model, data, output, '--epochs', '1'] - run_training(args) + def testTrain(self): + model = 'tests/test_data/minimodel.json' + data = 'tests/test_data/hdf5/alphago-vs-lee-sedol-features.hdf5' + output = 'tests/test_data/.tmp.training/' + args = [model, data, output, '--epochs', '1'] + run_training(args) - os.remove(os.path.join(output, 'metadata.json')) - os.remove(os.path.join(output, 'shuffle.npz')) - os.remove(os.path.join(output, 'weights.00000.hdf5')) - os.rmdir(output) + os.remove(os.path.join(output, 'metadata.json')) + os.remove(os.path.join(output, 'shuffle.npz')) + os.remove(os.path.join(output, 'weights.00000.hdf5')) + os.rmdir(output) if __name__ == '__main__': - unittest.main() + unittest.main()