diff --git a/AlphaGo/ai.py b/AlphaGo/ai.py
index 0c2157c8b..960c21d9a 100644
--- a/AlphaGo/ai.py
+++ b/AlphaGo/ai.py
@@ -54,12 +54,14 @@ def get_move(self, state):
         sensible_moves = [move for move in state.get_legal_moves(include_eyes=False)]
         if len(sensible_moves) > 0:
             move_probs = self.policy.eval_state(state, sensible_moves)
-            # zip(*list) is like the 'transpose' of zip; zip(*zip([1,2,3], [4,5,6])) is [(1,2,3), (4,5,6)]
+            # zip(*list) is like the 'transpose' of zip;
+            # zip(*zip([1,2,3], [4,5,6])) is [(1,2,3), (4,5,6)]
             moves, probabilities = zip(*move_probs)
             probabilities = np.array(probabilities)
             probabilities = probabilities ** self.beta
             probabilities = probabilities / probabilities.sum()
-            # numpy interprets a list of tuples as 2D, so we must choose an _index_ of moves then apply it in 2 steps
+            # numpy interprets a list of tuples as 2D, so we must choose an
+            # _index_ of moves then apply it in 2 steps
             choice_idx = np.random.choice(len(moves), p=probabilities)
             return moves[choice_idx]
         return go.PASS_MOVE
@@ -67,7 +69,8 @@ def get_move(self, state):
     def get_moves(self, states):
         """Batch version of get_move. A list of moves is returned (one per state)
         """
-        sensible_move_lists = [[move for move in st.get_legal_moves(include_eyes=False)] for st in states]
+        sensible_move_lists = [[move for move in st.get_legal_moves(include_eyes=False)]
+                               for st in states]
         all_moves_distributions = self.policy.batch_eval_state(states, sensible_move_lists)
         move_list = [None] * len(states)
         for i, move_probs in enumerate(all_moves_distributions):
@@ -85,7 +88,8 @@ def get_moves(self, states):
 
 
 class MCTSPlayer(object):
-    def __init__(self, value_function, policy_function, rollout_function, lmbda=.5, c_puct=5, rollout_limit=500, playout_depth=40, n_playout=100):
+    def __init__(self, value_function, policy_function, rollout_function, lmbda=.5, c_puct=5,
+                 rollout_limit=500, playout_depth=40, n_playout=100):
         self.mcts = mcts.MCTS(value_function, policy_function, rollout_function, lmbda, c_puct,
                               rollout_limit, playout_depth, n_playout)
 
diff --git a/AlphaGo/go.py b/AlphaGo/go.py
index f56df4c16..89f328b85 100644
--- a/AlphaGo/go.py
+++ b/AlphaGo/go.py
@@ -47,7 +47,8 @@ def __init__(self, size=19, komi=7.5, enforce_superko=False):
         # similarly to `liberty_sets`, `group_sets[x][y]` points to a set of tuples
         # containing all (x',y') pairs in the group connected to (x,y)
         self.group_sets = [[set() for _ in range(size)] for _ in range(size)]
-        # cache of list of legal moves (actually 'sensible' moves, with a separate list for eye-moves on request)
+        # cache of list of legal moves (actually 'sensible' moves, with a
+        # separate list for eye-moves on request)
         self.__legal_move_cache = None
         self.__legal_eyes_cache = None
         # on-the-fly record of 'age' of each stone
@@ -104,7 +105,8 @@ def _create_neighbors_cache(self):
             GameState.__NEIGHBORS_CACHE[self.size] = {}
             for x in xrange(self.size):
                 for y in xrange(self.size):
-                    neighbors = [xy for xy in [(x - 1, y), (x + 1, y), (x, y - 1), (x, y + 1)] if self._on_board(xy)]
+                    neighbors = [xy for xy in [(x - 1, y), (x + 1, y), (x, y - 1), (x, y + 1)]
+                                 if self._on_board(xy)]
                     GameState.__NEIGHBORS_CACHE[self.size][(x, y)] = neighbors
 
     def _neighbors(self, position):
@@ -117,7 +119,8 @@ def _diagonals(self, position):
         """Like _neighbors but for diagonal positions
         """
         (x, y) = position
-        return filter(self._on_board, [(x - 1, y - 1), (x + 1, y + 1), (x + 1, y - 1), (x - 1, y + 1)])
+        return filter(self._on_board, [(x - 1, y - 1), (x + 1, y + 1),
+                                       (x + 1, y - 1), (x - 1, y + 1)])
 
     def _update_neighbors(self, position):
         """A private helper function to update self.group_sets and self.liberty_sets
@@ -229,8 +232,10 @@ def is_suicide(self, action):
         return False
 
     def is_positional_superko(self, action):
-        """Find all actions that the current_player has done in the past, taking into account the fact that
-        history starts with BLACK when there are no handicaps or with WHITE when there are.
+        """Find all actions that the current_player has done in the past, taking into
+        account the fact that history starts with BLACK when there are no
+        handicaps or with WHITE when there are.
+
         """
         if len(self.handicaps) == 0 and self.current_player == BLACK:
             player_history = self.history[0::2]
diff --git a/AlphaGo/mcts.py b/AlphaGo/mcts.py
index 09f9439b8..479e56fd8 100644
--- a/AlphaGo/mcts.py
+++ b/AlphaGo/mcts.py
@@ -105,7 +105,8 @@ class MCTS(object):
     fast evaluation from leaf nodes to the end of the game.
     """
 
-    def __init__(self, value_fn, policy_fn, rollout_policy_fn, lmbda=0.5, c_puct=5, rollout_limit=500, playout_depth=20, n_playout=10000):
+    def __init__(self, value_fn, policy_fn, rollout_policy_fn, lmbda=0.5, c_puct=5,
+                 rollout_limit=500, playout_depth=20, n_playout=10000):
         """Arguments:
         value_fn -- a function that takes in a state and ouputs a score in [-1, 1], i.e. the expected value of
         the end game score from the current player's perspective.
@@ -115,9 +116,9 @@ def __init__(self, value_fn, policy_fn, rollout_policy_fn, lmbda=0.5, c_puct=5,
         lmbda -- controls the relative weight of the value network and fast rollout policy result
         in determining the value of a leaf node. lmbda must be in [0, 1], where 0 means use only
         the value network and 1 means use only the result from the rollout.
-        c_puct -- a number in (0, inf) that controls how quickly exploration converges to the maximum-
-        value policy, where a higher value means relying on the prior more, and should be used only
-        in conjunction with a large value for n_playout.
+        c_puct -- a number in (0, inf) that controls how quickly exploration converges to the
+        maximum-value policy, where a higher value means relying on the prior more, and
+        should be used only in conjunction with a large value for n_playout.
         """
         self._root = TreeNode(None, 1.0)
         self._value = value_fn
diff --git a/AlphaGo/models/nn_util.py b/AlphaGo/models/nn_util.py
index dcaabfd3e..9fe0eaa0f 100644
--- a/AlphaGo/models/nn_util.py
+++ b/AlphaGo/models/nn_util.py
@@ -46,7 +46,8 @@ def _model_forward(self):
         # be set to 0 when using the network in prediction mode and is automatically set to 1
         # during training.
         if self.model.uses_learning_phase:
-            forward_function = K.function([self.model.input, K.learning_phase()], [self.model.output])
+            forward_function = K.function([self.model.input, K.learning_phase()],
+                                          [self.model.output])
 
             # the forward_function returns a list of tensors
             # the first [0] gets the front tensor.
@@ -68,7 +69,8 @@ def load_model(json_file):
         try:
             network_class = NeuralNetBase.subclasses[class_name]
         except KeyError:
-            raise ValueError("Unknown neural network type in json file: {}\n(was it registered with the @neuralnet decorator?)".format(class_name))
+            raise ValueError("Unknown neural network type in json file: {}\n"
+                             "(was it registered with the @neuralnet decorator?)".format(class_name))
 
         # create new object
         new_net = network_class(object_specs['feature_list'], init_network=False)
diff --git a/AlphaGo/models/policy.py b/AlphaGo/models/policy.py
index c16058cc2..2b3a802f0 100644
--- a/AlphaGo/models/policy.py
+++ b/AlphaGo/models/policy.py
@@ -40,13 +40,15 @@ def batch_eval_state(self, states, moves_lists=None):
             raise ValueError("all states must have the same size")
         # concatenate together all one-hot encoded states along the 'batch' dimension
         nn_input = np.concatenate([self.preprocessor.state_to_tensor(s) for s in states], axis=0)
-        # pass all input through the network at once (backend makes use of batches if len(states) is large)
+        # pass all input through the network at once (backend makes use of
+        # batches if len(states) is large)
         network_output = self.forward(nn_input)
         # default move lists to all legal moves
         moves_lists = moves_lists or [st.get_legal_moves() for st in states]
         results = [None] * n_states
         for i in range(n_states):
-            results[i] = self._select_moves_and_normalize(network_output[i], moves_lists[i], state_size)
+            results[i] = self._select_moves_and_normalize(network_output[i], moves_lists[i],
+                                                          state_size)
         return results
 
     def eval_state(self, state, moves=None):
@@ -168,10 +170,12 @@ def create_network(**kwargs):
         O - output
         M - merge
 
-        The input is always passed through a Conv2D layer, the output of which layer is counted as '1'.
-        Each subsequent [R -- C] block is counted as one 'layer'. The 'merge' layer isn't counted; hence
-        if n_skip_1 is 2, the next valid skip parameter is n_skip_3, which will start at the output
-        of the merge
+        The input is always passed through a Conv2D layer, the output of which
+        layer is counted as '1'. Each subsequent [R -- C] block is counted as
+        one 'layer'. The 'merge' layer isn't counted; hence if n_skip_1 is 2,
+        the next valid skip parameter is n_skip_3, which will start at the
+        output of the merge
+
         """
         defaults = {
             "board": 19,
diff --git a/AlphaGo/preprocessing/game_converter.py b/AlphaGo/preprocessing/game_converter.py
index 6324d254b..77bde6905 100644
--- a/AlphaGo/preprocessing/game_converter.py
+++ b/AlphaGo/preprocessing/game_converter.py
@@ -46,18 +46,22 @@ def sgfs_to_hdf5(self, sgf_files, hdf5_file, bd_size=19, ignore_errors=True, ver
         - sgf_files : an iterable of relative or absolute paths to SGF files
         - hdf5_file : the name of the HDF5 where features will be saved
         - bd_size : side length of board of games that are loaded
-        - ignore_errors : if True, issues a Warning when there is an unknown exception rather than halting. Note
-        that sgf.ParseException and go.IllegalMove exceptions are always skipped
+
+        - ignore_errors : if True, issues a Warning when there is an unknown
+          exception rather than halting. Note that sgf.ParseException and
+          go.IllegalMove exceptions are always skipped
 
         The resulting file has the following properties:
             states : dataset with shape (n_data, n_features, board width, board height)
-            actions : dataset with shape (n_data, 2) (actions are stored as x,y tuples of where the move was played)
+            actions : dataset with shape (n_data, 2) (actions are stored as x,y tuples of
+                      where the move was played)
             file_offsets : group mapping from filenames to tuples of (index, length)
 
         For example, to find what positions in the dataset come from 'test.sgf':
             index, length = file_offsets['test.sgf']
             test_states = states[index:index+length]
             test_actions = actions[index:index+length]
+
         """
 
         # TODO - also save feature list
@@ -72,9 +76,9 @@ def sgfs_to_hdf5(self, sgf_files, hdf5_file, bd_size=19, ignore_errors=True, ver
                 'states',
                 dtype=np.uint8,
                 shape=(1, self.n_features, bd_size, bd_size),
-                maxshape=(None, self.n_features, bd_size, bd_size),  # 'None' dimension allows it to grow arbitrarily
-                exact=False,  # allow non-uint8 datasets to be loaded, coerced to uint8
-                chunks=(64, self.n_features, bd_size, bd_size),  # approximately 1MB chunks
+                maxshape=(None, self.n_features, bd_size, bd_size),  # 'None' == arbitrary size
+                exact=False,  # allow non-uint8 datasets to be loaded, coerced to uint8
+                chunks=(64, self.n_features, bd_size, bd_size),  # approximately 1MB chunks
                 compression="lzf")
             actions = h5f.require_dataset(
                 'actions',
@@ -107,7 +111,8 @@ def sgfs_to_hdf5(self, sgf_files, hdf5_file, bd_size=19, ignore_errors=True, ver
                         n_pairs += 1
                         next_idx += 1
                 except go.IllegalMove:
-                    warnings.warn("Illegal Move encountered in %s\n\tdropping the remainder of the game" % file_name)
+                    warnings.warn("Illegal Move encountered in %s\n"
+                                  "\tdropping the remainder of the game" % file_name)
                 except sgf.ParseException:
                     warnings.warn("Could not parse %s\n\tdropping game" % file_name)
                 except SizeMismatchError:
@@ -115,12 +120,14 @@ def sgfs_to_hdf5(self, sgf_files, hdf5_file, bd_size=19, ignore_errors=True, ver
                 except Exception as e:
                     # catch everything else
                     if ignore_errors:
-                        warnings.warn("Unkown exception with file %s\n\t%s" % (file_name, e), stacklevel=2)
+                        warnings.warn("Unkown exception with file %s\n\t%s" % (file_name, e),
+                                      stacklevel=2)
                     else:
                         raise e
                 finally:
                     if n_pairs > 0:
-                        # '/' has special meaning in HDF5 key names, so they are replaced with ':' here
+                        # '/' has special meaning in HDF5 key names, so they
+                        # are replaced with ':' here
                         file_name_key = file_name.replace('/', ':')
                         file_offsets[file_name_key] = [file_start_idx, n_pairs]
                         if verbose:
diff --git a/AlphaGo/preprocessing/preprocessing.py b/AlphaGo/preprocessing/preprocessing.py
index 5455e3e9b..17690a1b3 100644
--- a/AlphaGo/preprocessing/preprocessing.py
+++ b/AlphaGo/preprocessing/preprocessing.py
@@ -43,7 +43,8 @@ def get_liberties(state, maximum=8):
     """
     planes = np.zeros((maximum, state.size, state.size))
     for i in range(maximum):
-        # single liberties in plane zero (groups won't have zero), double liberties in plane one, etc
+        # single liberties in plane zero (groups won't have zero), double
+        # liberties in plane one, etc
         planes[i, state.liberty_counts == i + 1] = 1
     # the "maximum-or-more" case on the backmost plane
     planes[maximum - 1, state.liberty_counts >= maximum] = 1
@@ -51,14 +52,16 @@ def get_capture_size(state, maximum=8):
-    """A feature encoding the number of opponent stones that would be captured by playing at each location,
-    up to 'maximum'
+    """A feature encoding the number of opponent stones that would be captured by
+    playing at each location, up to 'maximum'
 
     Note:
     - we currently *do* treat the 0th plane as "capturing zero stones"
-    - the [maximum-1] plane is used for any capturable group of size greater than or equal to maximum-1
+    - the [maximum-1] plane is used for any capturable group of size
+      greater than or equal to maximum-1
     - the 0th plane is used for legal moves that would not result in capture
     - illegal move locations are all-zero features
+
     """
     planes = np.zeros((maximum, state.size, state.size))
     for (x, y) in state.get_legal_moves():
@@ -71,14 +74,17 @@ def get_capture_size(state, maximum=8):
             # (note suicide and ko are not an issue because they are not
             # legal moves)
             (gx, gy) = next(iter(neighbor_group))
-            if (state.liberty_counts[gx][gy] == 1) and (state.board[gx, gy] != state.current_player):
+            if (state.liberty_counts[gx][gy] == 1) and \
+               (state.board[gx, gy] != state.current_player):
                 n_captured += len(state.group_sets[gx][gy])
         planes[min(n_captured, maximum - 1), x, y] = 1
     return planes
 
 
 def get_self_atari_size(state, maximum=8):
-    """A feature encoding the size of the own-stone group that is put into atari by playing at a location
+    """A feature encoding the size of the own-stone group that is put into atari by
+    playing at a location
+
     """
     planes = np.zeros((maximum, state.size, state.size))
 
diff --git a/AlphaGo/training/reinforcement_policy_trainer.py b/AlphaGo/training/reinforcement_policy_trainer.py
index 18fac78a3..69242105a 100644
--- a/AlphaGo/training/reinforcement_policy_trainer.py
+++ b/AlphaGo/training/reinforcement_policy_trainer.py
@@ -58,7 +58,8 @@ def get_updates(self, params, constraints, loss):
         grads = self.get_gradients(loss, params)
         # Create a set of accumulated gradients, one for each game.
         shapes = [K.get_variable_shape(p) for p in params]
-        self.cumulative_gradients = [[K.zeros(shape) for shape in shapes] for _ in range(self.num_games)]
+        self.cumulative_gradients = [[K.zeros(shape) for shape in shapes]
+                                     for _ in range(self.num_games)]
 
         def conditional_update(cond, variable, new_value):
             '''Helper function to create updates that only happen when cond is True. Writes to
@@ -123,9 +124,12 @@ def _make_training_pair(st, mv, preprocessor):
 
 
 def run_n_games(optimizer, learner, opponent, num_games):
-    '''Run num_games games to completion, calling train_batch() on each position the learner sees.
+    '''Run num_games games to completion, calling train_batch() on each position
+    the learner sees.
+
+    (Note: optimizer only accumulates gradients in its update function until
+    all games have finished)
 
-    (Note: optimizer only accumulates gradients in its update function until all games have finished)
     '''
     board_size = learner.policy.model.input_shape[-1]
     states = [GameState(size=board_size) for _ in range(num_games)]
@@ -214,11 +218,14 @@ def run_training(cmd_line_args=None):
         # make a copy of weights file, "weights.00000.hdf5" in the output directory
         copyfile(args.initial_weights, os.path.join(args.out_directory, ZEROTH_FILE))
         if args.verbose:
-            print "copied {} to {}".format(args.initial_weights, os.path.join(args.out_directory, ZEROTH_FILE))
+            print "copied {} to {}".format(args.initial_weights,
+                                           os.path.join(args.out_directory, ZEROTH_FILE))
         player_weights = ZEROTH_FILE
     else:
-        # if resuming, we expect initial_weights to be just a "weights.#####.hdf5" file, not a full path
-        args.initial_weights = os.path.join(args.out_directory, os.path.basename(args.initial_weights))
+        # if resuming, we expect initial_weights to be just a
+        # "weights.#####.hdf5" file, not a full path
+        args.initial_weights = os.path.join(args.out_directory,
+                                            os.path.basename(args.initial_weights))
         if not os.path.exists(args.initial_weights):
             raise ValueError("Cannot resume; weights {} do not exist".format(args.initial_weights))
         elif args.verbose:
@@ -228,12 +235,14 @@ def run_training(cmd_line_args=None):
     # Set initial conditions
     policy = CNNPolicy.load_model(args.model_json)
     policy.model.load_weights(args.initial_weights)
-    player = ProbabilisticPolicyPlayer(policy, temperature=args.policy_temp, move_limit=args.move_limit)
+    player = ProbabilisticPolicyPlayer(policy, temperature=args.policy_temp,
+                                       move_limit=args.move_limit)
 
     # different opponents come from simply changing the weights of 'opponent.policy.model'. That
     # is, only 'opp_policy' needs to be changed, and 'opponent' will change.
     opp_policy = CNNPolicy.load_model(args.model_json)
-    opponent = ProbabilisticPolicyPlayer(opp_policy, temperature=args.policy_temp, move_limit=args.move_limit)
+    opponent = ProbabilisticPolicyPlayer(opp_policy, temperature=args.policy_temp,
+                                         move_limit=args.move_limit)
 
     if args.verbose:
         print "created player and opponent with temperature {}".format(args.policy_temp)
@@ -246,7 +255,8 @@ def run_training(cmd_line_args=None):
             "temperature": args.policy_temp,
             "game_batch": args.game_batch,
             "opponents": [ZEROTH_FILE],  # which weights from which to sample an opponent each batch
-            "win_ratio": {}  # map from player to tuple of (opponent, win ratio) Useful for validating in lieu of 'accuracy/loss'
+            "win_ratio": {}  # map from player to tuple of (opponent, win ratio) Useful for
+                             # validating in lieu of 'accuracy/loss'
         }
     else:
         with open(os.path.join(args.out_directory, "metadata.json"), "r") as f:
@@ -262,8 +272,8 @@ def save_metadata():
     optimizer = BatchedReinforcementLearningSGD(lr=args.learning_rate, ng=args.game_batch)
     player.policy.model.compile(loss='categorical_crossentropy', optimizer=optimizer)
     for i_iter in xrange(1, args.iterations + 1):
-        # Randomly choose opponent from pool (possibly self), and playing game_batch games against
-        # them.
+        # Randomly choose opponent from pool (possibly self), and playing
+        # game_batch games against them.
        opp_weights = np.random.choice(metadata["opponents"])
        opp_path = os.path.join(args.out_directory, opp_weights)
 
@@ -272,7 +282,8 @@ def save_metadata():
 
         if args.verbose:
             print "Batch {}\tsampled opponent is {}".format(i_iter, opp_weights)
-        # Run games (and learn from results). Keep track of the win ratio vs each opponent over time.
+        # Run games (and learn from results). Keep track of the win ratio vs
+        # each opponent over time.
         win_ratio = run_n_games(optimizer, player, opponent, args.game_batch)
         metadata["win_ratio"][player_weights] = (opp_weights, win_ratio)
 
diff --git a/AlphaGo/training/supervised_policy_trainer.py b/AlphaGo/training/supervised_policy_trainer.py
index ec247812c..5fc12942d 100644
--- a/AlphaGo/training/supervised_policy_trainer.py
+++ b/AlphaGo/training/supervised_policy_trainer.py
@@ -15,7 +15,8 @@ def one_hot_action(action, size=19):
     return categorical
 
 
-def shuffled_hdf5_batch_generator(state_dataset, action_dataset, indices, batch_size, transforms=[]):
+def shuffled_hdf5_batch_generator(state_dataset, action_dataset,
+                                  indices, batch_size, transforms=[]):
     """A generator of batches of training data for use with the fit_generator function of Keras.
     Data is accessed in the order of the given indices for shuffling.
     """
@@ -29,7 +30,8 @@ def shuffled_hdf5_batch_generator(state_dataset, action_dataset, indices, batch_
         # choose a random transformation of the data (rotations/reflections of the board)
         transform = np.random.choice(transforms)
         # get state from dataset and transform it.
-        # loop comprehension is used so that the transformation acts on the 3rd and 4th dimensions
+        # loop comprehension is used so that the transformation acts on the
+        # 3rd and 4th dimensions
         state = np.array([transform(plane) for plane in state_dataset[data_idx]])
         # must be cast to a tuple so that it is interpreted as (x,y) not [(x,:), (y,:)]
         action_xy = tuple(action_dataset[data_idx])
@@ -115,10 +117,12 @@ def run_training(cmd_line_args=None):
 
     if args.verbose:
         if resume:
-            print "trying to resume from %s with weights %s" % (args.out_directory, os.path.join(args.out_directory, args.weights))
+            print("trying to resume from %s with weights %s" %
+                  (args.out_directory, os.path.join(args.out_directory, args.weights)))
         else:
             if os.path.exists(args.out_directory):
-                print "directory %s exists. any previous data will be overwritten" % args.out_directory
+                print("directory %s exists. any previous data will be overwritten" %
+                      args.out_directory)
             else:
                 print "starting fresh output directory %s" % args.out_directory
 
@@ -127,11 +131,13 @@ def run_training(cmd_line_args=None):
     if resume:
         model.load_weights(os.path.join(args.out_directory, args.weights))
 
-    # TODO - (waiting on game_converter) verify that features of model match features of training data
+    # TODO - (waiting on game_converter) verify that features of model match
+    # features of training data
     dataset = h5.File(args.train_data)
     n_total_data = len(dataset["states"])
    n_train_data = int(args.train_val_test[0] * n_total_data)
-    # Need to make sure training data is divisible by minibatch size or get warning mentioning accuracy from keras
+    # Need to make sure training data is divisible by minibatch size or get
+    # warning mentioning accuracy from keras
     n_train_data = n_train_data - (n_train_data % args.minibatch)
     n_val_data = n_total_data - n_train_data
     # n_test_data = n_total_data - (n_train_data + n_val_data)
@@ -154,15 +160,21 @@ def run_training(cmd_line_args=None):
         with open(meta_file, "r") as f:
             meta_writer.metadata = json.load(f)
         if args.verbose:
-            print "previous metadata loaded: %d epochs. new epochs will be appended." % len(meta_writer.metadata["epochs"])
+            print("previous metadata loaded: %d epochs. new epochs will be appended." %
+                  (len(meta_writer.metadata["epochs"])))
     elif args.verbose:
         print "starting with empty metadata"
-    # the MetadataWriterCallback only sets 'epoch' and 'best_epoch'. We can add in anything else we like here
-    # TODO - model and train_data are saved in meta_file; check that they match (and make args optional when restarting?)
+    # the MetadataWriterCallback only sets 'epoch' and 'best_epoch'. We can add
+    # in anything else we like here
+    #
+    # TODO - model and train_data are saved in meta_file; check that they match
+    # (and make args optional when restarting?)
     meta_writer.metadata["training_data"] = args.train_data
     meta_writer.metadata["model_file"] = args.model
-    # Record all command line args in a list so that all args are recorded even when training is stopped and resumed.
-    meta_writer.metadata["cmd_line_args"] = meta_writer.metadata.get("cmd_line_args", []).append(vars(args))
+    # Record all command line args in a list so that all args are recorded even
+    # when training is stopped and resumed.
+    meta_writer.metadata["cmd_line_args"] \
+        = meta_writer.metadata.get("cmd_line_args", []).append(vars(args))
 
     # create ModelCheckpoint to save weights every epoch
     checkpoint_template = os.path.join(args.out_directory, "weights.{epoch:05d}.hdf5")
@@ -184,7 +196,8 @@ def run_training(cmd_line_args=None):
             np.save(f, shuffle_indices)
         if args.verbose:
             print "created new data shuffling indices"
-    # training indices are the first consecutive set of shuffled indices, val next, then test gets the remainder
+    # training indices are the first consecutive set of shuffled indices, val
+    # next, then test gets the remainder
     train_indices = shuffle_indices[0:n_train_data]
     val_indices = shuffle_indices[n_train_data:n_train_data + n_val_data]
     # test_indices = shuffle_indices[n_train_data + n_val_data:]
diff --git a/AlphaGo/util.py b/AlphaGo/util.py
index 6acb5062d..e2e918b03 100644
--- a/AlphaGo/util.py
+++ b/AlphaGo/util.py
@@ -62,7 +62,8 @@ def sgf_to_gamestate(sgf_string):
     return gs
 
 
-def save_gamestate_to_sgf(gamestate, path, filename, black_player_name='Unknown', white_player_name='Unknown', size=19, komi=7.5):
+def save_gamestate_to_sgf(gamestate, path, filename, black_player_name='Unknown',
+                          white_player_name='Unknown', size=19, komi=7.5):
     """Creates a simplified sgf for viewing playouts or positions
     """
     str_list = []
@@ -79,7 +80,8 @@ def save_gamestate_to_sgf(gamestate, path, filename, black_player_name='Unknown'
         str_list.append('HA[{}]'.format(len(gamestate.handicaps)))
         str_list.append(';AB')
         for handicap in gamestate.handicaps:
-            str_list.append('[{}{}]'.format(LETTERS[handicap[0]].lower(), LETTERS[handicap[1]].lower()))
+            str_list.append('[{}{}]'.format(LETTERS[handicap[0]].lower(),
+                                            LETTERS[handicap[1]].lower()))
     # Move list
     for move, color in zip(gamestate.history, itertools.cycle(cycle_string)):
         # Move color prefix
@@ -97,12 +99,14 @@ def save_gamestate_to_sgf(gamestate, path, filename, black_player_name='Unknown'
 def sgf_iter_states(sgf_string, include_end=True):
     """Iterates over (GameState, move, player) tuples in the first game of the given SGF file.
 
-    Ignores variations - only the main line is returned.
-    The state object is modified in-place, so don't try to, for example, keep track of it through time
+    Ignores variations - only the main line is returned. The state object is
+    modified in-place, so don't try to, for example, keep track of it through
+    time
+
+    If include_end is False, the final tuple yielded is the penultimate state,
+    but the state will still be left in the final position at the end of
+    iteration because 'gs' is modified in-place the state. See sgf_to_gamestate
 
-    If include_end is False, the final tuple yielded is the penultimate state, but the state
-    will still be left in the final position at the end of iteration because 'gs' is modified
-    in-place the state. See sgf_to_gamestate
     """
     collection = sgf.parse(sgf_string)
     game = collection[0]
diff --git a/benchmarks/preprocessing_benchmark.py b/benchmarks/preprocessing_benchmark.py
index 635c0fd55..8774a1ff7 100644
--- a/benchmarks/preprocessing_benchmark.py
+++ b/benchmarks/preprocessing_benchmark.py
@@ -3,7 +3,8 @@
 
 prof = Profile()
 
-test_features = ["board", "turns_since", "liberties", "capture_size", "self_atari_size", "liberties_after", "sensibleness", "zeros"]
+test_features = ["board", "turns_since", "liberties", "capture_size", "self_atari_size",
+                 "liberties_after", "sensibleness", "zeros"]
 gc = game_converter(test_features)
 
 args = ('tests/test_data/sgf/Lee-Sedol-vs-AlphaGo-20160309.sgf', 19)
diff --git a/benchmarks/reinforcement_policy_training_benchmark.py b/benchmarks/reinforcement_policy_training_benchmark.py
index 4fc66407c..6573c1f62 100644
--- a/benchmarks/reinforcement_policy_training_benchmark.py
+++ b/benchmarks/reinforcement_policy_training_benchmark.py
@@ -5,7 +5,8 @@
 
 # make a miniature model for playing on a miniature 7x7 board
 architecture = {'filters_per_layer': 32, 'layers': 4, 'board': 7}
-features = ['board', 'ones', 'turns_since', 'liberties', 'capture_size', 'self_atari_size', 'liberties_after', 'sensibleness']
+features = ['board', 'ones', 'turns_since', 'liberties', 'capture_size',
+            'self_atari_size', 'liberties_after', 'sensibleness']
 policy = CNNPolicy(features, **architecture)
 
 datadir = os.path.join('benchmarks', 'data')
@@ -21,7 +22,8 @@
 policy.save_model(modelfile)
 
 profile = Profile()
-arguments = (modelfile, weights, outdir, '--learning-rate', '0.001', '--save-every', '2', '--game-batch', '20', '--iterations', '10', '--verbose')
+arguments = (modelfile, weights, outdir, '--learning-rate', '0.001', '--save-every', '2',
+             '--game-batch', '20', '--iterations', '10', '--verbose')
 profile.runcall(run_training, arguments)
 
 profile.dump_stats(stats_file)
diff --git a/interface/gtp_wrapper.py b/interface/gtp_wrapper.py
index 89d35f242..1dca11510 100644
--- a/interface/gtp_wrapper.py
+++ b/interface/gtp_wrapper.py
@@ -9,7 +9,8 @@ def run_gnugo(sgf_file_name, command):
     from distutils import spawn
     if spawn.find_executable('gnugo'):
         from subprocess import Popen, PIPE
-        p = Popen(['gnugo', '--chinese-rules', '--mode', 'gtp', '-l', sgf_file_name], stdout=PIPE, stdin=PIPE, stderr=PIPE)
+        p = Popen(['gnugo', '--chinese-rules', '--mode', 'gtp', '-l', sgf_file_name],
+                  stdout=PIPE, stdin=PIPE, stderr=PIPE)
         out_bytes = p.communicate(input=command)[0]
         return out_bytes.decode('utf-8')[2:]
     else:
diff --git a/tests/test_game_converter.py b/tests/test_game_converter.py
index e0f4963f7..d38bf96fd 100644
--- a/tests/test_game_converter.py
+++ b/tests/test_game_converter.py
@@ -13,12 +13,16 @@ def test_ab_aw(self):
 
 class TestCmdlineConverter(unittest.TestCase):
 
     def test_directory_conversion(self):
-        args = ['--features', 'board,ones,turns_since', '--outfile', '.tmp.testing.h5', '--directory', 'tests/test_data/sgf/']
+        args = ['--features', 'board,ones,turns_since',
+                '--outfile', '.tmp.testing.h5',
+                '--directory', 'tests/test_data/sgf/']
         run_game_converter(args)
         os.remove('.tmp.testing.h5')
 
     def test_directory_walk(self):
-        args = ['--features', 'board,ones,turns_since', '--outfile', '.tmp.testing.h5', '--directory', 'tests/test_data', '--recurse']
+        args = ['--features', 'board,ones,turns_since',
+                '--outfile', '.tmp.testing.h5',
+                '--directory', 'tests/test_data', '--recurse']
         run_game_converter(args)
         os.remove('.tmp.testing.h5')
diff --git a/tests/test_gamestate.py b/tests/test_gamestate.py
index f1f28818b..59863f4e5 100644
--- a/tests/test_gamestate.py
+++ b/tests/test_gamestate.py
@@ -58,7 +58,9 @@ def test_snapback_is_not_ko(self):
         self.assertEqual(gs.num_white_prisoners, 1)
 
     def test_positional_superko(self):
-        move_list = [(0, 3), (0, 4), (1, 3), (1, 4), (2, 3), (2, 4), (2, 2), (3, 4), (2, 1), (3, 3), (3, 1), (3, 2), (3, 0), (4, 2), (1, 1), (4, 1), (8, 0), (4, 0), (8, 1), (0, 2), (8, 2), (0, 1), (8, 3), (1, 0), (8, 4), (2, 0), (0, 0)]
+        move_list = [(0, 3), (0, 4), (1, 3), (1, 4), (2, 3), (2, 4), (2, 2), (3, 4), (2, 1), (3, 3),
+                     (3, 1), (3, 2), (3, 0), (4, 2), (1, 1), (4, 1), (8, 0), (4, 0), (8, 1), (0, 2),
+                     (8, 2), (0, 1), (8, 3), (1, 0), (8, 4), (2, 0), (0, 0)]
         gs = GameState(size=9)
         for move in move_list: