diff --git a/AlphaGo/ai.py b/AlphaGo/ai.py
index 437759960..0c2157c8b 100644
--- a/AlphaGo/ai.py
+++ b/AlphaGo/ai.py
@@ -87,7 +87,7 @@ def get_moves(self, states):
 class MCTSPlayer(object):
     def __init__(self, value_function, policy_function, rollout_function, lmbda=.5, c_puct=5,
                  rollout_limit=500, playout_depth=40, n_playout=100):
         self.mcts = mcts.MCTS(value_function, policy_function, rollout_function, lmbda, c_puct,
-            rollout_limit, playout_depth, n_playout)
+                              rollout_limit, playout_depth, n_playout)
     def get_move(self, state):
         sensible_moves = [move for move in state.get_legal_moves(include_eyes=False)]
diff --git a/AlphaGo/go.py b/AlphaGo/go.py
index 48b696fda..f56df4c16 100644
--- a/AlphaGo/go.py
+++ b/AlphaGo/go.py
@@ -427,7 +427,7 @@ def do_move(self, action, color=None):
         # Check for end of game
         if len(self.history) > 1:
             if self.history[-1] is PASS_MOVE and self.history[-2] is PASS_MOVE \
-                and self.current_player == WHITE:
+                    and self.current_player == WHITE:
                 self.is_end_of_game = True
         return self.is_end_of_game
 
diff --git a/AlphaGo/mcts.py b/AlphaGo/mcts.py
index 962b4aabe..09f9439b8 100644
--- a/AlphaGo/mcts.py
+++ b/AlphaGo/mcts.py
@@ -11,6 +11,7 @@ class TreeNode(object):
     """A node in the MCTS tree. Each node keeps track of its own value Q, prior probability P, and
     its visit-count-adjusted prior score u.
     """
+
     def __init__(self, parent, prior_p):
         self._parent = parent
         self._children = {}  # a map from action to TreeNode
diff --git a/AlphaGo/models/nn_util.py b/AlphaGo/models/nn_util.py
index 2f29211ae..dcaabfd3e 100644
--- a/AlphaGo/models/nn_util.py
+++ b/AlphaGo/models/nn_util.py
@@ -118,6 +118,7 @@ class Bias(Layer):
     Largely copied from the keras docs:
     http://keras.io/layers/writing-your-own-keras-layers/#writing-your-own-keras-layers
     """
+
     def __init__(self, **kwargs):
         super(Bias, self).__init__(**kwargs)
 
diff --git a/AlphaGo/preprocessing/preprocessing.py b/AlphaGo/preprocessing/preprocessing.py
index 12f9269ea..5455e3e9b 100644
--- a/AlphaGo/preprocessing/preprocessing.py
+++ b/AlphaGo/preprocessing/preprocessing.py
@@ -2,7 +2,7 @@
 import AlphaGo.go as go
 
 ##
-## individual feature functions (state --> tensor) begin here
+# individual feature functions (state --> tensor) begin here
 ##
 
 
diff --git a/AlphaGo/training/reinforcement_policy_trainer.py b/AlphaGo/training/reinforcement_policy_trainer.py
index 8bf36cc94..18fac78a3 100644
--- a/AlphaGo/training/reinforcement_policy_trainer.py
+++ b/AlphaGo/training/reinforcement_policy_trainer.py
@@ -24,6 +24,7 @@ class BatchedReinforcementLearningSGD(Optimizer):
         lr: float >= 0. Learning rate.
         ng: int > 0. Number of games played in parallel. Each one has its own cumulative gradient.
     '''
+
     def __init__(self, lr=0.01, ng=20, **kwargs):
         super(BatchedReinforcementLearningSGD, self).__init__(**kwargs)
         self.__dict__.update(locals())
diff --git a/interface/Play.py b/interface/Play.py
index 58e3fe56d..059a5e06e 100644
--- a/interface/Play.py
+++ b/interface/Play.py
@@ -4,6 +4,7 @@
 
 class play_match(object):
     """Interface to handle play between two players."""
+
     def __init__(self, player1, player2, save_dir=None, size=19):
         # super(ClassName, self).__init__()
         self.player1 = player1
@@ -19,7 +20,7 @@ def _play(self, player):
         # self.state.write_to_disk()
         if len(self.state.history) > 1:
             if self.state.history[-1] is None and self.state.history[-2] is None \
-                and self.state.current_player == -1:
+                    and self.state.current_player == -1:
                 end_of_game = True
             else:
                 end_of_game = False