Skip to content

Commit

Permalink
global replace of tab with 4-spaces
Browse files Browse the repository at this point in the history
  • Loading branch information
thouis committed Sep 22, 2016
1 parent 8cdd4aa commit 8cd09fe
Show file tree
Hide file tree
Showing 24 changed files with 3,070 additions and 3,070 deletions.
158 changes: 79 additions & 79 deletions AlphaGo/ai.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,95 +5,95 @@


class GreedyPolicyPlayer(object):
"""A player that uses a greedy policy (i.e. chooses the highest probability
move each turn)
"""
"""A player that uses a greedy policy (i.e. chooses the highest probability
move each turn)
"""

def __init__(self, policy_function, pass_when_offered=False, move_limit=None):
self.policy = policy_function
self.pass_when_offered = pass_when_offered
self.move_limit = move_limit
def __init__(self, policy_function, pass_when_offered=False, move_limit=None):
self.policy = policy_function
self.pass_when_offered = pass_when_offered
self.move_limit = move_limit

def get_move(self, state):
if self.move_limit is not None and len(state.history) > self.move_limit:
return go.PASS_MOVE
if self.pass_when_offered:
if len(state.history) > 100 and state.history[-1] == go.PASS_MOVE:
return go.PASS_MOVE
sensible_moves = [move for move in state.get_legal_moves(include_eyes=False)]
if len(sensible_moves) > 0:
move_probs = self.policy.eval_state(state, sensible_moves)
max_prob = max(move_probs, key=lambda (a, p): p)
return max_prob[0]
# No 'sensible' moves available, so do pass move
return go.PASS_MOVE
def get_move(self, state):
if self.move_limit is not None and len(state.history) > self.move_limit:
return go.PASS_MOVE
if self.pass_when_offered:
if len(state.history) > 100 and state.history[-1] == go.PASS_MOVE:
return go.PASS_MOVE
sensible_moves = [move for move in state.get_legal_moves(include_eyes=False)]
if len(sensible_moves) > 0:
move_probs = self.policy.eval_state(state, sensible_moves)
max_prob = max(move_probs, key=lambda (a, p): p)
return max_prob[0]
# No 'sensible' moves available, so do pass move
return go.PASS_MOVE


class ProbabilisticPolicyPlayer(object):
"""A player that samples a move in proportion to the probability given by the
policy.
"""A player that samples a move in proportion to the probability given by the
policy.
By manipulating the 'temperature', moves can be pushed towards totally random
(high temperature) or towards greedy play (low temperature)
"""
By manipulating the 'temperature', moves can be pushed towards totally random
(high temperature) or towards greedy play (low temperature)
"""

def __init__(self, policy_function, temperature=1.0, pass_when_offered=False, move_limit=None):
assert(temperature > 0.0)
self.policy = policy_function
self.move_limit = move_limit
self.beta = 1.0 / temperature
self.pass_when_offered = pass_when_offered
self.move_limit = move_limit
def __init__(self, policy_function, temperature=1.0, pass_when_offered=False, move_limit=None):
assert(temperature > 0.0)
self.policy = policy_function
self.move_limit = move_limit
self.beta = 1.0 / temperature
self.pass_when_offered = pass_when_offered
self.move_limit = move_limit

def get_move(self, state):
if self.move_limit is not None and len(state.history) > self.move_limit:
return go.PASS_MOVE
if self.pass_when_offered:
if len(state.history) > 100 and state.history[-1] == go.PASS_MOVE:
return go.PASS_MOVE
sensible_moves = [move for move in state.get_legal_moves(include_eyes=False)]
if len(sensible_moves) > 0:
move_probs = self.policy.eval_state(state, sensible_moves)
# zip(*list) is like the 'transpose' of zip; zip(*zip([1,2,3], [4,5,6])) is [(1,2,3), (4,5,6)]
moves, probabilities = zip(*move_probs)
probabilities = np.array(probabilities)
probabilities = probabilities ** self.beta
probabilities = probabilities / probabilities.sum()
# numpy interprets a list of tuples as 2D, so we must choose an _index_ of moves then apply it in 2 steps
choice_idx = np.random.choice(len(moves), p=probabilities)
return moves[choice_idx]
return go.PASS_MOVE
def get_move(self, state):
if self.move_limit is not None and len(state.history) > self.move_limit:
return go.PASS_MOVE
if self.pass_when_offered:
if len(state.history) > 100 and state.history[-1] == go.PASS_MOVE:
return go.PASS_MOVE
sensible_moves = [move for move in state.get_legal_moves(include_eyes=False)]
if len(sensible_moves) > 0:
move_probs = self.policy.eval_state(state, sensible_moves)
# zip(*list) is like the 'transpose' of zip; zip(*zip([1,2,3], [4,5,6])) is [(1,2,3), (4,5,6)]
moves, probabilities = zip(*move_probs)
probabilities = np.array(probabilities)
probabilities = probabilities ** self.beta
probabilities = probabilities / probabilities.sum()
# numpy interprets a list of tuples as 2D, so we must choose an _index_ of moves then apply it in 2 steps
choice_idx = np.random.choice(len(moves), p=probabilities)
return moves[choice_idx]
return go.PASS_MOVE

def get_moves(self, states):
"""Batch version of get_move. A list of moves is returned (one per state)
"""
sensible_move_lists = [[move for move in st.get_legal_moves(include_eyes=False)] for st in states]
all_moves_distributions = self.policy.batch_eval_state(states, sensible_move_lists)
move_list = [None] * len(states)
for i, move_probs in enumerate(all_moves_distributions):
if len(move_probs) == 0 or len(states[i].history) > self.move_limit:
move_list[i] = go.PASS_MOVE
else:
# this 'else' clause is identical to ProbabilisticPolicyPlayer.get_move
moves, probabilities = zip(*move_probs)
probabilities = np.array(probabilities)
probabilities = probabilities ** self.beta
probabilities = probabilities / probabilities.sum()
choice_idx = np.random.choice(len(moves), p=probabilities)
move_list[i] = moves[choice_idx]
return move_list
def get_moves(self, states):
"""Batch version of get_move. A list of moves is returned (one per state)
"""
sensible_move_lists = [[move for move in st.get_legal_moves(include_eyes=False)] for st in states]
all_moves_distributions = self.policy.batch_eval_state(states, sensible_move_lists)
move_list = [None] * len(states)
for i, move_probs in enumerate(all_moves_distributions):
if len(move_probs) == 0 or len(states[i].history) > self.move_limit:
move_list[i] = go.PASS_MOVE
else:
# this 'else' clause is identical to ProbabilisticPolicyPlayer.get_move
moves, probabilities = zip(*move_probs)
probabilities = np.array(probabilities)
probabilities = probabilities ** self.beta
probabilities = probabilities / probabilities.sum()
choice_idx = np.random.choice(len(moves), p=probabilities)
move_list[i] = moves[choice_idx]
return move_list


class MCTSPlayer(object):
def __init__(self, value_function, policy_function, rollout_function, lmbda=.5, c_puct=5, rollout_limit=500, playout_depth=40, n_playout=100):
self.mcts = mcts.MCTS(value_function, policy_function, rollout_function, lmbda, c_puct,
rollout_limit, playout_depth, n_playout)
def __init__(self, value_function, policy_function, rollout_function, lmbda=.5, c_puct=5, rollout_limit=500, playout_depth=40, n_playout=100):
self.mcts = mcts.MCTS(value_function, policy_function, rollout_function, lmbda, c_puct,
rollout_limit, playout_depth, n_playout)

def get_move(self, state):
sensible_moves = [move for move in state.get_legal_moves(include_eyes=False)]
if len(sensible_moves) > 0:
move = self.mcts.get_move(state)
self.mcts.update_with_move(move)
return move
# No 'sensible' moves available, so do pass move
return go.PASS_MOVE
def get_move(self, state):
sensible_moves = [move for move in state.get_legal_moves(include_eyes=False)]
if len(sensible_moves) > 0:
move = self.mcts.get_move(state)
self.mcts.update_with_move(move)
return move
# No 'sensible' moves available, so do pass move
return go.PASS_MOVE
Loading

0 comments on commit 8cd09fe

Please sign in to comment.