From 8cd09fe3a8be2dedd95686c786b27961b73f59e3 Mon Sep 17 00:00:00 2001 From: "Thouis (Ray) Jones" Date: Thu, 22 Sep 2016 09:29:04 -0400 Subject: [PATCH] global replace of tab with 4-spaces --- AlphaGo/ai.py | 158 ++-- AlphaGo/go.py | 848 +++++++++--------- AlphaGo/mcts.py | 402 ++++----- AlphaGo/models/nn_util.py | 224 ++--- AlphaGo/models/policy.py | 488 +++++----- AlphaGo/preprocessing/game_converter.py | 392 ++++---- AlphaGo/preprocessing/preprocessing.py | 452 +++++----- .../training/reinforcement_policy_trainer.py | 526 +++++------ AlphaGo/training/supervised_policy_trainer.py | 388 ++++---- AlphaGo/util.py | 190 ++-- benchmarks/preprocessing_benchmark.py | 4 +- ...reinforcement_policy_training_benchmark.py | 4 +- .../supervised_policy_training_benchmark.py | 2 +- interface/Play.py | 54 +- interface/gtp_wrapper.py | 270 +++--- tests/test_game_converter.py | 24 +- tests/test_gamestate.py | 298 +++--- tests/test_gtp_wrapper.py | 30 +- tests/test_liberties.py | 70 +- tests/test_mcts.py | 208 ++--- tests/test_policy.py | 222 ++--- tests/test_preprocessing.py | 636 ++++++------- tests/test_reinforcement_policy_trainer.py | 228 ++--- tests/test_supervised_policy_trainer.py | 22 +- 24 files changed, 3070 insertions(+), 3070 deletions(-) diff --git a/AlphaGo/ai.py b/AlphaGo/ai.py index 593e856da..437759960 100644 --- a/AlphaGo/ai.py +++ b/AlphaGo/ai.py @@ -5,95 +5,95 @@ class GreedyPolicyPlayer(object): - """A player that uses a greedy policy (i.e. chooses the highest probability - move each turn) - """ + """A player that uses a greedy policy (i.e. chooses the highest probability + move each turn) + """ - def __init__(self, policy_function, pass_when_offered=False, move_limit=None): - self.policy = policy_function - self.pass_when_offered = pass_when_offered - self.move_limit = move_limit + def __init__(self, policy_function, pass_when_offered=False, move_limit=None): + self.policy = policy_function + self.pass_when_offered = pass_when_offered + self.move_limit = move_limit - def get_move(self, state): - if self.move_limit is not None and len(state.history) > self.move_limit: - return go.PASS_MOVE - if self.pass_when_offered: - if len(state.history) > 100 and state.history[-1] == go.PASS_MOVE: - return go.PASS_MOVE - sensible_moves = [move for move in state.get_legal_moves(include_eyes=False)] - if len(sensible_moves) > 0: - move_probs = self.policy.eval_state(state, sensible_moves) - max_prob = max(move_probs, key=lambda (a, p): p) - return max_prob[0] - # No 'sensible' moves available, so do pass move - return go.PASS_MOVE + def get_move(self, state): + if self.move_limit is not None and len(state.history) > self.move_limit: + return go.PASS_MOVE + if self.pass_when_offered: + if len(state.history) > 100 and state.history[-1] == go.PASS_MOVE: + return go.PASS_MOVE + sensible_moves = [move for move in state.get_legal_moves(include_eyes=False)] + if len(sensible_moves) > 0: + move_probs = self.policy.eval_state(state, sensible_moves) + max_prob = max(move_probs, key=lambda (a, p): p) + return max_prob[0] + # No 'sensible' moves available, so do pass move + return go.PASS_MOVE class ProbabilisticPolicyPlayer(object): - """A player that samples a move in proportion to the probability given by the - policy. + """A player that samples a move in proportion to the probability given by the + policy. 
- By manipulating the 'temperature', moves can be pushed towards totally random - (high temperature) or towards greedy play (low temperature) - """ + By manipulating the 'temperature', moves can be pushed towards totally random + (high temperature) or towards greedy play (low temperature) + """ - def __init__(self, policy_function, temperature=1.0, pass_when_offered=False, move_limit=None): - assert(temperature > 0.0) - self.policy = policy_function - self.move_limit = move_limit - self.beta = 1.0 / temperature - self.pass_when_offered = pass_when_offered - self.move_limit = move_limit + def __init__(self, policy_function, temperature=1.0, pass_when_offered=False, move_limit=None): + assert(temperature > 0.0) + self.policy = policy_function + self.move_limit = move_limit + self.beta = 1.0 / temperature + self.pass_when_offered = pass_when_offered + self.move_limit = move_limit - def get_move(self, state): - if self.move_limit is not None and len(state.history) > self.move_limit: - return go.PASS_MOVE - if self.pass_when_offered: - if len(state.history) > 100 and state.history[-1] == go.PASS_MOVE: - return go.PASS_MOVE - sensible_moves = [move for move in state.get_legal_moves(include_eyes=False)] - if len(sensible_moves) > 0: - move_probs = self.policy.eval_state(state, sensible_moves) - # zip(*list) is like the 'transpose' of zip; zip(*zip([1,2,3], [4,5,6])) is [(1,2,3), (4,5,6)] - moves, probabilities = zip(*move_probs) - probabilities = np.array(probabilities) - probabilities = probabilities ** self.beta - probabilities = probabilities / probabilities.sum() - # numpy interprets a list of tuples as 2D, so we must choose an _index_ of moves then apply it in 2 steps - choice_idx = np.random.choice(len(moves), p=probabilities) - return moves[choice_idx] - return go.PASS_MOVE + def get_move(self, state): + if self.move_limit is not None and len(state.history) > self.move_limit: + return go.PASS_MOVE + if self.pass_when_offered: + if len(state.history) > 100 and state.history[-1] == go.PASS_MOVE: + return go.PASS_MOVE + sensible_moves = [move for move in state.get_legal_moves(include_eyes=False)] + if len(sensible_moves) > 0: + move_probs = self.policy.eval_state(state, sensible_moves) + # zip(*list) is like the 'transpose' of zip; zip(*zip([1,2,3], [4,5,6])) is [(1,2,3), (4,5,6)] + moves, probabilities = zip(*move_probs) + probabilities = np.array(probabilities) + probabilities = probabilities ** self.beta + probabilities = probabilities / probabilities.sum() + # numpy interprets a list of tuples as 2D, so we must choose an _index_ of moves then apply it in 2 steps + choice_idx = np.random.choice(len(moves), p=probabilities) + return moves[choice_idx] + return go.PASS_MOVE - def get_moves(self, states): - """Batch version of get_move. 
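A minimal standalone sketch of the temperature re-weighting that ProbabilisticPolicyPlayer.get_move applies before sampling; the move list and probabilities below are hypothetical and serve only to illustrate the effect of beta = 1/temperature:

import numpy as np

moves = [(3, 3), (15, 15), (2, 16)]        # hypothetical legal moves
probs = np.array([0.5, 0.3, 0.2])          # hypothetical policy output

def sample_move(moves, probs, temperature=1.0):
    # Raising to beta = 1/temperature sharpens (T < 1) or flattens (T > 1)
    # the distribution; renormalize afterwards so it still sums to 1.
    beta = 1.0 / temperature
    p = probs ** beta
    p = p / p.sum()
    # numpy would treat a list of (x, y) tuples as a 2-D array, so sample an
    # index and look the move up afterwards, as get_move does.
    idx = np.random.choice(len(moves), p=p)
    return moves[idx]

print(sample_move(moves, probs, temperature=0.5))   # biased towards (3, 3)
print(sample_move(moves, probs, temperature=2.0))   # closer to uniform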
A list of moves is returned (one per state) - """ - sensible_move_lists = [[move for move in st.get_legal_moves(include_eyes=False)] for st in states] - all_moves_distributions = self.policy.batch_eval_state(states, sensible_move_lists) - move_list = [None] * len(states) - for i, move_probs in enumerate(all_moves_distributions): - if len(move_probs) == 0 or len(states[i].history) > self.move_limit: - move_list[i] = go.PASS_MOVE - else: - # this 'else' clause is identical to ProbabilisticPolicyPlayer.get_move - moves, probabilities = zip(*move_probs) - probabilities = np.array(probabilities) - probabilities = probabilities ** self.beta - probabilities = probabilities / probabilities.sum() - choice_idx = np.random.choice(len(moves), p=probabilities) - move_list[i] = moves[choice_idx] - return move_list + def get_moves(self, states): + """Batch version of get_move. A list of moves is returned (one per state) + """ + sensible_move_lists = [[move for move in st.get_legal_moves(include_eyes=False)] for st in states] + all_moves_distributions = self.policy.batch_eval_state(states, sensible_move_lists) + move_list = [None] * len(states) + for i, move_probs in enumerate(all_moves_distributions): + if len(move_probs) == 0 or len(states[i].history) > self.move_limit: + move_list[i] = go.PASS_MOVE + else: + # this 'else' clause is identical to ProbabilisticPolicyPlayer.get_move + moves, probabilities = zip(*move_probs) + probabilities = np.array(probabilities) + probabilities = probabilities ** self.beta + probabilities = probabilities / probabilities.sum() + choice_idx = np.random.choice(len(moves), p=probabilities) + move_list[i] = moves[choice_idx] + return move_list class MCTSPlayer(object): - def __init__(self, value_function, policy_function, rollout_function, lmbda=.5, c_puct=5, rollout_limit=500, playout_depth=40, n_playout=100): - self.mcts = mcts.MCTS(value_function, policy_function, rollout_function, lmbda, c_puct, - rollout_limit, playout_depth, n_playout) + def __init__(self, value_function, policy_function, rollout_function, lmbda=.5, c_puct=5, rollout_limit=500, playout_depth=40, n_playout=100): + self.mcts = mcts.MCTS(value_function, policy_function, rollout_function, lmbda, c_puct, + rollout_limit, playout_depth, n_playout) - def get_move(self, state): - sensible_moves = [move for move in state.get_legal_moves(include_eyes=False)] - if len(sensible_moves) > 0: - move = self.mcts.get_move(state) - self.mcts.update_with_move(move) - return move - # No 'sensible' moves available, so do pass move - return go.PASS_MOVE + def get_move(self, state): + sensible_moves = [move for move in state.get_legal_moves(include_eyes=False)] + if len(sensible_moves) > 0: + move = self.mcts.get_move(state) + self.mcts.update_with_move(move) + return move + # No 'sensible' moves available, so do pass move + return go.PASS_MOVE diff --git a/AlphaGo/go.py b/AlphaGo/go.py index 67b6c7269..48b696fda 100644 --- a/AlphaGo/go.py +++ b/AlphaGo/go.py @@ -7,430 +7,430 @@ class GameState(object): - """State of a game of Go and some basic functions to interact with it - """ - - # Looking up positions adjacent to a given position takes a surprising - # amount of time, hence this shared lookup table {boardsize: {position: [neighbors]}} - __NEIGHBORS_CACHE = {} - - def __init__(self, size=19, komi=7.5, enforce_superko=False): - self.board = np.zeros((size, size)) - self.board.fill(EMPTY) - self.size = size - self.current_player = BLACK - self.ko = None - self.komi = komi # Komi is number of extra points WHITE gets for 
going 2nd - self.handicaps = [] - self.history = [] - self.num_black_prisoners = 0 - self.num_white_prisoners = 0 - self.is_end_of_game = False - # Each pass move by a player subtracts a point - self.passes_white = 0 - self.passes_black = 0 - # `self.liberty_sets` is a 2D array with the same indexes as `board` - # each entry points to a set of tuples - the liberties of a stone's - # connected block. By caching liberties in this way, we can directly - # optimize update functions (e.g. do_move) and in doing so indirectly - # speed up any function that queries liberties - self._create_neighbors_cache() - self.liberty_sets = [[set() for _ in range(size)] for _ in range(size)] - for x in range(size): - for y in range(size): - self.liberty_sets[x][y] = set(self._neighbors((x, y))) - # separately cache the 2D numpy array of the _size_ of liberty sets - # at each board position - self.liberty_counts = np.zeros((size, size), dtype=np.int) - self.liberty_counts.fill(-1) - # initialize liberty_sets of empty board: the set of neighbors of each position - # similarly to `liberty_sets`, `group_sets[x][y]` points to a set of tuples - # containing all (x',y') pairs in the group connected to (x,y) - self.group_sets = [[set() for _ in range(size)] for _ in range(size)] - # cache of list of legal moves (actually 'sensible' moves, with a separate list for eye-moves on request) - self.__legal_move_cache = None - self.__legal_eyes_cache = None - # on-the-fly record of 'age' of each stone - self.stone_ages = np.zeros((size, size), dtype=np.int) - 1 - - # setup Zobrist hash to keep track of board state - self.enforce_superko = enforce_superko - rng = np.random.RandomState(0) - self.hash_lookup = { - WHITE: rng.randint(np.iinfo(np.uint64).max, size=(size, size), dtype='uint64'), - BLACK: rng.randint(np.iinfo(np.uint64).max, size=(size, size), dtype='uint64')} - self.current_hash = np.uint64(0) - self.previous_hashes = set() - - def get_group(self, position): - """Get the group of connected same-color stones to the given position - Keyword arguments: - position -- a tuple of (x, y) - x being the column index of the starting position of the search - y being the row index of the starting position of the search - Return: - a set of tuples consist of (x, y)s which are the same-color cluster - which contains the input single position. len(group) is size of the cluster, can be large. - """ - (x, y) = position - # given that this is already cached, it is a fast lookup - return self.group_sets[x][y] - - def get_groups_around(self, position): - """returns a list of the unique groups adjacent to position - 'unique' means that, for example in this position: - . . . . . - . B W . . - . W W . . - . . . . . - . . . . . 
- only the one white group would be returned on get_groups_around((1,1)) - """ - groups = [] - for (nx, ny) in self._neighbors(position): - group = self.group_sets[nx][ny] - if len(group) > 0 and group not in groups: - groups.append(self.group_sets[nx][ny]) - return groups - - def _on_board(self, position): - """simply return True iff position is within the bounds of [0, self.size) - """ - (x, y) = position - return x >= 0 and y >= 0 and x < self.size and y < self.size - - def _create_neighbors_cache(self): - if self.size not in GameState.__NEIGHBORS_CACHE: - GameState.__NEIGHBORS_CACHE[self.size] = {} - for x in xrange(self.size): - for y in xrange(self.size): - neighbors = [xy for xy in [(x - 1, y), (x + 1, y), (x, y - 1), (x, y + 1)] if self._on_board(xy)] - GameState.__NEIGHBORS_CACHE[self.size][(x, y)] = neighbors - - def _neighbors(self, position): - """A private helper function that simply returns a list of positions neighboring - the given (x,y) position. Basically it handles edges and corners. - """ - return GameState.__NEIGHBORS_CACHE[self.size][position] - - def _diagonals(self, position): - """Like _neighbors but for diagonal positions - """ - (x, y) = position - return filter(self._on_board, [(x - 1, y - 1), (x + 1, y + 1), (x + 1, y - 1), (x - 1, y + 1)]) - - def _update_neighbors(self, position): - """A private helper function to update self.group_sets and self.liberty_sets - given that a stone was just played at `position` - """ - (x, y) = position - - merged_group = set() - merged_group.add(position) - merged_libs = self.liberty_sets[x][y] - for (nx, ny) in self._neighbors(position): - # remove (x,y) from liberties of neighboring positions - self.liberty_sets[nx][ny] -= set([position]) - # if neighbor was opponent, update group's liberties count - # (current_player's groups will be updated below regardless) - if self.board[nx][ny] == -self.current_player: - new_liberty_count = len(self.liberty_sets[nx][ny]) - for (gx, gy) in self.group_sets[nx][ny]: - self.liberty_counts[gx][gy] = new_liberty_count - # MERGE group/liberty sets if neighbor is the same color - # note: this automatically takes care of merging two separate - # groups that just became connected through (x,y) - elif self.board[x][y] == self.board[nx][ny]: - merged_group |= self.group_sets[nx][ny] - merged_libs |= self.liberty_sets[nx][ny] - - # now that we have one big 'merged' set for groups and liberties, loop - # over every member of the same-color group to update them - # Note: neighboring opponent groups are already updated in the previous loop - count_merged_libs = len(merged_libs) - for (gx, gy) in merged_group: - self.group_sets[gx][gy] = merged_group - self.liberty_sets[gx][gy] = merged_libs - self.liberty_counts[gx][gy] = count_merged_libs - - def _update_hash(self, action, color): - (x, y) = action - self.current_hash = np.bitwise_xor(self.current_hash, self.hash_lookup[color][x][y]) - - def _remove_group(self, group): - """A private helper function to take a group off the board (due to capture), - updating group sets and liberties along the way - """ - for (x, y) in group: - self._update_hash((x, y), self.board[x, y]) - self.board[x, y] = EMPTY - for (x, y) in group: - # clear group_sets for all positions in 'group' - self.group_sets[x][y] = set() - self.liberty_sets[x][y] = set() - self.liberty_counts[x][y] = -1 - self.stone_ages[x][y] = -1 - for (nx, ny) in self._neighbors((x, y)): - if self.board[nx, ny] == EMPTY: - # add empty neighbors of (x,y) to its liberties - self.liberty_sets[x][y].add((nx, 
ny)) - else: - # add (x,y) to the liberties of its nonempty neighbors - self.liberty_sets[nx][ny].add((x, y)) - for (gx, gy) in self.group_sets[nx][ny]: - self.liberty_counts[gx][gy] = len(self.liberty_sets[nx][ny]) - - def copy(self): - """get a copy of this Game state - """ - other = GameState(self.size, self.komi) - other.board = self.board.copy() - other.current_player = self.current_player - other.ko = self.ko - other.handicaps = list(self.handicaps) - other.history = list(self.history) - other.num_black_prisoners = self.num_black_prisoners - other.num_white_prisoners = self.num_white_prisoners - other.enforce_superko = self.enforce_superko - other.current_hash = self.current_hash.copy() - other.previous_hashes = self.previous_hashes.copy() - - # update liberty and group sets. Note: calling set(a) on another set - # copies the entries (any iterable as an argument would work so - # set(list(a)) is unnecessary) - for x in range(self.size): - for y in range(self.size): - other.group_sets[x][y] = set(self.group_sets[x][y]) - other.liberty_sets[x][y] = set(self.liberty_sets[x][y]) - other.liberty_counts = self.liberty_counts.copy() - return other - - def is_suicide(self, action): - """return true if having current_player play at would be suicide - """ - (x, y) = action - num_liberties_here = len(self.liberty_sets[x][y]) - if num_liberties_here == 0: - # no liberties here 'immediately' - # but this may still connect to another group of the same color - for (nx, ny) in self._neighbors(action): - # check if we're saved by attaching to a friendly group that has - # liberties elsewhere - is_friendly_group = self.board[nx, ny] == self.current_player - group_has_other_liberties = len(self.liberty_sets[nx][ny] - set([action])) > 0 - if is_friendly_group and group_has_other_liberties: - return False - # check if we're killing an unfriendly group - is_enemy_group = self.board[nx, ny] == -self.current_player - if is_enemy_group and (not group_has_other_liberties): - return False - # checked all the neighbors, and it doesn't look good. - return True - return False - - def is_positional_superko(self, action): - """Find all actions that the current_player has done in the past, taking into account the fact that - history starts with BLACK when there are no handicaps or with WHITE when there are. - """ - if len(self.handicaps) == 0 and self.current_player == BLACK: - player_history = self.history[0::2] - elif len(self.handicaps) > 0 and self.current_player == WHITE: - player_history = self.history[0::2] - else: - player_history = self.history[1::2] - - if action not in self.handicaps and action not in player_history: - return False - - state_copy = self.copy() - state_copy.enforce_superko = False - state_copy.do_move(action) - - if state_copy.current_hash in self.previous_hashes: - return True - else: - return False - - def is_legal(self, action): - """determine if the given action (x,y tuple) is a legal move - note: we only check ko, not superko at this point (TODO?) 
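The superko test above leans on an incrementally maintained Zobrist hash: placing or removing a stone XORs one pre-drawn 64-bit value into current_hash, so repeating a whole-board position reproduces an earlier hash value. A small self-contained sketch of that bookkeeping; the board size and helper name here are illustrative, not taken from the patch:

import numpy as np

SIZE = 5
BLACK, WHITE = 1, -1
rng = np.random.RandomState(0)
hash_lookup = {
    color: rng.randint(np.iinfo(np.uint64).max, size=(SIZE, SIZE), dtype='uint64')
    for color in (BLACK, WHITE)
}
current_hash = np.uint64(0)

def toggle(position, color):
    # XOR the stone's random value in or out; applying it twice cancels out.
    global current_hash
    (x, y) = position
    current_hash = np.bitwise_xor(current_hash, hash_lookup[color][x][y])

toggle((2, 2), BLACK)                 # play a black stone
hash_with_stone = current_hash
toggle((2, 2), BLACK)                 # the stone is captured again
assert current_hash == np.uint64(0)   # back to the empty-board hash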
- """ - # passing is always legal - if action is PASS_MOVE: - return True - (x, y) = action - if not self._on_board(action): - return False - if self.board[x][y] != EMPTY: - return False - if self.is_suicide(action): - return False - if action == self.ko: - return False - if self.enforce_superko and self.is_positional_superko(action): - return False - return True - - def is_eyeish(self, position, owner): - """returns whether the position is empty and is surrounded by all stones of 'owner' - """ - (x, y) = position - if self.board[x, y] != EMPTY: - return False - - for (nx, ny) in self._neighbors(position): - if self.board[nx, ny] != owner: - return False - return True - - def is_eye(self, position, owner, stack=[]): - """returns whether the position is a true eye of 'owner' - Requires a recursive call; empty spaces diagonal to 'position' are fine - as long as they themselves are eyes - """ - if not self.is_eyeish(position, owner): - return False - # (as in Fuego/Michi/etc) ensure that num "bad" diagonals is 0 (edges) or 1 - # where a bad diagonal is an opponent stone or an empty non-eye space - num_bad_diagonal = 0 - # if in middle of board, 1 bad neighbor is allowable; zero for edges and corners - allowable_bad_diagonal = 1 if len(self._neighbors(position)) == 4 else 0 - - for d in self._diagonals(position): - # opponent stones count against this being eye - if self.board[d] == -owner: - num_bad_diagonal += 1 - # empty spaces (that aren't themselves eyes) count against it too - # the 'stack' keeps track of where we've already been to prevent - # infinite loops of recursion - elif self.board[d] == EMPTY and d not in stack: - stack.append(position) - if not self.is_eye(d, owner, stack): - num_bad_diagonal += 1 - stack.pop() - # at any point, if we've surpassed # allowable, we can stop - if num_bad_diagonal > allowable_bad_diagonal: - return False - return True - - def get_legal_moves(self, include_eyes=True): - if self.__legal_move_cache is not None: - if include_eyes: - return self.__legal_move_cache + self.__legal_eyes_cache - else: - return self.__legal_move_cache - self.__legal_move_cache = [] - self.__legal_eyes_cache = [] - for x in range(self.size): - for y in range(self.size): - if self.is_legal((x, y)): - if not self.is_eye((x, y), self.current_player): - self.__legal_move_cache.append((x, y)) - else: - self.__legal_eyes_cache.append((x, y)) - return self.get_legal_moves(include_eyes) - - def get_winner(self): - """Calculate score of board state and return player ID (1, -1, or 0 for tie) - corresponding to winner. Uses 'Area scoring'. 
- """ - # Count number of positions filled by each player, plus 1 for each eye-ish space owned - score_white = np.sum(self.board == WHITE) - score_black = np.sum(self.board == BLACK) - empties = zip(*np.where(self.board == EMPTY)) - for empty in empties: - # Check that all surrounding points are of one color - if self.is_eyeish(empty, BLACK): - score_black += 1 - elif self.is_eyeish(empty, WHITE): - score_white += 1 - score_white += self.komi - score_white -= self.passes_white - score_black -= self.passes_black - if score_black > score_white: - winner = BLACK - elif score_white > score_black: - winner = WHITE - else: - # Tie - winner = 0 - return winner - - def place_handicaps(self, actions): - if len(self.history) > 0: - raise IllegalMove("Cannot place handicap on a started game") - self.handicaps.extend(actions) - for action in actions: - self.do_move(action, BLACK) - self.history = [] - - def get_current_player(self): - """Returns the color of the player who will make the next move. - """ - return self.current_player - - def do_move(self, action, color=None): - """Play stone at action=(x,y). If color is not specified, current_player is used - If it is a legal move, current_player switches to the opposite color - If not, an IllegalMove exception is raised - """ - color = color or self.current_player - reset_player = self.current_player - self.current_player = color - if self.is_legal(action): - # reset ko - self.ko = None - # increment age of stones by 1 - self.stone_ages[self.stone_ages >= 0] += 1 - if action is not PASS_MOVE: - (x, y) = action - self.board[x][y] = color - self._update_hash(action, color) - self._update_neighbors(action) - self.stone_ages[x][y] = 0 - - # check neighboring groups' liberties for captures - for (nx, ny) in self._neighbors(action): - if self.board[nx, ny] == -color and len(self.liberty_sets[nx][ny]) == 0: - # capture occurred! 
- captured_group = self.group_sets[nx][ny] - num_captured = len(captured_group) - self._remove_group(captured_group) - if color == BLACK: - self.num_white_prisoners += num_captured - else: - self.num_black_prisoners += num_captured - # check for ko - if num_captured == 1: - # it is a ko iff, were the opponent to play at the captured position, - # it would recapture (x,y) only - # (a bigger group containing xy may be captured - this is 'snapback') - would_recapture = len(self.liberty_sets[x][y]) == 1 - recapture_size_is_1 = len(self.group_sets[x][y]) == 1 - if would_recapture and recapture_size_is_1: - # note: (nx,ny) is the stone that was captured - self.ko = (nx, ny) - # _remove_group has finished updating the hash - self.previous_hashes.add(self.current_hash) - else: - if color == BLACK: - self.passes_black += 1 - if color == WHITE: - self.passes_white += 1 - # next turn - self.current_player = -color - self.history.append(action) - self.__legal_move_cache = None - else: - self.current_player = reset_player - raise IllegalMove(str(action)) - # Check for end of game - if len(self.history) > 1: - if self.history[-1] is PASS_MOVE and self.history[-2] is PASS_MOVE \ - and self.current_player == WHITE: - self.is_end_of_game = True - return self.is_end_of_game + """State of a game of Go and some basic functions to interact with it + """ + + # Looking up positions adjacent to a given position takes a surprising + # amount of time, hence this shared lookup table {boardsize: {position: [neighbors]}} + __NEIGHBORS_CACHE = {} + + def __init__(self, size=19, komi=7.5, enforce_superko=False): + self.board = np.zeros((size, size)) + self.board.fill(EMPTY) + self.size = size + self.current_player = BLACK + self.ko = None + self.komi = komi # Komi is number of extra points WHITE gets for going 2nd + self.handicaps = [] + self.history = [] + self.num_black_prisoners = 0 + self.num_white_prisoners = 0 + self.is_end_of_game = False + # Each pass move by a player subtracts a point + self.passes_white = 0 + self.passes_black = 0 + # `self.liberty_sets` is a 2D array with the same indexes as `board` + # each entry points to a set of tuples - the liberties of a stone's + # connected block. By caching liberties in this way, we can directly + # optimize update functions (e.g. 
do_move) and in doing so indirectly + # speed up any function that queries liberties + self._create_neighbors_cache() + self.liberty_sets = [[set() for _ in range(size)] for _ in range(size)] + for x in range(size): + for y in range(size): + self.liberty_sets[x][y] = set(self._neighbors((x, y))) + # separately cache the 2D numpy array of the _size_ of liberty sets + # at each board position + self.liberty_counts = np.zeros((size, size), dtype=np.int) + self.liberty_counts.fill(-1) + # initialize liberty_sets of empty board: the set of neighbors of each position + # similarly to `liberty_sets`, `group_sets[x][y]` points to a set of tuples + # containing all (x',y') pairs in the group connected to (x,y) + self.group_sets = [[set() for _ in range(size)] for _ in range(size)] + # cache of list of legal moves (actually 'sensible' moves, with a separate list for eye-moves on request) + self.__legal_move_cache = None + self.__legal_eyes_cache = None + # on-the-fly record of 'age' of each stone + self.stone_ages = np.zeros((size, size), dtype=np.int) - 1 + + # setup Zobrist hash to keep track of board state + self.enforce_superko = enforce_superko + rng = np.random.RandomState(0) + self.hash_lookup = { + WHITE: rng.randint(np.iinfo(np.uint64).max, size=(size, size), dtype='uint64'), + BLACK: rng.randint(np.iinfo(np.uint64).max, size=(size, size), dtype='uint64')} + self.current_hash = np.uint64(0) + self.previous_hashes = set() + + def get_group(self, position): + """Get the group of connected same-color stones to the given position + Keyword arguments: + position -- a tuple of (x, y) + x being the column index of the starting position of the search + y being the row index of the starting position of the search + Return: + a set of tuples consist of (x, y)s which are the same-color cluster + which contains the input single position. len(group) is size of the cluster, can be large. + """ + (x, y) = position + # given that this is already cached, it is a fast lookup + return self.group_sets[x][y] + + def get_groups_around(self, position): + """returns a list of the unique groups adjacent to position + 'unique' means that, for example in this position: + . . . . . + . B W . . + . W W . . + . . . . . + . . . . . + only the one white group would be returned on get_groups_around((1,1)) + """ + groups = [] + for (nx, ny) in self._neighbors(position): + group = self.group_sets[nx][ny] + if len(group) > 0 and group not in groups: + groups.append(self.group_sets[nx][ny]) + return groups + + def _on_board(self, position): + """simply return True iff position is within the bounds of [0, self.size) + """ + (x, y) = position + return x >= 0 and y >= 0 and x < self.size and y < self.size + + def _create_neighbors_cache(self): + if self.size not in GameState.__NEIGHBORS_CACHE: + GameState.__NEIGHBORS_CACHE[self.size] = {} + for x in xrange(self.size): + for y in xrange(self.size): + neighbors = [xy for xy in [(x - 1, y), (x + 1, y), (x, y - 1), (x, y + 1)] if self._on_board(xy)] + GameState.__NEIGHBORS_CACHE[self.size][(x, y)] = neighbors + + def _neighbors(self, position): + """A private helper function that simply returns a list of positions neighboring + the given (x,y) position. Basically it handles edges and corners. 
+ """ + return GameState.__NEIGHBORS_CACHE[self.size][position] + + def _diagonals(self, position): + """Like _neighbors but for diagonal positions + """ + (x, y) = position + return filter(self._on_board, [(x - 1, y - 1), (x + 1, y + 1), (x + 1, y - 1), (x - 1, y + 1)]) + + def _update_neighbors(self, position): + """A private helper function to update self.group_sets and self.liberty_sets + given that a stone was just played at `position` + """ + (x, y) = position + + merged_group = set() + merged_group.add(position) + merged_libs = self.liberty_sets[x][y] + for (nx, ny) in self._neighbors(position): + # remove (x,y) from liberties of neighboring positions + self.liberty_sets[nx][ny] -= set([position]) + # if neighbor was opponent, update group's liberties count + # (current_player's groups will be updated below regardless) + if self.board[nx][ny] == -self.current_player: + new_liberty_count = len(self.liberty_sets[nx][ny]) + for (gx, gy) in self.group_sets[nx][ny]: + self.liberty_counts[gx][gy] = new_liberty_count + # MERGE group/liberty sets if neighbor is the same color + # note: this automatically takes care of merging two separate + # groups that just became connected through (x,y) + elif self.board[x][y] == self.board[nx][ny]: + merged_group |= self.group_sets[nx][ny] + merged_libs |= self.liberty_sets[nx][ny] + + # now that we have one big 'merged' set for groups and liberties, loop + # over every member of the same-color group to update them + # Note: neighboring opponent groups are already updated in the previous loop + count_merged_libs = len(merged_libs) + for (gx, gy) in merged_group: + self.group_sets[gx][gy] = merged_group + self.liberty_sets[gx][gy] = merged_libs + self.liberty_counts[gx][gy] = count_merged_libs + + def _update_hash(self, action, color): + (x, y) = action + self.current_hash = np.bitwise_xor(self.current_hash, self.hash_lookup[color][x][y]) + + def _remove_group(self, group): + """A private helper function to take a group off the board (due to capture), + updating group sets and liberties along the way + """ + for (x, y) in group: + self._update_hash((x, y), self.board[x, y]) + self.board[x, y] = EMPTY + for (x, y) in group: + # clear group_sets for all positions in 'group' + self.group_sets[x][y] = set() + self.liberty_sets[x][y] = set() + self.liberty_counts[x][y] = -1 + self.stone_ages[x][y] = -1 + for (nx, ny) in self._neighbors((x, y)): + if self.board[nx, ny] == EMPTY: + # add empty neighbors of (x,y) to its liberties + self.liberty_sets[x][y].add((nx, ny)) + else: + # add (x,y) to the liberties of its nonempty neighbors + self.liberty_sets[nx][ny].add((x, y)) + for (gx, gy) in self.group_sets[nx][ny]: + self.liberty_counts[gx][gy] = len(self.liberty_sets[nx][ny]) + + def copy(self): + """get a copy of this Game state + """ + other = GameState(self.size, self.komi) + other.board = self.board.copy() + other.current_player = self.current_player + other.ko = self.ko + other.handicaps = list(self.handicaps) + other.history = list(self.history) + other.num_black_prisoners = self.num_black_prisoners + other.num_white_prisoners = self.num_white_prisoners + other.enforce_superko = self.enforce_superko + other.current_hash = self.current_hash.copy() + other.previous_hashes = self.previous_hashes.copy() + + # update liberty and group sets. 
Note: calling set(a) on another set + # copies the entries (any iterable as an argument would work so + # set(list(a)) is unnecessary) + for x in range(self.size): + for y in range(self.size): + other.group_sets[x][y] = set(self.group_sets[x][y]) + other.liberty_sets[x][y] = set(self.liberty_sets[x][y]) + other.liberty_counts = self.liberty_counts.copy() + return other + + def is_suicide(self, action): + """return true if having current_player play at would be suicide + """ + (x, y) = action + num_liberties_here = len(self.liberty_sets[x][y]) + if num_liberties_here == 0: + # no liberties here 'immediately' + # but this may still connect to another group of the same color + for (nx, ny) in self._neighbors(action): + # check if we're saved by attaching to a friendly group that has + # liberties elsewhere + is_friendly_group = self.board[nx, ny] == self.current_player + group_has_other_liberties = len(self.liberty_sets[nx][ny] - set([action])) > 0 + if is_friendly_group and group_has_other_liberties: + return False + # check if we're killing an unfriendly group + is_enemy_group = self.board[nx, ny] == -self.current_player + if is_enemy_group and (not group_has_other_liberties): + return False + # checked all the neighbors, and it doesn't look good. + return True + return False + + def is_positional_superko(self, action): + """Find all actions that the current_player has done in the past, taking into account the fact that + history starts with BLACK when there are no handicaps or with WHITE when there are. + """ + if len(self.handicaps) == 0 and self.current_player == BLACK: + player_history = self.history[0::2] + elif len(self.handicaps) > 0 and self.current_player == WHITE: + player_history = self.history[0::2] + else: + player_history = self.history[1::2] + + if action not in self.handicaps and action not in player_history: + return False + + state_copy = self.copy() + state_copy.enforce_superko = False + state_copy.do_move(action) + + if state_copy.current_hash in self.previous_hashes: + return True + else: + return False + + def is_legal(self, action): + """determine if the given action (x,y tuple) is a legal move + note: we only check ko, not superko at this point (TODO?) 
+ """ + # passing is always legal + if action is PASS_MOVE: + return True + (x, y) = action + if not self._on_board(action): + return False + if self.board[x][y] != EMPTY: + return False + if self.is_suicide(action): + return False + if action == self.ko: + return False + if self.enforce_superko and self.is_positional_superko(action): + return False + return True + + def is_eyeish(self, position, owner): + """returns whether the position is empty and is surrounded by all stones of 'owner' + """ + (x, y) = position + if self.board[x, y] != EMPTY: + return False + + for (nx, ny) in self._neighbors(position): + if self.board[nx, ny] != owner: + return False + return True + + def is_eye(self, position, owner, stack=[]): + """returns whether the position is a true eye of 'owner' + Requires a recursive call; empty spaces diagonal to 'position' are fine + as long as they themselves are eyes + """ + if not self.is_eyeish(position, owner): + return False + # (as in Fuego/Michi/etc) ensure that num "bad" diagonals is 0 (edges) or 1 + # where a bad diagonal is an opponent stone or an empty non-eye space + num_bad_diagonal = 0 + # if in middle of board, 1 bad neighbor is allowable; zero for edges and corners + allowable_bad_diagonal = 1 if len(self._neighbors(position)) == 4 else 0 + + for d in self._diagonals(position): + # opponent stones count against this being eye + if self.board[d] == -owner: + num_bad_diagonal += 1 + # empty spaces (that aren't themselves eyes) count against it too + # the 'stack' keeps track of where we've already been to prevent + # infinite loops of recursion + elif self.board[d] == EMPTY and d not in stack: + stack.append(position) + if not self.is_eye(d, owner, stack): + num_bad_diagonal += 1 + stack.pop() + # at any point, if we've surpassed # allowable, we can stop + if num_bad_diagonal > allowable_bad_diagonal: + return False + return True + + def get_legal_moves(self, include_eyes=True): + if self.__legal_move_cache is not None: + if include_eyes: + return self.__legal_move_cache + self.__legal_eyes_cache + else: + return self.__legal_move_cache + self.__legal_move_cache = [] + self.__legal_eyes_cache = [] + for x in range(self.size): + for y in range(self.size): + if self.is_legal((x, y)): + if not self.is_eye((x, y), self.current_player): + self.__legal_move_cache.append((x, y)) + else: + self.__legal_eyes_cache.append((x, y)) + return self.get_legal_moves(include_eyes) + + def get_winner(self): + """Calculate score of board state and return player ID (1, -1, or 0 for tie) + corresponding to winner. Uses 'Area scoring'. 
+ """ + # Count number of positions filled by each player, plus 1 for each eye-ish space owned + score_white = np.sum(self.board == WHITE) + score_black = np.sum(self.board == BLACK) + empties = zip(*np.where(self.board == EMPTY)) + for empty in empties: + # Check that all surrounding points are of one color + if self.is_eyeish(empty, BLACK): + score_black += 1 + elif self.is_eyeish(empty, WHITE): + score_white += 1 + score_white += self.komi + score_white -= self.passes_white + score_black -= self.passes_black + if score_black > score_white: + winner = BLACK + elif score_white > score_black: + winner = WHITE + else: + # Tie + winner = 0 + return winner + + def place_handicaps(self, actions): + if len(self.history) > 0: + raise IllegalMove("Cannot place handicap on a started game") + self.handicaps.extend(actions) + for action in actions: + self.do_move(action, BLACK) + self.history = [] + + def get_current_player(self): + """Returns the color of the player who will make the next move. + """ + return self.current_player + + def do_move(self, action, color=None): + """Play stone at action=(x,y). If color is not specified, current_player is used + If it is a legal move, current_player switches to the opposite color + If not, an IllegalMove exception is raised + """ + color = color or self.current_player + reset_player = self.current_player + self.current_player = color + if self.is_legal(action): + # reset ko + self.ko = None + # increment age of stones by 1 + self.stone_ages[self.stone_ages >= 0] += 1 + if action is not PASS_MOVE: + (x, y) = action + self.board[x][y] = color + self._update_hash(action, color) + self._update_neighbors(action) + self.stone_ages[x][y] = 0 + + # check neighboring groups' liberties for captures + for (nx, ny) in self._neighbors(action): + if self.board[nx, ny] == -color and len(self.liberty_sets[nx][ny]) == 0: + # capture occurred! + captured_group = self.group_sets[nx][ny] + num_captured = len(captured_group) + self._remove_group(captured_group) + if color == BLACK: + self.num_white_prisoners += num_captured + else: + self.num_black_prisoners += num_captured + # check for ko + if num_captured == 1: + # it is a ko iff, were the opponent to play at the captured position, + # it would recapture (x,y) only + # (a bigger group containing xy may be captured - this is 'snapback') + would_recapture = len(self.liberty_sets[x][y]) == 1 + recapture_size_is_1 = len(self.group_sets[x][y]) == 1 + if would_recapture and recapture_size_is_1: + # note: (nx,ny) is the stone that was captured + self.ko = (nx, ny) + # _remove_group has finished updating the hash + self.previous_hashes.add(self.current_hash) + else: + if color == BLACK: + self.passes_black += 1 + if color == WHITE: + self.passes_white += 1 + # next turn + self.current_player = -color + self.history.append(action) + self.__legal_move_cache = None + else: + self.current_player = reset_player + raise IllegalMove(str(action)) + # Check for end of game + if len(self.history) > 1: + if self.history[-1] is PASS_MOVE and self.history[-2] is PASS_MOVE \ + and self.current_player == WHITE: + self.is_end_of_game = True + return self.is_end_of_game class IllegalMove(Exception): - pass + pass diff --git a/AlphaGo/mcts.py b/AlphaGo/mcts.py index d2d572e2a..962b4aabe 100644 --- a/AlphaGo/mcts.py +++ b/AlphaGo/mcts.py @@ -8,210 +8,210 @@ class TreeNode(object): - """A node in the MCTS tree. Each node keeps track of its own value Q, prior probability P, and - its visit-count-adjusted prior score u. 
- """ - def __init__(self, parent, prior_p): - self._parent = parent - self._children = {} # a map from action to TreeNode - self._n_visits = 0 - self._Q = 0 - # This value for u will be overwritten in the first call to update(), but is useful for - # choosing the first action from this node. - self._u = prior_p - self._P = prior_p - - def expand(self, action_priors): - """Expand tree by creating new children. - - Arguments: - action_priors -- output from policy function - a list of tuples of actions and their prior - probability according to the policy function. - - Returns: - None - """ - for action, prob in action_priors: - if action not in self._children: - self._children[action] = TreeNode(self, prob) - - def select(self): - """Select action among children that gives maximum action value, Q plus bonus u(P). - - Returns: - A tuple of (action, next_node) - """ - return max(self._children.iteritems(), key=lambda (action, node): node.get_value()) - - def update(self, leaf_value, c_puct): - """Update node values from leaf evaluation. - - Arguments: - leaf_value -- the value of subtree evaluation from the current player's perspective. - c_puct -- a number in (0, inf) controlling the relative impact of values, Q, and - prior probability, P, on this node's score. - - Returns: - None - """ - # Count visit. - self._n_visits += 1 - # Update Q, a running average of values for all visits. - self._Q += (leaf_value - self._Q) / self._n_visits - # Update u, the prior weighted by an exploration hyperparameter c_puct and the number of - # visits. Note that u is not normalized to be a distribution. - if not self.is_root(): - self._u = c_puct * self._P * np.sqrt(self._parent._n_visits) / (1 + self._n_visits) - - def update_recursive(self, leaf_value, c_puct): - """Like a call to update(), but applied recursively for all ancestors. - - Note: it is important that this happens from the root downward so that 'parent' visit - counts are correct. - """ - # If it is not root, this node's parent should be updated first. - if self._parent: - self._parent.update_recursive(leaf_value, c_puct) - self.update(leaf_value, c_puct) - - def get_value(self): - """Calculate and return the value for this node: a combination of leaf evaluations, Q, and - this node's prior adjusted for its visit count, u - """ - return self._Q + self._u - - def is_leaf(self): - """Check if leaf node (i.e. no nodes below this have been expanded). - """ - return self._children == {} - - def is_root(self): - return self._parent is None + """A node in the MCTS tree. Each node keeps track of its own value Q, prior probability P, and + its visit-count-adjusted prior score u. + """ + def __init__(self, parent, prior_p): + self._parent = parent + self._children = {} # a map from action to TreeNode + self._n_visits = 0 + self._Q = 0 + # This value for u will be overwritten in the first call to update(), but is useful for + # choosing the first action from this node. + self._u = prior_p + self._P = prior_p + + def expand(self, action_priors): + """Expand tree by creating new children. + + Arguments: + action_priors -- output from policy function - a list of tuples of actions and their prior + probability according to the policy function. + + Returns: + None + """ + for action, prob in action_priors: + if action not in self._children: + self._children[action] = TreeNode(self, prob) + + def select(self): + """Select action among children that gives maximum action value, Q plus bonus u(P). 
+ + Returns: + A tuple of (action, next_node) + """ + return max(self._children.iteritems(), key=lambda (action, node): node.get_value()) + + def update(self, leaf_value, c_puct): + """Update node values from leaf evaluation. + + Arguments: + leaf_value -- the value of subtree evaluation from the current player's perspective. + c_puct -- a number in (0, inf) controlling the relative impact of values, Q, and + prior probability, P, on this node's score. + + Returns: + None + """ + # Count visit. + self._n_visits += 1 + # Update Q, a running average of values for all visits. + self._Q += (leaf_value - self._Q) / self._n_visits + # Update u, the prior weighted by an exploration hyperparameter c_puct and the number of + # visits. Note that u is not normalized to be a distribution. + if not self.is_root(): + self._u = c_puct * self._P * np.sqrt(self._parent._n_visits) / (1 + self._n_visits) + + def update_recursive(self, leaf_value, c_puct): + """Like a call to update(), but applied recursively for all ancestors. + + Note: it is important that this happens from the root downward so that 'parent' visit + counts are correct. + """ + # If it is not root, this node's parent should be updated first. + if self._parent: + self._parent.update_recursive(leaf_value, c_puct) + self.update(leaf_value, c_puct) + + def get_value(self): + """Calculate and return the value for this node: a combination of leaf evaluations, Q, and + this node's prior adjusted for its visit count, u + """ + return self._Q + self._u + + def is_leaf(self): + """Check if leaf node (i.e. no nodes below this have been expanded). + """ + return self._children == {} + + def is_root(self): + return self._parent is None class MCTS(object): - """A simple (and slow) single-threaded implementation of Monte Carlo Tree Search. - - Search works by exploring moves randomly according to the given policy up to a certain - depth, which is relatively small given the search space. "Leaves" at this depth are assigned a - value comprising a weighted combination of (1) the value function evaluated at that leaf, and - (2) the result of finishing the game from that leaf according to the 'rollout' policy. The - probability of revisiting a node changes over the course of the many playouts according to its - estimated value. Ultimately the most visited node is returned as the next action, not the most - valued node. - - The term "playout" refers to a single search from the root, whereas "rollout" refers to the - fast evaluation from leaf nodes to the end of the game. - """ - - def __init__(self, value_fn, policy_fn, rollout_policy_fn, lmbda=0.5, c_puct=5, rollout_limit=500, playout_depth=20, n_playout=10000): - """Arguments: - value_fn -- a function that takes in a state and ouputs a score in [-1, 1], i.e. the - expected value of the end game score from the current player's perspective. - policy_fn -- a function that takes in a state and outputs a list of (action, probability) - tuples for the current player. - rollout_policy_fn -- a coarse, fast version of policy_fn used in the rollout phase. - lmbda -- controls the relative weight of the value network and fast rollout policy result - in determining the value of a leaf node. lmbda must be in [0, 1], where 0 means use only - the value network and 1 means use only the result from the rollout. 
- c_puct -- a number in (0, inf) that controls how quickly exploration converges to the maximum- - value policy, where a higher value means relying on the prior more, and should be used only - in conjunction with a large value for n_playout. - """ - self._root = TreeNode(None, 1.0) - self._value = value_fn - self._policy = policy_fn - self._rollout = rollout_policy_fn - self._lmbda = lmbda - self._c_puct = c_puct - self._rollout_limit = rollout_limit - self._L = playout_depth - self._n_playout = n_playout - - def _playout(self, state, leaf_depth): - """Run a single playout from the root to the given depth, getting a value at the leaf and - propagating it back through its parents. State is modified in-place, so a copy must be - provided. - - Arguments: - state -- a copy of the state. - leaf_depth -- after this many moves, leaves are evaluated. - - Returns: - None - """ - node = self._root - for i in range(leaf_depth): - # Only expand node if it has not already been done. Existing nodes already know their - # prior. - if node.is_leaf(): - action_probs = self._policy(state) - # Check for end of game. - if len(action_probs) == 0: - break - node.expand(action_probs) - # Greedily select next move. - action, node = node.select() - state.do_move(action) - - # Evaluate the leaf using a weighted combination of the value network, v, and the game's - # winner, z, according to the rollout policy. If lmbda is equal to 0 or 1, only one of - # these contributes and the other may be skipped. Both v and z are from the perspective - # of the current player (+1 is good, -1 is bad). - v = self._value(state) if self._lmbda < 1 else 0 - z = self._evaluate_rollout(state, self._rollout_limit) if self._lmbda > 0 else 0 - leaf_value = (1 - self._lmbda) * v + self._lmbda * z - - # Update value and visit count of nodes in this traversal. - node.update_recursive(leaf_value, self._c_puct) - - def _evaluate_rollout(self, state, limit): - """Use the rollout policy to play until the end of the game, returning +1 if the current - player wins, -1 if the opponent wins, and 0 if it is a tie. - """ - player = state.get_current_player() - for i in range(limit): - action_probs = self._rollout(state) - if len(action_probs) == 0: - break - max_action = max(action_probs, key=lambda (a, p): p)[0] - state.do_move(max_action) - else: - # If no break from the loop, issue a warning. - print "WARNING: rollout reached move limit" - winner = state.get_winner() - if winner == 0: - return 0 - else: - return 1 if winner == player else -1 - - def get_move(self, state): - """Runs all playouts sequentially and returns the most visited action. - - Arguments: - state -- the current state, including both game state and the current player. - - Returns: - the selected action - """ - for n in range(self._n_playout): - state_copy = state.copy() - self._playout(state_copy, self._L) - - # chosen action is the *most visited child*, not the highest-value one - # (they are the same as self._n_playout gets large). - return max(self._root._children.iteritems(), key=lambda (a, n): n._n_visits)[0] - - def update_with_move(self, last_move): - """Step forward in the tree, keeping everything we already know about the subtree, assuming - that get_move() has been called already. Siblings of the new root will be garbage-collected. 
- """ - if last_move in self._root._children: - self._root = self._root._children[last_move] - self._root._parent = None - else: - self._root = TreeNode(None, 1.0) + """A simple (and slow) single-threaded implementation of Monte Carlo Tree Search. + + Search works by exploring moves randomly according to the given policy up to a certain + depth, which is relatively small given the search space. "Leaves" at this depth are assigned a + value comprising a weighted combination of (1) the value function evaluated at that leaf, and + (2) the result of finishing the game from that leaf according to the 'rollout' policy. The + probability of revisiting a node changes over the course of the many playouts according to its + estimated value. Ultimately the most visited node is returned as the next action, not the most + valued node. + + The term "playout" refers to a single search from the root, whereas "rollout" refers to the + fast evaluation from leaf nodes to the end of the game. + """ + + def __init__(self, value_fn, policy_fn, rollout_policy_fn, lmbda=0.5, c_puct=5, rollout_limit=500, playout_depth=20, n_playout=10000): + """Arguments: + value_fn -- a function that takes in a state and ouputs a score in [-1, 1], i.e. the + expected value of the end game score from the current player's perspective. + policy_fn -- a function that takes in a state and outputs a list of (action, probability) + tuples for the current player. + rollout_policy_fn -- a coarse, fast version of policy_fn used in the rollout phase. + lmbda -- controls the relative weight of the value network and fast rollout policy result + in determining the value of a leaf node. lmbda must be in [0, 1], where 0 means use only + the value network and 1 means use only the result from the rollout. + c_puct -- a number in (0, inf) that controls how quickly exploration converges to the maximum- + value policy, where a higher value means relying on the prior more, and should be used only + in conjunction with a large value for n_playout. + """ + self._root = TreeNode(None, 1.0) + self._value = value_fn + self._policy = policy_fn + self._rollout = rollout_policy_fn + self._lmbda = lmbda + self._c_puct = c_puct + self._rollout_limit = rollout_limit + self._L = playout_depth + self._n_playout = n_playout + + def _playout(self, state, leaf_depth): + """Run a single playout from the root to the given depth, getting a value at the leaf and + propagating it back through its parents. State is modified in-place, so a copy must be + provided. + + Arguments: + state -- a copy of the state. + leaf_depth -- after this many moves, leaves are evaluated. + + Returns: + None + """ + node = self._root + for i in range(leaf_depth): + # Only expand node if it has not already been done. Existing nodes already know their + # prior. + if node.is_leaf(): + action_probs = self._policy(state) + # Check for end of game. + if len(action_probs) == 0: + break + node.expand(action_probs) + # Greedily select next move. + action, node = node.select() + state.do_move(action) + + # Evaluate the leaf using a weighted combination of the value network, v, and the game's + # winner, z, according to the rollout policy. If lmbda is equal to 0 or 1, only one of + # these contributes and the other may be skipped. Both v and z are from the perspective + # of the current player (+1 is good, -1 is bad). 
+ v = self._value(state) if self._lmbda < 1 else 0 + z = self._evaluate_rollout(state, self._rollout_limit) if self._lmbda > 0 else 0 + leaf_value = (1 - self._lmbda) * v + self._lmbda * z + + # Update value and visit count of nodes in this traversal. + node.update_recursive(leaf_value, self._c_puct) + + def _evaluate_rollout(self, state, limit): + """Use the rollout policy to play until the end of the game, returning +1 if the current + player wins, -1 if the opponent wins, and 0 if it is a tie. + """ + player = state.get_current_player() + for i in range(limit): + action_probs = self._rollout(state) + if len(action_probs) == 0: + break + max_action = max(action_probs, key=lambda (a, p): p)[0] + state.do_move(max_action) + else: + # If no break from the loop, issue a warning. + print "WARNING: rollout reached move limit" + winner = state.get_winner() + if winner == 0: + return 0 + else: + return 1 if winner == player else -1 + + def get_move(self, state): + """Runs all playouts sequentially and returns the most visited action. + + Arguments: + state -- the current state, including both game state and the current player. + + Returns: + the selected action + """ + for n in range(self._n_playout): + state_copy = state.copy() + self._playout(state_copy, self._L) + + # chosen action is the *most visited child*, not the highest-value one + # (they are the same as self._n_playout gets large). + return max(self._root._children.iteritems(), key=lambda (a, n): n._n_visits)[0] + + def update_with_move(self, last_move): + """Step forward in the tree, keeping everything we already know about the subtree, assuming + that get_move() has been called already. Siblings of the new root will be garbage-collected. + """ + if last_move in self._root._children: + self._root = self._root._children[last_move] + self._root._parent = None + else: + self._root = TreeNode(None, 1.0) class ParallelMCTS(MCTS): - pass + pass diff --git a/AlphaGo/models/nn_util.py b/AlphaGo/models/nn_util.py index 62696acd2..2f29211ae 100644 --- a/AlphaGo/models/nn_util.py +++ b/AlphaGo/models/nn_util.py @@ -6,124 +6,124 @@ class NeuralNetBase(object): - """Base class for neural network classes handling feature processing, construction - of a 'forward' function, etc. - """ - - # keep track of subclasses to make generic saving/loading cleaner. - # subclasses can be 'registered' with the @neuralnet decorator - subclasses = {} - - def __init__(self, feature_list, **kwargs): - """create a neural net object that preprocesses according to feature_list and uses - a neural network specified by keyword arguments (using subclass' create_network()) - - optional argument: init_network (boolean). If set to False, skips initializing - self.model and self.forward and the calling function should set them. - """ - self.preprocessor = Preprocess(feature_list) - kwargs["input_dim"] = self.preprocessor.output_dim - - if kwargs.get('init_network', True): - # self.__class__ refers to the subclass so that subclasses only - # need to override create_network() - self.model = self.__class__.create_network(**kwargs) - # self.forward is a lambda function wrapping a Keras function - self.forward = self._model_forward() - - def _model_forward(self): - """Construct a function using the current keras backend that, when given a batch - of inputs, simply processes them forward and returns the output - - This is as opposed to model.compile(), which takes a loss function - and training method. - - c.f. 
https://github.com/fchollet/keras/issues/1426 - """ - # The uses_learning_phase property is True if the model contains layers that behave - # differently during training and testing, e.g. Dropout or BatchNormalization. - # In these cases, K.learning_phase() is a reference to a backend variable that should - # be set to 0 when using the network in prediction mode and is automatically set to 1 - # during training. - if self.model.uses_learning_phase: - forward_function = K.function([self.model.input, K.learning_phase()], [self.model.output]) - - # the forward_function returns a list of tensors - # the first [0] gets the front tensor. - return lambda inpt: forward_function([inpt, 0])[0] - else: - # identical but without a second input argument for the learning phase - forward_function = K.function([self.model.input], [self.model.output]) - return lambda inpt: forward_function([inpt])[0] - - @staticmethod - def load_model(json_file): - """create a new neural net object from the architecture specified in json_file - """ - with open(json_file, 'r') as f: - object_specs = json.load(f) - - # Create object; may be a subclass of networks saved in specs['class'] - class_name = object_specs.get('class', 'CNNPolicy') - try: - network_class = NeuralNetBase.subclasses[class_name] - except KeyError: - raise ValueError("Unknown neural network type in json file: {}\n(was it registered with the @neuralnet decorator?)".format(class_name)) - - # create new object - new_net = network_class(object_specs['feature_list'], init_network=False) - - new_net.model = model_from_json(object_specs['keras_model'], custom_objects={'Bias': Bias}) - if 'weights_file' in object_specs: - new_net.model.load_weights(object_specs['weights_file']) - new_net.forward = new_net._model_forward() - return new_net - - def save_model(self, json_file, weights_file=None): - """write the network model and preprocessing features to the specified file - - If a weights_file (.hdf5 extension) is also specified, model weights are also - saved to that file and will be reloaded automatically in a call to load_model - """ - # this looks odd because we are serializing a model with json as a string - # then making that the value of an object which is then serialized as - # json again. - # It's not as crazy as it looks. A Network has 2 moving parts - the - # feature preprocessing and the neural net, each of which gets a top-level - # entry in the saved file. Keras just happens to serialize models with JSON - # as well. Note how this format makes load_model fairly clean as well. - object_specs = { - 'class': self.__class__.__name__, - 'keras_model': self.model.to_json(), - 'feature_list': self.preprocessor.feature_list - } - if weights_file is not None: - self.model.save_weights(weights_file) - object_specs['weights_file'] = weights_file - # use the json module to write object_specs to file - with open(json_file, 'w') as f: - json.dump(object_specs, f) + """Base class for neural network classes handling feature processing, construction + of a 'forward' function, etc. + """ + + # keep track of subclasses to make generic saving/loading cleaner. + # subclasses can be 'registered' with the @neuralnet decorator + subclasses = {} + + def __init__(self, feature_list, **kwargs): + """create a neural net object that preprocesses according to feature_list and uses + a neural network specified by keyword arguments (using subclass' create_network()) + + optional argument: init_network (boolean). 
If set to False, skips initializing + self.model and self.forward and the calling function should set them. + """ + self.preprocessor = Preprocess(feature_list) + kwargs["input_dim"] = self.preprocessor.output_dim + + if kwargs.get('init_network', True): + # self.__class__ refers to the subclass so that subclasses only + # need to override create_network() + self.model = self.__class__.create_network(**kwargs) + # self.forward is a lambda function wrapping a Keras function + self.forward = self._model_forward() + + def _model_forward(self): + """Construct a function using the current keras backend that, when given a batch + of inputs, simply processes them forward and returns the output + + This is as opposed to model.compile(), which takes a loss function + and training method. + + c.f. https://github.com/fchollet/keras/issues/1426 + """ + # The uses_learning_phase property is True if the model contains layers that behave + # differently during training and testing, e.g. Dropout or BatchNormalization. + # In these cases, K.learning_phase() is a reference to a backend variable that should + # be set to 0 when using the network in prediction mode and is automatically set to 1 + # during training. + if self.model.uses_learning_phase: + forward_function = K.function([self.model.input, K.learning_phase()], [self.model.output]) + + # the forward_function returns a list of tensors + # the first [0] gets the front tensor. + return lambda inpt: forward_function([inpt, 0])[0] + else: + # identical but without a second input argument for the learning phase + forward_function = K.function([self.model.input], [self.model.output]) + return lambda inpt: forward_function([inpt])[0] + + @staticmethod + def load_model(json_file): + """create a new neural net object from the architecture specified in json_file + """ + with open(json_file, 'r') as f: + object_specs = json.load(f) + + # Create object; may be a subclass of networks saved in specs['class'] + class_name = object_specs.get('class', 'CNNPolicy') + try: + network_class = NeuralNetBase.subclasses[class_name] + except KeyError: + raise ValueError("Unknown neural network type in json file: {}\n(was it registered with the @neuralnet decorator?)".format(class_name)) + + # create new object + new_net = network_class(object_specs['feature_list'], init_network=False) + + new_net.model = model_from_json(object_specs['keras_model'], custom_objects={'Bias': Bias}) + if 'weights_file' in object_specs: + new_net.model.load_weights(object_specs['weights_file']) + new_net.forward = new_net._model_forward() + return new_net + + def save_model(self, json_file, weights_file=None): + """write the network model and preprocessing features to the specified file + + If a weights_file (.hdf5 extension) is also specified, model weights are also + saved to that file and will be reloaded automatically in a call to load_model + """ + # this looks odd because we are serializing a model with json as a string + # then making that the value of an object which is then serialized as + # json again. + # It's not as crazy as it looks. A Network has 2 moving parts - the + # feature preprocessing and the neural net, each of which gets a top-level + # entry in the saved file. Keras just happens to serialize models with JSON + # as well. Note how this format makes load_model fairly clean as well. 
+ object_specs = { + 'class': self.__class__.__name__, + 'keras_model': self.model.to_json(), + 'feature_list': self.preprocessor.feature_list + } + if weights_file is not None: + self.model.save_weights(weights_file) + object_specs['weights_file'] = weights_file + # use the json module to write object_specs to file + with open(json_file, 'w') as f: + json.dump(object_specs, f) def neuralnet(cls): - """Class decorator for registering subclasses of NeuralNetBase - """ - NeuralNetBase.subclasses[cls.__name__] = cls - return cls + """Class decorator for registering subclasses of NeuralNetBase + """ + NeuralNetBase.subclasses[cls.__name__] = cls + return cls class Bias(Layer): - """Custom keras layer that simply adds a scalar bias to each location in the input + """Custom keras layer that simply adds a scalar bias to each location in the input - Largely copied from the keras docs: - http://keras.io/layers/writing-your-own-keras-layers/#writing-your-own-keras-layers - """ - def __init__(self, **kwargs): - super(Bias, self).__init__(**kwargs) + Largely copied from the keras docs: + http://keras.io/layers/writing-your-own-keras-layers/#writing-your-own-keras-layers + """ + def __init__(self, **kwargs): + super(Bias, self).__init__(**kwargs) - def build(self, input_shape): - self.W = K.zeros(input_shape[1:]) - self.trainable_weights = [self.W] + def build(self, input_shape): + self.W = K.zeros(input_shape[1:]) + self.trainable_weights = [self.W] - def call(self, x, mask=None): - return x + self.W + def call(self, x, mask=None): + return x + self.W diff --git a/AlphaGo/models/policy.py b/AlphaGo/models/policy.py index bf65200be..c16058cc2 100644 --- a/AlphaGo/models/policy.py +++ b/AlphaGo/models/policy.py @@ -8,251 +8,251 @@ @neuralnet class CNNPolicy(NeuralNetBase): - """uses a convolutional neural network to evaluate the state of the game - and compute a probability distribution over the next action - """ - - def _select_moves_and_normalize(self, nn_output, moves, size): - """helper function to normalize a distribution over the given list of moves - and return a list of (move, prob) tuples - """ - if len(moves) == 0: - return [] - move_indices = [flatten_idx(m, size) for m in moves] - # get network activations at legal move locations - distribution = nn_output[move_indices] - distribution = distribution / distribution.sum() - return zip(moves, distribution) - - def batch_eval_state(self, states, moves_lists=None): - """Given a list of states, evaluates them all at once to make best use of GPU - batching capabilities. 
- - Analogous to [eval_state(s) for s in states] - - Returns: a parallel list of move distributions as in eval_state - """ - n_states = len(states) - if n_states == 0: - return [] - state_size = states[0].size - if not all([st.size == state_size for st in states]): - raise ValueError("all states must have the same size") - # concatenate together all one-hot encoded states along the 'batch' dimension - nn_input = np.concatenate([self.preprocessor.state_to_tensor(s) for s in states], axis=0) - # pass all input through the network at once (backend makes use of batches if len(states) is large) - network_output = self.forward(nn_input) - # default move lists to all legal moves - moves_lists = moves_lists or [st.get_legal_moves() for st in states] - results = [None] * n_states - for i in range(n_states): - results[i] = self._select_moves_and_normalize(network_output[i], moves_lists[i], state_size) - return results - - def eval_state(self, state, moves=None): - """Given a GameState object, returns a list of (action, probability) pairs - according to the network outputs - - If a list of moves is specified, only those moves are kept in the distribution - """ - tensor = self.preprocessor.state_to_tensor(state) - # run the tensor through the network - network_output = self.forward(tensor) - moves = moves or state.get_legal_moves() - return self._select_moves_and_normalize(network_output[0], moves, state.size) - - @staticmethod - def create_network(**kwargs): - """construct a convolutional neural network. - - Keword Arguments: - - input_dim: depth of features to be processed by first layer (no default) - - board: width of the go board to be processed (default 19) - - filters_per_layer: number of filters used on every layer (default 128) - - layers: number of convolutional steps (default 12) - - filter_width_K: (where K is between 1 and ) width of filter on - layer K (default 3 except 1st layer which defaults to 5). - Must be odd. 
- """ - defaults = { - "board": 19, - "filters_per_layer": 128, - "layers": 12, - "filter_width_1": 5 - } - # copy defaults, but override with anything in kwargs - params = defaults - params.update(kwargs) - - # create the network: - # a series of zero-paddings followed by convolutions - # such that the output dimensions are also board x board - network = Sequential() - - # create first layer - network.add(convolutional.Convolution2D( - input_shape=(params["input_dim"], params["board"], params["board"]), - nb_filter=params["filters_per_layer"], - nb_row=params["filter_width_1"], - nb_col=params["filter_width_1"], - init='uniform', - activation='relu', - border_mode='same')) - - # create all other layers - for i in range(2, params["layers"] + 1): - # use filter_width_K if it is there, otherwise use 3 - filter_key = "filter_width_%d" % i - filter_width = params.get(filter_key, 3) - network.add(convolutional.Convolution2D( - nb_filter=params["filters_per_layer"], - nb_row=filter_width, - nb_col=filter_width, - init='uniform', - activation='relu', - border_mode='same')) - - # the last layer maps each feature to a number - network.add(convolutional.Convolution2D( - nb_filter=1, - nb_row=1, - nb_col=1, - init='uniform', - border_mode='same')) - # reshape output to be board x board - network.add(Flatten()) - # add a bias to each board location - network.add(Bias()) - # softmax makes it into a probability distribution - network.add(Activation('softmax')) - - return network + """uses a convolutional neural network to evaluate the state of the game + and compute a probability distribution over the next action + """ + + def _select_moves_and_normalize(self, nn_output, moves, size): + """helper function to normalize a distribution over the given list of moves + and return a list of (move, prob) tuples + """ + if len(moves) == 0: + return [] + move_indices = [flatten_idx(m, size) for m in moves] + # get network activations at legal move locations + distribution = nn_output[move_indices] + distribution = distribution / distribution.sum() + return zip(moves, distribution) + + def batch_eval_state(self, states, moves_lists=None): + """Given a list of states, evaluates them all at once to make best use of GPU + batching capabilities. 
+ + Analogous to [eval_state(s) for s in states] + + Returns: a parallel list of move distributions as in eval_state + """ + n_states = len(states) + if n_states == 0: + return [] + state_size = states[0].size + if not all([st.size == state_size for st in states]): + raise ValueError("all states must have the same size") + # concatenate together all one-hot encoded states along the 'batch' dimension + nn_input = np.concatenate([self.preprocessor.state_to_tensor(s) for s in states], axis=0) + # pass all input through the network at once (backend makes use of batches if len(states) is large) + network_output = self.forward(nn_input) + # default move lists to all legal moves + moves_lists = moves_lists or [st.get_legal_moves() for st in states] + results = [None] * n_states + for i in range(n_states): + results[i] = self._select_moves_and_normalize(network_output[i], moves_lists[i], state_size) + return results + + def eval_state(self, state, moves=None): + """Given a GameState object, returns a list of (action, probability) pairs + according to the network outputs + + If a list of moves is specified, only those moves are kept in the distribution + """ + tensor = self.preprocessor.state_to_tensor(state) + # run the tensor through the network + network_output = self.forward(tensor) + moves = moves or state.get_legal_moves() + return self._select_moves_and_normalize(network_output[0], moves, state.size) + + @staticmethod + def create_network(**kwargs): + """construct a convolutional neural network. + + Keword Arguments: + - input_dim: depth of features to be processed by first layer (no default) + - board: width of the go board to be processed (default 19) + - filters_per_layer: number of filters used on every layer (default 128) + - layers: number of convolutional steps (default 12) + - filter_width_K: (where K is between 1 and ) width of filter on + layer K (default 3 except 1st layer which defaults to 5). + Must be odd. + """ + defaults = { + "board": 19, + "filters_per_layer": 128, + "layers": 12, + "filter_width_1": 5 + } + # copy defaults, but override with anything in kwargs + params = defaults + params.update(kwargs) + + # create the network: + # a series of zero-paddings followed by convolutions + # such that the output dimensions are also board x board + network = Sequential() + + # create first layer + network.add(convolutional.Convolution2D( + input_shape=(params["input_dim"], params["board"], params["board"]), + nb_filter=params["filters_per_layer"], + nb_row=params["filter_width_1"], + nb_col=params["filter_width_1"], + init='uniform', + activation='relu', + border_mode='same')) + + # create all other layers + for i in range(2, params["layers"] + 1): + # use filter_width_K if it is there, otherwise use 3 + filter_key = "filter_width_%d" % i + filter_width = params.get(filter_key, 3) + network.add(convolutional.Convolution2D( + nb_filter=params["filters_per_layer"], + nb_row=filter_width, + nb_col=filter_width, + init='uniform', + activation='relu', + border_mode='same')) + + # the last layer maps each feature to a number + network.add(convolutional.Convolution2D( + nb_filter=1, + nb_row=1, + nb_col=1, + init='uniform', + border_mode='same')) + # reshape output to be board x board + network.add(Flatten()) + # add a bias to each board location + network.add(Bias()) + # softmax makes it into a probability distribution + network.add(Activation('softmax')) + + return network @neuralnet class ResnetPolicy(CNNPolicy): - """Residual network architecture as per He at al. 
2015 - """ - @staticmethod - def create_network(**kwargs): - """construct a convolutional neural network with Resnet-style skip connections. - Arguments are the same as with the default CNNPolicy network, except the default - number of layers is 20 plus a new n_skip parameter - - Keword Arguments: - - input_dim: depth of features to be processed by first layer (no default) - - board: width of the go board to be processed (default 19) - - filters_per_layer: number of filters used on every layer (default 128) - - layers: number of convolutional steps (default 20) - - filter_width_K: (where K is between 1 and ) width of filter on - layer K (default 3 except 1st layer which defaults to 5). - Must be odd. - - n_skip_K: (where K is as in filter_width_K) number of convolutional - layers to skip with the linear path starting at K. Only valid - at K >= 1. (Each layer defaults to 1) - - Note that n_skip_1=s means that the next valid value of n_skip_* is 3 - - A diagram may help explain (numbers indicate layer): - - 1 2 3 4 5 6 - I--C -- B -- R -- C -- B -- R -- C -- M -- B -- R -- C -- B -- R -- C -- B -- R -- C -- M ... M -- R -- F -- O - \___________________________/ \____________________________________________________/ \ ... / - [n_skip_1 = 2] [n_skip_3 = 3] - - I - input - B - BatchNormalization - R - ReLU - C - Conv2D - F - Flatten - O - output - M - merge - - The input is always passed through a Conv2D layer, the output of which layer is counted as '1'. - Each subsequent [R -- C] block is counted as one 'layer'. The 'merge' layer isn't counted; hence - if n_skip_1 is 2, the next valid skip parameter is n_skip_3, which will start at the output - of the merge - """ - defaults = { - "board": 19, - "filters_per_layer": 128, - "layers": 20, - "filter_width_1": 5 - } - # copy defaults, but override with anything in kwargs - params = defaults - params.update(kwargs) - - # create the network using Keras' functional API, - # since this isn't 'Sequential' - model_input = Input(shape=(params["input_dim"], params["board"], params["board"])) - - # create first layer - convolution_path = convolutional.Convolution2D( - input_shape=(), - nb_filter=params["filters_per_layer"], - nb_row=params["filter_width_1"], - nb_col=params["filter_width_1"], - init='uniform', - activation='linear', # relu activations done inside resnet modules - border_mode='same')(model_input) - - def add_resnet_unit(path, K, **params): - """Add a resnet unit to path starting at layer 'K', - adding as many (ReLU + Conv2D) modules as specified by n_skip_K - - Returns new path and next layer index, i.e. 
K + n_skip_K, in a tuple - """ - # loosely based on https://github.com/keunwoochoi/residual_block_keras - # (see also keras docs here: http://keras.io/getting-started/functional-api-guide/#all-models-are-callable-just-like-layers) - - block_input = path - # use n_skip_K if it is there, default to 1 - skip_key = "n_skip_%d" % K - n_skip = params.get(skip_key, 1) - for i in range(n_skip): - layer = K + i - # add BatchNorm - path = BatchNormalization()(path) - # add ReLU - path = Activation('relu')(path) - # use filter_width_K if it is there, otherwise use 3 - filter_key = "filter_width_%d" % layer - filter_width = params.get(filter_key, 3) - # add Conv2D - path = convolutional.Convolution2D( - nb_filter=params["filters_per_layer"], - nb_row=filter_width, - nb_col=filter_width, - init='uniform', - activation='linear', - border_mode='same')(path) - # Merge 'input layer' with the path - path = merge([block_input, path], mode='sum') - return path, K + n_skip - - # create all other layers - layer = 1 - while layer < params['layers']: - convolution_path, layer = add_resnet_unit(convolution_path, layer, **params) - if layer > params['layers']: - print "Due to skipping, ended with {} layers instead of {}".format(layer, params['layers']) - - # since each layer's activation was linear, need one more ReLu - convolution_path = Activation('relu')(convolution_path) - - # the last layer maps each featuer to a number - convolution_path = convolutional.Convolution2D( - nb_filter=1, - nb_row=1, - nb_col=1, - init='uniform', - border_mode='same')(convolution_path) - # flatten output - network_output = Flatten()(convolution_path) - # add a bias to each board location - network_output = Bias()(network_output) - # softmax makes it into a probability distribution - network_output = Activation('softmax')(network_output) - - return Model(input=[model_input], output=[network_output]) + """Residual network architecture as per He at al. 2015 + """ + @staticmethod + def create_network(**kwargs): + """construct a convolutional neural network with Resnet-style skip connections. + Arguments are the same as with the default CNNPolicy network, except the default + number of layers is 20 plus a new n_skip parameter + + Keword Arguments: + - input_dim: depth of features to be processed by first layer (no default) + - board: width of the go board to be processed (default 19) + - filters_per_layer: number of filters used on every layer (default 128) + - layers: number of convolutional steps (default 20) + - filter_width_K: (where K is between 1 and ) width of filter on + layer K (default 3 except 1st layer which defaults to 5). + Must be odd. + - n_skip_K: (where K is as in filter_width_K) number of convolutional + layers to skip with the linear path starting at K. Only valid + at K >= 1. (Each layer defaults to 1) + + Note that n_skip_1=s means that the next valid value of n_skip_* is 3 + + A diagram may help explain (numbers indicate layer): + + 1 2 3 4 5 6 + I--C -- B -- R -- C -- B -- R -- C -- M -- B -- R -- C -- B -- R -- C -- B -- R -- C -- M ... M -- R -- F -- O + \___________________________/ \____________________________________________________/ \ ... / + [n_skip_1 = 2] [n_skip_3 = 3] + + I - input + B - BatchNormalization + R - ReLU + C - Conv2D + F - Flatten + O - output + M - merge + + The input is always passed through a Conv2D layer, the output of which layer is counted as '1'. + Each subsequent [R -- C] block is counted as one 'layer'. 
The 'merge' layer isn't counted; hence + if n_skip_1 is 2, the next valid skip parameter is n_skip_3, which will start at the output + of the merge + """ + defaults = { + "board": 19, + "filters_per_layer": 128, + "layers": 20, + "filter_width_1": 5 + } + # copy defaults, but override with anything in kwargs + params = defaults + params.update(kwargs) + + # create the network using Keras' functional API, + # since this isn't 'Sequential' + model_input = Input(shape=(params["input_dim"], params["board"], params["board"])) + + # create first layer + convolution_path = convolutional.Convolution2D( + input_shape=(), + nb_filter=params["filters_per_layer"], + nb_row=params["filter_width_1"], + nb_col=params["filter_width_1"], + init='uniform', + activation='linear', # relu activations done inside resnet modules + border_mode='same')(model_input) + + def add_resnet_unit(path, K, **params): + """Add a resnet unit to path starting at layer 'K', + adding as many (ReLU + Conv2D) modules as specified by n_skip_K + + Returns new path and next layer index, i.e. K + n_skip_K, in a tuple + """ + # loosely based on https://github.com/keunwoochoi/residual_block_keras + # (see also keras docs here: http://keras.io/getting-started/functional-api-guide/#all-models-are-callable-just-like-layers) + + block_input = path + # use n_skip_K if it is there, default to 1 + skip_key = "n_skip_%d" % K + n_skip = params.get(skip_key, 1) + for i in range(n_skip): + layer = K + i + # add BatchNorm + path = BatchNormalization()(path) + # add ReLU + path = Activation('relu')(path) + # use filter_width_K if it is there, otherwise use 3 + filter_key = "filter_width_%d" % layer + filter_width = params.get(filter_key, 3) + # add Conv2D + path = convolutional.Convolution2D( + nb_filter=params["filters_per_layer"], + nb_row=filter_width, + nb_col=filter_width, + init='uniform', + activation='linear', + border_mode='same')(path) + # Merge 'input layer' with the path + path = merge([block_input, path], mode='sum') + return path, K + n_skip + + # create all other layers + layer = 1 + while layer < params['layers']: + convolution_path, layer = add_resnet_unit(convolution_path, layer, **params) + if layer > params['layers']: + print "Due to skipping, ended with {} layers instead of {}".format(layer, params['layers']) + + # since each layer's activation was linear, need one more ReLu + convolution_path = Activation('relu')(convolution_path) + + # the last layer maps each featuer to a number + convolution_path = convolutional.Convolution2D( + nb_filter=1, + nb_row=1, + nb_col=1, + init='uniform', + border_mode='same')(convolution_path) + # flatten output + network_output = Flatten()(convolution_path) + # add a bias to each board location + network_output = Bias()(network_output) + # softmax makes it into a probability distribution + network_output = Activation('softmax')(network_output) + + return Model(input=[model_input], output=[network_output]) diff --git a/AlphaGo/preprocessing/game_converter.py b/AlphaGo/preprocessing/game_converter.py index de606aabd..6324d254b 100644 --- a/AlphaGo/preprocessing/game_converter.py +++ b/AlphaGo/preprocessing/game_converter.py @@ -10,209 +10,209 @@ class SizeMismatchError(Exception): - pass + pass class game_converter: - def __init__(self, features): - self.feature_processor = Preprocess(features) - self.n_features = self.feature_processor.output_dim - - def convert_game(self, file_name, bd_size): - """Read the given SGF file into an iterable of (input,output) pairs - for neural network training 
- - Each input is a GameState converted into one-hot neural net features - Each output is an action as an (x,y) pair (passes are skipped) - - If this game's size does not match bd_size, a SizeMismatchError is raised - """ - - with open(file_name, 'r') as file_object: - state_action_iterator = sgf_iter_states(file_object.read(), include_end=False) - - for (state, move, player) in state_action_iterator: - if state.size != bd_size: - raise SizeMismatchError() - if move != go.PASS_MOVE: - nn_input = self.feature_processor.state_to_tensor(state) - yield (nn_input, move) - - def sgfs_to_hdf5(self, sgf_files, hdf5_file, bd_size=19, ignore_errors=True, verbose=False): - """Convert all files in the iterable sgf_files into an hdf5 group to be stored in hdf5_file - - Arguments: - - sgf_files : an iterable of relative or absolute paths to SGF files - - hdf5_file : the name of the HDF5 where features will be saved - - bd_size : side length of board of games that are loaded - - ignore_errors : if True, issues a Warning when there is an unknown exception rather than halting. Note - that sgf.ParseException and go.IllegalMove exceptions are always skipped - - The resulting file has the following properties: - states : dataset with shape (n_data, n_features, board width, board height) - actions : dataset with shape (n_data, 2) (actions are stored as x,y tuples of where the move was played) - file_offsets : group mapping from filenames to tuples of (index, length) - - For example, to find what positions in the dataset come from 'test.sgf': - index, length = file_offsets['test.sgf'] - test_states = states[index:index+length] - test_actions = actions[index:index+length] - """ - # TODO - also save feature list - - # make a hidden temporary file in case of a crash. - # on success, this is renamed to hdf5_file - tmp_file = os.path.join(os.path.dirname(hdf5_file), ".tmp." 
+ os.path.basename(hdf5_file)) - h5f = h5.File(tmp_file, 'w') - - try: - # see http://docs.h5py.org/en/latest/high/group.html#Group.create_dataset - states = h5f.require_dataset( - 'states', - dtype=np.uint8, - shape=(1, self.n_features, bd_size, bd_size), - maxshape=(None, self.n_features, bd_size, bd_size), # 'None' dimension allows it to grow arbitrarily - exact=False, # allow non-uint8 datasets to be loaded, coerced to uint8 - chunks=(64, self.n_features, bd_size, bd_size), # approximately 1MB chunks - compression="lzf") - actions = h5f.require_dataset( - 'actions', - dtype=np.uint8, - shape=(1, 2), - maxshape=(None, 2), - exact=False, - chunks=(1024, 2), - compression="lzf") - # 'file_offsets' is an HDF5 group so that 'file_name in file_offsets' is fast - file_offsets = h5f.require_group('file_offsets') - - if verbose: - print("created HDF5 dataset in {}".format(tmp_file)) - - next_idx = 0 - for file_name in sgf_files: - if verbose: - print(file_name) - # count number of state/action pairs yielded by this game - n_pairs = 0 - file_start_idx = next_idx - try: - for state, move in self.convert_game(file_name, bd_size): - if next_idx >= len(states): - states.resize((next_idx + 1, self.n_features, bd_size, bd_size)) - actions.resize((next_idx + 1, 2)) - states[next_idx] = state - actions[next_idx] = move - n_pairs += 1 - next_idx += 1 - except go.IllegalMove: - warnings.warn("Illegal Move encountered in %s\n\tdropping the remainder of the game" % file_name) - except sgf.ParseException: - warnings.warn("Could not parse %s\n\tdropping game" % file_name) - except SizeMismatchError: - warnings.warn("Skipping %s; wrong board size" % file_name) - except Exception as e: - # catch everything else - if ignore_errors: - warnings.warn("Unkown exception with file %s\n\t%s" % (file_name, e), stacklevel=2) - else: - raise e - finally: - if n_pairs > 0: - # '/' has special meaning in HDF5 key names, so they are replaced with ':' here - file_name_key = file_name.replace('/', ':') - file_offsets[file_name_key] = [file_start_idx, n_pairs] - if verbose: - print("\t%d state/action pairs extracted" % n_pairs) - elif verbose: - print("\t-no usable data-") - except Exception as e: - print("sgfs_to_hdf5 failed") - os.remove(tmp_file) - raise e - - if verbose: - print("finished. 
renaming %s to %s" % (tmp_file, hdf5_file)) - - # processing complete; rename tmp_file to hdf5_file - h5f.close() - os.rename(tmp_file, hdf5_file) + def __init__(self, features): + self.feature_processor = Preprocess(features) + self.n_features = self.feature_processor.output_dim + + def convert_game(self, file_name, bd_size): + """Read the given SGF file into an iterable of (input,output) pairs + for neural network training + + Each input is a GameState converted into one-hot neural net features + Each output is an action as an (x,y) pair (passes are skipped) + + If this game's size does not match bd_size, a SizeMismatchError is raised + """ + + with open(file_name, 'r') as file_object: + state_action_iterator = sgf_iter_states(file_object.read(), include_end=False) + + for (state, move, player) in state_action_iterator: + if state.size != bd_size: + raise SizeMismatchError() + if move != go.PASS_MOVE: + nn_input = self.feature_processor.state_to_tensor(state) + yield (nn_input, move) + + def sgfs_to_hdf5(self, sgf_files, hdf5_file, bd_size=19, ignore_errors=True, verbose=False): + """Convert all files in the iterable sgf_files into an hdf5 group to be stored in hdf5_file + + Arguments: + - sgf_files : an iterable of relative or absolute paths to SGF files + - hdf5_file : the name of the HDF5 where features will be saved + - bd_size : side length of board of games that are loaded + - ignore_errors : if True, issues a Warning when there is an unknown exception rather than halting. Note + that sgf.ParseException and go.IllegalMove exceptions are always skipped + + The resulting file has the following properties: + states : dataset with shape (n_data, n_features, board width, board height) + actions : dataset with shape (n_data, 2) (actions are stored as x,y tuples of where the move was played) + file_offsets : group mapping from filenames to tuples of (index, length) + + For example, to find what positions in the dataset come from 'test.sgf': + index, length = file_offsets['test.sgf'] + test_states = states[index:index+length] + test_actions = actions[index:index+length] + """ + # TODO - also save feature list + + # make a hidden temporary file in case of a crash. + # on success, this is renamed to hdf5_file + tmp_file = os.path.join(os.path.dirname(hdf5_file), ".tmp." 
+ os.path.basename(hdf5_file)) + h5f = h5.File(tmp_file, 'w') + + try: + # see http://docs.h5py.org/en/latest/high/group.html#Group.create_dataset + states = h5f.require_dataset( + 'states', + dtype=np.uint8, + shape=(1, self.n_features, bd_size, bd_size), + maxshape=(None, self.n_features, bd_size, bd_size), # 'None' dimension allows it to grow arbitrarily + exact=False, # allow non-uint8 datasets to be loaded, coerced to uint8 + chunks=(64, self.n_features, bd_size, bd_size), # approximately 1MB chunks + compression="lzf") + actions = h5f.require_dataset( + 'actions', + dtype=np.uint8, + shape=(1, 2), + maxshape=(None, 2), + exact=False, + chunks=(1024, 2), + compression="lzf") + # 'file_offsets' is an HDF5 group so that 'file_name in file_offsets' is fast + file_offsets = h5f.require_group('file_offsets') + + if verbose: + print("created HDF5 dataset in {}".format(tmp_file)) + + next_idx = 0 + for file_name in sgf_files: + if verbose: + print(file_name) + # count number of state/action pairs yielded by this game + n_pairs = 0 + file_start_idx = next_idx + try: + for state, move in self.convert_game(file_name, bd_size): + if next_idx >= len(states): + states.resize((next_idx + 1, self.n_features, bd_size, bd_size)) + actions.resize((next_idx + 1, 2)) + states[next_idx] = state + actions[next_idx] = move + n_pairs += 1 + next_idx += 1 + except go.IllegalMove: + warnings.warn("Illegal Move encountered in %s\n\tdropping the remainder of the game" % file_name) + except sgf.ParseException: + warnings.warn("Could not parse %s\n\tdropping game" % file_name) + except SizeMismatchError: + warnings.warn("Skipping %s; wrong board size" % file_name) + except Exception as e: + # catch everything else + if ignore_errors: + warnings.warn("Unkown exception with file %s\n\t%s" % (file_name, e), stacklevel=2) + else: + raise e + finally: + if n_pairs > 0: + # '/' has special meaning in HDF5 key names, so they are replaced with ':' here + file_name_key = file_name.replace('/', ':') + file_offsets[file_name_key] = [file_start_idx, n_pairs] + if verbose: + print("\t%d state/action pairs extracted" % n_pairs) + elif verbose: + print("\t-no usable data-") + except Exception as e: + print("sgfs_to_hdf5 failed") + os.remove(tmp_file) + raise e + + if verbose: + print("finished. renaming %s to %s" % (tmp_file, hdf5_file)) + + # processing complete; rename tmp_file to hdf5_file + h5f.close() + os.rename(tmp_file, hdf5_file) def run_game_converter(cmd_line_args=None): - """Run conversions. command-line args may be passed in as a list - """ - import argparse - import sys - - parser = argparse.ArgumentParser( - description='Prepare SGF Go game files for training the neural network model.', - epilog="Available features are: board, ones, turns_since, liberties,\ - capture_size, self_atari_size, liberties_after, sensibleness, and zeros.\ - Ladder features are not currently implemented") - parser.add_argument("--features", "-f", help="Comma-separated list of features to compute and store or 'all'", default='all') - parser.add_argument("--outfile", "-o", help="Destination to write data (hdf5 file)", required=True) - parser.add_argument("--recurse", "-R", help="Set to recurse through directories searching for SGF files", default=False, action="store_true") - parser.add_argument("--directory", "-d", help="Directory containing SGF files to process. if not present, expects files from stdin", default=None) - parser.add_argument("--size", "-s", help="Size of the game board. 
SGFs not matching this are discarded with a warning", type=int, default=19) - parser.add_argument("--verbose", "-v", help="Turn on verbose mode", default=False, action="store_true") - - if cmd_line_args is None: - args = parser.parse_args() - else: - args = parser.parse_args(cmd_line_args) - - if args.features.lower() == 'all': - feature_list = [ - "board", - "ones", - "turns_since", - "liberties", - "capture_size", - "self_atari_size", - "liberties_after", - # "ladder_capture", - # "ladder_escape", - "sensibleness", - "zeros"] - else: - feature_list = args.features.split(",") - - if args.verbose: - print("using features", feature_list) - - converter = game_converter(feature_list) - - def _is_sgf(fname): - return fname.strip()[-4:] == ".sgf" - - def _walk_all_sgfs(root): - """a helper function/generator to get all SGF files in subdirectories of root - """ - for (dirpath, dirname, files) in os.walk(root): - for filename in files: - if _is_sgf(filename): - # yield the full (relative) path to the file - yield os.path.join(dirpath, filename) - - def _list_sgfs(path): - """helper function to get all SGF files in a directory (does not recurse) - """ - files = os.listdir(path) - return (os.path.join(path, f) for f in files if _is_sgf(f)) - - # get an iterator of SGF files according to command line args - if args.directory: - if args.recurse: - files = _walk_all_sgfs(args.directory) - else: - files = _list_sgfs(args.directory) - else: - files = (f.strip() for f in sys.stdin if _is_sgf(f)) - - converter.sgfs_to_hdf5(files, args.outfile, bd_size=args.size, verbose=args.verbose) + """Run conversions. command-line args may be passed in as a list + """ + import argparse + import sys + + parser = argparse.ArgumentParser( + description='Prepare SGF Go game files for training the neural network model.', + epilog="Available features are: board, ones, turns_since, liberties,\ + capture_size, self_atari_size, liberties_after, sensibleness, and zeros.\ + Ladder features are not currently implemented") + parser.add_argument("--features", "-f", help="Comma-separated list of features to compute and store or 'all'", default='all') + parser.add_argument("--outfile", "-o", help="Destination to write data (hdf5 file)", required=True) + parser.add_argument("--recurse", "-R", help="Set to recurse through directories searching for SGF files", default=False, action="store_true") + parser.add_argument("--directory", "-d", help="Directory containing SGF files to process. if not present, expects files from stdin", default=None) + parser.add_argument("--size", "-s", help="Size of the game board. 
SGFs not matching this are discarded with a warning", type=int, default=19) + parser.add_argument("--verbose", "-v", help="Turn on verbose mode", default=False, action="store_true") + + if cmd_line_args is None: + args = parser.parse_args() + else: + args = parser.parse_args(cmd_line_args) + + if args.features.lower() == 'all': + feature_list = [ + "board", + "ones", + "turns_since", + "liberties", + "capture_size", + "self_atari_size", + "liberties_after", + # "ladder_capture", + # "ladder_escape", + "sensibleness", + "zeros"] + else: + feature_list = args.features.split(",") + + if args.verbose: + print("using features", feature_list) + + converter = game_converter(feature_list) + + def _is_sgf(fname): + return fname.strip()[-4:] == ".sgf" + + def _walk_all_sgfs(root): + """a helper function/generator to get all SGF files in subdirectories of root + """ + for (dirpath, dirname, files) in os.walk(root): + for filename in files: + if _is_sgf(filename): + # yield the full (relative) path to the file + yield os.path.join(dirpath, filename) + + def _list_sgfs(path): + """helper function to get all SGF files in a directory (does not recurse) + """ + files = os.listdir(path) + return (os.path.join(path, f) for f in files if _is_sgf(f)) + + # get an iterator of SGF files according to command line args + if args.directory: + if args.recurse: + files = _walk_all_sgfs(args.directory) + else: + files = _list_sgfs(args.directory) + else: + files = (f.strip() for f in sys.stdin if _is_sgf(f)) + + converter.sgfs_to_hdf5(files, args.outfile, bd_size=args.size, verbose=args.verbose) if __name__ == '__main__': - run_game_converter() + run_game_converter() diff --git a/AlphaGo/preprocessing/preprocessing.py b/AlphaGo/preprocessing/preprocessing.py index 60732e3f5..12f9269ea 100644 --- a/AlphaGo/preprocessing/preprocessing.py +++ b/AlphaGo/preprocessing/preprocessing.py @@ -7,267 +7,267 @@ def get_board(state): - """A feature encoding WHITE BLACK and EMPTY on separate planes, but plane 0 - always refers to the current player and plane 1 to the opponent - """ - planes = np.zeros((3, state.size, state.size)) - planes[0, :, :] = state.board == state.current_player # own stone - planes[1, :, :] = state.board == -state.current_player # opponent stone - planes[2, :, :] = state.board == go.EMPTY # empty space - return planes + """A feature encoding WHITE BLACK and EMPTY on separate planes, but plane 0 + always refers to the current player and plane 1 to the opponent + """ + planes = np.zeros((3, state.size, state.size)) + planes[0, :, :] = state.board == state.current_player # own stone + planes[1, :, :] = state.board == -state.current_player # opponent stone + planes[2, :, :] = state.board == go.EMPTY # empty space + return planes def get_turns_since(state, maximum=8): - """A feature encoding the age of the stone at each location up to 'maximum' + """A feature encoding the age of the stone at each location up to 'maximum' - Note: - - the [maximum-1] plane is used for any stone with age greater than or equal to maximum - - EMPTY locations are all-zero features - """ - planes = np.zeros((maximum, state.size, state.size)) - for x in range(state.size): - for y in range(state.size): - if state.stone_ages[x][y] >= 0: - planes[min(state.stone_ages[x][y], maximum - 1), x, y] = 1 - return planes + Note: + - the [maximum-1] plane is used for any stone with age greater than or equal to maximum + - EMPTY locations are all-zero features + """ + planes = np.zeros((maximum, state.size, state.size)) + for x in range(state.size): 
+ for y in range(state.size): + if state.stone_ages[x][y] >= 0: + planes[min(state.stone_ages[x][y], maximum - 1), x, y] = 1 + return planes def get_liberties(state, maximum=8): - """A feature encoding the number of liberties of the group connected to the stone at - each location - - Note: - - there is no zero-liberties plane; the 0th plane indicates groups in atari - - the [maximum-1] plane is used for any stone with liberties greater than or equal to maximum - - EMPTY locations are all-zero features - """ - planes = np.zeros((maximum, state.size, state.size)) - for i in range(maximum): - # single liberties in plane zero (groups won't have zero), double liberties in plane one, etc - planes[i, state.liberty_counts == i + 1] = 1 - # the "maximum-or-more" case on the backmost plane - planes[maximum - 1, state.liberty_counts >= maximum] = 1 - return planes + """A feature encoding the number of liberties of the group connected to the stone at + each location + + Note: + - there is no zero-liberties plane; the 0th plane indicates groups in atari + - the [maximum-1] plane is used for any stone with liberties greater than or equal to maximum + - EMPTY locations are all-zero features + """ + planes = np.zeros((maximum, state.size, state.size)) + for i in range(maximum): + # single liberties in plane zero (groups won't have zero), double liberties in plane one, etc + planes[i, state.liberty_counts == i + 1] = 1 + # the "maximum-or-more" case on the backmost plane + planes[maximum - 1, state.liberty_counts >= maximum] = 1 + return planes def get_capture_size(state, maximum=8): - """A feature encoding the number of opponent stones that would be captured by playing at each location, - up to 'maximum' - - Note: - - we currently *do* treat the 0th plane as "capturing zero stones" - - the [maximum-1] plane is used for any capturable group of size greater than or equal to maximum-1 - - the 0th plane is used for legal moves that would not result in capture - - illegal move locations are all-zero features - """ - planes = np.zeros((maximum, state.size, state.size)) - for (x, y) in state.get_legal_moves(): - # multiple disconnected groups may be captured. hence we loop over - # groups and count sizes if captured. - n_captured = 0 - for neighbor_group in state.get_groups_around((x, y)): - # if the neighboring group is opponent stones and they have - # one liberty, it must be (x,y) and we are capturing them - # (note suicide and ko are not an issue because they are not - # legal moves) - (gx, gy) = next(iter(neighbor_group)) - if (state.liberty_counts[gx][gy] == 1) and (state.board[gx, gy] != state.current_player): - n_captured += len(state.group_sets[gx][gy]) - planes[min(n_captured, maximum - 1), x, y] = 1 - return planes + """A feature encoding the number of opponent stones that would be captured by playing at each location, + up to 'maximum' + + Note: + - we currently *do* treat the 0th plane as "capturing zero stones" + - the [maximum-1] plane is used for any capturable group of size greater than or equal to maximum-1 + - the 0th plane is used for legal moves that would not result in capture + - illegal move locations are all-zero features + """ + planes = np.zeros((maximum, state.size, state.size)) + for (x, y) in state.get_legal_moves(): + # multiple disconnected groups may be captured. hence we loop over + # groups and count sizes if captured. 
+ n_captured = 0 + for neighbor_group in state.get_groups_around((x, y)): + # if the neighboring group is opponent stones and they have + # one liberty, it must be (x,y) and we are capturing them + # (note suicide and ko are not an issue because they are not + # legal moves) + (gx, gy) = next(iter(neighbor_group)) + if (state.liberty_counts[gx][gy] == 1) and (state.board[gx, gy] != state.current_player): + n_captured += len(state.group_sets[gx][gy]) + planes[min(n_captured, maximum - 1), x, y] = 1 + return planes def get_self_atari_size(state, maximum=8): - """A feature encoding the size of the own-stone group that is put into atari by playing at a location - """ - planes = np.zeros((maximum, state.size, state.size)) - - for (x, y) in state.get_legal_moves(): - # make a copy of the liberty/group sets at (x,y) so we can manipulate them - lib_set_after = set(state.liberty_sets[x][y]) - group_set_after = set() - group_set_after.add((x, y)) - captured_stones = set() - for neighbor_group in state.get_groups_around((x, y)): - # if the neighboring group is of the same color as the current player - # then playing here will connect this stone to that group - (gx, gy) = next(iter(neighbor_group)) - if state.board[gx, gy] == state.current_player: - lib_set_after |= state.liberty_sets[gx][gy] - group_set_after |= state.group_sets[gx][gy] - # if instead neighboring group is opponent *and about to be captured* - # then we might gain new liberties - elif state.liberty_counts[gx][gy] == 1: - captured_stones |= state.group_sets[gx][gy] - # add captured stones to liberties if they are neighboring the 'group_set_after' - # i.e. if they will become liberties once capture is resolved - if len(captured_stones) > 0: - for (gx, gy) in group_set_after: - # intersection of group's neighbors and captured stones will become liberties - lib_set_after |= set(state._neighbors((gx, gy))) & captured_stones - if (x, y) in lib_set_after: - lib_set_after.remove((x, y)) - # check if this move resulted in atari - if len(lib_set_after) == 1: - group_size = len(group_set_after) - # 0th plane used for size=1, so group_size-1 is the index - planes[min(group_size - 1, maximum - 1), x, y] = 1 - return planes + """A feature encoding the size of the own-stone group that is put into atari by playing at a location + """ + planes = np.zeros((maximum, state.size, state.size)) + + for (x, y) in state.get_legal_moves(): + # make a copy of the liberty/group sets at (x,y) so we can manipulate them + lib_set_after = set(state.liberty_sets[x][y]) + group_set_after = set() + group_set_after.add((x, y)) + captured_stones = set() + for neighbor_group in state.get_groups_around((x, y)): + # if the neighboring group is of the same color as the current player + # then playing here will connect this stone to that group + (gx, gy) = next(iter(neighbor_group)) + if state.board[gx, gy] == state.current_player: + lib_set_after |= state.liberty_sets[gx][gy] + group_set_after |= state.group_sets[gx][gy] + # if instead neighboring group is opponent *and about to be captured* + # then we might gain new liberties + elif state.liberty_counts[gx][gy] == 1: + captured_stones |= state.group_sets[gx][gy] + # add captured stones to liberties if they are neighboring the 'group_set_after' + # i.e. 
if they will become liberties once capture is resolved + if len(captured_stones) > 0: + for (gx, gy) in group_set_after: + # intersection of group's neighbors and captured stones will become liberties + lib_set_after |= set(state._neighbors((gx, gy))) & captured_stones + if (x, y) in lib_set_after: + lib_set_after.remove((x, y)) + # check if this move resulted in atari + if len(lib_set_after) == 1: + group_size = len(group_set_after) + # 0th plane used for size=1, so group_size-1 is the index + planes[min(group_size - 1, maximum - 1), x, y] = 1 + return planes def get_liberties_after(state, maximum=8): - """A feature encoding what the number of liberties *would be* of the group connected to - the stone *if* played at a location - - Note: - - there is no zero-liberties plane; the 0th plane indicates groups in atari - - the [maximum-1] plane is used for any stone with liberties greater than or equal to maximum - - illegal move locations are all-zero features - """ - planes = np.zeros((maximum, state.size, state.size)) - # note - left as all zeros if not a legal move - for (x, y) in state.get_legal_moves(): - # make a copy of the set of liberties at (x,y) so we can add to it - lib_set_after = set(state.liberty_sets[x][y]) - group_set_after = set() - group_set_after.add((x, y)) - captured_stones = set() - for neighbor_group in state.get_groups_around((x, y)): - # if the neighboring group is of the same color as the current player - # then playing here will connect this stone to that group and - # therefore add in all that group's liberties - (gx, gy) = next(iter(neighbor_group)) - if state.board[gx, gy] == state.current_player: - lib_set_after |= state.liberty_sets[gx][gy] - group_set_after |= state.group_sets[gx][gy] - # if instead neighboring group is opponent *and about to be captured* - # then we might gain new liberties - elif state.liberty_counts[gx][gy] == 1: - captured_stones |= state.group_sets[gx][gy] - # add captured stones to liberties if they are neighboring the 'group_set_after' - # i.e. 
if they will become liberties once capture is resolved - if len(captured_stones) > 0: - for (gx, gy) in group_set_after: - # intersection of group's neighbors and captured stones will become liberties - lib_set_after |= set(state._neighbors((gx, gy))) & captured_stones - # (x,y) itself may have made its way back in, but shouldn't count - # since it's clearly not a liberty after playing there - if (x, y) in lib_set_after: - lib_set_after.remove((x, y)) - planes[min(maximum - 1, len(lib_set_after) - 1), x, y] = 1 - return planes + """A feature encoding what the number of liberties *would be* of the group connected to + the stone *if* played at a location + + Note: + - there is no zero-liberties plane; the 0th plane indicates groups in atari + - the [maximum-1] plane is used for any stone with liberties greater than or equal to maximum + - illegal move locations are all-zero features + """ + planes = np.zeros((maximum, state.size, state.size)) + # note - left as all zeros if not a legal move + for (x, y) in state.get_legal_moves(): + # make a copy of the set of liberties at (x,y) so we can add to it + lib_set_after = set(state.liberty_sets[x][y]) + group_set_after = set() + group_set_after.add((x, y)) + captured_stones = set() + for neighbor_group in state.get_groups_around((x, y)): + # if the neighboring group is of the same color as the current player + # then playing here will connect this stone to that group and + # therefore add in all that group's liberties + (gx, gy) = next(iter(neighbor_group)) + if state.board[gx, gy] == state.current_player: + lib_set_after |= state.liberty_sets[gx][gy] + group_set_after |= state.group_sets[gx][gy] + # if instead neighboring group is opponent *and about to be captured* + # then we might gain new liberties + elif state.liberty_counts[gx][gy] == 1: + captured_stones |= state.group_sets[gx][gy] + # add captured stones to liberties if they are neighboring the 'group_set_after' + # i.e. if they will become liberties once capture is resolved + if len(captured_stones) > 0: + for (gx, gy) in group_set_after: + # intersection of group's neighbors and captured stones will become liberties + lib_set_after |= set(state._neighbors((gx, gy))) & captured_stones + # (x,y) itself may have made its way back in, but shouldn't count + # since it's clearly not a liberty after playing there + if (x, y) in lib_set_after: + lib_set_after.remove((x, y)) + planes[min(maximum - 1, len(lib_set_after) - 1), x, y] = 1 + return planes def get_ladder_capture(state): - raise NotImplementedError() + raise NotImplementedError() def get_ladder_escape(state): - raise NotImplementedError() + raise NotImplementedError() def get_sensibleness(state): - """A move is 'sensible' if it is legal and if it does not fill the current_player's own eye - """ - feature = np.zeros((1, state.size, state.size)) - for (x, y) in state.get_legal_moves(include_eyes=False): - feature[0, x, y] = 1 - return feature + """A move is 'sensible' if it is legal and if it does not fill the current_player's own eye + """ + feature = np.zeros((1, state.size, state.size)) + for (x, y) in state.get_legal_moves(include_eyes=False): + feature[0, x, y] = 1 + return feature def get_legal(state): - """Zero at all illegal moves, one at all legal moves. Unlike sensibleness, no eye check is done - """ - feature = np.zeros((1, state.size, state.size)) - for (x, y) in state.get_legal_moves(): - feature[0, x, y] = 1 - return feature + """Zero at all illegal moves, one at all legal moves. 
Unlike sensibleness, no eye check is done + """ + feature = np.zeros((1, state.size, state.size)) + for (x, y) in state.get_legal_moves(): + feature[0, x, y] = 1 + return feature # named features and their sizes are defined here FEATURES = { - "board": { - "size": 3, - "function": get_board - }, - "ones": { - "size": 1, - "function": lambda state: np.ones((1, state.size, state.size)) - }, - "turns_since": { - "size": 8, - "function": get_turns_since - }, - "liberties": { - "size": 8, - "function": get_liberties - }, - "capture_size": { - "size": 8, - "function": get_capture_size - }, - "self_atari_size": { - "size": 8, - "function": get_self_atari_size - }, - "liberties_after": { - "size": 8, - "function": get_liberties_after - }, - "ladder_capture": { - "size": 1, - "function": get_ladder_capture - }, - "ladder_escape": { - "size": 1, - "function": get_ladder_escape - }, - "sensibleness": { - "size": 1, - "function": get_sensibleness - }, - "zeros": { - "size": 1, - "function": lambda state: np.zeros((1, state.size, state.size)) - }, - "legal": { - "size": 1, - "function": get_legal - } + "board": { + "size": 3, + "function": get_board + }, + "ones": { + "size": 1, + "function": lambda state: np.ones((1, state.size, state.size)) + }, + "turns_since": { + "size": 8, + "function": get_turns_since + }, + "liberties": { + "size": 8, + "function": get_liberties + }, + "capture_size": { + "size": 8, + "function": get_capture_size + }, + "self_atari_size": { + "size": 8, + "function": get_self_atari_size + }, + "liberties_after": { + "size": 8, + "function": get_liberties_after + }, + "ladder_capture": { + "size": 1, + "function": get_ladder_capture + }, + "ladder_escape": { + "size": 1, + "function": get_ladder_escape + }, + "sensibleness": { + "size": 1, + "function": get_sensibleness + }, + "zeros": { + "size": 1, + "function": lambda state: np.zeros((1, state.size, state.size)) + }, + "legal": { + "size": 1, + "function": get_legal + } } DEFAULT_FEATURES = [ - "board", "ones", "turns_since", "liberties", "capture_size", - "self_atari_size", "liberties_after", "ladder_capture", "ladder_escape", - "sensibleness", "zeros"] + "board", "ones", "turns_since", "liberties", "capture_size", + "self_atari_size", "liberties_after", "ladder_capture", "ladder_escape", + "sensibleness", "zeros"] class Preprocess(object): - """a class to convert from AlphaGo GameState objects to tensors of one-hot - features for NN inputs - """ - - def __init__(self, feature_list=DEFAULT_FEATURES): - """create a preprocessor object that will concatenate together the - given list of features - """ - - self.output_dim = 0 - self.feature_list = feature_list - self.processors = [None] * len(feature_list) - for i in range(len(feature_list)): - feat = feature_list[i].lower() - if feat in FEATURES: - self.processors[i] = FEATURES[feat]["function"] - self.output_dim += FEATURES[feat]["size"] - else: - raise ValueError("uknown feature: %s" % feat) - - def state_to_tensor(self, state): - """Convert a GameState to a Theano-compatible tensor - """ - feat_tensors = [proc(state) for proc in self.processors] - - # concatenate along feature dimension then add in a singleton 'batch' dimension - f, s = self.output_dim, state.size - return np.concatenate(feat_tensors).reshape((1, f, s, s)) + """a class to convert from AlphaGo GameState objects to tensors of one-hot + features for NN inputs + """ + + def __init__(self, feature_list=DEFAULT_FEATURES): + """create a preprocessor object that will concatenate together the + given list of features 
+ """ + + self.output_dim = 0 + self.feature_list = feature_list + self.processors = [None] * len(feature_list) + for i in range(len(feature_list)): + feat = feature_list[i].lower() + if feat in FEATURES: + self.processors[i] = FEATURES[feat]["function"] + self.output_dim += FEATURES[feat]["size"] + else: + raise ValueError("uknown feature: %s" % feat) + + def state_to_tensor(self, state): + """Convert a GameState to a Theano-compatible tensor + """ + feat_tensors = [proc(state) for proc in self.processors] + + # concatenate along feature dimension then add in a singleton 'batch' dimension + f, s = self.output_dim, state.size + return np.concatenate(feat_tensors).reshape((1, f, s, s)) diff --git a/AlphaGo/training/reinforcement_policy_trainer.py b/AlphaGo/training/reinforcement_policy_trainer.py index b4ab67771..8bf36cc94 100644 --- a/AlphaGo/training/reinforcement_policy_trainer.py +++ b/AlphaGo/training/reinforcement_policy_trainer.py @@ -12,277 +12,277 @@ class BatchedReinforcementLearningSGD(Optimizer): - '''A Keras Optimizer that sums gradients together for each game, applying them only once the - winner is known. - - It is the responsibility of the calling code to call set_current_game() before each example to - tell the optimizer for which game gradients should be accumulated, and to call set_result() to - tell the optimizer what the sign of the gradient for each game should be and when all games are - over. - - Arguments - lr: float >= 0. Learning rate. - ng: int > 0. Number of games played in parallel. Each one has its own cumulative gradient. - ''' - def __init__(self, lr=0.01, ng=20, **kwargs): - super(BatchedReinforcementLearningSGD, self).__init__(**kwargs) - self.__dict__.update(locals()) - self.lr = K.variable(lr) - self.cumulative_gradients = [] - self.num_games = ng - self.game_idx = K.variable(0) # which gradient to accumulate in the next batch. - self.gradient_sign = [K.variable(0) for _ in range(ng)] - self.running_games = K.variable(self.num_games) - - def set_current_game(self, game_idx): - K.set_value(self.game_idx, game_idx) - - def set_result(self, game_idx, won_game): - '''Mark the outcome of the game at index game_idx. Once all games are complete, updates - are automatically triggered in the next call to a keras fit function. - ''' - K.set_value(self.gradient_sign[game_idx], +1 if won_game else -1) - # Note: using '-= 1' would create a new variable, which would invalidate the dependencies - # in get_updates(). - K.set_value(self.running_games, K.get_value(self.running_games) - 1) - - def get_updates(self, params, constraints, loss): - # Note: get_updates is called *once* by keras. Its job is to return a set of 'update - # operations' to any K.variable (e.g. model weights or self.num_games). Updates are applied - # whenever Keras' train_function is evaluated, i.e. in every batch. Model.fit_on_batch() - # will trigger exactly one update. All updates use the 'old' value of parameters - there is - # no dependency on the order of the list of updates. - self.updates = [] - # Get expressions for gradients of model parameters. - grads = self.get_gradients(loss, params) - # Create a set of accumulated gradients, one for each game. - shapes = [K.get_variable_shape(p) for p in params] - self.cumulative_gradients = [[K.zeros(shape) for shape in shapes] for _ in range(self.num_games)] - - def conditional_update(cond, variable, new_value): - '''Helper function to create updates that only happen when cond is True. Writes to - self.updates and returns the new variable. 
- - Note: K.update(x, x) is cheap, but K.update_add(x, K.zeros_like(x)) can be expensive. - ''' - maybe_new_value = K.switch(cond, new_value, variable) - self.updates.append(K.update(variable, maybe_new_value)) - return maybe_new_value - - # Update cumulative gradient at index game_idx. This is done by returning an update for all - # gradients that is a no-op everywhere except for the game_idx'th one. When game_idx is - # changed by a call to set_current_game(), it will change the gradient that is getting - # accumulated. - # new_cumulative_gradients keeps references to the updated variables for use below in - # updating parameters with the freshly-accumulated gradients. - new_cumulative_gradients = [[None] * len(cgs) for cgs in self.cumulative_gradients] - for i, cgs in enumerate(self.cumulative_gradients): - for j, (g, cg) in enumerate(zip(grads, cgs)): - new_gradient = conditional_update(K.equal(self.game_idx, i), cg, cg + g) - new_cumulative_gradients[i][j] = new_gradient - - # Compute the net update to parameters, taking into account the sign of each cumulative - # gradient. - net_grads = [K.zeros_like(g) for g in grads] - for i, cgs in enumerate(new_cumulative_gradients): - for j, cg in enumerate(cgs): - net_grads[j] += self.gradient_sign[i] * cg - - # Trigger a full update when all games have finished. - self.trigger_update = K.lesser_equal(self.running_games, 0) - - # Update model parameters conditional on trigger_update. - for p, g in zip(params, net_grads): - new_p = p + g * self.lr - if p in constraints: - c = constraints[p] - new_p = c(new_p) - conditional_update(self.trigger_update, p, new_p) - - # 'reset' game counter and gradient signs when parameters are updated. - for sign in self.gradient_sign: - conditional_update(self.trigger_update, sign, K.variable(0)) - conditional_update(self.trigger_update, self.running_games, K.variable(self.num_games)) - return self.updates - - def get_config(self): - config = { - 'lr': float(K.get_value(self.lr)), - 'ng': self.num_games} - base_config = super(BatchedReinforcementLearningSGD, self).get_config() - return dict(list(base_config.items()) + list(config.items())) + '''A Keras Optimizer that sums gradients together for each game, applying them only once the + winner is known. + + It is the responsibility of the calling code to call set_current_game() before each example to + tell the optimizer for which game gradients should be accumulated, and to call set_result() to + tell the optimizer what the sign of the gradient for each game should be and when all games are + over. + + Arguments + lr: float >= 0. Learning rate. + ng: int > 0. Number of games played in parallel. Each one has its own cumulative gradient. + ''' + def __init__(self, lr=0.01, ng=20, **kwargs): + super(BatchedReinforcementLearningSGD, self).__init__(**kwargs) + self.__dict__.update(locals()) + self.lr = K.variable(lr) + self.cumulative_gradients = [] + self.num_games = ng + self.game_idx = K.variable(0) # which gradient to accumulate in the next batch. + self.gradient_sign = [K.variable(0) for _ in range(ng)] + self.running_games = K.variable(self.num_games) + + def set_current_game(self, game_idx): + K.set_value(self.game_idx, game_idx) + + def set_result(self, game_idx, won_game): + '''Mark the outcome of the game at index game_idx. Once all games are complete, updates + are automatically triggered in the next call to a keras fit function. 
+ ''' + K.set_value(self.gradient_sign[game_idx], +1 if won_game else -1) + # Note: using '-= 1' would create a new variable, which would invalidate the dependencies + # in get_updates(). + K.set_value(self.running_games, K.get_value(self.running_games) - 1) + + def get_updates(self, params, constraints, loss): + # Note: get_updates is called *once* by keras. Its job is to return a set of 'update + # operations' to any K.variable (e.g. model weights or self.num_games). Updates are applied + # whenever Keras' train_function is evaluated, i.e. in every batch. Model.fit_on_batch() + # will trigger exactly one update. All updates use the 'old' value of parameters - there is + # no dependency on the order of the list of updates. + self.updates = [] + # Get expressions for gradients of model parameters. + grads = self.get_gradients(loss, params) + # Create a set of accumulated gradients, one for each game. + shapes = [K.get_variable_shape(p) for p in params] + self.cumulative_gradients = [[K.zeros(shape) for shape in shapes] for _ in range(self.num_games)] + + def conditional_update(cond, variable, new_value): + '''Helper function to create updates that only happen when cond is True. Writes to + self.updates and returns the new variable. + + Note: K.update(x, x) is cheap, but K.update_add(x, K.zeros_like(x)) can be expensive. + ''' + maybe_new_value = K.switch(cond, new_value, variable) + self.updates.append(K.update(variable, maybe_new_value)) + return maybe_new_value + + # Update cumulative gradient at index game_idx. This is done by returning an update for all + # gradients that is a no-op everywhere except for the game_idx'th one. When game_idx is + # changed by a call to set_current_game(), it will change the gradient that is getting + # accumulated. + # new_cumulative_gradients keeps references to the updated variables for use below in + # updating parameters with the freshly-accumulated gradients. + new_cumulative_gradients = [[None] * len(cgs) for cgs in self.cumulative_gradients] + for i, cgs in enumerate(self.cumulative_gradients): + for j, (g, cg) in enumerate(zip(grads, cgs)): + new_gradient = conditional_update(K.equal(self.game_idx, i), cg, cg + g) + new_cumulative_gradients[i][j] = new_gradient + + # Compute the net update to parameters, taking into account the sign of each cumulative + # gradient. + net_grads = [K.zeros_like(g) for g in grads] + for i, cgs in enumerate(new_cumulative_gradients): + for j, cg in enumerate(cgs): + net_grads[j] += self.gradient_sign[i] * cg + + # Trigger a full update when all games have finished. + self.trigger_update = K.lesser_equal(self.running_games, 0) + + # Update model parameters conditional on trigger_update. + for p, g in zip(params, net_grads): + new_p = p + g * self.lr + if p in constraints: + c = constraints[p] + new_p = c(new_p) + conditional_update(self.trigger_update, p, new_p) + + # 'reset' game counter and gradient signs when parameters are updated. 
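The optimizer above keeps one accumulated gradient per parallel game and only touches the model weights once every game has reported a result; the applied update is the sum of each game's accumulated gradient multiplied by +1 for a win and -1 for a loss. A minimal NumPy sketch of that bookkeeping, detached from Keras (the class and method names here are illustrative and not part of this patch):

import numpy as np

class PerGameGradientBuffer(object):
    """Toy analogue of the accumulate-then-apply scheme used by the optimizer above."""

    def __init__(self, param_shape, num_games):
        self.cumulative = [np.zeros(param_shape) for _ in range(num_games)]
        self.signs = [0] * num_games  # +1 win, -1 loss, 0 result not yet known

    def accumulate(self, game_idx, grad):
        # called once per training example, for the game the example came from
        self.cumulative[game_idx] += grad

    def set_result(self, game_idx, won_game):
        self.signs[game_idx] = 1 if won_game else -1

    def apply(self, params, lr):
        # only meaningful once every game has a result
        assert all(s != 0 for s in self.signs)
        net = sum(s * g for s, g in zip(self.signs, self.cumulative))
        # gradient ascent, mirroring 'new_p = p + g * self.lr' in get_updates() above
        return params + lr * net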
+ for sign in self.gradient_sign: + conditional_update(self.trigger_update, sign, K.variable(0)) + conditional_update(self.trigger_update, self.running_games, K.variable(self.num_games)) + return self.updates + + def get_config(self): + config = { + 'lr': float(K.get_value(self.lr)), + 'ng': self.num_games} + base_config = super(BatchedReinforcementLearningSGD, self).get_config() + return dict(list(base_config.items()) + list(config.items())) def _make_training_pair(st, mv, preprocessor): - # Convert move to one-hot - st_tensor = preprocessor.state_to_tensor(st) - mv_tensor = np.zeros((1, st.size * st.size)) - mv_tensor[(0, flatten_idx(mv, st.size))] = 1 - return (st_tensor, mv_tensor) + # Convert move to one-hot + st_tensor = preprocessor.state_to_tensor(st) + mv_tensor = np.zeros((1, st.size * st.size)) + mv_tensor[(0, flatten_idx(mv, st.size))] = 1 + return (st_tensor, mv_tensor) def run_n_games(optimizer, learner, opponent, num_games): - '''Run num_games games to completion, calling train_batch() on each position the learner sees. - - (Note: optimizer only accumulates gradients in its update function until all games have finished) - ''' - board_size = learner.policy.model.input_shape[-1] - states = [GameState(size=board_size) for _ in range(num_games)] - learner_net = learner.policy.model - - # Start all odd games with moves by 'opponent'. Even games will have 'learner' black. - learner_color = [go.BLACK if i % 2 == 0 else go.WHITE for i in range(num_games)] - odd_states = states[1::2] - moves = opponent.get_moves(odd_states) - for st, mv in zip(odd_states, moves): - st.do_move(mv) - - current = learner - other = opponent - # Need to keep track of the index of unfinished states so that we can communicate which one is - # being updated to the optimizer. - idxs_to_unfinished_states = {i: states[i] for i in range(num_games)} - while len(idxs_to_unfinished_states) > 0: - # Get next moves by current player for all unfinished states. - moves = current.get_moves(idxs_to_unfinished_states.values()) - just_finished = [] - # Do each move to each state in order. - for (idx, state), mv in zip(idxs_to_unfinished_states.iteritems(), moves): - # Order is important here. We must first get the training pair on the unmodified state. - # Next, the state is updated and checked to see if the game is over. If it is over, the - # optimizer is notified via set_result. Finally, train_on_batch is called, which - # will trigger an update of all parameters only if set_result() has been called - # for all games already (so set_result must come before train_on_batch). - is_learnable = current is learner and mv is not go.PASS_MOVE - if is_learnable: - (X, y) = _make_training_pair(state, mv, learner.policy.preprocessor) - state.do_move(mv) - if state.is_end_of_game: - learner_is_winner = state.get_winner() == learner_color[idx] - optimizer.set_result(idx, learner_is_winner) - just_finished.append(idx) - if is_learnable: - optimizer.set_current_game(idx) - learner_net.train_on_batch(X, y) - - # Remove games that have finished from dict. - for idx in just_finished: - del idxs_to_unfinished_states[idx] - - # Swap 'current' and 'other' for next turn. - current, other = other, current - - # Return the win ratio. - wins = sum(state.get_winner() == pc for (state, pc) in zip(states, learner_color)) - return float(wins) / num_games + '''Run num_games games to completion, calling train_batch() on each position the learner sees. 
+ + (Note: optimizer only accumulates gradients in its update function until all games have finished) + ''' + board_size = learner.policy.model.input_shape[-1] + states = [GameState(size=board_size) for _ in range(num_games)] + learner_net = learner.policy.model + + # Start all odd games with moves by 'opponent'. Even games will have 'learner' black. + learner_color = [go.BLACK if i % 2 == 0 else go.WHITE for i in range(num_games)] + odd_states = states[1::2] + moves = opponent.get_moves(odd_states) + for st, mv in zip(odd_states, moves): + st.do_move(mv) + + current = learner + other = opponent + # Need to keep track of the index of unfinished states so that we can communicate which one is + # being updated to the optimizer. + idxs_to_unfinished_states = {i: states[i] for i in range(num_games)} + while len(idxs_to_unfinished_states) > 0: + # Get next moves by current player for all unfinished states. + moves = current.get_moves(idxs_to_unfinished_states.values()) + just_finished = [] + # Do each move to each state in order. + for (idx, state), mv in zip(idxs_to_unfinished_states.iteritems(), moves): + # Order is important here. We must first get the training pair on the unmodified state. + # Next, the state is updated and checked to see if the game is over. If it is over, the + # optimizer is notified via set_result. Finally, train_on_batch is called, which + # will trigger an update of all parameters only if set_result() has been called + # for all games already (so set_result must come before train_on_batch). + is_learnable = current is learner and mv is not go.PASS_MOVE + if is_learnable: + (X, y) = _make_training_pair(state, mv, learner.policy.preprocessor) + state.do_move(mv) + if state.is_end_of_game: + learner_is_winner = state.get_winner() == learner_color[idx] + optimizer.set_result(idx, learner_is_winner) + just_finished.append(idx) + if is_learnable: + optimizer.set_current_game(idx) + learner_net.train_on_batch(X, y) + + # Remove games that have finished from dict. + for idx in just_finished: + del idxs_to_unfinished_states[idx] + + # Swap 'current' and 'other' for next turn. + current, other = other, current + + # Return the win ratio. + wins = sum(state.get_winner() == pc for (state, pc) in zip(states, learner_color)) + return float(wins) / num_games def run_training(cmd_line_args=None): - import argparse - parser = argparse.ArgumentParser(description='Perform reinforcement learning to improve given policy network. Second phase of pipeline.') - parser.add_argument("model_json", help="Path to policy model JSON.") - parser.add_argument("initial_weights", help="Path to HDF5 file with inital weights (i.e. 
result of supervised training).") - parser.add_argument("out_directory", help="Path to folder where the model params and metadata will be saved after each epoch.") - parser.add_argument("--learning-rate", help="Keras learning rate (Default: 0.001)", type=float, default=0.001) - parser.add_argument("--policy-temp", help="Distribution temperature of players using policies (Default: 0.67)", type=float, default=0.67) - parser.add_argument("--save-every", help="Save policy as a new opponent every n batches (Default: 500)", type=int, default=500) - parser.add_argument("--game-batch", help="Number of games per mini-batch (Default: 20)", type=int, default=20) - parser.add_argument("--move-limit", help="Maximum number of moves per game", type=int, default=500) - parser.add_argument("--iterations", help="Number of training batches/iterations (Default: 10000)", type=int, default=10000) - parser.add_argument("--resume", help="Load latest weights in out_directory and resume", default=False, action="store_true") - parser.add_argument("--verbose", "-v", help="Turn on verbose mode", default=False, action="store_true") - # Baseline function (TODO) default lambda state: 0 (receives either file - # paths to JSON and weights or None, in which case it uses default baseline 0) - if cmd_line_args is None: - args = parser.parse_args() - else: - args = parser.parse_args(cmd_line_args) - - ZEROTH_FILE = "weights.00000.hdf5" - - if args.resume: - if not os.path.exists(os.path.join(args.out_directory, "metadata.json")): - raise ValueError("Cannot resume without existing output directory") - - if not os.path.exists(args.out_directory): - if args.verbose: - print "creating output directory {}".format(args.out_directory) - os.makedirs(args.out_directory) - - if not args.resume: - # make a copy of weights file, "weights.00000.hdf5" in the output directory - copyfile(args.initial_weights, os.path.join(args.out_directory, ZEROTH_FILE)) - if args.verbose: - print "copied {} to {}".format(args.initial_weights, os.path.join(args.out_directory, ZEROTH_FILE)) - player_weights = ZEROTH_FILE - else: - # if resuming, we expect initial_weights to be just a "weights.#####.hdf5" file, not a full path - args.initial_weights = os.path.join(args.out_directory, os.path.basename(args.initial_weights)) - if not os.path.exists(args.initial_weights): - raise ValueError("Cannot resume; weights {} do not exist".format(args.initial_weights)) - elif args.verbose: - print "Resuming with weights {}".format(args.initial_weights) - player_weights = os.path.basename(args.initial_weights) - - # Set initial conditions - policy = CNNPolicy.load_model(args.model_json) - policy.model.load_weights(args.initial_weights) - player = ProbabilisticPolicyPlayer(policy, temperature=args.policy_temp, move_limit=args.move_limit) - - # different opponents come from simply changing the weights of 'opponent.policy.model'. That - # is, only 'opp_policy' needs to be changed, and 'opponent' will change. 
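As the comment above notes, a new opponent is simply new weights loaded into the existing 'opponent' player; the player object itself never changes. Condensed into a sketch of what each iteration of run_training does when it samples from the opponent pool (the calls and the metadata layout are taken from the code below):

import os
import numpy as np

def sample_opponent(opponent, metadata, out_directory):
    # pick a previously saved checkpoint (possibly the current player itself) ...
    opp_weights = np.random.choice(metadata["opponents"])
    # ... and load it into the same opponent network; only the weights change
    opponent.policy.model.load_weights(os.path.join(out_directory, opp_weights))
    return opp_weights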
- opp_policy = CNNPolicy.load_model(args.model_json) - opponent = ProbabilisticPolicyPlayer(opp_policy, temperature=args.policy_temp, move_limit=args.move_limit) - - if args.verbose: - print "created player and opponent with temperature {}".format(args.policy_temp) - - if not args.resume: - metadata = { - "model_file": args.model_json, - "init_weights": args.initial_weights, - "learning_rate": args.learning_rate, - "temperature": args.policy_temp, - "game_batch": args.game_batch, - "opponents": [ZEROTH_FILE], # which weights from which to sample an opponent each batch - "win_ratio": {} # map from player to tuple of (opponent, win ratio) Useful for validating in lieu of 'accuracy/loss' - } - else: - with open(os.path.join(args.out_directory, "metadata.json"), "r") as f: - metadata = json.load(f) - - # Append args of current run to history of full command args. - metadata["cmd_line_args"] = metadata.get("cmd_line_args", []).append(vars(args)) - - def save_metadata(): - with open(os.path.join(args.out_directory, "metadata.json"), "w") as f: - json.dump(metadata, f, sort_keys=True, indent=2) - - optimizer = BatchedReinforcementLearningSGD(lr=args.learning_rate, ng=args.game_batch) - player.policy.model.compile(loss='categorical_crossentropy', optimizer=optimizer) - for i_iter in xrange(1, args.iterations + 1): - # Randomly choose opponent from pool (possibly self), and playing game_batch games against - # them. - opp_weights = np.random.choice(metadata["opponents"]) - opp_path = os.path.join(args.out_directory, opp_weights) - - # Load new weights into opponent's network, but keep the same opponent object. - opponent.policy.model.load_weights(opp_path) - if args.verbose: - print "Batch {}\tsampled opponent is {}".format(i_iter, opp_weights) - - # Run games (and learn from results). Keep track of the win ratio vs each opponent over time. - win_ratio = run_n_games(optimizer, player, opponent, args.game_batch) - metadata["win_ratio"][player_weights] = (opp_weights, win_ratio) - - # Save all intermediate models. - player_weights = "weights.%05d.hdf5" % i_iter - player.policy.model.save_weights(os.path.join(args.out_directory, player_weights)) - - # Add player to batch of oppenents once in a while. - if i_iter % args.save_every == 0: - metadata["opponents"].append(player_weights) - save_metadata() + import argparse + parser = argparse.ArgumentParser(description='Perform reinforcement learning to improve given policy network. Second phase of pipeline.') + parser.add_argument("model_json", help="Path to policy model JSON.") + parser.add_argument("initial_weights", help="Path to HDF5 file with inital weights (i.e. 
result of supervised training).") + parser.add_argument("out_directory", help="Path to folder where the model params and metadata will be saved after each epoch.") + parser.add_argument("--learning-rate", help="Keras learning rate (Default: 0.001)", type=float, default=0.001) + parser.add_argument("--policy-temp", help="Distribution temperature of players using policies (Default: 0.67)", type=float, default=0.67) + parser.add_argument("--save-every", help="Save policy as a new opponent every n batches (Default: 500)", type=int, default=500) + parser.add_argument("--game-batch", help="Number of games per mini-batch (Default: 20)", type=int, default=20) + parser.add_argument("--move-limit", help="Maximum number of moves per game", type=int, default=500) + parser.add_argument("--iterations", help="Number of training batches/iterations (Default: 10000)", type=int, default=10000) + parser.add_argument("--resume", help="Load latest weights in out_directory and resume", default=False, action="store_true") + parser.add_argument("--verbose", "-v", help="Turn on verbose mode", default=False, action="store_true") + # Baseline function (TODO) default lambda state: 0 (receives either file + # paths to JSON and weights or None, in which case it uses default baseline 0) + if cmd_line_args is None: + args = parser.parse_args() + else: + args = parser.parse_args(cmd_line_args) + + ZEROTH_FILE = "weights.00000.hdf5" + + if args.resume: + if not os.path.exists(os.path.join(args.out_directory, "metadata.json")): + raise ValueError("Cannot resume without existing output directory") + + if not os.path.exists(args.out_directory): + if args.verbose: + print "creating output directory {}".format(args.out_directory) + os.makedirs(args.out_directory) + + if not args.resume: + # make a copy of weights file, "weights.00000.hdf5" in the output directory + copyfile(args.initial_weights, os.path.join(args.out_directory, ZEROTH_FILE)) + if args.verbose: + print "copied {} to {}".format(args.initial_weights, os.path.join(args.out_directory, ZEROTH_FILE)) + player_weights = ZEROTH_FILE + else: + # if resuming, we expect initial_weights to be just a "weights.#####.hdf5" file, not a full path + args.initial_weights = os.path.join(args.out_directory, os.path.basename(args.initial_weights)) + if not os.path.exists(args.initial_weights): + raise ValueError("Cannot resume; weights {} do not exist".format(args.initial_weights)) + elif args.verbose: + print "Resuming with weights {}".format(args.initial_weights) + player_weights = os.path.basename(args.initial_weights) + + # Set initial conditions + policy = CNNPolicy.load_model(args.model_json) + policy.model.load_weights(args.initial_weights) + player = ProbabilisticPolicyPlayer(policy, temperature=args.policy_temp, move_limit=args.move_limit) + + # different opponents come from simply changing the weights of 'opponent.policy.model'. That + # is, only 'opp_policy' needs to be changed, and 'opponent' will change. 
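One behavioural note on the metadata bookkeeping in run_training below (the supervised trainer further down has the same line): metadata.get("cmd_line_args", []).append(vars(args)) evaluates to None, because list.append mutates in place and returns None, so the stored value ends up None rather than a growing history of runs. If the intent is to accumulate the arguments of every run, a small helper along these lines would do it (a sketch, not part of this patch):

def record_run_args(metadata, args_dict):
    """Append this run's args to the recorded history instead of overwriting it with None."""
    history = metadata.get("cmd_line_args", [])
    history.append(args_dict)
    metadata["cmd_line_args"] = history
    return metadata

# equivalently: metadata.setdefault("cmd_line_args", []).append(vars(args))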
+ opp_policy = CNNPolicy.load_model(args.model_json) + opponent = ProbabilisticPolicyPlayer(opp_policy, temperature=args.policy_temp, move_limit=args.move_limit) + + if args.verbose: + print "created player and opponent with temperature {}".format(args.policy_temp) + + if not args.resume: + metadata = { + "model_file": args.model_json, + "init_weights": args.initial_weights, + "learning_rate": args.learning_rate, + "temperature": args.policy_temp, + "game_batch": args.game_batch, + "opponents": [ZEROTH_FILE], # which weights from which to sample an opponent each batch + "win_ratio": {} # map from player to tuple of (opponent, win ratio) Useful for validating in lieu of 'accuracy/loss' + } + else: + with open(os.path.join(args.out_directory, "metadata.json"), "r") as f: + metadata = json.load(f) + + # Append args of current run to history of full command args. + metadata["cmd_line_args"] = metadata.get("cmd_line_args", []).append(vars(args)) + + def save_metadata(): + with open(os.path.join(args.out_directory, "metadata.json"), "w") as f: + json.dump(metadata, f, sort_keys=True, indent=2) + + optimizer = BatchedReinforcementLearningSGD(lr=args.learning_rate, ng=args.game_batch) + player.policy.model.compile(loss='categorical_crossentropy', optimizer=optimizer) + for i_iter in xrange(1, args.iterations + 1): + # Randomly choose opponent from pool (possibly self), and playing game_batch games against + # them. + opp_weights = np.random.choice(metadata["opponents"]) + opp_path = os.path.join(args.out_directory, opp_weights) + + # Load new weights into opponent's network, but keep the same opponent object. + opponent.policy.model.load_weights(opp_path) + if args.verbose: + print "Batch {}\tsampled opponent is {}".format(i_iter, opp_weights) + + # Run games (and learn from results). Keep track of the win ratio vs each opponent over time. + win_ratio = run_n_games(optimizer, player, opponent, args.game_batch) + metadata["win_ratio"][player_weights] = (opp_weights, win_ratio) + + # Save all intermediate models. + player_weights = "weights.%05d.hdf5" % i_iter + player.policy.model.save_weights(os.path.join(args.out_directory, player_weights)) + + # Add player to batch of oppenents once in a while. + if i_iter % args.save_every == 0: + metadata["opponents"].append(player_weights) + save_metadata() if __name__ == '__main__': - run_training() + run_training() diff --git a/AlphaGo/training/supervised_policy_trainer.py b/AlphaGo/training/supervised_policy_trainer.py index 7e04dd6e5..ec247812c 100644 --- a/AlphaGo/training/supervised_policy_trainer.py +++ b/AlphaGo/training/supervised_policy_trainer.py @@ -8,218 +8,218 @@ def one_hot_action(action, size=19): - """Convert an (x,y) action into a size x size array of zeros with a 1 at x,y - """ - categorical = np.zeros((size, size)) - categorical[action] = 1 - return categorical + """Convert an (x,y) action into a size x size array of zeros with a 1 at x,y + """ + categorical = np.zeros((size, size)) + categorical[action] = 1 + return categorical def shuffled_hdf5_batch_generator(state_dataset, action_dataset, indices, batch_size, transforms=[]): - """A generator of batches of training data for use with the fit_generator function - of Keras. Data is accessed in the order of the given indices for shuffling. 
- """ - state_batch_shape = (batch_size,) + state_dataset.shape[1:] - game_size = state_batch_shape[-1] - Xbatch = np.zeros(state_batch_shape) - Ybatch = np.zeros((batch_size, game_size * game_size)) - batch_idx = 0 - while True: - for data_idx in indices: - # choose a random transformation of the data (rotations/reflections of the board) - transform = np.random.choice(transforms) - # get state from dataset and transform it. - # loop comprehension is used so that the transformation acts on the 3rd and 4th dimensions - state = np.array([transform(plane) for plane in state_dataset[data_idx]]) - # must be cast to a tuple so that it is interpreted as (x,y) not [(x,:), (y,:)] - action_xy = tuple(action_dataset[data_idx]) - action = transform(one_hot_action(action_xy, game_size)) - Xbatch[batch_idx] = state - Ybatch[batch_idx] = action.flatten() - batch_idx += 1 - if batch_idx == batch_size: - batch_idx = 0 - yield (Xbatch, Ybatch) + """A generator of batches of training data for use with the fit_generator function + of Keras. Data is accessed in the order of the given indices for shuffling. + """ + state_batch_shape = (batch_size,) + state_dataset.shape[1:] + game_size = state_batch_shape[-1] + Xbatch = np.zeros(state_batch_shape) + Ybatch = np.zeros((batch_size, game_size * game_size)) + batch_idx = 0 + while True: + for data_idx in indices: + # choose a random transformation of the data (rotations/reflections of the board) + transform = np.random.choice(transforms) + # get state from dataset and transform it. + # loop comprehension is used so that the transformation acts on the 3rd and 4th dimensions + state = np.array([transform(plane) for plane in state_dataset[data_idx]]) + # must be cast to a tuple so that it is interpreted as (x,y) not [(x,:), (y,:)] + action_xy = tuple(action_dataset[data_idx]) + action = transform(one_hot_action(action_xy, game_size)) + Xbatch[batch_idx] = state + Ybatch[batch_idx] = action.flatten() + batch_idx += 1 + if batch_idx == batch_size: + batch_idx = 0 + yield (Xbatch, Ybatch) class MetadataWriterCallback(Callback): - def __init__(self, path): - self.file = path - self.metadata = { - "epochs": [], - "best_epoch": 0 - } + def __init__(self, path): + self.file = path + self.metadata = { + "epochs": [], + "best_epoch": 0 + } - def on_epoch_end(self, epoch, logs={}): - # in case appending to logs (resuming training), get epoch number ourselves - epoch = len(self.metadata["epochs"]) + def on_epoch_end(self, epoch, logs={}): + # in case appending to logs (resuming training), get epoch number ourselves + epoch = len(self.metadata["epochs"]) - self.metadata["epochs"].append(logs) + self.metadata["epochs"].append(logs) - if "val_loss" in logs: - key = "val_loss" - else: - key = "loss" + if "val_loss" in logs: + key = "val_loss" + else: + key = "loss" - best_loss = self.metadata["epochs"][self.metadata["best_epoch"]][key] - if logs.get(key) < best_loss: - self.metadata["best_epoch"] = epoch + best_loss = self.metadata["epochs"][self.metadata["best_epoch"]][key] + if logs.get(key) < best_loss: + self.metadata["best_epoch"] = epoch - with open(self.file, "w") as f: - json.dump(self.metadata, f, indent=2) + with open(self.file, "w") as f: + json.dump(self.metadata, f, indent=2) BOARD_TRANSFORMATIONS = { - "noop": lambda feature: feature, - "rot90": lambda feature: np.rot90(feature, 1), - "rot180": lambda feature: np.rot90(feature, 2), - "rot270": lambda feature: np.rot90(feature, 3), - "fliplr": lambda feature: np.fliplr(feature), - "flipud": lambda feature: 
np.flipud(feature), - "diag1": lambda feature: np.transpose(feature), - "diag2": lambda feature: np.fliplr(np.rot90(feature, 1)) + "noop": lambda feature: feature, + "rot90": lambda feature: np.rot90(feature, 1), + "rot180": lambda feature: np.rot90(feature, 2), + "rot270": lambda feature: np.rot90(feature, 3), + "fliplr": lambda feature: np.fliplr(feature), + "flipud": lambda feature: np.flipud(feature), + "diag1": lambda feature: np.transpose(feature), + "diag2": lambda feature: np.fliplr(np.rot90(feature, 1)) } def run_training(cmd_line_args=None): - """Run training. command-line args may be passed in as a list - """ - import argparse - parser = argparse.ArgumentParser(description='Perform supervised training on a policy network.') - # required args - parser.add_argument("model", help="Path to a JSON model file (i.e. from CNNPolicy.save_model())") - parser.add_argument("train_data", help="A .h5 file of training data") - parser.add_argument("out_directory", help="directory where metadata and weights will be saved") - # frequently used args - parser.add_argument("--minibatch", "-B", help="Size of training data minibatches. Default: 16", type=int, default=16) - parser.add_argument("--epochs", "-E", help="Total number of iterations on the data. Default: 10", type=int, default=10) - parser.add_argument("--epoch-length", "-l", help="Number of training examples considered 'one epoch'. Default: # training data", type=int, default=None) - parser.add_argument("--learning-rate", "-r", help="Learning rate - how quickly the model learns at first. Default: .03", type=float, default=.03) - parser.add_argument("--decay", "-d", help="The rate at which learning decreases. Default: .0001", type=float, default=.0001) - parser.add_argument("--verbose", "-v", help="Turn on verbose mode", default=False, action="store_true") - # slightly fancier args - parser.add_argument("--weights", help="Name of a .h5 weights file (in the output directory) to load to resume training", default=None) - parser.add_argument("--train-val-test", help="Fraction of data to use for training/val/test. Must sum to 1. Invalid if restarting training", nargs=3, type=float, default=[0.93, .05, .02]) - parser.add_argument("--symmetries", help="Comma-separated list of transforms, subset of noop,rot90,rot180,rot270,fliplr,flipud,diag1,diag2", default='noop,rot90,rot180,rot270,fliplr,flipud,diag1,diag2') - # TODO - an argument to specify which transformations to use, put it in metadata - - if cmd_line_args is None: - args = parser.parse_args() - else: - args = parser.parse_args(cmd_line_args) - - # TODO - what follows here should be refactored into a series of small functions - - resume = args.weights is not None - - if args.verbose: - if resume: - print "trying to resume from %s with weights %s" % (args.out_directory, os.path.join(args.out_directory, args.weights)) - else: - if os.path.exists(args.out_directory): - print "directory %s exists. 
any previous data will be overwritten" % args.out_directory - else: - print "starting fresh output directory %s" % args.out_directory - - # load model from json spec - model = CNNPolicy.load_model(args.model).model - if resume: - model.load_weights(os.path.join(args.out_directory, args.weights)) - - # TODO - (waiting on game_converter) verify that features of model match features of training data - dataset = h5.File(args.train_data) - n_total_data = len(dataset["states"]) - n_train_data = int(args.train_val_test[0] * n_total_data) - # Need to make sure training data is divisible by minibatch size or get warning mentioning accuracy from keras - n_train_data = n_train_data - (n_train_data % args.minibatch) - n_val_data = n_total_data - n_train_data - # n_test_data = n_total_data - (n_train_data + n_val_data) - - if args.verbose: - print "datset loaded" - print "\t%d total samples" % n_total_data - print "\t%d training samples" % n_train_data - print "\t%d validaion samples" % n_val_data - - # ensure output directory is available - if not os.path.exists(args.out_directory): - os.makedirs(args.out_directory) - - # create metadata file and the callback object that will write to it - meta_file = os.path.join(args.out_directory, "metadata.json") - meta_writer = MetadataWriterCallback(meta_file) - # load prior data if it already exists - if os.path.exists(meta_file) and resume: - with open(meta_file, "r") as f: - meta_writer.metadata = json.load(f) - if args.verbose: - print "previous metadata loaded: %d epochs. new epochs will be appended." % len(meta_writer.metadata["epochs"]) - elif args.verbose: - print "starting with empty metadata" - # the MetadataWriterCallback only sets 'epoch' and 'best_epoch'. We can add in anything else we like here - # TODO - model and train_data are saved in meta_file; check that they match (and make args optional when restarting?) - meta_writer.metadata["training_data"] = args.train_data - meta_writer.metadata["model_file"] = args.model - # Record all command line args in a list so that all args are recorded even when training is stopped and resumed. 
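A sanity check on the batch generator above: the same board symmetry has to be applied to the feature planes and to the one-hot action map, otherwise the label no longer points at the move that was actually played. A small, hypothetical check using one_hot_action and BOARD_TRANSFORMATIONS as defined in this file:

import numpy as np
from AlphaGo.training.supervised_policy_trainer import one_hot_action, BOARD_TRANSFORMATIONS

def transformed_label(action_xy, transform_name, size=19):
    """Apply one of the eight board symmetries to a one-hot action map and
    recover the (x, y) coordinate it now encodes."""
    transform = BOARD_TRANSFORMATIONS[transform_name]
    action_map = transform(one_hot_action(action_xy, size))
    return tuple(np.unravel_index(np.argmax(action_map), action_map.shape))

# rotating the label with the board keeps it aligned with the rotated planes,
# e.g. transformed_label((3, 15), "rot90") == (3, 3) on a 19x19 board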
- meta_writer.metadata["cmd_line_args"] = meta_writer.metadata.get("cmd_line_args", []).append(vars(args)) - - # create ModelCheckpoint to save weights every epoch - checkpoint_template = os.path.join(args.out_directory, "weights.{epoch:05d}.hdf5") - checkpointer = ModelCheckpoint(checkpoint_template) - - # load precomputed random-shuffle indices or create them - # TODO - save each train/val/test indices separately so there's no danger of - # changing args.train_val_test when resuming - shuffle_file = os.path.join(args.out_directory, "shuffle.npz") - if os.path.exists(shuffle_file) and resume: - with open(shuffle_file, "r") as f: - shuffle_indices = np.load(f) - if args.verbose: - print "loading previous data shuffling indices" - else: - # create shuffled indices - shuffle_indices = np.random.permutation(n_total_data) - with open(shuffle_file, "w") as f: - np.save(f, shuffle_indices) - if args.verbose: - print "created new data shuffling indices" - # training indices are the first consecutive set of shuffled indices, val next, then test gets the remainder - train_indices = shuffle_indices[0:n_train_data] - val_indices = shuffle_indices[n_train_data:n_train_data + n_val_data] - # test_indices = shuffle_indices[n_train_data + n_val_data:] - - symmetries = [BOARD_TRANSFORMATIONS[name] for name in args.symmetries.strip().split(",")] - - # create dataset generators - train_data_generator = shuffled_hdf5_batch_generator( - dataset["states"], - dataset["actions"], - train_indices, - args.minibatch, - symmetries) - val_data_generator = shuffled_hdf5_batch_generator( - dataset["states"], - dataset["actions"], - val_indices, - args.minibatch, - symmetries) - - sgd = SGD(lr=args.learning_rate, decay=args.decay) - model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=["accuracy"]) - - samples_per_epoch = args.epoch_length or n_train_data - - if args.verbose: - print "STARTING TRAINING" - - model.fit_generator( - generator=train_data_generator, - samples_per_epoch=samples_per_epoch, - nb_epoch=args.epochs, - callbacks=[checkpointer, meta_writer], - validation_data=val_data_generator, - nb_val_samples=n_val_data) + """Run training. command-line args may be passed in as a list + """ + import argparse + parser = argparse.ArgumentParser(description='Perform supervised training on a policy network.') + # required args + parser.add_argument("model", help="Path to a JSON model file (i.e. from CNNPolicy.save_model())") + parser.add_argument("train_data", help="A .h5 file of training data") + parser.add_argument("out_directory", help="directory where metadata and weights will be saved") + # frequently used args + parser.add_argument("--minibatch", "-B", help="Size of training data minibatches. Default: 16", type=int, default=16) + parser.add_argument("--epochs", "-E", help="Total number of iterations on the data. Default: 10", type=int, default=10) + parser.add_argument("--epoch-length", "-l", help="Number of training examples considered 'one epoch'. Default: # training data", type=int, default=None) + parser.add_argument("--learning-rate", "-r", help="Learning rate - how quickly the model learns at first. Default: .03", type=float, default=.03) + parser.add_argument("--decay", "-d", help="The rate at which learning decreases. 
Default: .0001", type=float, default=.0001) + parser.add_argument("--verbose", "-v", help="Turn on verbose mode", default=False, action="store_true") + # slightly fancier args + parser.add_argument("--weights", help="Name of a .h5 weights file (in the output directory) to load to resume training", default=None) + parser.add_argument("--train-val-test", help="Fraction of data to use for training/val/test. Must sum to 1. Invalid if restarting training", nargs=3, type=float, default=[0.93, .05, .02]) + parser.add_argument("--symmetries", help="Comma-separated list of transforms, subset of noop,rot90,rot180,rot270,fliplr,flipud,diag1,diag2", default='noop,rot90,rot180,rot270,fliplr,flipud,diag1,diag2') + # TODO - an argument to specify which transformations to use, put it in metadata + + if cmd_line_args is None: + args = parser.parse_args() + else: + args = parser.parse_args(cmd_line_args) + + # TODO - what follows here should be refactored into a series of small functions + + resume = args.weights is not None + + if args.verbose: + if resume: + print "trying to resume from %s with weights %s" % (args.out_directory, os.path.join(args.out_directory, args.weights)) + else: + if os.path.exists(args.out_directory): + print "directory %s exists. any previous data will be overwritten" % args.out_directory + else: + print "starting fresh output directory %s" % args.out_directory + + # load model from json spec + model = CNNPolicy.load_model(args.model).model + if resume: + model.load_weights(os.path.join(args.out_directory, args.weights)) + + # TODO - (waiting on game_converter) verify that features of model match features of training data + dataset = h5.File(args.train_data) + n_total_data = len(dataset["states"]) + n_train_data = int(args.train_val_test[0] * n_total_data) + # Need to make sure training data is divisible by minibatch size or get warning mentioning accuracy from keras + n_train_data = n_train_data - (n_train_data % args.minibatch) + n_val_data = n_total_data - n_train_data + # n_test_data = n_total_data - (n_train_data + n_val_data) + + if args.verbose: + print "datset loaded" + print "\t%d total samples" % n_total_data + print "\t%d training samples" % n_train_data + print "\t%d validaion samples" % n_val_data + + # ensure output directory is available + if not os.path.exists(args.out_directory): + os.makedirs(args.out_directory) + + # create metadata file and the callback object that will write to it + meta_file = os.path.join(args.out_directory, "metadata.json") + meta_writer = MetadataWriterCallback(meta_file) + # load prior data if it already exists + if os.path.exists(meta_file) and resume: + with open(meta_file, "r") as f: + meta_writer.metadata = json.load(f) + if args.verbose: + print "previous metadata loaded: %d epochs. new epochs will be appended." % len(meta_writer.metadata["epochs"]) + elif args.verbose: + print "starting with empty metadata" + # the MetadataWriterCallback only sets 'epoch' and 'best_epoch'. We can add in anything else we like here + # TODO - model and train_data are saved in meta_file; check that they match (and make args optional when restarting?) + meta_writer.metadata["training_data"] = args.train_data + meta_writer.metadata["model_file"] = args.model + # Record all command line args in a list so that all args are recorded even when training is stopped and resumed. 
+ meta_writer.metadata["cmd_line_args"] = meta_writer.metadata.get("cmd_line_args", []).append(vars(args)) + + # create ModelCheckpoint to save weights every epoch + checkpoint_template = os.path.join(args.out_directory, "weights.{epoch:05d}.hdf5") + checkpointer = ModelCheckpoint(checkpoint_template) + + # load precomputed random-shuffle indices or create them + # TODO - save each train/val/test indices separately so there's no danger of + # changing args.train_val_test when resuming + shuffle_file = os.path.join(args.out_directory, "shuffle.npz") + if os.path.exists(shuffle_file) and resume: + with open(shuffle_file, "r") as f: + shuffle_indices = np.load(f) + if args.verbose: + print "loading previous data shuffling indices" + else: + # create shuffled indices + shuffle_indices = np.random.permutation(n_total_data) + with open(shuffle_file, "w") as f: + np.save(f, shuffle_indices) + if args.verbose: + print "created new data shuffling indices" + # training indices are the first consecutive set of shuffled indices, val next, then test gets the remainder + train_indices = shuffle_indices[0:n_train_data] + val_indices = shuffle_indices[n_train_data:n_train_data + n_val_data] + # test_indices = shuffle_indices[n_train_data + n_val_data:] + + symmetries = [BOARD_TRANSFORMATIONS[name] for name in args.symmetries.strip().split(",")] + + # create dataset generators + train_data_generator = shuffled_hdf5_batch_generator( + dataset["states"], + dataset["actions"], + train_indices, + args.minibatch, + symmetries) + val_data_generator = shuffled_hdf5_batch_generator( + dataset["states"], + dataset["actions"], + val_indices, + args.minibatch, + symmetries) + + sgd = SGD(lr=args.learning_rate, decay=args.decay) + model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=["accuracy"]) + + samples_per_epoch = args.epoch_length or n_train_data + + if args.verbose: + print "STARTING TRAINING" + + model.fit_generator( + generator=train_data_generator, + samples_per_epoch=samples_per_epoch, + nb_epoch=args.epochs, + callbacks=[checkpointer, meta_writer], + validation_data=val_data_generator, + nb_val_samples=n_val_data) if __name__ == '__main__': - run_training() + run_training() diff --git a/AlphaGo/util.py b/AlphaGo/util.py index b801a397f..6acb5062d 100644 --- a/AlphaGo/util.py +++ b/AlphaGo/util.py @@ -8,116 +8,116 @@ def flatten_idx(position, size): - (x, y) = position - return x * size + y + (x, y) = position + return x * size + y def unflatten_idx(idx, size): - x, y = divmod(idx, size) - return (x, y) + x, y = divmod(idx, size) + return (x, y) def _parse_sgf_move(node_value): - """Given a well-formed move string, return either PASS_MOVE or the (x, y) position - """ - if node_value == '' or node_value == 'tt': - return go.PASS_MOVE - else: - # GameState expects (x, y) where x is column and y is row - col = LETTERS.index(node_value[0].upper()) - row = LETTERS.index(node_value[1].upper()) - return (col, row) + """Given a well-formed move string, return either PASS_MOVE or the (x, y) position + """ + if node_value == '' or node_value == 'tt': + return go.PASS_MOVE + else: + # GameState expects (x, y) where x is column and y is row + col = LETTERS.index(node_value[0].upper()) + row = LETTERS.index(node_value[1].upper()) + return (col, row) def _sgf_init_gamestate(sgf_root): - """Helper function to set up a GameState object from the root node - of an SGF file - """ - props = sgf_root.properties - s_size = props.get('SZ', ['19'])[0] - s_player = props.get('PL', ['B'])[0] - # init board with 
specified size - gs = go.GameState(int(s_size)) - # handle 'add black' property - if 'AB' in props: - for stone in props['AB']: - gs.do_move(_parse_sgf_move(stone), go.BLACK) - # handle 'add white' property - if 'AW' in props: - for stone in props['AW']: - gs.do_move(_parse_sgf_move(stone), go.WHITE) - # setup done; set player according to 'PL' property - gs.current_player = go.BLACK if s_player == 'B' else go.WHITE - return gs + """Helper function to set up a GameState object from the root node + of an SGF file + """ + props = sgf_root.properties + s_size = props.get('SZ', ['19'])[0] + s_player = props.get('PL', ['B'])[0] + # init board with specified size + gs = go.GameState(int(s_size)) + # handle 'add black' property + if 'AB' in props: + for stone in props['AB']: + gs.do_move(_parse_sgf_move(stone), go.BLACK) + # handle 'add white' property + if 'AW' in props: + for stone in props['AW']: + gs.do_move(_parse_sgf_move(stone), go.WHITE) + # setup done; set player according to 'PL' property + gs.current_player = go.BLACK if s_player == 'B' else go.WHITE + return gs def sgf_to_gamestate(sgf_string): - """Creates a GameState object from the first game in the given collection - """ - # Don't Repeat Yourself; parsing handled by sgf_iter_states - for (gs, move, player) in sgf_iter_states(sgf_string, True): - pass - # gs has been updated in-place to the final state by the time - # sgf_iter_states returns - return gs + """Creates a GameState object from the first game in the given collection + """ + # Don't Repeat Yourself; parsing handled by sgf_iter_states + for (gs, move, player) in sgf_iter_states(sgf_string, True): + pass + # gs has been updated in-place to the final state by the time + # sgf_iter_states returns + return gs def save_gamestate_to_sgf(gamestate, path, filename, black_player_name='Unknown', white_player_name='Unknown', size=19, komi=7.5): - """Creates a simplified sgf for viewing playouts or positions - """ - str_list = [] - # Game info - str_list.append('(;GM[1]FF[4]CA[UTF-8]') - str_list.append('SZ[{}]'.format(size)) - str_list.append('KM[{}]'.format(komi)) - str_list.append('PB[{}]'.format(black_player_name)) - str_list.append('PW[{}]'.format(white_player_name)) - cycle_string = 'BW' - # Handle handicaps - if len(gamestate.handicaps) > 0: - cycle_string = 'WB' - str_list.append('HA[{}]'.format(len(gamestate.handicaps))) - str_list.append(';AB') - for handicap in gamestate.handicaps: - str_list.append('[{}{}]'.format(LETTERS[handicap[0]].lower(), LETTERS[handicap[1]].lower())) - # Move list - for move, color in zip(gamestate.history, itertools.cycle(cycle_string)): - # Move color prefix - str_list.append(';{}'.format(color)) - # Move coordinates - if move is None: - str_list.append('[tt]') - else: - str_list.append('[{}{}]'.format(LETTERS[move[0]].lower(), LETTERS[move[1]].lower())) - str_list.append(')') - with open(os.path.join(path, filename), "w") as f: - f.write(''.join(str_list)) + """Creates a simplified sgf for viewing playouts or positions + """ + str_list = [] + # Game info + str_list.append('(;GM[1]FF[4]CA[UTF-8]') + str_list.append('SZ[{}]'.format(size)) + str_list.append('KM[{}]'.format(komi)) + str_list.append('PB[{}]'.format(black_player_name)) + str_list.append('PW[{}]'.format(white_player_name)) + cycle_string = 'BW' + # Handle handicaps + if len(gamestate.handicaps) > 0: + cycle_string = 'WB' + str_list.append('HA[{}]'.format(len(gamestate.handicaps))) + str_list.append(';AB') + for handicap in gamestate.handicaps: + 
str_list.append('[{}{}]'.format(LETTERS[handicap[0]].lower(), LETTERS[handicap[1]].lower())) + # Move list + for move, color in zip(gamestate.history, itertools.cycle(cycle_string)): + # Move color prefix + str_list.append(';{}'.format(color)) + # Move coordinates + if move is None: + str_list.append('[tt]') + else: + str_list.append('[{}{}]'.format(LETTERS[move[0]].lower(), LETTERS[move[1]].lower())) + str_list.append(')') + with open(os.path.join(path, filename), "w") as f: + f.write(''.join(str_list)) def sgf_iter_states(sgf_string, include_end=True): - """Iterates over (GameState, move, player) tuples in the first game of the given SGF file. - - Ignores variations - only the main line is returned. - The state object is modified in-place, so don't try to, for example, keep track of it through time - - If include_end is False, the final tuple yielded is the penultimate state, but the state - will still be left in the final position at the end of iteration because 'gs' is modified - in-place the state. See sgf_to_gamestate - """ - collection = sgf.parse(sgf_string) - game = collection[0] - gs = _sgf_init_gamestate(game.root) - if game.rest is not None: - for node in game.rest: - props = node.properties - if 'W' in props: - move = _parse_sgf_move(props['W'][0]) - player = go.WHITE - elif 'B' in props: - move = _parse_sgf_move(props['B'][0]) - player = go.BLACK - yield (gs, move, player) - # update state to n+1 - gs.do_move(move, player) - if include_end: - yield (gs, None, None) + """Iterates over (GameState, move, player) tuples in the first game of the given SGF file. + + Ignores variations - only the main line is returned. + The state object is modified in-place, so don't try to, for example, keep track of it through time + + If include_end is False, the final tuple yielded is the penultimate state, but the state + will still be left in the final position at the end of iteration because 'gs' is modified + in-place the state. 
See sgf_to_gamestate + """ + collection = sgf.parse(sgf_string) + game = collection[0] + gs = _sgf_init_gamestate(game.root) + if game.rest is not None: + for node in game.rest: + props = node.properties + if 'W' in props: + move = _parse_sgf_move(props['W'][0]) + player = go.WHITE + elif 'B' in props: + move = _parse_sgf_move(props['B'][0]) + player = go.BLACK + yield (gs, move, player) + # update state to n+1 + gs.do_move(move, player) + if include_end: + yield (gs, None, None) diff --git a/benchmarks/preprocessing_benchmark.py b/benchmarks/preprocessing_benchmark.py index 922c681ce..635c0fd55 100644 --- a/benchmarks/preprocessing_benchmark.py +++ b/benchmarks/preprocessing_benchmark.py @@ -9,8 +9,8 @@ def run_convert_game(): - for traindata in gc.convert_game(*args): - pass + for traindata in gc.convert_game(*args): + pass prof.runcall(run_convert_game) prof.dump_stats('bench_results.prof') diff --git a/benchmarks/reinforcement_policy_training_benchmark.py b/benchmarks/reinforcement_policy_training_benchmark.py index c016a42ec..4fc66407c 100644 --- a/benchmarks/reinforcement_policy_training_benchmark.py +++ b/benchmarks/reinforcement_policy_training_benchmark.py @@ -15,9 +15,9 @@ stats_file = os.path.join(datadir, 'reinforcement_policy_trainer.prof') if not os.path.exists(datadir): - os.makedirs(datadir) + os.makedirs(datadir) if not os.path.exists(weights): - policy.model.save_weights(weights) + policy.model.save_weights(weights) policy.save_model(modelfile) profile = Profile() diff --git a/benchmarks/supervised_policy_training_benchmark.py b/benchmarks/supervised_policy_training_benchmark.py index 0293f7af9..d84676611 100644 --- a/benchmarks/supervised_policy_training_benchmark.py +++ b/benchmarks/supervised_policy_training_benchmark.py @@ -14,7 +14,7 @@ def run_supervised_policy_training(): - run_training(*arguments) + run_training(*arguments) profile.runcall(run_supervised_policy_training) profile.dump_stats('supervised_policy_training_bench_results.prof') diff --git a/interface/Play.py b/interface/Play.py index 57d750cc9..58e3fe56d 100644 --- a/interface/Play.py +++ b/interface/Play.py @@ -3,32 +3,32 @@ class play_match(object): - """Interface to handle play between two players.""" - def __init__(self, player1, player2, save_dir=None, size=19): - # super(ClassName, self).__init__() - self.player1 = player1 - self.player2 = player2 - self.state = GameState(size=size) - # I Propose that GameState should take a top-level save directory, - # then automatically generate the specific file name + """Interface to handle play between two players.""" + def __init__(self, player1, player2, save_dir=None, size=19): + # super(ClassName, self).__init__() + self.player1 = player1 + self.player2 = player2 + self.state = GameState(size=size) + # I Propose that GameState should take a top-level save directory, + # then automatically generate the specific file name - def _play(self, player): - move = player.get_move(self.state) - # TODO: Fix is_eye? - self.state.do_move(move) # Return max prob sensible legal move - # self.state.write_to_disk() - if len(self.state.history) > 1: - if self.state.history[-1] is None and self.state.history[-2] is None \ - and self.state.current_player == -1: - end_of_game = True - else: - end_of_game = False - else: - end_of_game = False - return end_of_game + def _play(self, player): + move = player.get_move(self.state) + # TODO: Fix is_eye? 
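For readers of util.py above, the intended way to consume sgf_iter_states is worth spelling out: each yielded state is the position before the accompanying move is played, and with include_end=True the last tuple carries the final position with move and player set to None. A short usage sketch (the SGF file name is hypothetical):

from AlphaGo.util import sgf_iter_states

with open("game.sgf") as f:  # hypothetical input file
    sgf_string = f.read()

moves = []
for state, move, player in sgf_iter_states(sgf_string, include_end=True):
    # 'state' is the position before 'move' is played; in the final tuple
    # (only yielded when include_end=True) move and player are both None
    if move is not None:
        moves.append((move, player))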
+ self.state.do_move(move) # Return max prob sensible legal move + # self.state.write_to_disk() + if len(self.state.history) > 1: + if self.state.history[-1] is None and self.state.history[-2] is None \ + and self.state.current_player == -1: + end_of_game = True + else: + end_of_game = False + else: + end_of_game = False + return end_of_game - def play(self): - """Play one turn, update game state, save to disk""" - end_of_game = self._play(self.player1) - # This is incorrect. - return end_of_game + def play(self): + """Play one turn, update game state, save to disk""" + end_of_game = self._play(self.player1) + # This is incorrect. + return end_of_game diff --git a/interface/gtp_wrapper.py b/interface/gtp_wrapper.py index 437aa4ff8..89d35f242 100644 --- a/interface/gtp_wrapper.py +++ b/interface/gtp_wrapper.py @@ -6,148 +6,148 @@ def run_gnugo(sgf_file_name, command): - from distutils import spawn - if spawn.find_executable('gnugo'): - from subprocess import Popen, PIPE - p = Popen(['gnugo', '--chinese-rules', '--mode', 'gtp', '-l', sgf_file_name], stdout=PIPE, stdin=PIPE, stderr=PIPE) - out_bytes = p.communicate(input=command)[0] - return out_bytes.decode('utf-8')[2:] - else: - return '' + from distutils import spawn + if spawn.find_executable('gnugo'): + from subprocess import Popen, PIPE + p = Popen(['gnugo', '--chinese-rules', '--mode', 'gtp', '-l', sgf_file_name], stdout=PIPE, stdin=PIPE, stderr=PIPE) + out_bytes = p.communicate(input=command)[0] + return out_bytes.decode('utf-8')[2:] + else: + return '' class ExtendedGtpEngine(gtp.Engine): - recommended_handicaps = { - 2: "D4 Q16", - 3: "D4 Q16 D16", - 4: "D4 Q16 D16 Q4", - 5: "D4 Q16 D16 Q4 K10", - 6: "D4 Q16 D16 Q4 D10 Q10", - 7: "D4 Q16 D16 Q4 D10 Q10 K10", - 8: "D4 Q16 D16 Q4 D10 Q10 K4 K16", - 9: "D4 Q16 D16 Q4 D10 Q10 K4 K16 K10" - } - - def call_gnugo(self, sgf_file_name, command): - try: - pool = multiprocessing.Pool(processes=1) - result = pool.apply_async(run_gnugo, (sgf_file_name, command)) - output = result.get(timeout=10) - pool.close() - return output - except multiprocessing.TimeoutError: - pool.terminate() - # if can't get answer from GnuGo, return no result - return '' - - def cmd_time_left(self, arguments): - pass - - def cmd_place_free_handicap(self, arguments): - try: - number_of_stones = int(arguments) - except Exception: - raise ValueError('Number of handicaps could not be parsed: {}'.format(arguments)) - if number_of_stones < 2 or number_of_stones > 9: - raise ValueError('Invalid number of handicap stones: {}'.format(number_of_stones)) - vertex_string = ExtendedGtpEngine.recommended_handicaps[number_of_stones] - self.cmd_set_free_handicap(vertex_string) - return vertex_string - - def cmd_set_free_handicap(self, arguments): - vertices = arguments.strip().split() - moves = [gtp.parse_vertex(vertex) for vertex in vertices] - self._game.place_handicaps(moves) - - def cmd_final_score(self, arguments): - sgf_file_name = self._game.get_current_state_as_sgf() - return self.call_gnugo(sgf_file_name, 'final_score\n') - - def cmd_final_status_list(self, arguments): - sgf_file_name = self._game.get_current_state_as_sgf() - return self.call_gnugo(sgf_file_name, 'final_status_list {}\n'.format(arguments)) - - def cmd_load_sgf(self, arguments): - pass - - def cmd_save_sgf(self, arguments): - pass - - # def cmd_kgs_genmove_cleanup(self, arguments): - # return self.cmd_genmove(arguments) + recommended_handicaps = { + 2: "D4 Q16", + 3: "D4 Q16 D16", + 4: "D4 Q16 D16 Q4", + 5: "D4 Q16 D16 Q4 K10", + 6: "D4 Q16 D16 Q4 D10 Q10", + 
7: "D4 Q16 D16 Q4 D10 Q10 K10", + 8: "D4 Q16 D16 Q4 D10 Q10 K4 K16", + 9: "D4 Q16 D16 Q4 D10 Q10 K4 K16 K10" + } + + def call_gnugo(self, sgf_file_name, command): + try: + pool = multiprocessing.Pool(processes=1) + result = pool.apply_async(run_gnugo, (sgf_file_name, command)) + output = result.get(timeout=10) + pool.close() + return output + except multiprocessing.TimeoutError: + pool.terminate() + # if can't get answer from GnuGo, return no result + return '' + + def cmd_time_left(self, arguments): + pass + + def cmd_place_free_handicap(self, arguments): + try: + number_of_stones = int(arguments) + except Exception: + raise ValueError('Number of handicaps could not be parsed: {}'.format(arguments)) + if number_of_stones < 2 or number_of_stones > 9: + raise ValueError('Invalid number of handicap stones: {}'.format(number_of_stones)) + vertex_string = ExtendedGtpEngine.recommended_handicaps[number_of_stones] + self.cmd_set_free_handicap(vertex_string) + return vertex_string + + def cmd_set_free_handicap(self, arguments): + vertices = arguments.strip().split() + moves = [gtp.parse_vertex(vertex) for vertex in vertices] + self._game.place_handicaps(moves) + + def cmd_final_score(self, arguments): + sgf_file_name = self._game.get_current_state_as_sgf() + return self.call_gnugo(sgf_file_name, 'final_score\n') + + def cmd_final_status_list(self, arguments): + sgf_file_name = self._game.get_current_state_as_sgf() + return self.call_gnugo(sgf_file_name, 'final_status_list {}\n'.format(arguments)) + + def cmd_load_sgf(self, arguments): + pass + + def cmd_save_sgf(self, arguments): + pass + + # def cmd_kgs_genmove_cleanup(self, arguments): + # return self.cmd_genmove(arguments) class GTPGameConnector(object): - """A class implementing the functions of a 'game' object required by the GTP - Engine by wrapping a GameState and Player instance - """ - - def __init__(self, player): - self._state = go.GameState(enforce_superko=True) - self._player = player - - def clear(self): - self._state = go.GameState(self._state.size, enforce_superko=True) - - def make_move(self, color, vertex): - # vertex in GTP language is 1-indexed, whereas GameState's are zero-indexed - try: - if vertex == gtp.PASS: - self._state.do_move(go.PASS_MOVE) - else: - (x, y) = vertex - self._state.do_move((x - 1, y - 1), color) - return True - except go.IllegalMove: - return False - - def set_size(self, n): - self._state = go.GameState(n, enforce_superko=True) - - def set_komi(self, k): - self._state.komi = k - - def get_move(self, color): - self._state.current_player = color - move = self._player.get_move(self._state) - if move == go.PASS_MOVE: - return gtp.PASS - else: - (x, y) = move - return (x + 1, y + 1) - - def get_current_state_as_sgf(self): - from tempfile import NamedTemporaryFile - temp_file = NamedTemporaryFile(delete=False) - save_gamestate_to_sgf(self._state, '', temp_file.name) - return temp_file.name - - def place_handicaps(self, vertices): - actions = [] - for vertex in vertices: - (x, y) = vertex - actions.append((x - 1, y - 1)) - self._state.place_handicaps(actions) + """A class implementing the functions of a 'game' object required by the GTP + Engine by wrapping a GameState and Player instance + """ + + def __init__(self, player): + self._state = go.GameState(enforce_superko=True) + self._player = player + + def clear(self): + self._state = go.GameState(self._state.size, enforce_superko=True) + + def make_move(self, color, vertex): + # vertex in GTP language is 1-indexed, whereas GameState's are zero-indexed + try: 
+ if vertex == gtp.PASS: + self._state.do_move(go.PASS_MOVE) + else: + (x, y) = vertex + self._state.do_move((x - 1, y - 1), color) + return True + except go.IllegalMove: + return False + + def set_size(self, n): + self._state = go.GameState(n, enforce_superko=True) + + def set_komi(self, k): + self._state.komi = k + + def get_move(self, color): + self._state.current_player = color + move = self._player.get_move(self._state) + if move == go.PASS_MOVE: + return gtp.PASS + else: + (x, y) = move + return (x + 1, y + 1) + + def get_current_state_as_sgf(self): + from tempfile import NamedTemporaryFile + temp_file = NamedTemporaryFile(delete=False) + save_gamestate_to_sgf(self._state, '', temp_file.name) + return temp_file.name + + def place_handicaps(self, vertices): + actions = [] + for vertex in vertices: + (x, y) = vertex + actions.append((x - 1, y - 1)) + self._state.place_handicaps(actions) def run_gtp(player_obj, inpt_fn=None, name="Gtp Player", version="0.0"): - gtp_game = GTPGameConnector(player_obj) - gtp_engine = ExtendedGtpEngine(gtp_game, name, version) - if inpt_fn is None: - inpt_fn = raw_input - - sys.stderr.write("GTP engine ready\n") - sys.stderr.flush() - while not gtp_engine.disconnect: - inpt = inpt_fn() - # handle either single lines at a time - # or multiple commands separated by '\n' - try: - cmd_list = inpt.split("\n") - except: - cmd_list = [inpt] - for cmd in cmd_list: - engine_reply = gtp_engine.send(cmd) - sys.stdout.write(engine_reply) - sys.stdout.flush() + gtp_game = GTPGameConnector(player_obj) + gtp_engine = ExtendedGtpEngine(gtp_game, name, version) + if inpt_fn is None: + inpt_fn = raw_input + + sys.stderr.write("GTP engine ready\n") + sys.stderr.flush() + while not gtp_engine.disconnect: + inpt = inpt_fn() + # handle either single lines at a time + # or multiple commands separated by '\n' + try: + cmd_list = inpt.split("\n") + except: + cmd_list = [inpt] + for cmd in cmd_list: + engine_reply = gtp_engine.send(cmd) + sys.stdout.write(engine_reply) + sys.stdout.flush() diff --git a/tests/test_game_converter.py b/tests/test_game_converter.py index 0d75c11e5..e0f4963f7 100644 --- a/tests/test_game_converter.py +++ b/tests/test_game_converter.py @@ -5,22 +5,22 @@ class TestSGFLoading(unittest.TestCase): - def test_ab_aw(self): - with open('tests/test_data/sgf/ab_aw.sgf', 'r') as f: - sgf_to_gamestate(f.read()) + def test_ab_aw(self): + with open('tests/test_data/sgf/ab_aw.sgf', 'r') as f: + sgf_to_gamestate(f.read()) class TestCmdlineConverter(unittest.TestCase): - def test_directory_conversion(self): - args = ['--features', 'board,ones,turns_since', '--outfile', '.tmp.testing.h5', '--directory', 'tests/test_data/sgf/'] - run_game_converter(args) - os.remove('.tmp.testing.h5') + def test_directory_conversion(self): + args = ['--features', 'board,ones,turns_since', '--outfile', '.tmp.testing.h5', '--directory', 'tests/test_data/sgf/'] + run_game_converter(args) + os.remove('.tmp.testing.h5') - def test_directory_walk(self): - args = ['--features', 'board,ones,turns_since', '--outfile', '.tmp.testing.h5', '--directory', 'tests/test_data', '--recurse'] - run_game_converter(args) - os.remove('.tmp.testing.h5') + def test_directory_walk(self): + args = ['--features', 'board,ones,turns_since', '--outfile', '.tmp.testing.h5', '--directory', 'tests/test_data', '--recurse'] + run_game_converter(args) + os.remove('.tmp.testing.h5') if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/tests/test_gamestate.py b/tests/test_gamestate.py index 
9f3e9a7f9..f1f28818b 100644 --- a/tests/test_gamestate.py +++ b/tests/test_gamestate.py @@ -6,159 +6,159 @@ class TestKo(unittest.TestCase): - def test_standard_ko(self): - gs = GameState(size=9) - gs.do_move((1, 0)) # B - gs.do_move((2, 0)) # W - gs.do_move((0, 1)) # B - gs.do_move((3, 1)) # W - gs.do_move((1, 2)) # B - gs.do_move((2, 2)) # W - gs.do_move((2, 1)) # B - - gs.do_move((1, 1)) # W trigger capture and ko - - self.assertEqual(gs.num_black_prisoners, 1) - self.assertEqual(gs.num_white_prisoners, 0) - - self.assertFalse(gs.is_legal((2, 1))) - - gs.do_move((5, 5)) - gs.do_move((5, 6)) - - self.assertTrue(gs.is_legal((2, 1))) - - def test_snapback_is_not_ko(self): - gs = GameState(size=5) - # B o W B . - # W W B . . - # . . . . . - # . . . . . - # . . . . . - # here, imagine black plays at 'o' capturing - # the white stone at (2, 0). White may play - # again at (2, 0) to capture the black stones - # at (0, 0), (1, 0). this is 'snapback' not 'ko' - # since it doesn't return the game to a - # previous position - B = [(0, 0), (2, 1), (3, 0)] - W = [(0, 1), (1, 1), (2, 0)] - for (b, w) in zip(B, W): - gs.do_move(b) - gs.do_move(w) - # do the capture of the single white stone - gs.do_move((1, 0)) - # there should be no ko - self.assertIsNone(gs.ko) - self.assertTrue(gs.is_legal((2, 0))) - # now play the snapback - gs.do_move((2, 0)) - # check that the numbers worked out - self.assertEqual(gs.num_black_prisoners, 2) - self.assertEqual(gs.num_white_prisoners, 1) - - def test_positional_superko(self): - move_list = [(0, 3), (0, 4), (1, 3), (1, 4), (2, 3), (2, 4), (2, 2), (3, 4), (2, 1), (3, 3), (3, 1), (3, 2), (3, 0), (4, 2), (1, 1), (4, 1), (8, 0), (4, 0), (8, 1), (0, 2), (8, 2), (0, 1), (8, 3), (1, 0), (8, 4), (2, 0), (0, 0)] - - gs = GameState(size=9) - for move in move_list: - gs.do_move(move) - self.assertTrue(gs.is_legal((1, 0))) - - gs = GameState(size=9, enforce_superko=True) - for move in move_list: - gs.do_move(move) - self.assertFalse(gs.is_legal((1, 0))) + def test_standard_ko(self): + gs = GameState(size=9) + gs.do_move((1, 0)) # B + gs.do_move((2, 0)) # W + gs.do_move((0, 1)) # B + gs.do_move((3, 1)) # W + gs.do_move((1, 2)) # B + gs.do_move((2, 2)) # W + gs.do_move((2, 1)) # B + + gs.do_move((1, 1)) # W trigger capture and ko + + self.assertEqual(gs.num_black_prisoners, 1) + self.assertEqual(gs.num_white_prisoners, 0) + + self.assertFalse(gs.is_legal((2, 1))) + + gs.do_move((5, 5)) + gs.do_move((5, 6)) + + self.assertTrue(gs.is_legal((2, 1))) + + def test_snapback_is_not_ko(self): + gs = GameState(size=5) + # B o W B . + # W W B . . + # . . . . . + # . . . . . + # . . . . . + # here, imagine black plays at 'o' capturing + # the white stone at (2, 0). White may play + # again at (2, 0) to capture the black stones + # at (0, 0), (1, 0). 
this is 'snapback' not 'ko' + # since it doesn't return the game to a + # previous position + B = [(0, 0), (2, 1), (3, 0)] + W = [(0, 1), (1, 1), (2, 0)] + for (b, w) in zip(B, W): + gs.do_move(b) + gs.do_move(w) + # do the capture of the single white stone + gs.do_move((1, 0)) + # there should be no ko + self.assertIsNone(gs.ko) + self.assertTrue(gs.is_legal((2, 0))) + # now play the snapback + gs.do_move((2, 0)) + # check that the numbers worked out + self.assertEqual(gs.num_black_prisoners, 2) + self.assertEqual(gs.num_white_prisoners, 1) + + def test_positional_superko(self): + move_list = [(0, 3), (0, 4), (1, 3), (1, 4), (2, 3), (2, 4), (2, 2), (3, 4), (2, 1), (3, 3), (3, 1), (3, 2), (3, 0), (4, 2), (1, 1), (4, 1), (8, 0), (4, 0), (8, 1), (0, 2), (8, 2), (0, 1), (8, 3), (1, 0), (8, 4), (2, 0), (0, 0)] + + gs = GameState(size=9) + for move in move_list: + gs.do_move(move) + self.assertTrue(gs.is_legal((1, 0))) + + gs = GameState(size=9, enforce_superko=True) + for move in move_list: + gs.do_move(move) + self.assertFalse(gs.is_legal((1, 0))) class TestEye(unittest.TestCase): - def test_simple_eye(self): - - # create a black eye in top left (1, 1), white in bottom right (5, 5) - - gs = GameState(size=7) - gs.do_move((1, 0)) # B - gs.do_move((5, 4)) # W - gs.do_move((2, 1)) # B - gs.do_move((6, 5)) # W - gs.do_move((1, 2)) # B - gs.do_move((5, 6)) # W - gs.do_move((0, 1)) # B - gs.do_move((4, 5)) # W - - # test black eye top left - self.assertTrue(gs.is_eyeish((1, 1), go.BLACK)) - self.assertFalse(gs.is_eyeish((1, 1), go.WHITE)) - - # test white eye bottom right - self.assertTrue(gs.is_eyeish((5, 5), go.WHITE)) - self.assertFalse(gs.is_eyeish((5, 5), go.BLACK)) - - # test no eye in other random positions - self.assertFalse(gs.is_eyeish((1, 0), go.BLACK)) - self.assertFalse(gs.is_eyeish((1, 0), go.WHITE)) - self.assertFalse(gs.is_eyeish((2, 2), go.BLACK)) - self.assertFalse(gs.is_eyeish((2, 2), go.WHITE)) - - def test_true_eye(self): - gs = GameState(size=7) - gs.do_move((1, 0), go.BLACK) - gs.do_move((0, 1), go.BLACK) - - # false eye at 0, 0 - self.assertTrue(gs.is_eyeish((0, 0), go.BLACK)) - self.assertFalse(gs.is_eye((0, 0), go.BLACK)) - - # make it a true eye by turning the corner (1, 1) into an eye itself - gs.do_move((1, 2), go.BLACK) - gs.do_move((2, 1), go.BLACK) - gs.do_move((2, 2), go.BLACK) - gs.do_move((0, 2), go.BLACK) - - self.assertTrue(gs.is_eyeish((0, 0), go.BLACK)) - self.assertTrue(gs.is_eye((0, 0), go.BLACK)) - self.assertTrue(gs.is_eye((1, 1), go.BLACK)) - - def test_eye_recursion(self): - # a checkerboard pattern of black is 'technically' all true eyes - # mutually supporting each other - gs = GameState(7) - for x in range(gs.size): - for y in range(gs.size): - if (x + y) % 2 == 1: - gs.do_move((x, y), go.BLACK) - self.assertTrue(gs.is_eye((0, 0), go.BLACK)) - - def test_liberties_after_capture(self): - # creates 3x3 black group in the middle, that is then all captured - # ...then an assertion is made that the resulting liberties after - # capture are the same as if the group had never been there - gs_capture = GameState(7) - gs_reference = GameState(7) - # add in 3x3 black stones - for x in range(2, 5): - for y in range(2, 5): - gs_capture.do_move((x, y), go.BLACK) - # surround the black group with white stones - # and set the same white stones in gs_reference - for x in range(2, 5): - gs_capture.do_move((x, 1), go.WHITE) - gs_capture.do_move((x, 5), go.WHITE) - gs_reference.do_move((x, 1), go.WHITE) - gs_reference.do_move((x, 5), go.WHITE) - gs_capture.do_move((1, 
1), go.WHITE) - gs_reference.do_move((1, 1), go.WHITE) - for y in range(2, 5): - gs_capture.do_move((1, y), go.WHITE) - gs_capture.do_move((5, y), go.WHITE) - gs_reference.do_move((1, y), go.WHITE) - gs_reference.do_move((5, y), go.WHITE) - - # board configuration and liberties of gs_capture and of gs_reference should be identical - self.assertTrue(np.all(gs_reference.board == gs_capture.board)) - self.assertTrue(np.all(gs_reference.liberty_counts == gs_capture.liberty_counts)) + def test_simple_eye(self): + + # create a black eye in top left (1, 1), white in bottom right (5, 5) + + gs = GameState(size=7) + gs.do_move((1, 0)) # B + gs.do_move((5, 4)) # W + gs.do_move((2, 1)) # B + gs.do_move((6, 5)) # W + gs.do_move((1, 2)) # B + gs.do_move((5, 6)) # W + gs.do_move((0, 1)) # B + gs.do_move((4, 5)) # W + + # test black eye top left + self.assertTrue(gs.is_eyeish((1, 1), go.BLACK)) + self.assertFalse(gs.is_eyeish((1, 1), go.WHITE)) + + # test white eye bottom right + self.assertTrue(gs.is_eyeish((5, 5), go.WHITE)) + self.assertFalse(gs.is_eyeish((5, 5), go.BLACK)) + + # test no eye in other random positions + self.assertFalse(gs.is_eyeish((1, 0), go.BLACK)) + self.assertFalse(gs.is_eyeish((1, 0), go.WHITE)) + self.assertFalse(gs.is_eyeish((2, 2), go.BLACK)) + self.assertFalse(gs.is_eyeish((2, 2), go.WHITE)) + + def test_true_eye(self): + gs = GameState(size=7) + gs.do_move((1, 0), go.BLACK) + gs.do_move((0, 1), go.BLACK) + + # false eye at 0, 0 + self.assertTrue(gs.is_eyeish((0, 0), go.BLACK)) + self.assertFalse(gs.is_eye((0, 0), go.BLACK)) + + # make it a true eye by turning the corner (1, 1) into an eye itself + gs.do_move((1, 2), go.BLACK) + gs.do_move((2, 1), go.BLACK) + gs.do_move((2, 2), go.BLACK) + gs.do_move((0, 2), go.BLACK) + + self.assertTrue(gs.is_eyeish((0, 0), go.BLACK)) + self.assertTrue(gs.is_eye((0, 0), go.BLACK)) + self.assertTrue(gs.is_eye((1, 1), go.BLACK)) + + def test_eye_recursion(self): + # a checkerboard pattern of black is 'technically' all true eyes + # mutually supporting each other + gs = GameState(7) + for x in range(gs.size): + for y in range(gs.size): + if (x + y) % 2 == 1: + gs.do_move((x, y), go.BLACK) + self.assertTrue(gs.is_eye((0, 0), go.BLACK)) + + def test_liberties_after_capture(self): + # creates 3x3 black group in the middle, that is then all captured + # ...then an assertion is made that the resulting liberties after + # capture are the same as if the group had never been there + gs_capture = GameState(7) + gs_reference = GameState(7) + # add in 3x3 black stones + for x in range(2, 5): + for y in range(2, 5): + gs_capture.do_move((x, y), go.BLACK) + # surround the black group with white stones + # and set the same white stones in gs_reference + for x in range(2, 5): + gs_capture.do_move((x, 1), go.WHITE) + gs_capture.do_move((x, 5), go.WHITE) + gs_reference.do_move((x, 1), go.WHITE) + gs_reference.do_move((x, 5), go.WHITE) + gs_capture.do_move((1, 1), go.WHITE) + gs_reference.do_move((1, 1), go.WHITE) + for y in range(2, 5): + gs_capture.do_move((1, y), go.WHITE) + gs_capture.do_move((5, y), go.WHITE) + gs_reference.do_move((1, y), go.WHITE) + gs_reference.do_move((5, y), go.WHITE) + + # board configuration and liberties of gs_capture and of gs_reference should be identical + self.assertTrue(np.all(gs_reference.board == gs_capture.board)) + self.assertTrue(np.all(gs_reference.liberty_counts == gs_capture.liberty_counts)) if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/tests/test_gtp_wrapper.py b/tests/test_gtp_wrapper.py 
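The positional-superko test above comes down to one flag: a plain GameState only forbids the immediate ko recapture, while enforce_superko=True also rejects any move that recreates an earlier whole-board position. A condensed, runnable sketch of that difference (import path assumed):

from AlphaGo.go import GameState   # import path assumed for this sketch

move_list = [(0, 3), (0, 4), (1, 3), (1, 4), (2, 3), (2, 4), (2, 2), (3, 4), (2, 1),
             (3, 3), (3, 1), (3, 2), (3, 0), (4, 2), (1, 1), (4, 1), (8, 0), (4, 0),
             (8, 1), (0, 2), (8, 2), (0, 1), (8, 3), (1, 0), (8, 4), (2, 0), (0, 0)]

simple_ko = GameState(size=9)
superko = GameState(size=9, enforce_superko=True)
for move in move_list:
    simple_ko.do_move(move)
    superko.do_move(move)

# playing (1, 0) would recreate an earlier whole-board position
assert simple_ko.is_legal((1, 0))      # allowed when only simple ko is enforced
assert not superko.is_legal((1, 0))    # rejected under positional superko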
index 5ab4ddb7f..345b09afa 100644 --- a/tests/test_gtp_wrapper.py +++ b/tests/test_gtp_wrapper.py @@ -5,26 +5,26 @@ class PassPlayer(object): - def get_move(self, state): - return go.PASS_MOVE + def get_move(self, state): + return go.PASS_MOVE class TestGTPProcess(unittest.TestCase): - def test_run_commands(self): - def stdin_simulator(): - return "\n".join([ - "1 name", - "2 boardsize 19", - "3 clear_board", - "4 genmove black", - "5 genmove white", - "99 quit"]) + def test_run_commands(self): + def stdin_simulator(): + return "\n".join([ + "1 name", + "2 boardsize 19", + "3 clear_board", + "4 genmove black", + "5 genmove white", + "99 quit"]) - gtp_proc = Process(target=run_gtp, args=(PassPlayer(), stdin_simulator)) - gtp_proc.start() - gtp_proc.join(timeout=1) + gtp_proc = Process(target=run_gtp, args=(PassPlayer(), stdin_simulator)) + gtp_proc.start() + gtp_proc.join(timeout=1) if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/tests/test_liberties.py b/tests/test_liberties.py index 4ea0bb728..3bcca7a6b 100644 --- a/tests/test_liberties.py +++ b/tests/test_liberties.py @@ -4,40 +4,40 @@ class TestLiberties(unittest.TestCase): - def setUp(self): - self.s = GameState() - self.s.do_move((4, 5)) - self.s.do_move((5, 5)) - self.s.do_move((5, 6)) - self.s.do_move((10, 10)) - self.s.do_move((4, 6)) - self.s.do_move((10, 11)) - self.s.do_move((6, 6)) - self.s.do_move((9, 10)) - - def test_curr_liberties(self): - self.assertEqual(self.s.liberty_counts[5][5], 2) - self.assertEqual(self.s.liberty_counts[4][5], 8) - self.assertEqual(self.s.liberty_counts[5][6], 8) - - def test_neighbors_edge_cases(self): - - st = GameState() - st.do_move((0, 0)) # B B . . . . . - st.do_move((5, 5)) # B W . . . . . - st.do_move((0, 1)) # . . . . . . . - st.do_move((6, 6)) # . . . . . . . - st.do_move((1, 0)) # . . . . . W . - st.do_move((1, 1)) # . . . . . . W - - # get_group in the corner - self.assertEqual(len(st.get_group((0, 0))), 3, "group size in corner") - - # get_group of an empty space - self.assertEqual(len(st.get_group((4, 4))), 0, "group size of empty space") - - # get_group of a single piece - self.assertEqual(len(st.get_group((5, 5))), 1, "group size of single piece") + def setUp(self): + self.s = GameState() + self.s.do_move((4, 5)) + self.s.do_move((5, 5)) + self.s.do_move((5, 6)) + self.s.do_move((10, 10)) + self.s.do_move((4, 6)) + self.s.do_move((10, 11)) + self.s.do_move((6, 6)) + self.s.do_move((9, 10)) + + def test_curr_liberties(self): + self.assertEqual(self.s.liberty_counts[5][5], 2) + self.assertEqual(self.s.liberty_counts[4][5], 8) + self.assertEqual(self.s.liberty_counts[5][6], 8) + + def test_neighbors_edge_cases(self): + + st = GameState() + st.do_move((0, 0)) # B B . . . . . + st.do_move((5, 5)) # B W . . . . . + st.do_move((0, 1)) # . . . . . . . + st.do_move((6, 6)) # . . . . . . . + st.do_move((1, 0)) # . . . . . W . + st.do_move((1, 1)) # . . . . . . 
W + + # get_group in the corner + self.assertEqual(len(st.get_group((0, 0))), 3, "group size in corner") + + # get_group of an empty space + self.assertEqual(len(st.get_group((4, 4))), 0, "group size of empty space") + + # get_group of a single piece + self.assertEqual(len(st.get_group((5, 5))), 1, "group size of single piece") if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/tests/test_mcts.py b/tests/test_mcts.py index c6ab6f803..3537515a9 100644 --- a/tests/test_mcts.py +++ b/tests/test_mcts.py @@ -6,109 +6,109 @@ class TestTreeNode(unittest.TestCase): - def setUp(self): - self.gs = GameState() - self.node = TreeNode(None, 1.0) - - def test_selection(self): - self.node.expand(dummy_policy(self.gs)) - action, next_node = self.node.select() - self.assertEqual(action, (18, 18)) # according to the dummy policy below - self.assertIsNotNone(next_node) - - def test_expansion(self): - self.assertEqual(0, len(self.node._children)) - self.node.expand(dummy_policy(self.gs)) - self.assertEqual(19 * 19, len(self.node._children)) - for a, p in dummy_policy(self.gs): - self.assertEqual(p, self.node._children[a]._P) - - def test_update(self): - self.node.expand(dummy_policy(self.gs)) - child = self.node._children[(18, 18)] - # Note: the root must be updated first for the visit count to work. - self.node.update(leaf_value=1.0, c_puct=5.0) - child.update(leaf_value=1.0, c_puct=5.0) - expected_score = 1.0 + 5.0 * dummy_distribution[-1] * 0.5 - self.assertEqual(expected_score, child.get_value()) - # After a second update, the Q value should be the average of the two, and the u value - # should be multiplied by sqrt(parent visits) / (node visits + 1) (which was simply equal - # to 0.5 before) - self.node.update(leaf_value=0.0, c_puct=5.0) - child.update(leaf_value=0.0, c_puct=5.0) - expected_score = 0.5 + 5.0 * dummy_distribution[-1] * np.sqrt(2.0) / 3.0 - self.assertEqual(expected_score, child.get_value()) - - def test_update_recursive(self): - # Assertions are identical to test_treenode_update. - self.node.expand(dummy_policy(self.gs)) - child = self.node._children[(18, 18)] - child.update_recursive(leaf_value=1.0, c_puct=5.0) - expected_score = 1.0 + 5.0 * dummy_distribution[-1] / 2.0 - self.assertEqual(expected_score, child.get_value()) - child.update_recursive(leaf_value=0.0, c_puct=5.0) - expected_score = 0.5 + 5.0 * dummy_distribution[-1] * np.sqrt(2.0) / 3.0 - self.assertEqual(expected_score, child.get_value()) + def setUp(self): + self.gs = GameState() + self.node = TreeNode(None, 1.0) + + def test_selection(self): + self.node.expand(dummy_policy(self.gs)) + action, next_node = self.node.select() + self.assertEqual(action, (18, 18)) # according to the dummy policy below + self.assertIsNotNone(next_node) + + def test_expansion(self): + self.assertEqual(0, len(self.node._children)) + self.node.expand(dummy_policy(self.gs)) + self.assertEqual(19 * 19, len(self.node._children)) + for a, p in dummy_policy(self.gs): + self.assertEqual(p, self.node._children[a]._P) + + def test_update(self): + self.node.expand(dummy_policy(self.gs)) + child = self.node._children[(18, 18)] + # Note: the root must be updated first for the visit count to work. 
+ self.node.update(leaf_value=1.0, c_puct=5.0) + child.update(leaf_value=1.0, c_puct=5.0) + expected_score = 1.0 + 5.0 * dummy_distribution[-1] * 0.5 + self.assertEqual(expected_score, child.get_value()) + # After a second update, the Q value should be the average of the two, and the u value + # should be multiplied by sqrt(parent visits) / (node visits + 1) (which was simply equal + # to 0.5 before) + self.node.update(leaf_value=0.0, c_puct=5.0) + child.update(leaf_value=0.0, c_puct=5.0) + expected_score = 0.5 + 5.0 * dummy_distribution[-1] * np.sqrt(2.0) / 3.0 + self.assertEqual(expected_score, child.get_value()) + + def test_update_recursive(self): + # Assertions are identical to test_treenode_update. + self.node.expand(dummy_policy(self.gs)) + child = self.node._children[(18, 18)] + child.update_recursive(leaf_value=1.0, c_puct=5.0) + expected_score = 1.0 + 5.0 * dummy_distribution[-1] / 2.0 + self.assertEqual(expected_score, child.get_value()) + child.update_recursive(leaf_value=0.0, c_puct=5.0) + expected_score = 0.5 + 5.0 * dummy_distribution[-1] * np.sqrt(2.0) / 3.0 + self.assertEqual(expected_score, child.get_value()) class TestMCTS(unittest.TestCase): - def setUp(self): - self.gs = GameState() - self.mcts = MCTS(dummy_value, dummy_policy, dummy_rollout, n_playout=2) - - def _count_expansions(self): - """Helper function to count the number of expansions past the root using the dummy policy - """ - node = self.mcts._root - expansions = 0 - # Loop over actions in decreasing probability. - for action, _ in sorted(dummy_policy(self.gs), key=lambda (a, p): p, reverse=True): - if action in node._children: - expansions += 1 - node = node._children[action] - else: - break - return expansions - - def test_playout(self): - self.mcts._playout(self.gs.copy(), 8) - # Assert that the most likely child was visited (according to the dummy policy below). - self.assertEqual(1, self.mcts._root._children[(18, 18)]._n_visits) - # Assert that the search depth expanded nodes 8 times. - self.assertEqual(8, self._count_expansions()) - - def test_playout_with_pass(self): - # Test that playout handles the end of the game (i.e. passing/no moves). Mock this by - # creating a policy that returns nothing after 4 moves. - def stop_early_policy(state): - if len(state.history) <= 4: - return dummy_policy(state) - else: - return [] - self.mcts = MCTS(dummy_value, stop_early_policy, stop_early_policy, n_playout=2) - self.mcts._playout(self.gs.copy(), 8) - # Assert that (18, 18) and (18, 17) are still only visited once. - self.assertEqual(1, self.mcts._root._children[(18, 18)]._n_visits) - # Assert that no expansions happened after reaching the "end" in 4 moves. - self.assertEqual(5, self._count_expansions()) - - def test_get_move(self): - move = self.mcts.get_move(self.gs) - self.mcts.update_with_move(move) - # success if no errors - - def test_update_with_move(self): - move = self.mcts.get_move(self.gs) - self.gs.do_move(move) - self.mcts.update_with_move(move) - # Assert that the new root still has children. - self.assertTrue(len(self.mcts._root._children) > 0) - # Assert that the new root has no parent (the rest of the tree will be garbage collected). - self.assertIsNone(self.mcts._root._parent) - # Assert that the next best move according to the root is (18, 17), according to the - # dummy policy below. 
- self.assertEqual((18, 17), self.mcts._root.select()[0]) + def setUp(self): + self.gs = GameState() + self.mcts = MCTS(dummy_value, dummy_policy, dummy_rollout, n_playout=2) + + def _count_expansions(self): + """Helper function to count the number of expansions past the root using the dummy policy + """ + node = self.mcts._root + expansions = 0 + # Loop over actions in decreasing probability. + for action, _ in sorted(dummy_policy(self.gs), key=lambda (a, p): p, reverse=True): + if action in node._children: + expansions += 1 + node = node._children[action] + else: + break + return expansions + + def test_playout(self): + self.mcts._playout(self.gs.copy(), 8) + # Assert that the most likely child was visited (according to the dummy policy below). + self.assertEqual(1, self.mcts._root._children[(18, 18)]._n_visits) + # Assert that the search depth expanded nodes 8 times. + self.assertEqual(8, self._count_expansions()) + + def test_playout_with_pass(self): + # Test that playout handles the end of the game (i.e. passing/no moves). Mock this by + # creating a policy that returns nothing after 4 moves. + def stop_early_policy(state): + if len(state.history) <= 4: + return dummy_policy(state) + else: + return [] + self.mcts = MCTS(dummy_value, stop_early_policy, stop_early_policy, n_playout=2) + self.mcts._playout(self.gs.copy(), 8) + # Assert that (18, 18) and (18, 17) are still only visited once. + self.assertEqual(1, self.mcts._root._children[(18, 18)]._n_visits) + # Assert that no expansions happened after reaching the "end" in 4 moves. + self.assertEqual(5, self._count_expansions()) + + def test_get_move(self): + move = self.mcts.get_move(self.gs) + self.mcts.update_with_move(move) + # success if no errors + + def test_update_with_move(self): + move = self.mcts.get_move(self.gs) + self.gs.do_move(move) + self.mcts.update_with_move(move) + # Assert that the new root still has children. + self.assertTrue(len(self.mcts._root._children) > 0) + # Assert that the new root has no parent (the rest of the tree will be garbage collected). + self.assertIsNone(self.mcts._root._parent) + # Assert that the next best move according to the root is (18, 17), according to the + # dummy policy below. + self.assertEqual((18, 17), self.mcts._root.select()[0]) # A distribution over positions that is smallest at (0,0) and largest at (18,18) @@ -117,17 +117,17 @@ def test_update_with_move(self): def dummy_policy(state): - moves = state.get_legal_moves(include_eyes=False) - return zip(moves, dummy_distribution) + moves = state.get_legal_moves(include_eyes=False) + return zip(moves, dummy_distribution) # Rollout is a clone of the policy function. 
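The expected_score values asserted in test_update and test_update_recursive follow the rule spelled out in their comments, value = Q + c_puct * P * sqrt(parent visits) / (1 + node visits). The TreeNode implementation itself is not part of this whitespace patch, so the following arithmetic is only a sketch of what those assertions imply.

import numpy as np

def node_value(Q, P, n_parent, n_child, c_puct=5.0):
    # mean action value plus the exploration bonus described in the test comments
    return Q + c_puct * P * np.sqrt(n_parent) / (1 + n_child)

P = 0.9   # stands in for dummy_distribution[-1], the prior of the (18, 18) child
# First update: one win through root and child, so Q = 1.0 and both visit counts are 1.
assert np.isclose(node_value(1.0, P, 1, 1), 1.0 + 5.0 * P * 0.5)
# Second update: a loss averages Q down to 0.5 and both visit counts become 2.
assert np.isclose(node_value(0.5, P, 2, 2), 0.5 + 5.0 * P * np.sqrt(2.0) / 3.0)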
dummy_rollout = dummy_policy def dummy_value(state): - # it's not very confident - return 0.0 + # it's not very confident + return 0.0 if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/tests/test_policy.py b/tests/test_policy.py index ddca1de6a..9494f4765 100644 --- a/tests/test_policy.py +++ b/tests/test_policy.py @@ -9,143 +9,143 @@ class TestCNNPolicy(unittest.TestCase): - def test_default_policy(self): - policy = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"]) - policy.eval_state(GameState()) - # just hope nothing breaks + def test_default_policy(self): + policy = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"]) + policy.eval_state(GameState()) + # just hope nothing breaks - def test_batch_eval_state(self): - policy = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"]) - results = policy.batch_eval_state([GameState(), GameState()]) - self.assertEqual(len(results), 2) # one result per GameState - self.assertEqual(len(results[0]), 361) # each one has 361 (move,prob) pairs + def test_batch_eval_state(self): + policy = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"]) + results = policy.batch_eval_state([GameState(), GameState()]) + self.assertEqual(len(results), 2) # one result per GameState + self.assertEqual(len(results[0]), 361) # each one has 361 (move,prob) pairs - def test_output_size(self): - policy19 = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"], board=19) - output = policy19.forward(policy19.preprocessor.state_to_tensor(GameState(19))) - self.assertEqual(output.shape, (1, 19 * 19)) + def test_output_size(self): + policy19 = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"], board=19) + output = policy19.forward(policy19.preprocessor.state_to_tensor(GameState(19))) + self.assertEqual(output.shape, (1, 19 * 19)) - policy13 = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"], board=13) - output = policy13.forward(policy13.preprocessor.state_to_tensor(GameState(13))) - self.assertEqual(output.shape, (1, 13 * 13)) + policy13 = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"], board=13) + output = policy13.forward(policy13.preprocessor.state_to_tensor(GameState(13))) + self.assertEqual(output.shape, (1, 13 * 13)) - def test_save_load(self): - policy = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"]) + def test_save_load(self): + policy = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"]) - model_file = 'TESTPOLICY.json' - weights_file = 'TESTWEIGHTS.h5' - model_file2 = 'TESTPOLICY2.json' - weights_file2 = 'TESTWEIGHTS2.h5' + model_file = 'TESTPOLICY.json' + weights_file = 'TESTWEIGHTS.h5' + model_file2 = 'TESTPOLICY2.json' + weights_file2 = 'TESTWEIGHTS2.h5' - # test saving model/weights separately - policy.save_model(model_file) - policy.model.save_weights(weights_file, overwrite=True) - # test saving them together - policy.save_model(model_file2, weights_file2) + # test saving model/weights separately + policy.save_model(model_file) + policy.model.save_weights(weights_file, overwrite=True) + # test saving them together + policy.save_model(model_file2, weights_file2) - copypolicy = CNNPolicy.load_model(model_file) - copypolicy.model.load_weights(weights_file) + copypolicy = CNNPolicy.load_model(model_file) + copypolicy.model.load_weights(weights_file) - copypolicy2 = CNNPolicy.load_model(model_file2) + copypolicy2 = CNNPolicy.load_model(model_file2) - for w1, w2 in 
zip(copypolicy.model.get_weights(), copypolicy2.model.get_weights()): - self.assertTrue(np.all(w1 == w2)) + for w1, w2 in zip(copypolicy.model.get_weights(), copypolicy2.model.get_weights()): + self.assertTrue(np.all(w1 == w2)) - os.remove(model_file) - os.remove(weights_file) - os.remove(model_file2) - os.remove(weights_file2) + os.remove(model_file) + os.remove(weights_file) + os.remove(model_file2) + os.remove(weights_file2) class TestResnetPolicy(unittest.TestCase): - def test_default_policy(self): - policy = ResnetPolicy(["board", "liberties", "sensibleness", "capture_size"]) - policy.eval_state(GameState()) - # just hope nothing breaks + def test_default_policy(self): + policy = ResnetPolicy(["board", "liberties", "sensibleness", "capture_size"]) + policy.eval_state(GameState()) + # just hope nothing breaks - def test_batch_eval_state(self): - policy = ResnetPolicy(["board", "liberties", "sensibleness", "capture_size"]) - results = policy.batch_eval_state([GameState(), GameState()]) - self.assertEqual(len(results), 2) # one result per GameState - self.assertEqual(len(results[0]), 361) # each one has 361 (move,prob) pairs + def test_batch_eval_state(self): + policy = ResnetPolicy(["board", "liberties", "sensibleness", "capture_size"]) + results = policy.batch_eval_state([GameState(), GameState()]) + self.assertEqual(len(results), 2) # one result per GameState + self.assertEqual(len(results[0]), 361) # each one has 361 (move,prob) pairs - def test_save_load(self): - """Identical to above test_save_load - """ - policy = ResnetPolicy(["board", "liberties", "sensibleness", "capture_size"]) + def test_save_load(self): + """Identical to above test_save_load + """ + policy = ResnetPolicy(["board", "liberties", "sensibleness", "capture_size"]) - model_file = 'TESTPOLICY.json' - weights_file = 'TESTWEIGHTS.h5' - model_file2 = 'TESTPOLICY2.json' - weights_file2 = 'TESTWEIGHTS2.h5' + model_file = 'TESTPOLICY.json' + weights_file = 'TESTWEIGHTS.h5' + model_file2 = 'TESTPOLICY2.json' + weights_file2 = 'TESTWEIGHTS2.h5' - # test saving model/weights separately - policy.save_model(model_file) - policy.model.save_weights(weights_file, overwrite=True) - # test saving them together - policy.save_model(model_file2, weights_file2) + # test saving model/weights separately + policy.save_model(model_file) + policy.model.save_weights(weights_file, overwrite=True) + # test saving them together + policy.save_model(model_file2, weights_file2) - copypolicy = ResnetPolicy.load_model(model_file) - copypolicy.model.load_weights(weights_file) + copypolicy = ResnetPolicy.load_model(model_file) + copypolicy.model.load_weights(weights_file) - copypolicy2 = ResnetPolicy.load_model(model_file2) + copypolicy2 = ResnetPolicy.load_model(model_file2) - for w1, w2 in zip(copypolicy.model.get_weights(), copypolicy2.model.get_weights()): - self.assertTrue(np.all(w1 == w2)) + for w1, w2 in zip(copypolicy.model.get_weights(), copypolicy2.model.get_weights()): + self.assertTrue(np.all(w1 == w2)) - # check that save/load keeps the ResnetPolicy class - self.assertTrue(type(policy) == type(copypolicy)) + # check that save/load keeps the ResnetPolicy class + self.assertTrue(type(policy) == type(copypolicy)) - os.remove(model_file) - os.remove(weights_file) - os.remove(model_file2) - os.remove(weights_file2) + os.remove(model_file) + os.remove(weights_file) + os.remove(model_file2) + os.remove(weights_file2) class TestPlayers(unittest.TestCase): - def test_greedy_player(self): - gs = GameState() - policy = CNNPolicy(["board", "ones", 
"turns_since"]) - player = GreedyPolicyPlayer(policy) - for i in range(20): - move = player.get_move(gs) - self.assertIsNotNone(move) - gs.do_move(move) - - def test_probabilistic_player(self): - gs = GameState() - policy = CNNPolicy(["board", "ones", "turns_since"]) - player = ProbabilisticPolicyPlayer(policy) - for i in range(20): - move = player.get_move(gs) - self.assertIsNotNone(move) - gs.do_move(move) - - def test_sensible_probabilistic(self): - gs = GameState() - policy = CNNPolicy(["board", "ones", "turns_since"]) - player = ProbabilisticPolicyPlayer(policy) - empty = (10, 10) - for x in range(19): - for y in range(19): - if (x, y) != empty: - gs.do_move((x, y), go.BLACK) - gs.current_player = go.BLACK - self.assertIsNone(player.get_move(gs)) - - def test_sensible_greedy(self): - gs = GameState() - policy = CNNPolicy(["board", "ones", "turns_since"]) - player = GreedyPolicyPlayer(policy) - empty = (10, 10) - for x in range(19): - for y in range(19): - if (x, y) != empty: - gs.do_move((x, y), go.BLACK) - gs.current_player = go.BLACK - self.assertIsNone(player.get_move(gs)) + def test_greedy_player(self): + gs = GameState() + policy = CNNPolicy(["board", "ones", "turns_since"]) + player = GreedyPolicyPlayer(policy) + for i in range(20): + move = player.get_move(gs) + self.assertIsNotNone(move) + gs.do_move(move) + + def test_probabilistic_player(self): + gs = GameState() + policy = CNNPolicy(["board", "ones", "turns_since"]) + player = ProbabilisticPolicyPlayer(policy) + for i in range(20): + move = player.get_move(gs) + self.assertIsNotNone(move) + gs.do_move(move) + + def test_sensible_probabilistic(self): + gs = GameState() + policy = CNNPolicy(["board", "ones", "turns_since"]) + player = ProbabilisticPolicyPlayer(policy) + empty = (10, 10) + for x in range(19): + for y in range(19): + if (x, y) != empty: + gs.do_move((x, y), go.BLACK) + gs.current_player = go.BLACK + self.assertIsNone(player.get_move(gs)) + + def test_sensible_greedy(self): + gs = GameState() + policy = CNNPolicy(["board", "ones", "turns_since"]) + player = GreedyPolicyPlayer(policy) + empty = (10, 10) + for x in range(19): + for y in range(19): + if (x, y) != empty: + gs.do_move((x, y), go.BLACK) + gs.current_player = go.BLACK + self.assertIsNone(player.get_move(gs)) if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py index 509abeb26..a21e1851d 100644 --- a/tests/test_preprocessing.py +++ b/tests/test_preprocessing.py @@ -5,332 +5,332 @@ def simple_board(): - # make a tiny board for the sake of testing and hand-coding expected results - # - # X - # 0 1 2 3 4 5 6 - # B W . . . . . 0 - # B W . . . . . 1 - # B . . . B . . 2 - # Y . . . B k B . 3 - # . . . W B W . 4 - # . . . . W . . 5 - # . . . . . . . 6 - # - # where k is a ko position (white was just captured) - - gs = go.GameState(size=7) - - # ladder-looking thing in the top-left - gs.do_move((0, 0)) # B - gs.do_move((1, 0)) # W - gs.do_move((0, 1)) # B - gs.do_move((1, 1)) # W - gs.do_move((0, 2)) # B - - # ko position in the middle - gs.do_move((3, 4)) # W - gs.do_move((3, 3)) # B - gs.do_move((4, 5)) # W - gs.do_move((4, 2)) # B - gs.do_move((5, 4)) # W - gs.do_move((5, 3)) # B - gs.do_move((4, 3)) # W - the ko position - gs.do_move((4, 4)) # B - does the capture - - return gs + # make a tiny board for the sake of testing and hand-coding expected results + # + # X + # 0 1 2 3 4 5 6 + # B W . . . . . 0 + # B W . . . . . 1 + # B . . . B . . 2 + # Y . . . B k B . 3 + # . . . 
W B W . 4 + # . . . . W . . 5 + # . . . . . . . 6 + # + # where k is a ko position (white was just captured) + + gs = go.GameState(size=7) + + # ladder-looking thing in the top-left + gs.do_move((0, 0)) # B + gs.do_move((1, 0)) # W + gs.do_move((0, 1)) # B + gs.do_move((1, 1)) # W + gs.do_move((0, 2)) # B + + # ko position in the middle + gs.do_move((3, 4)) # W + gs.do_move((3, 3)) # B + gs.do_move((4, 5)) # W + gs.do_move((4, 2)) # B + gs.do_move((5, 4)) # W + gs.do_move((5, 3)) # B + gs.do_move((4, 3)) # W - the ko position + gs.do_move((4, 4)) # B - does the capture + + return gs def self_atari_board(): - # another tiny board for testing self-atari specifically. - # positions marked with 'a' are self-atari for black - # - # X - # 0 1 2 3 4 5 6 - # a W . . . W B 0 - # . . . . . . . 1 - # . . . . . . . 2 - # Y . . W . W . . 3 - # . W B a B W . 4 - # . . W W W . . 5 - # . . . . . . . 6 - # - # current_player = black - gs = go.GameState(size=7) - - gs.do_move((2, 4), go.BLACK) - gs.do_move((4, 4), go.BLACK) - gs.do_move((6, 0), go.BLACK) - - gs.do_move((1, 0), go.WHITE) - gs.do_move((5, 0), go.WHITE) - gs.do_move((2, 3), go.WHITE) - gs.do_move((4, 3), go.WHITE) - gs.do_move((1, 4), go.WHITE) - gs.do_move((5, 4), go.WHITE) - gs.do_move((2, 5), go.WHITE) - gs.do_move((3, 5), go.WHITE) - gs.do_move((4, 5), go.WHITE) - - return gs + # another tiny board for testing self-atari specifically. + # positions marked with 'a' are self-atari for black + # + # X + # 0 1 2 3 4 5 6 + # a W . . . W B 0 + # . . . . . . . 1 + # . . . . . . . 2 + # Y . . W . W . . 3 + # . W B a B W . 4 + # . . W W W . . 5 + # . . . . . . . 6 + # + # current_player = black + gs = go.GameState(size=7) + + gs.do_move((2, 4), go.BLACK) + gs.do_move((4, 4), go.BLACK) + gs.do_move((6, 0), go.BLACK) + + gs.do_move((1, 0), go.WHITE) + gs.do_move((5, 0), go.WHITE) + gs.do_move((2, 3), go.WHITE) + gs.do_move((4, 3), go.WHITE) + gs.do_move((1, 4), go.WHITE) + gs.do_move((5, 4), go.WHITE) + gs.do_move((2, 5), go.WHITE) + gs.do_move((3, 5), go.WHITE) + gs.do_move((4, 5), go.WHITE) + + return gs def capture_board(): - # another small board, this one with imminent captures - # - # X - # 0 1 2 3 4 5 6 - # . . B B . . . 0 - # . B W W B . . 1 - # . B W . . . . 2 - # Y . . B . . . . 3 - # . . . . W B . 4 - # . . . W . W B 5 - # . . . . W B . 6 - # - # current_player = black - gs = go.GameState(size=7) - - black = [(2, 0), (3, 0), (1, 1), (4, 1), (1, 2), (2, 3), (5, 4), (6, 5), (5, 6)] - white = [(2, 1), (3, 1), (2, 2), (4, 4), (3, 5), (5, 5), (4, 6)] - - for B in black: - gs.do_move(B, go.BLACK) - for W in white: - gs.do_move(W, go.WHITE) - gs.current_player = go.BLACK - - return gs + # another small board, this one with imminent captures + # + # X + # 0 1 2 3 4 5 6 + # . . B B . . . 0 + # . B W W B . . 1 + # . B W . . . . 2 + # Y . . B . . . . 3 + # . . . . W B . 4 + # . . . W . W B 5 + # . . . . W B . 6 + # + # current_player = black + gs = go.GameState(size=7) + + black = [(2, 0), (3, 0), (1, 1), (4, 1), (1, 2), (2, 3), (5, 4), (6, 5), (5, 6)] + white = [(2, 1), (3, 1), (2, 2), (4, 4), (3, 5), (5, 5), (4, 6)] + + for B in black: + gs.do_move(B, go.BLACK) + for W in white: + gs.do_move(W, go.WHITE) + gs.current_player = go.BLACK + + return gs class TestPreprocessingFeatures(unittest.TestCase): - """Test the functions in preprocessing.py - - note that the hand-coded features look backwards from what is depicted - in simple_board() because of the x/y column/row transpose thing (i.e. 
- numpy is typically thought of as indexing rows first, but we use (x,y) - indexes, so a numpy row is like a go column and vice versa) - """ - - def test_get_board(self): - gs = simple_board() - pp = Preprocess(["board"]) - feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) - - white_pos = np.asarray([ - [0, 0, 0, 0, 0, 0, 0], - [1, 1, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 1, 0, 0], - [0, 0, 0, 0, 0, 1, 0], - [0, 0, 0, 0, 1, 0, 0], - [0, 0, 0, 0, 0, 0, 0]]) - black_pos = np.asarray([ - [1, 1, 1, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 1, 0, 0, 0], - [0, 0, 1, 0, 1, 0, 0], - [0, 0, 0, 1, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0]]) - empty_pos = np.ones((gs.size, gs.size)) - (white_pos + black_pos) - - # check number of planes - self.assertEqual(feature.shape, (gs.size, gs.size, 3)) - # check return value against hand-coded expectation - # (given that current_player is white) - self.assertTrue(np.all(feature == np.dstack((white_pos, black_pos, empty_pos)))) - - def test_get_turns_since(self): - gs = simple_board() - pp = Preprocess(["turns_since"]) - feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) - - one_hot_turns = np.zeros((gs.size, gs.size, 8)) - - rev_moves = gs.history[::-1] - - for x in range(gs.size): - for y in range(gs.size): - if gs.board[x, y] != go.EMPTY: - # find most recent move at x, y - age = rev_moves.index((x, y)) - one_hot_turns[x, y, min(age, 7)] = 1 - - self.assertTrue(np.all(feature == one_hot_turns)) - - def test_get_liberties(self): - gs = simple_board() - pp = Preprocess(["liberties"]) - feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) - - # todo - test liberties when > 8 - - one_hot_liberties = np.zeros((gs.size, gs.size, 8)) - # black piece at (4,4) has a single liberty: (4,3) - one_hot_liberties[4, 4, 0] = 1 - - # the black group in the top left corner has 2 liberties - one_hot_liberties[0, 0:3, 1] = 1 - # .. 
as do the white pieces on the left and right of the eye - one_hot_liberties[3, 4, 1] = 1 - one_hot_liberties[5, 4, 1] = 1 - - # the white group in the top left corner has 3 liberties - one_hot_liberties[1, 0:2, 2] = 1 - # ...as does the white piece at (4,5) - one_hot_liberties[4, 5, 2] = 1 - # ...and the black pieces on the sides of the eye - one_hot_liberties[3, 3, 2] = 1 - one_hot_liberties[5, 3, 2] = 1 - - # the black piece at (4,2) has 4 liberties - one_hot_liberties[4, 2, 3] = 1 - - for i in range(8): - self.assertTrue( - np.all(feature[:, :, i] == one_hot_liberties[:, :, i]), - "bad expectation: stones with %d liberties" % (i + 1)) - - def test_get_capture_size(self): - gs = capture_board() - pp = Preprocess(["capture_size"]) - feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) - - score_before = gs.num_white_prisoners - one_hot_capture = np.zeros((gs.size, gs.size, 8)) - # there is no capture available; all legal moves are zero-capture - for (x, y) in gs.get_legal_moves(): - copy = gs.copy() - copy.do_move((x, y)) - num_captured = copy.num_white_prisoners - score_before - one_hot_capture[x, y, min(7, num_captured)] = 1 - - for i in range(8): - self.assertTrue( - np.all(feature[:, :, i] == one_hot_capture[:, :, i]), - "bad expectation: capturing %d stones" % i) - - def test_get_self_atari_size(self): - gs = self_atari_board() - pp = Preprocess(["self_atari_size"]) - feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) - - one_hot_self_atari = np.zeros((gs.size, gs.size, 8)) - # self atari of size 1 at position 0,0 - one_hot_self_atari[0, 0, 0] = 1 - # self atari of size 3 at position 3,4 - one_hot_self_atari[3, 4, 2] = 1 - - self.assertTrue(np.all(feature == one_hot_self_atari)) - - def test_get_self_atari_size_cap(self): - gs = capture_board() - pp = Preprocess(["self_atari_size"]) - feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) - - one_hot_self_atari = np.zeros((gs.size, gs.size, 8)) - # self atari of size 1 at the ko position and just below it - one_hot_self_atari[4, 5, 0] = 1 - one_hot_self_atari[3, 6, 0] = 1 - # self atari of size 3 at bottom corner - one_hot_self_atari[6, 6, 2] = 1 - - self.assertTrue(np.all(feature == one_hot_self_atari)) - - def test_get_liberties_after(self): - gs = simple_board() - pp = Preprocess(["liberties_after"]) - feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) - - one_hot_liberties = np.zeros((gs.size, gs.size, 8)) - - # TODO (?) hand-code? 
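All of these feature tests rely on the same tensor layout: Preprocess.state_to_tensor returns a batch of shape (1, total_planes, size, size), which the tests transpose to (size, size, total_planes) before comparing against hand-coded planes. A small sketch of that layout, with import paths assumed and plane counts taken from the tests above:

from AlphaGo.go import GameState
from AlphaGo.preprocessing.preprocessing import Preprocess   # import paths assumed

gs = GameState(size=7)                     # same board size as the fixtures above
pp = Preprocess(["board", "liberties"])    # 3 board planes + 8 liberty planes
tensor = pp.state_to_tensor(gs)
assert tensor.shape == (1, 3 + 8, gs.size, gs.size)

feature = tensor[0].transpose((1, 2, 0))   # per-position view used by the tests
assert feature.shape == (gs.size, gs.size, 3 + 8)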
- for (x, y) in gs.get_legal_moves(): - copy = gs.copy() - copy.do_move((x, y)) - libs = copy.liberty_counts[x, y] - if libs < 7: - one_hot_liberties[x, y, libs - 1] = 1 - else: - one_hot_liberties[x, y, 7] = 1 - - for i in range(8): - self.assertTrue( - np.all(feature[:, :, i] == one_hot_liberties[:, :, i]), - "bad expectation: stones with %d liberties after move" % (i + 1)) - - def test_get_liberties_after_cap(self): - """A copy of test_get_liberties_after but where captures are imminent - """ - gs = capture_board() - pp = Preprocess(["liberties_after"]) - feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) - - one_hot_liberties = np.zeros((gs.size, gs.size, 8)) - - for (x, y) in gs.get_legal_moves(): - copy = gs.copy() - copy.do_move((x, y)) - libs = copy.liberty_counts[x, y] - one_hot_liberties[x, y, min(libs - 1, 7)] = 1 - - for i in range(8): - self.assertTrue( - np.all(feature[:, :, i] == one_hot_liberties[:, :, i]), - "bad expectation: stones with %d liberties after move" % (i + 1)) - - def test_get_ladder_capture(self): - pass - - def test_get_ladder_escape(self): - pass - - def test_get_sensibleness(self): - # TODO - there are no legal eyes at the moment - - gs = simple_board() - pp = Preprocess(["sensibleness"]) - feature = pp.state_to_tensor(gs)[0, 0] # 1D tensor; no need to transpose - - expectation = np.zeros((gs.size, gs.size)) - for (x, y) in gs.get_legal_moves(): - if not (gs.is_eye((x, y), go.WHITE)): - expectation[x, y] = 1 - self.assertTrue(np.all(expectation == feature)) - - def test_get_legal(self): - gs = simple_board() - pp = Preprocess(["legal"]) - feature = pp.state_to_tensor(gs)[0, 0] # 1D tensor; no need to transpose - - expectation = np.zeros((gs.size, gs.size)) - for (x, y) in gs.get_legal_moves(): - expectation[x, y] = 1 - self.assertTrue(np.all(expectation == feature)) - - def test_feature_concatenation(self): - gs = simple_board() - pp = Preprocess(["board", "sensibleness", "capture_size"]) - feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) - - expectation = np.zeros((gs.size, gs.size, 3 + 1 + 8)) - - # first three planes: board - expectation[:, :, 0] = (gs.board == go.WHITE) * 1 - expectation[:, :, 1] = (gs.board == go.BLACK) * 1 - expectation[:, :, 2] = (gs.board == go.EMPTY) * 1 - - # 4th plane: sensibleness (as in test_get_sensibleness) - for (x, y) in gs.get_legal_moves(): - if not (gs.is_eye((x, y), go.WHITE)): - expectation[x, y, 3] = 1 - - # 5th through 12th plane: capture size (all zero-capture) - for (x, y) in gs.get_legal_moves(): - expectation[x, y, 4] = 1 - - self.assertTrue(np.all(expectation == feature)) + """Test the functions in preprocessing.py + + note that the hand-coded features look backwards from what is depicted + in simple_board() because of the x/y column/row transpose thing (i.e. 
+ numpy is typically thought of as indexing rows first, but we use (x,y) + indexes, so a numpy row is like a go column and vice versa) + """ + + def test_get_board(self): + gs = simple_board() + pp = Preprocess(["board"]) + feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) + + white_pos = np.asarray([ + [0, 0, 0, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 0, 0]]) + black_pos = np.asarray([ + [1, 1, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0], + [0, 0, 1, 0, 1, 0, 0], + [0, 0, 0, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0]]) + empty_pos = np.ones((gs.size, gs.size)) - (white_pos + black_pos) + + # check number of planes + self.assertEqual(feature.shape, (gs.size, gs.size, 3)) + # check return value against hand-coded expectation + # (given that current_player is white) + self.assertTrue(np.all(feature == np.dstack((white_pos, black_pos, empty_pos)))) + + def test_get_turns_since(self): + gs = simple_board() + pp = Preprocess(["turns_since"]) + feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) + + one_hot_turns = np.zeros((gs.size, gs.size, 8)) + + rev_moves = gs.history[::-1] + + for x in range(gs.size): + for y in range(gs.size): + if gs.board[x, y] != go.EMPTY: + # find most recent move at x, y + age = rev_moves.index((x, y)) + one_hot_turns[x, y, min(age, 7)] = 1 + + self.assertTrue(np.all(feature == one_hot_turns)) + + def test_get_liberties(self): + gs = simple_board() + pp = Preprocess(["liberties"]) + feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) + + # todo - test liberties when > 8 + + one_hot_liberties = np.zeros((gs.size, gs.size, 8)) + # black piece at (4,4) has a single liberty: (4,3) + one_hot_liberties[4, 4, 0] = 1 + + # the black group in the top left corner has 2 liberties + one_hot_liberties[0, 0:3, 1] = 1 + # .. 
as do the white pieces on the left and right of the eye + one_hot_liberties[3, 4, 1] = 1 + one_hot_liberties[5, 4, 1] = 1 + + # the white group in the top left corner has 3 liberties + one_hot_liberties[1, 0:2, 2] = 1 + # ...as does the white piece at (4,5) + one_hot_liberties[4, 5, 2] = 1 + # ...and the black pieces on the sides of the eye + one_hot_liberties[3, 3, 2] = 1 + one_hot_liberties[5, 3, 2] = 1 + + # the black piece at (4,2) has 4 liberties + one_hot_liberties[4, 2, 3] = 1 + + for i in range(8): + self.assertTrue( + np.all(feature[:, :, i] == one_hot_liberties[:, :, i]), + "bad expectation: stones with %d liberties" % (i + 1)) + + def test_get_capture_size(self): + gs = capture_board() + pp = Preprocess(["capture_size"]) + feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) + + score_before = gs.num_white_prisoners + one_hot_capture = np.zeros((gs.size, gs.size, 8)) + # there is no capture available; all legal moves are zero-capture + for (x, y) in gs.get_legal_moves(): + copy = gs.copy() + copy.do_move((x, y)) + num_captured = copy.num_white_prisoners - score_before + one_hot_capture[x, y, min(7, num_captured)] = 1 + + for i in range(8): + self.assertTrue( + np.all(feature[:, :, i] == one_hot_capture[:, :, i]), + "bad expectation: capturing %d stones" % i) + + def test_get_self_atari_size(self): + gs = self_atari_board() + pp = Preprocess(["self_atari_size"]) + feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) + + one_hot_self_atari = np.zeros((gs.size, gs.size, 8)) + # self atari of size 1 at position 0,0 + one_hot_self_atari[0, 0, 0] = 1 + # self atari of size 3 at position 3,4 + one_hot_self_atari[3, 4, 2] = 1 + + self.assertTrue(np.all(feature == one_hot_self_atari)) + + def test_get_self_atari_size_cap(self): + gs = capture_board() + pp = Preprocess(["self_atari_size"]) + feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) + + one_hot_self_atari = np.zeros((gs.size, gs.size, 8)) + # self atari of size 1 at the ko position and just below it + one_hot_self_atari[4, 5, 0] = 1 + one_hot_self_atari[3, 6, 0] = 1 + # self atari of size 3 at bottom corner + one_hot_self_atari[6, 6, 2] = 1 + + self.assertTrue(np.all(feature == one_hot_self_atari)) + + def test_get_liberties_after(self): + gs = simple_board() + pp = Preprocess(["liberties_after"]) + feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) + + one_hot_liberties = np.zeros((gs.size, gs.size, 8)) + + # TODO (?) hand-code? 
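As a reminder of how the "sensibleness" and "legal" planes tested in this file differ: sensibleness keeps only those legal moves that do not fill the mover's own true eye. A tiny sketch of that relationship (import paths assumed; on an empty board the two sets coincide):

from AlphaGo import go
from AlphaGo.go import GameState   # import paths assumed for this sketch

gs = GameState(size=7)
legal = set(gs.get_legal_moves())
# "sensibleness" drops legal moves that would fill the mover's own true eye
sensible = {m for m in legal if not gs.is_eye(m, gs.current_player)}
assert sensible <= legal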
+ for (x, y) in gs.get_legal_moves(): + copy = gs.copy() + copy.do_move((x, y)) + libs = copy.liberty_counts[x, y] + if libs < 7: + one_hot_liberties[x, y, libs - 1] = 1 + else: + one_hot_liberties[x, y, 7] = 1 + + for i in range(8): + self.assertTrue( + np.all(feature[:, :, i] == one_hot_liberties[:, :, i]), + "bad expectation: stones with %d liberties after move" % (i + 1)) + + def test_get_liberties_after_cap(self): + """A copy of test_get_liberties_after but where captures are imminent + """ + gs = capture_board() + pp = Preprocess(["liberties_after"]) + feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) + + one_hot_liberties = np.zeros((gs.size, gs.size, 8)) + + for (x, y) in gs.get_legal_moves(): + copy = gs.copy() + copy.do_move((x, y)) + libs = copy.liberty_counts[x, y] + one_hot_liberties[x, y, min(libs - 1, 7)] = 1 + + for i in range(8): + self.assertTrue( + np.all(feature[:, :, i] == one_hot_liberties[:, :, i]), + "bad expectation: stones with %d liberties after move" % (i + 1)) + + def test_get_ladder_capture(self): + pass + + def test_get_ladder_escape(self): + pass + + def test_get_sensibleness(self): + # TODO - there are no legal eyes at the moment + + gs = simple_board() + pp = Preprocess(["sensibleness"]) + feature = pp.state_to_tensor(gs)[0, 0] # 1D tensor; no need to transpose + + expectation = np.zeros((gs.size, gs.size)) + for (x, y) in gs.get_legal_moves(): + if not (gs.is_eye((x, y), go.WHITE)): + expectation[x, y] = 1 + self.assertTrue(np.all(expectation == feature)) + + def test_get_legal(self): + gs = simple_board() + pp = Preprocess(["legal"]) + feature = pp.state_to_tensor(gs)[0, 0] # 1D tensor; no need to transpose + + expectation = np.zeros((gs.size, gs.size)) + for (x, y) in gs.get_legal_moves(): + expectation[x, y] = 1 + self.assertTrue(np.all(expectation == feature)) + + def test_feature_concatenation(self): + gs = simple_board() + pp = Preprocess(["board", "sensibleness", "capture_size"]) + feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) + + expectation = np.zeros((gs.size, gs.size, 3 + 1 + 8)) + + # first three planes: board + expectation[:, :, 0] = (gs.board == go.WHITE) * 1 + expectation[:, :, 1] = (gs.board == go.BLACK) * 1 + expectation[:, :, 2] = (gs.board == go.EMPTY) * 1 + + # 4th plane: sensibleness (as in test_get_sensibleness) + for (x, y) in gs.get_legal_moves(): + if not (gs.is_eye((x, y), go.WHITE)): + expectation[x, y, 3] = 1 + + # 5th through 12th plane: capture size (all zero-capture) + for (x, y) in gs.get_legal_moves(): + expectation[x, y, 4] = 1 + + self.assertTrue(np.all(expectation == feature)) if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/tests/test_reinforcement_policy_trainer.py b/tests/test_reinforcement_policy_trainer.py index 702d25332..03b041696 100644 --- a/tests/test_reinforcement_policy_trainer.py +++ b/tests/test_reinforcement_policy_trainer.py @@ -10,124 +10,124 @@ class TestReinforcementPolicyTrainer(unittest.TestCase): - def testTrain(self): - model = os.path.join('tests', 'test_data', 'minimodel.json') - init_weights = os.path.join('tests', 'test_data', 'hdf5', 'random_minimodel_weights.hdf5') - output = os.path.join('tests', 'test_data', '.tmp.rl.training/') - args = [model, init_weights, output, '--game-batch', '1', '--iterations', '1'] - run_training(args) + def testTrain(self): + model = os.path.join('tests', 'test_data', 'minimodel.json') + init_weights = os.path.join('tests', 'test_data', 'hdf5', 'random_minimodel_weights.hdf5') + output = os.path.join('tests', 
'test_data', '.tmp.rl.training/') + args = [model, init_weights, output, '--game-batch', '1', '--iterations', '1'] + run_training(args) - os.remove(os.path.join(output, 'metadata.json')) - os.remove(os.path.join(output, 'weights.00000.hdf5')) - os.remove(os.path.join(output, 'weights.00001.hdf5')) - os.rmdir(output) + os.remove(os.path.join(output, 'metadata.json')) + os.remove(os.path.join(output, 'weights.00000.hdf5')) + os.remove(os.path.join(output, 'weights.00001.hdf5')) + os.rmdir(output) class TestOptimizer(unittest.TestCase): - def testApplyAndResetOnGamesFinished(self): - policy = CNNPolicy.load_model(os.path.join('tests', 'test_data', 'minimodel.json')) - state = GameState(size=19) - optimizer = BatchedReinforcementLearningSGD(lr=0.01, ng=2) - policy.model.compile(loss='categorical_crossentropy', optimizer=optimizer) - - # Helper to check initial conditions of the optimizer. - def assertOptimizerInitialConditions(): - for v in optimizer.gradient_sign: - self.assertEqual(K.eval(v), 0) - self.assertEqual(K.eval(optimizer.running_games), 2) - - initial_parameters = policy.model.get_weights() - - def assertModelEffect(changed): - any_change = False - for cur, init in zip(policy.model.get_weights(), initial_parameters): - if not np.allclose(init, cur): - any_change = True - break - self.assertEqual(any_change, changed) - - assertOptimizerInitialConditions() - - # Make moves on the state and get trainable (state, action) pairs from them. - state_tensors = [] - action_tensors = [] - moves = [(2, 2), (16, 16), (3, 17), (16, 2), (4, 10), (10, 3)] - for m in moves: - (st_tensor, mv_tensor) = _make_training_pair(state, m, policy.preprocessor) - state_tensors.append(st_tensor) - action_tensors.append(mv_tensor) - state.do_move(m) - - for i, (s, a) in enumerate(zip(state_tensors, action_tensors)): - # Even moves in game 0, odd moves in game 1 - game_idx = i % 2 - optimizer.set_current_game(game_idx) - is_last_move = i + 2 >= len(moves) - if is_last_move: - # Mark game 0 as a win and game 1 as a loss. - optimizer.set_result(game_idx, game_idx == 0) - else: - # Games not finished yet; assert no change to optimizer state. - assertOptimizerInitialConditions() - # train_on_batch accumulates gradients, and should only cause a change to parameters - # on the first call after the final set_result() call - policy.model.train_on_batch(s, a) - if i + 1 < len(moves): - assertModelEffect(changed=False) - else: - assertModelEffect(changed=True) - # Once both games finished, the last call to train_on_batch() should have triggered a reset - # to the optimizer parameters back to initial conditions. - assertOptimizerInitialConditions() - - def testGradientDirectionChangesWithGameResult(self): - - def run_and_get_new_weights(init_weights, win0, win1): - state = GameState(size=19) - policy = CNNPolicy.load_model(os.path.join('tests', 'test_data', 'minimodel.json')) - policy.model.set_weights(init_weights) - optimizer = BatchedReinforcementLearningSGD(lr=0.01, ng=2) - policy.model.compile(loss='categorical_crossentropy', optimizer=optimizer) - - # Make moves on the state and get trainable (state, action) pairs from them. 
- moves = [(2, 2), (16, 16), (3, 17), (16, 2), (4, 10), (10, 3)] - state_tensors = [] - action_tensors = [] - for m in moves: - (st_tensor, mv_tensor) = _make_training_pair(state, m, policy.preprocessor) - state_tensors.append(st_tensor) - action_tensors.append(mv_tensor) - state.do_move(m) - - for i, (s, a) in enumerate(zip(state_tensors, action_tensors)): - # Put even state/action pairs in game 0, odd ones in game 1. - game_idx = i % 2 - optimizer.set_current_game(game_idx) - is_last_move = i + 2 >= len(moves) - if is_last_move: - if game_idx == 0: - optimizer.set_result(game_idx, win0) - else: - optimizer.set_result(game_idx, win1) - # train_on_batch accumulates gradients, and should only cause a change to parameters - # on the first call after the final set_result() call - policy.model.train_on_batch(s, a) - return policy.model.get_weights() - - policy = CNNPolicy.load_model(os.path.join('tests', 'test_data', 'minimodel.json')) - initial_parameters = policy.model.get_weights() - # Cases 1 and 2 have identical starting models and identical (state, action) pairs, - # but they differ in who won the games. - parameters1 = run_and_get_new_weights(initial_parameters, True, False) - parameters2 = run_and_get_new_weights(initial_parameters, False, True) - - # Changes in case 1 should be equal and opposite to changes in case 2. Allowing 0.1% - # difference in precision. - for (i, p1, p2) in zip(initial_parameters, parameters1, parameters2): - diff1 = p1 - i - diff2 = p2 - i - npt.assert_allclose(diff1, -diff2, rtol=1e-3) + def testApplyAndResetOnGamesFinished(self): + policy = CNNPolicy.load_model(os.path.join('tests', 'test_data', 'minimodel.json')) + state = GameState(size=19) + optimizer = BatchedReinforcementLearningSGD(lr=0.01, ng=2) + policy.model.compile(loss='categorical_crossentropy', optimizer=optimizer) + + # Helper to check initial conditions of the optimizer. + def assertOptimizerInitialConditions(): + for v in optimizer.gradient_sign: + self.assertEqual(K.eval(v), 0) + self.assertEqual(K.eval(optimizer.running_games), 2) + + initial_parameters = policy.model.get_weights() + + def assertModelEffect(changed): + any_change = False + for cur, init in zip(policy.model.get_weights(), initial_parameters): + if not np.allclose(init, cur): + any_change = True + break + self.assertEqual(any_change, changed) + + assertOptimizerInitialConditions() + + # Make moves on the state and get trainable (state, action) pairs from them. + state_tensors = [] + action_tensors = [] + moves = [(2, 2), (16, 16), (3, 17), (16, 2), (4, 10), (10, 3)] + for m in moves: + (st_tensor, mv_tensor) = _make_training_pair(state, m, policy.preprocessor) + state_tensors.append(st_tensor) + action_tensors.append(mv_tensor) + state.do_move(m) + + for i, (s, a) in enumerate(zip(state_tensors, action_tensors)): + # Even moves in game 0, odd moves in game 1 + game_idx = i % 2 + optimizer.set_current_game(game_idx) + is_last_move = i + 2 >= len(moves) + if is_last_move: + # Mark game 0 as a win and game 1 as a loss. + optimizer.set_result(game_idx, game_idx == 0) + else: + # Games not finished yet; assert no change to optimizer state. 
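+                # (i.e. the per-game gradient_sign accumulators are still zero
+                # and running_games is still 2)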
+ assertOptimizerInitialConditions() + # train_on_batch accumulates gradients, and should only cause a change to parameters + # on the first call after the final set_result() call + policy.model.train_on_batch(s, a) + if i + 1 < len(moves): + assertModelEffect(changed=False) + else: + assertModelEffect(changed=True) + # Once both games finished, the last call to train_on_batch() should have triggered a reset + # to the optimizer parameters back to initial conditions. + assertOptimizerInitialConditions() + + def testGradientDirectionChangesWithGameResult(self): + + def run_and_get_new_weights(init_weights, win0, win1): + state = GameState(size=19) + policy = CNNPolicy.load_model(os.path.join('tests', 'test_data', 'minimodel.json')) + policy.model.set_weights(init_weights) + optimizer = BatchedReinforcementLearningSGD(lr=0.01, ng=2) + policy.model.compile(loss='categorical_crossentropy', optimizer=optimizer) + + # Make moves on the state and get trainable (state, action) pairs from them. + moves = [(2, 2), (16, 16), (3, 17), (16, 2), (4, 10), (10, 3)] + state_tensors = [] + action_tensors = [] + for m in moves: + (st_tensor, mv_tensor) = _make_training_pair(state, m, policy.preprocessor) + state_tensors.append(st_tensor) + action_tensors.append(mv_tensor) + state.do_move(m) + + for i, (s, a) in enumerate(zip(state_tensors, action_tensors)): + # Put even state/action pairs in game 0, odd ones in game 1. + game_idx = i % 2 + optimizer.set_current_game(game_idx) + is_last_move = i + 2 >= len(moves) + if is_last_move: + if game_idx == 0: + optimizer.set_result(game_idx, win0) + else: + optimizer.set_result(game_idx, win1) + # train_on_batch accumulates gradients, and should only cause a change to parameters + # on the first call after the final set_result() call + policy.model.train_on_batch(s, a) + return policy.model.get_weights() + + policy = CNNPolicy.load_model(os.path.join('tests', 'test_data', 'minimodel.json')) + initial_parameters = policy.model.get_weights() + # Cases 1 and 2 have identical starting models and identical (state, action) pairs, + # but they differ in who won the games. + parameters1 = run_and_get_new_weights(initial_parameters, True, False) + parameters2 = run_and_get_new_weights(initial_parameters, False, True) + + # Changes in case 1 should be equal and opposite to changes in case 2. Allowing 0.1% + # difference in precision. 
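+        # (The two runs differ only in which game is marked as the winner, so each
+        # game's accumulated gradients are applied with the opposite sign and the
+        # net parameter update should simply negate.)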
+ for (i, p1, p2) in zip(initial_parameters, parameters1, parameters2): + diff1 = p1 - i + diff2 = p2 - i + npt.assert_allclose(diff1, -diff2, rtol=1e-3) if __name__ == '__main__': - unittest.main() + unittest.main() diff --git a/tests/test_supervised_policy_trainer.py b/tests/test_supervised_policy_trainer.py index fea59f361..6ff12baa0 100644 --- a/tests/test_supervised_policy_trainer.py +++ b/tests/test_supervised_policy_trainer.py @@ -4,17 +4,17 @@ class TestSupervisedPolicyTrainer(unittest.TestCase): - def testTrain(self): - model = 'tests/test_data/minimodel.json' - data = 'tests/test_data/hdf5/alphago-vs-lee-sedol-features.hdf5' - output = 'tests/test_data/.tmp.training/' - args = [model, data, output, '--epochs', '1'] - run_training(args) + def testTrain(self): + model = 'tests/test_data/minimodel.json' + data = 'tests/test_data/hdf5/alphago-vs-lee-sedol-features.hdf5' + output = 'tests/test_data/.tmp.training/' + args = [model, data, output, '--epochs', '1'] + run_training(args) - os.remove(os.path.join(output, 'metadata.json')) - os.remove(os.path.join(output, 'shuffle.npz')) - os.remove(os.path.join(output, 'weights.00000.hdf5')) - os.rmdir(output) + os.remove(os.path.join(output, 'metadata.json')) + os.remove(os.path.join(output, 'shuffle.npz')) + os.remove(os.path.join(output, 'weights.00000.hdf5')) + os.rmdir(output) if __name__ == '__main__': - unittest.main() + unittest.main()