-
Notifications
You must be signed in to change notification settings - Fork 155
/
Copy pathtictactoe.py
150 lines (112 loc) · 4.45 KB
/
tictactoe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# Tic Tac Toe Game functions
# Games are defined by (see Chapter 5):
# * Actions(s) Legal moves in state s.
# * Result(s, a) Transition model.
# * Terminal(s) Test for terminal states.
# * Utility(s) Utility for player Max for terminal states.
# I represent the state (board) as a vector of length 9.
# The values are ' ', 'x', 'o'.
import numpy as np
def empty_board():
    """Create an empty board: a fresh list of 9 squares, each holding ' '."""
    return [' ' for _ in range(9)]
def actions(board):
    """Return the legal moves: the indices of all squares that still hold ' ',
    in ascending order."""
    return [index for index, square in enumerate(board) if square == ' ']

    # To randomize the action order, shuffle the list before returning it.
    # np.random.shuffle works in place and returns None, so its result cannot
    # be chained or returned directly.
def result(state, player, action):
    """Transition model: return a copy of state with player's symbol placed on
    square action. The state passed in is not modified.

    Raises an Exception if the chosen square is not empty.
    """
    if state[action] == ' ':
        successor = state.copy()
        successor[action] = player
        return successor

    raise Exception(f"Illegal action {action} by player {player}!")
def terminal(state):
    """Terminal test: is the game over in this state (won by a player or drawn)?"""
    outcome = check_board(state)
    # 'n' is check_board's code for "next move", i.e., the game is still running
    return outcome != 'n'
def utility(state, player = 'x'):
    """Utility of state from the point of view of player:
    +1 for a win, 0 for a draw, -1 for a loss.
    Returns None for non-terminal states, where no utility is defined."""
    outcome = check_board(state)

    if outcome == player:
        score = +1       # win
    elif outcome == 'd':
        score = 0        # draw
    elif outcome == other(player):
        score = -1       # loss
    else:
        score = None     # utility is not defined
    return score
## helper functions
def check_board(state):
    """Inspect the board and return the winner's symbol ('x' or 'o'),
    'd' for a draw, or 'n' if the game continues with the next move."""
    grid = np.array(state).reshape((3, 3))

    # all 8 lines that can win, in order: rows, columns, main diagonal, anti-diagonal
    lines = list(grid) + list(grid.T) + [np.diag(grid), np.diag(np.fliplr(grid))]

    for line in lines:
        first = line[0]
        # a line wins if it is not empty and all of its squares match
        if first != ' ' and (line == first).all():
            return first

    # nobody has won: a board without empty squares is a draw
    if not (grid == ' ').any():
        return 'd'

    return 'n'
def other(player):
    """Return the opponent's symbol: 'o' for player 'x', and 'x' for anything else."""
    return 'o' if player == 'x' else 'x'
def show_board_text(board):
    """Print the board as a 3x3 array."""
    print(np.asarray(board).reshape(3, 3))
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
def show_board(board, help = True, dpi = 40, colors = {' ': 'white', 'x': 'red', 'o': 'black'}):
    """Draw the tic-tac-toe board with matplotlib.

    board is reshaped to 3x3 before drawing. help additionally writes the
    index (0-8) of each square in gray, dpi is applied as the 'figure.dpi'
    rc setting, and colors maps each square value to the color of its symbol."""
    grid = np.array(board).reshape((3, 3))
    centered = {'horizontalalignment': 'center', 'verticalalignment': 'center'}

    # NOTE(review): the rc_context is assumed to wrap only the figure
    # creation -- confirm against the intended scope of the dpi setting.
    with plt.rc_context({'figure.dpi': dpi}):
        image = plt.matshow(np.zeros((3, 3)), cmap = ListedColormap(['w']))
    image.axes.axis('off')

    # the two inner horizontal and vertical grid lines
    plt.hlines([.5, 1.5], -.5, 2.5)
    plt.vlines([.5, 1.5], -.5, 2.5)

    # symbols: x is the horizontal position (grid column), y the vertical one (grid row)
    for x in range(3):
        for y in range(3):
            symbol = grid[y, x]
            plt.text(x, y, symbol,
                     fontsize = 64,
                     color = colors[symbol],
                     **centered)

    # small index labels placed slightly above the center of each square
    if help:
        for y in range(3):
            for x in range(3):
                plt.text(x, y - .35, x + 3 * y,
                         fontsize = 12,
                         color = 'gray',
                         **centered)

    plt.show()
# Random Baseline player
def random_player(board, player = None):
    """Baseline agent: pick one of the permissible actions for board at random,
    each with equal probability.

    player is accepted so all agents share one signature, but it is unused."""
    legal_moves = actions(board)
    return np.random.choice(legal_moves)
# Simple Environment
def play(x, o, N = 100, show_final_board = False):
    """Let two agents play each other N times; x always makes the first move.

    x and o are agent functions. Each is called with the board (the percept)
    and its own symbol and returns its next action. Returns a dict that counts
    the wins of 'x', the wins of 'o' and the draws 'd'."""
    results = {'x': 0, 'o': 0, 'd': 0}
    agents = (('x', x), ('o', o))

    for _ in range(N):
        board = empty_board()
        turn = 0

        while True:
            # even turns belong to x, odd turns to o
            symbol, agent = agents[turn % 2]
            move = agent(board, symbol)
            board = result(board, symbol, move)

            outcome = check_board(board)   # 'n' means the game is not done yet
            if outcome != 'n':
                results[outcome] += 1
                break

            turn += 1

        # NOTE(review): assumed to run once per game, after the game has
        # ended -- confirm the intended placement.
        if show_final_board:
            show_board(board)

    return results