game.py

"""A reusable framework for iterating through games"""
from collections import namedtuple
GameStep = namedtuple('GameStep', ['prev_state', 'move', 'new_state', 'player'])
MDPStep = namedtuple('MDPStep', ['prev_state', 'move', 'post_opp_move_state', 'player', 'reward'])

def episode(mdp, players):
    """Play one full game, yielding a GameStep for every move made.
    Players alternate turns, with players[0] moving first."""
    curr_state = mdp.initial_state()
    while not curr_state.game_over():
        curr_player = players[0]
        move = curr_player.move(curr_state)
        new_state = curr_state.move(move)
        # This yields the player that moved from prev_state to new_state
        yield GameStep(prev_state=curr_state,
                       move=move,
                       new_state=new_state,
                       player=curr_player)
        curr_state = new_state
        # Rotate the player tuple so the next player moves first.
        players = (*players[1:], players[0])
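
# A minimal sketch (not part of this module) of the interface episode() relies on:
# mdp.initial_state() returns a state exposing game_over() and move(), and each
# player exposes move(state). RandomPlayer is a hypothetical illustration that
# also assumes the state's possible_moves(), as HumanConsolePlayer below does:
#
#     import random
#
#     class RandomPlayer:
#         def move(self, state):
#             # Choose uniformly among the legal moves reported by the state.
#             return random.choice(list(state.possible_moves()))
#
#     for step in episode(mdp, (RandomPlayer(), RandomPlayer())):
#         print(step.player, step.move)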

def transitions(mdp, players):
    """Pair up consecutive GameSteps so each yielded MDPStep covers one
    player's move followed by the opponent's reply."""
    game = episode(mdp, players)
    prev = next(game)
    for step in game:
        # Reward for prev.player's move, given the opponent's response.
        reward = mdp.reward(prev.prev_state,
                            prev.move,
                            prev.new_state,
                            step.move,
                            step.new_state)
        yield MDPStep(prev_state=prev.prev_state,
                      move=prev.move,
                      post_opp_move_state=step.new_state,
                      player=prev.player,
                      reward=reward)
        prev = step
    # The final move ends the game, so there is no opponent reply to score against.
    final_reward = mdp.reward(prev.prev_state,
                              prev.move,
                              prev.new_state)
    yield MDPStep(prev_state=prev.prev_state,
                  move=prev.move,
                  post_opp_move_state=prev.new_state.response(),
                  player=prev.player,
                  reward=final_reward)
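
# A hedged sketch of how the transitions() stream could drive learning. The
# TrainingWrapper below forwards update(*args) to the wrapped agent; the exact
# constructor and update signatures live in agent.py / train.py and are
# assumptions here, not code from this project:
#
#     learner = TrainingWrapper(agent.QLearner())
#     for step in transitions(mdp, (learner, learner)):
#         learner.update(step.prev_state, step.move,
#                        step.post_opp_move_state, step.reward)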

if __name__ == '__main__':
    import tictactoe as rules
    import agent
    from mdp import MDP

    # TODO: This is duplicated from train.py -- find a good place for it!
    class AgentWrapper:
        def __init__(self, agent):
            self.agent = agent

        def move(self, state):
            raise NotImplementedError("AgentWrapper.move is abstract!")

    class TrainingWrapper(AgentWrapper):
        def update(self, *args):
            self.agent.update(*args)

        def move(self, state):
            return self.agent.choose_next_action(state)

    class TestingWrapper(AgentWrapper):
        def update(self, *args):
            pass

        def move(self, state):
            return self.agent.optimal_action(state)
    ## END TODO

    class HumanConsolePlayer:
        def __init__(self, name):
            self.name = name

        def __repr__(self):
            return self.name

        def __str__(self):
            return self.__repr__()

        def move(self, curr_state):
            print('The current state is: \n' + str(curr_state))
            while True:
                raw_move = input('Please input row and column separated by a space: ')
                try:
                    row, col = raw_move.split(' ')
                    row, col = int(row), int(col)
                except ValueError:
                    # Malformed input (wrong token count or non-integers): re-prompt.
                    print("Invalid Move!")
                    continue
                if (row, col) in curr_state.possible_moves():
                    break
                else:
                    print("Invalid Move!")
            return (row, col)

    mdp = MDP(rules)
    # Alternative opponents: two humans, or a previously saved Q-learning agent.
    # game = transitions(mdp,
    #                    (HumanConsolePlayer("X's"), HumanConsolePlayer("O's")))
    # game = transitions(mdp,
    #                    (HumanConsolePlayer("X's"),
    #                     TestingWrapper(agent.QLearner.load('champion.p'))))
    game = transitions(mdp,
                       (HumanConsolePlayer("X's"),
                        TestingWrapper(agent.Minimaxer(mdp, depth=6))))
    for step in game:
        print(str(step.player) + " earns " + str(step.reward) + " points!")
        print(step.post_opp_move_state)