# MineSweeperLearner.py
import numpy as np
from MineSweeper import MineSweeper
import time
import os
class MineSweeperLearner:
    """Train, evaluate and demo a model that plays Minesweeper.

    The learner plays games with the current model, records every board it
    sees together with the outcome of the move it made, and periodically
    fits the model on those samples.

    Attributes:
        name: Label used to build the file name the model is saved under.
        model: Object exposing ``predict``, ``fit`` and ``save``. The calls
            made here look like the Keras ``Model`` API -- TODO confirm.
        dim1, dim2: Board height and width (16 x 30). ``MineSweeper()`` is
            created without arguments, so the game is assumed to use the
            same size -- TODO confirm.
        totalCells: ``dim1 * dim2``.
    """

    def __init__(self, name, model):
        """Store the model and the fixed board dimensions."""
        self.name = name
        self.model = model
        self.dim1 = 16
        self.dim2 = 30
        self.totalCells = self.dim1 * self.dim2

    # TODO: ultimately move feature extraction into the model so that each
    # model can build its own predictors.
    def getPredictorsFromGameState(self, state):
        """Encode a game board as an 11-channel feature array.

        Args:
            state: ``(dim1, dim2)`` array holding the neighbour count of each
                revealed cell and NaN for each unrevealed cell.

        Returns:
            ``(11, dim1, dim2)`` float array:
            channel 0 is 1 where the cell has been revealed, else 0;
            channel 1 is all ones ("cell is on the board", which lets a
            zero-padded convolution detect edges);
            channels 2-10 are a one-hot encoding of the counts 0-8.
        """
        out = np.zeros((11, self.dim1, self.dim2))
        # channel 0: cell holds information, i.e. has been revealed
        out[0] = np.where(np.isnan(state), 0, 1)
        # channel 1: cell is on the game board
        out[1] = np.ones((self.dim1, self.dim2))
        # numeric channels: one layer per neighbour count 0..8
        for count in range(9):
            out[count + 2] = np.where(state == count, 1, 0)
        return out

    def _firstCell(self):
        """Return the opening move: the middle of the board, not a corner."""
        return self.dim1 // 2, self.dim2 // 2

    def _chooseCell(self, state):
        """Ask the model for predictions and pick the lowest-scoring cell.

        Args:
            state: Current game board (see ``getPredictorsFromGameState``).

        Returns:
            Tuple ``(Xnow, X2now, out, row, col)`` where ``Xnow`` is the
            11-channel encoding, ``X2now`` is the ``(1, dim1, dim2)`` mask of
            unrevealed cells, ``out`` is the raw ``model.predict`` result and
            ``(row, col)`` is the chosen cell as plain ints.
        """
        Xnow = self.getPredictorsFromGameState(state)
        X2now = np.array([np.where(Xnow[0] == 0, 1, 0)])
        out = self.model.predict([np.array([Xnow]), np.array([X2now])])
        # Adding the "revealed" channel pushes already-selected cells up by
        # 1 so that they are not chosen again.
        masked = out[0][0] + Xnow[0]
        row, col = np.unravel_index(np.argmin(masked), masked.shape)
        return Xnow, X2now, out, int(row), int(col)

    @staticmethod
    def _clearScreen():
        """Clear the terminal on both Windows and Unix-like systems."""
        os.system("cls" if os.name == "nt" else "clear")

    @staticmethod
    def _showBoard(game, row, col, confidence):
        """Print the last selection (1-based), confidence if known, and board."""
        print("Last selection: (" + str(row + 1) + "," + str(col + 1) + ")")
        if confidence is not None:
            print("Confidence: " + str(confidence) + "%")
        print("Game board:")
        print(game.state)

    def learnMineSweeper(self, nSamples, nBatches, nEpochsPerBatch, verbose=True):
        """Alternate between self-play data collection and model fitting.

        For each batch the model plays games until ``nSamples`` positions
        have been recorded, then it is fitted on them. The target for a
        position is the model's own prediction with the chosen cell's entry
        replaced by ``game.mines`` at that cell (presumably 1 for a mine and
        0 otherwise -- TODO confirm against MineSweeper).

        Args:
            nSamples: Positions collected (and fitted as one batch) per round.
            nBatches: Number of collect-then-fit rounds.
            nEpochsPerBatch: Epochs passed to ``model.fit`` each round.
            verbose: Print per-round game statistics when true.

        The model is saved to ``trainedModels/<name>.h5`` after every 100th
        round only; the directory is not created here.
        """
        # 11 channels: revealed flag, is-on-board flag, one per number 0..8
        X = np.zeros((nSamples, 11, self.dim1, self.dim2))
        X2 = np.zeros((nSamples, 1, self.dim1, self.dim2))
        y = np.zeros((nSamples, 1, self.dim1, self.dim2))
        for i in range(nBatches):
            cellsRevealed = 0
            gamesPlayed = 0
            gamesWon = 0
            samplesTaken = 0
            while samplesTaken < nSamples:
                game = MineSweeper()
                game.selectCell(self._firstCell())
                while not game.gameOver and samplesTaken < nSamples:
                    Xnow, X2now, out, row, col = self._chooseCell(game.state)
                    X[samplesTaken] = Xnow
                    X2[samplesTaken] = X2now
                    game.selectCell((row, col))
                    # Target: keep the model's own predictions everywhere
                    # except the chosen cell, where the truth is now known.
                    y[samplesTaken] = out[0]
                    y[samplesTaken, 0, row, col] = game.mines[row, col]
                    samplesTaken += 1
                # A game cut short by the sample limit is not counted.
                if game.gameOver:
                    gamesPlayed += 1
                    cellsRevealed += self.totalCells - np.sum(np.isnan(game.state))
                    if game.victory:
                        gamesWon += 1
            if verbose:
                # No game may have finished within this batch; report that
                # instead of reading unset or stale statistics.
                if gamesPlayed > 0:
                    meanCellsRevealed = str(float(cellsRevealed) / gamesPlayed)
                    propGamesWon = str(float(gamesWon) / gamesPlayed)
                else:
                    meanCellsRevealed = "n/a"
                    propGamesWon = "n/a"
                print("Games played, batch " + str(i) + ": " + str(gamesPlayed))
                print("Mean cells revealed, batch " + str(i) + ": " + meanCellsRevealed)
                print("Proportion of games won, batch " + str(i) + ": " + propGamesWon)
            # train
            self.model.fit([X, X2], y, batch_size=nSamples, epochs=nEpochsPerBatch)
            # save it every 100
            if (i + 1) % 100 == 0:
                self.model.save("trainedModels/" + self.name + ".h5")

    def testMe(self, nGames):
        """Play ``nGames`` full games and print the win rate and mean reveal count.

        Args:
            nGames: Number of games to play. Nothing is played, and no
                statistics are printed, when this is not positive.
        """
        if nGames <= 0:
            print("No games played.")
            return
        cellsRevealed = 0
        gamesWon = 0
        for i in range(nGames):
            if (i % 10) == 0:
                print("Playing game " + str(i + 1) + "...")
            game = MineSweeper()
            game.selectCell(self._firstCell())
            while not game.gameOver:
                _, _, _, row, col = self._chooseCell(game.state)
                game.selectCell((row, col))
            cellsRevealed += self.totalCells - np.sum(np.isnan(game.state))
            if game.victory:
                gamesWon += 1
        meanCellsRevealed = float(cellsRevealed) / nGames
        propGamesWon = float(gamesWon) / nGames
        print("Proportion of games won over " + str(nGames) + " games: " + str(propGamesWon))
        print("Mean cells revealed over " + str(nGames) + " games: " + str(meanCellsRevealed))

    def watchMePlay(self):
        """Play games in the terminal, redrawing the board after every move.

        After each game the user is asked whether to watch another; any
        answer other than ``y`` stops.
        """
        play = True
        while play:
            game = MineSweeper()
            # Reset per game so a previous game's value is never displayed.
            confidence = None
            self._clearScreen()
            print("Beginning play")
            print("Game board:")
            print(game.state)
            # make first selection in the middle. better than corner.
            row, col = self._firstCell()
            game.selectCell((row, col))
            time.sleep(0.05)
            self._clearScreen()
            # now the rest
            while not game.gameOver:
                self._showBoard(game, row, col, confidence)
                Xnow, _, out, row, col = self._chooseCell(game.state)
                # Confidence is 1 minus the lowest masked score, shown in percent.
                confidence = np.round(100 * (1 - np.amin(out[0][0] + Xnow[0])), 2)
                game.selectCell((row, col))
                time.sleep(0.05)
                self._clearScreen()
            self._showBoard(game, row, col, confidence)
            if game.victory:
                print("Victory!")
            else:
                print("Game Over")
            get = input("Watch me play again? (y/n): ")
            if get != "y":
                play = False