Commit

improvements to documentation and code clean ups
Stéphane Champailler committed Mar 14, 2021
1 parent 10bb23d commit a4afe49
Showing 2 changed files with 137 additions and 100 deletions.
149 changes: 97 additions & 52 deletions mine1.py
@@ -2,7 +2,37 @@
import numpy as np
np.set_printoptions(precision=2)

random.seed(14)
# Need both
random.seed(10)
np.random.seed(15)


def entropy(probabilities: np.array):
"""
Computes H(X)
X is given as a numpy array; all elements of the array
are assumed to represent the distribution (so the shape
of the array is not meaningful).
"""

# Avoid situations where log can't be computed
non_zero = probabilities[probabilities != 0]
return - np.sum(non_zero * np.log2(non_zero))



# for i in range(4,1,-1):
# mine = 2
# e = entropy(np.array([mine/i]*i))
# print(f"H={e}")
# print()
# for i in range(4,1,-1):
# mine = 0
# e = entropy(np.array([mine/i]*i))
# print(f"H={e}")
# exit()
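(Aside, not part of the diff: a minimal sanity check of the entropy helper above, in the same spirit as the commented-out experiments. The values are standard: a fair coin carries 1 bit, four equally likely outcomes carry 2 bits, a certain outcome carries 0 bits.)

# Illustration only -- not in the commit.
print(entropy(np.array([0.5, 0.5])))   # -> 1.0 (fair coin)
print(entropy(np.array([0.25] * 4)))   # -> 2.0 (four equally likely outcomes)
print(entropy(np.array([1.0, 0.0])))   # -> 0.0 (numpy may print -0.0; the zero-probability term is dropped)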


"""
@@ -71,6 +101,9 @@ def clue(px,py,display=False):
clue_entropy = 0
else:
p = float(mines)/unrevealed_neighbours

# H = - sum_i p_i * log2(p_i)

clue_entropy = - unrevealed_neighbours * p * np.log2(p)

if display:
@@ -102,20 +135,18 @@ def draw_board(entropies):

def compute_board_entropies():
entropies = np.ones((10,10)) * 99
#print(REVEAL)
# print("")
for y in range(10):
s = ""
for x in range(10):
if REVEAL[y,x] == 1:
if MINES[y,x] == 0:
mines, uneighbours, clue_entropy = clue(x, y)
entropies[y,x] = clue_entropy
s += f"{mines}/{entropies[y,x]:.2f} "
if REVEAL[y, x] == 1: # and MINES[y,x] == 0:
mines, uneighbours, clue_entropy = clue(x, y)
entropies[y, x] = clue_entropy

#print(entropies)
return entropies


all_turns = 0
for game in range(1):
for game in range(200):
MINES = np.random.randint(2, size=(10,10))
for i in range(100):
x = random.randint(0,9)
@@ -124,76 +155,90 @@ def compute_board_entropies():

#print(MINES)
REVEAL = np.zeros((10,10),dtype=int)
REVEAL[0:1,:] = 1
#REVEAL[0:1,:] = 1
#REVEAL[1,3:7] = 1

turns = 0
while True:
entropies = compute_board_entropies()
draw_board(entropies)

print("\nNo mine")
clue(0,0,True)
REVEAL[1,0] = 1
clue(0,0,True)
REVEAL[1,0] = 0
entropies = compute_board_entropies()
print("\nMine")
clue(3,0,True)
REVEAL[1,3] = 1
entropies = compute_board_entropies()
clue(3,0,True)
REVEAL[1,3] = 0
entropies = compute_board_entropies()
break


for x in range(10):
m_old = MINES[1, x]
_, _, c_before = clue(x, 0)
MINES[1, x] = 1
_, _, c_after = clue(x, 0)
MINES[1, x] = m_old

# putting a mine leads to worse prediction, so previous
# prediction was not a mine...
choose = c_after - c_before > 0
print(f"x={x}: mine_old={m_old}, diff = {c_after - c_before}, {choose}")



# print("\nNo mine")
# clue(0,0,True)
# REVEAL[1,0] = 1
# clue(0,0,True)
# REVEAL[1,0] = 0
# entropies = compute_board_entropies()
# print("\nMine")
# clue(3,0,True)
# REVEAL[1,3] = 1
# entropies = compute_board_entropies()
# clue(3,0,True)
# REVEAL[1,3] = 0
# entropies = compute_board_entropies()
# break

# entropies = compute_board_entropies()
# print()
# print(" ",entropies[0,:])
# print("-"*66)

# for x in range(10):
# m_old = REVEAL[1, x]
# REVEAL[1, x] = 1
# entropies = compute_board_entropies()
# print(f"x={x}", entropies[0,:])
# REVEAL[1, x] = m_old

# # putting a mine leads to worse prediction, so previous
# # prediction was not a mine...
# # choose = c_after - c_before > 0
# # print(f"x={x}: mine_old={m_old}, diff = {c_after - c_before}, {choose}")

# break


best_entropy = None

for px in range(10):
for py in range(10):
if REVEAL[py,px] == 1:
if REVEAL[py, px] == 1:
continue

neighbours_pos = nb_pos(px, py)

# entropy = 0
# for x, y in neighbours_pos:
# entropy += entropies[y, x]
# entropy /= len(neighbours_pos)

entropy = 0
nn = 0
for x, y in neighbours_pos:
entropy = max(entropy, entropies[y, x])
if entropies[y, x] < 99:
entropy += entropies[y, x]
nn += 1
if nn == 0:
# Not on the fringe
continue
entropy /= nn
#print(px,py,entropy)

# entropy = 0
# for x, y in neighbours_pos:
# entropy = max(entropy, entropies[y, x])

# Take minimal entropy.
# If there's a tie, take the cell that will give
# the most clues.
if best_entropy is None or entropy > best_entropy[0] or (entropy == best_entropy[0] and len(neighbours_pos) > best_entropy[1]) :
if best_entropy is None or entropy < best_entropy[0] or (entropy == best_entropy[0] and len(neighbours_pos) > best_entropy[1]) :
best_entropy = entropy, len(neighbours_pos), px, py

e,nn,x,y = best_entropy
#print(f"x={x}, y={y}, ent={e}, mine? {MINES[y,x] == 1}")
if best_entropy:
e,nn,x,y = best_entropy
else:
e,nn,x,y = -1, 0, 1, 1

print(f"x={x}, y={y}, ent={e}, mine? {MINES[y,x] == 1}")

# Simulate random play
# x = random.randint(0,9)
# y = 1
# y = random.randint(0,9)

if MINES[y,x] == 1:
break
88 changes: 40 additions & 48 deletions p1_pandas.py
@@ -3,39 +3,20 @@
import matplotlib.pyplot as plt


# from itertools import permutations

# a = [0, 1, 0, 2]

# S = 6
# counts = np.zeros(S)
# for i in range(S):
# # a = [ 1 1 ..1 0 0 .. 0 ]
# a = [1] * i + [0] * (S-i)
# print(a)
# perms = set(permutations(a))

# for perm in perms:
# counts += np.array(perm)

# print(len(perms))

# print( counts)
# exit()




def marginalize(dist_table: pd.DataFrame, variables, leave_values=False):
"""
Marginalize variables out of probability distribution table.
The probability distribution table is a DataFrame of which
dist_table: The probability distribution table is a DataFrame of which
the first columns are the random variables' values and the
last column is the probability of having this combination
of values. For example : X, Y, P(X ^ Y)
of values (joint probability). For example : X, Y, P(X ^ Y)
Each column are labelled with capital letter
denoting the name of the variable (for the columns containing
variables).
Each column of variables is labelled with a capital letter
denoting the name of the variable.
variables: the marginalization target. So if one has P(X^Y) and
wants to get P(X) out of it, then X is such a target.
"""

p_column = dist_table.columns[-1]
@@ -58,7 +39,7 @@ def marginalize(dist_table: pd.DataFrame, variables, leave_values=False):
return r
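(Aside, not in the commit: a minimal sketch of how marginalize is meant to be called, following the column conventions used elsewhere in this file; the exact form of the returned table is an assumption, since the body above is only partially shown in this diff.)

import pandas as pd

# Joint distribution of two independent fair bits X and Y.
x_and_y = pd.DataFrame({
    "X": [0, 0, 1, 1],
    "Y": [0, 1, 0, 1],
    "P(X^Y)": [0.25, 0.25, 0.25, 0.25],
})

x = marginalize(x_and_y, "X")   # expected to yield the marginal P(X), i.e. 0.5 for each value of X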


def entropy(probabilities):
def entropy(probabilities: np.array):
"""
Computes H(X)
@@ -72,34 +53,49 @@ def entropy(probabilities):
return - np.sum(non_zero * np.log2(non_zero))


def joint_entropy(x_and_y):
# Compute the joint entropy
def joint_entropy(x_and_y: pd.DataFrame):
"""
Computes the joint entropy H(X,Y)
Expects a dataframe with three columns :
- values of X
- values of Y
- P(X=x, Y=y) : probability distribution; must sum to one.
"""

return entropy(x_and_y["P(X^Y)"])
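(Aside, not in the commit: a usage sketch; two independent fair bits have a joint entropy of 2 bits.)

import pandas as pd

x_and_y = pd.DataFrame({
    "X": [0, 0, 1, 1],
    "Y": [0, 1, 0, 1],
    "P(X^Y)": [0.25, 0.25, 0.25, 0.25],
})
print(joint_entropy(x_and_y))   # -> 2.0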


def cond_entropy(x_given_y, y):
# Compute the conditional entropy
def cond_entropy(x_given_y: pd.DataFrame, y: pd.DataFrame):
"""
Compute the conditional entropy
Expects two dataframes :
- x_given_y: P(X|Y) as a table of rows (x, y, P(X=x|Y=y))
- y: P(Y) as a table of rows (y, P(Y=y))
"""

# First, relate P(X_i|Y_j) to P(Y_j)
r = pd.merge(x_given_y, y)
return - np.sum(r["P(X|Y)"] * r["P(Y)"] * np.log2(r["P(X|Y)"]))
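(Aside, not in the commit: a small check of cond_entropy, using the column names that appear in the return expression above; with X independent of Y and both fair, H(X|Y) = H(X) = 1 bit.)

import pandas as pd

x_given_y = pd.DataFrame({
    "X": [0, 1, 0, 1],
    "Y": [0, 0, 1, 1],
    "P(X|Y)": [0.5, 0.5, 0.5, 0.5],
})
y = pd.DataFrame({"Y": [0, 1], "P(Y)": [0.5, 0.5]})
print(cond_entropy(x_given_y, y))   # -> 1.0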


def mutual_information(x_and_y, var_x="X", var_y="Y"):
def mutual_information(x_and_y: pd.DataFrame,
var_x: str = "X",
var_y: str = "Y"):
""" Computes :
I(X;Y) = H(X) + H(Y) - H(X,Y) (See wikipedia)
I(X;Y) = H(X) + H(Y) - H(X,Y)
Expects parameters :
x_and_y : a table (DataFrame) giving P(one row of the table)
var_x : name of the variable X, must be in the columns of x_and_y
var_y : name of the variable Y, must be in the columns of x_and_y
- x_and_y : a table (DataFrame) giving, for each row, the joint probability of that row's values
- var_x : name of the variable X, must be in the columns of x_and_y
- var_y : name of the variable Y, must be in the columns of x_and_y
"""

# The code here is a bit more dynamic so we can
# use this function in part 1 and 2 of the
# problem statement.
# The code here is a bit more dynamic so we can use this function
# in part 2 of the problem statement.

# Compute probabilities for all values of random variable X.
x = marginalize(x_and_y, var_x)
@@ -125,7 +121,7 @@ def cond_mutual_information(x_and_y_and_z):
y_and_z = marginalize(x_and_y_and_z, ["Y", "Z"])
z = marginalize(x_and_y_and_z, "Z")

return entropy(x_and_z) + entropy(y_and_z) - entropy(z) - entropy(x_and_y_and_z['P(X^Y^Z)'])
return entropy(x_and_z) - entropy(z) - entropy(x_and_y_and_z['P(X^Y^Z)']) + entropy(y_and_z)
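(For the record, not from the commit itself: both orderings of that return expression compute the same standard identity,

I(X;Y|Z) = H(X|Z) - H(X|Y,Z)
         = (H(X,Z) - H(Z)) - (H(X,Y,Z) - H(Y,Z))
         = H(X,Z) + H(Y,Z) - H(Z) - H(X,Y,Z)

which is what the marginalized tables above are fed into.)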


def cond_joint_entropy(x_and_y_and_z):
@@ -179,11 +175,6 @@ def implementation():
# Q4
mutual_information(x_and_y)

# FIXME Andrea and stF to make test scenario

# assert mutual_information(x_and_y) == 0.9, "Ooops, unecpected"


# Q5
joint_entropy3(x_and_y_and_z)
cond_joint_entropy(x_and_y_and_z)
@@ -242,8 +233,9 @@ def medical_diagnosis():
cardinalities.append(card)
entropies.append(e)

print(
f"{var_name}\tcard:{card} {e:.3f}")
with open("question6.inc","w") as fout:
for vname, ent in sorted( zip(names, entropies), key=lambda t:t[1]):
fout.write(f"{vname} & {ent:.3f} \\\\\n")

plt.figure()
plt.scatter(cardinalities, entropies)
