import time
import sys
import numpy as np
import theano
import theano.tensor as T
import lasagne
import theano.sandbox.rng_mrg

def logg(*ss):
    s = " ".join(map(str, ss))
    sys.stderr.write(s + "\n")

def start(s):
    global startTime
    global phase
    phase = s
    logg(phase + ".")
    startTime = time.clock()

def end(s=None):
    global startTime
    global phase
    if s is not None:
        phase = s
    endTime = time.clock()
    logg(phase, "finished in", endTime - startTime, "seconds.")

def randomMatrix(n, f):
    return np.random.normal(size=n*f).astype(np.float32).reshape((n, f))

def distanceMatrix(x, y):
    # ([n, f], [m, f]) -> [m, n], using the expansion
    # ||x-y||^2 = ||x||^2 + ||y||^2 - 2*x.y
    xL2S = np.sum(x*x, axis=-1)
    yL2S = np.sum(y*y, axis=-1)
    xL2SM = np.tile(xL2S, (len(y), 1))
    yL2SM = np.tile(yL2S, (len(x), 1))
    squaredDistances = xL2SM + yL2SM.T - 2.0*y.dot(x.T)
    # Elementwise sqrt. The +1e-6 suppresses sqrt-of-negative warnings
    # caused by tiny negative values from floating point error.
    distances = np.sqrt(squaredDistances+1e-6)
    return distances
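
# A minimal sanity check (not part of the original file, the name is
# illustrative): verifies the vectorized expansion above against a naive
# double loop on a tiny input.
def _checkDistanceMatrix():
    x = randomMatrix(5, 3)
    y = randomMatrix(4, 3)
    fast = distanceMatrix(x, y)
    naive = np.array([[np.linalg.norm(xi - yj) for xi in x] for yj in y])
    assert fast.shape == (len(y), len(x)) # note the transpose
    assert np.allclose(fast, naive, atol=1e-2)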

# Newer theano builds allow tile() with a scalar variable as reps:
# https://github.com/Theano/Theano/pull/2875
# That could make this nicer.
# The worst thing about the current construction is that the
# resulting calculation silently fails when given smaller datasets.
# TODO If there's no easy fix, at least wrap closest_fn into
# a mini-class that verifies sizes.
def constructSquaredDistanceMatrixVariable(x, y, n, m):
    # ([n, f], [m, f]) -> [n, m]
    xL2S = T.sum(x*x, axis=-1) # [n]
    yL2S = T.sum(y*y, axis=-1) # [m]
    xL2SM = T.zeros((m, n)) + xL2S # broadcasting, [m, n]
    yL2SM = T.zeros((n, m)) + yL2S # broadcasting, [n, m]
    squaredDistances = xL2SM.T + yL2SM - 2.0*T.dot(x, y.T) # [n, m]
    return squaredDistances

def constructSDistanceMatrixFunction(n, m):
    x = T.matrix('x')
    y = T.matrix('y')
    sDistances = constructSquaredDistanceMatrixVariable(x, y, n, m)
    return theano.function([x, y], sDistances)
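
# A minimal usage sketch (illustrative, not from the original file). The
# shapes n and m are baked into the compiled graph, so the returned function
# should only be fed matrices of exactly the promised sizes:
#   sd_fn = constructSDistanceMatrixFunction(3, 2)
#   sd = sd_fn(randomMatrix(3, 5), randomMatrix(2, 5)) # sd.shape == (3, 2)
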
# For each y, it returns the index of the closest x in L2 distance.
# x is [n, f], y is [m, f] for some f. Output is [m], the values are in range(n).
def constructMinimalDistanceIndicesVariable(x, y, n, m):
    sDistances = constructSquaredDistanceMatrixVariable(x, y, n, m)
    lamblinsTrick = False
    if lamblinsTrick:
        # https://github.com/Theano/Theano/issues/1399
        # https://gist.github.com/danielvarga/d0eeacea92e65b19188c
        # https://groups.google.com/forum/#!topic/theano-users/E7ProqnGUMk
        s = sDistances
        bestIndices = T.cast((T.arange(n).dimshuffle(0, 'x') * T.cast(T.eq(s, s.min(axis=0, keepdims=True)), 'float32')).sum(axis=0), 'int32')
        # This is a heavy-handed workaround for the fact that in
        # lamblin's hack, ties lead to completely screwed results.
        bestIndices = T.clip(bestIndices, 0, n-1)
    else:
        bestIndices = T.argmin(sDistances, axis=0)
    return bestIndices
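
# A small numpy demonstration (not part of the original file) of why
# lamblin's hack needs the T.clip above: when two rows tie for the minimum,
# the one-hot mask has two ones in a column, so the index sum lands on a
# meaningless row index, possibly out of range.
def _demoLamblinsTieProblem():
    s = np.array([[5.0], [0.0], [0.0]]) # rows 1 and 2 tie for the minimum
    mask = (s == s.min(axis=0, keepdims=True)).astype(np.float32)
    indices = (np.arange(3)[:, None] * mask).sum(axis=0)
    assert indices[0] == 3 # not a valid row index for n=3, hence the clip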

# The theano.function returned by this is usually called like this:
# closest_fn(sampled, data); the output is an index (pointing to a row
# of sampled) for each row of data.
#
# To elaborate:
# n = candidateCount, m = targetCount,
# typically candidateCount = sampleSize, targetCount = minibatchSize
# BEWARE: super confusingly, in generativeMLP.py sampleSize is called m.
#
# See testMinimalDistanceIndicesFunction for how to turn indices into samples and distances.
def constructMinimalDistanceIndicesFunction(n, m):
    x = T.matrix('x')
    y = T.matrix('y')
    bestIndices = constructMinimalDistanceIndicesVariable(x, y, n, m)
    return theano.function([x, y], bestIndices)

def constructMinimalDistancesVariable(x, y, initials, n, m):
    sDistances = constructSquaredDistanceMatrixVariable(x, y, n, m)
    bestIndices = T.argmin(sDistances, axis=0)
    bestXes = x[bestIndices]
    bestInitials = initials[bestIndices]
    return bestXes, bestInitials

def constructMinimalDistancesFunction(n, m):
    x = T.matrix('x')
    y = T.matrix('y')
    initials = T.matrix('initials')
    bestXes, bestInitials = constructMinimalDistancesVariable(x, y, initials, n, m)
    # Note: bestInitials is discarded here, so initials never ends up in the
    # compiled graph.
    return theano.function([x, y], bestXes)

def testMinimalDistanceIndicesFunction(batchSize, sampleSize, featureDim):
    closest_fn = constructMinimalDistanceIndicesFunction(sampleSize, batchSize)
    data = randomMatrix(batchSize, featureDim)
    sampled = randomMatrix(sampleSize, featureDim)
    import kohonen
    start("CPU nearest neighbors")
    distances = kohonen.distanceMatrix(sampled, data)
    assert distances.shape == (len(data), len(sampled)) # Beware the transpose!
    bestIndicesByCPU = np.argmin(distances, axis=1)
    closestSampledByCPU = sampled[bestIndicesByCPU]
    bestDistancesByCPU = np.linalg.norm(data-closestSampledByCPU, axis=1)
    end()
    start("GPU nearest neighbors")
    bestIndicesByGPU = closest_fn(sampled, data)
    # The next two steps are practically instant.
    closestSampledByGPU = sampled[bestIndicesByGPU]
    bestDistancesByGPU = np.linalg.norm(data-closestSampledByGPU, axis=1)
    end()
    print "total bestDistances CPU", bestDistancesByCPU.sum()
    print "total bestDistances GPU", bestDistancesByGPU.sum()

# This class is a cheap workaround for the fact that I didn't manage to create
# a shape-independent constructMinimalDistanceIndicesFunction.
# It only works well if the set of possible shapes is very small; otherwise
# Theano compilation becomes the bottleneck.
class ClosestFnFactory:
    def __init__(self):
        self.cache = {}
    def __call__(self, *args):
        assert len(args) == 2
        sampled, data = args
        shape = (len(sampled), len(data))
        candidateCount, targetCount = shape
        if shape not in self.cache:
            logg("Adding to ClosestFnFactory cache, shape %s" % str(shape))
            closest_fn = constructMinimalDistanceIndicesFunction(candidateCount, targetCount)
            self.cache[shape] = closest_fn
        else:
            closest_fn = self.cache[shape]
        return closest_fn(sampled, data)
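
# A minimal usage sketch (illustrative, not from the original file): the
# factory object is called exactly like closest_fn, but compiles lazily,
# once per shape.
#   closest_fn = ClosestFnFactory()
#   indices = closest_fn(randomMatrix(10, 4), randomMatrix(6, 4)) # compiles
#   indices = closest_fn(randomMatrix(10, 4), randomMatrix(6, 4)) # cache hit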

# A cool little toy learning problem:
# We want to learn the translation of a translated 2D standard normal;
# the translation is a 2D vector.
# We generate batchSize samples from this target distribution.
# We generate sampleSize samples from our current best bet for the distribution.
# We find the closest generated sample to each target sample.
# We take the mean of the squared differences.
# That's the loss that we optimize by gradient descent.
# Note that Theano doesn't even break a sweat when doing backprop
# through a layer of distance minimization.
# Of course that's less impressive than it first sounds, because
# locally, the identity of the nearest target sample never changes.
def toyLearner():
    batchSize = 2000
    sampleSize = 2000
    inDim = 2
    srng = theano.sandbox.rng_mrg.MRG_RandomStreams(seed=234)
    dataVar = T.matrix("data")
    initialsVar = srng.normal((sampleSize, inDim))
    parametersVar = theano.shared(np.zeros(inDim, dtype=np.float32), "parameters")
    generatedVar = initialsVar + parametersVar # broadcast
    bestXesVar, bestInitialsVar = constructMinimalDistancesVariable(generatedVar, dataVar, initialsVar, sampleSize, batchSize)
    deltaVar = bestXesVar - dataVar
    # Mean over samples AND feature coordinates!
    # Very frightening fact: with .sum() here, the learning process diverges.
    lossVar = (deltaVar*deltaVar).mean()
    updates = lasagne.updates.nesterov_momentum(
        lossVar, [parametersVar], learning_rate=0.2, momentum=0.0)
    train_fn = theano.function([dataVar], updates=updates)
    for epoch in range(1000):
        data = randomMatrix(batchSize, inDim) + np.array([-5.0, 12.0], dtype=np.float32)
        train_fn(data)
        print parametersVar.get_value() # should approach the target translation [-5, 12]

def distanceSpeedTest():
    # I'm not using the variable names n and m, because unfortunately
    # the order is switched between sampleAndUpdate() and
    # constructSDistanceMatrixFunction().
    batchSize = 3000
    oversampling = 4.324
    sampleSize = int(batchSize*oversampling)
    f = 28*28
    np.random.seed(0)
    data = randomMatrix(batchSize, f)
    generated = randomMatrix(sampleSize, f)
    dm_fn = constructSDistanceMatrixFunction(sampleSize, batchSize)
    md_fn = constructMinimalDistancesFunction(sampleSize, batchSize)
    start("minimal distances theano")
    bestXes = md_fn(generated, data)
    print bestXes.shape
    print np.sum(bestXes)
    end()
    start("all distances theano")
    ds = dm_fn(generated, data)
    print ds.shape
    print np.sum(ds)
    end()
    start("all distances numpy")
    ds = distanceMatrix(generated, data)
    print ds.shape
    print np.sum(ds)
    end()

if __name__ == "__main__":
    whichTest = sys.argv[1]
    assert whichTest in ("distances", "toyLearner", "speeds")
    if whichTest == "distances":
        testMinimalDistanceIndicesFunction(batchSize=3000, sampleSize=12972, featureDim=28*28)
    elif whichTest == "speeds":
        distanceSpeedTest()
    elif whichTest == "toyLearner":
        toyLearner()