autoencoder.py
# -*- coding: utf-8 -*-
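"""Autoencoder interpolation demo.

Loads a pickled autoencoder (a nolearn/lasagne model), reconstructs two corpus
samples selected by index on the command line, then linearly interpolates
between their latent codes, decodes the path, and visualizes the results via
nnbase.vis.

Usage:
    python autoencoder.py <sampleIndex0> <sampleIndex1>
"""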
from lasagne import layers
import numpy as np
import sys
import gzip
import cPickle
from PIL import Image
from nnbase.layers import Unpool2DLayer
from nnbase.utils import FlipBatchIterator
### This is really dumb: the current nolearn doesn't play well with lasagne,
### so I had to manually copy the file I wanted into this folder.
import nnbase.shape as shape
import nnbase.inputs
import nnbase.vis

# This is very error-prone.
# Ideally there should be a guarantee that the
# corpus loaded here is the same as the one that the
# encoder was trained on.
def loadCorpus():
    face = True
    if face:
        directory = "../face/SCUT-FBP/thumb.big/"
        X, (height, width) = nnbase.inputs.faces(directory)
    else:
        X, (height, width) = nnbase.inputs.mnist()
    X = X.astype(np.float64).reshape((-1, 1, height, width))
    mu, sigma = np.mean(X), np.std(X)
    print "mu, sigma:", mu, sigma
    return X, mu, sigma
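
# One way to reduce that risk (hypothetical sketch, not wired up anywhere in
# this script): pickle mu and sigma next to the trained model when the
# autoencoder is saved, e.g.
#   cPickle.dump((ae, mu, sigma), open("conv_ae_with_stats.pkl", "wb"), -1)
# and unpickle the triple here instead of recomputing the statistics.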

# TODO I don't think that .eval() is how this should work.
def get_output_from_nn(last_layer, X):
    # Not splitting into batches can cause a memory error.
    indices = np.arange(128, X.shape[0], 128)
    X_batches = np.split(X, indices)
    out = []
    for X_batch in X_batches:
        out.append(layers.get_output(last_layer, X_batch).eval())
    return np.vstack(out)
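
# A possible alternative to the repeated .eval() calls above (untested sketch,
# not used elsewhere in this file): compile the forward pass once with theano
# and reuse the compiled function for every batch. Assumes last_layer is
# reachable from a single InputLayer.
def get_output_from_nn_compiled(last_layer, X, batch_size=128):
    import theano
    import lasagne
    # The first element of get_all_layers() is the InputLayer feeding last_layer.
    input_layer = lasagne.layers.get_all_layers(last_layer)[0]
    output_expr = lasagne.layers.get_output(last_layer, deterministic=True)
    forward = theano.function([input_layer.input_var], output_expr,
                              allow_input_downcast=True)
    indices = np.arange(batch_size, X.shape[0], batch_size)
    return np.vstack([forward(X_batch) for X_batch in np.split(X, indices)])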

# This helper class deals with
# 1. normalizing input and de-normalizing output,
# 2. reshaping output into a shape compatible with the input, namely (-1, 1, x, y).
class Autoencoder:
    # sigma and mu should come from the same corpus that the autoencoder itself
    # was trained on. This is error-prone!
    def __init__(self, ae, mu, sigma):
        self.ae = ae
        self.mu = mu
        self.sigma = sigma
        self.encode_layer_index = map(lambda pair: pair[0], self.ae.layers).index('encode_layer')
        self.encode_layer = self.ae.get_all_layers()[self.encode_layer_index]
        self.afterSplit = False

    # Maps unnormalized [0,1] MNIST to unnormalized [0,1] MNIST;
    # the ae itself is trained on normalized MNIST data.
    # For 0-1 clipped digits this should be close to the identity function.
    def predict(self, X):
        assert not self.afterSplit
        self.x, self.y = X.shape[-2:]
        flatOutput = self.ae.predict((X - self.mu) / self.sigma).reshape(X.shape) * self.sigma + self.mu
        return flatOutput.reshape((-1, 1, self.x, self.y))

    def encode(self, X):
        self.x, self.y = X.shape[-2:]
        return get_output_from_nn(self.encode_layer, (X - self.mu) / self.sigma)

    # N.B. After we do this, we won't be able to use the original autoencoder,
    # as its layers are broken up.
    def split(self):
        next_layer = self.ae.get_all_layers()[self.encode_layer_index + 1]
        self.final_layer = self.ae.get_all_layers()[-1]
        new_layer = layers.InputLayer(shape=(None, self.encode_layer.num_units))
        next_layer.input_layer = new_layer
        self.afterSplit = True
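
    # After split(), the decoder half reads from the fresh InputLayer created in
    # split() above, so get_output_from_nn(self.final_layer, X) treats X as
    # latent codes rather than images.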
    def decode(self, X):
        assert self.afterSplit
        flatOutput = get_output_from_nn(self.final_layer, X) * self.sigma + self.mu
        # Evil hack: decode only knows the shape of the input space
        # if you did a predict or encode previously. TODO Fix asap.
        return flatOutput.reshape((-1, 1, self.x, self.y))
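
    # A less fragile alternative (hypothetical, would change the call sites):
    # pass the target image shape in explicitly, e.g.
    #   def decode(self, X, imageShape):
    #       ...
    #       return flatOutput.reshape((-1, 1) + imageShape)
    # instead of relying on self.x / self.y set by a previous predict()/encode().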

def main():
    X_train, mu, sigma = loadCorpus()

    # autoencoderFile = "../lasagne-demo/conv_ae.pkl" # Trained on the full mnist train dataset.
    autoencoderFile = "../lasagne-demo/face.big.pkl"  # Trained on the ../face/SCUT-FBP/thumb.big dataset.
    ae_raw = cPickle.load(open(autoencoderFile, 'rb'))
    autoencoder = Autoencoder(ae_raw, mu, sigma)

    sampleIndices = map(int, sys.argv[1:])
    assert len(sampleIndices) == 2, "the tool expects two sample indices"
    X_train = X_train[sampleIndices]

    X_pred = autoencoder.predict(X_train)
    print "ended prediction"
    sys.stdout.flush()
    nnbase.vis.get_random_images(X_train, X_pred)

    autoencoder.split()
    X_encoded = autoencoder.encode(X_train)

    x0 = X_encoded[0]
    x1 = X_encoded[1]
    stepCount = 100
    # Interpolation weights run from 1 down to 0, so the sequence starts at x0 and ends at x1.
    intervalBase = np.linspace(1, 0, num=stepCount)
    intervalEncoded = np.multiply.outer(intervalBase, x0) + np.multiply.outer(1.0 - intervalBase, x1)
    X_decoded = autoencoder.decode(intervalEncoded)
    nnbase.vis.get_picture_array(X_decoded, 10, 10, "interval")

    intervalInputspace = np.multiply.outer(intervalBase, X_train[0]) + np.multiply.outer(1.0 - intervalBase, X_train[1])
    nnbase.vis.get_picture_array(intervalInputspace, 10, 10, "interval-inputspace")
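    # The two grids make the comparison explicit: "interval" interpolates in the
    # autoencoder's latent space, while "interval-inputspace" interpolates the
    # raw pixels between the same two samples.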

if __name__ == "__main__":
    main()