encoder.py
import math
from functools import partial

import numpy as np
import tensorflow as tf

import process_data

LOSS_OUT_FILE = 'Epoch_Loss.txt'
# Process the raw WAV files before training
process_data.process_wav()
# Learning rate
lr = 0.0001
# L2 regularization strength
l2 = 0.0001
# Number of audio samples per network input frame
inputs = 12348
# Hidden layer sizes (hidden_3_size is the size of the coding layer)
hidden_1_size = 8400
hidden_2_size = 3440
hidden_3_size = 2800
# Change the epochs variable to define the
# number of times we iterate through all our batches
epochs = 1000
# Change the batch_size variable to define how many songs to load per batch
batch_size = 50
# Change the batches variable to set the number of batches per epoch
batches = 1
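# With the defaults above, each epoch therefore loads
# batches * batch_size = 1 * 50 = 50 songs.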
# Define our placeholder with shape [?, 12348]
X = tf.placeholder(tf.float32, shape=[None, inputs])
l2_regularizer = tf.contrib.layers.l2_regularizer(l2)
# Every dense layer shares the same activation, initializer and regularizer
autoencoder_dnn = partial(tf.layers.dense,
                          activation=tf.nn.elu,
                          kernel_initializer=tf.contrib.layers.variance_scaling_initializer(),
                          kernel_regularizer=l2_regularizer)
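# For reference, the partial above only pre-binds keyword arguments, so a call
# such as
#   autoencoder_dnn(X, hidden_1_size)
# is equivalent to
#   tf.layers.dense(X, hidden_1_size, activation=tf.nn.elu,
#                   kernel_initializer=tf.contrib.layers.variance_scaling_initializer(),
#                   kernel_regularizer=l2_regularizer)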
# Encoder
hidden_1 = autoencoder_dnn(X, hidden_1_size)
hidden_2 = autoencoder_dnn(hidden_1, hidden_2_size)
# Coding layer (bottleneck)
hidden_3 = autoencoder_dnn(hidden_2, hidden_3_size)
# Decoder
hidden_4 = autoencoder_dnn(hidden_3, hidden_2_size)
outputs = autoencoder_dnn(hidden_4, inputs, activation=None)
reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))
reg_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
loss = tf.add_n([reconstruction_loss] + reg_loss)
optimizer = tf.train.AdamOptimizer(lr)
training_op = optimizer.minimize(loss)
init = tf.global_variables_initializer()
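# Summary of the graph defined above: a stacked autoencoder with layer sizes
#   12348 -> 8400 -> 3440 -> 2800 -> 3440 -> 12348
# trained to minimise the mean squared reconstruction error plus the L2 weight
# penalties collected from GraphKeys.REGULARIZATION_LOSSES.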
def next_batch(c_batch, batch_size, sess):
    ch1_arr = []
    ch2_arr = []
    wav_arr_ch1, wav_arr_ch2, sample_rate = process_data.get_next_batch(c_batch, batch_size, sess)
    # Split each channel into whole frames of `inputs` samples,
    # dropping any trailing samples that do not fill a complete frame
    for sub_arr in wav_arr_ch1:
        n_frames_ch1 = math.floor(len(sub_arr) / inputs)
        sub_arr = sub_arr[:(n_frames_ch1 * inputs)]
        ch1_arr.append(np.array(sub_arr).reshape(n_frames_ch1, inputs))
    for sub_arr in wav_arr_ch2:
        n_frames_ch2 = math.floor(len(sub_arr) / inputs)
        sub_arr = sub_arr[:(n_frames_ch2 * inputs)]
        ch2_arr.append(np.array(sub_arr).reshape(n_frames_ch2, inputs))
    return np.array(ch1_arr), np.array(ch2_arr), sample_rate
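# Quick usage sketch (not part of the training run): each returned channel entry
# is a 2-D array of whole frames, e.g.
#   ch1, ch2, sr = next_batch(0, batch_size, sess)
#   ch1[0].shape == (n_frames, inputs)   # n_frames depends on the song length
# Samples left over after the last complete frame are discarded.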
##### Run training
with tf.Session() as sess:
    init.run()
    for epoch in range(epochs):
        epoch_loss = []
        print("Epoch: " + str(epoch))
        for i in range(batches):
            ch1_song, ch2_song, sample_rate = next_batch(i, batch_size, sess)
            total_songs = np.hstack([ch1_song, ch2_song])
            batch_loss = []
            for j in range(len(total_songs)):
                x_batch = total_songs[j]
                _, song_loss = sess.run([training_op, loss], feed_dict={X: x_batch})
                batch_loss.append(song_loss)
                print("Song loss: " + str(song_loss))
            print("Curr Epoch: " + str(epoch) + " Curr Batch: " + str(i) + "/" + str(batches))
            print("Batch Loss: " + str(np.mean(batch_loss)))
            epoch_loss.append(np.mean(batch_loss))
        print("Epoch Avg Loss: " + str(np.mean(epoch_loss)))
        # Periodically (at epoch 0 and every 1000th epoch after that) write an
        # example reconstruction to disk
        if epoch % 1000 == 0:
            ch1_song_new, ch2_song_new, sample_rate_new = next_batch(2, 1, sess)
            x_batch = np.hstack([ch1_song_new, ch2_song_new])[0]
            print("Sample rate: " + str(sample_rate_new))
            orig_song = []
            full_song = []
            evaluation = outputs.eval(feed_dict={X: x_batch})
            print("Output: " + str(evaluation))
            full_song.append(evaluation)
            orig_song.append(x_batch)
            # Merge the nested arrays
            orig_song = np.hstack(orig_song)
            full_song = np.hstack(full_song)
            # Split each song into its two channels
            orig_song_ch1 = orig_song[:math.floor(len(orig_song) / 2)]
            orig_song_ch2 = orig_song[math.floor(len(orig_song) / 2):]
            full_song_ch1 = full_song[:math.floor(len(full_song) / 2)]
            full_song_ch2 = full_song[math.floor(len(full_song) / 2):]
            # Save both the untouched song and the reconstructed song to the 'output' folder
            process_data.save_to_wav(full_song_ch1, full_song_ch2, sample_rate, orig_song_ch1, orig_song_ch2, epoch, 'output', sess)