updating scripts to use pro.run() instead of pro.updateSVD()
pgupta29 committed Oct 31, 2021
1 parent 240e2fc commit c3bc070
Showing 3 changed files with 117 additions and 127 deletions.
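The substance of the change, repeated across the notebooks, is this pattern (schematic; k, l, decay, num_iters, update_times, A_init, data, and l1 are defined per notebook, as shown in the diffs below):

# before: construct, initialize, then drive updateSVD by hand
pro = proSVD(k, history=num_iters, trueSVD=True)
pro.initialize(A_init)
for i, t in enumerate(update_times):
    pro.updateSVD(data[:, t:t+l])
    # ...collect projections, frac_vars, derivs by hand...

# after: windowing and forgetting are passed up front, and one call does the loop
pro = proSVD(k, w_len=l, w_shift=None, decay_alpha=decay, history=num_iters, trueSVD=True)
pro.initialize(A_init)
projs, frac_vars, derivs = pro.run(data, l1)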
35 changes: 13 additions & 22 deletions notebooks/ieeg_seizure.py
@@ -40,7 +40,7 @@
# ax.set_xlim((0, 10))

# %% proSVD
%%time
##%%time
# PROTOTYPE FOR DOING ANALYSIS ON ANY DATASET
# params
k = 10 # reduced dim
@@ -52,31 +52,22 @@

# init
A_init = data[:, :l1]
pro = proSVD(k, history=num_iters, trueSVD=True)
pro = proSVD(k, w_len=l, w_shift=None, decay_alpha=decay, history=num_iters, trueSVD=True)
pro.initialize(A_init)
projs, frac_vars, derivs = pro.run(data, l1)

# for svd and prosvd projections, variance explained
projs = [np.zeros((k, data.shape[1]-l1)) for i in range(2)] # subtract l1 - init proj
frac_vars = [np.zeros(projs[i].shape) for i in range(2)]
# derivatives
derivs = np.zeros((k, num_iters))

# run proSVD online
for i, t in enumerate(update_times):
    dat = data[:, t:t+l]
    pro.updateSVD(dat)
    # getting proj and variance explained
    for j, basis in enumerate([pro.U, pro.Q]):
        projs[j][:, t:t+l] = basis.T @ dat
        curr_proj_vars = projs[j][:, :t-l1].var(axis=1)[:, np.newaxis]
        total_vars = data[:, :t].var(axis=1)
        frac_vars[j][:, t:t+l] = curr_proj_vars / total_vars.sum()
    # proSVD basis derivatives
    derivs[:, i] = np.linalg.norm(pro.curr_diff, axis=0)
fig, ax = plt.subplots()
ax.plot(derivs.T)
u, s, v = np.linalg.svd(data, full_matrices=False)
u = u[:, :k]
basis = pro.Q
res1 = np.linalg.norm(basis - u @ u.T @ basis)
res2 = np.linalg.norm(u - basis @ basis.T @ u)

print(res1, res2)

# %% incSFA (emphasis on SLOW)
%%time
##%%time
num_iters = 1000

incsfa = mdp.nodes.IncSFANode(input_dim=data.shape[0], output_dim=k)
@@ -97,7 +88,7 @@
print('incSFA:\t', np.mean(times)*1000, np.std(times)*1000)

# %% CCI-PCA
%%time
##%%time
# num_iters = 1000

ccipca = mdp.nodes.CCIPCANode(input_dim=data.shape[0], output_dim=k)
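The functional change in this file (and in the two notebooks below) is that the hand-rolled update loop deleted above now lives behind pro.run(data, l1), which returns the projections, per-basis fraction of variance explained, and basis derivatives in one call. A minimal sketch of what such a wrapper plausibly does, reconstructed from the deleted loop (pro.updateSVD, pro.U, pro.Q, and pro.curr_diff are taken from the code shown here; the real body of proSVD.run may differ):

import numpy as np

def run_prosvd(pro, data, l1, l, k):
    """Hypothetical stand-in for pro.run(data, l1), replaying the deleted loop.
    data is (features, time); l1 columns initialized pro; l columns per update."""
    num_iters = int(np.floor((data.shape[1] - l1) / l))
    update_times = np.arange(1, num_iters) * l
    # projs[0]: projections onto the true-SVD basis U; projs[1]: onto the proSVD basis Q
    projs = [np.zeros((k, data.shape[1] - l1)) for _ in range(2)]
    frac_vars = [np.zeros(p.shape) for p in projs]
    derivs = np.zeros((k, num_iters))  # per-update change in the basis
    for i, t in enumerate(update_times):
        dat = data[:, t:t+l]
        pro.updateSVD(dat)                      # one streaming update
        for j, basis in enumerate([pro.U, pro.Q]):
            projs[j][:, t:t+l] = basis.T @ dat
            # variance captured so far, as a fraction of total data variance
            curr_proj_vars = projs[j][:, :t-l1].var(axis=1)[:, np.newaxis]
            total_vars = data[:, :t].var(axis=1)
            frac_vars[j][:, t:t+l] = curr_proj_vars / total_vars.sum()
        derivs[:, i] = np.linalg.norm(pro.curr_diff, axis=0)
    return projs, frac_vars, derivs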
139 changes: 69 additions & 70 deletions notebooks/pandarinath2018_reaching.py
@@ -67,7 +67,7 @@ def get_spikes(file_locs, bin_size=15):

file_dir = '/hdd/pgupta/lfads-neural-stitching-reproduce/export_v05_broadbandRethreshNonSorted_filtered/'
files = os.listdir(file_dir) # all sessions
files = [files[0]] # 1 session
files = [files[1]] # 1 session
# files = files[:2] # more sessions
file_locs = [file_dir + files[i] for i in range(len(files))]

@@ -120,42 +120,82 @@ def get_spikes(file_locs, bin_size=15):

#%% doing streamingSVD on smoothed spikes, projecting spikes onto the learned subspace

# ref_basis = pro.Q # None

# # PROTOTYPE FOR DOING ANALYSIS ON ANY DATASET
# # params
# k = 6 # reduced dim
# l = 10 # num cols processed per iter
# decay = 1 # 'forgetting' to track nonstationarity. 1 = no forgetting
# l1 = k # num cols init
# num_iters = np.floor((data.shape[1] - l1) / l).astype('int') # num iters to go through data once
# update_times = np.arange(1, num_iters) * l # index of when updates happen

# # init
# A_init = data[:, :l1]
# pro = proSVD(k, history=num_iters, trueSVD=True)

# # init strategies:
# # svd
# # u, s, v = np.linalg.svd(A_init, full_matrices=False)
# # Q_init = u[:, :k] @ np.diag(s[:k]) # None
# # B_init = v[:k, :] # None
# # random orthogonal decomposition
# # Q_init = np.random.normal(size=(data.shape[0], k))
# # Q_init, _ = np.linalg.qr(Q_init)
# # B_init = Q_init.T @ A_init
# # init from previous pro (make sure it exists!)
# # Q_init, B_init = Q_prev, B_prev
# # pro.initialize(A_init, Q_init=Q_init, B_init=B_init)
# pro.initialize(A_init) # regular init

# # for svd and prosvd projections, variance explained
# projs = [np.zeros((k, data.shape[1]-l1)) for i in range(2)] # subtract l1 - init proj
# frac_vars = [np.zeros(projs[i].shape) for i in range(2)]
# # derivatives
# derivs = np.zeros((k, num_iters))

# # run proSVD online
# for i, t in enumerate(update_times):
#     dat = data[:, t:t+l]
#     pro.updateSVD(dat, ref_basis=ref_basis)
#     # getting proj and variance explained
#     for j, basis in enumerate([pro.U, pro.Q]):
#         projs[j][:, t:t+l] = basis.T @ dat
#         curr_proj_vars = projs[j][:, :t-l1].var(axis=1)[:, np.newaxis]
#         total_vars = data[:, :t].var(axis=1)
#         frac_vars[j][:, t:t+l] = curr_proj_vars / total_vars.sum()
#     # proSVD basis derivatives
#     derivs[:, i] = np.linalg.norm(pro.curr_diff, axis=0)
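# The commented-out block above preserves three seeding strategies for proSVD.
# A self-contained sketch of the two generic ones, for reference (the helper
# names svd_init/random_orth_init are hypothetical, not part of proSVD; either
# pair feeds pro.initialize(A_init, Q_init=Q_init, B_init=B_init), and the
# third strategy warm-starts from a previous fit's Q_prev, B_prev):
import numpy as np

def svd_init(A_init, k):
    # seed from the rank-k SVD of the init columns, as in the comments above
    u, s, v = np.linalg.svd(A_init, full_matrices=False)
    return u[:, :k] @ np.diag(s[:k]), v[:k, :]

def random_orth_init(A_init, k):
    # seed from a random orthonormal basis instead
    Q_init, _ = np.linalg.qr(np.random.normal(size=(A_init.shape[0], k)))
    return Q_init, Q_init.T @ A_init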

#%%
# PROTOTYPE FOR DOING ANALYSIS ON ANY DATASET
# params
k = 6 # reduced dim
l = 6 # num cols processed per iter
l = 10 # num cols processed per iter
decay = 1 # 'forgetting' to track nonstationarity. 1 = no forgetting
l1 = k # num cols init
num_iters = np.floor((data.shape[1] - l1) / l).astype('int') # num iters to go through data once
update_times = np.arange(1, num_iters) * l # index of when updates happen

# init
A_init = data[:, :l1]
pro = proSVD(k, history=num_iters, trueSVD=True)
pro = proSVD(k, w_len=l, w_shift=None, decay_alpha=decay, history=num_iters, trueSVD=True)
pro.initialize(A_init)
projs, frac_vars, derivs = pro.run(data, l1)

# for svd and prosvd projections, variance explained
projs = [np.zeros((k, data.shape[1]-l1)) for i in range(2)] # subtract l1 - init proj
frac_vars = [np.zeros(projs[i].shape) for i in range(2)]
# derivatives
derivs = np.zeros((k, num_iters))

# run proSVD online
for i, t in enumerate(update_times):
    dat = data[:, t:t+l]
    pro.updateSVD(dat)
    # getting proj and variance explained
    for j, basis in enumerate([pro.U, pro.Q]):
        projs[j][:, t:t+l] = basis.T @ dat
        curr_proj_vars = projs[j][:, :t-l1].var(axis=1)[:, np.newaxis]
        total_vars = data[:, :t].var(axis=1)
        frac_vars[j][:, t:t+l] = curr_proj_vars / total_vars.sum()
    # proSVD basis derivatives
    derivs[:, i] = np.linalg.norm(pro.curr_diff, axis=0)
u, s, v = np.linalg.svd(data, full_matrices=False)
u = u[:, :k]
basis = pro.Q
res1 = np.linalg.norm(basis - u @ u.T @ basis)
res2 = np.linalg.norm(u - basis @ basis.T @ u)

print(res1, res2)

fig, ax = plt.subplots()
ax.plot(derivs.T)

#%%
# derivs = get_derivs(pro)
dists = np.linalg.norm(pro.Qs - pro.Q[:, :, np.newaxis], axis=0)
t = derivs.shape[1]

fig, ax = plt.subplots(1, 3, figsize=(12,4))
@@ -203,57 +243,16 @@ def get_spikes(file_locs, bin_size=15):

#%% doing full SVD
## %%time
Us, Ss = get_streamingSVD(data, data.shape[0], l1, l, num_iters, window=False)
Us, Ss = get_streamingSVD(data, k, l1, l, num_iters, window=False)

#%% looking at stuff
vals = (Ss[:6, :]**2) / (Ss[:, :]**2).sum(axis=0)
fig, ax = plt.subplots()
for i in range(2):
    plt.plot(frac_vars[i].sum(axis=0)[:t])
plt.plot(vals.sum(axis=0), ls='--', color='k')
plt.set(xlabel='bins seen', ylabel='fraction of variance explained')
plt.legend(labels=['Q', 'U - pro', 'U - dumb streaming'])

# %% projecting each timepoint of neural activity onto the subspace learned for that chunk
all_projs_stream = np.zeros((pro.Qs.shape[1], pro.Qs.shape[2]*l))
for i in range(num_iters):
    Q = pro.Qs[:, :, i] # has first k components
    if i == 0:
        curr_neural = data[:, :l1]
        all_projs_stream[:, :l1] = Q.T @ curr_neural
        t = l1
    else:
        if t + l > Us.shape[2] * l:
            break
        # aligning neural to Q (depends on l1 and l)
        curr_neural = data[:, l1+((i-1)*l):l1+(i*l)]

        # projecting curr_neural onto curr_Q (our tracked subspace) and on full svd u
        all_projs_stream[:, t:t+l] = Q.T @ curr_neural
        t += l

all_projs_stream_true = np.zeros((Us.shape[1], Us.shape[2]*l))
for i in range(Us.shape[2]):
    Q = Us[:, :, i]
    if i == 0:
        curr_neural = data[:, :l1]
        all_projs_stream_true[:, :l1] = Q.T @ curr_neural
        t = l1
    else:
        if t + l > Us.shape[2] * l:
            break
        # aligning neural to Q (depends on l1 and l)
        curr_neural = data[:, l1+((i-1)*l):l1+(i*l)]

        # projecting curr_neural onto curr_Q (our tracked subspace) and on full svd u
        all_projs_stream_true[:, t:t+l] = Q.T @ curr_neural
        t += l

num_remove = all_projs_stream.shape[1] - t
num_remove = all_projs_stream_true.shape[1] - t

if num_remove > 0:
    all_projs_stream_true = all_projs_stream_true[:, :-num_remove]
    all_projs_stream = all_projs_stream[:, :-num_remove]
    ax.plot(frac_vars[i].sum(axis=0)[:t])
ax.plot(vals.sum(axis=0), ls='--', color='k')
ax.set(xlabel='bins seen', ylabel='fraction of variance explained')
ax.legend(labels=['Q', 'U - pro', 'U - dumb streaming'])

# np.savez('neurips/ssSVD_results.npz', l1=l1, k=k, spikes=spikes, bin_size=bin_size, Us=Us, Qs=Qs)

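Both this notebook and ieeg_seizure.py close with the same sanity check against a batch SVD: res1 = ||Q - U U^T Q|| measures the part of the streamed basis Q lying outside the span of the batch top-k left singular vectors U, and res2 = ||U - Q Q^T U|| measures the reverse; both are near zero exactly when the two k-dimensional subspaces agree. A self-contained illustration (synthetic data, with a perturbed batch basis standing in for pro.Q; scipy's subspace_angles gives the equivalent principal-angle view):

import numpy as np
from scipy.linalg import subspace_angles

rng = np.random.default_rng(0)
k = 6
data = rng.normal(size=(60, 500))

u, s, vt = np.linalg.svd(data, full_matrices=False)
u = u[:, :k]                                   # batch top-k left singular vectors

Q, _ = np.linalg.qr(u + 0.01 * rng.normal(size=u.shape))  # stand-in for pro.Q

res1 = np.linalg.norm(Q - u @ u.T @ Q)         # part of Q outside span(u)
res2 = np.linalg.norm(u - Q @ Q.T @ u)         # part of u outside span(Q)
angles = subspace_angles(u, Q)                 # principal angles between subspaces
print(res1, res2, np.rad2deg(angles).max())    # all near zero when subspaces agree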
70 changes: 35 additions & 35 deletions notebooks/toy_examples.py
@@ -6,13 +6,14 @@
from scipy.ndimage import gaussian_filter1d

from proSVD.proSVD import proSVD
import proSVD.utils as utils

#%% switching between different spectral regimes
np.random.seed(100)
n = 6
embed_dim = 60 # "neurons"
t_latent = [300] # time
regime_changes = 10
t_latent = [200] # time
regime_changes = 15

fig, ax = plt.subplots(len(t_latent), 3, figsize=(20,4*len(t_latent)),
gridspec_kw={'hspace': 0.5, 'wspace': 0.3})
@@ -30,7 +31,7 @@
[0, 0, 1, 1, 0, 1]])

num_regimes = Ss_true.shape[0]
noise = 0 # np.random.normal(scale=.05, size=(n,curr_t))
noise = np.random.normal(scale=.05, size=(n,curr_t))
regimes = [[] for i in range(num_regimes)]
for i in range(num_regimes):
    S_true = Ss_true[i, :]
@@ -62,39 +63,29 @@
k = n
l1 = xs.shape[0]
l = 1
decay = 1
num_iters = np.ceil((xs.shape[1] - l1) / l).astype('int')
update_times = np.arange(1, num_iters) * l # index of when updates happen

# window for explained vars (TODO: also use this for keeping window of history/projs)
window = int(curr_t / 1)

# create object
pro = proSVD(k, w_len=l, w_shift=None, decay_alpha=decay, history=num_iters, trueSVD=True)
A_init = xs[:, :l1]
pro = proSVD(k, history=num_iters, trueSVD=True)
pro.initialize(A_init)

projs = [np.zeros((k, xs.shape[1]-l1)) for i in range(2)] # subtract l1 - init proj
frac_vars = [np.zeros(projs[i].shape) for i in range(2)]
derivs = np.zeros((k, num_iters))

for i, t in enumerate(update_times):
    # proSVD update
    dat = xs[:, t:t+l]
    pro.updateSVD(dat)
    derivs[:, i] = np.linalg.norm(pro.curr_diff, axis=0)

    # getting proj and variance explained
    for j, basis in enumerate([pro.U, pro.Q]):
        projs[j][:, t:t+l] = basis.T @ dat
        # var explained over window
        if window > 0 and (t - l1) > window: # process more data than window before getting var
            curr_proj_vars = projs[j][:, t-l1-window:t-l1].var(axis=1)[:, np.newaxis]
            total_vars = xs[:, t-window:t].var(axis=1)
            frac_vars[j][:, t:t+l] = curr_proj_vars / total_vars.sum()
        else: # cumulative variance
            curr_proj_vars = projs[j][:, :t].var(axis=1)[:, np.newaxis]
            total_vars = xs[:, l1:].var(axis=1)
            frac_vars[j][:, t:t+l] = curr_proj_vars / total_vars.sum()

# init strategies:
# svd
# u, s, v = np.linalg.svd(A_init, full_matrices=False)
# Q_init = u[:, :k] # None
# B_init = np.diag(s[:k]) @ v[:k, :] # None
# random orthogonal decomposition
# Q_init = np.random.normal(size=(embed_dim, k))
# Q_init, B_init = np.linalg.qr(Q_init)
# B_init = Q_init.T @ A_init
# pro.initialize(A_init, Q_init=Q_init, B_init=B_init)
pro.initialize(A_init) # regular init
# run it
projs, frac_vars, derivs = pro.run(xs, l1)
Qts, Scoll, Qcoll = (pro.Us, pro.Ss, pro.Qs)


@@ -150,7 +141,7 @@

# sum of vars for particular dimensions
dims_to_sum = int(n / num_regimes) # assuming first regime is in first n / num_regimes dims
labels = ['sum first 3 dims', 'sum last 3 dims']
labels = ['sum 1,2 dims', 'sum 3,4 dims', 'sum 5,6 dims']
start = 0
for p in range(num_regimes):
    currax.plot(frac_vars[j][start:start+dims_to_sum, :t].sum(axis=0), lw=1.5,
@@ -174,19 +165,28 @@

# dotted line indicating regime change
# signal

for j, currax in enumerate(ax[row, :]):
    z=1 if j==0 else l
    for lineloc in [(curr_t*(i+1))/z for i in range(regime_changes+1)]:
        lineloc += l1
    for lineloc in [(curr_t*(i+1))/l for i in range(regime_changes+1)]:
        if j == 0:
            lineloc *= l
        currax.axvline(lineloc, ls='--', color='grey', alpha=.6)


ax[0, 1].legend(loc='upper right')
axtwin.legend()
plt.suptitle('streaming SVD does not clearly identify spectral regime changes',
y=1.12)

# plt.savefig('stream_svd_problem.png', bbox_inches='tight')

u, s, v = np.linalg.svd(xs, full_matrices=False)
u = u[:, :k]
basis = pro.U
res1 = np.linalg.norm(basis - u @ u.T @ basis)
res2 = np.linalg.norm(u - basis @ basis.T @ u)

print(res1, res2)

fig, ax = plt.subplots()
ax.plot(derivs.T)

# %%
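For context on what this last notebook exercises: the toy data embeds n latent dimensions into embed_dim channels through a fixed orthonormal map and, every curr_t samples, switches which latent dimensions carry variance, so the dominant subspace changes abruptly. A minimal sketch under the parameters visible in this diff (only one row of Ss_true survives above, so the first row here is illustrative):

import numpy as np

rng = np.random.default_rng(100)
n, embed_dim, curr_t, regime_changes = 6, 60, 200, 15

U_true, _ = np.linalg.qr(rng.normal(size=(embed_dim, n)))  # fixed embedding
Ss_true = np.array([[1, 1, 1, 0, 0, 0],        # illustrative first regime
                    [0, 0, 1, 1, 0, 1]])       # row shown in the diff above

chunks = []
for c in range(regime_changes + 1):
    S_true = Ss_true[c % len(Ss_true)]
    latents = rng.normal(size=(n, curr_t)) * S_true[:, None]  # silence some dims
    noise = rng.normal(scale=.05, size=(embed_dim, curr_t))
    chunks.append(U_true @ latents + noise)
xs = np.hstack(chunks)   # spectral regime switches every curr_t columns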

0 comments on commit c3bc070
