-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathRLreinforceXXchain_actionedtime.py
403 lines (326 loc) · 14.8 KB
/
RLreinforceXXchain_actionedtime.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 29 20:05:32 2020
@author: irtazakhalid
"""
import numpy as np
import scipy as sp
import scipy.linalg
class Environment(object):
    """Simple XX spin chain environment with either a ring or linear topology.

    The system Hamiltonian is an ``Nspin x Nspin`` real matrix, states are
    length-``Nspin`` vectors and the controls ("actions") are matrices that
    are added to the Hamiltonian before it is exponentiated.  The reward of a
    step is the (optionally sampled) squared overlap between the evolved
    input state and the target state.

    Construction side effect: ``randHset_constructor`` reseeds NumPy's global
    random number generator with a fixed seed.
    """

    def __init__(self, nspin, in_spin, out_spin, action_vector=None, final_time = 6,
                 topo="linear", timestep_res=0.01, max_time=30, bmin=-20, bmax=20,
                 fid_noisy=False, ham_noisy=False, draws=20, adaptive=False, adp_tol=0.05,
                 noise=0.05, transfer_learning=False, heisenberg_int: bool = False,
                 use_fixed_ham = False, opt_train_size = 100):
        """Set up the chain, the initial/target states and the noisy Hamiltonian sets.

        Parameters
        ----------
        nspin : number of spins (dimension of the Hamiltonian).
        in_spin, out_spin : indices of the initially occupied and target spins.
        action_vector : optional per-spin values placed on the diagonal of the
            initial action matrix; all-zero matrix when omitted.
        final_time : accepted for compatibility; ``self.final_time`` is set to
            ``max_time`` regardless of this value.
        topo : ``"ring"`` closes the chain, anything else leaves it open.
        timestep_res : stored as ``self.tres``.
        max_time : times above this value are wrapped modulo ``max_time``.
        bmin, bmax : stored as ``self.min`` / ``self.max``; ``bmax`` is the
            magnitude at which action entries are wrapped.
        fid_noisy : sample the fidelity from a binomial instead of returning it.
        ham_noisy : add a fresh random perturbation to the Hamiltonian in ``state``.
        draws : number of binomial trials per fidelity sample.
        adaptive, adp_tol : keep sampling until the posterior standard
            deviation drops to ``adp_tol`` or below.
        noise : standard deviation of the Hamiltonian perturbations.
        transfer_learning : start from a perturbed, zero-diagonal Hamiltonian.
        heisenberg_int : add the extra diagonal term built in ``system_hamiltonian``.
        use_fixed_ham : average over the pre-drawn Hamiltonian sets.
        opt_train_size : number of pre-drawn training Hamiltonians.
        """
        self.Nspin = nspin
        self.in_spin = in_spin
        self.out_spin = out_spin
        self.topo = topo
        self.heisenberg_int = heisenberg_int
        self.timestep = 0
        self.tres = timestep_res
        # The action is always stored as an (Nspin, Nspin) matrix so that it can
        # be added to the Hamiltonian and matches what reset() produces.
        if action_vector is None:
            self.action = np.zeros((self.Nspin, self.Nspin))
        else:
            self.action = np.diag(action_vector)
        if transfer_learning:
            self.sys = self._perturbed_zero_diagonal_hamiltonian()
            print(f"old ham {self.sys}")
        else:
            self.sys = self.system_hamiltonian()
        self.in_state = self.state_vector(self.in_spin)
        self.out_state = self.state_vector(self.out_spin)
        self.maxtime = max_time
        self.final_time = self.maxtime  # min(abs(final_time), self.maxtime)
        self.min = bmin
        self.max = bmax
        self.noise = noise
        self.fid_noisy = fid_noisy
        self.ham_noisy = ham_noisy
        self.draws = draws
        self.adaptive = adaptive
        self.adp_func_calls_increment = self.draws
        self.adp_var_tol = adp_tol
        self.tf = 0  # most recent noiseless fidelity computed by step()
        self.use_fixed_ham = use_fixed_ham
        self.train_size = opt_train_size
        self.randH, self.randH_test = self.randHset_constructor(train_size=self.train_size)

    def _perturbed_zero_diagonal_hamiltonian(self, scale=0.1):
        "System Hamiltonian plus a random perturbation, with its diagonal zeroed."
        ham = self.system_hamiltonian() + self.structured_perturabation(scale)
        off_diagonal_mask = np.ones_like(ham) - np.eye(self.Nspin)
        return ham * off_diagonal_mask

    def _wrap_timestep(self):
        "Make ``self.timestep`` non-negative and wrap it modulo ``self.maxtime``."
        elapsed = abs(self.timestep)
        self.timestep = elapsed % self.maxtime if elapsed > self.maxtime else elapsed

    def _wrap_action(self, action):
        """Return ``action`` with entries wrapped back inside ``(-max, max)``.

        Wrapping only happens when at least one entry exceeds ``self.max`` in
        magnitude.  ``np.fmod`` keeps the sign of each entry and maps zeros to
        zero, so untouched (zero) entries never turn into NaN.
        """
        if (np.abs(action) > self.max).any():
            return np.fmod(action, self.max)
        return action

    def randHset_constructor(self, train_size=1000, test_size=10000):
        """Draw the training and test sets of perturbed Hamiltonians.

        Returns a pair of complex arrays with shapes
        ``(train_size, Nspin, Nspin)`` and ``(test_size, Nspin, Nspin)``.

        NOTE: this reseeds NumPy's *global* RNG with 4, so the sets are
        reproducible but every later ``np.random`` draw is affected as well.
        """
        # TODO cache this and make this a universal training set
        np.random.seed(4)

        def draw(count):
            hams = np.zeros((count, self.Nspin, self.Nspin), dtype="complex128")
            for idx in range(count):
                hams[idx] = self.sys + self.structured_perturabation(self.noise)
            return hams

        # The training set is drawn first, then the test set.
        out_train = draw(train_size)
        out_test = draw(test_size)
        return out_train, out_test

    def reinit_sys_hamiltonian(self):
        "Replace ``self.sys`` by a freshly perturbed, zero-diagonal Hamiltonian."
        self.sys = self._perturbed_zero_diagonal_hamiltonian()
        print(f"new ham: {self.sys}")

    def system_hamiltonian(self):
        """Build the coupling matrix of the chain.

        Neighbouring spins are coupled with strength 1; for ``topo == "ring"``
        the first and last spins are coupled too.  With ``heisenberg_int`` a
        diagonal term (half the total coupling minus each row sum) is added.
        """
        J = np.zeros((self.Nspin, self.Nspin))
        for site in range(1, self.Nspin):
            J[site - 1, site] = 1
            J[site, site - 1] = 1
        if self.topo == "ring":
            J[self.Nspin - 1, 0] = 1
            J[0, self.Nspin - 1] = 1
        if self.heisenberg_int:
            diagonal = 0.5 * np.triu(J).sum() * np.ones(self.Nspin) - np.sum(J, axis=1)
            J += np.diag(diagonal)
        return J

    def control_hamiltonians(self):
        """Return one single-site control matrix per spin ("biased bases").

        Each matrix has ``biases[k]`` at position ``(k, k)``.  ``self.biases``
        is not assigned anywhere in this class, so unit biases are used when
        the attribute is absent.
        """
        biases = getattr(self, "biases", None)
        if biases is None:
            biases = np.ones(self.Nspin)
        controls = []
        for k in range(self.Nspin):
            control = np.zeros((self.Nspin, self.Nspin))
            control[k, k] = 1 * biases[k]
            controls.append(control)
        return controls

    def state_vector(self, occ):
        "Length-``Nspin`` vector with a 1 at index ``occ`` and zeros elsewhere."
        psi = np.zeros(self.Nspin)
        psi[occ] = 1
        return psi

    # as density matrices, probably not needed atm
    def input_state(self):
        "Matrix with a single 1 at ``(in_spin, in_spin)``."
        rho0 = np.zeros((self.Nspin, self.Nspin))
        rho0[self.in_spin, self.in_spin] = 1
        return rho0

    def output_state(self):
        "Matrix with a single 1 at ``(out_spin, out_spin)``."
        rho1 = np.zeros((self.Nspin, self.Nspin))
        rho1[self.out_spin, self.out_spin] = 1
        return rho1

    def structured_perturabation(self, noise):
        """Draw a random symmetric perturbation with standard deviation ``noise``.

        The diagonal and the nearest-neighbour entries are Gaussian; the
        next-nearest-neighbour term is currently fixed to zero.  Two normal
        draws are made per site (the coupling draw of site 0 is discarded),
        which keeps the random stream identical for a given seed.
        """
        z = np.zeros((self.Nspin, self.Nspin))
        for i in range(self.Nspin):
            z[i, i] = np.random.normal(scale=noise)
            nn = np.random.normal(scale=noise)  # nearest-neighbour coupling
            if i >= 1:
                z[i, i - 1] = nn
                z[i - 1, i] = nn
        return z

    def change_sys_ham(self, default_variation = 0.1):
        """Add Gaussian perturbations to the nearest-neighbour couplings of ``self.sys``.

        check if there is such a thing as haar random uniform for spin chain hams
        otherwise just add small gaussian perturbations to the og self.sys
        """
        for i in range(self.Nspin):
            nn = np.random.normal(scale=default_variation)  # nearest neighbour
            if i >= 1:
                self.sys[i, i - 1] += nn
                self.sys[i - 1, i] += nn

    def state(self, action=None):
        """Evolve ``self.in_state`` for ``self.timestep`` under the controlled Hamiltonian.

        ``action`` defaults to ``self.action``.  The timestep is first made
        non-negative and wrapped modulo ``maxtime``.  Returns the propagator
        that was applied; with ``use_fixed_ham`` this is the average of the
        propagators of the training Hamiltonians.
        """
        action = self.action if action is None else action
        self._wrap_timestep()
        if self.use_fixed_ham:
            # using training dataset
            ham_list = self.randH[:self.train_size]
            averaged = sum(
                sp.linalg.expm(-1j * self.timestep * (H + action)) for H in ham_list
            ) / len(ham_list)
            self.in_state = np.matmul(averaged, self.in_state)
            return averaged
        H = self.sys + action
        if self.ham_noisy:
            # add gaussian noise (std self.noise) to the diagonal and nn couplings
            H = H + self.structured_perturabation(self.noise)
        U = sp.linalg.expm(-1j * self.timestep * H)
        self.in_state = np.matmul(U, self.in_state)
        return U

    def reset(self):
        "Reset time, input state and action; returns the new all-zero action matrix."
        self.timestep = 0
        self.in_state = self.state_vector(self.in_spin)
        self.action = np.zeros((self.Nspin, self.Nspin))
        self.state()
        return self.action

    def fidelity(self):
        """Return the fidelity between ``self.in_state`` and ``self.out_state``.

        * ``fid_noisy`` false: the exact squared overlap.
        * ``fid_noisy`` true, ``adaptive`` false: the fraction of successes in
          ``draws`` Bernoulli trials whose success probability is the fidelity.
        * ``adaptive`` true: batches of ``draws`` trials update a Beta
          posterior (Jeffreys prior) until its standard deviation is at most
          ``adp_var_tol``; the posterior mean is returned and
          ``adp_func_calls_increment`` grows by ``draws`` per batch.
        """
        overlap = np.matmul(np.conj(self.in_state).T, self.out_state)
        fid = np.conj(overlap) * overlap
        assert np.allclose(np.imag(fid), 0), "fid not real!"  # sanity: correct fid is always real
        fid = np.real(fid)
        if not self.fid_noisy:
            return fid
        # The binomial sampler needs a real probability inside [0, 1]; rounding
        # can push a near-perfect fidelity marginally above 1.
        prob = float(np.clip(fid, 0.0, 1.0))
        if not self.adaptive:
            return np.random.binomial(self.draws, prob) / self.draws
        a, b = 0.5, 0.5  # jeffrey's prior for the conjugate dist. beta
        mean = a / (a + b)  # beta mean
        var = mean * (1 - mean) / (a + b + 1)  # beta var
        while np.sqrt(var) > self.adp_var_tol:
            successes = np.random.binomial(self.draws, prob)
            a += successes
            b += self.draws - successes
            # a and b already contain the new batch: use the posterior directly.
            mean = a / (a + b)
            var = mean * (1 - mean) / (a + b + 1)
            self.adp_func_calls_increment += self.draws
        return mean

    def _true_fid_single(self, action, base_H=None, timestep_n=None):
        """Noiseless fidelity for ``base_H + action`` after time ``timestep_n``.

        ``base_H`` defaults to ``self.sys`` and ``timestep_n`` to
        ``self.timestep``.  ``self.in_state`` is read but not modified.
        """
        if base_H is None:
            base_H = self.sys
        if timestep_n is None:
            timestep_n = self.timestep
        U = sp.linalg.expm(-1j * timestep_n * (base_H + action))
        evolved = np.matmul(U, self.in_state)
        overlap = np.matmul(np.conj(evolved).T, self.out_state)
        return np.real(np.conj(overlap) * overlap)

    def true_fid(self, action, timestep_n=None):
        """Exact fidelity of ``action``.

        With ``use_fixed_ham`` this is the mean over every Hamiltonian in
        ``self.randH_test``; otherwise it is evaluated on ``self.sys``.
        """
        if not self.use_fixed_ham:
            return self._true_fid_single(action, timestep_n=timestep_n)
        fids = np.zeros(len(self.randH_test))
        for rep, ham in enumerate(self.randH_test):
            fids[rep] = self._true_fid_single(action, base_H=ham, timestep_n=timestep_n)
        return fids.mean()

    def normalize(self):
        "remove redundancy in the parameter space defined by the constraints in __init__"
        self.action = self._wrap_action(self.action)
        self._wrap_timestep()

    def step(self, action):
        """Add ``action`` to the current controls, evolve once and score the result.

        Returns ``(action_matrix, reward, done_flag)``.  If the evolution or
        the fidelity raises ``ValueError`` the error is printed and
        ``(zeros, 0, False)`` is returned.  The input state is restored after
        every call so each step starts from the initial state.

        NOTE(review): ``state`` wraps the timestep to at most ``maxtime`` and
        ``final_time`` equals ``maxtime``, so ``done_flag`` looks like it can
        never become True - confirm this is intended.
        """
        # Build a new array instead of updating in place: the matrices handed
        # out by reset() and by earlier steps must not change retroactively.
        self.action = self._wrap_action(self.action + action)
        # Wrap the time first so the noiseless fidelity and the reward are
        # evaluated at the same time.
        self._wrap_timestep()
        try:
            if not self.use_fixed_ham:
                self.tf = self.true_fid(self.action)
            self.state(self.action)  # evolve the state
            reward = self.fidelity()
        except ValueError as e:
            print(e)
            return np.zeros_like(self.action), 0, False
        finally:
            # reset the instate to get controllers that work at the end
            self.in_state = self.state_vector(self.in_spin)
        done_flag = self.timestep > self.final_time
        return self.action, reward, done_flag
def timeout(time_out):
    """Decorator factory: make a function refuse to run after a deadline.

    The clock starts when the function is *decorated*, not when it is called.
    Every call made more than ``time_out`` seconds after decoration raises
    ``AssertionError("timeout!")`` instead of running the function; a call
    that has already started is never interrupted.
    """
    def timeout2(func):
        import functools
        import time as tt
        # perf_counter is monotonic and high resolution, so the elapsed time
        # cannot be distorted by system clock adjustments.
        start = tt.perf_counter()

        @functools.wraps(func)  # keep the wrapped function's name and docstring
        def method_executioner(*args, **kwargs):
            if tt.perf_counter() - start > time_out:
                raise AssertionError("timeout!")
            return func(*args, **kwargs)
        return method_executioner
    return timeout2
import unittest
class Envtest(unittest.TestCase):
    """Sanity tests for Environment and the timeout decorator.

    could add more tests from Newton based algorithms for more sanity
    """

    def _one_step_fidelity(self, nspin, out_spin, biases, final_time, **env_kwargs):
        "Reward of a single step with diagonal controls ``biases`` at ``final_time``."
        env = Environment(nspin, 0, out_spin, np.zeros(nspin), **env_kwargs)
        env.reset()
        env.timestep = final_time
        _, fid, _ = env.step(np.diag(biases))
        return fid

    def test_one_step_fid_correctness(self):
        "sanity check 1"
        with self.subTest(case="10 spins, 0 -> 3"):
            action = np.array([ 9.76909983, 10.65815206, 10.65467358 , 9.71995292, -12.,
                               8.69457352 , 12. , -11.77314325, -11.29782006 , 5.27449319,])
            final_time = 25.13468797
            fid = self._one_step_fidelity(10, 3, action, final_time,
                                          final_time=final_time, timestep_res=final_time)
            self.assertAlmostEqual(fid, 0.995, places = 2)
        # another one
        with self.subTest(case="3 spins, 0 -> 2"):
            action = np.array([-0.20574245, 4.3713235, -0.30473375])
            fid = self._one_step_fidelity(3, 2, action, 22.035034)
            self.assertAlmostEqual(fid, 0.90, places = 2)
        # another one
        with self.subTest(case="6 spins, 0 -> 2"):
            action = np.array([2.9160861365962774, 4.385934774763882, 2.9311789427883923,
                               9.826275581493974, 9.276727781863883, 5.071161912055686,])
            fid = self._one_step_fidelity(6, 2, action, 3.6651542489416897)
            self.assertAlmostEqual(fid, 0.9025, places = 2)
        # bad one
        with self.subTest(case="6 spins, poor controller"):
            action = np.array([ 3.86111206, -0.8067965 , 3.86887524, 5.8814842 , -3.03354326,
                               7.42084848])
            fid = self._one_step_fidelity(6, 2, action, 24.83387072)
            self.assertLess(fid, 0.9025)

    def test_hermitianity_of_structured_perturbation(self):
        "Perturbed Hamiltonians are Hermitian and differ from draw to draw."
        env = Environment(20,0,6, np.zeros(20))
        z = env.structured_perturabation(env.noise) + env.sys
        self.assertTrue(np.allclose(z, np.conjugate(z.T)), "Perturbed ham must be Hermitian!")
        another_z = env.structured_perturabation(env.noise) + env.sys
        self.assertFalse(np.all(z==another_z), "don't fix the perturbation!")
        self.assertTrue(np.allclose(another_z, np.conjugate(another_z.T)),
                        "Perturbed ham must be Hermitian!")

    def test_plot_fid_vs_true_fid_for_adaptive_protocol(self):
        "The adaptive protocol draws extra samples; also plots draws against fidelity."
        env = Environment(5,0,3, np.zeros(5), fid_noisy=True,
                          adaptive=True, draws=5, adp_tol=0.05)
        fid = 0.8
        ov = np.sqrt(fid)
        env.in_state = np.array([ov,0,0,0,0])
        env.out_state = np.array([1,0,0,0,0])
        # self.assertTrue(abs(env.fidelity()-fid)<0.05)
        env.fidelity()
        self.assertGreater(env.adp_func_calls_increment, 5)
        # more intense visualization!
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            # the assertion above already ran; only the optional plot is skipped
            self.skipTest("matplotlib is not installed; skipping the visualization")
        plt.figure()
        fid_space = np.linspace(0,1,100)
        for tol in [0.01, 0.02, 0.03, 0.06,0.1,0.2]:
            calls = []
            for fid in fid_space:
                env.adp_func_calls_increment = 5
                ov = np.sqrt(fid)
                env.in_state = np.array([ov,0,0,0,0])
                env.out_state = np.array([1,0,0,0,0])
                env.adp_var_tol = tol
                env.fidelity()
                calls.append(env.adp_func_calls_increment)
            plt.plot(calls, fid_space, label=f"tolerated var {tol}")
        plt.xlabel("draws or function calls")
        plt.ylabel("fidelity")
        plt.legend()
        # seems to be symmetric about 0.5 in the amount of apetite for func_calls; makes sense,

    def test_timeout(self):
        "A zero-second deadline has already passed by the time the function is called."
        @timeout(0) # just throw an exception
        def timeout_function():
            pass
        self.assertRaises(AssertionError, timeout_function) # catch it. complain if not caught
if __name__ == "__main__":
    # Executed as a script: discover and run the unittest cases in this module.
    unittest.main()
    # raise AssertionError("custom break point")