import os
import sys
import json
import pyvista as pv
import numpy as np
from tqdm import tqdm
from funcs import (r2_score, mae_score, standardise_minmax, unstandardise_minmax,
                   proc_bump, evaluate_subspaces, predict_design, parse_designs,
                   rebuild_fine)
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import pickle
from joblib import Parallel, delayed, cpu_count
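
# Usage: python postproc.py input.json
# A minimal input file might look like the following (keys match the parsing
# below; values are illustrative only, and parse_designs may expect additional
# design-list keys not shown here):
# {
#   "casename": "my_case",
#   "datadir": ".",
#   "vars": [0, 1],
#   "njobs": -1,
#   "subdim": 1,
#   "points": [[0.5, 0.1]],
#   "designs": [0, 1],
#   "sample to design": false,
#   "plot test": true,
#   "test score": true,
#   "label": false,
#   "field score": false
# }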
basedir = os.getcwd()
# Read json file
inputfile = sys.argv[1]
with open(inputfile) as json_file:
    json_dat = json.load(json_file)
# Parse json options
casename = json_dat['casename']
datadir = json_dat['datadir']
if datadir == '.':
    datadir = basedir
vars = json_dat['vars'] # list of variable indices to find subspaces for (see ordering in proc_dat.py)
n_jobs = json_dat['njobs']
subdim = json_dat['subdim']
if (("points" in json_dat)==True): # Points to plot summary plots at (put None to skip)
plot_pts = json_dat['points']
else:
plot_pts = None
if (("designs" in json_dat)==True): # Designs to write to vtk for (put None to skip)
designs_proc = json_dat['designs']
else:
designs_proc = None
designmesh = False
if (("sample to design" in json_dat)==True): designmesh = json_dat['sample to design']
test = False
if (("plot test" in json_dat)==True): test = json_dat['plot test']
test_score = False
if (("test score" in json_dat)==True): test_score = json_dat['test score']
label = False
if (("label" in json_dat)==True): label = json_dat['label']
field_score = False
if (("field score" in json_dat)==True): field_score = json_dat['field score']
rebuild = False
if designs_proc is not None: rebuild = True
designs_train, designs_test = parse_designs(json_dat)
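# parse_designs is assumed to return the lists of training and test design ids,
# used below to label individual points in the summary plots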
# housekeeping
saveloc = os.path.join(datadir,'POSTPROC_DATA',casename)
os.makedirs(saveloc,exist_ok=True)
os.makedirs(os.path.join(saveloc,'Figures'),exist_ok=True)
ncores = cpu_count()
if n_jobs == -1: n_jobs = ncores
if n_jobs > ncores: quit('STOPPING: n_jobs > available cores')
zp = 1.96 # z-score for a 95% confidence interval
# Read in subspace weights and grf
dataloc = os.path.join(datadir,'SUBSPACE_DATA',casename)
os.chdir(dataloc)
#mysubspaces = np.load('SS.npy',allow_pickle=True)
print('Loading mygrf.pickle from '+ dataloc)
with open("mygrf.pickle","rb") as pickle_file:
    mygrfs = pickle.load(pickle_file)
# Read in bump data (training)
dataloc = os.path.join(datadir,'PROCESSED_DATA',casename)
os.chdir(dataloc)
X = np.load('X.npy')
X, min_X, max_X = standardise_minmax(X)
# Read in bump data (test)
if test:
    X_test = np.load('X_test.npy')
    # standardise with the training min/max so test designs share the same scale
    X_test,*_ = standardise_minmax(X_test,min_value=min_X,max_value=max_X)
# Read in dtag array and true field data
print('Loading D.npy from '+ dataloc)
data = np.load('D.npy',allow_pickle=True)
if test:
    print('Loading D_test.npy from '+ dataloc)
    data_test = np.load('D_test.npy',allow_pickle=True)
num_pts = data.shape[0]
num_designs = np.array([np.shape(point.D)[0] for point in data])
num_bumps = X.shape[1]
num_vars = data[0].D.shape[1]
print('Max number of designs = %d' % np.max(num_designs))
print('Min number of designs = %d' % np.min(num_designs))
print('Number of nodes = %d' %(num_pts))
print('Number of bump functions = %d' %(num_bumps))
if max(vars) > num_vars-1: quit('STOPPING: A vars index is greater than the number of arrays stored in D')
# Read in Base vtk file
file = os.path.join(datadir,'CFD_DATA',casename,'basegrid.vtk')
#file = os.path.join(datadir,'CFD_DATA',casename,'baseline.vtk')
basegrid = pv.read(file)
gridcopy = basegrid.copy(deep=True)
coords = basegrid.points[:,:2]
vtk_pts = basegrid.n_points
# Read in idx_fine and idx_coarse indexes
print('Loading subsampling mapping...')
loadfile = np.load('covar.npz')
idx_fine = loadfile['idx_fine']
idx_coarse = loadfile['idx_coarse']
if rebuild: Dmean = loadfile['Dmean']
# Check dimensions
Nfine = len(idx_fine)
Ncoarse = len(idx_coarse)
N = Nfine + Ncoarse
if (Ncoarse!=num_pts): quit('STOPPING: Number of points in np arrays != len(idx_coarse)')
if (N!=vtk_pts): quit('STOPPING: Sum of points in idx arrays != vtk points')
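# idx_coarse indexes the subsampled nodes where subspace grfs were fitted; idx_fine
# holds the remaining nodes, whose predictions are later reconstructed from the
# coarse ones via the covariance (see the Schur-complement rebuild below)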
# Read in covariance data if needed
if rebuild:
    print('Reading in covariance data for variables...')
    Sigma = np.empty([N,N,len(vars)])
    for j, var in enumerate(vars):
        print(var)
        # ('covar_lr' presumably holds a low-rank approximation; the dense
        # alternative 'covar_%d.npy' is left commented out)
        # Sigma[:,:,j] = np.load('covar_%d.npy' %var)
        Sigma[:,:,j] = np.load('covar_lr_%d.npy' %var)
#############################################
# Sufficient summary plots at selected coords
#############################################
if plot_pts is not None:
    coarse_coords = coords[idx_coarse,:]
    # Get bump x locations from 0th design
    design = 'design_0000'
    cfd_data = os.path.join(datadir,'CFD_DATA',casename,design)
    os.chdir(cfd_data)
    bump,lower,upper = proc_bump('deform_hh.cfg')
    for p, pt in enumerate(plot_pts):
        # Find the nearest coarse sample point to the requested location
        dist = coarse_coords - pt
        dist = np.sqrt(dist[:,0]**2.0 + dist[:,1]**2.0)
        loc = np.argmin(dist)
        point = data[loc]
        print('Requested point = (%.4f,%.4f)' %(pt[0],pt[1]))
        print('Nearest sample point = (%.4f,%.4f)' %(coarse_coords[loc,0],coarse_coords[loc,1]))
        indices = point.indices
        x = X[indices,:]
        if test:
            point_test = data_test[loc]
            indices_test = point_test.indices
            x_test = X_test[indices_test,:]
        if mygrfs[loc,vars[0]] is None:
            print('No grf at this point, skipping...')
            continue
        for var in vars:
            mygrf = mygrfs[loc,var]
            # M is the learned subspace projection; u = x @ M gives the active
            # coordinates w^T x plotted on the x-axis below
            M = mygrf.M
            u = x @ M
            y = point.D[:,var]
            # Summary plot
            fig, ax = plt.subplots()
            fig.suptitle('Var %d, Point = (%.4f,%.4f)' %(var,coarse_coords[loc,0],coarse_coords[loc,1]))
            ax.set_xlabel(r'$\mathbf{w}^T \mathbf{x}$')
            ax.set_ylabel('Var %d' %var)
            ax.plot(u,y,'C0o',ms=8,mec='k',mew=1,alpha=0.6,label='Training') # Plot real field values
            if test:
                u_test = x_test @ M
                y_test = point_test.D[:,var]
                ax.plot(u_test,y_test,'C2o',ms=8,mec='k',mew=1,alpha=0.6,label='Test') # Plot real field values
            if label:
                for i,d in enumerate(point.indices):
                    design = designs_train[d]
                    ax.annotate('%d' % design,[u[i],y[i]],color='C0')
                if test:
                    for i,d in enumerate(point_test.indices):
                        design = designs_test[d]
                        ax.annotate('%d' % design,[u_test[i],y_test[i]],color='C2')
            # GRF posterior mean and 95% confidence band over the subspace coordinate
            ugrf = np.linspace(-2.25,2.25,50)
            y_mean, y_std = mygrf.predict(ugrf.reshape(-1,1),return_std=True)
            ax.plot(ugrf,y_mean,'C3-',lw=3,label='Mean')
            y_std *= zp
            ax.fill_between(ugrf,y_mean-y_std,y_mean+y_std,color='C2',label='95% CI',alpha=0.3)
            # Annotate with mae and r2 score
            ypred = mygrf.predict(u,return_std=False)
            mae = mae_score(y,ypred)
            r2 = r2_score(y,ypred)
            if test:
                ypred_test = mygrf.predict(u_test,return_std=False)
                mae_test = mae_score(y_test,ypred_test)
                r2_test = r2_score(y_test,ypred_test)
                ax.set_title('Train/Test MAE = %.2g/%.2g, Train/Test $R^2$ = %.3f/%.3f' %(mae,mae_test,r2,r2_test))
            else:
                ax.set_title('Train MAE = %.2g, Train $R^2$ = %.3f' %(mae,r2))
            ax.legend()
            filename = 'summary_pt%d_var%d.pickle' %(p,var)
            filename = os.path.join(saveloc,'Figures',filename)
            with open(filename,'wb') as f:
                pickle.dump(fig,f)
###################################################################
# Compute averaged accuracy metrics for training (and test) designs
###################################################################
if field_score:
    print('Evaluating subspace grfs at %d nodes...' % num_pts)
    for var in vars:
        print('Variable index %d...' % var)
        results = Parallel(n_jobs=n_jobs)(delayed(evaluate_subspaces)(X, mygrfs[j,var], data[j], var) for j in tqdm(range(num_pts)))
        r2 = np.array([item[0] for item in results])
        mae = np.array([item[1] for item in results])
        print('Average training r2 score = %.3f' %(np.nanmean(r2)))
        print('Average training mae = %.4f' %(np.nanmean(mae)))
        if test_score:
            results = Parallel(n_jobs=n_jobs)(delayed(evaluate_subspaces)(X_test, mygrfs[j,var], data_test[j], var) for j in tqdm(range(num_pts)))
            r2 = np.array([item[0] for item in results])
            mae = np.array([item[1] for item in results])
            # scores below -1 appear to be sentinels for nodes without a grf; exclude them
            print('Average test r2 score = %.3f' %(np.mean(r2[r2>=-1])))
            print('Average test mae = %.4f' %(np.mean(mae[mae>=-1])))
        # Save per-node scores to a vtk point cloud
        points = basegrid.points[idx_coarse]
        point_cloud = pv.PolyData(points)
        point_cloud['r2_var'+str(var)] = np.array(r2)
        point_cloud['mae_var'+str(var)] = np.array(mae)
        os.chdir(saveloc)
        point_cloud.save('field_scores.vtk')
##############################################################
# Save predicted fields and errors to vtk for selected designs
##############################################################
if designs_proc is not None:
    print('Predicting fields for %d designs...' % len(designs_proc))
    points = basegrid.points[idx_coarse]
    for i in designs_proc:
        design = 'design_%04d' % i
        print(design)
        cfd_data = os.path.join(datadir,'CFD_DATA',casename,design)
        os.chdir(cfd_data)
        if designmesh:
            designgrid = pv.read('flow.vtk')
        else:
            designgrid = pv.read('flow_base.vtk')
        point_cloud = pv.PolyData(points)
        Xi,*_ = proc_bump('deform_hh.cfg')
        Xi,*_ = standardise_minmax(Xi.reshape(1,-1), min_value=min_X, max_value=max_X) # Standardise with same min/max as original training x set
        for j, var in enumerate(vars):
            # Use grf for prediction
            print('Evaluating grfs at coarse points for variable %d...' %var)
            results = Parallel(n_jobs=n_jobs)(delayed(predict_design)(Xi, mygrfs[n,var]) for n in tqdm(range(num_pts)))
            ypred_coarse = np.array([item[0] for item in results])
            ystd_coarse = np.array([item[1] for item in results])
            # Save coarse results to a vtk point cloud
            point_cloud['ypred'+str(var)] = ypred_coarse
            point_cloud['ystd'+str(var)] = ystd_coarse
            # Get prediction on remaining (fine) grid points via the Schur complement.
            # rebuild_fine is assumed to apply the standard Gaussian conditioning identity
            #   y_fine = mu_fine + Sigma_fc inv(Sigma_cc) (y_coarse - mu_coarse)
            # with Sigma partitioned into coarse/fine blocks and Dmean supplying the means.
            print('Rebuilding to fine points...')
            ypred_fine, ystd_fine = rebuild_fine(ypred_coarse,ystd_coarse,Dmean[idx_coarse,var],Dmean[idx_fine,var],Sigma[:,:,j])
            # Combine coarse and fine grid points
            ypred = np.empty(N)
            ypred[idx_fine] = ypred_fine
            ypred[idx_coarse] = ypred_coarse
            ystd = np.empty(N)
            ystd[idx_fine] = ystd_fine
            ystd[idx_coarse] = ystd_coarse
            ystd[ystd==99] = 1.0 # 99 appears to be a sentinel std for nodes without a grf
            # Store final prediction in vtk
            gridcopy['pred_var'+str(var)] = ypred
            gridcopy['std_var'+str(var)] = ystd
        # Resample back on to design grid (store the result in a new variable so
        # gridcopy is not overwritten between designs)
        print('Resampling to original mesh...')
        resampled = gridcopy.sample(designgrid)
        # Save vtk
        print('Saving vtk...')
        savefile = os.path.join(saveloc,'flow_post_design_%04d.vtk' % i)
        resampled.save(savefile)
        savefile = os.path.join(saveloc,'points_post_design_%04d.vtk' % i)
        point_cloud.save(savefile)
plt.show()
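
# The summary figures are saved as pickled matplotlib Figure objects. To reopen
# one later (the path below is illustrative):
#
#   import pickle
#   import matplotlib.pyplot as plt
#   with open('POSTPROC_DATA/my_case/Figures/summary_pt0_var0.pickle','rb') as f:
#       fig = pickle.load(f)
#   plt.show()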