-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreaders.py
116 lines (99 loc) · 4.36 KB
/
readers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import numpy as np
import os
#import natsort
from io import StringIO
import scipy.io as sio
def read_trajs_from_xyz(fpaths_list, code_type, natom):
# main function for reading trajectories specified in every subdir listed
# in the parent dir fpaths
# filters all possible subdirs down to suitable trajectory ones on the
# basis they contain the file traj_file and it is readable
# by default it currently reads only SHARC files but can be easily extended
# returns a np array of shape [natom, 3, ntraj, nts] where nts = maximum
# run time over all trajs read in
# trajs that crash before nts have subsequent elements set to NaN
# also returns a list of each trajs run time and the total num. of trajs
if code_type == 'sharc':
traj_file = 'output.xyz'
else:
print("Only SHARC trajectories currently available.")
dirs = []
for fpaths in fpaths_list:
dirs.extend([(fpaths + '/' + d) for d in os.listdir(fpaths) if os.path.isdir(fpaths + '/' + d)])
#dirs = natsort.natsorted(dirs)
dirs = [d for d in dirs if os.path.isfile(d + '/' + traj_file)]
if len(dirs) < 1:
raise Exception("No directories containing trajectories found. Please check cwd.")
fpaths = [(f + '/' + traj_file) for f in dirs]
print("Attempting to read in %s trajectories from xyz files" % len(fpaths))
all_trajs, run_times, ntraj = load_trajs_xyz(fpaths, natom, 'sharc') # currently default is SHARC only
trajs = format_traj_array(all_trajs, run_times, natom, ntraj)
return trajs, run_times
def format_traj_array(traj_list, run_times, natom, ntraj):
# formats nested list of trajectories into a np array of dimensions (natom,
# 3, ntraj, nts)
# trajs might not all run over time frame, therefore nts is taken to be the
# longest
# run time of all trajectories together. For trajectories that crash before
# the int nts,
# the subsequent array indexes are filled with NaN
max_run_time = max(run_times)
trajs = np.full((natom, 3, ntraj, max_run_time), np.nan)
for i in range(ntraj):
run_time = run_times[i]
trajs[:, :, i, 0:run_time] = np.swapaxes(np.swapaxes(np.array(traj_list[i]), 0, 2), 0, 1) # must do some reshaping
return trajs
def load_trajs_xyz(fpaths, natom, traj_type='sharc'):
# iterates over file paths for all trajectories in subdirs and returns
# all suitable trajs in a nested list along with the run time for each traj
# simulation and the total traj number
# currently only works for SHARC formatted trajectory files (see call to
# read_sharc() func)
# can further extend to other quantum codes by adding a suitable function
# that returns a
# list of trajs and their corrosponding run times
run_times = []
all_trajs = []
ntraj = 0
for idx, fpath in enumerate(fpaths):
with open(fpath, 'r') as trj_file:
if traj_type.lower() == 'sharc':
traj_coords, traj_nts = read_sharc(trj_file, natom)
else:
pass # extend to other quantum md code readers if needed - write another function like read_sharc to call
if traj_nts > 1:
ntraj += 1
run_times.append(traj_nts)
all_trajs.append(traj_coords)
else:
print("%s contains less than one time step - excluding from analysis." % fpath)
return all_trajs, run_times, ntraj
def read_sharc(trj_file, natom):
# reads SHARC style trajectory files in xyz files
# returns a list of a single trajectories coordinates and its run time
count = 0
na = 0
nts = 0
geom_all = []
geom_temp = []
for idx, line in enumerate(trj_file):
idx += 1
count += 1
if count > 2:
atom_coord = np.genfromtxt(StringIO(line))[1:4].tolist()
geom_temp.append(atom_coord)
na += 1
if na == natom:
na = 0
count = 0
nts += 1
geom_all.append(geom_temp)
geom_temp = []
return geom_all, nts
if __name__== "__main__":
natom = 3
parent_dir = '/Users/kyleacheson/PycharmProjects/SHARC_NMA/'
trajs_dirs = ['Singlet_1', 'Singlet_2']
fpaths = [parent_dir + tdir for i, tdir in enumerate(trajs_dirs)]
trajs = read_trajs_from_xyz(fpaths, 'sharc', natom)
#sio.savemat('trajectories_test.mat', {'Q': trajs})