-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreadHBN.py
83 lines (69 loc) · 3.31 KB
/
readHBN.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from struct import unpack
from numpy import fromfile, reshape, datetime64, timedelta64, empty
from pandas import DataFrame
from datetime import datetime
from collections import defaultdict, Counter
tcodes = {1:'_Minutely', 2:'_Hourly', 3:'_Daily', 4:'_Monthly', 5:'_Yearly'}
def read_HBN(hbnfile, hdffile):
''' Extracts all data in hbnfile and saves to HDF5 file'''
data = fromfile(hbnfile, 'B')
if data[0] != 0xFD:
print('BAD HBN FILE - must start with magic number 0xFD')
return
# Build layout maps of the file's contents
map = {}
mapf = defaultdict(list)
target = {'P', 'I', 'R'} # (P)erland, (I)mplnd, and (R)chres
index = 1 # already used first byte (magic number)
while index < len(data):
if chr(data[index+8]) not in target:
index -= 1 # works, but why???
reclen, rectype, optype, lue, section = unpack('2I8sI8s', data[index : index+28])
reclen = reclen >> 2
optype = optype.decode().strip() # Python3 converts to bytearray not string
section = section.decode().strip()
if rectype:
level = unpack('I', data[index+32 : index+36])[0] # 28 + 4
mapf[(optype, lue, section, level)].append(index)
else:
map[(optype, lue, section)] = (index, reclen)
index += reclen + 6
# For each mapf key, use map to extract floating point values. Save to HDF5 file
for optype, lue, section, level in mapf.keys():
index, reclen = map[(optype, lue, section)]
# field names are concatenation of length, string pairs. Use trick to ignore lengths.
fields = ''.join([' ' if chr(y) < 'A' else chr(y) for y in data[index+28:index+reclen+4]]).split() # 4 ??? why
nfields = len(fields)
dindex = mapf[(optype, lue, section, level)]
d = empty(len(dindex) * nfields) # preallocate for speed, just like MATLAB
t = empty(len(dindex)).astype('datetime64[ns]')
findex = 0
tindex = 0
for di in dindex:
yr, mo, dy, hr, mn = unpack('5I', data[di+36 : di+56]) # 28 + 8
t[tindex] = todatetime64(yr, mo, dy, hr)
tindex += 1
d[findex:findex+nfields] = unpack(f'{nfields}f', data[di+56:di+56+nfields*4])
findex += nfields
# fix duplicate names in DataFrame (from nexits for example)
cnts = Counter(fields)
copy = cnts.copy()
temp = []
for x in fields[::-1]:
count = cnts[x]
if copy[x] == 1:
temp.append(x)
else:
temp.append(x+str(count))
cnts[x] -= 1
d = reshape(d, (len(dindex), nfields))
df = DataFrame(d, index=t, columns=reversed(temp)).astype('float32')
df.to_hdf(hdffile, f'/RESULTS{tcodes[level]}/{optype}/{section}/{optype[0]}{lue:03d}')
del d,t
return
def todatetime64(yr=1900, mo=1, dy=1, hr=0):
'''returns datatime64 with fix for midnight given yr,mo,dy,hr'''
if hr == 24:
return datetime64(datetime(yr,mo,dy,23), 'm') + timedelta64(1,'h')
else:
return datetime64(datetime(yr,mo,dy,hr), 'm')