# drift_scan_auto_corr: visualise the auto correlations of the drift scans and
# extract auto correlation data from measurement sets.
# Helga Denes 05/06/2019
# Edited by K.M.Hess ([email protected])
# Edited by H. Denes (26/08/2019, 19/03/2020, 22/04/2020)
__author__ = "Helga Denes"
__date__ = "$19-march-2020 16:00:00$"
__version__ = "0.3"
import casacore.tables as pt
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

def extract_data(antennas, beams, chan_range, chan_bins, exclude, data_location, task_id):
    """
    Extract drift scan data from MS files and put them into a pandas data frame.
    Data is extracted for each beam in 10 frequency bins and 4 different polarisations.
    Data is extracted both averaged over all antennas and per antenna.
    Several parameters can be specified:
    antennas: A list with all the antennas to read out. (Apertif has 12) [0, 1, 2, 3, ...]
    beams: A list with all the beams to read out. (Apertif has 40) [0, 1, 2, 3, ...]
    chan_range: RFI-free channel range. A list with 2 numbers [x1, x2]
    chan_bins: A list of channel ranges to bin the data in frequency. By default this function only uses channels between 14000 and 24500 because of strong RFI. [[x1, x2], [x1, x2], ...]
    exclude: a string with instructions for TaQL to exclude certain data, e.g. baselines or antennas.
    data_location: path to the data as a string, e.g. '/tank/apertif/driftscans'
    task_id: the observation id as a string
    Returns a pandas DataFrame with one column per beam, frequency bin and polarisation (antenna-averaged and per antenna).
    """
    chan_range_1 = str(chan_range[0])
    chan_range_2 = str(chan_range[1])
    df = pd.DataFrame()
    for j in beams:
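        # The TaQL query below selects only autocorrelations (ANTENNA1 == ANTENNA2) and, per time
        # stamp (GROUP BY TIME), averages the amplitudes over all antennas with gmeans for the
        # selected channel range and each of the four polarisation products.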
        t = pt.taql(
            'select TIME, gmeans(abs(DATA[{0}:{1},0])) as XXPOL, gmeans(abs(DATA[{0}:{1},1])) as XYPOL, gmeans(abs(DATA[{0}:{1},2])) as YXPOL, gmeans(abs(DATA[{0}:{1},3])) as YYPOL from {2}{3}/WSRTA{3}_B0{4:02}.MS where ANTENNA1==ANTENNA2 {5} GROUP BY TIME'.format(chan_range[0], chan_range[1], data_location, task_id, j, exclude))
        print('BEAM: {}'.format(j))
        times = np.array(pt.tablecolumn(t, 'TIME'))
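        # The number of time stamps in the first beam is used as the reference length; the last 3
        # samples are trimmed everywhere so that all columns added to the data frame have the same length.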
        if j == beams[0]:
            time_steps = len(times)
            print('time steps in first beam: ', time_steps)
            df['time'] = times[:time_steps-3]
        for k, chan in enumerate(chan_bins):
            data_beam_xx = np.mean(np.array(pt.tablecolumn(t, 'XXPOL'))[:, chan[0]:chan[1], :], axis=1)
            auto_corr_xx = np.reshape(data_beam_xx, len(data_beam_xx))
            data_beam_xy = np.mean(np.array(pt.tablecolumn(t, 'XYPOL'))[:, chan[0]:chan[1], :], axis=1)
            auto_corr_xy = np.reshape(data_beam_xy, len(data_beam_xy))
            data_beam_yx = np.mean(np.array(pt.tablecolumn(t, 'YXPOL'))[:, chan[0]:chan[1], :], axis=1)
            auto_corr_yx = np.reshape(data_beam_yx, len(data_beam_yx))
            data_beam_yy = np.mean(np.array(pt.tablecolumn(t, 'YYPOL'))[:, chan[0]:chan[1], :], axis=1)
            auto_corr_yy = np.reshape(data_beam_yy, len(data_beam_yy))
            df['auto_corr_beam_' + str(j) + '_freq_' + str(k) + '_xx'] = auto_corr_xx[:time_steps-3]
            df['auto_corr_beam_' + str(j) + '_freq_' + str(k) + '_xy'] = auto_corr_xy[:time_steps-3]
            df['auto_corr_beam_' + str(j) + '_freq_' + str(k) + '_yx'] = auto_corr_yx[:time_steps-3]
            df['auto_corr_beam_' + str(j) + '_freq_' + str(k) + '_yy'] = auto_corr_yy[:time_steps-3]
        for i in antennas:
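            # Same selection as above, but restricted to a single antenna (ANTENNA1 == i) and without
            # gmeans, so the autocorrelation of each antenna is stored separately.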
            t_ant = pt.taql(
                'select TIME, abs(DATA[{0}:{1},0]) as XXPOL, abs(DATA[{0}:{1},1]) as XYPOL, abs(DATA[{0}:{1},2]) as YXPOL, abs(DATA[{0}:{1},3]) as YYPOL from {2}{3}/WSRTA{3}_B0{4:02}.MS where ANTENNA1==ANTENNA2 AND ANTENNA1={5} GROUP BY TIME'.format(chan_range[0], chan_range[1], data_location, task_id, j, i))
            print('ANT: {}'.format(i))
            for k, chan in enumerate(chan_bins):
                data_ant_xx = np.mean(np.array(pt.tablecolumn(t_ant, 'XXPOL'))[:, chan[0]:chan[1], :], axis=1)
                auto_corr_xx_ant = np.reshape(data_ant_xx, len(data_ant_xx))
                data_ant_xy = np.mean(np.array(pt.tablecolumn(t_ant, 'XYPOL'))[:, chan[0]:chan[1], :], axis=1)
                auto_corr_xy_ant = np.reshape(data_ant_xy, len(data_ant_xy))
                data_ant_yx = np.mean(np.array(pt.tablecolumn(t_ant, 'YXPOL'))[:, chan[0]:chan[1], :], axis=1)
                auto_corr_yx_ant = np.reshape(data_ant_yx, len(data_ant_yx))
                data_ant_yy = np.mean(np.array(pt.tablecolumn(t_ant, 'YYPOL'))[:, chan[0]:chan[1], :], axis=1)
                auto_corr_yy_ant = np.reshape(data_ant_yy, len(data_ant_yy))
                df['auto_corr_beam_' + str(j) + '_freq_' + str(k) + '_xx_antenna_' + str(i)] = auto_corr_xx_ant[:time_steps-3]
                df['auto_corr_beam_' + str(j) + '_freq_' + str(k) + '_xy_antenna_' + str(i)] = auto_corr_xy_ant[:time_steps-3]
                df['auto_corr_beam_' + str(j) + '_freq_' + str(k) + '_yx_antenna_' + str(i)] = auto_corr_yx_ant[:time_steps-3]
                df['auto_corr_beam_' + str(j) + '_freq_' + str(k) + '_yy_antenna_' + str(i)] = auto_corr_yy_ant[:time_steps-3]
    return df

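# Example usage of extract_data (illustrative sketch; the task_id below is a placeholder):
#
#     chan_bins = [[0, 1050], [1050, 2100]]   # bin edges relative to the sliced channel range
#     df = extract_data(antennas=range(12), beams=range(40),
#                       chan_range=[14000, 24500], chan_bins=chan_bins,
#                       exclude='', data_location='/tank/apertif/driftscans/',
#                       task_id='190101001')
#
# Columns are named auto_corr_beam_<beam>_freq_<bin>_<pol> for the antenna-averaged data and
# auto_corr_beam_<beam>_freq_<bin>_<pol>_antenna_<ant> for the per-antenna data, with <pol> one
# of xx, xy, yx, yy.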

def data_to_csv(data_location, task_id, chan_range, bin_num):
    """
    - Extract data on the observed field into a csv file. This is later used to calculate coordinates.
    - Extract auto correlation data with the "extract_data" function.
    - Write the extracted data into csv files.
    data_location: string
    task_id: string
    chan_range: RFI-free channel range. A list with 2 numbers [x1, x2]
    bin_num: number of bins (integer)
    """
    print('test', data_location)
    if data_location[-1] != '/':
        data_location += '/'
    output_path = data_location + task_id + '/'
    if not os.path.exists(output_path):
        os.mkdir(output_path)
    # First read HA & Dec from MS file header and save into a csv file for later use.
    # These are the best calculated RA/Dec positions of the centers of the beams.
    beams = range(0, 40)
    hadec = pd.DataFrame()
    ha = np.zeros(40)
    dec = np.zeros(40)
    for j in beams:
        field_table = pt.table('{}{}/WSRTA{}_B0{:02}.MS::FIELD'.format(data_location, task_id, task_id, j))
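        # DELAY_DIR is read back with shape (nrows, 1, 2); element [0, 0] holds the direction pair
        # (interpreted here as HA and Dec) in radians, following the standard Measurement Set convention.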
        (ha[j], dec[j]) = field_table.getcol("DELAY_DIR")[0, 0]
    hadec['ha'] = ha
    hadec['dec'] = dec
    hadec.to_csv(str(output_path) + str(task_id) + '_hadec.csv')
    # Extract data and export data into a csv file
    # count antennas in the MS file:
    t_name = pt.taql('select NAME from {0}{1}/WSRTA{1}_B001.MS::ANTENNA'.format(data_location, task_id))
    ant_names = t_name.getcol("NAME")
    antennas = range(len(ant_names))
    exclude = ''
    # exclude = 'AND ANTENNA1!= 11 AND ANTENNA2!=11 AND ANTENNA1!= 10 AND ANTENNA2!=10'
    total_chan_num = chan_range[1] - chan_range[0]
    bin_size = int(total_chan_num / bin_num)
    chan_bins = []
    for i in range(0, total_chan_num, bin_size):
        chan_bins.append([i, i + bin_size])
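    # Bin edges are relative to the channel slice DATA[chan_range[0]:chan_range[1]] used in
    # extract_data. With chan_range=[14000, 24500] and bin_num=10 this gives bin_size = 1050 and
    # bins [0, 1050], [1050, 2100], ..., [9450, 10500].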
    df_1 = extract_data(antennas, beams, chan_range, chan_bins, exclude, data_location, task_id)
    df_1.to_csv(str(output_path) + str(task_id) + '_exported_data_frequency_split.csv')
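

# Example entry point: a minimal sketch of how data_to_csv might be run for one observation.
# The task_id below is a placeholder; data_location and chan_range follow the values quoted
# in the docstrings above.
if __name__ == '__main__':
    data_to_csv(data_location='/tank/apertif/driftscans', task_id='190101001',
                chan_range=[14000, 24500], bin_num=10)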