-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathRFID_aggregate.5.2.py
340 lines (300 loc) · 27.2 KB
/
RFID_aggregate.5.2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
#method spreadsheet_update() creates/updates spreadsheets for multiple fishids at once, given a tagmanager RFID spreadsheet filename
#output sheet names will be 'fishid_all.xlsx' (filepath of choice)- will update existing spreadsheets and make new ones for fishids with no spreadsheet
#if length of a spreadsheet for one fishid exceeds 1 million (near limit that pandas or excel can handle), program creates a new spreadsheet (_most_recent)
#the older dates are assigned to a spreadsheet with the filename 'fishid_dates_firstscandate_lastscandate.xlsx'
#method aggregate_raster() creates raster plots of RFID readings for given fishids over a given date range
#only graphs RFID readings between 08:00 and 20:00
#method aggregate_barplot() creates bar plots of RFID readings for given fishids over a given date range
#only graphs RFID readings between 08:00 and 20:00
#TODO decide whether AntennaIDs can be kept at 1,3, and 5 or need to be automated for whatever AntennaIDs are in use- discuss with Karen
#TODO adjust space between labels/axes, margin space, graph distance, etc for better-looking graphs
import numpy as np
import pandas as pd
from os import path #imported to test if file exists
from datetime import datetime
from datetime import timedelta
import matplotlib.pyplot as plt
import matplotlib.axes as axs
import seaborn as sns
import xlrd
import sys
class FishAggregator: #create class
def __init__(self, target_fishids, filenames, filepath_in, filepath_out, extra_filenames, target_start, target_end): #define class variables
self.target_fishids = target_fishids
self.filenames = filenames
self.filepath_in = filepath_in
self.filepath_out = filepath_out
self.target_start, self.target_end = target_start, target_end
self.extra_filenames = extra_filenames
def build_dict_all(self): #this function reads in every new file, and compiles them into a dictionary of dataframes, so that they can
#be used between fishids without re-loading in each time
fishdict = {} #create dictionary
for i in self.filenames: #for every filename
pathname = self.filepath_in + i + '.xlsx' #combines file and path for reading in
new_file = pd.read_excel(pathname) #loads in new excel as a dataframe
fishdict[i] = new_file #adds the dataframe to the fishdictionary under the index of its filename
print('READING IN FILE: ' + i + '...')
print('FISHDICTIONARY LOADED')
return(fishdict)
def fishid_from_dict(self): #compiles dataframe of all RFID readings for a given fishid, across all dates
fishlist = pd.DataFrame() #create empty dataframe to hold the fishid readins from each individual dataframe
print('COMPILING SPREADSHEET FOR FISHID: ' + self.fishid + '...')
for i in self.fishdict: #for each dataframe in the dictionary of dataframes (containing all fish, all date ranges)
df = self.fishdict[i] #df is the dataframe at that index
df.columns = df.columns.str.replace(" ","") #take spaces out of column names
df_fishid = df[(df['HEXTagID'] == self.fishid)] #takes only the RFID readings for the fishid of choice
fishlist = fishlist.append(df_fishid) #add dataframe to overall dataframe of all RFID readings for fishid
for i in ('DownloadTime', 'DownloadDate', 'IsDuplicate'):
if i in fishlist:
fishlist = fishlist.drop(columns= i) #drops columns to allow duplicate RFID readings to be recognized
print(fishlist.head(2))
return(fishlist)
def check_and_export(self): #this function checks whether the fishid already has a saved spreadsheet
pathname = self.filepath_out + self.fishid + '_most_recent.xlsx' #combines name and filepath to test if there is already a sheet for the fishid
#TODO- this doesn't work if I have to split a spreadsheet that exceeds maximum row #- figure out how to accomodate
print('CHECKING FOR EXISTING FILENAMES: ' + pathname + '...')
if path.exists(pathname): #if there is already a spreadsheet with that filename...
previous_file = pd.read_excel(pathname) #read in that spreadsheet as a dataframe
print('THERE IS ALREADY A SHEET FOR FISHID: ' + self.fishid)
output_file = previous_file.append(self.fishlist) #adds new list to end of existing list
output_file = output_file.drop_duplicates() #removes any overlap readings from the old and new file.
add_length = len(output_file.index) - len(previous_file.index) #calculate how many RFID rows are being added to spreadsheet
else: #if there is no previous spreadsheet...
print('THERE IS NO PREVIOUS SHEET FOR FISHID: ' + self.fishid)
output_file = self.fishlist #no previous sheet exists, so the sheet in python becomes the output file
output_file = output_file.drop_duplicates() #removes any overlap readings from the old and new file.
add_length = len(output_file.index) #calculate how many RFID rows are being added in new spreadsheet
#THIS SECTION APPEARS TO BE CAUSING ISSUES- SOMEWHERE BETWEEN HERE AND print(output_file.head(3)), A COLUMN WITH THE NAME 69741 IS BEING ADDED
output_file['ScanDateDate'] = (pd.to_datetime(output_file.ScanDate, format = '%m/%d/%Y')).dt.date #convert ScanDate values to date object (so that they can be sorted correctly between years)
#output_file['ScanDateDate'] = output_file['ScanDateDateTime'].dt.date
output_file = output_file.sort_values(by=['ScanDateDate', 'ScanTime']) #sort sheet by ScanDate and then ScanTime
output_file = output_file.drop(columns=['ScanDateDate'])
print('ADDING ' + str(add_length) + ' RFID READINGS TO SPREADSHEET FOR ' + self.fishid + '...')
self.fishid_fishdict[self.fishid] = output_file #add the dataframe to dataframe dictionary of fishids (1 entry per fishid, over all dates)
print('SAMPLE OUTPUT FILE: ')
print(output_file.head(3))
#TODO WHEN USING THIS (BELOW) PROCESS THE 'most_recent' SPREADSHEET ENDS UP MISSING COLUMN FOR ANTENNAID, AND HAS OTHER LAYOUT ISSUES
#HOWEVER, THE HEAD OF BOTH SPREADSHEETS (older and 'most_recent') PRINTED IN PYTHON ARE IDENTICAL (SCREENSHOT IN split_fishids FOLDER),
#AND THE OLDER SPREADSHEET IS NORMAL UPON EXPORTING
#FIGURE OUT WHAT ACCOUNTS FOR THIS DIFFERENCE!!!!!
if len(output_file.index) > 1000000: #if the compiled spreadsheet for that fishid exceeds 1 million columns (near limit that python/excel can contain)
print('SPREADSHEET FOR FISHID: ', self.fishid, ' IS IN DANGER OF EXCEEDING MAXIMUM ROW COUNT- SPLITTING...')
earlier_output_file, later_output_file = output_file.iloc[:1000000, :], output_file.iloc[1000001:,:] #earlier output file contains rows 1-1000000
print('LATER OUTPUT FILE HEAD: ')
print(later_output_file.head(10))
print('LATER OUTPUT FILE TAIL: ')
print(later_output_file.tail(10))
print('EARLIER OUTPUT FILE HEAD: ')
print(earlier_output_file.head(10))
print('EARLIER OUTPUT FILE TAIL: ')
print(earlier_output_file.tail(10))
first_date, last_date = str(earlier_output_file['ScanDate'].iloc[1]).replace('/','.'), str(earlier_output_file['ScanDate'].iloc[-1]).replace('/','.')
earlier_output_filename = self.fishid + '_dates_' + first_date + '-' + last_date + '.xlsx'
#^set output filename for the earlier file of the split- it will be named fishid_dates_startdate_enddate.xlsx
earlier_output_file_pathname = self.filepath_out + earlier_output_filename #output pathname for earlier spreadsheet
earlier_output_file.to_excel(earlier_output_file_pathname, index= False) #export to folder
print(earlier_output_filename + ' SPREADSHEET SAVED') #appends to existing dataframe (or not), and exports to folder by running check_and_export
later_output_file.to_excel(pathname, index= False) #the later of the files will be named 'most_recent' like normal
print(self.fishid + ' MOST RECENT SPREADSHEET SAVED')
else:
output_file.to_excel(pathname, index= False) #export to folder
print(self.fishid + ' MOST RECENT SPREADSHEET SAVED') #appends to existing dataframe (or not), and exports to folder by running check_and_export
def update_spreadsheets(self): #updates spreadsheets with new RFID readings for all fishids in list
self.fishdict = self.build_dict_all() #gets aggregate dataframe of inputs for fishid by running fishname_cycler
self.fishid_fishdict = {} #creates dataframe dictionary to hold dataframes of each fishid, so that they don't have to be re-read in for graphing
for i in self.target_fishids: #for every fishid in the list...
self.fishid = i #that fishid becomes self.fishid
self.fishlist = self.fishid_from_dict() #compiles spreadsheet of all new reads for that fishid
self.check_and_export() #updates (or creates new) spreadsheet for that fishid
print('SPREADSHEETS UPDATED FOR FISHIDS: ' + str(self.target_fishids))
def convert_date(self, input_date): #function converts a string of text into a readable date format (requires mm.dd.yyyy input)
input_date = str(input_date)
output_date = datetime.strptime(input_date, '%m/%d/%Y') #converts input date (string) into a datetime function
return(output_date)
def aggregate_fishids_by_date(self): #combines RFID readings for fishids of your choice, between dates of your choice, into one dataframe-
#sets this dataframe as self.fishids_aggregated_by_date, callable in other methods
self.fishid_fishdict = {} #create empty dictionary to hold spreadsheets for all target fishids
for i in self.target_fishids: #for each target fishid...
pathname = self.filepath_out + i + '_most_recent.xlsx' #combines name and filepath to test if there is already a sheet for the fishid
#print('CHECKING FOR
# EXISTING FILENAMES: ' + pathname + '...')
if path.exists(pathname): #if there is already a spreadsheet with that filename...
previous_file = pd.read_excel(pathname) #read in that spreadsheet as a dataframe
print('THERE IS ALREADY A SHEET FOR FISHID: ' + i)
self.fishid_fishdict[i] = previous_file #insert the spreadsheet for that fishid into the dataframe dictionary
else:
print('THERE IS NO SHEET FOR FISHID: ' + i)
self.fishlist[i] = pd.DataFrame() #insert empty dataframe as place in dictionary- TODO does this work when there are no reads?
if len(self.extra_filenames) > 0:
for j in self.extra_filenames: #for each extra file (if those are needed given the date range and fishids in question)...
pathname = self.filepath_out + j + '.xlsx' #create pathname... (no need to add any words to the actual filename)
if path.exists(pathname) == False: #the file should already exist if it is being specifically written in- if it does not...
print('ERROR: THE PATHNAME: ' + pathname + 'DOES NOT EXIST- RECHECK THAT FILENAME WAS ENTERED CORRECTLY') #recheck that filename and filepath exist
else: #if the extra file exists (as it should if it has been entered/called for)...
print('READING IN EXTRA FILE: ' + j)
previous_file = pd.read_excel(pathname) #read in the extra spreadsheet file
self.fishid_fishdict[j] = previous_file #add the extra spreadsheet file to the dictionary of spreadsheets for each of the selected fishids accross the daterange
#self.fishid_fishdict will then contain spreadsheets for every fishid (including extra, older files if needed based on date range)
print('FISHID FISHDICTIONARY: ')
print(self.fishid_fishdict)
df_for_graphing = pd.DataFrame() #create empty dataframe to hold all fishids for the date range
target_start, target_end = self.convert_date(self.target_start), self.convert_date(self.target_end) #convert start and end dates into datetime objects
fishids_and_extra_files = self.target_fishids + self.extra_filenames #all initial fishids, and the extra files for fishids whose RFID read spreadsheets exceeded one million rows
for i in fishids_and_extra_files: #for each target fishid/file...
fishid_fishlist = self.fishid_fishdict[i] #pull out the dataframe in the dictionary of dataframes for that fishid
print('CONVERTING AND CULLING TIMES FOR FISHID: ', i)
culled_fishlist = fishid_fishlist[(fishid_fishlist['ScanDate'].apply(self.convert_date) >= target_start) & (fishid_fishlist['ScanDate'].apply(self.convert_date) <= target_end)]
print('CULLED FISHLIST HEAD FOR FISHID: ' + str(i) + ': ')
print(culled_fishlist.head(10))
#^for that fishid dataframe, pulls out only the RFID readings for the daterange between target_start and target_end
df_for_graphing = df_for_graphing.append(culled_fishlist) #add it to the dataframe of all fishids for that date range
df_for_graphing = df_for_graphing.sort_values(by=['ScanDate', 'ScanTime'])
self.fishids_aggregated_by_date = df_for_graphing #self.fishids_aggregated_by_date can now be used in different methods within the class
print('AGGREGATED: HEAD: ')
print(df_for_graphing.head(10))
print('AGGREGATED: TAIL: ')
print(df_for_graphing.tail(10))
print('GENERATED COMBINED DATAFRAME FOR FISHIDS:' + str(self.target_fishids) + ' BETWEEN DATES: ' + self.target_start + ' - ' + self.target_end) #prints that dataframe for fishids and dates has been created
def convert_time(self, input_time): #converts a value from a string into a datetime (time) object
output_time = datetime.strptime(input_time, '%H:%M:%S.%f')
return(output_time)
def select_hours(self, input_sheet, start_time, end_time): #culls a sheet to only a certain range of hours
input_sheet['TimeConverted'] = input_sheet['ScanTime'].apply(self.convert_time) #make a new column of datetime time objects for scantime
input_sheet = input_sheet[(input_sheet['TimeConverted'] <= self.convert_time(end_time)) & (input_sheet['TimeConverted'] >= self.convert_time(start_time))] #cull spreadsheet to just readings between target times
return(input_sheet)
def generate_raster(self): #given a set list of target fishids readings between a start date and end date,
#this function generates a figure containing one raster plot for each AntennaID, with all target fish, between those dates
#this function culls so that only times between 08:00 and 20:00 are shown
#instructions for including AntennaID5 are commented out
df = self.fishids_aggregated_by_date #load dataframe of all fishids for the chosen date range
df = self.convert_to_datetime(df) #add column of datetime objects for ScanDateTime
df = self.select_hours(df, '08:00:00.00', '20:00:00.00') #culls dataframe to just readings between 08:00 and 20:00 (does not include 20:00)
df_antennaid_1 = df[df.AntennaID == 1] #sort all RFId reads from AntennaID1 into one dataframe
df_antennaid_3 = df[df.AntennaID == 3] #sort all RFId reads from AntennaID3 into one dataframe
#df_antennaid_5 = df[df.AntennaID == 5]
print('SHEETS SORTED BY ANTENNA ID- RFID READS FOR ANTENNAID 1: ' + str(len(df_antennaid_1.index)) + ' RFID READS FOR ANTENNAID 3: ' + str(len(df_antennaid_3.index)))
#fig, (ax1, ax2, ax3) = plt.subplots(3)
fig, (ax1, ax2) = plt.subplots(2, figsize=(18,7)) #makes 2 stacked subplots in one image, image size is 18x8 inches
fig.suptitle('RFID readings for fishids: ' + str(self.target_fishids) + ' between dates: ' + self.target_start + ' - ' + self.target_end) #creates title for figure, noting fishid and dates
sns.scatterplot(x= df_antennaid_1.ScanDateTime, y= df_antennaid_1.HEXTagID, data= df_antennaid_1, marker = '|', hue = df_antennaid_1.HEXTagID, s = 100, ax= ax1) #creates raster plot for AntennaID1
sns.scatterplot(x= df_antennaid_3.ScanDateTime, y= df_antennaid_3.HEXTagID, data= df_antennaid_3, marker = '|', hue = df_antennaid_3.HEXTagID, s = 100, ax= ax2) #creates raster plot for AntennaID3
ax1.title.set_text('AntennaID 1') #subtitle
ax2.title.set_text('AntennaID 3') #subtitle
#sns.scatterplot(x= df_antennaid_5.ScanDateTime, y= df_antennaid_5.HEXTagID, data= df_antennaid_5, marker = '|', hue = df_antennaid_5.HEXTagID, s = 100, ax= ax3)
plt.subplots_adjust(bottom=0.2, hspace=0.5) #creates space at bottom of figure and between plots
plotname = str(self.target_fishids) + '_' + str(self.target_start).replace('/','.') + '-' + str(self.target_end).replace('/','.') + '_raster.png'
pathname = '/Users/johnmerlo-coyne/Desktop/Juntti Lab Stuff/RFID Project/Figures/' + plotname
plt.savefig(pathname)
plt.show() #display plots- TODO save plots as image file
def convert_to_datetime(self, df): #for a given spreadsheet, combines ScanDate and ScanTime columns into a ScanDateTime column, and makes the rows datetime objects
df['DateTimeRaw'] = df['ScanDate'] + ' ' + df['ScanTime'] #combine scandate and scantime as strings
df['ScanDateTime'] = pd.to_datetime(df.DateTimeRaw, format = '%m/%d/%Y %H:%M:%S.%f') #convert datetimeraw column into a datetime object that can be used
return(df)
def create_bargraph(self): #from a dataframe of RFID readings for a group of target fishids between a target daterange, produces a bargraph of RFID counts binned by hour
print('CREATING BARGRAPH FOR FISHIDS: ' + str(self.target_fishids) + ' FOR TARGET DATES: ' + target_start + ' - ' + target_end + '...')
df = self.fishids_aggregated_by_date #reads in the aggregated df of target fishids in the target daterange
df = self.convert_to_datetime(df) #creates a column of datetime objects for the RFID reading (df['ScanDateTime'])
df['ScanDateTimeShort'] = df['ScanDateTime'].values.astype('datetime64[h]') #cuts the datetime column to just month, day and hour
start_date, raw_end_date = self.convert_date(self.target_start), self.convert_date(self.target_end)
end_date = raw_end_date + timedelta(days=1)
possible = pd.date_range(start_date, end_date, freq='h') #creates list of all datetimes between start dayhour and end dayhour
possible = possible[:-1] #removes the first hour of end date (output will not include any hour in end date)
possible_culled = [] #create empty list to hold possible all hours (including those with 0 entries) that will be in bar graph
for i in possible: #adds hours between 08:00 and 20:00 to a new series, then makes that series into 'possible'
if ((i.hour <= 19) and (i.hour >= 8)): #if the hour is between 08:00 and 20:00...
possible_culled.append(i) #add that hour to the list of hours to be included in the binned graph
df_tograph = pd.DataFrame(possible_culled, columns= ['possible_culled']) #create empty dataframe- first column is list of all possible datehours to be graphed, column name is possible_culled
#^will contain columns with the RFID frequency for each datehour in possible_culled for each target fishid
for i in self.target_fishids: #for each fishid in the list of target fishids...
#TODO create a df holding only readings for that fishid
df_target_fishid = df[df['HEXTagID'] == i] #creates dataframe holding only RFID readings for that individual target fishid
#TODO for each fishid df, get list of frequencies for each datehour in possible[], and add it as a column as df_tograph[fishid]
df_tograph[i] = self.get_fishid_occurences_for_datehour_range(df_target_fishid, possible_culled) #adds that fishid's RFID frequency list (for each datehour in possible_culled) as a column to the tograph dataframe
#TODO ^ will df_tograph[i] (meant to name the column after that fishid string) work?
#TODO once df of RFID counts for each fishid for possible_culled is created, make graph
#fig, ax = plt.subplots(1, figsize=(18,7)) #makes 2 stacked subplots in one image, image size is 18x8 inches
#x_vals = df_tograph['possible_culled'] #x values are all the datehours in possible_culled
#y_vals = df_tograph[self.target_fishids] #y values are all the frequencies of RFID readings for the possible_culled datehours, for each fish
fig, ax = plt.subplots(figsize=(18,7))
df_tograph.plot(x='possible_culled', y=self.target_fishids, kind='bar', width= self.barwidth, ax=ax)
#ax.set_xticklabels(labels= df_tograph.possible_culled, rotation=55, ha='right')
plt.title('RFID readings for fishids: ' + str(self.target_fishids) + ' between dates: ' + self.target_start + ' - ' + self.target_end) #creates title for figure, noting fishid and dates
plt.xlabel('Date and Time (H)')
plt.ylabel('RFID Readings')
plt.tight_layout()
plt.subplots_adjust(left= 0.05)
plotname = str(self.target_fishids) + '_' + str(self.target_start).replace('/','.') + '-' + str(self.target_end).replace('/','.') + '_bar.png'
pathname = '/Users/johnmerlo-coyne/Desktop/Juntti Lab Stuff/RFID Project/Figures/' + plotname
plt.savefig(pathname)
plt.show()
def get_fishid_occurences_for_datehour_range(self, fishid_df, possible_datehours): #given a df of RFID readings for only one target fishid, and a list of datehours to be graphed, returns a list of that fishid's RFID counts for each datehour to be graphed
freqlist = [] #empty list to hold the RFID frequencies for each datehour (including 0 if there are none for the datehour)
for i in possible_datehours: #for each datehour in the list of all datehours between the start and end dates and between 08:00 and 20:00...
if (i in fishid_df['ScanDateTimeShort'].values): #if the df for the input fishid contains that datehour...
freqlist.append(fishid_df['ScanDateTimeShort'].value_counts()[i]) #return the number of RFID readings for that datehour
else: #if there are no RFID readings for that datehour...
freqlist.append(0) #add 0 to the list of frequencies
return(freqlist) #return the list of frequencies,
#^which will line up with list of frequencies for all other fishids because all are structure around the list of possible datehours
#set variables
all_fishids = ['3D6.1D59B07986', '3D6.1D59B07981', '3D6.1D59B0796C','3D6.1D59B0793D','3D6.1D599FDD2C', '3D6.1D59B07978',
'3D6.1D599FDD53','3D6.1D599FDD2A','3D6.1D599FDD3F','3D6.1D599FDD07','3D6.1D599FDD27','3D6.1D599FDD31']
#^this list doesn't get used in the script- just contains all the fishids for easy copying and pasting
all_tagmanager_files = ['09-25-2020_tagmanager','10-02-2020_tagmanager', '10-08-2020_tagmanager', '10-23-2020_tagmanager', '11-06-2020_tagmanager',
'12-4-2020_3', '12-18-2020_tagmanager', '01-25-2021_tagmanager', '02082021_tagmanager_ALL', '02-11-2021_tagmanager_all']
#^this list includes the file names of all tagmanager files to date- only use whole thing when creating all new fishids from scratch
filenames = ['01-25-2021_tagmanager']
#must be a list, even when only reading in one file
#^list of new spreadsheet filenames to be added (updating spreadsheets)- DO NOT include '.xlsx' at end- program adds that later
filepath_in = '/Users/johnmerlo-coyne/Desktop/Juntti Lab Stuff/RFID Project/tagmanager/'
#^shared filepath of files to be read in (assumes they are in same folder) (updating spreadsheets)
filepath_out = '/Users/johnmerlo-coyne/Desktop/Juntti Lab Stuff/RFID Project/Split FishIDs/'
#^filepath of destination for aggregate sheet (updating spreadsheets) (plotting)
target_start, target_end = '10/11/2020', '10/18/2020' #DATE FORMAT: 'mm.dd.yyyy' for graphing, etc- what dates should the program act on (plotting)
input_target_fishids = ['3D6.1D59B07978'] #list of fishids to be targeted (plotting)
#DON'T FORGET TO INPUT ANY EXTRA (NOT 'MOST RECENT') FILES FOR ANY NEEDED FISHIDS
input_extra_filenames = [] #list of any fishid filenames that are not 'most recent'- if there is a fishid spreadsheet that was cut off at 1 million rows and renamed,
#and that fishid is one of the targets, include it here- do not include '.xlsx'
fishobject = FishAggregator(input_target_fishids, all_tagmanager_files, filepath_in, filepath_out, input_extra_filenames, target_start, target_end) #sets variables for class
def spreadsheet_update(): #updates RFID spreadsheets for all fishids
#^be sure to set filenames, filepath_in and filepath_out to what you need them to be
fishobject.target_fishids = ['3D6.1D59B07986'] #sets target fishids to all fishids
fishobject.update_spreadsheets() #updates spreadsheets
print('SPREADSHEETS UPDATED FOR ' + str(fishobject.target_fishids) + ' USING TAGMANAGER SHEETS ' + str(fishobject.filenames))
def aggregate_raster(): #creates raster plots for fishids and date ranges of choice (set target_fishids, target_start, and target_end)
try:
fishobject.fishids_aggregated_by_date
except AttributeError:
fishobject.target_fishids = input_target_fishids
fishobject.aggregate_fishids_by_date()
else:
print('AGGREGATED FISHID SHEET HAS ALREADY BEEN CREATED')
fishobject.generate_raster() #generates raster plot from dataframe
print('RASTER PLOTS CREATED FOR ' + str(fishobject.target_fishids) + ' IN DATE RANGE: ' + fishobject.target_start + ' - ' + fishobject.target_end)
def aggregate_barplot():
try:
fishobject.fishids_aggregated_by_date
except AttributeError:
fishobject.target_fishids = input_target_fishids
fishobject.aggregate_fishids_by_date()
else:
print('AGGREGATED FISHID SHEET HAS ALREADY BEEN CREATED')
fishobject.barwidth = 1
fishobject.create_bargraph()
print('BARPLOT CREATED FOR ' + str(fishobject.target_fishids) + ' IN DATE RANGE: ' + fishobject.target_start + ' - ' + fishobject.target_end)
def aggregate_export():
fishobject.target_fishids = input_target_fishids
fishobject.aggregate_fishids_by_date()
if len(fishobject.fishids_aggregated_by_date.index) == 0:
print('AGGREGATED SPREADSHEET IS EMPTY- CHECK THAT TARGET DATES AND FISHIDS ARE CORRECT')
sys.exit()
else:
export_filepath = '/Users/johnmerlo-coyne/Desktop/Juntti Lab Stuff/RFID Project/Aggregated FishIDs/'
export_filename = ('Aggregated RFID_' + str(input_target_fishids) + ' ' + fishobject.target_start.replace('/','.') + '-' + fishobject.target_end.replace('/','.') + '.xlsx')
export_pathname = export_filepath + export_filename
fishobject.fishids_aggregated_by_date.to_excel(export_pathname)
print('AGGREGATE SPREADSHEET FOR: ' + str(input_target_fishids) + ' BETWEEN ' + target_start + ' - ' + target_end + 'SAVED')
#spreadsheet_update()
#aggregate_barplot()
aggregate_raster()
#aggregate_export()