-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathjoin_dam_pup.py
122 lines (96 loc) · 4.78 KB
/
join_dam_pup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
"""
This script joins the dam and pup pose estimation files for the AMBER pipeline.
The merged data frame is formatted to match the layout expected by SimBA (single-animal project),
which is required for feature extraction with the AMBER feature extraction script.
Last updated: October 5, 2023
Author: Hannah Lapp
"""
import os
import sys
import pandas as pd
# File-name markers written by the AMBER DeepLabCut models. The text that
# precedes the marker in a file name is the original video name, which is used
# as the key for pairing a dam file with its pup file.
_DAM_MARKER = 'DLC_resnet50_dam-single-animalMay26shuffle'
_PUP_MARKER = 'DLC_dlcrnetms5_pup-nine-ptJun5shuffle'
_OUTPUT_SUBDIR = 'AMBER_joined_pose_estimation'


def _index_pose_files(file_names, marker, suffix):
    """Map each video key to the first file name that matches a model marker.

    A file qualifies when its name contains ``marker`` and its lower-cased
    name ends with ``suffix``. The key is the part of the name before the
    marker.
    """
    indexed = {}
    for file_name in file_names:
        if marker in file_name and file_name.lower().endswith(suffix):
            # setdefault keeps the first qualifying file for a key, matching
            # a first-hit search over the directory listing.
            indexed.setdefault(file_name.split(marker)[0], file_name)
    return indexed


def _load_dam_pose(dam_path):
    """Load a dam pose CSV and flatten its two header rows into column names."""
    dam_df = pd.read_csv(dam_path)
    # The first two data rows hold the body-part names and the coordinate
    # labels; joining them gives unique names such as 'nose_x'.
    dam_df.columns = dam_df.iloc[0] + '_' + dam_df.iloc[1]
    dam_df = dam_df.iloc[2:].rename(columns={'bodyparts_coords': 'frame'})
    # The frame index was read as text because of the header rows above it.
    dam_df['frame'] = dam_df['frame'].astype(int)
    return dam_df


def _add_header_rows(merged_df, key):
    """Prepend 'bodyparts' and 'coords' rows and set the scorer column names.

    Raises:
        ValueError: If the columns after 'frame' do not come in groups of
            three (x, y, likelihood).
    """
    n_columns = len(merged_df.columns)
    if (n_columns - 1) % 3 != 0:
        raise ValueError(
            'Cannot build header rows for ' + repr(key) + ': expected the '
            'columns after "frame" to come in (x, y, likelihood) triplets, '
            'found ' + str(n_columns - 1) + ' columns.'
        )

    # Strip the coordinate suffix so each body part name is repeated for its
    # x, y and likelihood columns.
    body_parts = [
        column_name.replace('_x', '').replace('_y', '').replace('_likelihood', '')
        for column_name in merged_df.columns
    ]
    # Negative labels place the two header rows ahead of the data once the
    # index is shifted and sorted.
    merged_df.loc[-2] = body_parts
    merged_df.loc[-1] = ['coords'] + ['x', 'y', 'likelihood'] * ((n_columns - 1) // 3)
    merged_df.index = merged_df.index + 2
    merged_df.sort_index(inplace=True)

    merged_df.columns = ['scorer'] + ['DLC_AMBER_dam_pup'] * (n_columns - 1)
    merged_df.iloc[0, 0] = 'bodyparts'
    return merged_df


def main(argv):
    """
    Process and merge pose estimation data for dam and pup videos.

    Given a video directory containing pose estimation CSV files for dam and
    pup videos, this function identifies files with specific naming patterns
    and merges the data based on frame number.

    Args:
        argv (list): Command-line arguments. The first argument is the name of
            this script. The second argument (argv[1]) should be the path to
            the directory containing pose estimation CSV files.

    Returns:
        None on success, or 1 if the directory argument is missing.

    Raises:
        ValueError: If a merged table's columns after 'frame' are not a
            multiple of three (x, y, likelihood per body part).

    This function performs the following steps:
    1. Identifies CSV files for dam and pup videos based on the original video
       file name (the text before the model marker in each file name).
    2. Loads and preprocesses the data from these files.
    3. Left-merges the pup data onto the dam data on the 'frame' column.
    4. Saves the merged data to '<key>.csv' in a subdirectory named
       'AMBER_joined_pose_estimation'.

    Example:
        To process pose estimation CSV files in the 'videos' directory, run
        the script with the following command:

        python join_dam_pup.py path/videos

    Note:
        - The script assumes specific naming patterns for dam and pup video
          files that are generated from the provided pose estimation models.
        - Only videos that have both a dam file and a pup file are joined.
    """
    if len(argv) < 2:
        print('Usage: python join_dam_pup.py <pose_estimation_directory>',
              file=sys.stderr)
        return 1

    print("Starting file joining...")
    video_directory = argv[1]
    all_files = os.listdir(video_directory)

    dam_files = _index_pose_files(all_files, _DAM_MARKER, '.csv')
    pup_files = _index_pose_files(all_files, _PUP_MARKER, 'unpickled.csv')
    output_directory = os.path.join(video_directory, _OUTPUT_SUBDIR)

    # Only keys present for both animals can be joined; sorting makes the
    # processing order reproducible.
    for key in sorted(dam_files.keys() & pup_files.keys()):
        print('Joining', key)
        dam_df = _load_dam_pose(os.path.join(video_directory, dam_files[key]))
        pup_df = pd.read_csv(os.path.join(video_directory, pup_files[key]))

        merged_df = dam_df.merge(pup_df, on='frame', how='left')
        merged_df = _add_header_rows(merged_df, key)

        os.makedirs(output_directory, exist_ok=True)
        merged_df.to_csv(os.path.join(output_directory, key + '.csv'), index=False)
def run_join_dam_pup():
    """Console entry point: run ``main`` on ``sys.argv`` and exit with its result."""
    exit_status = main(sys.argv)
    sys.exit(exit_status)
# Start the join only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run_join_dam_pup()