Skip to content

Commit

Permalink
Remove strings from heart rate column
Browse files Browse the repository at this point in the history
  • Loading branch information
evavanweenen committed Jan 20, 2022
1 parent f9d9c1c commit c7d19a7
Showing 1 changed file with 11 additions and 3 deletions.
14 changes: 11 additions & 3 deletions preprocess_trainingpeaks.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ def clean(i, verbose=False):
def local_timestamp(i, verbose=False):
################ PREREQUISITE:
## preprocess_dexcom.py: create {DATA_PATH}/timezone.csv
tz = pd.read_csv('/'.join(root.split('/')[:-2])+'/timezone.csv', index_col=0)
tz = pd.read_csv(DATA_PATH+'/timezone.csv', index_col=0)
tz['date'] = pd.to_datetime(tz['date'])
tz['timezone'] = pd.to_timedelta(tz['timezone'])

Expand All @@ -327,7 +327,7 @@ def glucose():
################ PREREQUISITE:
## preprocess_dexcom.py: create {DATA_PATH}/Dexcom/clean/dexcom_clean.csv

df_dc = pd.read_csv('/'.join(root.split('/')[:-2])+'/Dexcom/clean/dexcom_clean.csv', index_col=0)
df_dc = pd.read_csv(DATA_PATH+'/Dexcom/clean/dexcom_clean.csv', index_col=0)
df_dc['timestamp'] = pd.to_datetime(df_dc['timestamp'])
df_dc['local_timestamp'] = pd.to_datetime(df_dc['local_timestamp'])

Expand Down Expand Up @@ -378,7 +378,7 @@ def glucose():
del df, df_glucose; gc.collect()

df_dc = df_dc.sort_values(['RIDER', 'timestamp'])
df_dc.to_csv('/'.join(root.split('/')[:-2])+'/Dexcom/clean/dexcom_clean2.csv', index_label=0)
df_dc.to_csv(DATA_PATH+'/Dexcom/clean/dexcom_clean2.csv', index_label=0)

def features(i, verbose=0):
df = pd.read_csv(f'{root}clean/{i}/{i}_data3.csv')
Expand Down Expand Up @@ -482,6 +482,12 @@ def features(i, verbose=0):
df.left_right_balance = pd.to_numeric(df.left_right_balance)
print("CLEAN: left-right balance")

# -------------------- Heart rate
if df.heart_rate.apply(lambda x: isinstance(x, str)).any():
df['heart_rate'] = df['heart_rate'].replace({'\n ': np.nan})
df['heart_rate'] = pd.to_numeric(df['heart_rate'])
print("CLEAN: heart_rate")

# -------------------- Enhanced altitude
# check if enhanced_altitude equals altitude
print("CHECK: enhanced altitude does not equal altitude %s times"\
Expand Down Expand Up @@ -555,6 +561,8 @@ def main():
print("\n------------------------------- Athlete ", i)
features(i)

# TODO: use timezone.csv to fillna countries and timezones

if __name__ == '__main__':
main()

Expand Down

0 comments on commit c7d19a7

Please sign in to comment.