Skip to content

Commit

Permalink
Include DATA_PATH variable in config file
Browse files (browse the repository at this point in the history)
  • Loading branch information
evavanweenen committed Jan 20, 2022
1 parent d8c6858 commit 0c09574
Show file tree
Hide file tree
Showing 10 changed files with 27 additions and 26 deletions.
4 changes: 0 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,5 @@
.~lock.*

*.yml
dexcom_id.csv

/data_final/*.csv
/data_final/*.xls

/lib/*
4 changes: 3 additions & 1 deletion config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@
with open('mapping.yml', 'r') as f:
rider_mapping = yaml.safe_load(f)

rider_mapping_inv = {v:k for k,v in rider_mapping.items()}
rider_mapping_inv = {v:k for k,v in rider_mapping.items()}

DATA_PATH = '/wave/hypex/data/'
9 changes: 5 additions & 4 deletions preprocess_carbs.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import pandas as pd
import pandas as pd
from config import DATA_PATH

path = './data/carbs/'
root = DATA_PATH+'carbs/'

# read data from FAO (2017) downloaded from https://ourworldindata.org/diet-compositions
df = pd.read_csv(path+'/daily-caloric-supply-derived-from-carbohydrates-protein-and-fat.csv')
df = pd.read_csv(root+'/daily-caloric-supply-derived-from-carbohydrates-protein-and-fat.csv')

# select most recent year (2013)
df = df[df.Year == df.Year.max()]
Expand All @@ -16,4 +17,4 @@
'Calories from plant protein (FAO (2017))' :'plant protein (kcal)',
'Calories from fat (FAO (2017))' :'fat (kcal)',
'Calories from carbohydrates (FAO (2017))' :'carbohydrates (kcal)'})
df.to_csv(path+'/country_nutrients.csv', index_label=False)
df.to_csv(root+'/country_nutrients.csv', index_label=False)
8 changes: 4 additions & 4 deletions preprocess_dexcom.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@

from helper import *
from calc import *
from config import rider_mapping
from config import rider_mapping, DATA_PATH
from timezone import get_timezones_dexcom, get_timezones_final

from tqdm import tqdm

from matplotlib import pyplot as plt
import seaborn as sns

root = '/wave/hypex/data/Dexcom/'#'./data/Dexcom/'
root = DATA_PATH+'Dexcom/'

if not os.path.exists(root+'drop/'):
os.mkdir(root+'drop/')
Expand Down Expand Up @@ -539,8 +539,8 @@ def main():
df = df.drop('transmitter_order', axis=1)
df.to_csv(root+'clean/dexcom_utc.csv')

tz = get_timezones_final(df)
tz.to_csv(root.rstrip('Dexcom/')+'/timezone.csv')
tz = get_timezones_final(df, root_tp=DATA_PATH+'TrainingPeaks/')
tz.to_csv(DATA_PATH+'/timezone.csv')

# TODO: fix all insulin and carbs metrics
df = utc_to_localtime(df, tz)
Expand Down
9 changes: 5 additions & 4 deletions preprocess_fitness.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
import numpy as np
import pandas as pd

from config import rider_mapping
from config import rider_mapping, DATA_PATH

root = DATA_PATH+'fitness/'

path = '/wave/hypex/data/fitness/'
rider_mapping = {k.upper() : v for k, v in rider_mapping.items()}

df = pd.read_excel(path+'TEST ANALYSIS Dec_2018.xlsx', nrows=16, header=(0,1), sheet_name=None)
df = pd.read_excel(root+'TEST ANALYSIS Dec_2018.xlsx', nrows=16, header=(0,1), sheet_name=None)

# make sure all tabs are using the same units
df['Dec_2018'].loc[:,('VT1 (GET)', 'VO2%max')] /= 100
Expand Down Expand Up @@ -38,4 +39,4 @@
df = df.rename(columns={'level_0':'date'})
df = df.set_index(['RIDER', 'date']).sort_index()

df.to_csv(path+'fitness.csv')
df.to_csv(root+'fitness.csv')
4 changes: 2 additions & 2 deletions preprocess_trainingpeaks.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@

from bike2csv.converter import Converter as Convert2CSV

from config import rider_mapping_inv
from config import rider_mapping_inv, DATA_PATH
from helper import isnan, print_times_dates, country_names, country_timezones, country_names_inv
from calc import semicircles_to_degrees
from timezone import get_timezones_trainingpeaks

root = '/wave/hypex/data/TrainingPeaks/'#'data/TrainingPeaks/'
root = DATA_PATH+'TrainingPeaks/'

def get_country_from_gps(df:pd.DataFrame):
# use geopy with OpenStreetMap to look up country from coordinates
Expand Down
5 changes: 3 additions & 2 deletions scrape_dexcom.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from config import credentials, rider_mapping
from config import credentials, rider_mapping, DATA_PATH
import pyderman as dr
from selenium import webdriver

import pandas as pd
import time

path = '/local/home/evanweenen/hype-data/data/Dexcom/'
path = DATA_PATH+'Dexcom/'

dates = pd.date_range(start='01-01-2014', end='31-12-2021', freq='90D')

athletes = pd.read_csv(path+'dexcom_id.csv', index_col=0, dtype={'US':object, 'EU':object}) # beadle missing
Expand Down
4 changes: 2 additions & 2 deletions scrape_procyclingstats.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
import requests
from bs4 import BeautifulSoup

from config import rider_mapping
from config import rider_mapping, DATA_PATH

root = '/wave/hypex/data/calendar/'
root = DATA_PATH+'calendar/'

riders = ('oliver-behringer',
'andrea-peron',
Expand Down
4 changes: 2 additions & 2 deletions scrape_trainingpeaks.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from config import credentials, rider_mapping
from config import credentials, rider_mapping, DATA_PATH
import pyderman as dr
from selenium import webdriver
from tqdm import tqdm
Expand Down Expand Up @@ -122,7 +122,7 @@ def main():
"""
scraper = Scraper(dates=pd.date_range(start='01-01-2014', end='31-12-2021', freq='30D'))

scraper.open_driver(download_path='/local/home/evanweenen/hype-data/data/TrainingPeaks/export/')
scraper.open_driver(download_path=DATA_PATH+'TrainingPeaks/export/')
scraper.login(url_login='https://home.trainingpeaks.com/login', credentials=credentials['TP']['pro'])

scraper.click_calendar()
Expand Down
2 changes: 1 addition & 1 deletion timezone.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,7 @@ def remove_faulty_timezones(tz):
# TODO: it seems the Dexcom data of rider 3 may be one day ahead in time?
return tz

def get_timezones_final(df, root_tp='/wave/hypex/data/TrainingPeaks/'):
def get_timezones_final(df, root_tp):
# --------- TrainingPeaks
# trainingpeaks timezones
tz_tp = pd.concat({i: pd.read_csv(root_tp+f'/clean/{i}/{i}_timezone_final_list.csv', index_col=0) for i in df.RIDER.unique()})
Expand Down

0 comments on commit 0c09574

Please sign in to comment.