Added assign_coords to forcing sources with daily data
Jesus Perez Curbelo (ame805) committed Aug 23, 2024
1 parent f1b0f15 commit 3352214
Showing 6 changed files with 117 additions and 1,417 deletions.
4 changes: 0 additions & 4 deletions camels_spat2nh-1058854.out

This file was deleted.

4 changes: 4 additions & 0 deletions camels_spat2nh-1123653.out
@@ -0,0 +1,4 @@
+Unusable basins: 13
+{'05RE002', '08LF023', '08MG022', '07QD002', '06DA001', '08MG020', '08AA007', '08LD003', '09AE002', '08KH011', '09AA004', '07SB017', '07BJ006'}
+Basins to process: 1698
+Processing USA...
23 changes: 15 additions & 8 deletions camels_spat2nh.py
@@ -79,7 +79,12 @@ def camels_spat2nh(data_dir, data_gen, unusuable_basins):
     ## Process data for each basin and save to csv file
     for country in countries[:]:
         # Create a folder for each country
-        country_dir = os.path.join(data_dir_out, f'CAMELS_spat_{country}_{len(data_sources)}sources')
+        # Check if only testing
+        if ONLY_TESTING:
+            country_dir = os.path.join(data_dir_out, f'CAMELS_spat_{country}_testing')
+        else:
+            country_dir = os.path.join(data_dir_out, f'CAMELS_spat_{country}_{len(data_sources)}sources')
 
         if not os.path.exists(country_dir ):
             os.makedirs(country_dir)
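
For context, a minimal standalone sketch of the directory-naming switch above; the concrete values of ONLY_TESTING, data_dir_out, data_sources, and country are hypothetical stand-ins for what the script defines elsewhere:

import os

# Hypothetical stand-ins; the real script defines these elsewhere
ONLY_TESTING = True
data_dir_out = '/tmp/camels_spat_out'
data_sources = ['ERA5', 'EM_EARTH', 'daymet', 'RDRS']
country = 'USA'

# Test runs write to a separate folder so they never clobber full outputs
suffix = 'testing' if ONLY_TESTING else f'{len(data_sources)}sources'
country_dir = os.path.join(data_dir_out, f'CAMELS_spat_{country}_{suffix}')
os.makedirs(country_dir, exist_ok=True)  # exist_ok=True replaces the exists() check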

@@ -142,9 +147,9 @@ def processBasinSave2CSV(basin_f, basin_data_path, country_dir,

         print(f'{src}_files', len(eras_files), '->', folder2load)
 
-        # Check if only testing
-        if ONLY_TESTING:
-            continue
+        # # Check if only testing
+        # if ONLY_TESTING:
+        #     continue
 
         # Check whether there are files to load
         if len(eras_files) == 0:
@@ -179,9 +184,9 @@ def processBasinSave2CSV(basin_f, basin_data_path, country_dir,
             df_src_dict[src] = basin_data_df
 
 
-        # Check if only testing
-        if ONLY_TESTING:
-            return None
+        # # Check if only testing
+        # if ONLY_TESTING:
+        #     return None
 
     print('basin', basin_f, '->', df_src_dict.keys())
     # Check if there are len(data_sources) data sources in df_src_dict.keys() (expected ERA5, EM_EARTH, daymet, and RDRS)
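
The comment above expects all four forcing sources to be present before merging. A hedged sketch of such a completeness guard; the guard itself is not in the commit, and only the names data_sources and df_src_dict come from the diff:

# Hypothetical guard; only the names data_sources and df_src_dict come from the diff
data_sources = ['ERA5', 'EM_EARTH', 'daymet', 'RDRS']
df_src_dict = {'ERA5': ..., 'EM_EARTH': ..., 'daymet': ..., 'RDRS': ...}

missing = set(data_sources) - set(df_src_dict)
if missing:
    # Skip the basin rather than writing a CSV with partial forcings
    print(f'Skipping basin: missing sources {missing}')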
@@ -224,12 +229,14 @@ def processBasinSave2CSV(basin_f, basin_data_path, country_dir,
     df_target.rename(columns={'time': 'date'}, inplace=True)
     # Remove duplicates
     df_target = df_target.drop_duplicates(subset=['date'])
+
+    # print('df_target', df_target.head())
 
     # Merge input and target dataframes
     df_merged = df_merged_inp.merge(df_target, on='date')
 
 
-    # print('df_merged', df_merged_inp.head())
+    # print('df_merged', df_merged.head())
     # # Print data_vars
     # for var in df_merged.columns:
     #     print(var)
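
A runnable sketch of the rename / de-duplicate / merge sequence above, with tiny synthetic frames standing in for the real forcing and streamflow data:

import pandas as pd

# Synthetic stand-ins for the merged forcing inputs and the streamflow target
df_merged_inp = pd.DataFrame({'date': pd.date_range('1980-01-01', periods=3),
                              'precip_era5': [1.0, 2.0, 3.0]})
df_target = pd.DataFrame({'time': pd.date_range('1980-01-01', periods=4),
                          'q_obs': [0.5, 0.6, 0.7, 0.8]})

# Same steps as the diff: rename time to date, drop duplicate dates, merge
df_target = df_target.rename(columns={'time': 'date'})
df_target = df_target.drop_duplicates(subset=['date'])
df_merged = df_merged_inp.merge(df_target, on='date')
print(df_merged)  # only dates present in both frames survive the inner merge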
1,493 changes: 94 additions & 1,399 deletions camels_spat_exploring_forcings.ipynb

Large diffs are not rendered by default.

4 changes: 0 additions & 4 deletions utils/data_dir.yml
@@ -4,10 +4,6 @@ data_dir_camels_spat: /project/gwf/gwf_cmt/wknoben/camels_spat/camels-spat-data
 relative_path_forcing: forcing/lumped
 relative_path_target: observations
 
-# countries:
-#   - USA
-#   - CAN
-
 camels_spat_metadata: camels_spat_metadata.csv
 camels_spat_unusable: camels_spat_unusable.csv

6 changes: 4 additions & 2 deletions utils/utils.py
@@ -22,6 +22,8 @@ def reduceDataByDay(dataset, set_vars, sum_vars, input_vars_repeated, forcing_sr
     day_dates = pd.to_datetime(dataset.coords["time"].values).normalize()
     day_dates = xr.DataArray(day_dates, name="time", dims="time")
 
+    print('day_dates', day_dates)
+
     # Group by day and apply appropriate reduction method for each variable
     daily_data = xr.Dataset()
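
For reference, a self-contained sketch of the daily reduction that reduceDataByDay applies to sub-daily sources, here written with resample rather than the groupby used in the function; variable names and values are illustrative:

import numpy as np
import pandas as pd
import xarray as xr

# Hourly ERA5-style forcing spanning two days (illustrative values)
times = pd.date_range('1980-01-01', periods=48, freq='h')
ds = xr.Dataset({'precip': ('time', np.random.rand(48)),       # flux: sum to daily
                 'temp': ('time', 270 + np.random.rand(48))},  # state: mean to daily
                coords={'time': times})

daily = xr.Dataset()
daily['precip'] = ds['precip'].resample(time='1D').sum()
daily['temp'] = ds['temp'].resample(time='1D').mean()
print(daily['time'].values)  # two midnight-stamped days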

@@ -41,8 +43,8 @@ def reduceDataByDay(dataset, set_vars, sum_vars, input_vars_repeated, forcing_sr

         # Check if the frequency is daily - daymet
         if inferred_frequency == pd.Timedelta(days=1) and variable in variable in set_vars:
-            # Do not aggregate
-            daily_data[var] = dataset[variable]
+            # Do not aggregate and bring to the day dimension: 1980-01-01 12:00:00 to be 1980-01-01
+            daily_data[var] = dataset[variable].assign_coords(time=day_dates)
         else:
             if variable in sum_vars:
                 # print('sum', variable)
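
The assign_coords change is the point of this commit: daymet is already daily but stamped at noon, so it is not aggregated and its timestamps only need to be moved onto the day grid. A minimal sketch of that behaviour, with illustrative values:

import numpy as np
import pandas as pd
import xarray as xr

# Daymet-style daily data stamped at 12:00 (illustrative values)
times = pd.date_range('1980-01-01 12:00', periods=3, freq='D')
da = xr.DataArray(np.array([1.0, 2.0, 3.0]), coords={'time': times}, dims='time')

# Normalize the timestamps to midnight, as reduceDataByDay builds day_dates
day_dates = xr.DataArray(pd.to_datetime(da['time'].values).normalize(),
                         name='time', dims='time')

# Replace the time coordinate without touching the data:
# 1980-01-01 12:00:00 becomes 1980-01-01 00:00:00
da_daily = da.assign_coords(time=day_dates)
print(da_daily['time'].values)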
