Merge pull request #43 from NOAA-CEFI-Portal/develop
Feature rotation and IO improvement
chiaweh2 authored Sep 12, 2024
2 parents 5a3d5c8 + d539fa0 commit d8b574b
Showing 17 changed files with 7,719 additions and 42 deletions.
151 changes: 141 additions & 10 deletions mom6/mom6_module/mom6_io.py
@@ -19,7 +19,11 @@
import numpy as np
import xarray as xr
from mom6 import DATA_PATH
- from mom6.mom6_module.mom6_types import ModelRegionOptions,GridOptions,DataTypeOptions,DataSourceOptions
+ from mom6.mom6_module.mom6_types import (
+ ModelRegionOptions,GridOptions,
+ DataTypeOptions,DataSourceOptions,
+ DataFreqOptions
+ )

warnings.simplefilter("ignore")
xr.set_options(keep_attrs=True)
@@ -132,6 +136,7 @@ def __init__(
tercile_relative_dir : str = None,
grid : GridOptions = 'raw',
source : DataSourceOptions = 'local',
chunks : dict = None
) -> None:
"""
input for the class to get the forecast data
@@ -165,7 +170,10 @@
self.data_relative_dir = data_relative_dir
self.static_relative_dir = static_relative_dir
self.tercile_relative_dir = tercile_relative_dir

if chunks is None:
self.chunks = {}
else:
self.chunks = chunks
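The new `chunks` keyword defaults to an empty dict (xarray's default, no dask chunking) and is forwarded to the file-opening calls for local reads. A minimal usage sketch; the class name `MOM6Forecast`, the `var` argument, and the paths are assumptions inferred from the surrounding diff, not confirmed by it:

forecast = MOM6Forecast(
    var='tos',                       # hypothetical variable name
    data_relative_dir='forecast/',   # hypothetical relative paths
    static_relative_dir='static/',
    grid='raw',
    source='local',
    chunks={'init': 1, 'member': 1, 'lead': -1},
)
ds = forecast.get_all()  # lazily loaded with the requested dask chunking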

def get_all(self) -> xr.Dataset:
"""
@@ -194,7 +202,7 @@ def get_all(self) -> xr.Dataset:
else:
ds_static = MOM6Static.get_grid(self.static_relative_dir)
# set up chunking
- io_chunk = {}
+ io_chunk = self.chunks
elif self.source == 'opendap':
file_list = OpenDapStore(grid=self.grid,data_type='forecast').get_catalog()
for file in file_list:
@@ -234,7 +242,7 @@ def get_all(self) -> xr.Dataset:
else:
mom6_dir = os.path.join(DATA_PATH,self.data_relative_dir)
file_list = glob.glob(f'{mom6_dir}/*.nc')
- io_chunk = {}
+ io_chunk = self.chunks
elif self.source == 'opendap':
file_list = OpenDapStore(grid=self.grid,data_type='forecast').get_catalog()
io_chunk = {'init': 1,'member':1,'lead':-1}
@@ -293,7 +301,7 @@ def get_tercile(
raise OSError('for raw grid please input the path to grid file')
else:
ds_static = MOM6Static.get_grid(self.static_relative_dir)
- io_chunk = {}
+ io_chunk = self.chunks
elif self.source == 'opendap':
file_list = OpenDapStore(grid=self.grid,data_type='forecast').get_catalog()
for file in file_list:
@@ -339,7 +347,7 @@ def get_tercile(
else:
mom6_dir = os.path.join(DATA_PATH,self.tercile_relative_dir)
file_list = glob.glob(f'{mom6_dir}/*.nc')
- io_chunk = {}
+ io_chunk = self.chunks
elif self.source == 'opendap':
file_list = OpenDapStore(grid=self.grid,data_type='forecast').get_catalog()
io_chunk = {'init': 4,'member':1,'lead':-1}
@@ -493,6 +501,7 @@ def __init__(
static_relative_dir : str = None,
grid : GridOptions = 'raw',
source : DataSourceOptions = 'local',
chunks : dict = None
) -> None:
"""
input for getting the historical run data
@@ -522,13 +531,88 @@ def __init__(
self.source = source
self.data_relative_dir = data_relative_dir
self.static_relative_dir = static_relative_dir
if chunks is None:
self.chunks = {}
else:
self.chunks = chunks

- def get_all(self) -> xr.Dataset:
@staticmethod
def freq_find(
file_list : list[str],
freq : DataFreqOptions = None):
"""finding the correct data based on input frequency
Parameters
----------
file_list : list
a list of files with dir path
freq : str
frequency of the data. Default is None.
Returns
-------
list
a list with only one element which contain the file need
to be read
Raises
------
ValueError
if multiple file present in the `file_list` but no freq is provided
an error message is shown in stdout
"""
# files at different frequencies exist, resulting in multiple matches
if len(file_list)>1 and freq is not None:
if freq not in ['annual','monthly','daily']:
raise ValueError(
'freq must be one of "annual", "monthly", or "daily".'
)
filtered_file_list = None
for file in file_list:
filename_split = file.split('/')[-1].split('.')

# filename parts -- 0 : exp, 1 : time span, 2 : varname, 3 : file format
# the time-span length identifies the frequency:
# 4+4+1 chars (two years + separator) for annual,
# 6+6+1 for monthly, 8+8+1 for daily
freq_span = len(filename_split[1])
if freq_span == 4+4+1 and freq == 'annual':
filtered_file_list = [file]
elif freq_span == 6+6+1 and freq == 'monthly':
filtered_file_list = [file]
elif freq_span == 8+8+1 and freq == 'daily':
filtered_file_list = [file]
if filtered_file_list is None:
raise ValueError(
'no file in `file_list` matches the requested freq.'
)

elif len(file_list)>1 and freq is None:
raise ValueError(
'the freq kwarg must be given because multiple files exist '+
'for the same variable at different frequencies.'
)

else:
filtered_file_list = file_list

return filtered_file_list
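`freq_find` infers frequency purely from the length of the time-span token in the `exp.timespan.varname.format` filename convention. A sketch of the expected behavior; the paths are hypothetical and the class name `MOM6Historical` is an assumption:

files = [
    '/data/hist/run.1993-2019.tos.nc',          # 9-char span  -> annual
    '/data/hist/run.199301-201912.tos.nc',      # 13-char span -> monthly
    '/data/hist/run.19930101-20191231.tos.nc',  # 17-char span -> daily
]

# multiple files for one variable: freq is required to disambiguate
MOM6Historical.freq_find(files, freq='monthly')
# -> ['/data/hist/run.199301-201912.tos.nc']

# a single-element list passes through unchanged
MOM6Historical.freq_find(files[:1])
# -> ['/data/hist/run.1993-2019.tos.nc']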

def get_all(
self,
freq : DataFreqOptions = None
) -> xr.Dataset:
"""
Return all the MOM6 raw-grid/regridded historical run fields
with the static field merged in and the lon/lat-related
variables set as coordinates
Parameters
----------
freq : str
frequency of the data. Default is None.
Returns
-------
xr.Dataset
@@ -549,7 +633,7 @@
raise IOError('for raw grid please input the path to grid file')
else:
ds_static = MOM6Static.get_grid(self.static_relative_dir)
- io_chunk = {}
+ io_chunk = self.chunks
elif self.source == 'opendap':
file_list = OpenDapStore(grid=self.grid,data_type='historical').get_catalog()
for file in file_list:
@@ -560,6 +644,8 @@

file_read = [file for file in file_list if f'.{self.var}.' in file]

file_read = self.freq_find(file_read,freq)

# merge the static field with the variables
ds = xr.open_mfdataset(
file_read,combine='nested',
@@ -588,15 +674,18 @@ def get_all(self) -> xr.Dataset:
else:
mom6_dir = os.path.join(DATA_PATH,self.data_relative_dir)
file_list = glob.glob(f'{mom6_dir}/*.nc')
io_chunk = self.chunks
elif self.source == 'opendap':
file_list = OpenDapStore(grid=self.grid,data_type='historical').get_catalog()
io_chunk = {'time':100}

file_read = [file for file in file_list if f'.{self.var}.' in file]
file_read = self.freq_find(file_read,freq)
ds = xr.open_mfdataset(
file_read,
combine='nested',
concat_dim='time',
- chunks={'time': 100}
+ chunks=io_chunk
).sortby('time')

# check whether a raw-grid file was accidentally read
@@ -617,6 +706,7 @@ def get_single(
year : int = 1993,
month : int = 1,
day : int = 1,
freq : DataFreqOptions = None
) -> xr.Dataset:
"""
Return the mom6 rawgrid/regridded historical run field
@@ -631,6 +721,8 @@
month of the historical run
day : int
day in month of the historical run
freq : str
frequency of the data. Default is None.
Returns
-------
@@ -639,7 +731,7 @@ def get_single(
all the fields included in the `file_list`. The
Dataset object is lazily loaded.
"""
- ds = self.get_all()
+ ds = self.get_all(freq=freq)

min_year = ds['time.year'].min().data
max_year = ds['time.year'].max().data
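With `freq` threaded through, `get_single` can now pull one time slice from an archive that stores the same variable at several frequencies. A hedged sketch (class name, arguments, and paths assumed, as above):

ds_jan93 = MOM6Historical(
    var='tos',
    data_relative_dir='hist_run/',
    static_relative_dir='static/',
).get_single(year=1993, month=1, freq='monthly')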
@@ -760,6 +852,45 @@ def get_grid(
'geolon_u','geolat_u',
'geolon_v','geolat_v']
)

@staticmethod
def get_rotate(
data_relative_dir : str
) -> xr.Dataset:
"""return the original mom6 grid rotation information
The information is store in the ice_month.static.nc file
Parameters
----------
data_relative_dir : str
relative path from DATAPATH setup in config file to
the actual forecast/reforecast data, by setting 'forecast/'
which makes the absolute path to DATAPATH/forecast/
Returns
-------
xr.Dataset
The Xarray Dataset object of mom6's grid lon lat
"""
ds_rotate = xr.open_dataset(
os.path.join(DATA_PATH,data_relative_dir,'ice_monthly.static.nc')
)

# rename the rotation variables to the regular coordinate names
ds_rotate = ds_rotate.rename({
'yT':'yh',
'xT':'xh',
'GEOLON':'geolon',
'GEOLAT':'geolat',
'COSROT':'cosrot',
'SINROT':'sinrot'
})

return ds_rotate.set_coords(
['geolon','geolat']
)
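The diff adds the rotation fields but does not show them being applied. Conventionally, `cosrot`/`sinrot` rotate model-grid velocity components to geographic east/north; the sketch below assumes that convention (the sign convention should be checked against the model's angle definition):

import xarray as xr

def rotate_to_geographic(u: xr.DataArray, v: xr.DataArray,
                         ds_rotate: xr.Dataset):
    # assumes u and v are already co-located with cosrot/sinrot
    # (e.g. interpolated to tracer points)
    u_east = u * ds_rotate['cosrot'] - v * ds_rotate['sinrot']
    v_north = u * ds_rotate['sinrot'] + v * ds_rotate['cosrot']
    return u_east, v_north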

@staticmethod
def get_mask(
data_relative_dir : str,
37 changes: 36 additions & 1 deletion mom6/mom6_module/mom6_mhw.py
@@ -153,9 +153,29 @@ def generate_forecast_batch(
# output dataset
ds_mhw = xr.Dataset()
ds_mhw[f'{self.varname}_threshold{quantile_threshold:02d}'] = da_threshold
ds_mhw[f'{self.varname}_threshold{quantile_threshold:02d}'].attrs['long_name'] = (
f'{self.varname} threshold{quantile_threshold:02d}'
)
ds_mhw[f'{self.varname}_threshold{quantile_threshold:02d}'].attrs['units'] = 'degC'

ds_mhw[f'mhw_prob{quantile_threshold:02d}'] = da_prob
ds_mhw[f'mhw_prob{quantile_threshold:02d}'].attrs['long_name'] = (
f'marine heatwave probability (threshold{quantile_threshold:02d})'
)
ds_mhw[f'mhw_prob{quantile_threshold:02d}'].attrs['units'] = 'unitless'

ds_mhw['ssta_avg'] = da_mhw_mag_ave
- ds_mhw['mhw_magnitude_indentified_ens'] = da_mhw_mag
ds_mhw['ssta_avg'].attrs['long_name'] = (
'anomalous sea surface temperature ensemble mean'
)
ds_mhw['ssta_avg'].attrs['units'] = 'degC'

ds_mhw['mhw_mag_indentified_ens'] = da_mhw_mag
ds_mhw['mhw_mag_indentified_ens'].attrs['long_name'] = (
'marine heatwave magnitude in each ensemble'
)
ds_mhw['mhw_mag_indentified_ens'].attrs['units'] = 'degC'

ds_mhw.attrs['period_of_quantile'] = da_threshold.attrs['period_of_quantile']
ds_mhw.attrs['period_of_climatology'] = da_threshold.attrs['period_of_climatology']
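These additions give every output variable a CF-style `long_name` and `units`. A quick way to inspect the metadata on a returned dataset (the call producing `ds_mhw` is omitted here):

for name, da in ds_mhw.data_vars.items():
    print(f"{name}: {da.attrs.get('long_name')} [{da.attrs.get('units')}]")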

@@ -239,4 +259,19 @@ def generate_forecast_single(
ds_mhw['ssta_avg'] = da_mhw_mag_ave
ds_mhw['mhw_mag_indentified_ens'] = da_mhw_mag

ds_mhw[f'mhw_prob{quantile_threshold:02d}'].attrs['long_name'] = (
f'marine heatwave probability (threshold{quantile_threshold:02d})'
)
ds_mhw[f'mhw_prob{quantile_threshold:02d}'].attrs['units'] = 'unitless'

ds_mhw['ssta_avg'].attrs['long_name'] = (
'anomalous sea surface temperature ensemble mean'
)
ds_mhw['ssta_avg'].attrs['units'] = 'degC'

ds_mhw['mhw_mag_indentified_ens'].attrs['long_name'] = (
'marine heatwave magnitude in each ensemble'
)
ds_mhw['mhw_mag_indentified_ens'].attrs['units'] = 'degC'

return ds_mhw