Skip to content

Commit

Permalink
Fix various pp issues related to running seaice_suite (#721)
Browse files Browse the repository at this point in the history
* fix pp issues for seaice_suite

* fix arg issue

* rename functions

* add default return for conversion function
  • Loading branch information
jtmims authored Dec 18, 2024
1 parent 3d2bc45 commit abc89d6
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 41 deletions.
8 changes: 7 additions & 1 deletion data/fieldlist_GFDL.jsonc
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,13 @@
"realm": "atmos",
"units": "1",
"ndim": 3
},
},
"siconc": {
"standard_name": "sea_ice_area_fraction",
"realm": "seaIce",
"units": "0-1",
"ndim": 3
},
"IWP": {
"standard_name": "atmosphere_mass_content_of_cloud_ice",
"long_name": "Ice water path",
Expand Down
2 changes: 1 addition & 1 deletion diagnostics/seaice_suite/seaice_suite_sic_mean_sigma.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def readindata(file, varname='siconc', firstyr='1979', lastyr='2014'):


# 1) Loading model data files:
input_file = "{DATADIR}/mon/{CASENAME}.{siconc_var}.mon.nc".format(**os.environ)
input_file = os.environ['SICONC_FILE']
obsoutput_dir = "{WORK_DIR}/obs/".format(**os.environ)
modoutput_dir = "{WORK_DIR}/model/".format(**os.environ)
figures_dir = "{WORK_DIR}/model/".format(**os.environ)
Expand Down
47 changes: 45 additions & 2 deletions src/data_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,36 @@ def read_varlist(self, parent, append_vars: bool=False):

def set_date_range(self, startdate: str, enddate: str):
self.date_range = util.DateRange(start=startdate, end=enddate)

def set_query(self, var: varlist_util.VarlistEntry, path_regex: str):
    """Populate ``self.query`` with the catalog search criteria for *var*.

    Fills in the ``frequency``, ``path``, ``realm``, ``standard_name``, and
    ``variable_id`` entries of the query dict. When the variable has been
    translated to another convention, the translated name and
    standard_name(s) are used instead of the POD-requested ones.

    Args:
        var: varlist entry to build the query for.
        path_regex: regex (or list of regexes) matching the case's data paths.
    """
    # trailing '*' makes the realm a prefix match in the catalog search
    realm_regex = var.realm + '*'
    var_id = var.name
    standard_name = var.standard_name
    if var.translation.convention is not None:
        var_id = var.translation.name
        standard_name = var.translation.standard_name
        if any(var.translation.alternate_standard_names):
            # search on every acceptable standard_name, not just the primary
            standard_name = [var.translation.standard_name] + \
                var.translation.alternate_standard_names
    # NOTE(review): the original code also tracked a `date_range` local here;
    # it was never read (flagged by CodeQL as an unused variable), so it has
    # been removed.
    if var.is_static:
        freq = "fx"
    else:
        freq = var.T.frequency
        if not isinstance(freq, str):
            freq = freq.format_local()
        if freq == 'hr':
            # normalize to the catalog's '1hr' frequency label
            freq = '1hr'

    # define initial query dictionary with variable settings requirements that do not change if
    # the variable is translated
    self.query['frequency'] = freq
    self.query['path'] = path_regex
    self.query['realm'] = realm_regex
    self.query['standard_name'] = standard_name
    self.query['variable_id'] = var_id


def translate_varlist(self,
var: varlist_util.VarlistEntry,
Expand Down Expand Up @@ -94,7 +124,10 @@ class CMIPDataSource(DataSourceBase):
# col_spec = sampleLocalFileDataSource_col_spec
# varlist = diagnostic.varlist
convention: str = "CMIP"


def set_query(self, var: varlist_util.VarlistEntry, path_regex: str):
    """Build the catalog query for *var* using the base-class defaults.

    CMIP catalogs need no convention-specific adjustments, so this simply
    delegates to :meth:`DataSourceBase.set_query`.
    """
    super().set_query(var, path_regex)
    return

@data_source.maker
class CESMDataSource(DataSourceBase):
Expand All @@ -105,7 +138,10 @@ class CESMDataSource(DataSourceBase):
# col_spec = sampleLocalFileDataSource_col_spec
# varlist = diagnostic.varlist
convention: str = "CESM"


def set_query(self, var: varlist_util.VarlistEntry, path_regex: str):
    """Build the catalog query for *var* using the base-class defaults.

    CESM catalogs need no convention-specific adjustments, so this simply
    delegates to :meth:`DataSourceBase.set_query`.
    """
    super().set_query(var, path_regex)
    return

@data_source.maker
class GFDLDataSource(DataSourceBase):
Expand All @@ -116,3 +152,10 @@ class GFDLDataSource(DataSourceBase):
# col_spec = sampleLocalFileDataSource_col_spec
# varlist = diagnostic.varlist
convention: str = "GFDL"

def set_query(self, var: varlist_util.VarlistEntry, path_regex: str):
    """Build the catalog query for *var*, with GFDL-specific realm handling.

    Delegates to :meth:`DataSourceBase.set_query`, then remaps a translated
    ``seaIce*`` realm pattern to the literal ``ice`` realm used by GFDL
    catalogs.
    """
    super().set_query(var, path_regex)
    # HACK: without this remap the query pattern 'seaIce*' also matches
    # unwanted directories such as ice_1x1deg; pin it to 'ice' instead.
    if self.query['realm'] == 'seaIce*':
        self.query['realm'] = 'ice'
64 changes: 36 additions & 28 deletions src/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,36 @@ def execute(self, var: varlist_util.VarlistEntry,
pass


class PercentConversionFunction(PreprocessorFunctionBase):
    """A PreprocessorFunction which converts the dependent variable's units and
    values, for the specific case of percentages. ``0-1`` is not defined in the
    UDUNITS-2 library, so this function handles the conversions between
    fractional (``0-1``) units and ``%``.
    """

    # unit strings handled by this function: (fractional, percent).
    # NOTE: these are unit strings, not CF standard names, despite the name.
    _std_name_tuple = ('0-1', '%')

    def execute(self, var, ds, **kwargs):
        """Convert the translated variable in *ds* between ``0-1`` and ``%``.

        Args:
            var: varlist entry whose ``units`` attribute gives the units the
                POD requests; ``var.translation`` describes the data on disk.
            ds: dataset (xarray-like) containing the translated variable.

        Returns:
            *ds*, with units attribute and values converted when one of the
            two percentage cases applies; otherwise unchanged.
        """
        var_unit = getattr(var, "units", "")
        tv = var.translation  # abbreviate
        tv_unit = getattr(tv, "units", "")
        # 0-1 (fraction) on disk, % requested: scale up and relabel
        if str(tv_unit) == self._std_name_tuple[0] and str(var_unit) == self._std_name_tuple[1]:
            ds[tv.name].attrs['units'] = '%'
            ds[tv.name].values = ds[tv.name].values * 100
            return ds
        # % on disk, 0-1 requested: relabel, and rescale only when needed
        if str(tv_unit) == self._std_name_tuple[1] and str(var_unit) == self._std_name_tuple[0]:
            ds[tv.name].attrs['units'] = '0-1'
            # sometimes data labeled '%' is already on [0, 1]; check the whole
            # array instead of the original hard-coded slice [:, :, 3], which
            # assumed 3-D data with >= 4 entries on the last axis and raised
            # IndexError for any other shape
            if ds[tv.name].values.max() < 1.5:
                return ds
            ds[tv.name].values = ds[tv.name].values / 100
            return ds
        # neither percentage case applies: default no-op return
        return ds

class PrecipRateToFluxFunction(PreprocessorFunctionBase):
"""A PreprocessorFunction which converts the dependent variable's units, for
the specific case of precipitation. Flux and precip rate differ by a factor
Expand Down Expand Up @@ -694,7 +724,7 @@ def _functions(self):
"""
# normal operation: run all functions
return [
AssociatedVariablesFunction,
AssociatedVariablesFunction, PercentConversionFunction,
PrecipRateToFluxFunction, ConvertUnitsFunction,
ExtractLevelFunction, RenameVariablesFunction
]
Expand Down Expand Up @@ -1012,33 +1042,11 @@ def query_catalog(self,
path_regex = [re.compile(r'({})'.format(case_name))]

for var in case_d.varlist.iter_vars():
realm_regex = var.realm + '*'
date_range = var.T.range
var_id = var.name
standard_name = var.standard_name
if var.translation.convention is not None:
var_id = var.translation.name
standard_name = var.translation.standard_name
if any(var.translation.alternate_standard_names):
standard_name = [var.translation.standard_name] + var.translation.alternate_standard_names
date_range = var.translation.T.range
if var.is_static:
date_range = None
freq = "fx"
else:
freq = var.T.frequency
if not isinstance(freq, str):
freq = freq.format_local()
if freq == 'hr':
freq = '1hr'


# define initial query dictionary with variable settings requirements that do not change if
# the variable is translated
case_d.query['frequency'] = freq
case_d.query['path'] = path_regex
case_d.query['realm'] = realm_regex
case_d.query['standard_name'] = standard_name
case_d.query['variable_id'] = var_id
case_d.set_query(var, path_regex)

# change realm key name if necessary
if cat.df.get('modeling_realm', None) is not None:
Expand All @@ -1047,7 +1055,7 @@ def query_catalog(self,
# search catalog for convention specific query object
var.log.info("Querying %s for variable %s for case %s.",
data_catalog,
var_id,
case_d.query['variable_id'],
case_name)
cat_subset = cat.search(**case_d.query)
if cat_subset.df.empty:
Expand Down Expand Up @@ -1086,7 +1094,7 @@ def query_catalog(self,
f"configuration file.")
else:
raise util.DataRequestError(
f"Unable to find match or alternate for {var_id}"
f"Unable to find match or alternate for {case_d.query['variable_id']}"
f" for case {case_name} in {data_catalog}")

# Get files in specified date range
Expand Down Expand Up @@ -1162,7 +1170,7 @@ def query_catalog(self,
# check that the trimmed variable data in the merged dataset matches the desired date range
if not var.is_static:
try:
self.check_time_bounds(cat_dict[case_name], var.translation, freq)
self.check_time_bounds(cat_dict[case_name], var.translation, var.T.frequency)
except LookupError:
var.log.error(f'Time bounds in trimmed dataset for {var_id} in case {case_name} do not match'
f'requested date_range.')
Expand Down
2 changes: 2 additions & 0 deletions src/units.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ def conversion_factor(source_unit, dest_unit):
*source_unit*, *dest_unit* are coerced to :class:`Units` objects via
:func:`to_cfunits`.
"""
if str(source_unit) == str(dest_unit):
return 1.0 # bypass function if the units have the same string allowing units like '0-1' to be used
source_unit, dest_unit = to_equivalent_units(source_unit, dest_unit)
return Units.conform(1.0, source_unit, dest_unit)

Expand Down
19 changes: 10 additions & 9 deletions src/xr_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def _old_axes_dict(self, var_name=None):
if len(v) > 1 and var_name is not None:
ax = [c for c in v if c in itertools.chain.from_iterable(axes_obj.cf.coordinates.values())]
del_ax = [d for d in v if d not in itertools.chain.from_iterable(axes_obj.cf.coordinates.values())]
if del_ax is not None: # remove the entries that are not in the cf.coordinates.values dict
if del_ax is not None and len(del_ax) > 0: # remove the entries that are not in the cf.coordinates.values dict
# append entries that are in the cf.coordinates.values dict if they are missing in coords_list
# and dims_list
if del_ax[0] in coords_list:
Expand All @@ -208,14 +208,15 @@ def _old_axes_dict(self, var_name=None):

if ax is not None:
vardict[k] = ax
if ax[0] not in coords_list:
_log.warning(("cf_xarray fix: %s axis %s not in dimensions "
"for %s; dropping."), k, ax[0], var_name)
delete_keys.append(k)
else:
coords_list.remove(ax[0])
if ax[0] in dims_list:
dims_list.remove(ax[0])
for a in ax:
if a not in coords_list:
_log.warning(("cf_xarray fix: %s axis %s not in dimensions "
"for %s; dropping."), k, a, var_name)
delete_keys.append(k)
else:
coords_list.remove(a)
if a in dims_list:
dims_list.remove(a)
elif len(v) == 1:
if v[0] not in coords_list:
_log.warning(("cf_xarray fix: %s axis %s not in dimensions "
Expand Down

0 comments on commit abc89d6

Please sign in to comment.