Skip to content

Commit

Permalink
Merge pull request #118 from xarray-contrib/extra_cell_measures
Browse files Browse the repository at this point in the history
new cell measures implementation
  • Loading branch information
malmans2 authored Dec 3, 2020
2 parents d2dedfa + 36bc9ac commit 023af99
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 32 deletions.
51 changes: 32 additions & 19 deletions cf_xarray/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,27 +338,21 @@ def _get_measure_variable(

def _get_measure(obj: Union[DataArray, Dataset], key: str) -> List[str]:
"""
Translate from cell measures ("area" or "volume") to appropriate variable name.
Translate from cell measures to appropriate variable name.
This function interprets the ``cell_measures`` attribute on DataArrays.
Parameters
----------
obj: DataArray, Dataset
DataArray or Dataset whose ``cell_measures`` attributes are interpreted
key: str, ["area", "volume"]
key: str
key to check for.
Returns
-------
List[str], Variable name(s) in parent xarray object that match cell measure `key`
"""

valid_keys = _CELL_MEASURES
if key not in valid_keys:
raise KeyError(
f"cf_xarray did not understand key {key!r}. Expected one of {valid_keys!r}"
)

if isinstance(obj, DataArray):
obj = obj._to_temp_dataset()

Expand Down Expand Up @@ -438,7 +432,7 @@ def _build_docstring(func):
mapper_docstrings = {
_get_axis_coord: f"One or more of {(_AXIS_NAMES + _COORD_NAMES)!r}",
_get_axis_coord_single: f"One of {(_AXIS_NAMES + _COORD_NAMES)!r}",
_get_measure_variable: f"One of {_CELL_MEASURES!r}",
# _get_measure_variable: f"One of {_CELL_MEASURES!r}",
}

sig = inspect.signature(func)
Expand Down Expand Up @@ -653,6 +647,18 @@ class CFAccessor:

def __init__(self, da):
self._obj = da
self._all_cell_measures = None

def _get_all_cell_measures(self):
    """
    Get all cell measures defined in the object, adding CF pre-defined measures.

    The result is the union of the names in ``_CELL_MEASURES`` and the keys
    of ``self.cell_measures``. It is computed on the first call and stored
    on ``self._all_cell_measures``; later calls return the stored set.

    Returns
    -------
    set
        Cell measure names.
    """

    # ``None`` is the "not computed yet" sentinel set in ``__init__``; test
    # for it explicitly rather than relying on truthiness, so that a
    # computed-but-empty set would not trigger a recomputation.
    # NOTE(review): the stored set is never invalidated here, so
    # ``cell_measures`` attributes changed after the first call are not
    # reflected -- confirm this is acceptable for callers.
    if self._all_cell_measures is None:
        self._all_cell_measures = set(_CELL_MEASURES + tuple(self.cell_measures))

    return self._all_cell_measures

def _process_signature(
self,
Expand Down Expand Up @@ -833,7 +839,7 @@ def describe(self):

text += "\nCell Measures:\n"
measures = self.cell_measures
for key in _CELL_MEASURES:
for key in sorted(self._get_all_cell_measures()):
text += f"\t{key}: {measures[key] if key in measures else []}\n"

text += "\nStandard Names:\n"
Expand Down Expand Up @@ -868,8 +874,7 @@ def keys(self) -> Set[str]:
"""

varnames = list(self.axes) + list(self.coordinates)
if not isinstance(self._obj, Dataset):
varnames.extend(list(self.cell_measures))
varnames.extend(list(self.cell_measures))
varnames.extend(list(self.standard_names))

return set(varnames)
Expand Down Expand Up @@ -930,15 +935,23 @@ def cell_measures(self) -> Dict[str, List[str]]:
Returns
-------
Dictionary of valid cell measure names that can be used with __getitem__ or .cf[key].
Will be ("area", "volume") or a subset thereof.
"""

measures = {
key: apply_mapper(_get_measure, self._obj, key, error=False)
for key in _CELL_MEASURES
}
obj = self._obj
all_attrs = [da.attrs.get("cell_measures", "") for da in obj.coords.values()]
if isinstance(obj, DataArray):
all_attrs += [obj.attrs.get("cell_measures", "")]
elif isinstance(obj, Dataset):
all_attrs += [
da.attrs.get("cell_measures", "") for da in obj.data_vars.values()
]

measures: Dict[str, List[str]] = dict()
for attr in all_attrs:
for key, value in parse_cell_methods_attr(attr).items():
measures[key] = measures.setdefault(key, []) + [value]

return {k: sorted(v) for k, v in measures.items() if v}
return {k: sorted(set(v)) for k, v in measures.items() if v}

def get_standard_names(self) -> List[str]:

Expand Down Expand Up @@ -1069,7 +1082,7 @@ def check_results(names, k):
check_results(names, k)
successful[k] = bool(names)
coords.extend(names)
elif k in _CELL_MEASURES:
elif k in self._get_all_cell_measures():
measure = _get_measure(self._obj, k)
check_results(measure, k)
successful[k] = bool(measure)
Expand Down
25 changes: 19 additions & 6 deletions cf_xarray/tests/test_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,20 +52,30 @@ def test_coordinates():
assert actual == expected


def test_cell_measures():
def test_cell_measures(capsys):
ds = airds.copy(deep=True)
ds["foo"] = xr.DataArray(ds["cell_area"], attrs=dict(standard_name="foo_std_name"))
ds["air"].attrs["cell_measures"] += " foo_measure: foo"
assert "foo_std_name" in ds.cf["air_temperature"].cf
assert ("foo_std_name" in ds.cf["air_temperature"].cf) and ("foo_measure" in ds.cf)

ds["air"].attrs["cell_measures"] += " volume: foo"
expected = dict(area=["cell_area"], volume=["foo"])
actual = ds["air"].cf.cell_measures
assert actual == expected
ds["foo"].attrs["cell_measures"] = ds["air"].attrs["cell_measures"]
expected = dict(area=["cell_area"], foo_measure=["foo"], volume=["foo"])
actual_air = ds["air"].cf.cell_measures
actual_foo = ds.cf["foo_measure"].cf.cell_measures
assert actual_air == actual_foo == expected

actual = ds.cf.cell_measures
assert actual == expected

ds.cf.describe()
actual = capsys.readouterr().out
expected = (
"\nCell Measures:\n\tarea: ['cell_area']\n\tfoo_measure: ['foo']\n\tvolume: ['foo']\n"
"\nStandard Names:\n\tair_temperature: ['air']\n\tfoo_std_name: ['foo']\n\n"
)
assert actual.endswith(expected)


def test_standard_names():
expected = dict(
Expand Down Expand Up @@ -240,7 +250,10 @@ def test_kwargs_expand_key_to_multiple_keys():
@pytest.mark.parametrize(
"obj, expected",
[
(ds, {"latitude", "longitude", "time", "X", "Y", "T", "air_temperature"}),
(
ds,
{"latitude", "longitude", "time", "X", "Y", "T", "air_temperature", "area"},
),
(ds.air, {"latitude", "longitude", "time", "X", "Y", "T", "area"}),
(ds_no_attrs.air, set()),
],
Expand Down
11 changes: 5 additions & 6 deletions doc/examples/introduction.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -559,10 +559,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"**Note:** Although it is possible to assign additional coordinates and cell\n",
"measures, `.cf.coordinates` and `.cf.cell_measures` only return a subset of\n",
"`(\"longitude\", \"latitude\", \"vertical\", \"time\")` and `(\"area\", \"volume\")`,\n",
"respectively.\n"
"**Note:** Although it is possible to assign additional coordinates,\n",
"`.cf.coordinates` only returns a subset of\n",
"`(\"longitude\", \"latitude\", \"vertical\", \"time\")`.\n"
]
},
{
Expand Down Expand Up @@ -927,8 +926,8 @@
"source": [
"## Feature: Weight by Cell Measures\n",
"\n",
"`cf_xarray` can weight by cell measure variables `\"area\"` and `\"volume\"` if the\n",
"appropriate attribute is set\n"
"`cf_xarray` can weight by cell measure variables if the appropriate attribute is\n",
"set\n"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ What's New

v0.4.0 (unreleased)
===================

- Support for indexing by arbitrary cell measures. By `Mattia Almansi`_.

v0.3.1 (Nov 25, 2020)
=====================
Expand Down

0 comments on commit 023af99

Please sign in to comment.