Add 'to_iris' and 'from_iris' to Dataset methods #2449

Closed
wants to merge 5 commits
2 changes: 2 additions & 0 deletions doc/api.rst
@@ -465,6 +465,8 @@ Dataset methods
Dataset.to_dataframe
Dataset.to_dask_dataframe
Dataset.to_dict
Dataset.to_iris
Dataset.from_iris
Dataset.from_dataframe
Dataset.from_dict
Dataset.close
7 changes: 6 additions & 1 deletion doc/whats-new.rst
@@ -52,7 +52,12 @@ Enhancements
:py:meth:`~xarray.DataArray.interp`, and
:py:meth:`~xarray.Dataset.interp`.
By `Spencer Clark <https://github.com/spencerkclark>`_


- Added :py:meth:`~xarray.Dataset.to_iris` and
:py:meth:`~xarray.Dataset.from_iris` for converting a Dataset to and from an
Iris_ CubeList with the same data and coordinates.
By `Jacob Tomlinson <https://github.com/jacobtomlinson>`_.

Bug fixes
~~~~~~~~~

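For orientation, a minimal usage sketch of the API this entry describes. It assumes the PR branch is installed and that iris is available; the variable names and data are illustrative, not part of this diff.

```python
import numpy as np
import xarray as xr

# Two data variables on shared dimensions; each would become one cube
# in the returned CubeList.
ds = xr.Dataset(
    {
        'temperature': (('x', 't'), np.arange(6.0).reshape(2, 3), {'units': 'K'}),
        'pressure': (('x', 't'), np.ones((2, 3)), {'units': 'Pa'}),
    },
    coords={'x': [-2, 2], 't': [0, 1, 2]},
)

cubes = ds.to_iris()                        # iris.cube.CubeList, one cube per data variable
roundtripped = xr.Dataset.from_iris(cubes)  # back to an xarray.Dataset keyed by var_name
```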
18 changes: 16 additions & 2 deletions xarray/convert.py
Expand Up @@ -11,6 +11,7 @@
from .conventions import decode_cf
from .core import duck_array_ops
from .core.dataarray import DataArray
from .core.dataset import Dataset
from .core.dtypes import get_fill_value
from .core.pycompat import OrderedDict, range

@@ -139,7 +140,7 @@ def _get_iris_args(attrs):


# TODO: Add converting bounds from xarray to Iris and back
def to_iris(dataarray):
def dataarray_to_iris(dataarray):
""" Convert a DataArray into a Iris Cube
"""
# Iris not a hard dependency
@@ -221,7 +222,7 @@ def _name(iris_obj, default='unknown'):
iris_obj.long_name or default)


def from_iris(cube):
def dataarray_from_iris(cube):
""" Convert a Iris cube into an DataArray
"""
import iris.exceptions
@@ -273,3 +274,16 @@
attrs=array_attrs, dims=dims)
decoded_ds = decode_cf(dataarray._to_temp_dataset())
return dataarray._from_temp_dataset(decoded_ds)


def dataset_to_iris(dataset):
""" Convert a Dataset into an Iris CubeList.
"""
from iris.cube import CubeList
return CubeList([dataset[name].to_iris() for name in dataset.data_vars])


def dataset_from_iris(cubelist):
""" Convert an Iris CubeList into a Dataset.
"""
return Dataset({cube.var_name: DataArray.from_iris(cube) for cube in cubelist})
Member
Can we use the name attribute already on DataArray.from_iris(cube)? We have some special logic already for figuring out names in DataArray.from_iris.

Contributor Author

So cube.name instead of cube.var_name?

Member

I would say DataArray.from_iris(cube).name, but it probably makes sense to save it in an intermediate variable to avoid converting the cube twice.
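For clarity, a sketch of what the suggested change could look like; it is not part of this diff, and inside xarray/convert.py the function would use the module's existing relative imports rather than the top-level ones shown here.

```python
from xarray import DataArray, Dataset


def dataset_from_iris(cubelist):
    """ Convert an Iris CubeList into a Dataset, converting each cube once.
    """
    # DataArray.from_iris already resolves a name (var_name, standard_name
    # or long_name), so convert once, keep the result, and reuse its .name.
    arrays = [DataArray.from_iris(cube) for cube in cubelist]
    return Dataset({array.name: array for array in arrays})
```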

8 changes: 4 additions & 4 deletions xarray/core/dataarray.py
@@ -1855,15 +1855,15 @@ def from_cdms2(cls, variable):
def to_iris(self):
"""Convert this array into a iris.cube.Cube
"""
from ..convert import to_iris
return to_iris(self)
from ..convert import dataarray_to_iris
return dataarray_to_iris(self)

@classmethod
def from_iris(cls, cube):
"""Convert a iris.cube.Cube into an xarray.DataArray
"""
from ..convert import from_iris
return from_iris(cube)
from ..convert import dataarray_from_iris
return dataarray_from_iris(cube)

def _all_compat(self, other, compat_str):
"""Helper function for equals and identical"""
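For reference, the DataArray-level round trip that these renamed helpers sit behind is already public API on released xarray (with iris installed), and it is what the new Dataset methods call once per data variable; the values below are illustrative.

```python
import numpy as np
import xarray as xr

da = xr.DataArray(
    np.arange(6.0).reshape(2, 3),
    coords={'x': [-2, 2], 't': [0, 1, 2]},
    dims=('x', 't'),
    name='temperature',
    attrs={'units': 'K'},
)

cube = da.to_iris()                  # iris.cube.Cube
back = xr.DataArray.from_iris(cube)  # xarray.DataArray with the same data and coords
```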
17 changes: 15 additions & 2 deletions xarray/core/dataset.py
@@ -1293,6 +1293,19 @@ def to_zarr(self, store=None, mode='w-', synchronizer=None, group=None,
return to_zarr(self, store=store, mode=mode, synchronizer=synchronizer,
group=group, encoding=encoding, compute=compute)

def to_iris(self):
"""Convert this dataset into an iris.cube.CubeList.
"""
from ..convert import dataset_to_iris
return dataset_to_iris(self)

@classmethod
def from_iris(cls, cubelist):
"""Convert an iris.cube.CubeList into a dataset.
"""
from ..convert import dataset_from_iris
return dataset_from_iris(cubelist)

def __unicode__(self):
return formatting.dataset_repr(self)

@@ -1415,7 +1428,7 @@ def _validate_indexers(self, indexers):
""" Here we make sure
+ indexer has a valid keys
+ indexer is in a valid data type
+ string indexers are cast to the appropriate date type if the
+ string indexers are cast to the appropriate date type if the
associated index is a DatetimeIndex or CFTimeIndex
"""
from .dataarray import DataArray
@@ -1998,7 +2011,7 @@ def _validate_interp_indexer(x, new_x):
'Instead got\n{}'.format(new_x))
else:
return (x, new_x)

variables = OrderedDict()
for name, var in iteritems(obj._variables):
if name not in indexers:
37 changes: 35 additions & 2 deletions xarray/tests/test_dataset.py
@@ -23,8 +23,8 @@
from . import (
InaccessibleArray, TestCase, UnexpectedDataAccess, assert_allclose,
assert_array_equal, assert_equal, assert_identical, has_cftime,
has_dask, raises_regex, requires_bottleneck, requires_dask, requires_scipy,
source_ndarray)
has_dask, raises_regex, requires_bottleneck, requires_dask, requires_iris,
requires_scipy, source_ndarray)

try:
import cPickle as pickle
@@ -4635,3 +4635,36 @@ def test_differentiate_cftime(dask):
# Test the differentiation of datetimes themselves
actual = da['time'].differentiate('time', edge_order=1, datetime_unit='D')
assert_allclose(actual, xr.ones_like(da['time']).astype(float))


class TestIrisConversion(object):
@requires_iris
def test_to_and_from_iris(self):
import iris

# to iris
coord_dict = OrderedDict()
coord_dict['distance'] = ('distance', [-2, 2], {'units': 'meters'})
coord_dict['time'] = ('time', pd.date_range('2000-01-01', periods=3))
coord_dict['height'] = 10
coord_dict['distance2'] = ('distance', [0, 1], {'foo': 'bar'})
coord_dict['time2'] = (('distance', 'time'), [[0, 1, 2], [2, 3, 4]])

array = DataArray(np.arange(6, dtype='float').reshape(2, 3),
coord_dict, name='Temperature',
attrs={'baz': 123, 'units': 'Kelvin',
'standard_name': 'fire_temperature',
'long_name': 'Fire Temperature'},
dims=('distance', 'time'))

original = Dataset({'Temperature': array})
actual = original.to_iris()

assert_array_equal(actual.extract(iris.Constraint(name='Temperature'))[0].data,
original['Temperature'].data)

assert len(list(original.data_vars)) == len(actual)

roundtripped = Dataset.from_iris(actual)
assert_identical(original, roundtripped)
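A possible follow-up test, not part of this diff, that exercises the CubeList path with more than one data variable so the length check is non-trivial; it assumes the same requires_iris marker and the names already imported at the top of test_dataset.py (Dataset, DataArray, np, assert_identical).

```python
@requires_iris
def test_dataset_to_and_from_iris_multiple_variables():
    # Two data variables, no coordinates: one cube per variable.
    original = Dataset({
        'Temperature': DataArray(np.zeros((2, 3)),
                                 dims=('distance', 'time'),
                                 attrs={'units': 'Kelvin'}),
        'Pressure': DataArray(np.ones((2, 3)),
                              dims=('distance', 'time'),
                              attrs={'units': 'Pa'}),
    })

    actual = original.to_iris()
    assert len(actual) == len(list(original.data_vars))
    assert {cube.var_name for cube in actual} == {'Temperature', 'Pressure'}

    roundtripped = Dataset.from_iris(actual)
    assert_identical(original, roundtripped)
```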