Implement integrate (#2653)
* added integrate.

* Docs

* Update via comment

* Update via comments

* integrate can accept multiple dimensions.

* using set instead of list

* dim -> coord
fujiisoup authored and shoyer committed Jan 31, 2019
1 parent 3cc0d22 commit 4923039
Showing 7 changed files with 232 additions and 2 deletions.
2 changes: 2 additions & 0 deletions doc/api.rst
@@ -152,6 +152,7 @@ Computation
Dataset.diff
Dataset.quantile
Dataset.differentiate
Dataset.integrate

**Aggregation**:
:py:attr:`~Dataset.all`
@@ -321,6 +322,7 @@ Computation
DataArray.dot
DataArray.quantile
DataArray.differentiate
DataArray.integrate

**Aggregation**:
:py:attr:`~DataArray.all`
14 changes: 12 additions & 2 deletions doc/computation.rst
@@ -240,6 +240,8 @@ function or method name to ``coord_func`` option,
da.coarsen(time=7, x=2, coord_func={'time': 'min'}).mean()
.. _compute.using_coordinates:

Computation using Coordinates
=============================

@@ -261,9 +263,17 @@ This method can be used also for multidimensional arrays,
coords={'x': [0.1, 0.11, 0.2, 0.3]})
a.differentiate('x')
:py:meth:`~xarray.DataArray.integrate` computes integration based on the
trapezoidal rule using coordinate values,

.. ipython:: python
a.integrate('x')
.. note::
This method is limited to simple cartesian geometry. Differentiation along
multidimensional coordinate is not supported.
These methods are limited to simple cartesian geometry. Differentiation
and integration along multidimensional coordinates are not supported.
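
As a quick cross-check of what the new method computes, the integral above can be reproduced with ``numpy.trapz`` directly. This is a minimal sketch, assuming ``a`` is built like the example earlier in this section (the data values, ``np.arange(12).reshape(4, 3)``, are only illustrative):

```python
import numpy as np
import xarray as xr

# same shape and 'x' coordinate as the differentiation example above
a = xr.DataArray(np.arange(12).reshape(4, 3), dims=['x', 'y'],
                 coords={'x': [0.1, 0.11, 0.2, 0.3]})

# integrate along the dimension of the one-dimensional coordinate 'x'
integrated = a.integrate('x')

# the result matches numpy's trapezoidal rule applied along axis 0
expected = np.trapz(a.values, a['x'].values, axis=0)
np.testing.assert_allclose(integrated.values, expected)
```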


.. _compute.broadcasting:

8 changes: 8 additions & 0 deletions doc/whats-new.rst
@@ -47,6 +47,13 @@ Enhancements
``nearest``. (:issue:`2695`)
By `Hauke Schulz <https://github.com/observingClouds>`_.

- :py:meth:`~xarray.DataArray.integrate` and
:py:meth:`~xarray.Dataset.integrate` have been added.
See :ref:`compute.using_coordinates` for details.
(:issue:`1332`)
By `Keisuke Fujii <https://github.com/fujiisoup>`_.


Bug fixes
~~~~~~~~~

@@ -115,6 +122,7 @@ Breaking changes
(:issue:`2565`). The previous behavior was to decode them only if they
had specific time attributes, now these attributes are copied
automatically from the corresponding time coordinate. This might
break downstream code that was relying on these variables to be
not decoded.
By `Fabien Maussion <https://github.com/fmaussion>`_.
47 changes: 47 additions & 0 deletions xarray/core/dataarray.py
@@ -2425,6 +2425,53 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None):
coord, edge_order, datetime_unit)
return self._from_temp_dataset(ds)

def integrate(self, dim, datetime_unit=None):
""" integrate the array with the trapezoidal rule.
.. note::
This feature is limited to simple cartesian geometry, i.e. coord
must be one dimensional.
Parameters
----------
dim: str, or a sequence of str
Coordinate(s) used for the integration.
datetime_unit : str, optional
Specify the unit if a datetime coordinate is used. One of
{'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', 'ps', 'fs',
'as'}.
Returns
-------
integrated: DataArray
See also
--------
numpy.trapz: corresponding numpy function
Examples
--------
>>> da = xr.DataArray(np.arange(12).reshape(4, 3), dims=['x', 'y'],
... coords={'x': [0, 0.1, 1.1, 1.2]})
>>> da
<xarray.DataArray (x: 4, y: 3)>
array([[ 0, 1, 2],
[ 3, 4, 5],
[ 6, 7, 8],
[ 9, 10, 11]])
Coordinates:
* x (x) float64 0.0 0.1 1.1 1.2
Dimensions without coordinates: y
>>>
>>> da.integrate('x')
<xarray.DataArray (y: 3)>
array([5.4, 6.6, 7.8])
Dimensions without coordinates: y
"""
ds = self._to_temp_dataset().integrate(dim, datetime_unit)
return self._from_temp_dataset(ds)
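
The docstring example covers a numeric coordinate only. A minimal sketch of how ``datetime_unit`` is intended to be used, with made-up coordinate values (not part of the original diff), might look like:

```python
import numpy as np
import xarray as xr

times = np.array(['2019-01-01', '2019-01-02', '2019-01-04', '2019-01-08'],
                 dtype='datetime64[ns]')
da = xr.DataArray([0.0, 1.0, 2.0, 3.0], dims=['time'], coords={'time': times})

# integrate against time measured in days, then in hours; switching the unit
# only rescales the result, as test_trapz_datetime below also checks
per_day = da.integrate('time', datetime_unit='D')
per_hour = da.integrate('time', datetime_unit='h')
np.testing.assert_allclose(per_hour.values, per_day.values * 24.0)
```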


# priority must be higher than Variable to properly work with binary ufuncs
ops.inject_all_ops_and_reduce_methods(DataArray, priority=60)
72 changes: 72 additions & 0 deletions xarray/core/dataset.py
@@ -3869,6 +3869,78 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None):
variables[k] = v
return self._replace_vars_and_dims(variables)

def integrate(self, coord, datetime_unit=None):
""" integrate the array with the trapezoidal rule.
.. note::
This feature is limited to simple cartesian geometry, i.e. coord
must be one dimensional.
Parameters
----------
coord: str, or a sequence of str
Coordinate(s) used for the integration.
datetime_unit : str, optional
Specify the unit if a datetime coordinate is used. One of
{'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', 'ps', 'fs',
'as'}.
Returns
-------
integrated: Dataset
See also
--------
DataArray.integrate
numpy.trapz: corresponding numpy function
"""
if not isinstance(coord, (list, tuple)):
coord = (coord, )
result = self
for c in coord:
result = result._integrate_one(c, datetime_unit=datetime_unit)
return result

def _integrate_one(self, coord, datetime_unit=None):
from .variable import Variable

if coord not in self.variables and coord not in self.dims:
raise ValueError('Coordinate {} does not exist.'.format(coord))

coord_var = self[coord].variable
if coord_var.ndim != 1:
raise ValueError('Coordinate {} must be 1 dimensional but is {}'
' dimensional'.format(coord, coord_var.ndim))

dim = coord_var.dims[0]
if _contains_datetime_like_objects(coord_var):
if coord_var.dtype.kind in 'mM' and datetime_unit is None:
datetime_unit, _ = np.datetime_data(coord_var.dtype)
elif datetime_unit is None:
datetime_unit = 's' # Default to seconds for cftime objects
coord_var = datetime_to_numeric(
coord_var, datetime_unit=datetime_unit)

variables = OrderedDict()
coord_names = set()
for k, v in self.variables.items():
if k in self.coords:
if dim not in v.dims:
variables[k] = v
coord_names.add(k)
else:
if k in self.data_vars and dim in v.dims:
if _contains_datetime_like_objects(v):
v = datetime_to_numeric(v, datetime_unit=datetime_unit)
integ = duck_array_ops.trapz(
v.data, coord_var.data, axis=v.get_axis_num(dim))
v_dims = list(v.dims)
v_dims.remove(dim)
variables[k] = Variable(v_dims, integ)
else:
variables[k] = v
return self._replace_vars_and_dims(variables, coord_names=coord_names)
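
Because the public ``integrate`` simply loops ``_integrate_one`` over each requested coordinate, passing a sequence reduces the dataset one dimension at a time. A hedged usage sketch (the variable and coordinate names here are illustrative, not from the diff):

```python
import numpy as np
import xarray as xr

rs = np.random.RandomState(0)
ds = xr.Dataset({'var': (('x', 'y'), rs.randn(4, 3))},
                coords={'x': [0.0, 0.1, 0.5, 1.0], 'y': [10.0, 20.0, 30.0]})

# integrating over both coordinates collapses 'var' to a scalar
total = ds.integrate(('x', 'y'))
assert total['var'].ndim == 0

# on a rectangular grid the trapezoidal rule is a separable weighted sum,
# so the integration order does not matter (up to floating point)
np.testing.assert_allclose(ds.integrate(('x', 'y'))['var'].values,
                           ds.integrate(('y', 'x'))['var'].values)
```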

@property
def real(self):
return self._unary_op(lambda x: x.real,
12 changes: 12 additions & 0 deletions xarray/core/duck_array_ops.py
@@ -99,6 +99,12 @@ def gradient(x, coord, axis, edge_order):
return npcompat.gradient(x, coord, axis=axis, edge_order=edge_order)


def trapz(y, x, axis):
if axis < 0:
axis = y.ndim + axis
x_sl1 = (slice(1, None), ) + (None, ) * (y.ndim - axis - 1)
x_sl2 = (slice(None, -1), ) + (None, ) * (y.ndim - axis - 1)
slice1 = (slice(None),) * axis + (slice(1, None), )
slice2 = (slice(None),) * axis + (slice(None, -1), )
dx = (x[x_sl1] - x[x_sl2])
integrand = dx * 0.5 * (y[tuple(slice1)] + y[tuple(slice2)])
return sum(integrand, axis=axis, skipna=False)
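
The helper builds the trapezoid weights with plain slicing so the same code path works for NumPy and dask arrays. A quick sanity check against ``numpy.trapz``, written with NumPy only (a sketch, not part of the diff):

```python
import numpy as np

y = np.arange(12.0).reshape(4, 3)
x = np.array([0.0, 0.1, 1.1, 1.2])

# spell out the same slicing as trapz() above for axis=0:
# dx broadcasts against y, and each interval contributes dx * mean of its ends
dx = (x[1:] - x[:-1])[:, None]
manual = (dx * 0.5 * (y[1:, :] + y[:-1, :])).sum(axis=0)

np.testing.assert_allclose(manual, np.trapz(y, x, axis=0))
```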


masked_invalid = _dask_or_eager_func(
'masked_invalid', eager_module=np.ma,
dask_module=getattr(dask_array, 'ma', None))
79 changes: 79 additions & 0 deletions xarray/tests/test_dataset.py
@@ -4716,3 +4716,82 @@ def test_differentiate_cftime(dask):
# Test the differentiation of datetimes themselves
actual = da['time'].differentiate('time', edge_order=1, datetime_unit='D')
assert_allclose(actual, xr.ones_like(da['time']).astype(float))


@pytest.mark.parametrize('dask', [True, False])
def test_integrate(dask):
rs = np.random.RandomState(42)
coord = [0.2, 0.35, 0.4, 0.6, 0.7, 0.75, 0.76, 0.8]

da = xr.DataArray(rs.randn(8, 6), dims=['x', 'y'],
coords={'x': coord, 'x2': (('x', ), rs.randn(8)),
'z': 3, 'x2d': (('x', 'y'), rs.randn(8, 6))})
if dask and has_dask:
da = da.chunk({'x': 4})

ds = xr.Dataset({'var': da})

# along x
actual = da.integrate('x')
# coordinate that contains x should be dropped.
expected_x = xr.DataArray(
np.trapz(da, da['x'], axis=0), dims=['y'],
coords={k: v for k, v in da.coords.items() if 'x' not in v.dims})
assert_allclose(expected_x, actual.compute())
assert_equal(ds['var'].integrate('x'), ds.integrate('x')['var'])

# make sure result is also a dask array (if the source is dask array)
assert isinstance(actual.data, type(da.data))

# along y
actual = da.integrate('y')
expected_y = xr.DataArray(
np.trapz(da, da['y'], axis=1), dims=['x'],
coords={k: v for k, v in da.coords.items() if 'y' not in v.dims})
assert_allclose(expected_y, actual.compute())
assert_equal(actual, ds.integrate('y')['var'])
assert_equal(ds['var'].integrate('y'), ds.integrate('y')['var'])

# along x and y
actual = da.integrate(('y', 'x'))
assert actual.ndim == 0

with pytest.raises(ValueError):
da.integrate('x2d')


@pytest.mark.parametrize('dask', [True, False])
@pytest.mark.parametrize('which_datetime', ['np', 'cftime'])
def test_trapz_datetime(dask, which_datetime):
rs = np.random.RandomState(42)
if which_datetime == 'np':
coord = np.array(
['2004-07-13', '2006-01-13', '2010-08-13', '2010-09-13',
'2010-10-11', '2010-12-13', '2011-02-13', '2012-08-13'],
dtype='datetime64')
else:
if not has_cftime:
pytest.skip('Test requires cftime.')
coord = xr.cftime_range('2000', periods=8, freq='2D')

da = xr.DataArray(
rs.randn(8, 6),
coords={'time': coord, 'z': 3, 't2d': (('time', 'y'), rs.randn(8, 6))},
dims=['time', 'y'])

if dask and has_dask:
da = da.chunk({'time': 4})

actual = da.integrate('time', datetime_unit='D')
expected_data = np.trapz(
da, utils.datetime_to_numeric(da['time'], datetime_unit='D'), axis=0)
expected = xr.DataArray(
expected_data, dims=['y'],
coords={k: v for k, v in da.coords.items() if 'time' not in v.dims})
assert_allclose(expected, actual.compute())

# make sure result is also a dask array (if the source is dask array)
assert isinstance(actual.data, type(da.data))

actual2 = da.integrate('time', datetime_unit='h')
assert_allclose(actual, actual2 / 24.0)
