diff --git a/doc/api.rst b/doc/api.rst index e1f70cfbdea..552582a553f 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -152,6 +152,7 @@ Computation Dataset.diff Dataset.quantile Dataset.differentiate + Dataset.integrate **Aggregation**: :py:attr:`~Dataset.all` @@ -321,6 +322,7 @@ Computation DataArray.dot DataArray.quantile DataArray.differentiate + DataArray.integrate **Aggregation**: :py:attr:`~DataArray.all` diff --git a/doc/computation.rst b/doc/computation.rst index 412f24eee6a..2d41479f67f 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -240,6 +240,8 @@ function or method name to ``coord_func`` option, da.coarsen(time=7, x=2, coord_func={'time': 'min'}).mean() +.. _compute.using_coordinates: + Computation using Coordinates ============================= @@ -261,9 +263,17 @@ This method can be used also for multidimensional arrays, coords={'x': [0.1, 0.11, 0.2, 0.3]}) a.differentiate('x') +:py:meth:`~xarray.DataArray.integrate` computes integration based on +trapezoidal rule using their coordinates, + +.. ipython:: python + + a.integrate('x') + .. note:: - This method is limited to simple cartesian geometry. Differentiation along - multidimensional coordinate is not supported. + These methods are limited to simple cartesian geometry. Differentiation + and integration along multidimensional coordinate are not supported. + .. _compute.broadcasting: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b50df2af10e..bed5d9088ae 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -37,6 +37,13 @@ Enhancements as long as the array is not chunked along the resampling dimension. By `Spencer Clark `_. +- :py:meth:`~xarray.DataArray.integrate` and + :py:meth:`~xarray.Dataset.integrate` are newly added. + See :ref:`_compute.using_coordinates` for the detail. + (:issue:`1332`) + By `Keisuke Fujii `_. + + Bug fixes ~~~~~~~~~ @@ -83,6 +90,7 @@ Breaking changes (:issue:`2565`). The previous behavior was to decode them only if they had specific time attributes, now these attributes are copied automatically from the corresponding time coordinate. This might + break downstream code that was relying on these variables to be brake downstream code that was relying on these variables to be not decoded. By `Fabien Maussion `_. diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index a63b63b45bf..31cd6930561 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2428,6 +2428,53 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None): coord, edge_order, datetime_unit) return self._from_temp_dataset(ds) + def integrate(self, dim, datetime_unit=None): + """ integrate the array with the trapezoidal rule. + + .. note:: + This feature is limited to simple cartesian geometry, i.e. coord + must be one dimensional. + + Parameters + ---------- + dim: str, or a sequence of str + Coordinate(s) used for the integration. + datetime_unit + Can be specify the unit if datetime coordinate is used. One of + {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', 'ps', 'fs', + 'as'} + + Returns + ------- + integrated: DataArray + + See also + -------- + numpy.trapz: corresponding numpy function + + Examples + -------- + + >>> da = xr.DataArray(np.arange(12).reshape(4, 3), dims=['x', 'y'], + ... coords={'x': [0, 0.1, 1.1, 1.2]}) + >>> da + + array([[ 0, 1, 2], + [ 3, 4, 5], + [ 6, 7, 8], + [ 9, 10, 11]]) + Coordinates: + * x (x) float64 0.0 0.1 1.1 1.2 + Dimensions without coordinates: y + >>> + >>> da.integrate('x') + + array([5.4, 6.6, 7.8]) + Dimensions without coordinates: y + """ + ds = self._to_temp_dataset().integrate(dim, datetime_unit) + return self._from_temp_dataset(ds) + # priority most be higher than Variable to properly work with binary ufuncs ops.inject_all_ops_and_reduce_methods(DataArray, priority=60) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 29178c9b13c..8086bee7a30 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3867,6 +3867,78 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None): variables[k] = v return self._replace_vars_and_dims(variables) + def integrate(self, coord, datetime_unit=None): + """ integrate the array with the trapezoidal rule. + + .. note:: + This feature is limited to simple cartesian geometry, i.e. coord + must be one dimensional. + + Parameters + ---------- + dim: str, or a sequence of str + Coordinate(s) used for the integration. + datetime_unit + Can be specify the unit if datetime coordinate is used. One of + {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', 'ps', 'fs', + 'as'} + + Returns + ------- + integrated: Dataset + + See also + -------- + DataArray.integrate + numpy.trapz: corresponding numpy function + """ + if not isinstance(coord, (list, tuple)): + coord = (coord, ) + result = self + for c in coord: + result = result._integrate_one(c, datetime_unit=datetime_unit) + return result + + def _integrate_one(self, coord, datetime_unit=None): + from .variable import Variable + + if coord not in self.variables and coord not in self.dims: + raise ValueError('Coordinate {} does not exist.'.format(dim)) + + coord_var = self[coord].variable + if coord_var.ndim != 1: + raise ValueError('Coordinate {} must be 1 dimensional but is {}' + ' dimensional'.format(coord, coord_var.ndim)) + + dim = coord_var.dims[0] + if _contains_datetime_like_objects(coord_var): + if coord_var.dtype.kind in 'mM' and datetime_unit is None: + datetime_unit, _ = np.datetime_data(coord_var.dtype) + elif datetime_unit is None: + datetime_unit = 's' # Default to seconds for cftime objects + coord_var = datetime_to_numeric( + coord_var, datetime_unit=datetime_unit) + + variables = OrderedDict() + coord_names = set() + for k, v in self.variables.items(): + if k in self.coords: + if dim not in v.dims: + variables[k] = v + coord_names.add(k) + else: + if k in self.data_vars and dim in v.dims: + if _contains_datetime_like_objects(v): + v = datetime_to_numeric(v, datetime_unit=datetime_unit) + integ = duck_array_ops.trapz( + v.data, coord_var.data, axis=v.get_axis_num(dim)) + v_dims = list(v.dims) + v_dims.remove(dim) + variables[k] = Variable(v_dims, integ) + else: + variables[k] = v + return self._replace_vars_and_dims(variables, coord_names=coord_names) + @property def real(self): return self._unary_op(lambda x: x.real, diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index b02eb4e899b..e556e96be57 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -101,6 +101,18 @@ def gradient(x, coord, axis, edge_order): return npcompat.gradient(x, coord, axis=axis, edge_order=edge_order) +def trapz(y, x, axis): + if axis < 0: + axis = y.ndim + axis + x_sl1 = (slice(1, None), ) + (None, ) * (y.ndim - axis - 1) + x_sl2 = (slice(None, -1), ) + (None, ) * (y.ndim - axis - 1) + slice1 = (slice(None),) * axis + (slice(1, None), ) + slice2 = (slice(None),) * axis + (slice(None, -1), ) + dx = (x[x_sl1] - x[x_sl2]) + integrand = dx * 0.5 * (y[tuple(slice1)] + y[tuple(slice2)]) + return sum(integrand, axis=axis, skipna=False) + + masked_invalid = _dask_or_eager_func( 'masked_invalid', eager_module=np.ma, dask_module=getattr(dask_array, 'ma', None)) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index e7e091efa4c..0ab16914396 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4702,3 +4702,82 @@ def test_differentiate_cftime(dask): # Test the differentiation of datetimes themselves actual = da['time'].differentiate('time', edge_order=1, datetime_unit='D') assert_allclose(actual, xr.ones_like(da['time']).astype(float)) + + +@pytest.mark.parametrize('dask', [True, False]) +def test_integrate(dask): + rs = np.random.RandomState(42) + coord = [0.2, 0.35, 0.4, 0.6, 0.7, 0.75, 0.76, 0.8] + + da = xr.DataArray(rs.randn(8, 6), dims=['x', 'y'], + coords={'x': coord, 'x2': (('x', ), rs.randn(8)), + 'z': 3, 'x2d': (('x', 'y'), rs.randn(8, 6))}) + if dask and has_dask: + da = da.chunk({'x': 4}) + + ds = xr.Dataset({'var': da}) + + # along x + actual = da.integrate('x') + # coordinate that contains x should be dropped. + expected_x = xr.DataArray( + np.trapz(da, da['x'], axis=0), dims=['y'], + coords={k: v for k, v in da.coords.items() if 'x' not in v.dims}) + assert_allclose(expected_x, actual.compute()) + assert_equal(ds['var'].integrate('x'), ds.integrate('x')['var']) + + # make sure result is also a dask array (if the source is dask array) + assert isinstance(actual.data, type(da.data)) + + # along y + actual = da.integrate('y') + expected_y = xr.DataArray( + np.trapz(da, da['y'], axis=1), dims=['x'], + coords={k: v for k, v in da.coords.items() if 'y' not in v.dims}) + assert_allclose(expected_y, actual.compute()) + assert_equal(actual, ds.integrate('y')['var']) + assert_equal(ds['var'].integrate('y'), ds.integrate('y')['var']) + + # along x and y + actual = da.integrate(('y', 'x')) + assert actual.ndim == 0 + + with pytest.raises(ValueError): + da.integrate('x2d') + + +@pytest.mark.parametrize('dask', [True, False]) +@pytest.mark.parametrize('which_datetime', ['np', 'cftime']) +def test_trapz_datetime(dask, which_datetime): + rs = np.random.RandomState(42) + if which_datetime == 'np': + coord = np.array( + ['2004-07-13', '2006-01-13', '2010-08-13', '2010-09-13', + '2010-10-11', '2010-12-13', '2011-02-13', '2012-08-13'], + dtype='datetime64') + else: + if not has_cftime: + pytest.skip('Test requires cftime.') + coord = xr.cftime_range('2000', periods=8, freq='2D') + + da = xr.DataArray( + rs.randn(8, 6), + coords={'time': coord, 'z': 3, 't2d': (('time', 'y'), rs.randn(8, 6))}, + dims=['time', 'y']) + + if dask and has_dask: + da = da.chunk({'time': 4}) + + actual = da.integrate('time', datetime_unit='D') + expected_data = np.trapz( + da, utils.datetime_to_numeric(da['time'], datetime_unit='D'), axis=0) + expected = xr.DataArray( + expected_data, dims=['y'], + coords={k: v for k, v in da.coords.items() if 'time' not in v.dims}) + assert_allclose(expected, actual.compute()) + + # make sure result is also a dask array (if the source is dask array) + assert isinstance(actual.data, type(da.data)) + + actual2 = da.integrate('time', datetime_unit='h') + assert_allclose(actual, actual2 / 24.0)