From 1eefcdcdf0c18745b7858331af8e89bde8626b65 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 31 Jan 2020 17:10:01 +0000 Subject: [PATCH 01/54] Fix RTD build (#3737) * pin some requirements to reduce conda's memory usage * remove python section in readthedocs.yml --- ci/requirements/doc.yml | 21 +++++++++++---------- readthedocs.yml | 4 ---- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index a8b72dc0956..16cce5782e0 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -6,20 +6,21 @@ dependencies: - python=3.7 - bottleneck - cartopy - - cfgrib - - h5netcdf + - cfgrib>=0.9 + - dask>=2.10 + - h5netcdf>=0.7.4 - ipykernel - ipython - - iris + - iris>=2.3 - jupyter_client - nbsphinx - - netcdf4 + - netcdf4>=1.5 - numba - - numpy + - numpy>=1.17 - numpydoc - - pandas - - rasterio + - pandas>=1.0 + - rasterio>=1.1 - seaborn - - sphinx - - sphinx_rtd_theme - - zarr + - sphinx>=2.3 + - sphinx_rtd_theme>=0.4 + - zarr>=2.4 diff --git a/readthedocs.yml b/readthedocs.yml index 9ed8d28eaf2..88aee82a44b 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -6,8 +6,4 @@ build: conda: environment: ci/requirements/doc.yml -python: - version: 3.7 - install: [] - formats: [] From 58b11a63732e3066ad38dc1e63a733f4cce6425f Mon Sep 17 00:00:00 2001 From: Graham Inggs Date: Sun, 23 Feb 2020 21:39:39 +0200 Subject: [PATCH 02/54] Let test_repr_of_dataset pass on big-endian systems (#3772) --- xarray/tests/test_formatting_html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_formatting_html.py b/xarray/tests/test_formatting_html.py index fea24ff93f8..01357000b20 100644 --- a/xarray/tests/test_formatting_html.py +++ b/xarray/tests/test_formatting_html.py @@ -130,5 +130,5 @@ def test_repr_of_dataset(dataset): assert ( formatted.count("class='xr-section-summary-in' type='checkbox' checked>") == 3 ) - assert "<U4" in formatted + assert "<U4" in formatted or ">U4" in formatted assert "<IA>" in formatted From 45d88fc4b2524ecb0c1236cd31767d00f72b0ea1 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Mon, 2 Mar 2020 04:41:19 -0500 Subject: [PATCH 03/54] Enable pandas-style rounding of cftime.datetime objects (#3792) * Initial progress on implementing cftime floor/ceil/round * Improve tests and docstrings * Add tests of rounding cftime datetimes via dt accessor * Add documentation * docstring edits * Test rounding raises error with non-fixed frequency * black * typo * A couple cleanup items: - Fix floating point issue in asi8 and add tests - Ensure dask only computes once when using the rounding accessors * black --- doc/weather-climate.rst | 8 ++ doc/whats-new.rst | 4 + xarray/coding/cftimeindex.py | 135 +++++++++++++++++++++++++++++++ xarray/core/accessor_dt.py | 28 ++++--- xarray/tests/test_accessor_dt.py | 104 ++++++++++++++++++++++++ xarray/tests/test_cftimeindex.py | 89 ++++++++++++++++++++ 6 files changed, 359 insertions(+), 9 deletions(-) diff --git a/doc/weather-climate.rst b/doc/weather-climate.rst index 96641c2b97e..9e7c0f1d51d 100644 --- a/doc/weather-climate.rst +++ b/doc/weather-climate.rst @@ -105,6 +105,14 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: da.time.dt.dayofyear da.time.dt.dayofweek +- Rounding of datetimes to fixed frequencies via the ``dt`` accessor: + +.. ipython:: python + + da.time.dt.ceil('3D') + da.time.dt.floor('5D') + da.time.dt.round('2D') + - Group-by operations based on datetime accessor attributes (e.g. 
by month of the year): diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1deb77eecfc..579719cb8d7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,6 +25,10 @@ Breaking changes New Features ~~~~~~~~~~~~ +- Added support for :py:class:`pandas.DatetimeIndex`-style rounding of + ``cftime.datetime`` objects directly via a :py:class:`CFTimeIndex` or via the + :py:class:`~core.accessor_dt.DatetimeAccessor`. + By `Spencer Clark `_ - Support new h5netcdf backend keyword `phony_dims` (available from h5netcdf v0.8.0 for :py:class:`~xarray.backends.H5NetCDFStore`. By `Kai Mühlbauer `_. diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 8b440812ca9..99f90430e91 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -528,6 +528,83 @@ def strftime(self, date_format): """ return pd.Index([date.strftime(date_format) for date in self._data]) + @property + def asi8(self): + """Convert to integers with units of microseconds since 1970-01-01.""" + from ..core.resample_cftime import exact_cftime_datetime_difference + + epoch = self.date_type(1970, 1, 1) + return np.array( + [ + _total_microseconds(exact_cftime_datetime_difference(epoch, date)) + for date in self.values + ] + ) + + def _round_via_method(self, freq, method): + """Round dates using a specified method.""" + from .cftime_offsets import CFTIME_TICKS, to_offset + + offset = to_offset(freq) + if not isinstance(offset, CFTIME_TICKS): + raise ValueError(f"{offset} is a non-fixed frequency") + + unit = _total_microseconds(offset.as_timedelta()) + values = self.asi8 + rounded = method(values, unit) + return _cftimeindex_from_i8(rounded, self.date_type, self.name) + + def floor(self, freq): + """Round dates down to fixed frequency. + + Parameters + ---------- + freq : str or CFTimeOffset + The frequency level to round the index to. Must be a fixed + frequency like 'S' (second) not 'ME' (month end). See `frequency + aliases `_ + for a list of possible values. + + Returns + ------- + CFTimeIndex + """ + return self._round_via_method(freq, _floor_int) + + def ceil(self, freq): + """Round dates up to fixed frequency. + + Parameters + ---------- + freq : str or CFTimeOffset + The frequency level to round the index to. Must be a fixed + frequency like 'S' (second) not 'ME' (month end). See `frequency + aliases `_ + for a list of possible values. + + Returns + ------- + CFTimeIndex + """ + return self._round_via_method(freq, _ceil_int) + + def round(self, freq): + """Round dates to a fixed frequency. + + Parameters + ---------- + freq : str or CFTimeOffset + The frequency level to round the index to. Must be a fixed + frequency like 'S' (second) not 'ME' (month end). See `frequency + aliases `_ + for a list of possible values. + + Returns + ------- + CFTimeIndex + """ + return self._round_via_method(freq, _round_to_nearest_half_even) + def _parse_iso8601_without_reso(date_type, datetime_str): date, _ = _parse_iso8601_with_reso(date_type, datetime_str) @@ -554,3 +631,61 @@ def _parse_array_of_cftime_strings(strings, date_type): return np.array( [_parse_iso8601_without_reso(date_type, s) for s in strings.ravel()] ).reshape(strings.shape) + + +def _cftimeindex_from_i8(values, date_type, name): + """Construct a CFTimeIndex from an array of integers. + + Parameters + ---------- + values : np.array + Integers representing microseconds since 1970-01-01. + date_type : cftime.datetime + Type of date for the index. + name : str + Name of the index. 
+ + Returns + ------- + CFTimeIndex + """ + epoch = date_type(1970, 1, 1) + dates = np.array([epoch + timedelta(microseconds=int(value)) for value in values]) + return CFTimeIndex(dates, name=name) + + +def _total_microseconds(delta): + """Compute the total number of microseconds of a datetime.timedelta. + + Parameters + ---------- + delta : datetime.timedelta + Input timedelta. + + Returns + ------- + int + """ + return delta / timedelta(microseconds=1) + + +def _floor_int(values, unit): + """Copied from pandas.""" + return values - np.remainder(values, unit) + + +def _ceil_int(values, unit): + """Copied from pandas.""" + return values + np.remainder(-values, unit) + + +def _round_to_nearest_half_even(values, unit): + """Copied from pandas.""" + if unit % 2: + return _ceil_int(values - unit // 2, unit) + quotient, remainder = np.divmod(values, unit) + mask = np.logical_or( + remainder > (unit // 2), np.logical_and(remainder == (unit // 2), quotient % 2) + ) + quotient[mask] += 1 + return quotient * unit diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index c407371f9f0..de0e332b26c 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -78,20 +78,27 @@ def _get_date_field(values, name, dtype): return access_method(values, name) -def _round_series(values, name, freq): - """Coerce an array of datetime-like values to a pandas Series and - apply requested rounding +def _round_through_series_or_index(values, name, freq): + """Coerce an array of datetime-like values to a pandas Series or xarray + CFTimeIndex and apply requested rounding """ - values_as_series = pd.Series(values.ravel()) - method = getattr(values_as_series.dt, name) + from ..coding.cftimeindex import CFTimeIndex + + if is_np_datetime_like(values.dtype): + values_as_series = pd.Series(values.ravel()) + method = getattr(values_as_series.dt, name) + else: + values_as_cftimeindex = CFTimeIndex(values.ravel()) + method = getattr(values_as_cftimeindex, name) + field_values = method(freq=freq).values return field_values.reshape(values.shape) def _round_field(values, name, freq): - """Indirectly access pandas rounding functions by wrapping data - as a Series and calling through `.dt` attribute. + """Indirectly access rounding functions by wrapping data + as a Series or CFTimeIndex Parameters ---------- @@ -110,9 +117,12 @@ def _round_field(values, name, freq): if isinstance(values, dask_array_type): from dask.array import map_blocks - return map_blocks(_round_series, values, name, freq=freq, dtype=np.datetime64) + dtype = np.datetime64 if is_np_datetime_like(values.dtype) else np.dtype("O") + return map_blocks( + _round_through_series_or_index, values, name, freq=freq, dtype=dtype + ) else: - return _round_series(values, name, freq) + return _round_through_series_or_index(values, name, freq) def _strftime_through_cftimeindex(values, date_format): diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index f178720a6e1..1a8a2732eeb 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -7,6 +7,7 @@ from . 
import ( assert_array_equal, assert_equal, + assert_identical, raises_regex, requires_cftime, requires_dask, @@ -435,3 +436,106 @@ def test_seasons(cftime_date_type): seasons = xr.DataArray(seasons) assert_array_equal(seasons.values, dates.dt.season.values) + + +@pytest.fixture +def cftime_rounding_dataarray(cftime_date_type): + return xr.DataArray( + [ + [cftime_date_type(1, 1, 1, 1), cftime_date_type(1, 1, 1, 15)], + [cftime_date_type(1, 1, 1, 23), cftime_date_type(1, 1, 2, 1)], + ] + ) + + +@requires_cftime +@requires_dask +@pytest.mark.parametrize("use_dask", [False, True]) +def test_cftime_floor_accessor(cftime_rounding_dataarray, cftime_date_type, use_dask): + import dask.array as da + + freq = "D" + expected = xr.DataArray( + [ + [cftime_date_type(1, 1, 1, 0), cftime_date_type(1, 1, 1, 0)], + [cftime_date_type(1, 1, 1, 0), cftime_date_type(1, 1, 2, 0)], + ], + name="floor", + ) + + if use_dask: + chunks = {"dim_0": 1} + # Currently a compute is done to inspect a single value of the array + # if it is of object dtype to check if it is a cftime.datetime (if not + # we raise an error when using the dt accessor). + with raise_if_dask_computes(max_computes=1): + result = cftime_rounding_dataarray.chunk(chunks).dt.floor(freq) + expected = expected.chunk(chunks) + assert isinstance(result.data, da.Array) + assert result.chunks == expected.chunks + else: + result = cftime_rounding_dataarray.dt.floor(freq) + + assert_identical(result, expected) + + +@requires_cftime +@requires_dask +@pytest.mark.parametrize("use_dask", [False, True]) +def test_cftime_ceil_accessor(cftime_rounding_dataarray, cftime_date_type, use_dask): + import dask.array as da + + freq = "D" + expected = xr.DataArray( + [ + [cftime_date_type(1, 1, 2, 0), cftime_date_type(1, 1, 2, 0)], + [cftime_date_type(1, 1, 2, 0), cftime_date_type(1, 1, 3, 0)], + ], + name="ceil", + ) + + if use_dask: + chunks = {"dim_0": 1} + # Currently a compute is done to inspect a single value of the array + # if it is of object dtype to check if it is a cftime.datetime (if not + # we raise an error when using the dt accessor). + with raise_if_dask_computes(max_computes=1): + result = cftime_rounding_dataarray.chunk(chunks).dt.ceil(freq) + expected = expected.chunk(chunks) + assert isinstance(result.data, da.Array) + assert result.chunks == expected.chunks + else: + result = cftime_rounding_dataarray.dt.ceil(freq) + + assert_identical(result, expected) + + +@requires_cftime +@requires_dask +@pytest.mark.parametrize("use_dask", [False, True]) +def test_cftime_round_accessor(cftime_rounding_dataarray, cftime_date_type, use_dask): + import dask.array as da + + freq = "D" + expected = xr.DataArray( + [ + [cftime_date_type(1, 1, 1, 0), cftime_date_type(1, 1, 2, 0)], + [cftime_date_type(1, 1, 2, 0), cftime_date_type(1, 1, 2, 0)], + ], + name="round", + ) + + if use_dask: + chunks = {"dim_0": 1} + # Currently a compute is done to inspect a single value of the array + # if it is of object dtype to check if it is a cftime.datetime (if not + # we raise an error when using the dt accessor). 
+ with raise_if_dask_computes(max_computes=1): + result = cftime_rounding_dataarray.chunk(chunks).dt.round(freq) + expected = expected.chunk(chunks) + assert isinstance(result.data, da.Array) + assert result.chunks == expected.chunks + else: + result = cftime_rounding_dataarray.dt.round(freq) + + assert_identical(result, expected) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 8025766529e..8d83b833ca3 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -904,3 +904,92 @@ def test_multiindex(): index = xr.cftime_range("2001-01-01", periods=100, calendar="360_day") mindex = pd.MultiIndex.from_arrays([index]) assert mindex.get_loc("2001-01") == slice(0, 30) + + +@requires_cftime +@pytest.mark.parametrize("freq", ["3663S", "33T", "2H"]) +@pytest.mark.parametrize("method", ["floor", "ceil", "round"]) +def test_rounding_methods_against_datetimeindex(freq, method): + expected = pd.date_range("2000-01-02T01:03:51", periods=10, freq="1777S") + expected = getattr(expected, method)(freq) + result = xr.cftime_range("2000-01-02T01:03:51", periods=10, freq="1777S") + result = getattr(result, method)(freq).to_datetimeindex() + assert result.equals(expected) + + +@requires_cftime +@pytest.mark.parametrize("method", ["floor", "ceil", "round"]) +def test_rounding_methods_invalid_freq(method): + index = xr.cftime_range("2000-01-02T01:03:51", periods=10, freq="1777S") + with pytest.raises(ValueError, match="fixed"): + getattr(index, method)("MS") + + +@pytest.fixture +def rounding_index(date_type): + return xr.CFTimeIndex( + [ + date_type(1, 1, 1, 1, 59, 59, 999512), + date_type(1, 1, 1, 3, 0, 1, 500001), + date_type(1, 1, 1, 7, 0, 6, 499999), + ] + ) + + +@requires_cftime +def test_ceil(rounding_index, date_type): + result = rounding_index.ceil("S") + expected = xr.CFTimeIndex( + [ + date_type(1, 1, 1, 2, 0, 0, 0), + date_type(1, 1, 1, 3, 0, 2, 0), + date_type(1, 1, 1, 7, 0, 7, 0), + ] + ) + assert result.equals(expected) + + +@requires_cftime +def test_floor(rounding_index, date_type): + result = rounding_index.floor("S") + expected = xr.CFTimeIndex( + [ + date_type(1, 1, 1, 1, 59, 59, 0), + date_type(1, 1, 1, 3, 0, 1, 0), + date_type(1, 1, 1, 7, 0, 6, 0), + ] + ) + assert result.equals(expected) + + +@requires_cftime +def test_round(rounding_index, date_type): + result = rounding_index.round("S") + expected = xr.CFTimeIndex( + [ + date_type(1, 1, 1, 2, 0, 0, 0), + date_type(1, 1, 1, 3, 0, 2, 0), + date_type(1, 1, 1, 7, 0, 6, 0), + ] + ) + assert result.equals(expected) + + +@requires_cftime +def test_asi8(date_type): + index = xr.CFTimeIndex([date_type(1970, 1, 1), date_type(1970, 1, 2)]) + result = index.asi8 + expected = 1000000 * 86400 * np.array([0, 1]) + np.testing.assert_array_equal(result, expected) + + +@requires_cftime +def test_asi8_distant_date(): + """Test that asi8 conversion is truly exact.""" + import cftime + + date_type = cftime.DatetimeProlepticGregorian + index = xr.CFTimeIndex([date_type(10731, 4, 22, 3, 25, 45, 123456)]) + result = index.asi8 + expected = np.array([1000000 * 86400 * 400 * 8000 + 12345 * 1000000 + 123456]) + np.testing.assert_array_equal(result, expected) From 8512b7bf498c0c300f146447c0b05545842e9404 Mon Sep 17 00:00:00 2001 From: niowniow Date: Mon, 2 Mar 2020 13:19:16 +0100 Subject: [PATCH 04/54] Fix zarr append with groups (#3610) * bug fixed and added zarr group tests * black . 
* added info to whats-new Co-authored-by: Ryan Abernathey --- doc/whats-new.rst | 2 ++ xarray/backends/zarr.py | 4 ++-- xarray/tests/test_backends.py | 35 ++++++++++++++++++++++++----------- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 579719cb8d7..2cc92c78ac8 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -56,6 +56,8 @@ Bug fixes - xarray now respects the over, under and bad colors if set on a provided colormap. (:issue:`3590`, :pull:`3601`) By `johnomotani `_. +- Fix :py:meth:`xarray.core.dataset.Dataset.to_zarr` when using `append_dim` and `group` + simultaneously. (:issue:`3170`). By `Matthias Meyer `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 763769dac74..2469a31a3d9 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -373,7 +373,7 @@ def store( if len(existing_variables) > 0: # there are variables to append # their encoding must be the same as in the store - ds = open_zarr(self.ds.store, chunks=None) + ds = open_zarr(self.ds.store, group=self.ds.path, chunks=None) variables_with_encoding = {} for vn in existing_variables: variables_with_encoding[vn] = variables[vn].copy(deep=False) @@ -487,7 +487,7 @@ def open_zarr( directory in file system where a Zarr DirectoryStore has been stored. synchronizer : object, optional Array synchronizer provided to zarr - group : str, obtional + group : str, optional Group path. (a.k.a. `path` in zarr terminology.) chunks : int or dict or tuple or {None, 'auto'}, optional Chunk sizes along each dimension, e.g., ``5`` or diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index b7ba70ef6c4..015d2cbfdeb 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1729,39 +1729,52 @@ def test_hidden_zarr_keys(self): pass @pytest.mark.skipif(LooseVersion(dask_version) < "2.4", reason="dask GH5334") - def test_write_persistence_modes(self): + @pytest.mark.parametrize("group", [None, "group1"]) + def test_write_persistence_modes(self, group): original = create_test_data() # overwrite mode - with self.roundtrip(original, save_kwargs={"mode": "w"}) as actual: + with self.roundtrip( + original, + save_kwargs={"mode": "w", "group": group}, + open_kwargs={"group": group}, + ) as actual: assert_identical(original, actual) # don't overwrite mode - with self.roundtrip(original, save_kwargs={"mode": "w-"}) as actual: + with self.roundtrip( + original, + save_kwargs={"mode": "w-", "group": group}, + open_kwargs={"group": group}, + ) as actual: assert_identical(original, actual) # make sure overwriting works as expected with self.create_zarr_target() as store: self.save(original, store) # should overwrite with no error - self.save(original, store, mode="w") - with self.open(store) as actual: + self.save(original, store, mode="w", group=group) + with self.open(store, group=group) as actual: assert_identical(original, actual) with pytest.raises(ValueError): self.save(original, store, mode="w-") # check append mode for normal write - with self.roundtrip(original, save_kwargs={"mode": "a"}) as actual: + with self.roundtrip( + original, + save_kwargs={"mode": "a", "group": group}, + open_kwargs={"group": group}, + ) as actual: assert_identical(original, actual) - ds, ds_to_append, _ = create_append_test_data() - # check append mode for append write + ds, ds_to_append, _ = create_append_test_data() with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") - 
ds_to_append.to_zarr(store_target, append_dim="time") + ds.to_zarr(store_target, mode="w", group=group) + ds_to_append.to_zarr(store_target, append_dim="time", group=group) original = xr.concat([ds, ds_to_append], dim="time") - assert_identical(original, xr.open_zarr(store_target)) + actual = xr.open_zarr(store_target, group=group) + assert_identical(original, actual) def test_compressor_encoding(self): original = create_test_data() From b155853ff6e17172b1b6b16c0da31522718e9409 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Mon, 2 Mar 2020 18:01:43 -0500 Subject: [PATCH 05/54] Turn on html repr by default (#3812) * Turn on html repr by default * Add By line to release docs * Change tests to expect html as the default display_style --- doc/whats-new.rst | 5 +++++ xarray/core/options.py | 2 +- xarray/tests/test_options.py | 22 ++++++++++++---------- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2cc92c78ac8..151ba917cce 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -39,6 +39,11 @@ New Features often means a user is attempting to pass multiple dimensions to group over and should instead pass a list. By `Maximilian Roos `_ +- The new ``Dataset._repr_html_`` and ``DataArray._repr_html_`` (introduced + in 0.14.1) is now on by default. To disable, use + ``xarray.set_options(display_style="text")``. + By `Julia Signell `_. + Bug fixes ~~~~~~~~~ diff --git a/xarray/core/options.py b/xarray/core/options.py index 72f9ad8e1fa..15d05159d6d 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -20,7 +20,7 @@ CMAP_SEQUENTIAL: "viridis", CMAP_DIVERGENT: "RdBu_r", KEEP_ATTRS: "default", - DISPLAY_STYLE: "text", + DISPLAY_STYLE: "html", } _JOIN_OPTIONS = frozenset(["inner", "outer", "left", "right", "exact"]) diff --git a/xarray/tests/test_options.py b/xarray/tests/test_options.py index f155acbf494..19f74476ced 100644 --- a/xarray/tests/test_options.py +++ b/xarray/tests/test_options.py @@ -68,12 +68,12 @@ def test_nested_options(): def test_display_style(): - original = "text" + original = "html" assert OPTIONS["display_style"] == original with pytest.raises(ValueError): xarray.set_options(display_style="invalid_str") - with xarray.set_options(display_style="html"): - assert OPTIONS["display_style"] == "html" + with xarray.set_options(display_style="text"): + assert OPTIONS["display_style"] == "text" assert OPTIONS["display_style"] == original @@ -177,10 +177,11 @@ def test_merge_attr_retention(self): def test_display_style_text(self): ds = create_test_dataset_attrs() - text = ds._repr_html_() - assert text.startswith("
")
-        assert "'nested'" in text
-        assert "<xarray.Dataset>" in text
+        with xarray.set_options(display_style="text"):
+            text = ds._repr_html_()
+            assert text.startswith("
")
+            assert "'nested'" in text
+            assert "<xarray.Dataset>" in text
 
     def test_display_style_html(self):
         ds = create_test_dataset_attrs()
@@ -191,9 +192,10 @@ def test_display_style_html(self):
 
     def test_display_dataarray_style_text(self):
         da = create_test_dataarray_attrs()
-        text = da._repr_html_()
-        assert text.startswith("
")
-        assert "<xarray.DataArray 'var1'" in text
+        with xarray.set_options(display_style="text"):
+            text = da._repr_html_()
+            assert text.startswith("
")
+            assert "<xarray.DataArray 'var1'" in text
 
     def test_display_dataarray_style_html(self):
         da = create_test_dataarray_attrs()

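A minimal usage sketch of the behaviour this patch introduces (the dataset here is illustrative, not part of the patch; assumes the change is applied):

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"var1": ("x", np.arange(4))})

    # The rich HTML repr is now the notebook default, so _repr_html_()
    # returns the styled markup without any configuration.
    html = ds._repr_html_()

    # The plain-text style remains available, globally or scoped with the
    # context manager, in which case _repr_html_() falls back to an
    # escaped <pre> dump -- exactly what the updated tests assert.
    with xr.set_options(display_style="text"):
        assert ds._repr_html_().startswith("<pre>")
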
From 1c5e1cd022a0ff91275c50a50d1c6f88a7abff7d Mon Sep 17 00:00:00 2001
From: Andrew Thomas 
Date: Mon, 2 Mar 2020 18:02:55 -0500
Subject: [PATCH 06/54] Coarsen keep attrs 3376 (#3801)

* Add test of DataWithCoords.coarsen() for #3376

* Add test of Variable.coarsen() for #3376

* Add keep_attrs kwarg to DataWithCoords.coarsen() for #3376

* Style and spelling fixes (#3376)

* Fix test_coarsen_keep_attrs by removing self from input

* Pass keep_attrs through to _coarsen_cls and _rolling_cls returns (#3376)

* Move keyword from coarsen to mean in test_coarsen_keep_attrs

* Start handling keep_attrs in rolling class constructors (#3376)

* Update Coarsen constructor and DatasetCoarsen class method (GH3376)

Assign keep_attrs keyword value to Coarsen objects in constructor
Add conditional inside _reduce_method.wrapped_func branching on self.keep_attrs and pass back to returned Dataset

* Incorporate code review from @max-sixty

* Fix Dataset.coarsen and Variable.coarsen for GH3376

Handle global keep_attrs setting inside Variable._coarsen_reshape

Pass attrs through consistently inside DatasetCoarsen._reduce_method

Don't pass Variable.coarsen a keyword argument it doesn't expect inside DataArrayCoarsen._reduce_method

* Update tests for GH3376

* Incorporate review changes to test_dataset for GH3376

Remove commented-out test from test_coarsen_keep_attrs

Add test_rolling_keep_attrs

* Change Rolling._dataset_implementation for GH3376

Return a Dataset object that results in test_rolling_keep_attrs Passing

* style fixes

* Remove duplicate variable assignment and document change (GH3376)
---
 doc/whats-new.rst             |  5 +++
 xarray/core/common.py         | 29 +++++++++++++--
 xarray/core/rolling.py        | 67 ++++++++++++++++++++++++++++-------
 xarray/core/variable.py       |  3 ++
 xarray/tests/test_dataset.py  | 56 +++++++++++++++++++++++++++++
 xarray/tests/test_variable.py | 22 +++++++++++-
 6 files changed, 165 insertions(+), 17 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 151ba917cce..089cbbe1be3 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -61,6 +61,11 @@ Bug fixes
 - xarray now respects the over, under and bad colors if set on a provided colormap.
   (:issue:`3590`, :pull:`3601`)
   By `johnomotani `_.
+- :py:func:`coarsen` now respects ``xr.set_options(keep_attrs=True)``
+  to preserve attributes. :py:meth:`Dataset.coarsen` accepts a keyword
+  argument ``keep_attrs`` to change this setting. (:issue:`3376`,
+  :pull:`3801`) By `Andrew Thomas `_.
+  
 - Fix :py:meth:`xarray.core.dataset.Dataset.to_zarr` when using `append_dim` and `group`
   simultaneously. (:issue:`3170`). By `Matthias Meyer `_.
 
diff --git a/xarray/core/common.py b/xarray/core/common.py
index 582ae310061..e3739d6d039 100644
--- a/xarray/core/common.py
+++ b/xarray/core/common.py
@@ -753,6 +753,7 @@ def rolling(
         dim: Mapping[Hashable, int] = None,
         min_periods: int = None,
         center: bool = False,
+        keep_attrs: bool = None,
         **window_kwargs: int,
     ):
         """
@@ -769,6 +770,10 @@ def rolling(
             setting min_periods equal to the size of the window.
         center : boolean, default False
             Set the labels at the center of the window.
+        keep_attrs : bool, optional
+            If True, the object's attributes (`attrs`) will be copied from
+            the original object to the new one.  If False (default), the new
+            object will be returned without attributes.
         **window_kwargs : optional
             The keyword arguments form of ``dim``.
             One of dim or window_kwargs must be provided.
@@ -810,8 +815,13 @@ def rolling(
         core.rolling.DataArrayRolling
         core.rolling.DatasetRolling
         """
+        if keep_attrs is None:
+            keep_attrs = _get_keep_attrs(default=False)
+
         dim = either_dict_or_kwargs(dim, window_kwargs, "rolling")
-        return self._rolling_cls(self, dim, min_periods=min_periods, center=center)
+        return self._rolling_cls(
+            self, dim, min_periods=min_periods, center=center, keep_attrs=keep_attrs
+        )
 
     def rolling_exp(
         self,
@@ -859,6 +869,7 @@ def coarsen(
         boundary: str = "exact",
         side: Union[str, Mapping[Hashable, str]] = "left",
         coord_func: str = "mean",
+        keep_attrs: bool = None,
         **window_kwargs: int,
     ):
         """
@@ -879,8 +890,12 @@ def coarsen(
             multiple of the window size. If 'trim', the excess entries are
             dropped. If 'pad', NA will be padded.
         side : 'left' or 'right' or mapping from dimension to 'left' or 'right'
-        coord_func : function (name) that is applied to the coordintes,
+        coord_func : function (name) that is applied to the coordinates,
             or a mapping from coordinate name to function (name).
+        keep_attrs : bool, optional
+            If True, the object's attributes (`attrs`) will be copied from
+            the original object to the new one.  If False (default), the new
+            object will be returned without attributes.
 
         Returns
         -------
@@ -915,9 +930,17 @@ def coarsen(
         core.rolling.DataArrayCoarsen
         core.rolling.DatasetCoarsen
         """
+        if keep_attrs is None:
+            keep_attrs = _get_keep_attrs(default=False)
+
         dim = either_dict_or_kwargs(dim, window_kwargs, "coarsen")
         return self._coarsen_cls(
-            self, dim, boundary=boundary, side=side, coord_func=coord_func
+            self,
+            dim,
+            boundary=boundary,
+            side=side,
+            coord_func=coord_func,
+            keep_attrs=keep_attrs,
         )
 
     def resample(
diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py
index ea6d72b2e03..61178cfb15f 100644
--- a/xarray/core/rolling.py
+++ b/xarray/core/rolling.py
@@ -7,6 +7,7 @@
 from . import dtypes, duck_array_ops, utils
 from .dask_array_ops import dask_rolling_wrapper
 from .ops import inject_reduce_methods
+from .options import _get_keep_attrs
 from .pycompat import dask_array_type
 
 try:
@@ -42,10 +43,10 @@ class Rolling:
     DataArray.rolling
     """
 
-    __slots__ = ("obj", "window", "min_periods", "center", "dim")
-    _attributes = ("window", "min_periods", "center", "dim")
+    __slots__ = ("obj", "window", "min_periods", "center", "dim", "keep_attrs")
+    _attributes = ("window", "min_periods", "center", "dim", "keep_attrs")
 
-    def __init__(self, obj, windows, min_periods=None, center=False):
+    def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None):
         """
         Moving window object.
 
@@ -65,6 +66,10 @@ def __init__(self, obj, windows, min_periods=None, center=False):
             setting min_periods equal to the size of the window.
         center : boolean, default False
             Set the labels at the center of the window.
+        keep_attrs : bool, optional
+            If True, the object's attributes (`attrs`) will be copied from
+            the original object to the new one.  If False (default), the new
+            object will be returned without attributes.
 
         Returns
         -------
@@ -89,6 +94,10 @@ def __init__(self, obj, windows, min_periods=None, center=False):
         self.center = center
         self.dim = dim
 
+        if keep_attrs is None:
+            keep_attrs = _get_keep_attrs(default=False)
+        self.keep_attrs = keep_attrs
+
     @property
     def _min_periods(self):
         return self.min_periods if self.min_periods is not None else self.window
@@ -143,7 +152,7 @@ def count(self):
 class DataArrayRolling(Rolling):
     __slots__ = ("window_labels",)
 
-    def __init__(self, obj, windows, min_periods=None, center=False):
+    def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None):
         """
         Moving window object for DataArray.
         You should use DataArray.rolling() method to construct this object
@@ -165,6 +174,10 @@ def __init__(self, obj, windows, min_periods=None, center=False):
             setting min_periods equal to the size of the window.
         center : boolean, default False
             Set the labels at the center of the window.
+        keep_attrs : bool, optional
+            If True, the object's attributes (`attrs`) will be copied from
+            the original object to the new one.  If False (default), the new
+            object will be returned without attributes.
 
         Returns
         -------
@@ -177,7 +190,11 @@ def __init__(self, obj, windows, min_periods=None, center=False):
         Dataset.rolling
         Dataset.groupby
         """
-        super().__init__(obj, windows, min_periods=min_periods, center=center)
+        if keep_attrs is None:
+            keep_attrs = _get_keep_attrs(default=False)
+        super().__init__(
+            obj, windows, min_periods=min_periods, center=center, keep_attrs=keep_attrs
+        )
 
         self.window_labels = self.obj[self.dim]
 
@@ -374,7 +391,7 @@ def _numpy_or_bottleneck_reduce(
 class DatasetRolling(Rolling):
     __slots__ = ("rollings",)
 
-    def __init__(self, obj, windows, min_periods=None, center=False):
+    def __init__(self, obj, windows, min_periods=None, center=False, keep_attrs=None):
         """
         Moving window object for Dataset.
         You should use Dataset.rolling() method to construct this object
@@ -396,6 +413,10 @@ def __init__(self, obj, windows, min_periods=None, center=False):
             setting min_periods equal to the size of the window.
         center : boolean, default False
             Set the labels at the center of the window.
+        keep_attrs : bool, optional
+            If True, the object's attributes (`attrs`) will be copied from
+            the original object to the new one.  If False (default), the new
+            object will be returned without attributes.
 
         Returns
         -------
@@ -408,7 +429,7 @@ def __init__(self, obj, windows, min_periods=None, center=False):
         Dataset.groupby
         DataArray.groupby
         """
-        super().__init__(obj, windows, min_periods, center)
+        super().__init__(obj, windows, min_periods, center, keep_attrs)
         if self.dim not in self.obj.dims:
             raise KeyError(self.dim)
         # Keep each Rolling object as a dictionary
@@ -416,7 +437,9 @@ def __init__(self, obj, windows, min_periods=None, center=False):
         for key, da in self.obj.data_vars.items():
             # keeps rollings only for the dataset depending on self.dim
             if self.dim in da.dims:
-                self.rollings[key] = DataArrayRolling(da, windows, min_periods, center)
+                self.rollings[key] = DataArrayRolling(
+                    da, windows, min_periods, center, keep_attrs
+                )
 
     def _dataset_implementation(self, func, **kwargs):
         from .dataset import Dataset
@@ -427,7 +450,8 @@ def _dataset_implementation(self, func, **kwargs):
                 reduced[key] = func(self.rollings[key], **kwargs)
             else:
                 reduced[key] = self.obj[key]
-        return Dataset(reduced, coords=self.obj.coords)
+        attrs = self.obj.attrs if self.keep_attrs else {}
+        return Dataset(reduced, coords=self.obj.coords, attrs=attrs)
 
     def reduce(self, func, **kwargs):
         """Reduce the items in this group by applying `func` along some
@@ -466,7 +490,7 @@ def _numpy_or_bottleneck_reduce(
             **kwargs,
         )
 
-    def construct(self, window_dim, stride=1, fill_value=dtypes.NA):
+    def construct(self, window_dim, stride=1, fill_value=dtypes.NA, keep_attrs=None):
         """
         Convert this rolling object to xr.Dataset,
         where the window dimension is stacked as a new dimension
@@ -487,6 +511,9 @@ def construct(self, window_dim, stride=1, fill_value=dtypes.NA):
 
         from .dataset import Dataset
 
+        if keep_attrs is None:
+            keep_attrs = _get_keep_attrs(default=True)
+
         dataset = {}
         for key, da in self.obj.data_vars.items():
             if self.dim in da.dims:
@@ -509,10 +536,18 @@ class Coarsen:
     DataArray.coarsen
     """
 
-    __slots__ = ("obj", "boundary", "coord_func", "windows", "side", "trim_excess")
+    __slots__ = (
+        "obj",
+        "boundary",
+        "coord_func",
+        "windows",
+        "side",
+        "trim_excess",
+        "keep_attrs",
+    )
     _attributes = ("windows", "side", "trim_excess")
 
-    def __init__(self, obj, windows, boundary, side, coord_func):
+    def __init__(self, obj, windows, boundary, side, coord_func, keep_attrs):
         """
         Moving window object.
 
@@ -541,6 +576,7 @@ def __init__(self, obj, windows, boundary, side, coord_func):
         self.windows = windows
         self.side = side
         self.boundary = boundary
+        self.keep_attrs = keep_attrs
 
         absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims]
         if absent_dims:
@@ -626,6 +662,11 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool
         def wrapped_func(self, **kwargs):
             from .dataset import Dataset
 
+            if self.keep_attrs:
+                attrs = self.obj.attrs
+            else:
+                attrs = {}
+
             reduced = {}
             for key, da in self.obj.data_vars.items():
                 reduced[key] = da.variable.coarsen(
@@ -644,7 +685,7 @@ def wrapped_func(self, **kwargs):
                     )
                 else:
                     coords[c] = v.variable
-            return Dataset(reduced, coords=coords)
+            return Dataset(reduced, coords=coords, attrs=attrs)
 
         return wrapped_func
 
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index daa8678157b..62f9fde6a2e 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -1949,6 +1949,9 @@ def _coarsen_reshape(self, windows, boundary, side):
             else:
                 shape.append(variable.shape[i])
 
+        keep_attrs = _get_keep_attrs(default=False)
+        variable.attrs = variable._attrs if keep_attrs else {}
+
         return variable.data.reshape(shape), tuple(axes)
 
     @property
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index 5e254c37e44..7bcf9379ae8 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -5664,6 +5664,62 @@ def test_coarsen_coords_cftime():
     np.testing.assert_array_equal(actual.time, expected_times)
 
 
+def test_coarsen_keep_attrs():
+    _attrs = {"units": "test", "long_name": "testing"}
+
+    var1 = np.linspace(10, 15, 100)
+    var2 = np.linspace(5, 10, 100)
+    coords = np.linspace(1, 10, 100)
+
+    ds = Dataset(
+        data_vars={"var1": ("coord", var1), "var2": ("coord", var2)},
+        coords={"coord": coords},
+        attrs=_attrs,
+    )
+
+    # Test dropped attrs
+    dat = ds.coarsen(coord=5).mean()
+    assert dat.attrs == {}
+
+    # Test kept attrs using dataset keyword
+    dat = ds.coarsen(coord=5, keep_attrs=True).mean()
+    assert dat.attrs == _attrs
+
+    # Test kept attrs using global option
+    with set_options(keep_attrs=True):
+        dat = ds.coarsen(coord=5).mean()
+    assert dat.attrs == _attrs
+
+
+def test_rolling_keep_attrs():
+    _attrs = {"units": "test", "long_name": "testing"}
+
+    var1 = np.linspace(10, 15, 100)
+    var2 = np.linspace(5, 10, 100)
+    coords = np.linspace(1, 10, 100)
+
+    ds = Dataset(
+        data_vars={"var1": ("coord", var1), "var2": ("coord", var2)},
+        coords={"coord": coords},
+        attrs=_attrs,
+    )
+
+    # Test dropped attrs
+    dat = ds.rolling(dim={"coord": 5}, min_periods=None, center=False).mean()
+    assert dat.attrs == {}
+
+    # Test kept attrs using dataset keyword
+    dat = ds.rolling(
+        dim={"coord": 5}, min_periods=None, center=False, keep_attrs=True
+    ).mean()
+    assert dat.attrs == _attrs
+
+    # Test kept attrs using global option
+    with set_options(keep_attrs=True):
+        dat = ds.rolling(dim={"coord": 5}, min_periods=None, center=False).mean()
+    assert dat.attrs == _attrs
+
+
 def test_rolling_properties(ds):
     # catching invalid args
     with pytest.raises(ValueError, match="exactly one dim/window should"):
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index 62fde920b1e..c86ecd0121f 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -9,7 +9,7 @@
 import pytz
 
 from xarray import Coordinate, Dataset, IndexVariable, Variable, set_options
-from xarray.core import dtypes, indexing
+from xarray.core import dtypes, duck_array_ops, indexing
 from xarray.core.common import full_like, ones_like, zeros_like
 from xarray.core.indexing import (
     BasicIndexer,
@@ -1879,6 +1879,26 @@ def test_coarsen_2d(self):
         expected = self.cls(("x", "y"), [[10, 18], [42, 35]])
         assert_equal(actual, expected)
 
+    # perhaps @pytest.mark.parametrize("operation", [f for f in duck_array_ops])
+    def test_coarsen_keep_attrs(self, operation="mean"):
+        _attrs = {"units": "test", "long_name": "testing"}
+
+        test_func = getattr(duck_array_ops, operation, None)
+
+        # Test dropped attrs
+        with set_options(keep_attrs=False):
+            new = Variable(["coord"], np.linspace(1, 10, 100), attrs=_attrs).coarsen(
+                windows={"coord": 1}, func=test_func, boundary="exact", side="left"
+            )
+        assert new.attrs == {}
+
+        # Test kept attrs
+        with set_options(keep_attrs=True):
+            new = Variable(["coord"], np.linspace(1, 10, 100), attrs=_attrs).coarsen(
+                windows={"coord": 1}, func=test_func, boundary="exact", side="left"
+            )
+        assert new.attrs == _attrs
+
 
 @requires_dask
 class TestVariableWithDask(VariableSubclassobjects):

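A usage sketch mirroring the tests added above (names illustrative; assumes this patch is applied):

    import numpy as np
    import xarray as xr

    ds = xr.Dataset(
        {"var1": ("coord", np.linspace(10, 15, 100))},
        coords={"coord": np.linspace(1, 10, 100)},
        attrs={"units": "test"},
    )

    # Default behaviour is unchanged: reductions drop attributes.
    assert ds.coarsen(coord=5).mean().attrs == {}

    # The new keyword keeps them ...
    assert ds.coarsen(coord=5, keep_attrs=True).mean().attrs == {"units": "test"}

    # ... and the global option is now respected too.
    with xr.set_options(keep_attrs=True):
        assert ds.coarsen(coord=5).mean().attrs == {"units": "test"}
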
From a333a5c73db078fa34324475f9d74d71d74d4659 Mon Sep 17 00:00:00 2001
From: Sander 
Date: Tue, 3 Mar 2020 01:38:04 +0100
Subject: [PATCH 07/54] =?UTF-8?q?removed=20mention=20that=20'dims'=20are?=
 =?UTF-8?q?=20inferred=20from=20'coords'-dict=20when=20omit=E2=80=A6=20(#3?=
 =?UTF-8?q?821)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* removed mention that 'dims' are inferred from 'coords'-dict when omitted in DataArray (fixes #3820)

* added summary of PR #3821 to whats-new
---
 doc/whats-new.rst        | 3 +++
 xarray/core/dataarray.py | 3 +--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 089cbbe1be3..4a6083522ba 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -71,6 +71,9 @@ Bug fixes
 
 Documentation
 ~~~~~~~~~~~~~
+- Fix documentation of :py:class:`DataArray` removing the deprecated mention
+  that when omitted, `dims` are inferred from a `coords`-dict. (:pull:`3821`)
+  By `Sander van Rijn `_.
 
 Internal Changes
 ~~~~~~~~~~~~~~~~
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 062cc6342df..b1da0ca1448 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -304,8 +304,7 @@ def __init__(
             Name(s) of the data dimension(s). Must be either a hashable (only
             for 1D data) or a sequence of hashables with length equal to the
             number of dimensions. If this argument is omitted, dimension names
-            are taken from ``coords`` (if possible) and otherwise default to
-            ``['dim_0', ... 'dim_n']``.
+            default to ``['dim_0', ... 'dim_n']``.
         name : str or None, optional
             Name of this array.
         attrs : dict_like or None, optional

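To illustrate the behaviour the corrected docstring describes (a sketch, not part of the patch):

    import numpy as np
    import xarray as xr

    # With dims omitted, names default to ['dim_0', ..., 'dim_n']; they
    # are no longer inferred from a coords dict.
    da = xr.DataArray(np.zeros((2, 3)))
    assert da.dims == ("dim_0", "dim_1")

    # To attach dict-style coordinates, name the dimensions explicitly.
    da = xr.DataArray(
        np.zeros((2, 3)),
        coords={"x": [0, 1], "y": [10, 20, 30]},
        dims=("x", "y"),
    )
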
From 01462d65c7213e5e1cddf36492c6a34a7e53ce55 Mon Sep 17 00:00:00 2001
From: dcherian 
Date: Wed, 4 Mar 2020 07:05:14 +0530
Subject: [PATCH 08/54] Use stable RTD image.

---
 readthedocs.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/readthedocs.yml b/readthedocs.yml
index 88aee82a44b..173d61ec6f3 100644
--- a/readthedocs.yml
+++ b/readthedocs.yml
@@ -1,7 +1,7 @@
 version: 2
 
 build:
-    image: latest
+    image: stable
 
 conda:
     environment: ci/requirements/doc.yml

From b2f06cb9d36a2520fa4f3aee6c38cae9972e702e Mon Sep 17 00:00:00 2001
From: Deepak Cherian 
Date: Thu, 5 Mar 2020 18:26:11 +0530
Subject: [PATCH 09/54] DOC: Add rioxarray and other external examples (#3757)

* DOC: Add rioxarray link to examples and add example in file IO

* Add more external examples.

* fix spacing for ipython docs

* minor fixes

* fix bad edit

Co-authored-by: Deepak Cherian 
---
 doc/examples.rst |  9 +++++++++
 doc/io.rst       | 46 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/doc/examples.rst b/doc/examples.rst
index 3067ca824be..805395808e0 100644
--- a/doc/examples.rst
+++ b/doc/examples.rst
@@ -17,3 +17,12 @@ Using apply_ufunc
     :maxdepth: 2
 
     examples/apply_ufunc_vectorize_1d
+
+External Examples
+-----------------
+.. toctree::
+    :maxdepth: 2
+
+    Managing raster data with rioxarray 
+    Xarray with dask 
+    Xarray and dask on the cloud with Pangeo 
diff --git a/doc/io.rst b/doc/io.rst
index e910943236f..6064aa3568a 100644
--- a/doc/io.rst
+++ b/doc/io.rst
@@ -759,9 +759,53 @@ for an example of how to convert these to longitudes and latitudes.
     considered as being experimental. Please report any bug you may find
     on xarray's github repository.
 
+
+Additionally, you can use `rioxarray`_ for reading in GeoTiff, netCDF or other
+GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIFF.
+`rioxarray`_ can also handle geospatial related tasks such as re-projecting and clipping.
+
+.. ipython::
+    :verbatim:
+
+    In [1]: import rioxarray
+
+    In [2]: rds = rioxarray.open_rasterio('RGB.byte.tif')
+
+    In [3]: rds
+    Out[3]:
+    <xarray.DataArray (band: 3, y: 718, x: 791)>
+    [1703814 values with dtype=uint8]
+    Coordinates:
+      * band         (band) int64 1 2 3
+      * y            (y) float64 2.827e+06 2.826e+06 ... 2.612e+06 2.612e+06
+      * x            (x) float64 1.021e+05 1.024e+05 ... 3.389e+05 3.392e+05
+        spatial_ref  int64 0
+    Attributes:
+        STATISTICS_MAXIMUM:  255
+        STATISTICS_MEAN:     29.947726688477
+        STATISTICS_MINIMUM:  0
+        STATISTICS_STDDEV:   52.340921626611
+        transform:           (300.0379266750948, 0.0, 101985.0, 0.0, -300.0417827...
+        _FillValue:          0.0
+        scale_factor:        1.0
+        add_offset:          0.0
+        grid_mapping:        spatial_ref
+
+    In [4]: rds.rio.crs
+    Out[4]: CRS.from_epsg(32618)
+
+    In [5]: rds4326 = rds.rio.reproject("epsg:4326")
+
+    In [6]: rds4326.rio.crs
+    Out[6]: CRS.from_epsg(4326)
+
+    In [7]: rds4326.rio.to_raster('RGB.byte.4326.tif')
+
+
 .. _rasterio: https://rasterio.readthedocs.io/en/latest/
+.. _rioxarray: https://corteva.github.io/rioxarray/stable/
 .. _test files: https://github.com/mapbox/rasterio/blob/master/tests/data/RGB.byte.tif
-.. _pyproj: https://github.com/jswhit/pyproj
+.. _pyproj: https://github.com/pyproj4/pyproj
 
 .. _io.zarr:
 

From 8fb47f282555fd1430b9621abedbed82cdac7d4a Mon Sep 17 00:00:00 2001
From: Deepak Cherian 
Date: Thu, 5 Mar 2020 18:26:54 +0530
Subject: [PATCH 10/54] Add note on diff's n differing from pandas (#3822)

* note that n != periods in diff docstring

* better wording based on feedback
---
 xarray/core/dataarray.py | 6 ++++++
 xarray/core/dataset.py   | 5 +++++
 2 files changed, 11 insertions(+)

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index b1da0ca1448..4e80ef222c2 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -2692,6 +2692,12 @@ def diff(self, dim: Hashable, n: int = 1, label: Hashable = "upper") -> "DataArr
         difference : same type as caller
             The n-th order finite difference of this object.
 
+        .. note::
+
+            `n` matches numpy's behavior and is different from pandas' first
+            argument named `periods`.
+
+
         Examples
         --------
         >>> arr = xr.DataArray([5, 5, 6, 6], [[1, 2, 3, 4]], ['x'])
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 7252dd2f3df..52940e98b27 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -4879,6 +4879,11 @@ def diff(self, dim, n=1, label="upper"):
         difference : same type as caller
             The n-th order finite difference of this object.
 
+        .. note::
+
+            `n` matches numpy's behavior and is different from pandas' first
+            argument named `periods`.
+
         Examples
         --------
         >>> ds = xr.Dataset({'foo': ('x', [5, 5, 6, 6])})

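A sketch of the distinction the new note draws, reusing the docstring's own example array:

    import xarray as xr

    arr = xr.DataArray([5, 5, 6, 6], [[1, 2, 3, 4]], ["x"])

    # n is the *order* of the finite difference (numpy semantics), not a
    # shift distance like pandas' `periods`.
    first = arr.diff("x")        # values [0, 1, 0], labelled "upper"
    second = arr.diff("x", n=2)  # values [1, -1], i.e. first.diff("x")

    # pandas' Series.diff(periods=2), by contrast, computes s - s.shift(2).
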
From 69723ebf34cb9c37917b44b2ac1ab92ae553fecc Mon Sep 17 00:00:00 2001
From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Date: Thu, 5 Mar 2020 22:36:07 -0500
Subject: [PATCH 11/54] Label "Installed Versions" item in Issue template
 (#3832)

* Label Installed Versions details in GH Issue template

* Update bug_report.md
---
 .github/ISSUE_TEMPLATE/bug_report.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
index df5b2304bc3..83c3aea53a8 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -25,8 +25,9 @@ assignees: ''
 
 
 
-#### Output of ``xr.show_versions()``
-
+#### Versions
+
+<details><summary>Output of <tt>xr.show_versions()</tt></summary>
From 00e5b367c483656c67c63c47a2a9e07112bbc885 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Fri, 6 Mar 2020 08:57:17 -0500 Subject: [PATCH 12/54] update macos image (#3838) * update macos image * whatsnew --- azure-pipelines.yml | 2 +- doc/whats-new.rst | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 5789161c966..ce95fca1ba1 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -38,7 +38,7 @@ jobs: py38: conda_env: py38 pool: - vmImage: 'macOS-10.13' + vmImage: 'macOS-10.15' steps: - template: ci/azure/unit-tests.yml diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4a6083522ba..99ee66fad67 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -86,6 +86,8 @@ Internal Changes - Changed test_open_mfdataset_list_attr to only run with dask installed (:issue:`3777`, :pull:`3780`). By `Bruno Pagani `_. +- Updated Azure CI MacOS image, given pending removal. + By `Maximilian Roos `_ .. _whats-new.0.15.0: From 9fbb4170c1732fe2f3cd57b2b96d770a5bac50ed Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Fri, 6 Mar 2020 23:38:11 -0500 Subject: [PATCH 13/54] Allow `where` to receive a callable (#3827) * allow where to receive a callable * Update xarray/core/common.py Co-Authored-By: keewis * docstring * whatsnew Co-authored-by: keewis --- doc/whats-new.rst | 4 +++- xarray/core/common.py | 22 ++++++++++++++++++++++ xarray/tests/test_dataarray.py | 6 ++++++ xarray/tests/test_dataset.py | 9 +++++++++ 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 99ee66fad67..24120270444 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -43,7 +43,9 @@ New Features in 0.14.1) is now on by default. To disable, use ``xarray.set_options(display_style="text")``. By `Julia Signell `_. - +- :py:meth:`Dataset.where` and :py:meth:`DataArray.where` accept a lambda as a + first argument, which is then called on the input; replicating pandas' behavior. 
+ By `Maximilian Roos `_ Bug fixes ~~~~~~~~~ diff --git a/xarray/core/common.py b/xarray/core/common.py index e3739d6d039..c80cb24c5b5 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1119,6 +1119,15 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): >>> import numpy as np >>> a = xr.DataArray(np.arange(25).reshape(5, 5), dims=('x', 'y')) + >>> a + + array([[ 0, 1, 2, 3, 4], + [ 5, 6, 7, 8, 9], + [10, 11, 12, 13, 14], + [15, 16, 17, 18, 19], + [20, 21, 22, 23, 24]]) + Dimensions without coordinates: x, y + >>> a.where(a.x + a.y < 4) array([[ 0., 1., 2., 3., nan], @@ -1127,6 +1136,7 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): [ 15., nan, nan, nan, nan], [ nan, nan, nan, nan, nan]]) Dimensions without coordinates: x, y + >>> a.where(a.x + a.y < 5, -1) array([[ 0, 1, 2, 3, 4], @@ -1135,6 +1145,7 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): [15, 16, -1, -1, -1], [20, -1, -1, -1, -1]]) Dimensions without coordinates: x, y + >>> a.where(a.x + a.y < 4, drop=True) array([[ 0., 1., 2., 3.], @@ -1143,6 +1154,14 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): [ 15., nan, nan, nan]]) Dimensions without coordinates: x, y + >>> a.where(lambda x: x.x + x.y < 4, drop=True) + + array([[ 0., 1., 2., 3.], + [ 5., 6., 7., nan], + [ 10., 11., nan, nan], + [ 15., nan, nan, nan]]) + Dimensions without coordinates: x, y + See also -------- numpy.where : corresponding numpy function @@ -1152,6 +1171,9 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): from .dataarray import DataArray from .dataset import Dataset + if callable(cond): + cond = cond(self) + if drop: if other is not dtypes.NA: raise ValueError("cannot set `other` if drop=True") diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 0a622d279ba..b8a9c5edaf9 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2215,6 +2215,12 @@ def test_where(self): actual = arr.where(arr.x < 2, drop=True) assert_identical(actual, expected) + def test_where_lambda(self): + arr = DataArray(np.arange(4), dims="y") + expected = arr.sel(y=slice(2)) + actual = arr.where(lambda x: x.y < 2, drop=True) + assert_identical(actual, expected) + def test_where_string(self): array = DataArray(["a", "b"]) expected = DataArray(np.array(["a", np.nan], dtype=object)) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 7bcf9379ae8..44ffafb23b1 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4349,6 +4349,12 @@ def test_where(self): assert actual.a.name == "a" assert actual.a.attrs == ds.a.attrs + # lambda + ds = Dataset({"a": ("x", range(5))}) + expected = Dataset({"a": ("x", [np.nan, np.nan, 2, 3, 4])}) + actual = ds.where(lambda x: x > 1) + assert_identical(expected, actual) + def test_where_other(self): ds = Dataset({"a": ("x", range(5))}, {"x": range(5)}) expected = Dataset({"a": ("x", [-1, -1, 2, 3, 4])}, {"x": range(5)}) @@ -4356,6 +4362,9 @@ def test_where_other(self): assert_equal(expected, actual) assert actual.a.dtype == int + actual = ds.where(lambda x: x > 1, -1) + assert_equal(expected, actual) + with raises_regex(ValueError, "cannot set"): ds.where(ds > 1, other=0, drop=True) From cdaac64fa528222d947bbc821ac6c919f7fa7fa8 Mon Sep 17 00:00:00 2001 From: Aaron Spring Date: Sun, 8 Mar 2020 18:42:43 +0100 Subject: [PATCH 14/54] Implement skipna kwarg in xr.quantile (#3844) * quick fix, no docs, no tests * added tests * docstrings * added whatsnew * Update 
doc/whats-new.rst Co-Authored-By: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> * Update doc/whats-new.rst Co-Authored-By: keewis Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Co-authored-by: keewis --- doc/whats-new.rst | 7 ++++++- xarray/core/dataarray.py | 11 +++++++++-- xarray/core/dataset.py | 13 +++++++++++-- xarray/core/groupby.py | 9 +++++++-- xarray/core/variable.py | 8 ++++++-- xarray/tests/test_dataarray.py | 8 +++++--- xarray/tests/test_dataset.py | 24 ++++++++++++++++++++---- xarray/tests/test_variable.py | 8 +++++--- 8 files changed, 69 insertions(+), 19 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 24120270444..2c30db99bcd 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -45,7 +45,12 @@ New Features By `Julia Signell `_. - :py:meth:`Dataset.where` and :py:meth:`DataArray.where` accept a lambda as a first argument, which is then called on the input; replicating pandas' behavior. - By `Maximilian Roos `_ + By `Maximilian Roos `_. +- Implement ``skipna`` in :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile`, + :py:meth:`core.groupby.DatasetGroupBy.quantile`, :py:meth:`core.groupby.DataArrayGroupBy.quantile` + (:issue:`3843`, :pull:`3844`) + By `Aaron Spring `_. + Bug fixes ~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 4e80ef222c2..7fcb42bf9d2 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2939,6 +2939,7 @@ def quantile( dim: Union[Hashable, Sequence[Hashable], None] = None, interpolation: str = "linear", keep_attrs: bool = None, + skipna: bool = True, ) -> "DataArray": """Compute the qth quantile of the data along the specified dimension. @@ -2966,6 +2967,8 @@ def quantile( If True, the dataset's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. + skipna : bool, optional + Whether to skip missing values when aggregating. Returns ------- @@ -2978,7 +2981,7 @@ def quantile( See Also -------- - numpy.nanquantile, pandas.Series.quantile, Dataset.quantile + numpy.nanquantile, numpy.quantile, pandas.Series.quantile, Dataset.quantile Examples -------- @@ -3015,7 +3018,11 @@ def quantile( """ ds = self._to_temp_dataset().quantile( - q, dim=dim, keep_attrs=keep_attrs, interpolation=interpolation + q, + dim=dim, + keep_attrs=keep_attrs, + interpolation=interpolation, + skipna=skipna, ) return self._from_temp_dataset(ds) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 52940e98b27..f286236dd45 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5140,7 +5140,13 @@ def sortby(self, variables, ascending=True): return aligned_self.isel(**indices) def quantile( - self, q, dim=None, interpolation="linear", numeric_only=False, keep_attrs=None + self, + q, + dim=None, + interpolation="linear", + numeric_only=False, + keep_attrs=None, + skipna=True, ): """Compute the qth quantile of the data along the specified dimension. @@ -5171,6 +5177,8 @@ def quantile( object will be returned without attributes. numeric_only : bool, optional If True, only apply ``func`` to variables with a numeric dtype. + skipna : bool, optional + Whether to skip missing values when aggregating. 
Returns ------- @@ -5183,7 +5191,7 @@ def quantile( See Also -------- - numpy.nanquantile, pandas.Series.quantile, DataArray.quantile + numpy.nanquantile, numpy.quantile, pandas.Series.quantile, DataArray.quantile Examples -------- @@ -5258,6 +5266,7 @@ def quantile( dim=reduce_dims, interpolation=interpolation, keep_attrs=keep_attrs, + skipna=skipna, ) else: diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index f2a9ebac6eb..4223d9dc255 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -558,7 +558,9 @@ def fillna(self, value): out = ops.fillna(self, value) return out - def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): + def quantile( + self, q, dim=None, interpolation="linear", keep_attrs=None, skipna=True + ): """Compute the qth quantile over each array in the groups and concatenate them together into a new array. @@ -582,6 +584,8 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): * higher: ``j``. * nearest: ``i`` or ``j``, whichever is nearest. * midpoint: ``(i + j) / 2``. + skipna : bool, optional + Whether to skip missing values when aggregating. Returns ------- @@ -595,7 +599,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): See Also -------- - numpy.nanquantile, pandas.Series.quantile, Dataset.quantile, + numpy.nanquantile, numpy.quantile, pandas.Series.quantile, Dataset.quantile, DataArray.quantile Examples @@ -656,6 +660,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): dim=dim, interpolation=interpolation, keep_attrs=keep_attrs, + skipna=skipna, ) return out diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 62f9fde6a2e..435edb6f014 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1678,7 +1678,9 @@ def no_conflicts(self, other, equiv=duck_array_ops.array_notnull_equiv): """ return self.broadcast_equals(other, equiv=equiv) - def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): + def quantile( + self, q, dim=None, interpolation="linear", keep_attrs=None, skipna=True + ): """Compute the qth quantile of the data along the specified dimension. Returns the qth quantiles(s) of the array elements. @@ -1725,6 +1727,8 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): from .computation import apply_ufunc + _quantile_func = np.nanquantile if skipna else np.quantile + if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) @@ -1739,7 +1743,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): def _wrapper(npa, **kwargs): # move quantile axis to end. 
required for apply_ufunc - return np.moveaxis(np.nanquantile(npa, **kwargs), 0, -1) + return np.moveaxis(_quantile_func(npa, **kwargs), 0, -1) axis = np.arange(-1, -1 * len(dim) - 1, -1) result = apply_ufunc( diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index b8a9c5edaf9..33f1b403eb8 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2368,13 +2368,15 @@ def test_reduce_out(self): with pytest.raises(TypeError): orig.mean(out=np.ones(orig.shape)) + @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) @pytest.mark.parametrize( "axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]]) ) - def test_quantile(self, q, axis, dim): - actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True) - expected = np.nanpercentile(self.dv.values, np.array(q) * 100, axis=axis) + def test_quantile(self, q, axis, dim, skipna): + actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True, skipna=skipna) + _percentile_func = np.nanpercentile if skipna else np.percentile + expected = _percentile_func(self.dv.values, np.array(q) * 100, axis=axis) np.testing.assert_allclose(actual.values, expected) if is_scalar(q): assert "quantile" not in actual.dims diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 44ffafb23b1..d2e8c6b7609 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4697,12 +4697,13 @@ def test_reduce_keepdims(self): ) assert_identical(expected, actual) + @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) - def test_quantile(self, q): + def test_quantile(self, q, skipna): ds = create_test_data(seed=123) for dim in [None, "dim1", ["dim1"]]: - ds_quantile = ds.quantile(q, dim=dim) + ds_quantile = ds.quantile(q, dim=dim, skipna=skipna) if is_scalar(q): assert "quantile" not in ds_quantile.dims else: @@ -4710,12 +4711,27 @@ def test_quantile(self, q): for var, dar in ds.data_vars.items(): assert var in ds_quantile - assert_identical(ds_quantile[var], dar.quantile(q, dim=dim)) + assert_identical( + ds_quantile[var], dar.quantile(q, dim=dim, skipna=skipna) + ) dim = ["dim1", "dim2"] - ds_quantile = ds.quantile(q, dim=dim) + ds_quantile = ds.quantile(q, dim=dim, skipna=skipna) assert "dim3" in ds_quantile.dims assert all(d not in ds_quantile.dims for d in dim) + @pytest.mark.parametrize("skipna", [True, False]) + def test_quantile_skipna(self, skipna): + q = 0.1 + dim = "time" + ds = Dataset({"a": ([dim], np.arange(0, 11))}) + ds = ds.where(ds >= 1) + + result = ds.quantile(q=q, dim=dim, skipna=skipna) + + value = 1.9 if skipna else np.nan + expected = Dataset({"a": value}, coords={"quantile": q}) + assert_identical(result, expected) + @requires_bottleneck def test_rank(self): ds = create_test_data(seed=1234) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index c86ecd0121f..c600f7a77d0 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1511,14 +1511,16 @@ def test_reduce(self): with pytest.warns(DeprecationWarning, match="allow_lazy is deprecated"): v.mean(dim="x", allow_lazy=False) + @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) @pytest.mark.parametrize( "axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]]) ) - def test_quantile(self, q, axis, dim): + def test_quantile(self, q, axis, dim, skipna): v = 
Variable(["x", "y"], self.d) - actual = v.quantile(q, dim=dim) - expected = np.nanpercentile(self.d, np.array(q) * 100, axis=axis) + actual = v.quantile(q, dim=dim, skipna=skipna) + _percentile_func = np.nanpercentile if skipna else np.percentile + expected = _percentile_func(self.d, np.array(q) * 100, axis=axis) np.testing.assert_allclose(actual.values, expected) @requires_dask From 603b0ad3f8a02a9e1180eb8dfc72f7f885f0e19a Mon Sep 17 00:00:00 2001 From: Mirko Panighel <30869713+mpanighel@users.noreply.github.com> Date: Sun, 8 Mar 2020 18:43:36 +0100 Subject: [PATCH 15/54] Add nxarray to related-projects.rst (#3848) --- doc/related-projects.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/related-projects.rst b/doc/related-projects.rst index 3188751366f..edee80b72b8 100644 --- a/doc/related-projects.rst +++ b/doc/related-projects.rst @@ -61,6 +61,7 @@ Extend xarray capabilities - `Collocate `_: Collocate xarray trajectories in arbitrary physical dimensions - `eofs `_: EOF analysis in Python. - `hypothesis-gufunc `_: Extension to hypothesis. Makes it easy to write unit tests with xarray objects as input. +- `nxarray `_: NeXus input/output capability for xarray. - `xarray_extras `_: Advanced algorithms for xarray objects (e.g. integrations/interpolations). - `xrft `_: Fourier transforms for xarray data. - `xr-scipy `_: A lightweight scipy wrapper for xarray. From 203c3f4ee1b4220b3fa3a073b5412fb7bd72525b Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Mon, 9 Mar 2020 03:11:55 -0400 Subject: [PATCH 16/54] remove panel conversion (#3845) --- doc/whats-new.rst | 3 +++ xarray/core/dataarray.py | 10 ++-------- xarray/tests/test_dataarray.py | 8 ++------ 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2c30db99bcd..ed94b84feea 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -95,6 +95,9 @@ Internal Changes By `Bruno Pagani `_. - Updated Azure CI MacOS image, given pending removal. By `Maximilian Roos `_ +- Removed conversion to :py:class:`pandas.Panel`, given its removal in pandas + in favor of xarray's objects. + By `Maximilian Roos `_ .. _whats-new.0.15.0: diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 7fcb42bf9d2..7a95aedc2f7 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2243,20 +2243,14 @@ def to_pandas(self) -> Union["DataArray", pd.Series, pd.DataFrame]: * 0D -> `xarray.DataArray` * 1D -> `pandas.Series` * 2D -> `pandas.DataFrame` - * 3D -> `pandas.Panel` *(deprecated)* - Only works for arrays with 3 or fewer dimensions. + Only works for arrays with 2 or fewer dimensions. The DataArray constructor performs the inverse transformation. """ # TODO: consolidate the info about pandas constructors and the # attributes that correspond to their indexes into a separate module? 
- constructors = { - 0: lambda x: x, - 1: pd.Series, - 2: pd.DataFrame, - 3: pdcompat.Panel, - } + constructors = {0: lambda x: x, 1: pd.Series, 2: pd.DataFrame} try: constructor = constructors[self.ndim] except KeyError: diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 33f1b403eb8..dfaf8fd4e28 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3411,14 +3411,10 @@ def test_to_pandas(self): assert_array_equal(actual.columns, [0, 1]) # roundtrips - for shape in [(3,), (3, 4), (3, 4, 5)]: - if len(shape) > 2 and LooseVersion(pd.__version__) >= "0.25.0": - continue + for shape in [(3,), (3, 4)]: dims = list("abc")[: len(shape)] da = DataArray(np.random.randn(*shape), dims=dims) - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", r"\W*Panel is deprecated") - roundtripped = DataArray(da.to_pandas()).drop_vars(dims) + roundtripped = DataArray(da.to_pandas()).drop_vars(dims) assert_identical(da, roundtripped) with raises_regex(ValueError, "cannot convert"): From f4ebbfef8f317205fba9edecadaac843dfa131f7 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 9 Mar 2020 08:18:06 +0100 Subject: [PATCH 17/54] un-xfail tests that append to netCDF files with scipy (#3805) * remove ScipyWriteBase class * add whats new Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 2 ++ xarray/tests/test_backends.py | 24 ++++-------------------- 2 files changed, 6 insertions(+), 20 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ed94b84feea..bc0e5092d5b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -95,6 +95,8 @@ Internal Changes By `Bruno Pagani `_. - Updated Azure CI MacOS image, given pending removal. By `Maximilian Roos `_ +- Removed xfails for scipy 1.0.1 for tests that append to netCDF files (:pull:`3805`). + By `Mathias Hauser `_. - Removed conversion to :py:class:`pandas.Panel`, given its removal in pandas in favor of xarray's objects. 
By `Maximilian Roos `_ diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 015d2cbfdeb..59ed8e690cc 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1979,24 +1979,8 @@ def create_zarr_target(self): yield tmp -class ScipyWriteBase(CFEncodedBase, NetCDF3Only): - def test_append_write(self): - import scipy - - if scipy.__version__ == "1.0.1": - pytest.xfail("https://github.com/scipy/scipy/issues/8625") - super().test_append_write() - - def test_append_overwrite_values(self): - import scipy - - if scipy.__version__ == "1.0.1": - pytest.xfail("https://github.com/scipy/scipy/issues/8625") - super().test_append_overwrite_values() - - @requires_scipy -class TestScipyInMemoryData(ScipyWriteBase): +class TestScipyInMemoryData(CFEncodedBase, NetCDF3Only): engine = "scipy" @contextlib.contextmanager @@ -2017,7 +2001,7 @@ def test_bytes_pickle(self): @requires_scipy -class TestScipyFileObject(ScipyWriteBase): +class TestScipyFileObject(CFEncodedBase, NetCDF3Only): engine = "scipy" @contextlib.contextmanager @@ -2050,7 +2034,7 @@ def test_pickle_dataarray(self): @requires_scipy -class TestScipyFilePath(ScipyWriteBase): +class TestScipyFilePath(CFEncodedBase, NetCDF3Only): engine = "scipy" @contextlib.contextmanager @@ -3317,7 +3301,7 @@ def test_session(self): @requires_scipy @requires_pynio -class TestPyNio(ScipyWriteBase): +class TestPyNio(CFEncodedBase, NetCDF3Only): def test_write_store(self): # pynio is read-only for now pass From 9f97c4384f6456a5582f2bf7277c90be110fce92 Mon Sep 17 00:00:00 2001 From: keewis Date: Mon, 9 Mar 2020 08:40:45 +0100 Subject: [PATCH 18/54] Pint support for top-level functions (#3611) * get the align tests to pass * add pint to the upstream-dev ci job * special case for booleans * silence the pint behaviour change warning * preprocess the unit mapping parameter to convert_units * use assert_allclose and assert_identical instead * clean up a few tests * remove some xfails * use the unit registry's quantity class * explain the catch_warnings block * don't use the function wrapper class if we don't need arguments * whats-new.rst * require the new pint version * use functools.partial instead of function * remove the convert_from parameter of array_attach_units * make sure every top-level function test uses assert_units_equal * hide the traceback of the unit comparison function * considerably simplify the merge_dataarray test * simplify the merge_dataset test --- ci/requirements/py36-min-nep18.yml | 2 +- doc/whats-new.rst | 2 +- xarray/tests/test_units.py | 356 ++++++++++++++++------------- 3 files changed, 203 insertions(+), 157 deletions(-) diff --git a/ci/requirements/py36-min-nep18.yml b/ci/requirements/py36-min-nep18.yml index c10fdf67dc4..a5eded49cd4 100644 --- a/ci/requirements/py36-min-nep18.yml +++ b/ci/requirements/py36-min-nep18.yml @@ -11,7 +11,7 @@ dependencies: - msgpack-python=0.6 # remove once distributed is bumped. distributed GH3491 - numpy=1.17 - pandas=0.25 - - pint=0.9 # Actually not enough as it doesn't implement __array_function__yet! + - pint=0.11 - pip - pytest - pytest-cov diff --git a/doc/whats-new.rst b/doc/whats-new.rst index bc0e5092d5b..00c63b81260 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -32,7 +32,7 @@ New Features - Support new h5netcdf backend keyword `phony_dims` (available from h5netcdf v0.8.0 for :py:class:`~xarray.backends.H5NetCDFStore`. By `Kai Mühlbauer `_. -- implement pint support. (:issue:`3594`, :pull:`3706`) +- Support unit aware arrays with pint. 
(:issue:`3594`, :pull:`3706`, :pull:`3611`) By `Justus Magin `_. - :py:meth:`Dataset.groupby` and :py:meth:`DataArray.groupby` now raise a `TypeError` on multiple string arguments. Receiving multiple string arguments diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 9f63ebb1d42..bef3af62d74 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -1,3 +1,4 @@ +import functools import operator from distutils.version import LooseVersion @@ -8,6 +9,7 @@ import xarray as xr from xarray.core import formatting from xarray.core.npcompat import IS_NEP18_ACTIVE +from xarray.testing import assert_allclose, assert_identical from .test_variable import VariableSubclassobjects @@ -70,53 +72,17 @@ def array_strip_units(array): return array -def array_attach_units(data, unit, convert_from=None): - try: - unit, convert_from = unit - except TypeError: - pass - +def array_attach_units(data, unit): if isinstance(data, Quantity): - if not convert_from: - raise ValueError( - "cannot attach unit {unit} to quantity ({data.units})".format( - unit=unit, data=data - ) - ) - elif isinstance(convert_from, unit_registry.Unit): - data = data.magnitude - elif convert_from is True: # intentionally accept exactly true - if data.check(unit): - convert_from = data.units - data = data.magnitude - else: - raise ValueError( - "cannot convert quantity ({data.units}) to {unit}".format( - unit=unit, data=data - ) - ) - else: - raise ValueError( - "cannot convert from invalid unit {convert_from}".format( - convert_from=convert_from - ) - ) + raise ValueError(f"cannot attach unit {unit} to quantity {data}") - # to make sure we also encounter the case of "equal if converted" - if convert_from is not None: - quantity = (data * convert_from).to( - unit - if isinstance(unit, unit_registry.Unit) - else unit_registry.dimensionless - ) - else: - try: - quantity = data * unit - except np.core._exceptions.UFuncTypeError: - if unit != 1: - raise + try: + quantity = data * unit + except np.core._exceptions.UFuncTypeError: + if isinstance(unit, unit_registry.Unit): + raise - quantity = data + quantity = data return quantity @@ -241,6 +207,11 @@ def attach_units(obj, units): def convert_units(obj, to): + # preprocess + to = { + key: None if not isinstance(value, unit_registry.Unit) else value + for key, value in to.items() + } if isinstance(obj, xr.Dataset): data_vars = { name: convert_units(array.variable, {None: to.get(name)}) @@ -282,6 +253,7 @@ def convert_units(obj, to): def assert_units_equal(a, b): + __tracebackhide__ = True assert extract_units(a) == extract_units(b) @@ -414,9 +386,8 @@ def __repr__(self): return f"function_{self.name}" -@pytest.mark.xfail(reason="test bug: apply_ufunc should not be called that way") def test_apply_ufunc_dataarray(dtype): - func = function( + func = functools.partial( xr.apply_ufunc, np.mean, input_core_dims=[["x"]], kwargs={"axis": -1} ) @@ -427,12 +398,12 @@ def test_apply_ufunc_dataarray(dtype): expected = attach_units(func(strip_units(data_array)), extract_units(data_array)) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) -@pytest.mark.xfail(reason="test bug: apply_ufunc should not be called that way") def test_apply_ufunc_dataset(dtype): - func = function( + func = functools.partial( xr.apply_ufunc, np.mean, input_core_dims=[["x"]], kwargs={"axis": -1} ) @@ -450,10 +421,10 @@ def test_apply_ufunc_dataset(dtype): expected = attach_units(func(strip_units(ds)), 
extract_units(ds)) actual = func(ds) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) -@pytest.mark.xfail(reason="blocked by `reindex` / `where`") @pytest.mark.parametrize( "unit,error", ( @@ -475,36 +446,40 @@ def test_apply_ufunc_dataset(dtype): "coords", ), ) -@pytest.mark.parametrize("fill_value", (np.float64(10), np.float64(np.nan))) +@pytest.mark.parametrize("fill_value", (10, np.nan)) def test_align_dataarray(fill_value, variant, unit, error, dtype): original_unit = unit_registry.m variants = { - "data": (unit, 1, 1), - "dims": (original_unit, unit, 1), - "coords": (original_unit, 1, unit), + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), } data_unit, dim_unit, coord_unit = variants.get(variant) array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * original_unit array2 = np.linspace(0, 8, 2 * 5).reshape(2, 5).astype(dtype) * data_unit x = np.arange(2) * original_unit - x_a1 = np.array([10, 5]) * original_unit - x_a2 = np.array([10, 5]) * coord_unit y1 = np.arange(5) * original_unit y2 = np.arange(2, 7) * dim_unit + y_a1 = np.array([3, 5, 7, 8, 9]) * original_unit + y_a2 = np.array([7, 8, 9, 11, 13]) * coord_unit - data_array1 = xr.DataArray( - data=array1, coords={"x": x, "x_a": ("x", x_a1), "y": y1}, dims=("x", "y") - ) - data_array2 = xr.DataArray( - data=array2, coords={"x": x, "x_a": ("x", x_a2), "y": y2}, dims=("x", "y") - ) + coords1 = {"x": x, "y": y1} + coords2 = {"x": x, "y": y2} + if variant == "coords": + coords1["y_a"] = ("y", y_a1) + coords2["y_a"] = ("y", y_a2) + + data_array1 = xr.DataArray(data=array1, coords=coords1, dims=("x", "y")) + data_array2 = xr.DataArray(data=array2, coords=coords2, dims=("x", "y")) fill_value = fill_value * data_unit func = function(xr.align, join="outer", fill_value=fill_value) - if error is not None: + if error is not None and not ( + np.isnan(fill_value) and not isinstance(fill_value, Quantity) + ): with pytest.raises(error): func(data_array1, data_array2) @@ -524,15 +499,19 @@ def test_align_dataarray(fill_value, variant, unit, error, dtype): **stripped_kwargs, ) expected_a = attach_units(expected_a, units_a) - expected_b = convert_units(attach_units(expected_b, units_a), units_b) + if isinstance(array2, Quantity): + expected_b = convert_units(attach_units(expected_b, units_a), units_b) + else: + expected_b = attach_units(expected_b, units_b) actual_a, actual_b = func(data_array1, data_array2) - assert_equal_with_units(expected_a, actual_a) - assert_equal_with_units(expected_b, actual_b) + assert_units_equal(expected_a, actual_a) + assert_allclose(expected_a, actual_a) + assert_units_equal(expected_b, actual_b) + assert_allclose(expected_b, actual_b) -@pytest.mark.xfail(reason="blocked by `reindex` / `where`") @pytest.mark.parametrize( "unit,error", ( @@ -558,31 +537,37 @@ def test_align_dataarray(fill_value, variant, unit, error, dtype): def test_align_dataset(fill_value, unit, variant, error, dtype): original_unit = unit_registry.m - variants = {"data": (unit, 1, 1), "dims": (1, unit, 1), "coords": (1, 1, unit)} + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } data_unit, dim_unit, coord_unit = variants.get(variant) array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * original_unit array2 = np.linspace(0, 10, 2 * 5).reshape(2, 
5).astype(dtype) * data_unit x = np.arange(2) * original_unit - x_a1 = np.array([10, 5]) * original_unit - x_a2 = np.array([10, 5]) * coord_unit y1 = np.arange(5) * original_unit y2 = np.arange(2, 7) * dim_unit + y_a1 = np.array([3, 5, 7, 8, 9]) * original_unit + y_a2 = np.array([7, 8, 9, 11, 13]) * coord_unit - ds1 = xr.Dataset( - data_vars={"a": (("x", "y"), array1)}, - coords={"x": x, "x_a": ("x", x_a1), "y": y1}, - ) - ds2 = xr.Dataset( - data_vars={"a": (("x", "y"), array2)}, - coords={"x": x, "x_a": ("x", x_a2), "y": y2}, - ) + coords1 = {"x": x, "y": y1} + coords2 = {"x": x, "y": y2} + if variant == "coords": + coords1["y_a"] = ("y", y_a1) + coords2["y_a"] = ("y", y_a2) + + ds1 = xr.Dataset(data_vars={"a": (("x", "y"), array1)}, coords=coords1) + ds2 = xr.Dataset(data_vars={"a": (("x", "y"), array2)}, coords=coords2) fill_value = fill_value * data_unit func = function(xr.align, join="outer", fill_value=fill_value) - if error is not None: + if error is not None and not ( + np.isnan(fill_value) and not isinstance(fill_value, Quantity) + ): with pytest.raises(error): func(ds1, ds2) @@ -600,12 +585,17 @@ def test_align_dataset(fill_value, unit, variant, error, dtype): strip_units(ds1), strip_units(convert_units(ds2, units_a)), **stripped_kwargs ) expected_a = attach_units(expected_a, units_a) - expected_b = convert_units(attach_units(expected_b, units_a), units_b) + if isinstance(array2, Quantity): + expected_b = convert_units(attach_units(expected_b, units_a), units_b) + else: + expected_b = attach_units(expected_b, units_b) actual_a, actual_b = func(ds1, ds2) - assert_equal_with_units(expected_a, actual_a) - assert_equal_with_units(expected_b, actual_b) + assert_units_equal(expected_a, actual_a) + assert_allclose(expected_a, actual_a) + assert_units_equal(expected_b, actual_b) + assert_allclose(expected_b, actual_b) def test_broadcast_dataarray(dtype): @@ -615,28 +605,53 @@ def test_broadcast_dataarray(dtype): a = xr.DataArray(data=array1, dims="x") b = xr.DataArray(data=array2, dims="y") - expected_a, expected_b = tuple( - attach_units(elem, extract_units(a)) - for elem in xr.broadcast(strip_units(a), strip_units(b)) - ) + units_a = extract_units(a) + units_b = extract_units(b) + expected_a, expected_b = xr.broadcast(strip_units(a), strip_units(b)) + expected_a = attach_units(expected_a, units_a) + expected_b = convert_units(attach_units(expected_b, units_a), units_b) + actual_a, actual_b = xr.broadcast(a, b) - assert_equal_with_units(expected_a, actual_a) - assert_equal_with_units(expected_b, actual_b) + assert_units_equal(expected_a, actual_a) + assert_identical(expected_a, actual_a) + assert_units_equal(expected_b, actual_b) + assert_identical(expected_b, actual_b) def test_broadcast_dataset(dtype): array1 = np.linspace(0, 10, 2) * unit_registry.Pa array2 = np.linspace(0, 10, 3) * unit_registry.Pa - ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("y", array2)}) + x1 = np.arange(2) + y1 = np.arange(3) + + x2 = np.arange(2, 4) + y2 = np.arange(3, 6) - (expected,) = tuple( - attach_units(elem, extract_units(ds)) for elem in xr.broadcast(strip_units(ds)) + ds = xr.Dataset( + data_vars={"a": ("x", array1), "b": ("y", array2)}, coords={"x": x1, "y": y1} + ) + other = xr.Dataset( + data_vars={ + "a": ("x", array1.to(unit_registry.hPa)), + "b": ("y", array2.to(unit_registry.hPa)), + }, + coords={"x": x2, "y": y2}, ) - (actual,) = xr.broadcast(ds) - assert_equal_with_units(expected, actual) + units_a = extract_units(ds) + units_b = extract_units(other) + expected_a, expected_b = 
xr.broadcast(strip_units(ds), strip_units(other)) + expected_a = attach_units(expected_a, units_a) + expected_b = attach_units(expected_b, units_b) + + actual_a, actual_b = xr.broadcast(ds, other) + + assert_units_equal(expected_a, actual_a) + assert_identical(expected_a, actual_a) + assert_units_equal(expected_b, actual_b) + assert_identical(expected_b, actual_b) @pytest.mark.parametrize( @@ -706,7 +721,8 @@ def test_combine_by_coords(variant, unit, error, dtype): ) actual = xr.combine_by_coords([ds, other]) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( @@ -717,12 +733,7 @@ def test_combine_by_coords(variant, unit, error, dtype): unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param( - unit_registry.mm, - None, - id="compatible_unit", - marks=pytest.mark.xfail(reason="wrong order of arguments to `where`"), - ), + pytest.param(unit_registry.mm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ids=repr, @@ -810,7 +821,8 @@ def test_combine_nested(variant, unit, error, dtype): ) actual = func([[ds1, ds2], [ds3, ds4]]) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( @@ -862,7 +874,8 @@ def test_concat_dataarray(variant, unit, error, dtype): ) actual = xr.concat([arr1, arr2], dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( @@ -912,10 +925,10 @@ def test_concat_dataset(variant, unit, error, dtype): ) actual = xr.concat([ds1, ds2], dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) -@pytest.mark.xfail(reason="blocked by `reindex` / `where`") @pytest.mark.parametrize( "unit,error", ( @@ -948,64 +961,81 @@ def test_merge_dataarray(variant, unit, error, dtype): data_unit, dim_unit, coord_unit = variants.get(variant) array1 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * original_unit + x1 = np.arange(2) * original_unit + y1 = np.arange(3) * original_unit + u1 = np.linspace(10, 20, 2) * original_unit + v1 = np.linspace(10, 20, 3) * original_unit + array2 = np.linspace(1, 2, 2 * 4).reshape(2, 4).astype(dtype) * data_unit - array3 = np.linspace(0, 2, 3 * 4).reshape(3, 4).astype(dtype) * data_unit + x2 = np.arange(2, 4) * dim_unit + z2 = np.arange(4) * original_unit + u2 = np.linspace(20, 30, 2) * coord_unit + w2 = np.linspace(10, 20, 4) * original_unit - x = np.arange(2) * original_unit - y = np.arange(3) * original_unit - z = np.arange(4) * original_unit - u = np.linspace(10, 20, 2) * original_unit - v = np.linspace(10, 20, 3) * original_unit - w = np.linspace(10, 20, 4) * original_unit + array3 = np.linspace(0, 2, 3 * 4).reshape(3, 4).astype(dtype) * data_unit + y3 = np.arange(3, 6) * dim_unit + z3 = np.arange(4, 8) * dim_unit + v3 = np.linspace(10, 20, 3) * coord_unit + w3 = np.linspace(10, 20, 4) * coord_unit arr1 = xr.DataArray( name="a", data=array1, - coords={"x": x, "y": y, "u": ("x", u), "v": ("y", v)}, + coords={"x": x1, "y": y1, "u": ("x", u1), "v": ("y", v1)}, dims=("x", "y"), ) arr2 = xr.DataArray( - name="b", + name="a", data=array2, - coords={ - "x": np.arange(2, 4) * dim_unit, - "z": z, - "u": ("x", np.linspace(20, 30, 2) * coord_unit), - "w": ("z", w), 
- }, + coords={"x": x2, "z": z2, "u": ("x", u2), "w": ("z", w2)}, dims=("x", "z"), ) arr3 = xr.DataArray( - name="c", + name="a", data=array3, - coords={ - "y": np.arange(3, 6) * dim_unit, - "z": np.arange(4, 8) * dim_unit, - "v": ("y", np.linspace(10, 20, 3) * coord_unit), - "w": ("z", np.linspace(10, 20, 4) * coord_unit), - }, + coords={"y": y3, "z": z3, "v": ("y", v3), "w": ("z", w3)}, dims=("y", "z"), ) - func = function(xr.merge) if error is not None: with pytest.raises(error): - func([arr1, arr2, arr3]) + xr.merge([arr1, arr2, arr3]) return - units = {name: original_unit for name in list("abcuvwxyz")} + units = {name: original_unit for name in list("axyzuvw")} + convert_and_strip = lambda arr: strip_units(convert_units(arr, units)) - expected = attach_units( - func([strip_units(arr1), convert_and_strip(arr2), convert_and_strip(arr3)]), - units, + expected_units = { + "a": original_unit, + "u": original_unit, + "v": original_unit, + "w": original_unit, + "x": original_unit, + "y": original_unit, + "z": original_unit, + } + + expected = convert_units( + attach_units( + xr.merge( + [ + convert_and_strip(arr1), + convert_and_strip(arr2), + convert_and_strip(arr3), + ] + ), + units, + ), + expected_units, ) - actual = func([arr1, arr2, arr3]) - assert_equal_with_units(expected, actual) + actual = xr.merge([arr1, arr2, arr3]) + + assert_units_equal(expected, actual) + assert_allclose(expected, actual) -@pytest.mark.xfail(reason="blocked by `reindex` / `where`") @pytest.mark.parametrize( "unit,error", ( @@ -1046,7 +1076,7 @@ def test_merge_dataset(variant, unit, error, dtype): ds1 = xr.Dataset( data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)}, - coords={"x": x, "y": y, "z": ("x", z)}, + coords={"x": x, "y": y, "u": ("x", z)}, ) ds2 = xr.Dataset( data_vars={ @@ -1056,18 +1086,18 @@ def test_merge_dataset(variant, unit, error, dtype): coords={ "x": np.arange(3) * dim_unit, "y": np.arange(2, 4) * dim_unit, - "z": ("x", np.arange(-3, 0) * coord_unit), + "u": ("x", np.arange(-3, 0) * coord_unit), }, ) ds3 = xr.Dataset( data_vars={ - "a": (("y", "x"), np.zeros_like(array1) * np.nan * data_unit), - "b": (("y", "x"), np.zeros_like(array2) * np.nan * data_unit), + "a": (("y", "x"), np.full_like(array1, np.nan) * data_unit), + "b": (("y", "x"), np.full_like(array2, np.nan) * data_unit), }, coords={ "x": np.arange(3, 6) * dim_unit, "y": np.arange(4, 6) * dim_unit, - "z": ("x", np.arange(3, 6) * coord_unit), + "u": ("x", np.arange(3, 6) * coord_unit), }, ) @@ -1080,12 +1110,20 @@ def test_merge_dataset(variant, unit, error, dtype): units = extract_units(ds1) convert_and_strip = lambda ds: strip_units(convert_units(ds, units)) - expected = attach_units( - func([strip_units(ds1), convert_and_strip(ds2), convert_and_strip(ds3)]), units + expected_units = {name: original_unit for name in list("abxyzu")} + expected = convert_units( + attach_units( + func( + [convert_and_strip(ds1), convert_and_strip(ds2), convert_and_strip(ds3)] + ), + units, + ), + expected_units, ) actual = func([ds1, ds2, ds3]) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_allclose(expected, actual) @pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like)) @@ -1094,10 +1132,12 @@ def test_replication_dataarray(func, dtype): data_array = xr.DataArray(data=array, dims="x") numpy_func = getattr(np, func.__name__) - expected = xr.DataArray(data=numpy_func(array), dims="x") + units = extract_units(numpy_func(data_array)) + expected = attach_units(func(data_array), units) 
actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like)) @@ -1114,12 +1154,13 @@ def test_replication_dataset(func, dtype): ) numpy_func = getattr(np, func.__name__) - expected = ds.copy( - data={name: numpy_func(array.data) for name, array in ds.data_vars.items()} - ) + units = extract_units(ds.map(numpy_func)) + expected = attach_units(func(strip_units(ds)), units) + actual = func(ds) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) @pytest.mark.xfail( @@ -1158,7 +1199,8 @@ def test_replication_full_like_dataarray(unit, error, dtype): ) actual = xr.full_like(data_array, fill_value=fill_value) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) @pytest.mark.xfail( @@ -1208,7 +1250,8 @@ def test_replication_full_like_dataset(unit, error, dtype): ) actual = xr.full_like(ds, fill_value=fill_value) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( @@ -1250,7 +1293,8 @@ def test_where_dataarray(fill_value, unit, error, dtype): ) actual = xr.where(cond, x, fill_value) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( @@ -1294,7 +1338,8 @@ def test_where_dataset(fill_value, unit, error, dtype): ) actual = xr.where(cond, ds, fill_value) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) def test_dot_dataarray(dtype): @@ -1315,7 +1360,8 @@ def test_dot_dataarray(dtype): ) actual = xr.dot(data_array, other) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_identical(expected, actual) def delete_attrs(*to_delete): From 1db010bb1f84c63c45c1317a78e89362587e1423 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 9 Mar 2020 15:07:02 +0100 Subject: [PATCH 19/54] update installation instruction (#3849) * installing.rst: update instructions * whats-new * explicit link and anchor * :doc: -> :ref: --- doc/installing.rst | 15 ++++++++------- doc/whats-new.rst | 3 +++ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/doc/installing.rst b/doc/installing.rst index dfc2841a956..a25bf65e342 100644 --- a/doc/installing.rst +++ b/doc/installing.rst @@ -11,6 +11,8 @@ Required dependencies - `numpy `__ (1.15 or later) - `pandas `__ (0.25 or later) +.. _optional-dependencies: + Optional dependencies --------------------- @@ -24,7 +26,7 @@ For netCDF and IO - `h5netcdf `__: an alternative library for reading and writing netCDF4 files that does not use the netCDF-C libraries - `pynio `__: for reading GRIB and other - geoscience specific file formats + geoscience specific file formats. Note that pynio is not available for Windows. - `zarr `__: for chunked, compressed, N-dimensional arrays. - `cftime `__: recommended if you want to encode/decode datetimes for non-standard calendars or dates before @@ -121,16 +123,15 @@ xarray itself is a pure Python package, but its dependencies are not. The easiest way to get everything installed is to use conda_. 
To install xarray with its recommended dependencies using the conda command line tool:: - $ conda install xarray dask netCDF4 bottleneck + $ conda install -c conda-forge xarray dask netCDF4 bottleneck .. _conda: http://conda.io/ -We recommend using the community maintained `conda-forge `__ channel if you need difficult\-to\-build dependencies such as cartopy, pynio or PseudoNetCDF:: - - $ conda install -c conda-forge xarray cartopy pynio pseudonetcdf +If you require other :ref:`optional-dependencies` add them to the line above. -New releases may also appear in conda-forge before being updated in the default -channel. +We recommend using the community maintained `conda-forge `__ channel, +as some of the dependencies are difficult to build. New releases may also appear in conda-forge before +being updated in the default channel. If you don't use conda, be sure you have the required dependencies (numpy and pandas) installed first. Then, install xarray with pip:: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 00c63b81260..3f04ba4ec57 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -81,6 +81,9 @@ Documentation - Fix documentation of :py:class:`DataArray` removing the deprecated mention that when omitted, `dims` are inferred from a `coords`-dict. (:pull:`3821`) By `Sander van Rijn `_. +- Update the installation instructions: only explicitly list recommended dependencies + (:issue:`3756`). + By `Mathias Hauser `_. Internal Changes ~~~~~~~~~~~~~~~~ From 7927c2b79e4dd7ecebb648e8e64e2647405b08db Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Mon, 9 Mar 2020 23:06:07 -0700 Subject: [PATCH 20/54] add xpublish to related projects (#3850) --- doc/related-projects.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/related-projects.rst b/doc/related-projects.rst index edee80b72b8..57b8da0c447 100644 --- a/doc/related-projects.rst +++ b/doc/related-projects.rst @@ -63,6 +63,7 @@ Extend xarray capabilities - `hypothesis-gufunc `_: Extension to hypothesis. Makes it easy to write unit tests with xarray objects as input. - `nxarray `_: NeXus input/output capability for xarray. - `xarray_extras `_: Advanced algorithms for xarray objects (e.g. integrations/interpolations). +- `xpublish `_: Publish Xarray Datasets via a Zarr compatible REST API. - `xrft `_: Fourier transforms for xarray data. - `xr-scipy `_: A lightweight scipy wrapper for xarray. - `X-regression `_: Multiple linear regression from Statsmodels library coupled with Xarray library. 
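Either install route above can be sanity-checked from the command line (commands are illustrative)::

    $ python -c "import xarray as xr; print(xr.__version__)"
    $ python -c "import xarray as xr; xr.show_versions()"  # full dependency report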
From 739b34767ddd19b6168af05ee749b527266c104d Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Tue, 10 Mar 2020 10:02:59 -0400 Subject: [PATCH 21/54] Doctests fixes (#3846) * start of doctest fixes * start of doctest fixes --- conftest.py | 11 +++++++++++ doc/contributing.rst | 8 ++++---- xarray/core/dataarray.py | 19 ++++++++++++------- xarray/core/dataset.py | 11 ++++++++--- xarray/core/rolling.py | 15 ++++++++------- 5 files changed, 43 insertions(+), 21 deletions(-) diff --git a/conftest.py b/conftest.py index 25dc284975e..712af1d3759 100644 --- a/conftest.py +++ b/conftest.py @@ -21,3 +21,14 @@ def pytest_runtest_setup(item): pytest.skip( "set --run-network-tests to run test requiring an " "internet connection" ) + + +@pytest.fixture(autouse=True) +def add_standard_imports(doctest_namespace): + import numpy as np + import pandas as pd + import xarray as xr + + doctest_namespace["np"] = np + doctest_namespace["pd"] = pd + doctest_namespace["xr"] = xr diff --git a/doc/contributing.rst b/doc/contributing.rst index eb31db24591..f581bcd9741 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -51,8 +51,8 @@ Bug reports must: `_:: ```python - >>> from xarray import Dataset - >>> df = Dataset(...) + >>> import xarray as xr + >>> df = xr.Dataset(...) ... ``` @@ -378,8 +378,8 @@ and then running:: pre-commit install -from the root of the xarray repository. You can skip the pre-commit checks with -``git commit --no-verify``. +from the root of the xarray repository. You can skip the pre-commit checks +with ``git commit --no-verify``. Backwards Compatibility diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 7a95aedc2f7..6782070da0b 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1475,20 +1475,23 @@ def swap_dims(self, dims_dict: Mapping[Hashable, Hashable]) -> "DataArray": Examples -------- + >>> arr = xr.DataArray(data=[0, 1], dims="x", - coords={"x": ["a", "b"], "y": ("x", [0, 1])}) + ... coords={"x": ["a", "b"], "y": ("x", [0, 1])}) >>> arr array([0, 1]) Coordinates: * x (x) >> arr.swap_dims({"x": "y"}) array([0, 1]) Coordinates: x (y) >> arr.swap_dims({"x": "z"}) array([0, 1]) @@ -1718,7 +1721,7 @@ def stack( Examples -------- - >>> arr = DataArray(np.arange(6).reshape(2, 3), + >>> arr = xr.DataArray(np.arange(6).reshape(2, 3), ... coords=[('x', ['a', 'b']), ('y', [0, 1, 2])]) >>> arr @@ -1768,7 +1771,7 @@ def unstack( Examples -------- - >>> arr = DataArray(np.arange(6).reshape(2, 3), + >>> arr = xr.DataArray(np.arange(6).reshape(2, 3), ... coords=[('x', ['a', 'b']), ('y', [0, 1, 2])]) >>> arr @@ -1817,7 +1820,7 @@ def to_unstacked_dataset(self, dim, level=0): Examples -------- >>> import xarray as xr - >>> arr = DataArray(np.arange(6).reshape(2, 3), + >>> arr = xr.DataArray(np.arange(6).reshape(2, 3), ... 
coords=[('x', ['a', 'b']), ('y', [0, 1, 2])]) >>> data = xr.Dataset({'a': arr, 'b': arr.isel(y=0)}) >>> data @@ -2623,7 +2626,7 @@ def plot(self) -> _PlotMethods: """ Access plotting functions for DataArray's - >>> d = DataArray([[1, 2], [3, 4]]) + >>> d = xr.DataArray([[1, 2], [3, 4]]) For convenience just call this directly @@ -2849,18 +2852,20 @@ def dot( -------- >>> da_vals = np.arange(6 * 5 * 4).reshape((6, 5, 4)) - >>> da = DataArray(da_vals, dims=['x', 'y', 'z']) + >>> da = xr.DataArray(da_vals, dims=['x', 'y', 'z']) >>> dm_vals = np.arange(4) - >>> dm = DataArray(dm_vals, dims=['z']) + >>> dm = xr.DataArray(dm_vals, dims=['z']) >>> dm.dims ('z') + >>> da.dims ('x', 'y', 'z') >>> dot_result = da.dot(dm) >>> dot_result.dims ('x', 'y') + """ if isinstance(other, Dataset): raise NotImplementedError( diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index f286236dd45..a4d20a79b7c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1011,7 +1011,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": >>> da = xr.DataArray(np.random.randn(2, 3)) >>> ds = xr.Dataset({'foo': da, 'bar': ('x', [-1, 2])}, - coords={'x': ['one', 'two']}) + ... coords={'x': ['one', 'two']}) >>> ds.copy() Dimensions: (dim_0: 2, dim_1: 3, x: 2) @@ -1021,6 +1021,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": Data variables: foo (dim_0, dim_1) float64 -0.8079 0.3897 -1.862 -0.6091 -1.051 -0.3003 bar (x) int64 -1 2 + >>> ds_0 = ds.copy(deep=False) >>> ds_0['foo'][0, 0] = 7 >>> ds_0 @@ -1032,6 +1033,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": Data variables: foo (dim_0, dim_1) float64 7.0 0.3897 -1.862 -0.6091 -1.051 -0.3003 bar (x) int64 -1 2 + >>> ds Dimensions: (dim_0: 2, dim_1: 3, x: 2) @@ -1055,6 +1057,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": Data variables: foo (dim_0, dim_1) int64 0 1 2 3 4 5 bar (x) >> ds Dimensions: (dim_0: 2, dim_1: 3, x: 2) @@ -2883,7 +2886,7 @@ def swap_dims( Examples -------- >>> ds = xr.Dataset(data_vars={"a": ("x", [5, 7]), "b": ("x", [0.1, 2.4])}, - coords={"x": ["a", "b"], "y": ("x", [0, 1])}) + ... coords={"x": ["a", "b"], "y": ("x", [0, 1])}) >>> ds Dimensions: (x: 2) @@ -2893,6 +2896,7 @@ def swap_dims( Data variables: a (x) int64 5 7 b (x) float64 0.1 2.4 + >>> ds.swap_dims({"x": "y"}) Dimensions: (y: 2) @@ -2902,6 +2906,7 @@ def swap_dims( Data variables: a (y) int64 5 7 b (y) float64 0.1 2.4 + >>> ds.swap_dims({"x": "z"}) Dimensions: (z: 2) @@ -3341,7 +3346,7 @@ def to_stacked_array( Examples -------- - >>> data = Dataset( + >>> data = xr.Dataset( ... data_vars={'a': (('x', 'y'), [[0, 1, 2], [3, 4, 5]]), ... 'b': ('x', [6, 7])}, ... 
coords={'y': ['u', 'v', 'w']} diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 61178cfb15f..5f633abbde6 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -231,21 +231,22 @@ def construct(self, window_dim, stride=1, fill_value=dtypes.NA): Examples -------- - >>> da = DataArray(np.arange(8).reshape(2, 4), dims=('a', 'b')) - >>> + >>> da = xr.DataArray(np.arange(8).reshape(2, 4), dims=('a', 'b')) + >>> rolling = da.rolling(b=3) >>> rolling.construct('window_dim') array([[[np.nan, np.nan, 0], [np.nan, 0, 1], [0, 1, 2], [1, 2, 3]], [[np.nan, np.nan, 4], [np.nan, 4, 5], [4, 5, 6], [5, 6, 7]]]) Dimensions without coordinates: a, b, window_dim - >>> + >>> rolling = da.rolling(b=3, center=True) >>> rolling.construct('window_dim') array([[[np.nan, 0, 1], [0, 1, 2], [1, 2, 3], [2, 3, np.nan]], [[np.nan, 4, 5], [4, 5, 6], [5, 6, 7], [6, 7, np.nan]]]) Dimensions without coordinates: a, b, window_dim + """ from .dataarray import DataArray @@ -278,26 +279,26 @@ def reduce(self, func, **kwargs): Examples -------- - >>> da = DataArray(np.arange(8).reshape(2, 4), dims=('a', 'b')) - >>> + >>> da = xr.DataArray(np.arange(8).reshape(2, 4), dims=('a', 'b')) >>> rolling = da.rolling(b=3) >>> rolling.construct('window_dim') array([[[np.nan, np.nan, 0], [np.nan, 0, 1], [0, 1, 2], [1, 2, 3]], [[np.nan, np.nan, 4], [np.nan, 4, 5], [4, 5, 6], [5, 6, 7]]]) Dimensions without coordinates: a, b, window_dim - >>> + >>> rolling.reduce(np.sum) array([[nan, nan, 3., 6.], [nan, nan, 15., 18.]]) Dimensions without coordinates: a, b - >>> + >>> rolling = da.rolling(b=3, min_periods=1) >>> rolling.reduce(np.nansum) array([[ 0., 1., 3., 6.], [ 4., 9., 15., 18.]]) + """ rolling_dim = utils.get_temp_dimname(self.obj.dims, "_rolling_dim") windows = self.construct(rolling_dim) From 650a981734ce3291f5aaa68648ebde451339f28a Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Fri, 13 Mar 2020 02:14:41 -0400 Subject: [PATCH 22/54] Fix CFTimeIndex-related errors stemming from updates in pandas (#3764) * Allow subtraction of a generic Index of cftime.datetimes from a CFTimeIndex * black * Test that NotImplemented logic works * Vendor _get_nearest_indexer and _filter_indexer_tolerance * Test OverflowError in __rsub__ * Fix name of pandas method in docstring * Add what's new entries * Enable use of tolerance greater than 292 years * newlinw Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 6 +++ xarray/coding/cftimeindex.py | 54 +++++++++++++++++++++++---- xarray/tests/test_cftimeindex.py | 63 +++++++++++++++++++++++++++++++- 3 files changed, 113 insertions(+), 10 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3f04ba4ec57..80309dc4673 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -96,6 +96,12 @@ Internal Changes - Changed test_open_mfdataset_list_attr to only run with dask installed (:issue:`3777`, :pull:`3780`). By `Bruno Pagani `_. +- Preserved the ability to index with ``method="nearest"`` with a + :py:class:`CFTimeIndex` with pandas versions greater than 1.0.1 + (:issue:`3751`). By `Spencer Clark `_. +- Greater flexibility and improved test coverage of subtracting various types + of objects from a :py:class:`CFTimeIndex`. By `Spencer Clark + `_. - Updated Azure CI MacOS image, given pending removal. By `Maximilian Roos `_ - Removed xfails for scipy 1.0.1 for tests that append to netCDF files (:pull:`3805`). 
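A sketch of the restored indexing behaviour (requires ``cftime``; dates and tolerances are illustrative)::

    from datetime import timedelta

    import xarray as xr

    times = xr.cftime_range("2000-01-01", periods=4, freq="D", calendar="noleap")
    da = xr.DataArray(range(4), coords=[times], dims="time")

    # nearest-neighbour selection works again with newer pandas ...
    da.sel(time=times.date_type(2000, 1, 2, 12), method="nearest")

    # ... and tolerances beyond the ~292-year nanosecond limit no longer overflow
    da.sel(
        time=times.date_type(2300, 1, 1),
        method="nearest",
        tolerance=timedelta(days=365 * 400),
    )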
diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 99f90430e91..1ea5d3a7d11 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -49,6 +49,7 @@ from xarray.core.utils import is_scalar +from ..core.common import _contains_cftime_datetimes from .times import _STANDARD_CALENDARS, cftime_to_nptime, infer_calendar_name @@ -326,6 +327,32 @@ def _get_string_slice(self, key): raise KeyError(key) return loc + def _get_nearest_indexer(self, target, limit, tolerance): + """Adapted from pandas.Index._get_nearest_indexer""" + left_indexer = self.get_indexer(target, "pad", limit=limit) + right_indexer = self.get_indexer(target, "backfill", limit=limit) + left_distances = abs(self.values[left_indexer] - target.values) + right_distances = abs(self.values[right_indexer] - target.values) + + if self.is_monotonic_increasing: + condition = (left_distances < right_distances) | (right_indexer == -1) + else: + condition = (left_distances <= right_distances) | (right_indexer == -1) + indexer = np.where(condition, left_indexer, right_indexer) + + if tolerance is not None: + indexer = self._filter_indexer_tolerance(target, indexer, tolerance) + return indexer + + def _filter_indexer_tolerance(self, target, indexer, tolerance): + """Adapted from pandas.Index._filter_indexer_tolerance""" + if isinstance(target, pd.Index): + distance = abs(self.values[indexer] - target.values) + else: + distance = abs(self.values[indexer] - target) + indexer = np.where(distance <= tolerance, indexer, -1) + return indexer + def get_loc(self, key, method=None, tolerance=None): """Adapted from pandas.tseries.index.DatetimeIndex.get_loc""" if isinstance(key, str): @@ -427,9 +454,11 @@ def __radd__(self, other): return CFTimeIndex(other + np.array(self)) def __sub__(self, other): - import cftime - - if isinstance(other, (CFTimeIndex, cftime.datetime)): + if _contains_datetime_timedeltas(other): + return CFTimeIndex(np.array(self) - other) + elif isinstance(other, pd.TimedeltaIndex): + return CFTimeIndex(np.array(self) - other.to_pytimedelta()) + elif _contains_cftime_datetimes(np.array(other)): try: return pd.TimedeltaIndex(np.array(self) - np.array(other)) except OverflowError: @@ -437,14 +466,17 @@ def __sub__(self, other): "The time difference exceeds the range of values " "that can be expressed at the nanosecond resolution." ) - - elif isinstance(other, pd.TimedeltaIndex): - return CFTimeIndex(np.array(self) - other.to_pytimedelta()) else: - return CFTimeIndex(np.array(self) - other) + return NotImplemented def __rsub__(self, other): - return pd.TimedeltaIndex(other - np.array(self)) + try: + return pd.TimedeltaIndex(other - np.array(self)) + except OverflowError: + raise ValueError( + "The time difference exceeds the range of values " + "that can be expressed at the nanosecond resolution." + ) def to_datetimeindex(self, unsafe=False): """If possible, convert this index to a pandas.DatetimeIndex. @@ -633,6 +665,12 @@ def _parse_array_of_cftime_strings(strings, date_type): ).reshape(strings.shape) +def _contains_datetime_timedeltas(array): + """Check if an input array contains datetime.timedelta objects.""" + array = np.atleast_1d(array) + return isinstance(array[0], timedelta) + + def _cftimeindex_from_i8(values, date_type, name): """Construct a CFTimeIndex from an array of integers. 
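The cases handled by the reworked ``__sub__``/``__rsub__`` in one sketch (assumes ``cftime`` is installed)::

    from datetime import timedelta

    import pandas as pd
    import xarray as xr

    index = xr.cftime_range("2000", periods=3)

    index - timedelta(days=1)                           # -> CFTimeIndex
    index - pd.TimedeltaIndex([timedelta(days=1)] * 3)  # -> CFTimeIndex
    index - pd.Index(index.values)                      # -> pd.TimedeltaIndex
    index - index                                       # -> pd.TimedeltaIndex
    # index - 1                                         # -> TypeError (NotImplemented)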
diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 8d83b833ca3..43d6d7b068e 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -451,10 +451,21 @@ def test_sel_date_scalar(da, date_type, index): @pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/3751") +@requires_cftime +def test_sel_date_distant_date(da, date_type, index): + expected = xr.DataArray(4).assign_coords(time=index[3]) + result = da.sel(time=date_type(2000, 1, 1), method="nearest") + assert_identical(result, expected) + + @requires_cftime @pytest.mark.parametrize( "sel_kwargs", - [{"method": "nearest"}, {"method": "nearest", "tolerance": timedelta(days=70)}], + [ + {"method": "nearest"}, + {"method": "nearest", "tolerance": timedelta(days=70)}, + {"method": "nearest", "tolerance": timedelta(days=1800000)}, + ], ) def test_sel_date_scalar_nearest(da, date_type, index, sel_kwargs): expected = xr.DataArray(2).assign_coords(time=index[1]) @@ -738,7 +749,7 @@ def test_timedeltaindex_add_cftimeindex(calendar): @requires_cftime -def test_cftimeindex_sub(index): +def test_cftimeindex_sub_timedelta(index): date_type = index.date_type expected_dates = [ date_type(1, 1, 2), @@ -753,6 +764,27 @@ def test_cftimeindex_sub(index): assert isinstance(result, CFTimeIndex) +@requires_cftime +@pytest.mark.parametrize( + "other", + [np.array(4 * [timedelta(days=1)]), np.array(timedelta(days=1))], + ids=["1d-array", "scalar-array"], +) +def test_cftimeindex_sub_timedelta_array(index, other): + date_type = index.date_type + expected_dates = [ + date_type(1, 1, 2), + date_type(1, 2, 2), + date_type(2, 1, 2), + date_type(2, 2, 2), + ] + expected = CFTimeIndex(expected_dates) + result = index + timedelta(days=2) + result = result - other + assert result.equals(expected) + assert isinstance(result, CFTimeIndex) + + @requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) def test_cftimeindex_sub_cftimeindex(calendar): @@ -784,6 +816,14 @@ def test_cftime_datetime_sub_cftimeindex(calendar): assert isinstance(result, pd.TimedeltaIndex) +@requires_cftime +@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) +def test_distant_cftime_datetime_sub_cftimeindex(calendar): + a = xr.cftime_range("2000", periods=5, calendar=calendar) + with pytest.raises(ValueError, match="difference exceeds"): + a.date_type(1, 1, 1) - a + + @requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) def test_cftimeindex_sub_timedeltaindex(calendar): @@ -795,6 +835,25 @@ def test_cftimeindex_sub_timedeltaindex(calendar): assert isinstance(result, CFTimeIndex) +@requires_cftime +@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) +def test_cftimeindex_sub_index_of_cftime_datetimes(calendar): + a = xr.cftime_range("2000", periods=5, calendar=calendar) + b = pd.Index(a.values) + expected = a - a + result = a - b + assert result.equals(expected) + assert isinstance(result, pd.TimedeltaIndex) + + +@requires_cftime +@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) +def test_cftimeindex_sub_not_implemented(calendar): + a = xr.cftime_range("2000", periods=5, calendar=calendar) + with pytest.raises(TypeError, match="unsupported operand"): + a - 1 + + @requires_cftime def test_cftimeindex_rsub(index): with pytest.raises(TypeError): From 7f4f027e69b42ae1eb93fce2df708d65c70c0a10 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 13 Mar 2020 13:25:12 +0000 Subject: [PATCH 23/54] Fix alignment with join="override" when some dims are unindexed (#3839) --- 
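A sketch mirroring the regression test below — the unindexed dimension ``x`` is now skipped when overriding indexes instead of triggering a ``KeyError``::

    import xarray as xr

    ds1 = xr.Dataset({"a": (("x", "y"), [[0]])}, coords={"y": [0]})
    ds2 = xr.Dataset({"a": (("x", "y"), [[0]])}, coords={"y": [0.0001]})

    # "x" carries no index, so join="override" leaves it alone
    combined = xr.concat([ds1, ds2], dim="y", join="override")
    assert combined.a.shape == (1, 2)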
doc/whats-new.rst | 2 ++ xarray/core/alignment.py | 2 +- xarray/tests/test_concat.py | 7 +++++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 80309dc4673..34d4342b028 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -55,6 +55,8 @@ New Features Bug fixes ~~~~~~~~~ +- Fix alignment with ``join="override"`` when some dimensions are unindexed. (:issue:`3681`). + By `Deepak Cherian `_. - Fix :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims` producing index with name reflecting the previous dimension name instead of the new one (:issue:`3748`, :pull:`3752`). By `Joseph K Aicher diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 908119f7995..a83b1b87aa4 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -50,7 +50,7 @@ def _override_indexes(objects, all_indexes, exclude): objects = list(objects) for idx, obj in enumerate(objects[1:]): new_indexes = {} - for dim in obj.dims: + for dim in obj.indexes: if dim not in exclude: new_indexes[dim] = all_indexes[dim][0] objects[idx + 1] = obj._overwrite_indexes(new_indexes) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index bd99181a947..77c030198ac 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -250,6 +250,13 @@ def test_concat_join_kwarg(self): actual = concat([ds1, ds2], join=join, dim="x") assert_equal(actual, expected[join]) + # regression test for #3681 + actual = concat([ds1.drop("x"), ds2.drop("x")], join="override", dim="y") + expected = Dataset( + {"a": (("x", "y"), np.array([0, 0], ndmin=2))}, coords={"y": [0, 0.0001]} + ) + assert_identical(actual, expected) + def test_concat_promote_shape(self): # mixed dims within variables objs = [Dataset({}, {"x": 0}), Dataset({"x": [1]})] From 0d95ebac19faa3af25ac369d1e8177535022c0d9 Mon Sep 17 00:00:00 2001 From: David Huard Date: Fri, 13 Mar 2020 09:58:37 -0400 Subject: [PATCH 24/54] Fix interp bug when indexer shares coordinates with array (#3758) * added test demonstrating interp bug for nd indexes sharing coordinate with array * fix test so it works with sel * support shared dimensions in interp * isort fixes * update whats new * Revert "isort fixes" This reverts commit 5df6c9c0f99376dbc43f2f30567661ee49c00655. * test requires scipy --- doc/whats-new.rst | 3 +++ xarray/core/dataset.py | 11 +++++++++++ xarray/tests/test_interp.py | 30 ++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 34d4342b028..df28837dcfa 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -54,6 +54,9 @@ New Features Bug fixes ~~~~~~~~~ +- Fix :py:meth:`Dataset.interp` when indexing array shares coordinates with the + indexed variable (:issue:`3252`). + By `David Huard `_. - Fix alignment with ``join="override"`` when some dimensions are unindexed. (:issue:`3681`). By `Deepak Cherian `_. diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index a4d20a79b7c..880c574c9cb 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2574,6 +2574,17 @@ def interp( coords = either_dict_or_kwargs(coords, coords_kwargs, "interp") indexers = dict(self._validate_interp_indexers(coords)) + if coords: + # This avoids broadcasting over coordinates that are both in + # the original array AND in the indexing array. It essentially + # forces interpolation along the shared coordinates. 
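            # Sketch of the case being handled (names are illustrative):
            #     da has dims ("a", "x") and ia = DataArray(..., dims=("y", "x"))
            #     is used as da.interp(a=ia); "x" is shared by array and indexer.
            # Adding the shared "x" to ``indexers`` below forces pointwise
            # interpolation along "x" instead of broadcasting a second,
            # independent "x" dimension.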
+ sdims = ( + set(self.dims) + .intersection(*[set(nx.dims) for nx in indexers.values()]) + .difference(coords.keys()) + ) + indexers.update({d: self.variables[d] for d in sdims}) + obj = self if assume_sorted else self.sortby([k for k in coords]) def maybe_variable(obj, k): diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index c2bec2166c8..9cc4933f462 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -244,6 +244,36 @@ def test_interpolate_nd(case): assert_allclose(actual.transpose("y", "z"), expected) +@requires_scipy +def test_interpolate_nd_nd(): + """Interpolate nd array with an nd indexer sharing coordinates.""" + # Create original array + a = [0, 2] + x = [0, 1, 2] + da = xr.DataArray( + np.arange(6).reshape(2, 3), dims=("a", "x"), coords={"a": a, "x": x} + ) + + # Create indexer into `a` with dimensions (y, x) + y = [10] + c = {"x": x, "y": y} + ia = xr.DataArray([[1, 2, 2]], dims=("y", "x"), coords=c) + out = da.interp(a=ia) + expected = xr.DataArray([[1.5, 4, 5]], dims=("y", "x"), coords=c) + xr.testing.assert_allclose(out.drop_vars("a"), expected) + + # If the *shared* indexing coordinates do not match, interp should fail. + with pytest.raises(ValueError): + c = {"x": [1], "y": y} + ia = xr.DataArray([[1]], dims=("y", "x"), coords=c) + da.interp(a=ia) + + with pytest.raises(ValueError): + c = {"x": [5, 6, 7], "y": y} + ia = xr.DataArray([[1]], dims=("y", "x"), coords=c) + da.interp(a=ia) + + @pytest.mark.parametrize("method", ["linear"]) @pytest.mark.parametrize("case", [0, 1]) def test_interpolate_scalar(method, case): From ae03616dbd30544cadf4ff85e66cffb582ab3481 Mon Sep 17 00:00:00 2001 From: Matthieu Ancellin <31126826+mancellin@users.noreply.github.com> Date: Fri, 13 Mar 2020 20:55:06 +0100 Subject: [PATCH 25/54] Fix multi-index with categorical values. (#3860) * Fix bug for multi-index with categorical values. See issue #3674. * Blacked. * Add line in whats-new.rst. * Remove forgotten print. Co-authored-by: Matthieu Ancellin --- doc/whats-new.rst | 3 +++ xarray/core/indexes.py | 2 ++ xarray/tests/test_dataset.py | 11 +++++++++++ 3 files changed, 16 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index df28837dcfa..9b78d046148 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -58,6 +58,9 @@ Bug fixes indexed variable (:issue:`3252`). By `David Huard `_. + +- Fix use of multi-index with categorical values (:issue:`3674`). + By `Matthieu Ancellin `_. - Fix alignment with ``join="override"`` when some dimensions are unindexed. (:issue:`3681`). By `Deepak Cherian `_. 
- Fix :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims` producing diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 06bf08cefd2..dea1767d50c 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -22,6 +22,8 @@ def remove_unused_levels_categories(index): for i, level in enumerate(index.levels): if isinstance(level, pd.CategoricalIndex): level = level[index.codes[i]].remove_unused_categories() + else: + level = level[index.codes[i]] levels.append(level) index = pd.MultiIndex.from_arrays(levels, names=index.names) elif isinstance(index, pd.CategoricalIndex): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index d2e8c6b7609..6a6c496591a 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1458,6 +1458,17 @@ def test_categorical_reindex(self): actual = ds.reindex(cat=["foo"])["cat"].values assert (actual == np.array(["foo"])).all() + def test_categorical_multiindex(self): + i1 = pd.Series([0, 0]) + cat = pd.CategoricalDtype(categories=["foo", "baz", "bar"]) + i2 = pd.Series(["baz", "bar"], dtype=cat) + + df = pd.DataFrame({"i1": i1, "i2": i2, "values": [1, 2]}).set_index( + ["i1", "i2"] + ) + actual = df.to_xarray() + assert actual["values"].shape == (1, 2) + def test_sel_drop(self): data = Dataset({"foo": ("x", [1, 2, 3])}, {"x": [0, 1, 2]}) expected = Dataset({"foo": 1}) From cafab46aac8f7a073a32ec5aa47e213a9810ed54 Mon Sep 17 00:00:00 2001 From: keewis Date: Sat, 14 Mar 2020 22:25:46 +0100 Subject: [PATCH 26/54] Blacken the doctest code in docstrings (#3857) * fix a few erroneous doctest blocks * blacken the doctest code * manually remove the trailing comma from doctest lines --- xarray/backends/api.py | 4 +- xarray/coding/cftime_offsets.py | 2 +- xarray/coding/cftimeindex.py | 46 ++++++----- xarray/coding/strings.py | 2 +- xarray/conventions.py | 4 +- xarray/core/accessor_dt.py | 10 +-- xarray/core/accessor_str.py | 2 +- xarray/core/alignment.py | 34 ++++---- xarray/core/combine.py | 6 +- xarray/core/common.py | 114 +++++++++++++++------------ xarray/core/computation.py | 31 ++++---- xarray/core/dataarray.py | 101 +++++++++++++----------- xarray/core/dataset.py | 133 ++++++++++++++++++-------------- xarray/core/extensions.py | 5 +- xarray/core/merge.py | 16 ++-- xarray/core/nputils.py | 2 +- xarray/core/options.py | 2 +- xarray/core/parallel.py | 15 ++-- xarray/core/rolling.py | 10 +-- xarray/core/rolling_exp.py | 4 +- xarray/core/variable.py | 8 +- 21 files changed, 307 insertions(+), 244 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 56cd0649989..e828faabc27 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1196,8 +1196,8 @@ def save_mfdataset( Save a dataset into one netCDF per year of data: - >>> years, datasets = zip(*ds.groupby('time.year')) - >>> paths = ['%s.nc' % y for y in years] + >>> years, datasets = zip(*ds.groupby("time.year")) + >>> paths = ["%s.nc" % y for y in years] >>> xr.save_mfdataset(datasets, paths) """ if mode == "w" and len(set(paths)) < len(paths): diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index eeb68508527..a2306331ca7 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -938,7 +938,7 @@ def cftime_range( This function returns a ``CFTimeIndex``, populated with ``cftime.datetime`` objects associated with the specified calendar type, e.g. 
- >>> xr.cftime_range(start='2000', periods=6, freq='2MS', calendar='noleap') + >>> xr.cftime_range(start="2000", periods=6, freq="2MS", calendar="noleap") CFTimeIndex([2000-01-01 00:00:00, 2000-03-01 00:00:00, 2000-05-01 00:00:00, 2000-07-01 00:00:00, 2000-09-01 00:00:00, 2000-11-01 00:00:00], dtype='object') diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 1ea5d3a7d11..c680a7e0bcf 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -269,29 +269,32 @@ def _partial_date_slice(self, resolution, parsed): >>> from cftime import DatetimeNoLeap >>> import pandas as pd >>> import xarray as xr - >>> da = xr.DataArray([1, 2], - coords=[[DatetimeNoLeap(2001, 1, 1), - DatetimeNoLeap(2001, 2, 1)]], - dims=['time']) - >>> da.sel(time='2001-01-01') + >>> da = xr.DataArray( + ... [1, 2], + ... coords=[[DatetimeNoLeap(2001, 1, 1), DatetimeNoLeap(2001, 2, 1)]], + ... dims=["time"], + ... ) + >>> da.sel(time="2001-01-01") array([1]) Coordinates: * time (time) object 2001-01-01 00:00:00 - >>> da = xr.DataArray([1, 2], - coords=[[pd.Timestamp(2001, 1, 1), - pd.Timestamp(2001, 2, 1)]], - dims=['time']) - >>> da.sel(time='2001-01-01') + >>> da = xr.DataArray( + ... [1, 2], + ... coords=[[pd.Timestamp(2001, 1, 1), pd.Timestamp(2001, 2, 1)]], + ... dims=["time"], + ... ) + >>> da.sel(time="2001-01-01") array(1) Coordinates: time datetime64[ns] 2001-01-01 - >>> da = xr.DataArray([1, 2], - coords=[[pd.Timestamp(2001, 1, 1, 1), - pd.Timestamp(2001, 2, 1)]], - dims=['time']) - >>> da.sel(time='2001-01-01') + >>> da = xr.DataArray( + ... [1, 2], + ... coords=[[pd.Timestamp(2001, 1, 1, 1), pd.Timestamp(2001, 2, 1)]], + ... dims=["time"], + ... ) + >>> da.sel(time="2001-01-01") array([1]) Coordinates: @@ -423,10 +426,10 @@ def shift(self, n, freq): Examples -------- - >>> index = xr.cftime_range('2000', periods=1, freq='M') + >>> index = xr.cftime_range("2000", periods=1, freq="M") >>> index CFTimeIndex([2000-01-31 00:00:00], dtype='object') - >>> index.shift(1, 'M') + >>> index.shift(1, "M") CFTimeIndex([2000-02-29 00:00:00], dtype='object') """ from .cftime_offsets import to_offset @@ -511,7 +514,7 @@ def to_datetimeindex(self, unsafe=False): Examples -------- >>> import xarray as xr - >>> times = xr.cftime_range('2000', periods=2, calendar='gregorian') + >>> times = xr.cftime_range("2000", periods=2, calendar="gregorian") >>> times CFTimeIndex([2000-01-01 00:00:00, 2000-01-02 00:00:00], dtype='object') >>> times.to_datetimeindex() @@ -550,9 +553,10 @@ def strftime(self, date_format): Examples -------- - >>> rng = xr.cftime_range(start='2000', periods=5, freq='2MS', - ... calendar='noleap') - >>> rng.strftime('%B %d, %Y, %r') + >>> rng = xr.cftime_range( + ... start="2000", periods=5, freq="2MS", calendar="noleap" + ... ) + >>> rng.strftime("%B %d, %Y, %r") Index(['January 01, 2000, 12:00:00 AM', 'March 01, 2000, 12:00:00 AM', 'May 01, 2000, 12:00:00 AM', 'July 01, 2000, 12:00:00 AM', 'September 01, 2000, 12:00:00 AM'], diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 6d383fcf318..35cc190ffe3 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -201,7 +201,7 @@ class StackedBytesArray(indexing.ExplicitlyIndexedNDArrayMixin): """Wrapper around array-like objects to create a new indexable object where values, when accessed, are automatically stacked along the last dimension. 
- >>> StackedBytesArray(np.array(['a', 'b', 'c']))[:] + >>> StackedBytesArray(np.array(["a", "b", "c"]))[:] array('abc', dtype='|S3') """ diff --git a/xarray/conventions.py b/xarray/conventions.py index a8b9906c153..df24d0d3d8d 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -19,7 +19,7 @@ class NativeEndiannessArray(indexing.ExplicitlyIndexedNDArrayMixin): big endian) into native endianness, so they can be used with Cython functions, such as those found in bottleneck and pandas. - >>> x = np.arange(5, dtype='>i2') + >>> x = np.arange(5, dtype=">i2") >>> x.dtype dtype('>i2') @@ -50,7 +50,7 @@ class BoolTypeArray(indexing.ExplicitlyIndexedNDArrayMixin): This is useful for decoding boolean arrays from integer typed netCDF variables. - >>> x = np.array([1, 0, 1, 1, 0], dtype='i1') + >>> x = np.array([1, 0, 1, 1, 0], dtype="i1") >>> x.dtype dtype('>i2') diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index de0e332b26c..2977596036c 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -250,8 +250,8 @@ class DatetimeAccessor(Properties): --------- >>> import xarray as xr >>> import pandas as pd - >>> dates = pd.date_range(start='2000/01/01', freq='D', periods=10) - >>> ts = xr.DataArray(dates, dims=('time')) + >>> dates = pd.date_range(start="2000/01/01", freq="D", periods=10) + >>> ts = xr.DataArray(dates, dims=("time")) >>> ts array(['2000-01-01T00:00:00.000000000', '2000-01-02T00:00:00.000000000', @@ -296,8 +296,8 @@ def strftime(self, date_format): Examples -------- - >>> rng = xr.Dataset({'time': datetime.datetime(2000, 1, 1)}) - >>> rng['time'].dt.strftime('%B %d, %Y, %r') + >>> rng = xr.Dataset({"time": datetime.datetime(2000, 1, 1)}) + >>> rng["time"].dt.strftime("%B %d, %Y, %r") array('January 01, 2000, 12:00:00 AM', dtype=object) """ @@ -400,7 +400,7 @@ class TimedeltaAccessor(Properties): >>> import pandas as pd >>> import xarray as xr >>> dates = pd.timedelta_range(start="1 day", freq="6H", periods=20) - >>> ts = xr.DataArray(dates, dims=('time')) + >>> ts = xr.DataArray(dates, dims=("time")) >>> ts array([ 86400000000000, 108000000000000, 129600000000000, 151200000000000, diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py index 6a975b948eb..5502ba72855 100644 --- a/xarray/core/accessor_str.py +++ b/xarray/core/accessor_str.py @@ -67,7 +67,7 @@ class StringAccessor: Similar to pandas, fields can be accessed through the `.str` attribute for applicable DataArrays. - >>> da = xr.DataArray(['some', 'text', 'in', 'an', 'array']) + >>> da = xr.DataArray(["some", "text", "in", "an", "array"]) >>> ds.str.len() array([4, 4, 2, 2, 5]) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index a83b1b87aa4..abc180e049c 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -121,10 +121,16 @@ def align( -------- >>> import xarray as xr - >>> x = xr.DataArray([[25, 35], [10, 24]], dims=('lat', 'lon'), - ... coords={'lat': [35., 40.], 'lon': [100., 120.]}) - >>> y = xr.DataArray([[20, 5], [7, 13]], dims=('lat', 'lon'), - ... coords={'lat': [35., 42.], 'lon': [100., 120.]}) + >>> x = xr.DataArray( + ... [[25, 35], [10, 24]], + ... dims=("lat", "lon"), + ... coords={"lat": [35.0, 40.0], "lon": [100.0, 120.0]}, + ... ) + >>> y = xr.DataArray( + ... [[20, 5], [7, 13]], + ... dims=("lat", "lon"), + ... coords={"lat": [35.0, 42.0], "lon": [100.0, 120.0]}, + ... 
) >>> x @@ -156,7 +162,7 @@ def align( * lat (lat) float64 35.0 * lon (lon) float64 100.0 120.0 - >>> a, b = xr.align(x, y, join='outer') + >>> a, b = xr.align(x, y, join="outer") >>> a array([[25., 35.], @@ -174,7 +180,7 @@ def align( * lat (lat) float64 35.0 40.0 42.0 * lon (lon) float64 100.0 120.0 - >>> a, b = xr.align(x, y, join='outer', fill_value=-999) + >>> a, b = xr.align(x, y, join="outer", fill_value=-999) >>> a array([[ 25, 35], @@ -192,7 +198,7 @@ def align( * lat (lat) float64 35.0 40.0 42.0 * lon (lon) float64 100.0 120.0 - >>> a, b = xr.align(x, y, join='left') + >>> a, b = xr.align(x, y, join="left") >>> a array([[25, 35], @@ -208,7 +214,7 @@ def align( * lat (lat) float64 35.0 40.0 * lon (lon) float64 100.0 120.0 - >>> a, b = xr.align(x, y, join='right') + >>> a, b = xr.align(x, y, join="right") >>> a array([[25., 35.], @@ -224,13 +230,13 @@ def align( * lat (lat) float64 35.0 42.0 * lon (lon) float64 100.0 120.0 - >>> a, b = xr.align(x, y, join='exact') + >>> a, b = xr.align(x, y, join="exact") Traceback (most recent call last): ... "indexes along dimension {!r} are not equal".format(dim) ValueError: indexes along dimension 'lat' are not equal - >>> a, b = xr.align(x, y, join='override') + >>> a, b = xr.align(x, y, join="override") >>> a array([[25, 35], @@ -674,8 +680,8 @@ def broadcast(*args, exclude=None): Broadcast two data arrays against one another to fill out their dimensions: - >>> a = xr.DataArray([1, 2, 3], dims='x') - >>> b = xr.DataArray([5, 6], dims='y') + >>> a = xr.DataArray([1, 2, 3], dims="x") + >>> b = xr.DataArray([5, 6], dims="y") >>> a array([1, 2, 3]) @@ -706,8 +712,8 @@ def broadcast(*args, exclude=None): Fill out the dimensions of all data variables in a dataset: - >>> ds = xr.Dataset({'a': a, 'b': b}) - >>> ds2, = xr.broadcast(ds) # use tuple unpacking to extract one dataset + >>> ds = xr.Dataset({"a": a, "b": b}) + >>> (ds2,) = xr.broadcast(ds) # use tuple unpacking to extract one dataset >>> ds2 Dimensions: (x: 3, y: 2) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 3f6e0e79351..1fa2df00352 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -412,7 +412,7 @@ def combine_nested( precipitation (x, y) float64 5.904 2.453 3.404 ... >>> ds_grid = [[x1y1, x1y2], [x2y1, x2y2]] - >>> combined = xr.combine_nested(ds_grid, concat_dim=['x', 'y']) + >>> combined = xr.combine_nested(ds_grid, concat_dim=["x", "y"]) Dimensions: (x: 4, y: 4) Dimensions without coordinates: x, y @@ -441,7 +441,7 @@ def combine_nested( precipitation (t) float64 5.904 2.453 3.404 ... >>> ds_grid = [[t1temp, t1precip], [t2temp, t2precip]] - >>> combined = xr.combine_nested(ds_grid, concat_dim=['t', None]) + >>> combined = xr.combine_nested(ds_grid, concat_dim=["t", None]) Dimensions: (t: 10) Dimensions without coordinates: t @@ -650,7 +650,7 @@ def combine_by_coords( temperature (y, x) float64 1.654 10.63 7.015 nan ... nan 12.46 2.22 15.96 precipitation (y, x) float64 0.2136 0.9974 0.7603 ... 0.6125 0.4654 0.5953 - >>> xr.combine_by_coords([x3, x1], join='override') + >>> xr.combine_by_coords([x3, x1], join="override") Dimensions: (x: 3, y: 4) Coordinates: diff --git a/xarray/core/common.py b/xarray/core/common.py index c80cb24c5b5..39aa7982091 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -418,9 +418,9 @@ def assign_coords(self, coords=None, **coords_kwargs): -------- Convert longitude coordinates from 0-359 to -180-179: - >>> da = xr.DataArray(np.random.rand(4), - ... coords=[np.array([358, 359, 0, 1])], - ... 
dims='lon') + >>> da = xr.DataArray( + ... np.random.rand(4), coords=[np.array([358, 359, 0, 1])], dims="lon", + ... ) >>> da array([0.28298 , 0.667347, 0.657938, 0.177683]) @@ -434,7 +434,7 @@ def assign_coords(self, coords=None, **coords_kwargs): The function also accepts dictionary arguments: - >>> da.assign_coords({'lon': (((da.lon + 180) % 360) - 180)}) + >>> da.assign_coords({"lon": (((da.lon + 180) % 360) - 180)}) array([0.28298 , 0.667347, 0.657938, 0.177683]) Coordinates: @@ -518,19 +518,13 @@ def pipe( You can write - >>> (ds.pipe(h) - ... .pipe(g, arg1=a) - ... .pipe(f, arg2=b, arg3=c) - ... ) + >>> (ds.pipe(h).pipe(g, arg1=a).pipe(f, arg2=b, arg3=c)) If you have a function that takes the data as (say) the second argument, pass a tuple indicating which keyword expects the data. For example, suppose ``f`` takes its data as ``arg2``: - >>> (ds.pipe(h) - ... .pipe(g, arg1=a) - ... .pipe((f, 'arg2'), arg1=a, arg3=c) - ... ) + >>> (ds.pipe(h).pipe(g, arg1=a).pipe((f, "arg2"), arg1=a, arg3=c)) Examples -------- @@ -539,7 +533,10 @@ def pipe( >>> import xarray as xr >>> x = xr.Dataset( ... { - ... "temperature_c": (("lat", "lon"), 20 * np.random.rand(4).reshape(2, 2)), + ... "temperature_c": ( + ... ("lat", "lon"), + ... 20 * np.random.rand(4).reshape(2, 2), + ... ), ... "precipitation": (("lat", "lon"), np.random.rand(4).reshape(2, 2)), ... }, ... coords={"lat": [10, 20], "lon": [150, 160]}, @@ -584,10 +581,9 @@ def pipe( precipitation (lat, lon) float64 2.731 2.719 2.848 2.467 >>> ( - ... x - ... .pipe(adder, arg=2) - ... .pipe(div, arg=2) - ... .pipe(sub_mult, sub_arg=2, mult_arg=2) + ... x.pipe(adder, arg=2) + ... .pipe(div, arg=2) + ... .pipe(sub_mult, sub_arg=2, mult_arg=2) ... ) Dimensions: (lat: 2, lon: 2) @@ -639,16 +635,17 @@ def groupby(self, group, squeeze: bool = True, restore_coord_dims: bool = None): -------- Calculate daily anomalies for daily data: - >>> da = xr.DataArray(np.linspace(0, 1826, num=1827), - ... coords=[pd.date_range('1/1/2000', '31/12/2004', - ... freq='D')], - ... dims='time') + >>> da = xr.DataArray( + ... np.linspace(0, 1826, num=1827), + ... coords=[pd.date_range("1/1/2000", "31/12/2004", freq="D")], + ... dims="time", + ... ) >>> da array([0.000e+00, 1.000e+00, 2.000e+00, ..., 1.824e+03, 1.825e+03, 1.826e+03]) Coordinates: * time (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 ... - >>> da.groupby('time.dayofyear') - da.groupby('time.dayofyear').mean('time') + >>> da.groupby("time.dayofyear") - da.groupby("time.dayofyear").mean("time") array([-730.8, -730.8, -730.8, ..., 730.2, 730.2, 730.5]) Coordinates: @@ -787,10 +784,15 @@ def rolling( -------- Create rolling seasonal average of monthly data e.g. DJF, JFM, ..., SON: - >>> da = xr.DataArray(np.linspace(0, 11, num=12), - ... coords=[pd.date_range('15/12/1999', - ... periods=12, freq=pd.DateOffset(months=1))], - ... dims='time') + >>> da = xr.DataArray( + ... np.linspace(0, 11, num=12), + ... coords=[ + ... pd.date_range( + ... "15/12/1999", periods=12, freq=pd.DateOffset(months=1), + ... ) + ... ], + ... dims="time", + ... ) >>> da array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11.]) @@ -804,7 +806,7 @@ def rolling( Remove the NaNs using ``dropna()``: - >>> da.rolling(time=3, center=True).mean().dropna('time') + >>> da.rolling(time=3, center=True).mean().dropna("time") array([ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10.]) Coordinates: @@ -906,10 +908,11 @@ def coarsen( -------- Coarsen the long time series by averaging over every four days. 
- >>> da = xr.DataArray(np.linspace(0, 364, num=364), - ... dims='time', - ... coords={'time': pd.date_range( - ... '15/12/1999', periods=364)}) + >>> da = xr.DataArray( + ... np.linspace(0, 364, num=364), + ... dims="time", + ... coords={"time": pd.date_range("15/12/1999", periods=364)}, + ... ) >>> da array([ 0. , 1.002755, 2.00551 , ..., 361.99449 , 362.997245, @@ -917,7 +920,7 @@ def coarsen( Coordinates: * time (time) datetime64[ns] 1999-12-15 1999-12-16 ... 2000-12-12 >>> - >>> da.coarsen(time=3, boundary='trim').mean() + >>> da.coarsen(time=3, boundary="trim").mean() array([ 1.002755, 4.011019, 7.019284, ..., 358.986226, 361.99449 ]) @@ -1000,10 +1003,15 @@ def resample( -------- Downsample monthly time-series data to seasonal data: - >>> da = xr.DataArray(np.linspace(0, 11, num=12), - ... coords=[pd.date_range('15/12/1999', - ... periods=12, freq=pd.DateOffset(months=1))], - ... dims='time') + >>> da = xr.DataArray( + ... np.linspace(0, 11, num=12), + ... coords=[ + ... pd.date_range( + ... "15/12/1999", periods=12, freq=pd.DateOffset(months=1), + ... ) + ... ], + ... dims="time", + ... ) >>> da array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11.]) @@ -1017,7 +1025,7 @@ def resample( Upsample monthly time-series data to daily data: - >>> da.resample(time='1D').interpolate('linear') + >>> da.resample(time="1D").interpolate("linear") array([ 0. , 0.032258, 0.064516, ..., 10.935484, 10.967742, 11. ]) Coordinates: @@ -1025,7 +1033,7 @@ def resample( Limit scope of upsampling method - >>> da.resample(time='1D').nearest(tolerance='1D') + >>> da.resample(time="1D").nearest(tolerance="1D") array([ 0., 0., nan, ..., nan, 11., 11.]) Coordinates: @@ -1118,7 +1126,7 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): -------- >>> import numpy as np - >>> a = xr.DataArray(np.arange(25).reshape(5, 5), dims=('x', 'y')) + >>> a = xr.DataArray(np.arange(25).reshape(5, 5), dims=("x", "y")) >>> a array([[ 0, 1, 2, 3, 4], @@ -1227,7 +1235,7 @@ def isin(self, test_elements): Examples -------- - >>> array = xr.DataArray([1, 2, 3], dims='x') + >>> array = xr.DataArray([1, 2, 3], dims="x") >>> array.isin([1, 3]) array([ True, False, True]) @@ -1296,9 +1304,11 @@ def full_like(other, fill_value, dtype: DTypeLike = None): >>> import numpy as np >>> import xarray as xr - >>> x = xr.DataArray(np.arange(6).reshape(2, 3), - ... dims=['lat', 'lon'], - ... coords={'lat': [1, 2], 'lon': [0, 1, 2]}) + >>> x = xr.DataArray( + ... np.arange(6).reshape(2, 3), + ... dims=["lat", "lon"], + ... coords={"lat": [1, 2], "lon": [0, 1, 2]}, + ... ) >>> x array([[0, 1, 2], @@ -1410,9 +1420,11 @@ def zeros_like(other, dtype: DTypeLike = None): >>> import numpy as np >>> import xarray as xr - >>> x = xr.DataArray(np.arange(6).reshape(2, 3), - ... dims=['lat', 'lon'], - ... coords={'lat': [1, 2], 'lon': [0, 1, 2]}) + >>> x = xr.DataArray( + ... np.arange(6).reshape(2, 3), + ... dims=["lat", "lon"], + ... coords={"lat": [1, 2], "lon": [0, 1, 2]}, + ... ) >>> x array([[0, 1, 2], @@ -1468,9 +1480,11 @@ def ones_like(other, dtype: DTypeLike = None): >>> import numpy as np >>> import xarray as xr - >>> x = xr.DataArray(np.arange(6).reshape(2, 3), - ... dims=['lat', 'lon'], - ... coords={'lat': [1, 2], 'lon': [0, 1, 2]}) + >>> x = xr.DataArray( + ... np.arange(6).reshape(2, 3), + ... dims=["lat", "lon"], + ... coords={"lat": [1, 2], "lon": [0, 1, 2]}, + ... 
) >>> x array([[0, 1, 2], @@ -1479,7 +1493,7 @@ def ones_like(other, dtype: DTypeLike = None): * lat (lat) int64 1 2 * lon (lon) int64 0 1 2 - >>> >>> xr.ones_like(x) + >>> xr.ones_like(x) array([[1, 1, 1], [1, 1, 1]]) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index d2c5c32bc00..f99764448da 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -889,7 +889,7 @@ def apply_ufunc( You can now apply ``magnitude()`` to ``xr.DataArray`` and ``xr.Dataset`` objects, with automatically preserved dimensions and coordinates, e.g., - >>> array = xr.DataArray([1, 2, 3], coords=[('x', [0.1, 0.2, 0.3])]) + >>> array = xr.DataArray([1, 2, 3], coords=[("x", [0.1, 0.2, 0.3])]) >>> magnitude(array, -array) array([1.414214, 2.828427, 4.242641]) @@ -1093,10 +1093,9 @@ def dot(*arrays, dims=None, **kwargs): >>> import numpy as np >>> import xarray as xr - >>> da_a = xr.DataArray(np.arange(3 * 2).reshape(3, 2), dims=['a', 'b']) - >>> da_b = xr.DataArray(np.arange(3 * 2 * 2).reshape(3, 2, 2), - ... dims=['a', 'b', 'c']) - >>> da_c = xr.DataArray(np.arange(2 * 3).reshape(2, 3), dims=['c', 'd']) + >>> da_a = xr.DataArray(np.arange(3 * 2).reshape(3, 2), dims=["a", "b"]) + >>> da_b = xr.DataArray(np.arange(3 * 2 * 2).reshape(3, 2, 2), dims=["a", "b", "c"]) + >>> da_c = xr.DataArray(np.arange(2 * 3).reshape(2, 3), dims=["c", "d"]) >>> da_a @@ -1121,18 +1120,18 @@ def dot(*arrays, dims=None, **kwargs): [3, 4, 5]]) Dimensions without coordinates: c, d - >>> xr.dot(da_a, da_b, dims=['a', 'b']) + >>> xr.dot(da_a, da_b, dims=["a", "b"]) array([110, 125]) Dimensions without coordinates: c - >>> xr.dot(da_a, da_b, dims=['a']) + >>> xr.dot(da_a, da_b, dims=["a"]) array([[40, 46], [70, 79]]) Dimensions without coordinates: b, c - >>> xr.dot(da_a, da_b, da_c, dims=['b', 'c']) + >>> xr.dot(da_a, da_b, da_c, dims=["b", "c"]) array([[ 9, 14, 19], [ 93, 150, 207], @@ -1238,21 +1237,25 @@ def where(cond, x, y): -------- >>> import xarray as xr >>> import numpy as np - >>> x = xr.DataArray(0.1 * np.arange(10), dims=['lat'], - ... coords={'lat': np.arange(10)}, name='sst') + >>> x = xr.DataArray( + ... 0.1 * np.arange(10), + ... dims=["lat"], + ... coords={"lat": np.arange(10)}, + ... name="sst", + ... ) >>> x array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]) Coordinates: * lat (lat) int64 0 1 2 3 4 5 6 7 8 9 - >>> xr.where(x < 0.5, x, 100*x) + >>> xr.where(x < 0.5, x, 100 * x) array([ 0. , 0.1, 0.2, 0.3, 0.4, 50. , 60. , 70. , 80. , 90. ]) Coordinates: * lat (lat) int64 0 1 2 3 4 5 6 7 8 9 - >>> >>> y = xr.DataArray( + >>> y = xr.DataArray( ... 0.1 * np.arange(9).reshape(3, 3), ... dims=["lat", "lon"], ... coords={"lat": np.arange(3), "lon": 10 + np.arange(3)}, @@ -1276,8 +1279,8 @@ def where(cond, x, y): * lat (lat) int64 0 1 2 * lon (lon) int64 10 11 12 - >>> cond = xr.DataArray([True, False], dims=['x']) - >>> x = xr.DataArray([1, 2], dims=['y']) + >>> cond = xr.DataArray([True, False], dims=["x"]) + >>> x = xr.DataArray([1, 2], dims=["y"]) >>> xr.where(cond, x, 0) array([[1, 2], diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 6782070da0b..b335eeb293b 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -875,8 +875,7 @@ def copy(self, deep: bool = True, data: Any = None) -> "DataArray": Shallow versus deep copy - >>> array = xr.DataArray([1, 2, 3], dims='x', - ... 
coords={'x': ['a', 'b', 'c']}) + >>> array = xr.DataArray([1, 2, 3], dims="x", coords={"x": ["a", "b", "c"]}) >>> array.copy() array([1, 2, 3]) @@ -1344,7 +1343,7 @@ def interp( Examples -------- - >>> da = xr.DataArray([1, 3], [('x', np.arange(2))]) + >>> da = xr.DataArray([1, 3], [("x", np.arange(2))]) >>> da.interp(x=0.5) array(2.0) @@ -1476,8 +1475,9 @@ def swap_dims(self, dims_dict: Mapping[Hashable, Hashable]) -> "DataArray": Examples -------- - >>> arr = xr.DataArray(data=[0, 1], dims="x", - ... coords={"x": ["a", "b"], "y": ("x", [0, 1])}) + >>> arr = xr.DataArray( + ... data=[0, 1], dims="x", coords={"x": ["a", "b"], "y": ("x", [0, 1])}, + ... ) >>> arr array([0, 1]) @@ -1592,12 +1592,11 @@ def set_index( Examples -------- - >>> arr = xr.DataArray(data=np.ones((2, 3)), - ... dims=['x', 'y'], - ... coords={'x': - ... range(2), 'y': - ... range(3), 'a': ('x', [3, 4]) - ... }) + >>> arr = xr.DataArray( + ... data=np.ones((2, 3)), + ... dims=["x", "y"], + ... coords={"x": range(2), "y": range(3), "a": ("x", [3, 4])}, + ... ) >>> arr array([[1., 1., 1.], @@ -1606,7 +1605,7 @@ def set_index( * x (x) int64 0 1 * y (y) int64 0 1 2 a (x) int64 3 4 - >>> arr.set_index(x='a') + >>> arr.set_index(x="a") array([[1., 1., 1.], [1., 1., 1.]]) @@ -1721,8 +1720,10 @@ def stack( Examples -------- - >>> arr = xr.DataArray(np.arange(6).reshape(2, 3), - ... coords=[('x', ['a', 'b']), ('y', [0, 1, 2])]) + >>> arr = xr.DataArray( + ... np.arange(6).reshape(2, 3), + ... coords=[("x", ["a", "b"]), ("y", [0, 1, 2])], + ... ) >>> arr array([[0, 1, 2], @@ -1730,8 +1731,8 @@ def stack( Coordinates: * x (x) |S1 'a' 'b' * y (y) int64 0 1 2 - >>> stacked = arr.stack(z=('x', 'y')) - >>> stacked.indexes['z'] + >>> stacked = arr.stack(z=("x", "y")) + >>> stacked.indexes["z"] MultiIndex(levels=[['a', 'b'], [0, 1, 2]], codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], names=['x', 'y']) @@ -1771,8 +1772,10 @@ def unstack( Examples -------- - >>> arr = xr.DataArray(np.arange(6).reshape(2, 3), - ... coords=[('x', ['a', 'b']), ('y', [0, 1, 2])]) + >>> arr = xr.DataArray( + ... np.arange(6).reshape(2, 3), + ... coords=[("x", ["a", "b"]), ("y", [0, 1, 2])], + ... ) >>> arr array([[0, 1, 2], @@ -1780,8 +1783,8 @@ def unstack( Coordinates: * x (x) |S1 'a' 'b' * y (y) int64 0 1 2 - >>> stacked = arr.stack(z=('x', 'y')) - >>> stacked.indexes['z'] + >>> stacked = arr.stack(z=("x", "y")) + >>> stacked.indexes["z"] MultiIndex(levels=[['a', 'b'], [0, 1, 2]], codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], names=['x', 'y']) @@ -1820,9 +1823,11 @@ def to_unstacked_dataset(self, dim, level=0): Examples -------- >>> import xarray as xr - >>> arr = xr.DataArray(np.arange(6).reshape(2, 3), - ... coords=[('x', ['a', 'b']), ('y', [0, 1, 2])]) - >>> data = xr.Dataset({'a': arr, 'b': arr.isel(y=0)}) + >>> arr = xr.DataArray( + ... np.arange(6).reshape(2, 3), + ... coords=[("x", ["a", "b"]), ("y", [0, 1, 2])], + ... 
) + >>> data = xr.Dataset({"a": arr, "b": arr.isel(y=0)}) >>> data Dimensions: (x: 2, y: 3) @@ -1832,12 +1837,12 @@ def to_unstacked_dataset(self, dim, level=0): Data variables: a (x, y) int64 0 1 2 3 4 5 b (x) int64 0 3 - >>> stacked = data.to_stacked_array("z", ['y']) - >>> stacked.indexes['z'] + >>> stacked = data.to_stacked_array("z", ["y"]) + >>> stacked.indexes["z"] MultiIndex(levels=[['a', 'b'], [0, 1, 2]], labels=[[0, 0, 0, 1], [0, 1, 2, -1]], names=['variable', 'y']) - >>> roundtripped = stacked.to_unstacked_dataset(dim='z') + >>> roundtripped = stacked.to_unstacked_dataset(dim="z") >>> data.identical(roundtripped) True @@ -2697,13 +2702,13 @@ def diff(self, dim: Hashable, n: int = 1, label: Hashable = "upper") -> "DataArr Examples -------- - >>> arr = xr.DataArray([5, 5, 6, 6], [[1, 2, 3, 4]], ['x']) - >>> arr.diff('x') + >>> arr = xr.DataArray([5, 5, 6, 6], [[1, 2, 3, 4]], ["x"]) + >>> arr.diff("x") array([0, 1, 0]) Coordinates: * x (x) int64 2 3 4 - >>> arr.diff('x', 2) + >>> arr.diff("x", 2) array([ 1, -1]) Coordinates: @@ -2753,7 +2758,7 @@ def shift( Examples -------- - >>> arr = xr.DataArray([5, 6, 7], dims='x') + >>> arr = xr.DataArray([5, 6, 7], dims="x") >>> arr.shift(x=1) array([ nan, 5., 6.]) @@ -2803,7 +2808,7 @@ def roll( Examples -------- - >>> arr = xr.DataArray([5, 6, 7], dims='x') + >>> arr = xr.DataArray([5, 6, 7], dims="x") >>> arr.roll(x=1) array([7, 5, 6]) @@ -2852,9 +2857,9 @@ def dot( -------- >>> da_vals = np.arange(6 * 5 * 4).reshape((6, 5, 4)) - >>> da = xr.DataArray(da_vals, dims=['x', 'y', 'z']) + >>> da = xr.DataArray(da_vals, dims=["x", "y", "z"]) >>> dm_vals = np.arange(4) - >>> dm = xr.DataArray(dm_vals, dims=['z']) + >>> dm = xr.DataArray(dm_vals, dims=["z"]) >>> dm.dims ('z') @@ -2914,9 +2919,11 @@ def sortby( Examples -------- - >>> da = xr.DataArray(np.random.rand(5), - ... coords=[pd.date_range('1/1/2000', periods=5)], - ... dims='time') + >>> da = xr.DataArray( + ... np.random.rand(5), + ... coords=[pd.date_range("1/1/2000", periods=5)], + ... dims="time", + ... ) >>> da array([ 0.965471, 0.615637, 0.26532 , 0.270962, 0.552878]) @@ -3057,8 +3064,8 @@ def rank( Examples -------- - >>> arr = xr.DataArray([5, 6, 7], dims='x') - >>> arr.rank('x') + >>> arr = xr.DataArray([5, 6, 7], dims="x") + >>> arr.rank("x") array([ 1., 2., 3.]) Dimensions without coordinates: x @@ -3098,8 +3105,11 @@ def differentiate( Examples -------- - >>> da = xr.DataArray(np.arange(12).reshape(4, 3), dims=['x', 'y'], - ... coords={'x': [0, 0.1, 1.1, 1.2]}) + >>> da = xr.DataArray( + ... np.arange(12).reshape(4, 3), + ... dims=["x", "y"], + ... coords={"x": [0, 0.1, 1.1, 1.2]}, + ... ) >>> da array([[ 0, 1, 2], @@ -3110,7 +3120,7 @@ def differentiate( * x (x) float64 0.0 0.1 1.1 1.2 Dimensions without coordinates: y >>> - >>> da.differentiate('x') + >>> da.differentiate("x") array([[30. , 30. , 30. ], [27.545455, 27.545455, 27.545455], @@ -3152,8 +3162,11 @@ def integrate( Examples -------- - >>> da = xr.DataArray(np.arange(12).reshape(4, 3), dims=['x', 'y'], - ... coords={'x': [0, 0.1, 1.1, 1.2]}) + >>> da = xr.DataArray( + ... np.arange(12).reshape(4, 3), + ... dims=["x", "y"], + ... coords={"x": [0, 0.1, 1.1, 1.2]}, + ... 
) >>> da array([[ 0, 1, 2], @@ -3164,7 +3177,7 @@ def integrate( * x (x) float64 0.0 0.1 1.1 1.2 Dimensions without coordinates: y >>> - >>> da.integrate('x') + >>> da.integrate("x") array([5.4, 6.6, 7.8]) Dimensions without coordinates: y diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 880c574c9cb..d5ad1123a54 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1010,8 +1010,9 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": Shallow copy versus deep copy >>> da = xr.DataArray(np.random.randn(2, 3)) - >>> ds = xr.Dataset({'foo': da, 'bar': ('x', [-1, 2])}, - ... coords={'x': ['one', 'two']}) + >>> ds = xr.Dataset( + ... {"foo": da, "bar": ("x", [-1, 2])}, coords={"x": ["one", "two"]}, + ... ) >>> ds.copy() Dimensions: (dim_0: 2, dim_1: 3, x: 2) @@ -1023,7 +1024,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": bar (x) int64 -1 2 >>> ds_0 = ds.copy(deep=False) - >>> ds_0['foo'][0, 0] = 7 + >>> ds_0["foo"][0, 0] = 7 >>> ds_0 Dimensions: (dim_0: 2, dim_1: 3, x: 2) @@ -1048,7 +1049,9 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": structure of the original object, but with the new data. Original object is unaffected. - >>> ds.copy(data={'foo': np.arange(6).reshape(2, 3), 'bar': ['a', 'b']}) + >>> ds.copy( + ... data={"foo": np.arange(6).reshape(2, 3), "bar": ["a", "b"]} + ... ) Dimensions: (dim_0: 2, dim_1: 3, x: 2) Coordinates: @@ -2358,9 +2361,10 @@ def reindex( >>> x = xr.Dataset( ... { ... "temperature": ("station", 20 * np.random.rand(4)), - ... "pressure": ("station", 500 * np.random.rand(4)) + ... "pressure": ("station", 500 * np.random.rand(4)), ... }, - ... coords={"station": ["boston", "nyc", "seattle", "denver"]}) + ... coords={"station": ["boston", "nyc", "seattle", "denver"]}, + ... ) >>> x Dimensions: (station: 4) @@ -2375,8 +2379,8 @@ def reindex( Create a new index and reindex the dataset. By default values in the new index that do not have corresponding records in the dataset are assigned `NaN`. - >>> new_index = ['boston', 'austin', 'seattle', 'lincoln'] - >>> x.reindex({'station': new_index}) + >>> new_index = ["boston", "austin", "seattle", "lincoln"] + >>> x.reindex({"station": new_index}) Dimensions: (station: 4) Coordinates: @@ -2387,7 +2391,7 @@ def reindex( We can fill in the missing values by passing a value to the keyword `fill_value`. - >>> x.reindex({'station': new_index}, fill_value=0) + >>> x.reindex({"station": new_index}, fill_value=0) Dimensions: (station: 4) Coordinates: @@ -2399,7 +2403,7 @@ def reindex( Because the index is not monotonically increasing or decreasing, we cannot use arguments to the keyword method to fill the `NaN` values. - >>> x.reindex({'station': new_index}, method='nearest') + >>> x.reindex({"station": new_index}, method="nearest") Traceback (most recent call last): ... raise ValueError('index must be monotonic increasing or decreasing') @@ -2410,10 +2414,14 @@ def reindex( >>> x2 = xr.Dataset( ... { - ... "temperature": ("time", [15.57, 12.77, np.nan, 0.3081, 16.59, 15.12]), - ... "pressure": ("time", 500 * np.random.rand(6)) + ... "temperature": ( + ... "time", + ... [15.57, 12.77, np.nan, 0.3081, 16.59, 15.12], + ... ), + ... "pressure": ("time", 500 * np.random.rand(6)), ... }, - ... coords={"time": pd.date_range('01/01/2019', periods=6, freq='D')}) + ... coords={"time": pd.date_range("01/01/2019", periods=6, freq="D")}, + ... 
) >>> x2 Dimensions: (time: 6) @@ -2425,8 +2433,8 @@ def reindex( Suppose we decide to expand the dataset to cover a wider date range. - >>> time_index2 = pd.date_range('12/29/2018', periods=10, freq='D') - >>> x2.reindex({'time': time_index2}) + >>> time_index2 = pd.date_range("12/29/2018", periods=10, freq="D") + >>> x2.reindex({"time": time_index2}) Dimensions: (time: 10) Coordinates: @@ -2441,7 +2449,7 @@ def reindex( For example, to back-propagate the last valid value to fill the `NaN` values, pass `bfill` as an argument to the `method` keyword. - >>> x3 = x2.reindex({'time': time_index2}, method='bfill') + >>> x3 = x2.reindex({"time": time_index2}, method="bfill") >>> x3 Dimensions: (time: 10) @@ -2896,8 +2904,10 @@ def swap_dims( Examples -------- - >>> ds = xr.Dataset(data_vars={"a": ("x", [5, 7]), "b": ("x", [0.1, 2.4])}, - ... coords={"x": ["a", "b"], "y": ("x", [0, 1])}) + >>> ds = xr.Dataset( + ... data_vars={"a": ("x", [5, 7]), "b": ("x", [0.1, 2.4])}, + ... coords={"x": ["a", "b"], "y": ("x", [0, 1])}, + ... ) >>> ds Dimensions: (x: 2) @@ -3138,13 +3148,12 @@ def set_index( Examples -------- - >>> arr = xr.DataArray(data=np.ones((2, 3)), - ... dims=['x', 'y'], - ... coords={'x': - ... range(2), 'y': - ... range(3), 'a': ('x', [3, 4]) - ... }) - >>> ds = xr.Dataset({'v': arr}) + >>> arr = xr.DataArray( + ... data=np.ones((2, 3)), + ... dims=["x", "y"], + ... coords={"x": range(2), "y": range(3), "a": ("x", [3, 4])}, + ... ) + >>> ds = xr.Dataset({"v": arr}) >>> ds Dimensions: (x: 2, y: 3) @@ -3154,7 +3163,7 @@ def set_index( a (x) int64 3 4 Data variables: v (x, y) float64 1.0 1.0 1.0 1.0 1.0 1.0 - >>> ds.set_index(x='a') + >>> ds.set_index(x="a") Dimensions: (x: 2, y: 3) Coordinates: @@ -3358,9 +3367,11 @@ def to_stacked_array( Examples -------- >>> data = xr.Dataset( - ... data_vars={'a': (('x', 'y'), [[0, 1, 2], [3, 4, 5]]), - ... 'b': ('x', [6, 7])}, - ... coords={'y': ['u', 'v', 'w']} + ... data_vars={ + ... "a": (("x", "y"), [[0, 1, 2], [3, 4, 5]]), + ... "b": ("x", [6, 7]), + ... }, + ... coords={"y": ["u", "v", "w"]}, ... ) >>> data @@ -3373,7 +3384,7 @@ def to_stacked_array( a (x, y) int64 0 1 2 3 4 5 b (x) int64 6 7 - >>> data.to_stacked_array("z", sample_dims=['x']) + >>> data.to_stacked_array("z", sample_dims=["x"]) array([[0, 1, 2, 6], [3, 4, 5, 7]]) @@ -3744,9 +3755,9 @@ def drop_sel(self, labels=None, *, errors="raise", **labels_kwargs): Examples -------- >>> data = np.random.randn(2, 3) - >>> labels = ['a', 'b', 'c'] - >>> ds = xr.Dataset({'A': (['x', 'y'], data), 'y': labels}) - >>> ds.drop_sel(y=['a', 'c']) + >>> labels = ["a", "b", "c"] + >>> ds = xr.Dataset({"A": (["x", "y"], data), "y": labels}) + >>> ds.drop_sel(y=["a", "c"]) Dimensions: (x: 2, y: 1) Coordinates: @@ -3754,7 +3765,7 @@ def drop_sel(self, labels=None, *, errors="raise", **labels_kwargs): Dimensions without coordinates: x Data variables: A (x, y) float64 -0.3454 0.1734 - >>> ds.drop_sel(y='b') + >>> ds.drop_sel(y="b") Dimensions: (x: 2, y: 2) Coordinates: @@ -3959,9 +3970,10 @@ def fillna(self, value: Any) -> "Dataset": ... "A": ("x", [np.nan, 2, np.nan, 0]), ... "B": ("x", [3, 4, np.nan, 1]), ... "C": ("x", [np.nan, np.nan, np.nan, 5]), - ... "D": ("x", [np.nan, 3, np.nan, 4]) + ... "D": ("x", [np.nan, 3, np.nan, 4]), ... }, - ... coords={"x": [0, 1, 2, 3]}) + ... coords={"x": [0, 1, 2, 3]}, + ... ) >>> ds Dimensions: (x: 4) @@ -3988,7 +4000,7 @@ def fillna(self, value: Any) -> "Dataset": Replace all `NaN` elements in column ‘A’, ‘B’, ‘C’, and ‘D’, with 0, 1, 2, and 3 respectively. 
- >>> values = {'A': 0, 'B': 1, 'C': 2, 'D': 3} + >>> values = {"A": 0, "B": 1, "C": 2, "D": 3} >>> ds.fillna(value=values) Dimensions: (x: 4) @@ -4295,7 +4307,7 @@ def map( Examples -------- >>> da = xr.DataArray(np.random.randn(2, 3)) - >>> ds = xr.Dataset({'foo': da, 'bar': ('x', [-1, 2])}) + >>> ds = xr.Dataset({"foo": da, "bar": ("x", [-1, 2])}) >>> ds Dimensions: (dim_0: 2, dim_1: 3, x: 2) @@ -4382,7 +4394,10 @@ def assign( >>> import xarray as xr >>> x = xr.Dataset( ... { - ... "temperature_c": (("lat", "lon"), 20 * np.random.rand(4).reshape(2, 2)), + ... "temperature_c": ( + ... ("lat", "lon"), + ... 20 * np.random.rand(4).reshape(2, 2), + ... ), ... "precipitation": (("lat", "lon"), np.random.rand(4).reshape(2, 2)), ... }, ... coords={"lat": [10, 20], "lon": [150, 160]}, @@ -4399,7 +4414,7 @@ def assign( Where the value is a callable, evaluated on dataset: - >>> x.assign(temperature_f = lambda x: x.temperature_c * 9 / 5 + 32) + >>> x.assign(temperature_f=lambda x: x.temperature_c * 9 / 5 + 32) Dimensions: (lat: 2, lon: 2) Coordinates: @@ -4902,15 +4917,15 @@ def diff(self, dim, n=1, label="upper"): Examples -------- - >>> ds = xr.Dataset({'foo': ('x', [5, 5, 6, 6])}) - >>> ds.diff('x') + >>> ds = xr.Dataset({"foo": ("x", [5, 5, 6, 6])}) + >>> ds.diff("x") Dimensions: (x: 3) Coordinates: * x (x) int64 1 2 3 Data variables: foo (x) int64 0 1 0 - >>> ds.diff('x', 2) + >>> ds.diff("x", 2) Dimensions: (x: 2) Coordinates: @@ -4994,7 +5009,7 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs): Examples -------- - >>> ds = xr.Dataset({'foo': ('x', list('abcde'))}) + >>> ds = xr.Dataset({"foo": ("x", list("abcde"))}) >>> ds.shift(x=2) Dimensions: (x: 5) @@ -5053,7 +5068,7 @@ def roll(self, shifts=None, roll_coords=None, **shifts_kwargs): Examples -------- - >>> ds = xr.Dataset({'foo': ('x', list('abcde'))}) + >>> ds = xr.Dataset({"foo": ("x", list("abcde"))}) >>> ds.roll(x=2) Dimensions: (x: 5) @@ -5566,19 +5581,23 @@ def filter_by_attrs(self, **kwargs): >>> precip = 10 * np.random.rand(2, 2, 3) >>> lon = [[-99.83, -99.32], [-99.79, -99.23]] >>> lat = [[42.25, 42.21], [42.63, 42.59]] - >>> dims = ['x', 'y', 'time'] - >>> temp_attr = dict(standard_name='air_potential_temperature') - >>> precip_attr = dict(standard_name='convective_precipitation_flux') - >>> ds = xr.Dataset({ - ... 'temperature': (dims, temp, temp_attr), - ... 'precipitation': (dims, precip, precip_attr)}, - ... coords={ - ... 'lon': (['x', 'y'], lon), - ... 'lat': (['x', 'y'], lat), - ... 'time': pd.date_range('2014-09-06', periods=3), - ... 'reference_time': pd.Timestamp('2014-09-05')}) + >>> dims = ["x", "y", "time"] + >>> temp_attr = dict(standard_name="air_potential_temperature") + >>> precip_attr = dict(standard_name="convective_precipitation_flux") + >>> ds = xr.Dataset( + ... { + ... "temperature": (dims, temp, temp_attr), + ... "precipitation": (dims, precip, precip_attr), + ... }, + ... coords={ + ... "lon": (["x", "y"], lon), + ... "lat": (["x", "y"], lat), + ... "time": pd.date_range("2014-09-06", periods=3), + ... "reference_time": pd.Timestamp("2014-09-05"), + ... }, + ... ) >>> # Get variables matching a specific standard_name. 
- >>> ds.filter_by_attrs(standard_name='convective_precipitation_flux') + >>> ds.filter_by_attrs(standard_name="convective_precipitation_flux") Dimensions: (time: 3, x: 2, y: 2) Coordinates: diff --git a/xarray/core/extensions.py b/xarray/core/extensions.py index 79abbccea39..e81070d18fd 100644 --- a/xarray/core/extensions.py +++ b/xarray/core/extensions.py @@ -110,8 +110,9 @@ def plot(self): Back in an interactive IPython session: - >>> ds = xarray.Dataset({'longitude': np.linspace(0, 10), - ... 'latitude': np.linspace(0, 20)}) + >>> ds = xarray.Dataset( + ... {"longitude": np.linspace(0, 10), "latitude": np.linspace(0, 20)} + ... ) >>> ds.geo.center (5.0, 10.0) >>> ds.geo.plot() diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 10c7804d718..1d1b8d39a20 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -678,7 +678,7 @@ def merge( var2 (lat, lon) float64 5.0 nan 6.0 nan nan nan 7.0 nan 8.0 var3 (time, lon) float64 0.0 nan 3.0 4.0 nan 9.0 - >>> xr.merge([x, y, z], compat='identical') + >>> xr.merge([x, y, z], compat="identical") Dimensions: (lat: 3, lon: 3, time: 2) Coordinates: @@ -690,7 +690,7 @@ def merge( var2 (lat, lon) float64 5.0 nan 6.0 nan nan nan 7.0 nan 8.0 var3 (time, lon) float64 0.0 nan 3.0 4.0 nan 9.0 - >>> xr.merge([x, y, z], compat='equals') + >>> xr.merge([x, y, z], compat="equals") Dimensions: (lat: 3, lon: 3, time: 2) Coordinates: @@ -702,7 +702,7 @@ def merge( var2 (lat, lon) float64 5.0 nan 6.0 nan nan nan 7.0 nan 8.0 var3 (time, lon) float64 0.0 nan 3.0 4.0 nan 9.0 - >>> xr.merge([x, y, z], compat='equals', fill_value=-999.) + >>> xr.merge([x, y, z], compat="equals", fill_value=-999.0) Dimensions: (lat: 3, lon: 3, time: 2) Coordinates: @@ -714,7 +714,7 @@ def merge( var2 (lat, lon) float64 5.0 -999.0 6.0 -999.0 ... -999.0 7.0 -999.0 8.0 var3 (time, lon) float64 0.0 -999.0 3.0 4.0 -999.0 9.0 - >>> xr.merge([x, y, z], join='override') + >>> xr.merge([x, y, z], join="override") Dimensions: (lat: 2, lon: 2, time: 2) Coordinates: @@ -726,7 +726,7 @@ def merge( var2 (lat, lon) float64 5.0 6.0 7.0 8.0 var3 (time, lon) float64 0.0 3.0 4.0 9.0 - >>> xr.merge([x, y, z], join='inner') + >>> xr.merge([x, y, z], join="inner") Dimensions: (lat: 1, lon: 1, time: 2) Coordinates: @@ -738,7 +738,7 @@ def merge( var2 (lat, lon) float64 5.0 var3 (time, lon) float64 0.0 4.0 - >>> xr.merge([x, y, z], compat='identical', join='inner') + >>> xr.merge([x, y, z], compat="identical", join="inner") Dimensions: (lat: 1, lon: 1, time: 2) Coordinates: @@ -750,7 +750,7 @@ def merge( var2 (lat, lon) float64 5.0 var3 (time, lon) float64 0.0 4.0 - >>> xr.merge([x, y, z], compat='broadcast_equals', join='outer') + >>> xr.merge([x, y, z], compat="broadcast_equals", join="outer") Dimensions: (lat: 3, lon: 3, time: 2) Coordinates: @@ -762,7 +762,7 @@ def merge( var2 (lat, lon) float64 5.0 nan 6.0 nan nan nan 7.0 nan 8.0 var3 (time, lon) float64 0.0 nan 3.0 4.0 nan 9.0 - >>> xr.merge([x, y, z], join='exact') + >>> xr.merge([x, y, z], join="exact") Traceback (most recent call last): ... 
ValueError: indexes along dimension 'lat' are not equal diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index cf189e471cc..5dd8219ebca 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -165,7 +165,7 @@ def _rolling_window(a, window, axis=-1): Examples -------- - >>> x=np.arange(10).reshape((2,5)) + >>> x = np.arange(10).reshape((2, 5)) >>> np.rolling_window(x, 3, axis=-1) array([[[0, 1, 2], [1, 2, 3], [2, 3, 4]], [[5, 6, 7], [6, 7, 8], [7, 8, 9]]]) diff --git a/xarray/core/options.py b/xarray/core/options.py index 15d05159d6d..5d81ca40a6e 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -108,7 +108,7 @@ class set_options: You can use ``set_options`` either as a context manager: - >>> ds = xr.Dataset({'x': np.arange(1000)}) + >>> ds = xr.Dataset({"x": np.arange(1000)}) >>> with xr.set_options(display_width=40): ... print(ds) diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index facfa06b23c..8429d0f71ad 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -162,18 +162,19 @@ def map_blocks( ``xr.map_blocks()`` allows for parallel operations with knowledge of ``xarray``, its indices, and its methods like ``.groupby()``. - >>> def calculate_anomaly(da, groupby_type='time.month'): + >>> def calculate_anomaly(da, groupby_type="time.month"): ... # Necessary workaround to xarray's check with zero dimensions ... # https://github.com/pydata/xarray/issues/3575 ... if sum(da.shape) == 0: ... return da ... gb = da.groupby(groupby_type) - ... clim = gb.mean(dim='time') + ... clim = gb.mean(dim="time") ... return gb - clim - >>> time = xr.cftime_range('1990-01', '1992-01', freq='M') + >>> time = xr.cftime_range("1990-01", "1992-01", freq="M") >>> np.random.seed(123) - >>> array = xr.DataArray(np.random.rand(len(time)), - ... dims="time", coords=[time]).chunk() + >>> array = xr.DataArray( + ... np.random.rand(len(time)), dims="time", coords=[time] + ... ).chunk() >>> xr.map_blocks(calculate_anomaly, array).compute() array([ 0.12894847, 0.11323072, -0.0855964 , -0.09334032, 0.26848862, @@ -187,7 +188,9 @@ def map_blocks( Note that one must explicitly use ``args=[]`` and ``kwargs={}`` to pass arguments to the function being applied in ``xr.map_blocks()``: - >>> xr.map_blocks(calculate_anomaly, array, kwargs={'groupby_type': 'time.year'}) + >>> xr.map_blocks( + ... calculate_anomaly, array, kwargs={"groupby_type": "time.year"}, + ... 
) array([ 0.15361741, -0.25671244, -0.31600032, 0.008463 , 0.1766172 , -0.11974531, 0.43791243, 0.14197797, -0.06191987, -0.15073425, diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 5f633abbde6..58f0b275b21 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -231,17 +231,17 @@ def construct(self, window_dim, stride=1, fill_value=dtypes.NA): Examples -------- - >>> da = xr.DataArray(np.arange(8).reshape(2, 4), dims=('a', 'b')) + >>> da = xr.DataArray(np.arange(8).reshape(2, 4), dims=("a", "b")) >>> rolling = da.rolling(b=3) - >>> rolling.construct('window_dim') + >>> rolling.construct("window_dim") array([[[np.nan, np.nan, 0], [np.nan, 0, 1], [0, 1, 2], [1, 2, 3]], [[np.nan, np.nan, 4], [np.nan, 4, 5], [4, 5, 6], [5, 6, 7]]]) Dimensions without coordinates: a, b, window_dim >>> rolling = da.rolling(b=3, center=True) - >>> rolling.construct('window_dim') + >>> rolling.construct("window_dim") array([[[np.nan, 0, 1], [0, 1, 2], [1, 2, 3], [2, 3, np.nan]], [[np.nan, 4, 5], [4, 5, 6], [5, 6, 7], [6, 7, np.nan]]]) @@ -279,9 +279,9 @@ def reduce(self, func, **kwargs): Examples -------- - >>> da = xr.DataArray(np.arange(8).reshape(2, 4), dims=('a', 'b')) + >>> da = xr.DataArray(np.arange(8).reshape(2, 4), dims=("a", "b")) >>> rolling = da.rolling(b=3) - >>> rolling.construct('window_dim') + >>> rolling.construct("window_dim") array([[[np.nan, np.nan, 0], [np.nan, 0, 1], [0, 1, 2], [1, 2, 3]], [[np.nan, np.nan, 4], [np.nan, 4, 5], [4, 5, 6], [5, 6, 7]]]) diff --git a/xarray/core/rolling_exp.py b/xarray/core/rolling_exp.py index ac6768e8a9c..6ef63e42291 100644 --- a/xarray/core/rolling_exp.py +++ b/xarray/core/rolling_exp.py @@ -94,8 +94,8 @@ def mean(self): Examples -------- - >>> da = xr.DataArray([1,1,2,2,2], dims='x') - >>> da.rolling_exp(x=2, window_type='span').mean() + >>> da = xr.DataArray([1, 1, 2, 2, 2], dims="x") + >>> da.rolling_exp(x=2, window_type="span").mean() array([1. , 1. 
, 1.692308, 1.9 , 1.966942]) Dimensions without coordinates: x diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 435edb6f014..01f816941b5 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -843,7 +843,7 @@ def copy(self, deep=True, data=None): Shallow copy versus deep copy - >>> var = xr.Variable(data=[1, 2, 3], dims='x') + >>> var = xr.Variable(data=[1, 2, 3], dims="x") >>> var.copy() array([1, 2, 3]) @@ -1844,13 +1844,13 @@ def rolling_window( Examples -------- - >>> v=Variable(('a', 'b'), np.arange(8).reshape((2,4))) - >>> v.rolling_window(x, 'b', 3, 'window_dim') + >>> v = Variable(("a", "b"), np.arange(8).reshape((2, 4))) + >>> v.rolling_window(x, "b", 3, "window_dim") array([[[nan, nan, 0], [nan, 0, 1], [0, 1, 2], [1, 2, 3]], [[nan, nan, 4], [nan, 4, 5], [4, 5, 6], [5, 6, 7]]]) - >>> v.rolling_window(x, 'b', 3, 'window_dim', center=True) + >>> v.rolling_window(x, "b", 3, "window_dim", center=True) array([[[nan, 0, 1], [0, 1, 2], [1, 2, 3], [2, 3, nan]], [[nan, 4, 5], [4, 5, 6], [5, 6, 7], [6, 7, nan]]]) From 65a5bff79479c4b56d6f733236fe544b7f4120a8 Mon Sep 17 00:00:00 2001 From: Eric Jansen Date: Tue, 17 Mar 2020 17:34:36 +0100 Subject: [PATCH 27/54] Fix recombination in groupby when changing size along the grouped dimension (#3807) * Fix recombination in groupby when changing size along the grouped dimension * cleanup tests * minor test rename * minor fix Co-authored-by: dcherian Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 6 ++++-- xarray/core/groupby.py | 8 +++++--- xarray/tests/test_groupby.py | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9b78d046148..aad0e083a8c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -57,8 +57,10 @@ Bug fixes - Fix :py:meth:`Dataset.interp` when indexing array shares coordinates with the indexed variable (:issue:`3252`). By `David Huard `_. - - +- Fix recombination of groups in :py:meth:`Dataset.groupby` and + :py:meth:`DataArray.groupby` when performing an operation that changes the + size of the groups along the grouped dimension. By `Eric Jansen + `_. - Fix use of multi-index with categorical values (:issue:`3674`). By `Matthieu Ancellin `_. - Fix alignment with ``join="override"`` when some dimensions are unindexed. (:issue:`3681`). 
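A minimal sketch of the behavior restored by the groupby changes below (not part of the patch; it mirrors the new ``test_groupby_map_shrink_groups`` test and assumes xarray with this change applied):

    import xarray as xr

    da = xr.DataArray([1, 2, 3, 4, 5, 6], [("x", [1, 1, 1, 2, 2, 2])])

    # Shrink each group from three elements to two along the grouped
    # dimension "x". Recombination used to reorder the result using the
    # *original* group sizes and then re-assign the group coordinate,
    # which broke when the applied function changed the group sizes.
    result = da.groupby("x").map(lambda g: g.isel(x=[0, 1]))
    assert list(result.values) == [1, 2, 4, 5]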
diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 4223d9dc255..67e8f0588b3 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -720,7 +720,7 @@ def assign_coords(self, coords=None, **coords_kwargs): def _maybe_reorder(xarray_obj, dim, positions): order = _inverse_permutation_indices(positions) - if order is None: + if order is None or len(order) != xarray_obj.sizes[dim]: return xarray_obj else: return xarray_obj[{dim: order}] @@ -838,7 +838,8 @@ def _combine(self, applied, restore_coord_dims=False, shortcut=False): if isinstance(combined, type(self._obj)): # only restore dimension order for arrays combined = self._restore_dim_order(combined) - if coord is not None: + # assign coord when the applied function does not return that coord + if coord is not None and dim not in applied_example.dims: if shortcut: coord_var = as_variable(coord) combined._coords[coord.name] = coord_var @@ -954,7 +955,8 @@ def _combine(self, applied): coord, dim, positions = self._infer_concat_args(applied_example) combined = concat(applied, dim) combined = _maybe_reorder(combined, dim, positions) - if coord is not None: + # assign coord when the applied function does not return that coord + if coord is not None and dim not in applied_example.dims: combined[coord.name] = coord combined = self._maybe_restore_empty_groups(combined) combined = self._maybe_unstack(combined) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 77558e741be..8ab4b7b2f80 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -107,6 +107,39 @@ def test_groupby_input_mutation(): assert_identical(array, array_copy) # should not modify inputs +@pytest.mark.parametrize( + "obj", + [ + xr.DataArray([1, 2, 3, 4, 5, 6], [("x", [1, 1, 1, 2, 2, 2])]), + xr.Dataset({"foo": ("x", [1, 2, 3, 4, 5, 6])}, {"x": [1, 1, 1, 2, 2, 2]}), + ], +) +def test_groupby_map_shrink_groups(obj): + expected = obj.isel(x=[0, 1, 3, 4]) + actual = obj.groupby("x").map(lambda f: f.isel(x=[0, 1])) + assert_identical(expected, actual) + + +@pytest.mark.parametrize( + "obj", + [ + xr.DataArray([1, 2, 3], [("x", [1, 2, 2])]), + xr.Dataset({"foo": ("x", [1, 2, 3])}, {"x": [1, 2, 2]}), + ], +) +def test_groupby_map_change_group_size(obj): + def func(group): + if group.sizes["x"] == 1: + result = group.isel(x=[0, 0]) + else: + result = group.isel(x=[0]) + return result + + expected = obj.isel(x=[0, 0, 1]) + actual = obj.groupby("x").map(func) + assert_identical(expected, actual) + + def test_da_groupby_map_func_args(): def func(arg1, arg2, arg3=0): return arg1 + arg2 + arg3 From df614b96082b38966a329b115082cd8dddf9fb29 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Thu, 19 Mar 2020 15:29:41 +0100 Subject: [PATCH 28/54] Feature/weighted (#2922) * weighted for DataArray * remove some commented code * pep8 and faulty import tests * add weighted sum, replace 0s in sum_of_wgt * weighted: overhaul tests * weighted: pep8 * weighted: pep8 lines * weighted update docs * weighted: fix typo * weighted: pep8 * undo changes to avoid merge conflict * add weighted to dataarray again * remove super * overhaul core/weighted.py * add DatasetWeighted class * _maybe_get_all_dims return sorted tuple * work on: test_weighted * black and flake8 * Apply suggestions from code review (docs) * restructure interim * restructure classes * update weighted.py * black * use map; add keep_attrs * implement expected_weighted; update tests * add whats new * undo changes to whats-new * F811: noqa where? 
* api.rst * add to computation * small updates * add example to gallery * typo * another typo * correct docstring in core/common.py * typos * adjust review * clean tests * add test nonequal coords * comment on use of dot * fix erroneous merge * update tests * move example to notebook * move whats-new entry to 15.1 * some doc updates * dot to own function * simplify some tests * Doc updates * very minor changes. * fix & add references * doc: return 0/NaN on 0 weights * Update xarray/core/common.py Co-authored-by: dcherian Co-authored-by: Deepak Cherian --- doc/api.rst | 18 ++ doc/computation.rst | 86 ++++- doc/examples.rst | 1 + doc/examples/area_weighted_temperature.ipynb | 226 ++++++++++++++ doc/whats-new.rst | 3 + xarray/core/common.py | 19 ++ xarray/core/dataarray.py | 2 + xarray/core/dataset.py | 2 + xarray/core/weighted.py | 255 +++++++++++++++ xarray/tests/test_weighted.py | 311 +++++++++++++++++++ 10 files changed, 922 insertions(+), 1 deletion(-) create mode 100644 doc/examples/area_weighted_temperature.ipynb create mode 100644 xarray/core/weighted.py create mode 100644 xarray/tests/test_weighted.py diff --git a/doc/api.rst b/doc/api.rst index 4492d882355..43a9cf53ead 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -165,6 +165,7 @@ Computation Dataset.groupby_bins Dataset.rolling Dataset.rolling_exp + Dataset.weighted Dataset.coarsen Dataset.resample Dataset.diff @@ -340,6 +341,7 @@ Computation DataArray.groupby_bins DataArray.rolling DataArray.rolling_exp + DataArray.weighted DataArray.coarsen DataArray.dt DataArray.resample @@ -577,6 +579,22 @@ Rolling objects core.rolling.DatasetRolling.reduce core.rolling_exp.RollingExp +Weighted objects +================ + +.. autosummary:: + :toctree: generated/ + + core.weighted.DataArrayWeighted + core.weighted.DataArrayWeighted.mean + core.weighted.DataArrayWeighted.sum + core.weighted.DataArrayWeighted.sum_of_weights + core.weighted.DatasetWeighted + core.weighted.DatasetWeighted.mean + core.weighted.DatasetWeighted.sum + core.weighted.DatasetWeighted.sum_of_weights + + Coarsen objects =============== diff --git a/doc/computation.rst b/doc/computation.rst index 1ac30f55ee7..5309f27e9b6 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -1,3 +1,5 @@ +.. currentmodule:: xarray + .. _comput: ########### @@ -241,12 +243,94 @@ You can also use ``construct`` to compute a weighted rolling sum: To avoid this, use ``skipna=False`` as the above example. +.. _comput.weighted: + +Weighted array reductions +========================= + +:py:class:`DataArray` and :py:class:`Dataset` objects include :py:meth:`DataArray.weighted` +and :py:meth:`Dataset.weighted` array reduction methods. They currently +support weighted ``sum`` and weighted ``mean``. + +.. ipython:: python + + coords = dict(month=('month', [1, 2, 3])) + + prec = xr.DataArray([1.1, 1.0, 0.9], dims=('month', ), coords=coords) + weights = xr.DataArray([31, 28, 31], dims=('month', ), coords=coords) + +Create a weighted object: + +.. ipython:: python + + weighted_prec = prec.weighted(weights) + weighted_prec + +Calculate the weighted sum: + +.. ipython:: python + + weighted_prec.sum() + +Calculate the weighted mean: + +.. ipython:: python + + weighted_prec.mean(dim="month") + +The weighted sum corresponds to: + +.. ipython:: python + + weighted_sum = (prec * weights).sum() + weighted_sum + +and the weighted mean to: + +.. ipython:: python + + weighted_mean = weighted_sum / weights.sum() + weighted_mean + +However, the functions also take missing values in the data into account: + +.. 
ipython:: python + + data = xr.DataArray([np.NaN, 2, 4]) + weights = xr.DataArray([8, 1, 1]) + + data.weighted(weights).mean() + +Using ``(data * weights).sum() / weights.sum()`` would (incorrectly) result +in 0.6. + + +If the weights add up to to 0, ``sum`` returns 0: + +.. ipython:: python + + data = xr.DataArray([1.0, 1.0]) + weights = xr.DataArray([-1.0, 1.0]) + + data.weighted(weights).sum() + +and ``mean`` returns ``NaN``: + +.. ipython:: python + + data.weighted(weights).mean() + + +.. note:: + ``weights`` must be a :py:class:`DataArray` and cannot contain missing values. + Missing values can be replaced manually by ``weights.fillna(0)``. + .. _comput.coarsen: Coarsen large arrays ==================== -``DataArray`` and ``Dataset`` objects include a +:py:class:`DataArray` and :py:class:`Dataset` objects include a :py:meth:`~xarray.DataArray.coarsen` and :py:meth:`~xarray.Dataset.coarsen` methods. This supports the block aggregation along multiple dimensions, diff --git a/doc/examples.rst b/doc/examples.rst index 805395808e0..1d48d29bcc5 100644 --- a/doc/examples.rst +++ b/doc/examples.rst @@ -6,6 +6,7 @@ Examples examples/weather-data examples/monthly-means + examples/area_weighted_temperature examples/multidimensional-coords examples/visualization_gallery examples/ROMS_ocean_model diff --git a/doc/examples/area_weighted_temperature.ipynb b/doc/examples/area_weighted_temperature.ipynb new file mode 100644 index 00000000000..72876e3fc29 --- /dev/null +++ b/doc/examples/area_weighted_temperature.ipynb @@ -0,0 +1,226 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "toc": true + }, + "source": [ + "
<h1>Table of Contents<span class=\"tocSkip\"></span></h1>
\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Compare weighted and unweighted mean temperature\n", + "\n", + "\n", + "Author: [Mathias Hauser](https://github.com/mathause/)\n", + "\n", + "\n", + "We use the `air_temperature` example dataset to calculate the area-weighted temperature over its domain. This dataset has a regular latitude/ longitude grid, thus the gridcell area decreases towards the pole. For this grid we can use the cosine of the latitude as proxy for the grid cell area.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-03-17T14:43:57.222351Z", + "start_time": "2020-03-17T14:43:56.147541Z" + } + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "\n", + "import cartopy.crs as ccrs\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "import xarray as xr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Data\n", + "\n", + "Load the data, convert to celsius, and resample to daily values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-03-17T14:43:57.831734Z", + "start_time": "2020-03-17T14:43:57.651845Z" + } + }, + "outputs": [], + "source": [ + "ds = xr.tutorial.load_dataset(\"air_temperature\")\n", + "\n", + "# to celsius\n", + "air = ds.air - 273.15\n", + "\n", + "# resample from 6-hourly to daily values\n", + "air = air.resample(time=\"D\").mean()\n", + "\n", + "air" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot the first timestep:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-03-17T14:43:59.887120Z", + "start_time": "2020-03-17T14:43:59.582894Z" + } + }, + "outputs": [], + "source": [ + "projection = ccrs.LambertConformal(central_longitude=-95, central_latitude=45)\n", + "\n", + "f, ax = plt.subplots(subplot_kw=dict(projection=projection))\n", + "\n", + "air.isel(time=0).plot(transform=ccrs.PlateCarree(), cbar_kwargs=dict(shrink=0.7))\n", + "ax.coastlines()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Creating weights\n", + "\n", + "For a for a rectangular grid the cosine of the latitude is proportional to the grid cell area." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-03-17T14:44:18.777092Z", + "start_time": "2020-03-17T14:44:18.736587Z" + } + }, + "outputs": [], + "source": [ + "weights = np.cos(np.deg2rad(air.lat))\n", + "weights.name = \"weights\"\n", + "weights" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Weighted mean" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-03-17T14:44:52.607120Z", + "start_time": "2020-03-17T14:44:52.564674Z" + } + }, + "outputs": [], + "source": [ + "air_weighted = air.weighted(weights)\n", + "air_weighted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-03-17T14:44:54.334279Z", + "start_time": "2020-03-17T14:44:54.280022Z" + } + }, + "outputs": [], + "source": [ + "weighted_mean = air_weighted.mean((\"lon\", \"lat\"))\n", + "weighted_mean" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Plot: comparison with unweighted mean\n", + "\n", + "Note how the weighted mean temperature is higher than the unweighted." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-03-17T14:45:08.877307Z", + "start_time": "2020-03-17T14:45:08.673383Z" + } + }, + "outputs": [], + "source": [ + "weighted_mean.plot(label=\"weighted\")\n", + "air.mean((\"lon\", \"lat\")).plot(label=\"unweighted\")\n", + "\n", + "plt.legend()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": true, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/whats-new.rst b/doc/whats-new.rst index aad0e083a8c..5640e872bea 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,6 +25,9 @@ Breaking changes New Features ~~~~~~~~~~~~ +- Weighted array reductions are now supported via the new :py:meth:`DataArray.weighted` + and :py:meth:`Dataset.weighted` methods. See :ref:`comput.weighted`. (:issue:`422`, :pull:`2922`). + By `Mathias Hauser `_ - Added support for :py:class:`pandas.DatetimeIndex`-style rounding of ``cftime.datetime`` objects directly via a :py:class:`CFTimeIndex` or via the :py:class:`~core.accessor_dt.DatetimeAccessor`. diff --git a/xarray/core/common.py b/xarray/core/common.py index 39aa7982091..a003642076f 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -745,6 +745,25 @@ def groupby_bins( }, ) + def weighted(self, weights): + """ + Weighted operations. + + Parameters + ---------- + weights : DataArray + An array of weights associated with the values in this Dataset. + Each value in the data contributes to the reduction operation + according to its associated weight. + + Notes + ----- + ``weights`` must be a DataArray and cannot contain missing values. + Missing values can be replaced by ``weights.fillna(0)``. 
+ """ + + return self._weighted_cls(self, weights) + def rolling( self, dim: Mapping[Hashable, int] = None, diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b335eeb293b..4b3ecb2744c 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -33,6 +33,7 @@ resample, rolling, utils, + weighted, ) from .accessor_dt import CombinedDatetimelikeAccessor from .accessor_str import StringAccessor @@ -258,6 +259,7 @@ class DataArray(AbstractArray, DataWithCoords): _rolling_cls = rolling.DataArrayRolling _coarsen_cls = rolling.DataArrayCoarsen _resample_cls = resample.DataArrayResample + _weighted_cls = weighted.DataArrayWeighted dt = property(CombinedDatetimelikeAccessor) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d5ad1123a54..c10447f6d11 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -46,6 +46,7 @@ resample, rolling, utils, + weighted, ) from .alignment import _broadcast_helper, _get_broadcast_dims_map_common_coords, align from .common import ( @@ -457,6 +458,7 @@ class Dataset(Mapping, ImplementsDatasetReduce, DataWithCoords): _rolling_cls = rolling.DatasetRolling _coarsen_cls = rolling.DatasetCoarsen _resample_cls = resample.DatasetResample + _weighted_cls = weighted.DatasetWeighted def __init__( self, diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py new file mode 100644 index 00000000000..996d2e4c43e --- /dev/null +++ b/xarray/core/weighted.py @@ -0,0 +1,255 @@ +from typing import TYPE_CHECKING, Hashable, Iterable, Optional, Union, overload + +from .computation import dot +from .options import _get_keep_attrs + +if TYPE_CHECKING: + from .dataarray import DataArray, Dataset + +_WEIGHTED_REDUCE_DOCSTRING_TEMPLATE = """ + Reduce this {cls}'s data by a weighted ``{fcn}`` along some dimension(s). + + Parameters + ---------- + dim : str or sequence of str, optional + Dimension(s) over which to apply the weighted ``{fcn}``. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, the attributes (``attrs``) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + + Returns + ------- + reduced : {cls} + New {cls} object with weighted ``{fcn}`` applied to its data and + the indicated dimension(s) removed. + + Notes + ----- + Returns {on_zero} if the ``weights`` sum to 0.0 along the reduced + dimension(s). + """ + +_SUM_OF_WEIGHTS_DOCSTRING = """ + Calculate the sum of weights, accounting for missing values in the data + + Parameters + ---------- + dim : str or sequence of str, optional + Dimension(s) over which to sum the weights. + keep_attrs : bool, optional + If True, the attributes (``attrs``) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + + Returns + ------- + reduced : {cls} + New {cls} object with the sum of the weights over the given dimension. + """ + + +class Weighted: + """An object that implements weighted operations. + + You should create a Weighted object by using the ``DataArray.weighted`` or + ``Dataset.weighted`` methods. 
+ + See Also + -------- + Dataset.weighted + DataArray.weighted + """ + + __slots__ = ("obj", "weights") + + @overload + def __init__(self, obj: "DataArray", weights: "DataArray") -> None: + ... + + @overload # noqa: F811 + def __init__(self, obj: "Dataset", weights: "DataArray") -> None: # noqa: F811 + ... + + def __init__(self, obj, weights): # noqa: F811 + """ + Create a Weighted object + + Parameters + ---------- + obj : DataArray or Dataset + Object over which the weighted reduction operation is applied. + weights : DataArray + An array of weights associated with the values in the obj. + Each value in the obj contributes to the reduction operation + according to its associated weight. + + Notes + ----- + ``weights`` must be a ``DataArray`` and cannot contain missing values. + Missing values can be replaced by ``weights.fillna(0)``. + """ + + from .dataarray import DataArray + + if not isinstance(weights, DataArray): + raise ValueError("`weights` must be a DataArray") + + if weights.isnull().any(): + raise ValueError( + "`weights` cannot contain missing values. " + "Missing values can be replaced by `weights.fillna(0)`." + ) + + self.obj = obj + self.weights = weights + + @staticmethod + def _reduce( + da: "DataArray", + weights: "DataArray", + dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, + skipna: Optional[bool] = None, + ) -> "DataArray": + """reduce using dot; equivalent to (da * weights).sum(dim, skipna) + + for internal use only + """ + + # need to infer dims as we use `dot` + if dim is None: + dim = ... + + # need to mask invalid values in da, as `dot` does not implement skipna + if skipna or (skipna is None and da.dtype.kind in "cfO"): + da = da.fillna(0.0) + + # `dot` does not broadcast arrays, so this avoids creating a large + # DataArray (if `weights` has additional dimensions) + # maybe add fasttrack (`(da * weights).sum(dims=dim, skipna=skipna)`) + return dot(da, weights, dims=dim) + + def _sum_of_weights( + self, da: "DataArray", dim: Optional[Union[Hashable, Iterable[Hashable]]] = None + ) -> "DataArray": + """ Calculate the sum of weights, accounting for missing values """ + + # we need to mask data values that are nan; else the weights are wrong + mask = da.notnull() + + sum_of_weights = self._reduce(mask, self.weights, dim=dim, skipna=False) + + # 0-weights are not valid + valid_weights = sum_of_weights != 0.0 + + return sum_of_weights.where(valid_weights) + + def _weighted_sum( + self, + da: "DataArray", + dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, + skipna: Optional[bool] = None, + ) -> "DataArray": + """Reduce a DataArray by a by a weighted ``sum`` along some dimension(s).""" + + return self._reduce(da, self.weights, dim=dim, skipna=skipna) + + def _weighted_mean( + self, + da: "DataArray", + dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, + skipna: Optional[bool] = None, + ) -> "DataArray": + """Reduce a DataArray by a weighted ``mean`` along some dimension(s).""" + + weighted_sum = self._weighted_sum(da, dim=dim, skipna=skipna) + + sum_of_weights = self._sum_of_weights(da, dim=dim) + + return weighted_sum / sum_of_weights + + def _implementation(self, func, dim, **kwargs): + + raise NotImplementedError("Use `Dataset.weighted` or `DataArray.weighted`") + + def sum_of_weights( + self, + dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, + keep_attrs: Optional[bool] = None, + ) -> Union["DataArray", "Dataset"]: + + return self._implementation( + self._sum_of_weights, dim=dim, keep_attrs=keep_attrs + ) + + def sum( + 
self, + dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, + skipna: Optional[bool] = None, + keep_attrs: Optional[bool] = None, + ) -> Union["DataArray", "Dataset"]: + + return self._implementation( + self._weighted_sum, dim=dim, skipna=skipna, keep_attrs=keep_attrs + ) + + def mean( + self, + dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, + skipna: Optional[bool] = None, + keep_attrs: Optional[bool] = None, + ) -> Union["DataArray", "Dataset"]: + + return self._implementation( + self._weighted_mean, dim=dim, skipna=skipna, keep_attrs=keep_attrs + ) + + def __repr__(self): + """provide a nice str repr of our Weighted object""" + + klass = self.__class__.__name__ + weight_dims = ", ".join(self.weights.dims) + return f"{klass} with weights along dimensions: {weight_dims}" + + +class DataArrayWeighted(Weighted): + def _implementation(self, func, dim, **kwargs): + + keep_attrs = kwargs.pop("keep_attrs") + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + + weighted = func(self.obj, dim=dim, **kwargs) + + if keep_attrs: + weighted.attrs = self.obj.attrs + + return weighted + + +class DatasetWeighted(Weighted): + def _implementation(self, func, dim, **kwargs) -> "Dataset": + + return self.obj.map(func, dim=dim, **kwargs) + + +def _inject_docstring(cls, cls_name): + + cls.sum_of_weights.__doc__ = _SUM_OF_WEIGHTS_DOCSTRING.format(cls=cls_name) + + cls.sum.__doc__ = _WEIGHTED_REDUCE_DOCSTRING_TEMPLATE.format( + cls=cls_name, fcn="sum", on_zero="0" + ) + + cls.mean.__doc__ = _WEIGHTED_REDUCE_DOCSTRING_TEMPLATE.format( + cls=cls_name, fcn="mean", on_zero="NaN" + ) + + +_inject_docstring(DataArrayWeighted, "DataArray") +_inject_docstring(DatasetWeighted, "Dataset") diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py new file mode 100644 index 00000000000..24531215dfb --- /dev/null +++ b/xarray/tests/test_weighted.py @@ -0,0 +1,311 @@ +import numpy as np +import pytest + +import xarray as xr +from xarray import DataArray +from xarray.tests import assert_allclose, assert_equal, raises_regex + + +@pytest.mark.parametrize("as_dataset", (True, False)) +def test_weighted_non_DataArray_weights(as_dataset): + + data = DataArray([1, 2]) + if as_dataset: + data = data.to_dataset(name="data") + + with raises_regex(ValueError, "`weights` must be a DataArray"): + data.weighted([1, 2]) + + +@pytest.mark.parametrize("as_dataset", (True, False)) +@pytest.mark.parametrize("weights", ([np.nan, 2], [np.nan, np.nan])) +def test_weighted_weights_nan_raises(as_dataset, weights): + + data = DataArray([1, 2]) + if as_dataset: + data = data.to_dataset(name="data") + + with pytest.raises(ValueError, match="`weights` cannot contain missing values."): + data.weighted(DataArray(weights)) + + +@pytest.mark.parametrize( + ("weights", "expected"), + (([1, 2], 3), ([2, 0], 2), ([0, 0], np.nan), ([-1, 1], np.nan)), +) +def test_weighted_sum_of_weights_no_nan(weights, expected): + + da = DataArray([1, 2]) + weights = DataArray(weights) + result = da.weighted(weights).sum_of_weights() + + expected = DataArray(expected) + + assert_equal(expected, result) + + +@pytest.mark.parametrize( + ("weights", "expected"), + (([1, 2], 2), ([2, 0], np.nan), ([0, 0], np.nan), ([-1, 1], 1)), +) +def test_weighted_sum_of_weights_nan(weights, expected): + + da = DataArray([np.nan, 2]) + weights = DataArray(weights) + result = da.weighted(weights).sum_of_weights() + + expected = DataArray(expected) + + assert_equal(expected, result) + + +@pytest.mark.parametrize("da", ([1.0, 2], [1, np.nan], 
[np.nan, np.nan])) +@pytest.mark.parametrize("factor", [0, 1, 3.14]) +@pytest.mark.parametrize("skipna", (True, False)) +def test_weighted_sum_equal_weights(da, factor, skipna): + # if all weights are 'f'; weighted sum is f times the ordinary sum + + da = DataArray(da) + weights = xr.full_like(da, factor) + + expected = da.sum(skipna=skipna) * factor + result = da.weighted(weights).sum(skipna=skipna) + + assert_equal(expected, result) + + +@pytest.mark.parametrize( + ("weights", "expected"), (([1, 2], 5), ([0, 2], 4), ([0, 0], 0)) +) +def test_weighted_sum_no_nan(weights, expected): + + da = DataArray([1, 2]) + + weights = DataArray(weights) + result = da.weighted(weights).sum() + expected = DataArray(expected) + + assert_equal(expected, result) + + +@pytest.mark.parametrize( + ("weights", "expected"), (([1, 2], 4), ([0, 2], 4), ([1, 0], 0), ([0, 0], 0)) +) +@pytest.mark.parametrize("skipna", (True, False)) +def test_weighted_sum_nan(weights, expected, skipna): + + da = DataArray([np.nan, 2]) + + weights = DataArray(weights) + result = da.weighted(weights).sum(skipna=skipna) + + if skipna: + expected = DataArray(expected) + else: + expected = DataArray(np.nan) + + assert_equal(expected, result) + + +@pytest.mark.filterwarnings("ignore:Mean of empty slice") +@pytest.mark.parametrize("da", ([1.0, 2], [1, np.nan], [np.nan, np.nan])) +@pytest.mark.parametrize("skipna", (True, False)) +@pytest.mark.parametrize("factor", [1, 2, 3.14]) +def test_weighted_mean_equal_weights(da, skipna, factor): + # if all weights are equal (!= 0), should yield the same result as mean + + da = DataArray(da) + + # all weights as 1. + weights = xr.full_like(da, factor) + + expected = da.mean(skipna=skipna) + result = da.weighted(weights).mean(skipna=skipna) + + assert_equal(expected, result) + + +@pytest.mark.parametrize( + ("weights", "expected"), (([4, 6], 1.6), ([1, 0], 1.0), ([0, 0], np.nan)) +) +def test_weighted_mean_no_nan(weights, expected): + + da = DataArray([1, 2]) + weights = DataArray(weights) + expected = DataArray(expected) + + result = da.weighted(weights).mean() + + assert_equal(expected, result) + + +@pytest.mark.parametrize( + ("weights", "expected"), (([4, 6], 2.0), ([1, 0], np.nan), ([0, 0], np.nan)) +) +@pytest.mark.parametrize("skipna", (True, False)) +def test_weighted_mean_nan(weights, expected, skipna): + + da = DataArray([np.nan, 2]) + weights = DataArray(weights) + + if skipna: + expected = DataArray(expected) + else: + expected = DataArray(np.nan) + + result = da.weighted(weights).mean(skipna=skipna) + + assert_equal(expected, result) + + +def expected_weighted(da, weights, dim, skipna, operation): + """ + Generate expected result using ``*`` and ``sum``. 
This is checked against + the result of da.weighted which uses ``dot`` + """ + + weighted_sum = (da * weights).sum(dim=dim, skipna=skipna) + + if operation == "sum": + return weighted_sum + + masked_weights = weights.where(da.notnull()) + sum_of_weights = masked_weights.sum(dim=dim, skipna=True) + valid_weights = sum_of_weights != 0 + sum_of_weights = sum_of_weights.where(valid_weights) + + if operation == "sum_of_weights": + return sum_of_weights + + weighted_mean = weighted_sum / sum_of_weights + + if operation == "mean": + return weighted_mean + + +@pytest.mark.parametrize("dim", ("a", "b", "c", ("a", "b"), ("a", "b", "c"), None)) +@pytest.mark.parametrize("operation", ("sum_of_weights", "sum", "mean")) +@pytest.mark.parametrize("add_nans", (True, False)) +@pytest.mark.parametrize("skipna", (None, True, False)) +@pytest.mark.parametrize("as_dataset", (True, False)) +def test_weighted_operations_3D(dim, operation, add_nans, skipna, as_dataset): + + dims = ("a", "b", "c") + coords = dict(a=[0, 1, 2, 3], b=[0, 1, 2, 3], c=[0, 1, 2, 3]) + + weights = DataArray(np.random.randn(4, 4, 4), dims=dims, coords=coords) + + data = np.random.randn(4, 4, 4) + + # add approximately 25 % NaNs (https://stackoverflow.com/a/32182680/3010700) + if add_nans: + c = int(data.size * 0.25) + data.ravel()[np.random.choice(data.size, c, replace=False)] = np.NaN + + data = DataArray(data, dims=dims, coords=coords) + + if as_dataset: + data = data.to_dataset(name="data") + + if operation == "sum_of_weights": + result = data.weighted(weights).sum_of_weights(dim) + else: + result = getattr(data.weighted(weights), operation)(dim, skipna=skipna) + + expected = expected_weighted(data, weights, dim, skipna, operation) + + assert_allclose(expected, result) + + +@pytest.mark.parametrize("operation", ("sum_of_weights", "sum", "mean")) +@pytest.mark.parametrize("as_dataset", (True, False)) +def test_weighted_operations_nonequal_coords(operation, as_dataset): + + weights = DataArray(np.random.randn(4), dims=("a",), coords=dict(a=[0, 1, 2, 3])) + data = DataArray(np.random.randn(4), dims=("a",), coords=dict(a=[1, 2, 3, 4])) + + if as_dataset: + data = data.to_dataset(name="data") + + expected = expected_weighted( + data, weights, dim="a", skipna=None, operation=operation + ) + result = getattr(data.weighted(weights), operation)(dim="a") + + assert_allclose(expected, result) + + +@pytest.mark.parametrize("dim", ("dim_0", None)) +@pytest.mark.parametrize("shape_data", ((4,), (4, 4), (4, 4, 4))) +@pytest.mark.parametrize("shape_weights", ((4,), (4, 4), (4, 4, 4))) +@pytest.mark.parametrize("operation", ("sum_of_weights", "sum", "mean")) +@pytest.mark.parametrize("add_nans", (True, False)) +@pytest.mark.parametrize("skipna", (None, True, False)) +@pytest.mark.parametrize("as_dataset", (True, False)) +def test_weighted_operations_different_shapes( + dim, shape_data, shape_weights, operation, add_nans, skipna, as_dataset +): + + weights = DataArray(np.random.randn(*shape_weights)) + + data = np.random.randn(*shape_data) + + # add approximately 25 % NaNs + if add_nans: + c = int(data.size * 0.25) + data.ravel()[np.random.choice(data.size, c, replace=False)] = np.NaN + + data = DataArray(data) + + if as_dataset: + data = data.to_dataset(name="data") + + if operation == "sum_of_weights": + result = getattr(data.weighted(weights), operation)(dim) + else: + result = getattr(data.weighted(weights), operation)(dim, skipna=skipna) + + expected = expected_weighted(data, weights, dim, skipna, operation) + + assert_allclose(expected, result) + 
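
Editor's note: the ``expected_weighted`` helper above spells out the identity the
implementation relies on — the weighted mean is the weighted sum divided by the sum
of the weights, with weights masked wherever the data is missing. A minimal NumPy
sketch of that identity (illustrative only, not part of the patch; it reuses the
numbers from the ``doc/computation.rst`` example earlier in this series):

    import numpy as np

    data = np.array([np.nan, 2.0, 4.0])
    weights = np.array([8.0, 1.0, 1.0])

    mask = ~np.isnan(data)                    # drop weights of missing values
    weighted_sum = np.nansum(data * weights)  # 2*1 + 4*1 = 6.0
    sum_of_weights = weights[mask].sum()      # 1 + 1 = 2.0 (the 8 is masked out)
    print(weighted_sum / sum_of_weights)      # 3.0

Dividing by the unmasked ``weights.sum()`` (10.0) would instead give the 0.6 that
``doc/computation.rst`` calls out as incorrect.
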
+ +@pytest.mark.parametrize("operation", ("sum_of_weights", "sum", "mean")) +@pytest.mark.parametrize("as_dataset", (True, False)) +@pytest.mark.parametrize("keep_attrs", (True, False, None)) +def test_weighted_operations_keep_attr(operation, as_dataset, keep_attrs): + + weights = DataArray(np.random.randn(2, 2), attrs=dict(attr="weights")) + data = DataArray(np.random.randn(2, 2)) + + if as_dataset: + data = data.to_dataset(name="data") + + data.attrs = dict(attr="weights") + + result = getattr(data.weighted(weights), operation)(keep_attrs=True) + + if operation == "sum_of_weights": + assert weights.attrs == result.attrs + else: + assert data.attrs == result.attrs + + result = getattr(data.weighted(weights), operation)(keep_attrs=None) + assert not result.attrs + + result = getattr(data.weighted(weights), operation)(keep_attrs=False) + assert not result.attrs + + +@pytest.mark.xfail(reason="xr.Dataset.map does not copy attrs of DataArrays GH: 3595") +@pytest.mark.parametrize("operation", ("sum", "mean")) +def test_weighted_operations_keep_attr_da_in_ds(operation): + # GH #3595 + + weights = DataArray(np.random.randn(2, 2)) + data = DataArray(np.random.randn(2, 2), attrs=dict(attr="data")) + data = data.to_dataset(name="a") + + result = getattr(data.weighted(weights), operation)(keep_attrs=True) + + assert data.a.attrs == result.a.attrs From beea37e90ac9d6410ae696dec4d6b052bdb05ba7 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 19 Mar 2020 08:32:39 -0600 Subject: [PATCH 29/54] Fix some warnings (#3864) * Fix some warnings * Update xarray/backends/api.py Co-Authored-By: keewis * fix test Co-authored-by: keewis --- xarray/backends/api.py | 2 +- xarray/tests/test_accessor_dt.py | 1 + xarray/tests/test_backends.py | 38 ++++++++++++++++------------- xarray/tests/test_concat.py | 3 +-- xarray/tests/test_dask.py | 1 + xarray/tests/test_dataarray.py | 4 +-- xarray/tests/test_dataset.py | 2 +- xarray/tests/test_duck_array_ops.py | 1 + xarray/tests/test_groupby.py | 3 ++- xarray/tests/test_plot.py | 1 + 10 files changed, 32 insertions(+), 24 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index e828faabc27..c7481e22b59 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1253,7 +1253,7 @@ def check_dtype(var): if ( not np.issubdtype(var.dtype, np.number) and not np.issubdtype(var.dtype, np.datetime64) - and not np.issubdtype(var.dtype, np.bool) + and not np.issubdtype(var.dtype, np.bool_) and not coding.strings.is_unicode_dtype(var.dtype) and not var.dtype == object ): diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 1a8a2732eeb..20a9283e32c 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -347,6 +347,7 @@ def test_field_access(data, field): @requires_cftime +@pytest.mark.filterwarnings("ignore::RuntimeWarning") def test_cftime_strftime_access(data): """ compare cftime formatting against datetime formatting """ date_format = "%Y%m%d%H" diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 59ed8e690cc..5f8ba83c330 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1921,33 +1921,36 @@ def test_to_zarr_append_compute_false_roundtrip(self): ds, ds_to_append, _ = create_append_test_data() ds, ds_to_append = ds.chunk(), ds_to_append.chunk() - with self.create_zarr_target() as store: - delayed_obj = self.save(ds, store, compute=False, mode="w") - assert isinstance(delayed_obj, Delayed) + with 
pytest.warns(SerializationWarning): + with self.create_zarr_target() as store: + delayed_obj = self.save(ds, store, compute=False, mode="w") + assert isinstance(delayed_obj, Delayed) + + with pytest.raises(AssertionError): + with self.open(store) as actual: + assert_identical(ds, actual) + + delayed_obj.compute() - with pytest.raises(AssertionError): with self.open(store) as actual: assert_identical(ds, actual) - delayed_obj.compute() + delayed_obj = self.save( + ds_to_append, store, compute=False, append_dim="time" + ) + assert isinstance(delayed_obj, Delayed) - with self.open(store) as actual: - assert_identical(ds, actual) + with pytest.raises(AssertionError): + with self.open(store) as actual: + assert_identical( + xr.concat([ds, ds_to_append], dim="time"), actual + ) - delayed_obj = self.save( - ds_to_append, store, compute=False, append_dim="time" - ) - assert isinstance(delayed_obj, Delayed) + delayed_obj.compute() - with pytest.raises(AssertionError): with self.open(store) as actual: assert_identical(xr.concat([ds, ds_to_append], dim="time"), actual) - delayed_obj.compute() - - with self.open(store) as actual: - assert_identical(xr.concat([ds, ds_to_append], dim="time"), actual) - def test_encoding_chunksizes(self): # regression test for GH2278 # see also test_encoding_chunksizes_unlimited @@ -3519,6 +3522,7 @@ def test_uamiv_format_mfread(self): ["example.uamiv", "example.uamiv"], engine="pseudonetcdf", concat_dim="TSTEP", + combine="nested", backend_kwargs={"format": "uamiv"}, ) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 77c030198ac..1a498496c03 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -40,8 +40,7 @@ def test_concat_compat(): assert_equal(ds2.no_x_y, result.no_x_y.transpose()) for var in ["has_x", "no_x_y"]: - assert "y" not in result[var] - + assert "y" not in result[var].dims and "y" not in result[var].coords with raises_regex(ValueError, "coordinates in some datasets but not others"): concat([ds1, ds2], dim="q") with raises_regex(ValueError, "'q' is not present in all datasets"): diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 8fb54c4ee84..4f7e3910f82 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -1344,6 +1344,7 @@ def test_normalize_token_with_backend(map_ds): map_ds.to_netcdf(tmp_file) read = xr.open_dataset(tmp_file) assert not dask.base.tokenize(map_ds) == dask.base.tokenize(read) + read.close() @pytest.mark.parametrize( diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index dfaf8fd4e28..ef3da5a3b94 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2035,7 +2035,7 @@ def test_stack_unstack(self): codes=[[], []], names=["x", "y"], ) - pd.util.testing.assert_index_equal(a, b) + pd.testing.assert_index_equal(a, b) actual = orig.stack(z=["x", "y"]).unstack("z").drop_vars(["x", "y"]) assert_identical(orig, actual) @@ -3488,7 +3488,7 @@ def test_from_series_sparse(self): def test_to_and_from_empty_series(self): # GH697 - expected = pd.Series([]) + expected = pd.Series([], dtype=np.float64) da = DataArray.from_series(expected) assert len(da) == 0 actual = da.to_series() diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 6a6c496591a..c7f39108477 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -6042,7 +6042,7 @@ def test_integrate(dask): actual = da.integrate("x") # coordinate that contains x should be dropped. 
expected_x = xr.DataArray( - np.trapz(da, da["x"], axis=0), + np.trapz(da.compute(), da["x"], axis=0), dims=["y"], coords={k: v for k, v in da.coords.items() if "x" not in v.dims}, ) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index f4f11473e48..157cd16cba6 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -279,6 +279,7 @@ def assert_dask_array(da, dask): @arm_xfail +@pytest.mark.filterwarnings("ignore::RuntimeWarning") @pytest.mark.parametrize("dask", [False, True] if has_dask else [False]) def test_datetime_mean(dask): # Note: only testing numpy, as dask is broken upstream diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 8ab4b7b2f80..866d5fb0899 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -447,7 +447,8 @@ def test_groupby_drops_nans(): # reduction operation along a different dimension actual = grouped.mean("time") - expected = ds.mean("time").where(ds.id.notnull()) + with pytest.warns(RuntimeWarning): # mean of empty slice + expected = ds.mean("time").where(ds.id.notnull()) assert_identical(actual, expected) # NaN in non-dimensional coordinate diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 9ffbcd9c85e..c1549c62038 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -1749,6 +1749,7 @@ def test_can_set_vmin_vmax(self): assert np.allclose(expected, clim) @pytest.mark.slow + @pytest.mark.filterwarnings("ignore") def test_can_set_norm(self): norm = mpl.colors.SymLogNorm(0.1) self.g.map_dataarray(xplt.imshow, "x", "y", norm=norm) From e7d6e12662ae113a57eaf38eb2a19ab9ff92b9a8 Mon Sep 17 00:00:00 2001 From: Mark Boer Date: Thu, 19 Mar 2020 15:41:49 +0100 Subject: [PATCH 30/54] Add DataArray.pad, Dataset.pad, Variable.pad (#3596) * add pad method to Variable and add corresponding test * move pad_with_fill value to dask_array_compat.py and make it default to dask.array.pad * add pad method to dataarray * add docstrings for variable.pad and dataarray.pad * add tests for DataArray.pad * improve pad method signature and support dictionaries as pad_options instead of list of tuples * fix linting errors and remove typo from tests * implement suggested changes: pad_width => padwidths, use pytest.mark.parametrize in test_variable.test_pad * move pad method to dataset * add helper function to variable.pad and fix some mypy errors * add some more tests for DataArray.pad and add docstrings to all pad methods * add workaround for dask.pad mode=mean that converts integers to floats, and add an additional check if the shape of output * disable linear_ramp test and add pad to whats-new.rst and api.rst * fix small mege issue in test_unit * fix DataArray.pad and Dataset.pad docstrings * implement suggested changes from code review: add option of integer pad_width, add a warning and exception to dask_array_compad.pad * apply isort and and set linear_ramp to xfail * Minor fixes. 1. Add warning category 2. Use variable for pad arguments when testing 3. Add example. 
* fix merge issue and make some minor changes as suggested in the code review * fix test_unit.test_pad_constant_values * Keewis review comments * Add experimental warning Co-authored-by: dcherian --- doc/api-hidden.rst | 2 - doc/api.rst | 2 + doc/whats-new.rst | 2 + xarray/core/dask_array_compat.py | 47 +++++++++ xarray/core/dataarray.py | 168 +++++++++++++++++++++++++++++++ xarray/core/dataset.py | 166 ++++++++++++++++++++++++++++++ xarray/core/duck_array_ops.py | 2 +- xarray/core/rolling.py | 2 +- xarray/core/variable.py | 161 ++++++++++++++++++----------- xarray/tests/test_dataarray.py | 107 ++++++++++++++++++++ xarray/tests/test_dataset.py | 13 +++ xarray/tests/test_sparse.py | 2 +- xarray/tests/test_units.py | 60 +++++------ xarray/tests/test_variable.py | 107 +++++++++++++++----- 14 files changed, 717 insertions(+), 124 deletions(-) diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index 437f53b1a91..cc9517a98ba 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -379,7 +379,6 @@ Variable.min Variable.no_conflicts Variable.notnull - Variable.pad_with_fill_value Variable.prod Variable.quantile Variable.rank @@ -453,7 +452,6 @@ IndexVariable.min IndexVariable.no_conflicts IndexVariable.notnull - IndexVariable.pad_with_fill_value IndexVariable.prod IndexVariable.quantile IndexVariable.rank diff --git a/doc/api.rst b/doc/api.rst index 43a9cf53ead..b9c3e3bdd33 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -221,6 +221,7 @@ Reshaping and reorganizing Dataset.to_stacked_array Dataset.shift Dataset.roll + Dataset.pad Dataset.sortby Dataset.broadcast_like @@ -401,6 +402,7 @@ Reshaping and reorganizing DataArray.to_unstacked_dataset DataArray.shift DataArray.roll + DataArray.pad DataArray.sortby DataArray.broadcast_like diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 5640e872bea..8140288f350 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -157,6 +157,8 @@ Breaking changes New Features ~~~~~~~~~~~~ +- Implement :py:meth:`DataArray.pad` and :py:meth:`Dataset.pad`. (:issue:`2605`, :pull:`3596`). + By `Mark Boer `_. - :py:meth:`DataArray.sel` and :py:meth:`Dataset.sel` now support :py:class:`pandas.CategoricalIndex`. (:issue:`3669`) By `Keisuke Fujii `_. - Support using an existing, opened h5netcdf ``File`` with diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index 05f750a1355..94c50d90e84 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -1,3 +1,4 @@ +import warnings from distutils.version import LooseVersion from typing import Iterable @@ -99,6 +100,52 @@ def meta_from_array(x, ndim=None, dtype=None): return meta +def _validate_pad_output_shape(input_shape, pad_width, output_shape): + """ Validates the output shape of dask.array.pad, raising a RuntimeError if they do not match. + In the current versions of dask (2.2/2.4), dask.array.pad with mode='reflect' sometimes returns + an invalid shape. 
+ """ + isint = lambda i: isinstance(i, int) + + if isint(pad_width): + pass + elif len(pad_width) == 2 and all(map(isint, pad_width)): + pad_width = sum(pad_width) + elif ( + len(pad_width) == len(input_shape) + and all(map(lambda x: len(x) == 2, pad_width)) + and all((isint(i) for p in pad_width for i in p)) + ): + pad_width = np.sum(pad_width, axis=1) + else: + # unreachable: dask.array.pad should already have thrown an error + raise ValueError("Invalid value for `pad_width`") + + if not np.array_equal(np.array(input_shape) + pad_width, output_shape): + raise RuntimeError( + "There seems to be something wrong with the shape of the output of dask.array.pad, " + "try upgrading Dask, use a different pad mode e.g. mode='constant' or first convert " + "your DataArray/Dataset to one backed by a numpy array by calling the `compute()` method." + "See: https://github.com/dask/dask/issues/5303" + ) + + +def pad(array, pad_width, mode="constant", **kwargs): + padded = da.pad(array, pad_width, mode=mode, **kwargs) + # workaround for inconsistency between numpy and dask: https://github.com/dask/dask/issues/5303 + if mode == "mean" and issubclass(array.dtype.type, np.integer): + warnings.warn( + 'dask.array.pad(mode="mean") converts integers to floats. xarray converts ' + "these floats back to integers to keep the interface consistent. There is a chance that " + "this introduces rounding errors. If you wish to keep the values as floats, first change " + "the dtype to a float before calling pad.", + UserWarning, + ) + return da.round(padded).astype(array.dtype) + _validate_pad_output_shape(array.shape, pad_width, padded.shape) + return padded + + if LooseVersion(dask_version) >= LooseVersion("2.8.1"): median = da.median else: diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 4b3ecb2744c..bd956553929 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3260,6 +3260,174 @@ def map_blocks( return map_blocks(func, self, args, kwargs) + def pad( + self, + pad_width: Mapping[Hashable, Union[int, Tuple[int, int]]] = None, + mode: str = "constant", + stat_length: Union[ + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + constant_values: Union[ + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + end_values: Union[ + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + reflect_type: str = None, + **pad_width_kwargs: Any, + ) -> "DataArray": + """Pad this array along one or more dimensions. + + .. warning:: + This function is experimental and its behaviour is likely to change + especially regarding padding of dimension coordinates (or IndexVariables). + + When using one of the modes ("edge", "reflect", "symmetric", "wrap"), + coordinates will be padded with the same mode, otherwise coordinates + are padded using the "constant" mode with fill_value dtypes.NA. + + Parameters + ---------- + pad_width : Mapping with the form of {dim: (pad_before, pad_after)} + Number of values padded along each dimension. + {dim: pad} is a shortcut for pad_before = pad_after = pad + mode : str + One of the following string values (taken from numpy docs) + + 'constant' (default) + Pads with a constant value. + 'edge' + Pads with the edge values of array. + 'linear_ramp' + Pads with the linear ramp between end_value and the + array edge value. + 'maximum' + Pads with the maximum value of all or part of the + vector along each axis. + 'mean' + Pads with the mean value of all or part of the + vector along each axis. 
+ 'median' + Pads with the median value of all or part of the + vector along each axis. + 'minimum' + Pads with the minimum value of all or part of the + vector along each axis. + 'reflect' + Pads with the reflection of the vector mirrored on + the first and last values of the vector along each + axis. + 'symmetric' + Pads with the reflection of the vector mirrored + along the edge of the array. + 'wrap' + Pads with the wrap of the vector along the axis. + The first values are used to pad the end and the + end values are used to pad the beginning. + stat_length : int, tuple or mapping of the form {dim: tuple} + Used in 'maximum', 'mean', 'median', and 'minimum'. Number of + values at edge of each axis used to calculate the statistic value. + {dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)} unique + statistic lengths along each dimension. + ((before, after),) yields same before and after statistic lengths + for each dimension. + (stat_length,) or int is a shortcut for before = after = statistic + length for all axes. + Default is ``None``, to use the entire axis. + constant_values : scalar, tuple or mapping of the form {dim: tuple} + Used in 'constant'. The values to set the padded values for each + axis. + ``{dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)}`` unique + pad constants along each dimension. + ``((before, after),)`` yields same before and after constants for each + dimension. + ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for + all dimensions. + Default is 0. + end_values : scalar, tuple or mapping of the form {dim: tuple} + Used in 'linear_ramp'. The values used for the ending value of the + linear_ramp and that will form the edge of the padded array. + ``{dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)}`` unique + end values along each dimension. + ``((before, after),)`` yields same before and after end values for each + axis. + ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for + all axes. + Default is 0. + reflect_type : {'even', 'odd'}, optional + Used in 'reflect', and 'symmetric'. The 'even' style is the + default with an unaltered reflection around the edge value. For + the 'odd' style, the extended part of the array is created by + subtracting the reflected values from two times the edge value. + **pad_width_kwargs: + The keyword arguments form of ``pad_width``. + One of ``pad_width`` or ``pad_width_kwargs`` must be provided. + + Returns + ------- + padded : DataArray + DataArray with the padded coordinates and data. + + See also + -------- + DataArray.shift, DataArray.roll, DataArray.bfill, DataArray.ffill, numpy.pad, dask.array.pad + + Notes + ----- + By default when ``mode="constant"`` and ``constant_values=None``, integer types will be + promoted to ``float`` and padded with ``np.nan``. 
To avoid type promotion + specify ``constant_values=np.nan`` + + Examples + -------- + + >>> arr = xr.DataArray([5, 6, 7], coords=[("x", [0,1,2])]) + >>> arr.pad(x=(1,2), constant_values=0) + + array([0, 5, 6, 7, 0, 0]) + Coordinates: + * x (x) float64 nan 0.0 1.0 2.0 nan nan + + >>> da = xr.DataArray([[0,1,2,3], [10,11,12,13]], + dims=["x", "y"], + coords={"x": [0,1], "y": [10, 20 ,30, 40], "z": ("x", [100, 200])} + ) + >>> da.pad(x=1) + + array([[nan, nan, nan, nan], + [ 0., 1., 2., 3.], + [10., 11., 12., 13.], + [nan, nan, nan, nan]]) + Coordinates: + * x (x) float64 nan 0.0 1.0 nan + * y (y) int64 10 20 30 40 + z (x) float64 nan 100.0 200.0 nan + >>> da.pad(x=1, constant_values=np.nan) + + array([[-9223372036854775808, -9223372036854775808, -9223372036854775808, + -9223372036854775808], + [ 0, 1, 2, + 3], + [ 10, 11, 12, + 13], + [-9223372036854775808, -9223372036854775808, -9223372036854775808, + -9223372036854775808]]) + Coordinates: + * x (x) float64 nan 0.0 1.0 nan + * y (y) int64 10 20 30 40 + z (x) float64 nan 100.0 200.0 nan + """ + ds = self._to_temp_dataset().pad( + pad_width=pad_width, + mode=mode, + stat_length=stat_length, + constant_values=constant_values, + end_values=end_values, + reflect_type=reflect_type, + **pad_width_kwargs, + ) + return self._from_temp_dataset(ds) + # this needs to be at the end, or mypy will confuse with `str` # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names str = property(StringAccessor) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index c10447f6d11..7c218e209cb 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5745,5 +5745,171 @@ def map_blocks( return map_blocks(func, self, args, kwargs) + def pad( + self, + pad_width: Mapping[Hashable, Union[int, Tuple[int, int]]] = None, + mode: str = "constant", + stat_length: Union[ + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + constant_values: Union[ + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + end_values: Union[ + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + reflect_type: str = None, + **pad_width_kwargs: Any, + ) -> "Dataset": + """Pad this dataset along one or more dimensions. + + .. warning:: + This function is experimental and its behaviour is likely to change + especially regarding padding of dimension coordinates (or IndexVariables). + + When using one of the modes ("edge", "reflect", "symmetric", "wrap"), + coordinates will be padded with the same mode, otherwise coordinates + are padded using the "constant" mode with fill_value dtypes.NA. + + Parameters + ---------- + pad_width : Mapping with the form of {dim: (pad_before, pad_after)} + Number of values padded along each dimension. + {dim: pad} is a shortcut for pad_before = pad_after = pad + mode : str + One of the following string values (taken from numpy docs). + + 'constant' (default) + Pads with a constant value. + 'edge' + Pads with the edge values of array. + 'linear_ramp' + Pads with the linear ramp between end_value and the + array edge value. + 'maximum' + Pads with the maximum value of all or part of the + vector along each axis. + 'mean' + Pads with the mean value of all or part of the + vector along each axis. + 'median' + Pads with the median value of all or part of the + vector along each axis. + 'minimum' + Pads with the minimum value of all or part of the + vector along each axis. 
+ 'reflect' + Pads with the reflection of the vector mirrored on + the first and last values of the vector along each + axis. + 'symmetric' + Pads with the reflection of the vector mirrored + along the edge of the array. + 'wrap' + Pads with the wrap of the vector along the axis. + The first values are used to pad the end and the + end values are used to pad the beginning. + stat_length : int, tuple or mapping of the form {dim: tuple} + Used in 'maximum', 'mean', 'median', and 'minimum'. Number of + values at edge of each axis used to calculate the statistic value. + {dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)} unique + statistic lengths along each dimension. + ((before, after),) yields same before and after statistic lengths + for each dimension. + (stat_length,) or int is a shortcut for before = after = statistic + length for all axes. + Default is ``None``, to use the entire axis. + constant_values : scalar, tuple or mapping of the form {dim: tuple} + Used in 'constant'. The values to set the padded values for each + axis. + ``{dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)}`` unique + pad constants along each dimension. + ``((before, after),)`` yields same before and after constants for each + dimension. + ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for + all dimensions. + Default is 0. + end_values : scalar, tuple or mapping of the form {dim: tuple} + Used in 'linear_ramp'. The values used for the ending value of the + linear_ramp and that will form the edge of the padded array. + ``{dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)}`` unique + end values along each dimension. + ``((before, after),)`` yields same before and after end values for each + axis. + ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for + all axes. + Default is 0. + reflect_type : {'even', 'odd'}, optional + Used in 'reflect', and 'symmetric'. The 'even' style is the + default with an unaltered reflection around the edge value. For + the 'odd' style, the extended part of the array is created by + subtracting the reflected values from two times the edge value. + **pad_width_kwargs: + The keyword arguments form of ``pad_width``. + One of ``pad_width`` or ``pad_width_kwargs`` must be provided. + + Returns + ------- + padded : Dataset + Dataset with the padded coordinates and data. + + See also + -------- + Dataset.shift, Dataset.roll, Dataset.bfill, Dataset.ffill, numpy.pad, dask.array.pad + + Notes + ----- + By default when ``mode="constant"`` and ``constant_values=None``, integer types will be + promoted to ``float`` and padded with ``np.nan``. 
To avoid type promotion + specify ``constant_values=np.nan`` + + Examples + -------- + + >>> ds = xr.Dataset({'foo': ('x', range(5))}) + >>> ds.pad(x=(1,2)) + + Dimensions: (x: 8) + Dimensions without coordinates: x + Data variables: + foo (x) float64 nan 0.0 1.0 2.0 3.0 4.0 nan nan + """ + pad_width = either_dict_or_kwargs(pad_width, pad_width_kwargs, "pad") + + if mode in ("edge", "reflect", "symmetric", "wrap"): + coord_pad_mode = mode + coord_pad_options = { + "stat_length": stat_length, + "constant_values": constant_values, + "end_values": end_values, + "reflect_type": reflect_type, + } + else: + coord_pad_mode = "constant" + coord_pad_options = {} + + variables = {} + for name, var in self.variables.items(): + var_pad_width = {k: v for k, v in pad_width.items() if k in var.dims} + if not var_pad_width: + variables[name] = var + elif name in self.data_vars: + variables[name] = var.pad( + pad_width=var_pad_width, + mode=mode, + stat_length=stat_length, + constant_values=constant_values, + end_values=end_values, + reflect_type=reflect_type, + ) + else: + variables[name] = var.pad( + pad_width=var_pad_width, + mode=coord_pad_mode, + **coord_pad_options, # type: ignore + ) + + return self._replace_vars_and_dims(variables) + ops.inject_all_ops_and_reduce_methods(Dataset, array_only=False) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 6d0abe9a6fc..ff2d0af63ed 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -114,7 +114,7 @@ def notnull(data): isin = _dask_or_eager_func("isin", array_args=slice(2)) take = _dask_or_eager_func("take") broadcast_to = _dask_or_eager_func("broadcast_to") -pad = _dask_or_eager_func("pad") +pad = _dask_or_eager_func("pad", dask_module=dask_array_compat) _concatenate = _dask_or_eager_func("concatenate", list_of_args=True) _stack = _dask_or_eager_func("stack", list_of_args=True) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 58f0b275b21..ecba5307680 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -349,7 +349,7 @@ def _bottleneck_reduce(self, func, **kwargs): else: shift = (-self.window // 2) + 1 valid = (slice(None),) * axis + (slice(-shift, None),) - padded = padded.pad_with_fill_value({self.dim: (0, -shift)}) + padded = padded.pad({self.dim: (0, -shift)}, mode="constant") if isinstance(padded.data, dask_array_type): raise AssertionError("should not be reachable") diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 01f816941b5..1ec6512e4fb 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1,11 +1,12 @@ import copy import functools import itertools +import numbers import warnings from collections import defaultdict from datetime import timedelta from distutils.version import LooseVersion -from typing import Any, Dict, Hashable, Mapping, TypeVar, Union +from typing import Any, Dict, Hashable, Mapping, Tuple, TypeVar, Union import numpy as np import pandas as pd @@ -32,12 +33,6 @@ infix_dims, ) -try: - import dask.array as da -except ImportError: - pass - - NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( indexing.ExplicitlyIndexed, pd.Index, @@ -1150,66 +1145,114 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs): result = result._shift_one_dim(dim, count, fill_value=fill_value) return result - def pad_with_fill_value( - self, pad_widths=None, fill_value=dtypes.NA, **pad_widths_kwargs + def _pad_options_dim_to_index( + self, + pad_option: Mapping[Hashable, Union[int, Tuple[int, int]]], + fill_with_shape=False, + ): + 
if fill_with_shape: + return [ + (n, n) if d not in pad_option else pad_option[d] + for d, n in zip(self.dims, self.data.shape) + ] + return [(0, 0) if d not in pad_option else pad_option[d] for d in self.dims] + + def pad( + self, + pad_width: Mapping[Hashable, Union[int, Tuple[int, int]]] = None, + mode: str = "constant", + stat_length: Union[ + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + constant_values: Union[ + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + end_values: Union[ + int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] + ] = None, + reflect_type: str = None, + **pad_width_kwargs: Any, ): """ - Return a new Variable with paddings. + Return a new Variable with padded data. Parameters ---------- - pad_width: Mapping of the form {dim: (before, after)} - Number of values padded to the edges of each dimension. - **pad_widths_kwargs: - Keyword argument for pad_widths + pad_width: Mapping with the form of {dim: (pad_before, pad_after)} + Number of values padded along each dimension. + {dim: pad} is a shortcut for pad_before = pad_after = pad + mode: (str) + See numpy / Dask docs + stat_length : int, tuple or mapping of the form {dim: tuple} + Used in 'maximum', 'mean', 'median', and 'minimum'. Number of + values at edge of each axis used to calculate the statistic value. + constant_values : scalar, tuple or mapping of the form {dim: tuple} + Used in 'constant'. The values to set the padded values for each + axis. + end_values : scalar, tuple or mapping of the form {dim: tuple} + Used in 'linear_ramp'. The values used for the ending value of the + linear_ramp and that will form the edge of the padded array. + reflect_type : {'even', 'odd'}, optional + Used in 'reflect', and 'symmetric'. The 'even' style is the + default with an unaltered reflection around the edge value. For + the 'odd' style, the extended part of the array is created by + subtracting the reflected values from two times the edge value. + **pad_width_kwargs: + One of pad_width or pad_width_kwargs must be provided. + + Returns + ------- + padded : Variable + Variable with the same dimensions and attributes but padded data. """ - pad_widths = either_dict_or_kwargs(pad_widths, pad_widths_kwargs, "pad") + pad_width = either_dict_or_kwargs(pad_width, pad_width_kwargs, "pad") - if fill_value is dtypes.NA: - dtype, fill_value = dtypes.maybe_promote(self.dtype) + # change default behaviour of pad with mode constant + if mode == "constant" and ( + constant_values is None or constant_values is dtypes.NA + ): + dtype, constant_values = dtypes.maybe_promote(self.dtype) else: dtype = self.dtype - if isinstance(self.data, dask_array_type): - array = self.data - - # Dask does not yet support pad. We manually implement it. 
- # https://github.com/dask/dask/issues/1926 - for d, pad in pad_widths.items(): - axis = self.get_axis_num(d) - before_shape = list(array.shape) - before_shape[axis] = pad[0] - before_chunks = list(array.chunks) - before_chunks[axis] = (pad[0],) - after_shape = list(array.shape) - after_shape[axis] = pad[1] - after_chunks = list(array.chunks) - after_chunks[axis] = (pad[1],) - - arrays = [] - if pad[0] > 0: - arrays.append( - da.full( - before_shape, fill_value, dtype=dtype, chunks=before_chunks - ) - ) - arrays.append(array) - if pad[1] > 0: - arrays.append( - da.full( - after_shape, fill_value, dtype=dtype, chunks=after_chunks - ) - ) - if len(arrays) > 1: - array = da.concatenate(arrays, axis=axis) - else: - pads = [(0, 0) if d not in pad_widths else pad_widths[d] for d in self.dims] - array = np.pad( - self.data.astype(dtype, copy=False), - pads, - mode="constant", - constant_values=fill_value, + # create pad_options_kwargs, numpy requires only relevant kwargs to be nonempty + if isinstance(stat_length, dict): + stat_length = self._pad_options_dim_to_index( + stat_length, fill_with_shape=True ) + if isinstance(constant_values, dict): + constant_values = self._pad_options_dim_to_index(constant_values) + if isinstance(end_values, dict): + end_values = self._pad_options_dim_to_index(end_values) + + # workaround for bug in Dask's default value of stat_length https://github.com/dask/dask/issues/5303 + if stat_length is None and mode in ["maximum", "mean", "median", "minimum"]: + stat_length = [(n, n) for n in self.data.shape] # type: ignore + + # change integer values to a tuple of two of those values and change pad_width to index + for k, v in pad_width.items(): + if isinstance(v, numbers.Number): + pad_width[k] = (v, v) + pad_width_by_index = self._pad_options_dim_to_index(pad_width) + + # create pad_options_kwargs, numpy/dask requires only relevant kwargs to be nonempty + pad_option_kwargs = {} + if stat_length is not None: + pad_option_kwargs["stat_length"] = stat_length + if constant_values is not None: + pad_option_kwargs["constant_values"] = constant_values + if end_values is not None: + pad_option_kwargs["end_values"] = end_values + if reflect_type is not None: + pad_option_kwargs["reflect_type"] = reflect_type # type: ignore + + array = duck_array_ops.pad( + self.data.astype(dtype, copy=False), + pad_width_by_index, + mode=mode, + **pad_option_kwargs, + ) + return type(self)(self.dims, array) def _roll_one_dim(self, dim, count): @@ -1930,10 +1973,10 @@ def _coarsen_reshape(self, windows, boundary, side): if pad < 0: pad += window if side[d] == "left": - pad_widths = {d: (0, pad)} + pad_width = {d: (0, pad)} else: - pad_widths = {d: (pad, 0)} - variable = variable.pad_with_fill_value(pad_widths) + pad_width = {d: (pad, 0)} + variable = variable.pad(pad_width, mode="constant") else: raise TypeError( "{} is invalid for boundary. 
Valid option is 'exact', " diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index ef3da5a3b94..de02f8e059d 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4175,6 +4175,113 @@ def test_rank(self): y = DataArray([0.75, 0.25, np.nan, 0.5, 1.0], dims=("z",)) assert_equal(y.rank("z", pct=True), y) + def test_pad_constant(self): + ar = DataArray(np.arange(3 * 4 * 5).reshape(3, 4, 5)) + actual = ar.pad(dim_0=(1, 3)) + expected = DataArray( + np.pad( + np.arange(3 * 4 * 5).reshape(3, 4, 5).astype(np.float32), + mode="constant", + pad_width=((1, 3), (0, 0), (0, 0)), + constant_values=np.nan, + ) + ) + assert actual.shape == (7, 4, 5) + assert_identical(actual, expected) + + def test_pad_coords(self): + ar = DataArray( + np.arange(3 * 4 * 5).reshape(3, 4, 5), + [("x", np.arange(3)), ("y", np.arange(4)), ("z", np.arange(5))], + ) + actual = ar.pad(x=(1, 3), constant_values=1) + expected = DataArray( + np.pad( + np.arange(3 * 4 * 5).reshape(3, 4, 5), + mode="constant", + pad_width=((1, 3), (0, 0), (0, 0)), + constant_values=1, + ), + [ + ( + "x", + np.pad( + np.arange(3).astype(np.float32), + mode="constant", + pad_width=(1, 3), + constant_values=np.nan, + ), + ), + ("y", np.arange(4)), + ("z", np.arange(5)), + ], + ) + assert_identical(actual, expected) + + @pytest.mark.parametrize("mode", ("minimum", "maximum", "mean", "median")) + @pytest.mark.parametrize( + "stat_length", (None, 3, (1, 3), {"dim_0": (2, 1), "dim_2": (4, 2)}) + ) + def test_pad_stat_length(self, mode, stat_length): + ar = DataArray(np.arange(3 * 4 * 5).reshape(3, 4, 5)) + actual = ar.pad(dim_0=(1, 3), dim_2=(2, 2), mode=mode, stat_length=stat_length) + if isinstance(stat_length, dict): + stat_length = (stat_length["dim_0"], (4, 4), stat_length["dim_2"]) + expected = DataArray( + np.pad( + np.arange(3 * 4 * 5).reshape(3, 4, 5), + pad_width=((1, 3), (0, 0), (2, 2)), + mode=mode, + stat_length=stat_length, + ) + ) + assert actual.shape == (7, 4, 9) + assert_identical(actual, expected) + + @pytest.mark.parametrize( + "end_values", (None, 3, (3, 5), {"dim_0": (2, 1), "dim_2": (4, 2)}) + ) + def test_pad_linear_ramp(self, end_values): + ar = DataArray(np.arange(3 * 4 * 5).reshape(3, 4, 5)) + actual = ar.pad( + dim_0=(1, 3), dim_2=(2, 2), mode="linear_ramp", end_values=end_values + ) + if end_values is None: + end_values = 0 + elif isinstance(end_values, dict): + end_values = (end_values["dim_0"], (4, 4), end_values["dim_2"]) + expected = DataArray( + np.pad( + np.arange(3 * 4 * 5).reshape(3, 4, 5), + pad_width=((1, 3), (0, 0), (2, 2)), + mode="linear_ramp", + end_values=end_values, + ) + ) + assert actual.shape == (7, 4, 9) + assert_identical(actual, expected) + + @pytest.mark.parametrize("mode", ("reflect", "symmetric")) + @pytest.mark.parametrize("reflect_type", (None, "even", "odd")) + def test_pad_reflect(self, mode, reflect_type): + + ar = DataArray(np.arange(3 * 4 * 5).reshape(3, 4, 5)) + actual = ar.pad( + dim_0=(1, 3), dim_2=(2, 2), mode=mode, reflect_type=reflect_type + ) + np_kwargs = { + "array": np.arange(3 * 4 * 5).reshape(3, 4, 5), + "pad_width": ((1, 3), (0, 0), (2, 2)), + "mode": mode, + } + # numpy does not support reflect_type=None + if reflect_type is not None: + np_kwargs["reflect_type"] = reflect_type + expected = DataArray(np.pad(**np_kwargs)) + + assert actual.shape == (7, 4, 9) + assert_identical(actual, expected) + @pytest.fixture(params=[1]) def da(request): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 
c7f39108477..74173e71af6 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -5484,6 +5484,19 @@ def test_ipython_key_completion(self): ds.data_vars[item] # should not raise assert sorted(actual) == sorted(expected) + def test_pad(self): + ds = create_test_data(seed=1) + padded = ds.pad(dim2=(1, 1), constant_values=42) + + assert padded["dim2"].shape == (11,) + assert padded["var1"].shape == (8, 11) + assert padded["var2"].shape == (8, 11) + assert padded["var3"].shape == (10, 8) + assert dict(padded.dims) == {"dim1": 8, "dim2": 11, "dim3": 10, "time": 20} + + np.testing.assert_equal(padded["var1"].isel(dim2=[0, -1]).data, 42) + np.testing.assert_equal(padded["dim2"][[0, -1]].data, np.nan) + # Py.test tests diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index 21a212c29b3..09ab1be9af9 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -175,7 +175,7 @@ def test_variable_property(prop): marks=xfail(reason="mixed sparse-dense operation"), ), param( - do("pad_with_fill_value", pad_widths={"x": (1, 1)}, fill_value=5), + do("pad", mode="constant", pad_width={"x": (1, 1)}, constant_values=5), True, marks=xfail(reason="Missing implementation for np.pad"), ), diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index bef3af62d74..2826dc2479c 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -11,7 +11,7 @@ from xarray.core.npcompat import IS_NEP18_ACTIVE from xarray.testing import assert_allclose, assert_identical -from .test_variable import VariableSubclassobjects +from .test_variable import _PAD_XR_NP_ARGS, VariableSubclassobjects pint = pytest.importorskip("pint") DimensionalityError = pint.errors.DimensionalityError @@ -2078,42 +2078,32 @@ def test_no_conflicts(self, unit, dtype): assert expected == actual - def test_pad(self, dtype): + @pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS) + def test_pad_constant_values(self, dtype, xr_arg, np_arg): data = np.arange(4 * 3 * 2).reshape(4, 3, 2).astype(dtype) * unit_registry.m v = xr.Variable(["x", "y", "z"], data) - xr_args = [{"x": (2, 1)}, {"y": (0, 3)}, {"x": (3, 1), "z": (2, 0)}] - np_args = [ - ((2, 1), (0, 0), (0, 0)), - ((0, 0), (0, 3), (0, 0)), - ((3, 1), (0, 0), (2, 0)), - ] - for xr_arg, np_arg in zip(xr_args, np_args): - actual = v.pad_with_fill_value(**xr_arg) - expected = xr.Variable( - v.dims, - np.pad( - v.data.astype(float), - np_arg, - mode="constant", - constant_values=np.nan, - ), - ) - xr.testing.assert_identical(expected, actual) - assert_units_equal(expected, actual) - assert isinstance(actual._data, type(v._data)) + actual = v.pad(**xr_arg, mode="constant") + expected = xr.Variable( + v.dims, + np.pad( + v.data.astype(float), np_arg, mode="constant", constant_values=np.nan, ), + ) + xr.testing.assert_identical(expected, actual) + assert_units_equal(expected, actual) + assert isinstance(actual._data, type(v._data)) # for the boolean array, we pad False data = np.full_like(data, False, dtype=bool).reshape(4, 3, 2) v = xr.Variable(["x", "y", "z"], data) - for xr_arg, np_arg in zip(xr_args, np_args): - actual = v.pad_with_fill_value(fill_value=data.flat[0], **xr_arg) - expected = xr.Variable( - v.dims, - np.pad(v.data, np_arg, mode="constant", constant_values=v.data.flat[0]), - ) - xr.testing.assert_identical(actual, expected) - assert_units_equal(expected, actual) + actual = v.pad(**xr_arg, mode="constant", constant_values=data.flat[0]) + expected = xr.Variable( + v.dims, + np.pad(v.data, np_arg, 
mode="constant", constant_values=v.data.flat[0]), + ) + xr.testing.assert_identical(actual, expected) + assert_units_equal(expected, actual) @pytest.mark.parametrize( "unit,error", @@ -2135,16 +2125,16 @@ def test_pad(self, dtype): pytest.param(unit_registry.m, None, id="identical_unit"), ), ) - def test_pad_with_fill_value(self, unit, error, dtype): + def test_pad_unit_constant_value(self, unit, error, dtype): array = np.linspace(0, 5, 3 * 10).reshape(3, 10).astype(dtype) * unit_registry.m variable = xr.Variable(("x", "y"), array) fill_value = -100 * unit - func = method("pad_with_fill_value", x=(2, 3), y=(1, 4)) + func = method("pad", mode="constant", x=(2, 3), y=(1, 4)) if error is not None: with pytest.raises(error): - func(variable, fill_value=fill_value) + func(variable, constant_values=fill_value) return @@ -2152,11 +2142,11 @@ def test_pad_with_fill_value(self, unit, error, dtype): expected = attach_units( func( strip_units(variable), - fill_value=strip_units(convert_units(fill_value, units)), + constant_values=strip_units(convert_units(fill_value, units)), ), units, ) - actual = func(variable, fill_value=fill_value) + actual = func(variable, constant_values=fill_value) assert_units_equal(expected, actual) xr.testing.assert_identical(expected, actual) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index c600f7a77d0..525a005c601 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -38,6 +38,14 @@ source_ndarray, ) +_PAD_XR_NP_ARGS = [ + [{"x": (2, 1)}, ((2, 1), (0, 0), (0, 0))], + [{"x": 1}, ((1, 1), (0, 0), (0, 0))], + [{"y": (0, 3)}, ((0, 0), (0, 3), (0, 0))], + [{"x": (3, 1), "z": (2, 0)}, ((3, 1), (0, 0), (2, 0))], + [{"x": (3, 1), "z": 2}, ((3, 1), (0, 0), (2, 2))], +] + class VariableSubclassobjects: def test_properties(self): @@ -785,36 +793,65 @@ def test_getitem_error(self): with raises_regex(IndexError, "Dimensions of indexers mis"): v[:, ind] - def test_pad(self): + @pytest.mark.parametrize( + "mode", + [ + "mean", + pytest.param( + "median", + marks=pytest.mark.xfail(reason="median is not implemented by Dask"), + ), + pytest.param( + "reflect", marks=pytest.mark.xfail(reason="dask.array.pad bug") + ), + "edge", + pytest.param( + "linear_ramp", + marks=pytest.mark.xfail( + reason="pint bug: https://github.com/hgrecco/pint/issues/1026" + ), + ), + "maximum", + "minimum", + "symmetric", + "wrap", + ], + ) + @pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS) + def test_pad(self, mode, xr_arg, np_arg): data = np.arange(4 * 3 * 2).reshape(4, 3, 2) v = self.cls(["x", "y", "z"], data) - xr_args = [{"x": (2, 1)}, {"y": (0, 3)}, {"x": (3, 1), "z": (2, 0)}] - np_args = [ - ((2, 1), (0, 0), (0, 0)), - ((0, 0), (0, 3), (0, 0)), - ((3, 1), (0, 0), (2, 0)), - ] - for xr_arg, np_arg in zip(xr_args, np_args): - actual = v.pad_with_fill_value(**xr_arg) - expected = np.pad( - np.array(v.data.astype(float)), - np_arg, - mode="constant", - constant_values=np.nan, - ) - assert_array_equal(actual, expected) - assert isinstance(actual._data, type(v._data)) + actual = v.pad(mode=mode, **xr_arg) + expected = np.pad(data, np_arg, mode=mode) + + assert_array_equal(actual, expected) + assert isinstance(actual._data, type(v._data)) + + @pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS) + def test_pad_constant_values(self, xr_arg, np_arg): + data = np.arange(4 * 3 * 2).reshape(4, 3, 2) + v = self.cls(["x", "y", "z"], data) + + actual = v.pad(**xr_arg) + expected = np.pad( + np.array(v.data.astype(float)), + np_arg, + 
mode="constant", + constant_values=np.nan, + ) + assert_array_equal(actual, expected) + assert isinstance(actual._data, type(v._data)) # for the boolean array, we pad False data = np.full_like(data, False, dtype=bool).reshape(4, 3, 2) v = self.cls(["x", "y", "z"], data) - for xr_arg, np_arg in zip(xr_args, np_args): - actual = v.pad_with_fill_value(fill_value=False, **xr_arg) - expected = np.pad( - np.array(v.data), np_arg, mode="constant", constant_values=False - ) - assert_array_equal(actual, expected) + + actual = v.pad(mode="constant", constant_values=False, **xr_arg) + expected = np.pad( + np.array(v.data), np_arg, mode="constant", constant_values=False + ) + assert_array_equal(actual, expected) def test_rolling_window(self): # Just a working test. See test_nputils for the algorithm validation @@ -2056,8 +2093,28 @@ def test_getitem_uint(self): super().test_getitem_fancy() @pytest.mark.xfail - def test_pad(self): - super().test_rolling_window() + @pytest.mark.parametrize( + "mode", + [ + "mean", + "median", + "reflect", + "edge", + "linear_ramp", + "maximum", + "minimum", + "symmetric", + "wrap", + ], + ) + @pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS) + def test_pad(self, mode, xr_arg, np_arg): + super().test_pad(mode, xr_arg, np_arg) + + @pytest.mark.xfail + @pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS) + def test_pad_constant_values(self, xr_arg, np_arg): + super().test_pad_constant_values(xr_arg, np_arg) @pytest.mark.xfail def test_rolling_window(self): From 5548c1e13ad076196d06a9b99cff0dcc4ef2be5e Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Thu, 19 Mar 2020 17:29:58 -0400 Subject: [PATCH 31/54] Improve where docstring (#3836) * improve the where docstring * whatsnew * improve assign docstring * changes from @dcherian --- doc/whats-new.rst | 2 ++ xarray/core/computation.py | 12 ++++++++---- xarray/core/dataset.py | 2 -- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8140288f350..6ae7398626f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -94,6 +94,8 @@ Documentation - Fix documentation of :py:class:`DataArray` removing the deprecated mention that when omitted, `dims` are inferred from a `coords`-dict. (:pull:`3821`) By `Sander van Rijn `_. +- Improve the :py:func:`where` docstring. + By `Maximilian Roos `_ - Update the installation instructions: only explicitly list recommended dependencies (:issue:`3756`). By `Mathias Hauser `_. diff --git a/xarray/core/computation.py b/xarray/core/computation.py index f99764448da..f2941a3d0ba 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1224,9 +1224,13 @@ def where(cond, x, y): ---------- cond : scalar, array, Variable, DataArray or Dataset with boolean dtype When True, return values from `x`, otherwise returns values from `y`. - x, y : scalar, array, Variable, DataArray or Dataset - Values from which to choose. All dimension coordinates on these objects - must be aligned with each other and with `cond`. + x : scalar, array, Variable, DataArray or Dataset + values to choose from where `cond` is True + y : scalar, array, Variable, DataArray or Dataset + values to choose from where `cond` is False + + All dimension coordinates on these objects must be aligned with each + other and with `cond`. 
Returns ------- @@ -1249,7 +1253,7 @@ def where(cond, x, y): Coordinates: * lat (lat) int64 0 1 2 3 4 5 6 7 8 9 - >>> xr.where(x < 0.5, x, 100 * x) + >>> xr.where(x < 0.5, x, x * 100) array([ 0. , 0.1, 0.2, 0.3, 0.4, 50. , 60. , 70. , 80. , 90. ]) Coordinates: diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 7c218e209cb..a607f1aa164 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4392,8 +4392,6 @@ def assign( Examples -------- - >>> import numpy as np - >>> import xarray as xr >>> x = xr.Dataset( ... { ... "temperature_c": ( From e8a284f341645a63a4d83676a6b268394c721bbc Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Thu, 19 Mar 2020 18:55:08 -0400 Subject: [PATCH 32/54] Allow ellipsis to be used in stack (#3826) * allow ellipsis to be used in stack * doc fix * support ellipsis only as part of an iterable * docs * whatsnew * docstring, whatsnew * docstring, whatsnew * add passing a partial list of dims * more wording changes * improvement from @dcherian --- doc/reshaping.rst | 8 ++++++++ doc/whats-new.rst | 10 ++++++++-- xarray/core/dataarray.py | 4 +++- xarray/core/dataset.py | 7 ++++++- xarray/tests/test_dataarray.py | 3 +++ xarray/tests/test_dataset.py | 11 +++++++++++ 6 files changed, 39 insertions(+), 4 deletions(-) diff --git a/doc/reshaping.rst b/doc/reshaping.rst index 455a24f9216..465ca14dfc2 100644 --- a/doc/reshaping.rst +++ b/doc/reshaping.rst @@ -109,6 +109,13 @@ implemented :py:meth:`~xarray.DataArray.stack` and stacked stacked.unstack('z') +As elsewhere in xarray, an ellipsis (`...`) can be used to represent all unlisted dimensions: + +.. ipython:: python + + stacked = array.stack(z=[..., "x"]) + stacked + These methods are modeled on the :py:class:`pandas.DataFrame` methods of the same name, although in xarray they always create new dimensions rather than adding to the existing index or columns. @@ -164,6 +171,7 @@ like this: 'b': ('x', [6, 7])}, coords={'y': ['u', 'v', 'w']} ) + data stacked = data.to_stacked_array("z", sample_dims=['x']) stacked unstacked = stacked.to_unstacked_dataset("z") diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6ae7398626f..6863d52d9bf 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -39,13 +39,19 @@ New Features By `Justus Magin `_. - :py:meth:`Dataset.groupby` and :py:meth:`DataArray.groupby` now raise a `TypeError` on multiple string arguments. Receiving multiple string arguments - often means a user is attempting to pass multiple dimensions to group over - and should instead pass a list. + often means a user is attempting to pass multiple dimensions as separate + arguments and should instead pass a single list of dimensions. + (:pull:`3802`) By `Maximilian Roos `_ - The new ``Dataset._repr_html_`` and ``DataArray._repr_html_`` (introduced in 0.14.1) is now on by default. To disable, use ``xarray.set_options(display_style="text")``. By `Julia Signell `_. +- An ellipsis (``...``) is now supported in the ``dims`` argument of + :py:meth:`Dataset.stack` and :py:meth:`DataArray.stack`, meaning all + unlisted dimensions, similar to its meaning in :py:meth:`DataArray.transpose`. + (:pull:`3826`) + By `Maximilian Roos `_ - :py:meth:`Dataset.where` and :py:meth:`DataArray.where` accept a lambda as a first argument, which is then called on the input; replicating pandas' behavior. By `Maximilian Roos `_. 
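As a quick sketch of the ellipsis semantics described in the whats-new entry above (the variable and dimension names are illustrative, not from the patch):

import numpy as np
import xarray as xr

da = xr.DataArray(np.zeros((2, 3, 4)), dims=("x", "y", "z"))

da.stack(flat=[...])       # stack over all dimensions
da.stack(flat=[..., "x"])  # ... expands to the unlisted dimensions ("y", "z" here)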
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index bd956553929..324e7ccd290 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1709,7 +1709,9 @@ def stack( ---------- dimensions : Mapping of the form new_name=(dim1, dim2, ...) Names of new dimensions, and the existing dimensions that they - replace. + replace. An ellipsis (`...`) will be replaced by all unlisted dimensions. + Passing a list containing an ellipsis (`stacked_dim=[...]`) will stack over + all dimensions. **dimensions_kwargs: The keyword arguments form of ``dimensions``. One of dimensions or dimensions_kwargs must be provided. diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index a607f1aa164..b7ce0ec4e1e 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -87,6 +87,7 @@ decode_numpy_dict_values, either_dict_or_kwargs, hashable, + infix_dims, is_dict_like, is_scalar, maybe_wrap_array, @@ -3262,6 +3263,8 @@ def reorder_levels( return self._replace(variables, indexes=indexes) def _stack_once(self, dims, new_dim): + if ... in dims: + dims = list(infix_dims(dims, self.dims)) variables = {} for name, var in self.variables.items(): if name not in dims: @@ -3304,7 +3307,9 @@ def stack( ---------- dimensions : Mapping of the form new_name=(dim1, dim2, ...) Names of new dimensions, and the existing dimensions that they - replace. + replace. An ellipsis (`...`) will be replaced by all unlisted dimensions. + Passing a list containing an ellipsis (`stacked_dim=[...]`) will stack over + all dimensions. **dimensions_kwargs: The keyword arguments form of ``dimensions``. One of dimensions or dimensions_kwargs must be provided. diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index de02f8e059d..6f065c9daed 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2040,6 +2040,9 @@ def test_stack_unstack(self): actual = orig.stack(z=["x", "y"]).unstack("z").drop_vars(["x", "y"]) assert_identical(orig, actual) + actual = orig.stack(z=[...]).unstack("z").drop_vars(["x", "y"]) + assert_identical(orig, actual) + dims = ["a", "b", "c", "d", "e"] orig = xr.DataArray(np.random.rand(1, 2, 3, 2, 1), dims=dims) stacked = orig.stack(ab=["a", "b"], cd=["c", "d"]) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 74173e71af6..d2e7bcdabf8 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2879,6 +2879,17 @@ def test_stack(self): actual = ds.stack(z=["x", "y"]) assert_identical(expected, actual) + actual = ds.stack(z=[...]) + assert_identical(expected, actual) + + # non list dims with ellipsis + actual = ds.stack(z=(...,)) + assert_identical(expected, actual) + + # ellipsis with given dim + actual = ds.stack(z=[..., "y"]) + assert_identical(expected, actual) + exp_index = pd.MultiIndex.from_product([["a", "b"], [0, 1]], names=["y", "x"]) expected = Dataset( {"a": ("z", [0, 1, 0, 1]), "b": ("z", [0, 2, 1, 3]), "z": exp_index} From 564a291b13db73a31c15c4cf2a9ff5ec1ad2498c Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Fri, 20 Mar 2020 13:04:26 -0400 Subject: [PATCH 33/54] Fix html repr on non-str keys (#3870) * fix html repr on non-str keys * whatsnew * Update doc/whats-new.rst Co-Authored-By: keewis Co-authored-by: Deepak Cherian Co-authored-by: keewis --- doc/whats-new.rst | 2 ++ xarray/core/formatting_html.py | 2 +- xarray/tests/test_formatting_html.py | 5 +++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git 
a/doc/whats-new.rst b/doc/whats-new.rst index 6863d52d9bf..5bdf6536d3d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -94,6 +94,8 @@ Bug fixes - Fix :py:meth:`xarray.core.dataset.Dataset.to_zarr` when using `append_dim` and `group` simultaneously. (:issue:`3170`). By `Matthias Meyer `_. +- Fix html repr on :py:class:`Dataset` with non-string keys (:pull:`3807`). + By `Maximilian Roos `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index 8ceda8bfbfa..8678a58b381 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -95,7 +95,7 @@ def summarize_variable(name, var, is_index=False, dtype=None, preview=None): cssclass_idx = " class='xr-has-index'" if is_index else "" dims_str = f"({', '.join(escape(dim) for dim in var.dims)})" - name = escape(name) + name = escape(str(name)) dtype = dtype or escape(str(var.dtype)) # "unique" ids required to expand/collapse subsections diff --git a/xarray/tests/test_formatting_html.py b/xarray/tests/test_formatting_html.py index 01357000b20..239f339208d 100644 --- a/xarray/tests/test_formatting_html.py +++ b/xarray/tests/test_formatting_html.py @@ -51,6 +51,11 @@ def test_short_data_repr_html(dataarray): assert data_repr.startswith("array") +def test_short_data_repr_html_non_str_keys(dataset): + ds = dataset.assign({2: lambda x: x["tmin"]}) + fh.dataset_repr(ds) + + def test_short_data_repr_html_dask(dask_dataarray): import dask From 889240bcd1cc81747beef941002125c597f48b14 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sat, 21 Mar 2020 13:54:56 -0400 Subject: [PATCH 34/54] remove macos build while waiting for libwebp fix (#3875) --- azure-pipelines.yml | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index ce95fca1ba1..8d43de7b1d5 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -32,15 +32,16 @@ jobs: steps: - template: ci/azure/unit-tests.yml -- job: MacOSX - strategy: - matrix: - py38: - conda_env: py38 - pool: - vmImage: 'macOS-10.15' - steps: - - template: ci/azure/unit-tests.yml +# excluded while waiting for https://github.com/conda-forge/libwebp-feedstock/issues/26 +# - job: MacOSX +# strategy: +# matrix: +# py38: +# conda_env: py38 +# pool: +# vmImage: 'macOS-10.15' +# steps: +# - template: ci/azure/unit-tests.yml - job: Windows strategy: From 5354679579d46d3bcb620817125c5bde3c4f1cff Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 21 Mar 2020 19:03:51 +0000 Subject: [PATCH 35/54] Delete associated indexes when deleting coordinate variables. (#3840) * Delete associated indexes when deleting coordinate variables. Fixes #3746 * review * fix tests --- doc/whats-new.rst | 3 ++- xarray/core/coordinates.py | 11 ++++++++--- xarray/tests/test_dataarray.py | 6 ++++++ xarray/tests/test_dataset.py | 4 ++++ 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 5bdf6536d3d..ac80524a3c4 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -91,7 +91,8 @@ Bug fixes to preserve attributes. :py:meth:`Dataset.coarsen` accepts a keyword argument ``keep_attrs`` to change this setting. (:issue:`3376`, :pull:`3801`) By `Andrew Thomas `_. - +- Delete associated indexes when deleting coordinate variables. (:issue:`3746`). + By `Deepak Cherian `_. - Fix :py:meth:`xarray.core.dataset.Dataset.to_zarr` when using `append_dim` and `group` simultaneously. (:issue:`3170`). 
By `Matthias Meyer `_. - Fix html repr on :py:class:`Dataset` with non-string keys (:pull:`3807`). diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 3d51c9b4271..83c4d2a8636 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -247,7 +247,7 @@ def __delitem__(self, key: Hashable) -> None: if key in self: del self._data[key] else: - raise KeyError(key) + raise KeyError(f"{key!r} is not a coordinate variable.") def _ipython_key_completions_(self): """Provide method for the key-autocompletions in IPython. """ @@ -291,7 +291,7 @@ def _update_coords( dims = calculate_dimensions(coords_plus_data) if not set(dims) <= set(self.dims): raise ValueError( - "cannot add coordinates with new dimensions to " "a DataArray" + "cannot add coordinates with new dimensions to a DataArray" ) self._data._coords = coords @@ -312,7 +312,12 @@ def to_dataset(self) -> "Dataset": return Dataset._construct_direct(coords, set(coords)) def __delitem__(self, key: Hashable) -> None: - del self._data._coords[key] + if key in self: + del self._data._coords[key] + if self._data._indexes is not None and key in self._data._indexes: + del self._data._indexes[key] + else: + raise KeyError(f"{key!r} is not a coordinate variable.") def _ipython_key_completions_(self): """Provide method for the key-autocompletions in IPython. """ diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 6f065c9daed..fbd9810f285 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1412,6 +1412,12 @@ def test_coords_non_string(self): expected = DataArray(2, coords={1: 2}, name=1) assert_identical(actual, expected) + def test_coords_delitem_delete_indexes(self): + # regression test for GH3746 + arr = DataArray(np.ones((2,)), dims="x", coords={"x": [0, 1]}) + del arr.coords["x"] + assert "x" not in arr.indexes + def test_broadcast_like(self): arr1 = DataArray( np.ones((2, 3)), diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index d2e7bcdabf8..20b814a25c7 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -744,6 +744,10 @@ def test_coords_modify(self): expected = data.merge({"c": 11}).set_coords("c") assert_identical(expected, actual) + # regression test for GH3746 + del actual.coords["x"] + assert "x" not in actual.indexes + def test_update_index(self): actual = Dataset(coords={"x": [1, 2, 3]}) actual["x"] = ["a", "b", "c"] From b6409f0627d813065b58f67e6244cbe47f84090c Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 21 Mar 2020 19:51:06 +0000 Subject: [PATCH 36/54] map_blocks: allow user function to add new unindexed dimension. (#3817) --- doc/whats-new.rst | 3 ++- xarray/core/parallel.py | 3 +++ xarray/tests/test_dask.py | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ac80524a3c4..86272cf8710 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -43,6 +43,8 @@ New Features arguments and should instead pass a single list of dimensions. (:pull:`3802`) By `Maximilian Roos `_ +- :py:func:`map_blocks` can now apply functions that add new unindexed dimensions. + By `Deepak Cherian `_ - The new ``Dataset._repr_html_`` and ``DataArray._repr_html_`` (introduced in 0.14.1) is now on by default. To disable, use ``xarray.set_options(display_style="text")``. @@ -60,7 +62,6 @@ New Features (:issue:`3843`, :pull:`3844`) By `Aaron Spring `_. 
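The ``map_blocks`` change in this patch can be exercised with a small sketch like the following (assuming a chunked input; the lambda and array shape are illustrative):

import numpy as np
import xarray as xr

da = xr.DataArray(np.ones((4, 6)), dims=("x", "y")).chunk({"x": 2})

# the applied function may now create a dimension that is absent from the input
result = xr.map_blocks(lambda block: block.expand_dims(k=3), da)
result.compute()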
- Bug fixes ~~~~~~~~~ - Fix :py:meth:`Dataset.interp` when indexing array shares coordinates with the diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index 8429d0f71ad..6f1668f698f 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -386,6 +386,9 @@ def _wrapper(func, obj, to_array, args, kwargs): var_chunks.append(input_chunks[dim]) elif dim in indexes: var_chunks.append((len(indexes[dim]),)) + elif dim in template.dims: + # new unindexed dimension + var_chunks.append((template.sizes[dim],)) data = dask.array.Array( hlg, name=gname_l, chunks=var_chunks, dtype=template[name].dtype diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 4f7e3910f82..923b35e5946 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -1147,6 +1147,7 @@ def test_map_blocks_to_array(map_ds): lambda x: x.to_dataset(), lambda x: x.drop_vars("x"), lambda x: x.expand_dims(k=[1, 2, 3]), + lambda x: x.expand_dims(k=3), lambda x: x.assign_coords(new_coord=("y", x.y * 2)), lambda x: x.astype(np.int32), # TODO: [lambda x: x.isel(x=1).drop_vars("x"), map_da], @@ -1167,6 +1168,7 @@ def test_map_blocks_da_transformations(func, map_da): lambda x: x.drop_vars("a"), lambda x: x.drop_vars("x"), lambda x: x.expand_dims(k=[1, 2, 3]), + lambda x: x.expand_dims(k=3), lambda x: x.rename({"a": "new1", "b": "new2"}), # TODO: [lambda x: x.isel(x=1)], ], From 6c19aab1ae52ba820e7b61cad8636d7af37830aa Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sun, 22 Mar 2020 02:27:13 -0400 Subject: [PATCH 37/54] add spacing in the versions section of the issue report (#3876) --- .github/ISSUE_TEMPLATE/bug_report.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 83c3aea53a8..37dbcd2ebb0 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -28,6 +28,8 @@ assignees: '' #### Versions
Output of `xr.show_versions()` + +
From 2d0b85e84fa1d3d540ead8be04fc27703041b2cb Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 22 Mar 2020 18:19:41 -0400 Subject: [PATCH 38/54] Re-enable tests xfailed in #3808 and fix new CFTimeIndex failures due to upstream changes (#3874) * Re-enable tests xfailed in #3808 * Add _cache attribute to CFTimeIndex * Temporarily install pandas master from GitHub instead of wheel * Fix pandas url --- ci/azure/install.yml | 4 ++-- xarray/coding/cftimeindex.py | 1 + xarray/tests/test_cftimeindex.py | 17 ++--------------- xarray/tests/test_interp.py | 4 ---- 4 files changed, 5 insertions(+), 21 deletions(-) diff --git a/ci/azure/install.yml b/ci/azure/install.yml index 958e3c180fa..60559dd2064 100644 --- a/ci/azure/install.yml +++ b/ci/azure/install.yml @@ -19,7 +19,6 @@ steps: --upgrade \ matplotlib \ numpy \ - pandas \ scipy python -m pip install \ --no-deps \ @@ -30,7 +29,8 @@ steps: git+https://github.com/Unidata/cftime \ git+https://github.com/mapbox/rasterio \ git+https://github.com/hgrecco/pint \ - git+https://github.com/pydata/bottleneck + git+https://github.com/pydata/bottleneck \ + git+https://github.com/pandas-dev/pandas condition: eq(variables['UPSTREAM_DEV'], 'true') displayName: Install upstream dev dependencies diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index c680a7e0bcf..2e42702caac 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -253,6 +253,7 @@ def __new__(cls, data, name=None): result = object.__new__(cls) result._data = np.array(data, dtype="O") result.name = name + result._cache = {} return result def _partial_date_slice(self, resolution, parsed): diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 43d6d7b068e..d31bf9471ea 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -450,7 +450,6 @@ def test_sel_date_scalar(da, date_type, index): assert_identical(result, expected) -@pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/3751") @requires_cftime def test_sel_date_distant_date(da, date_type, index): expected = xr.DataArray(4).assign_coords(time=index[3]) @@ -513,12 +512,7 @@ def test_sel_date_scalar_backfill(da, date_type, index, sel_kwargs): [ {"method": "pad", "tolerance": timedelta(days=20)}, {"method": "backfill", "tolerance": timedelta(days=20)}, - pytest.param( - {"method": "nearest", "tolerance": timedelta(days=20)}, - marks=pytest.mark.xfail( - reason="https://github.com/pydata/xarray/issues/3751" - ), - ), + {"method": "nearest", "tolerance": timedelta(days=20)}, ], ) def test_sel_date_scalar_tolerance_raises(da, date_type, sel_kwargs): @@ -526,7 +520,6 @@ def test_sel_date_scalar_tolerance_raises(da, date_type, sel_kwargs): da.sel(time=date_type(1, 5, 1), **sel_kwargs) -@pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/3751") @requires_cftime @pytest.mark.parametrize( "sel_kwargs", @@ -574,12 +567,7 @@ def test_sel_date_list_backfill(da, date_type, index, sel_kwargs): [ {"method": "pad", "tolerance": timedelta(days=20)}, {"method": "backfill", "tolerance": timedelta(days=20)}, - pytest.param( - {"method": "nearest", "tolerance": timedelta(days=20)}, - marks=pytest.mark.xfail( - reason="https://github.com/pydata/xarray/issues/3751" - ), - ), + {"method": "nearest", "tolerance": timedelta(days=20)}, ], ) def test_sel_date_list_tolerance_raises(da, date_type, sel_kwargs): @@ -614,7 +602,6 @@ def range_args(date_type): ] 
-@pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/3751") @requires_cftime def test_indexing_in_series_getitem(series, index, scalar_args, range_args): for arg in scalar_args: diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index 9cc4933f462..0502348160e 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -586,7 +586,6 @@ def test_datetime_single_string(): assert_allclose(actual.drop_vars("time"), expected) -@pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/3751") @requires_cftime @requires_scipy def test_cftime(): @@ -613,7 +612,6 @@ def test_cftime_type_error(): da.interp(time=times_new) -@pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/3751") @requires_cftime @requires_scipy def test_cftime_list_of_strings(): @@ -635,7 +633,6 @@ def test_cftime_list_of_strings(): assert_allclose(actual, expected) -@pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/3751") @requires_cftime @requires_scipy def test_cftime_single_string(): @@ -697,7 +694,6 @@ def test_datetime_interp_noerror(): a.interp(x=xi, time=xi.time) # should not raise an error -@pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/3751") @requires_cftime def test_3641(): times = xr.cftime_range("0001", periods=3, freq="500Y") From 9eec56c833da6dca02c3e6c593586fd201a534a0 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 23 Mar 2020 07:42:49 -0600 Subject: [PATCH 39/54] Raise error when assigning to IndexVariable.values & IndexVariable.data (#3862) * Raise error when assigning IndexVariable.values, IndexVariable.data Fixes #3470 * fix existing tests * Add new test * whats-new * Fix more existing tests * Update doc/whats-new.rst * fix docs * update whats-new --- doc/plotting.rst | 2 +- doc/whats-new.rst | 6 ++++++ xarray/core/variable.py | 14 +++++++++++--- xarray/tests/test_accessor_dt.py | 4 ++-- xarray/tests/test_dask.py | 2 +- xarray/tests/test_variable.py | 7 +++++-- 6 files changed, 26 insertions(+), 9 deletions(-) diff --git a/doc/plotting.rst b/doc/plotting.rst index ea9816780a7..f3d9c0213de 100644 --- a/doc/plotting.rst +++ b/doc/plotting.rst @@ -657,7 +657,7 @@ Additionally, the boolean kwarg ``add_guide`` can be used to prevent the display .. ipython:: python - ds.w.values = [1, 2, 3, 5] + ds = ds.assign(w=[1, 2, 3, 5]) @savefig ds_discrete_legend_hue_scatter.png ds.plot.scatter(x='A', y='B', hue='w', hue_style='discrete') diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 86272cf8710..40307827bc9 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,6 +22,12 @@ v0.15.1 (unreleased) Breaking changes ~~~~~~~~~~~~~~~~ +- Raise an error when assigning to the ``.values`` or ``.data`` attribute of + dimension coordinates i.e. ``IndexVariable`` objects. This has been broken since + v0.12.0. Please use :py:meth:`DataArray.assign_coords` or :py:meth:`Dataset.assign_coords` + instead. 
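In practice this breaking change means replacing in-place assignment to a dimension coordinate with ``assign_coords``, e.g. (a minimal sketch; the values are illustrative):

import numpy as np
import xarray as xr

da = xr.DataArray(np.zeros(3), dims="x", coords={"x": [0, 1, 2]})

# da.x.values = [10, 20, 30]           # now raises ValueError
da = da.assign_coords(x=[10, 20, 30])  # the supported spelling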
(:issue:`3470`, :pull:`3862`) + By `Deepak Cherian `_ + New Features ~~~~~~~~~~~~ diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 1ec6512e4fb..c9addeefb04 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2104,9 +2104,17 @@ def load(self): # https://github.com/python/mypy/issues/1465 @Variable.data.setter # type: ignore def data(self, data): - Variable.data.fset(self, data) - if not isinstance(self._data, PandasIndexAdapter): - self._data = PandasIndexAdapter(self._data) + raise ValueError( + f"Cannot assign to the .data attribute of dimension coordinate a.k.a IndexVariable {self.name!r}. " + f"Please use DataArray.assign_coords, Dataset.assign_coords or Dataset.assign as appropriate." + ) + + @Variable.values.setter # type: ignore + def values(self, values): + raise ValueError( + f"Cannot assign to the .values attribute of dimension coordinate a.k.a IndexVariable {self.name!r}. " + f"Please use DataArray.assign_coords, Dataset.assign_coords or Dataset.assign as appropriate." + ) def chunk(self, chunks=None, name=None, lock=False): # Dummy - do not chunk. This method is invoked e.g. by Dataset.chunk() diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 20a9283e32c..b3640722106 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -80,7 +80,7 @@ def test_strftime(self): def test_not_datetime_type(self): nontime_data = self.data.copy() int_data = np.arange(len(self.data.time)).astype("int8") - nontime_data["time"].values = int_data + nontime_data = nontime_data.assign_coords(time=int_data) with raises_regex(TypeError, "dt"): nontime_data.time.dt @@ -213,7 +213,7 @@ def setup(self): def test_not_datetime_type(self): nontime_data = self.data.copy() int_data = np.arange(len(self.data.time)).astype("int8") - nontime_data["time"].values = int_data + nontime_data = nontime_data.assign_coords(time=int_data) with raises_regex(TypeError, "dt"): nontime_data.time.dt diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 923b35e5946..538dbbfb58b 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -1276,7 +1276,7 @@ def test_token_changes_when_data_changes(obj): assert t3 != t2 # Change IndexVariable - obj.coords["x"] *= 2 + obj = obj.assign_coords(x=obj.x * 2) with raise_if_dask_computes(): t4 = dask.base.tokenize(obj) assert t4 != t3 diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 525a005c601..116466e112d 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -538,8 +538,7 @@ def test_copy_index_with_data(self): orig = IndexVariable("x", np.arange(5)) new_data = np.arange(5, 10) actual = orig.copy(data=new_data) - expected = orig.copy() - expected.data = new_data + expected = IndexVariable("x", np.arange(5, 10)) assert_identical(expected, actual) def test_copy_index_with_data_errors(self): @@ -547,6 +546,10 @@ def test_copy_index_with_data_errors(self): new_data = np.arange(5, 20) with raises_regex(ValueError, "must match shape of object"): orig.copy(data=new_data) + with raises_regex(ValueError, "Cannot assign to the .data"): + orig.data = new_data + with raises_regex(ValueError, "Cannot assign to the .values"): + orig.values = new_data def test_replace(self): var = Variable(("x", "y"), [[1.5, 2.0], [3.1, 4.3]], {"foo": "bar"}) From c32d7bdda1ab00f37989e57605a851ca07c30d82 Mon Sep 17 00:00:00 2001 From: keewis Date: Mon, 23 Mar 2020 19:03:04 +0100 Subject: [PATCH 40/54] reword the whats-new 
entry for unit support (#3878) * reword the whats-new entry for unit support of top-level functions and Variable --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 40307827bc9..aac2ca7bbf8 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -41,7 +41,7 @@ New Features - Support new h5netcdf backend keyword `phony_dims` (available from h5netcdf v0.8.0 for :py:class:`~xarray.backends.H5NetCDFStore`. By `Kai Mühlbauer `_. -- Support unit aware arrays with pint. (:issue:`3594`, :pull:`3706`, :pull:`3611`) +- Add partial support for unit aware arrays with pint. (:pull:`3706`, :pull:`3611`) By `Justus Magin `_. - :py:meth:`Dataset.groupby` and :py:meth:`DataArray.groupby` now raise a `TypeError` on multiple string arguments. Receiving multiple string arguments From 6c27ef24616c050ee1d0c510e13d33c7378c9fe2 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Mon, 23 Mar 2020 15:14:12 -0400 Subject: [PATCH 41/54] update panel documentation (#3880) * update panel documentation * @keewis feedback --- doc/pandas.rst | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/doc/pandas.rst b/doc/pandas.rst index b1660e48dd2..b0ec2a117dc 100644 --- a/doc/pandas.rst +++ b/doc/pandas.rst @@ -110,10 +110,10 @@ Multi-dimensional data Tidy data is great, but sometimes you want to preserve dimensions instead of automatically stacking them into a ``MultiIndex``. -:py:meth:`DataArray.to_pandas()` is a shortcut that -lets you convert a DataArray directly into a pandas object with the same -dimensionality (i.e., a 1D array is converted to a :py:class:`~pandas.Series`, -2D to :py:class:`~pandas.DataFrame` and 3D to ``pandas.Panel``): +:py:meth:`DataArray.to_pandas()` is a shortcut that lets you convert a +DataArray directly into a pandas object with the same dimensionality, if +available in pandas (i.e., a 1D array is converted to a +:py:class:`~pandas.Series` and 2D to :py:class:`~pandas.DataFrame`): .. ipython:: python @@ -151,13 +151,13 @@ However, you will need to set dimension names explicitly, either with the Transitioning from pandas.Panel to xarray ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -``Panel``, pandas' data structure for 3D arrays, has always -been a second class data structure compared to the Series and DataFrame. To -allow pandas developers to focus more on its core functionality built around -the DataFrame, pandas has deprecated ``Panel``. It will be removed in pandas -0.25. +``Panel``, pandas' data structure for 3D arrays, was always a second class +data structure compared to the Series and DataFrame. To allow pandas +developers to focus more on its core functionality built around the +DataFrame, pandas removed ``Panel`` in favor of directing users who use +multi-dimensional arrays to xarray. -xarray has most of ``Panel``'s features, a more explicit API (particularly around +Xarray has most of ``Panel``'s features, a more explicit API (particularly around indexing), and the ability to scale to >3 dimensions with the same interface. As discussed :ref:`elsewhere ` in the docs, there are two primary data structures in @@ -210,7 +210,7 @@ You can also easily convert this data into ``Dataset``: array.to_dataset(dim='dim_0') Here, there are two data variables, each representing a DataFrame on panel's -``items`` axis, and labelled as such. Each variable is a 2D array of the +``items`` axis, and labeled as such. 
Each variable is a 2D array of the respective values along the ``items`` dimension. While the xarray docs are relatively complete, a few items stand out for Panel users: From 321f2e55253b61a251cd5d2db5329dd37d39a471 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Mon, 23 Mar 2020 15:14:32 -0400 Subject: [PATCH 42/54] whatsnew for 0.15.1 (#3879) * whatsnew for 0.15.1 * title formatting --- doc/whats-new.rst | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index aac2ca7bbf8..db50d09f431 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -16,9 +16,13 @@ What's New .. _whats-new.0.15.1: -v0.15.1 (unreleased) +v0.15.1 (23 Mar 2020) --------------------- +This release brings many new features such as :py:meth:`Dataset.weighted` methods for weighted array +reductions, a new jupyter repr by default, and the start of units integration with pint. There's also +the usual batch of usability improvements, documentation additions, and bug fixes. + Breaking changes ~~~~~~~~~~~~~~~~ @@ -34,6 +38,10 @@ New Features - Weighted array reductions are now supported via the new :py:meth:`DataArray.weighted` and :py:meth:`Dataset.weighted` methods. See :ref:`comput.weighted`. (:issue:`422`, :pull:`2922`). By `Mathias Hauser `_ +- The new jupyter notebook repr (``Dataset._repr_html_`` and + ``DataArray._repr_html_``) (introduced in 0.14.1) is now on by default. To + disable, use ``xarray.set_options(display_style="text")``. + By `Julia Signell `_. - Added support for :py:class:`pandas.DatetimeIndex`-style rounding of ``cftime.datetime`` objects directly via a :py:class:`CFTimeIndex` or via the :py:class:`~core.accessor_dt.DatetimeAccessor`. @@ -51,10 +59,6 @@ New Features By `Maximilian Roos `_ - :py:func:`map_blocks` can now apply functions that add new unindexed dimensions. By `Deepak Cherian `_ -- The new ``Dataset._repr_html_`` and ``DataArray._repr_html_`` (introduced - in 0.14.1) is now on by default. To disable, use - ``xarray.set_options(display_style="text")``. - By `Julia Signell `_. - An ellipsis (``...``) is now supported in the ``dims`` argument of :py:meth:`Dataset.stack` and :py:meth:`DataArray.stack`, meaning all unlisted dimensions, similar to its meaning in :py:meth:`DataArray.transpose`. @@ -63,13 +67,14 @@ New Features - :py:meth:`Dataset.where` and :py:meth:`DataArray.where` accept a lambda as a first argument, which is then called on the input; replicating pandas' behavior. By `Maximilian Roos `_. -- Implement ``skipna`` in :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile`, +- ``skipna`` is available in :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile`, :py:meth:`core.groupby.DatasetGroupBy.quantile`, :py:meth:`core.groupby.DataArrayGroupBy.quantile` (:issue:`3843`, :pull:`3844`) By `Aaron Spring `_. Bug fixes ~~~~~~~~~ + - Fix :py:meth:`Dataset.interp` when indexing array shares coordinates with the indexed variable (:issue:`3252`). By `David Huard `_. @@ -107,6 +112,7 @@ Bug fixes Documentation ~~~~~~~~~~~~~ + - Fix documentation of :py:class:`DataArray` removing the deprecated mention that when omitted, `dims` are inferred from a `coords`-dict. (:pull:`3821`) By `Sander van Rijn `_. 
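The weighted reductions highlighted in the 0.15.1 notes above can be sketched as follows (a minimal example; the values are illustrative):

import xarray as xr

da = xr.DataArray([1.0, 2.0, 3.0], dims="x")
weights = xr.DataArray([0.5, 0.25, 0.25], dims="x")

da.weighted(weights).mean("x")  # (0.5 * 1 + 0.25 * 2 + 0.25 * 3) / 1.0 = 1.75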
@@ -119,25 +125,25 @@ Documentation Internal Changes ~~~~~~~~~~~~~~~~ -- Removed the internal ``import_seaborn`` function which handled the deprecation of +- Remove the internal ``import_seaborn`` function which handled the deprecation of the ``seaborn.apionly`` entry point (:issue:`3747`). By `Mathias Hauser `_. - Don't test pint integration in combination with datetime objects. (:issue:`3778`, :pull:`3788`) By `Justus Magin `_. -- Changed test_open_mfdataset_list_attr to only run with dask installed +- Change test_open_mfdataset_list_attr to only run with dask installed (:issue:`3777`, :pull:`3780`). By `Bruno Pagani `_. -- Preserved the ability to index with ``method="nearest"`` with a +- Preserve the ability to index with ``method="nearest"`` with a :py:class:`CFTimeIndex` with pandas versions greater than 1.0.1 (:issue:`3751`). By `Spencer Clark `_. - Greater flexibility and improved test coverage of subtracting various types of objects from a :py:class:`CFTimeIndex`. By `Spencer Clark `_. -- Updated Azure CI MacOS image, given pending removal. +- Update Azure CI MacOS image, given pending removal. By `Maximilian Roos `_ -- Removed xfails for scipy 1.0.1 for tests that append to netCDF files (:pull:`3805`). +- Remove xfails for scipy 1.0.1 for tests that append to netCDF files (:pull:`3805`). By `Mathias Hauser `_. -- Removed conversion to :py:class:`pandas.Panel`, given its removal in pandas +- Remove conversion to :py:class:`pandas.Panel`, given its removal in pandas in favor of xarray's objects. By `Maximilian Roos `_ From 732b6cd6248ce715da74f3cd7a0e211eaa1d0aa2 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Mon, 23 Mar 2020 16:41:44 -0400 Subject: [PATCH 43/54] Release v0.15.1 From a2cccd641d7f4c66e0a517a721a3e06f415ae0ee Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Mon, 23 Mar 2020 17:05:09 -0400 Subject: [PATCH 44/54] whatsnew section for 0.16.0 --- doc/whats-new.rst | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index db50d09f431..10f6b23ca66 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -13,9 +13,33 @@ What's New import xarray as xr np.random.seed(123456) -.. _whats-new.0.15.1: +.. _whats-new.0.16.0: + +v0.16.0 (unreleased) +--------------------- + +Breaking changes +~~~~~~~~~~~~~~~~ + + +New Features +~~~~~~~~~~~~ + + +Bug fixes +~~~~~~~~~ + + +Documentation +~~~~~~~~~~~~~ +Internal Changes +~~~~~~~~~~~~~~~~ + + +.. _whats-new.0.15.1: + v0.15.1 (23 Mar 2020) --------------------- From c707b337a0f75224ee3b3e3b65a08da792df2fa6 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Mon, 23 Mar 2020 20:52:13 -0400 Subject: [PATCH 45/54] Tweaks to "how_to_release" (#3882) * tweaks to how_to_release * previous release * copy paste list of names looks terrible * should not show --- HOW_TO_RELEASE.md | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/HOW_TO_RELEASE.md b/HOW_TO_RELEASE.md index 4ef7342a5ed..3fdd1d7236d 100644 --- a/HOW_TO_RELEASE.md +++ b/HOW_TO_RELEASE.md @@ -23,7 +23,7 @@ Time required: about an hour. 4. Check that the ReadTheDocs build is passing. 5. On the master branch, commit the release in git: ``` - git commit -a -m 'Release v0.X.Y' + git commit -am 'Release v0.X.Y' ``` 6. Tag the release: ``` @@ -60,10 +60,35 @@ Time required: about an hour. It's OK to force push to 'stable' if necessary. 
(We also update the stable branch with `git cherrypick` for documentation only fixes that apply the current released version.) -12. Add a section for the next release (v.X.(Y+1)) to doc/whats-new.rst. +12. Add a section for the next release (v.X.Y+1) to doc/whats-new.rst: + ``` + .. _whats-new.0.X.Y+1: + + v0.X.Y+1 (unreleased) + --------------------- + + Breaking changes + ~~~~~~~~~~~~~~~~ + + + New Features + ~~~~~~~~~~~~ + + + Bug fixes + ~~~~~~~~~ + + + Documentation + ~~~~~~~~~~~~~ + + + Internal Changes + ~~~~~~~~~~~~~~~~ + ``` 13. Commit your changes and push to master again: ``` - git commit -a -m 'New whatsnew section' + git commit -am 'New whatsnew section' git push upstream master ``` You're done pushing to master! @@ -88,15 +113,17 @@ Time required: about an hour. ``` git log "$(git tag --sort="v:refname" | sed -n 'x;$p').." --format="%aN" | sort -u ``` - or by replacing `v0.X.Y` with the _previous_ release in: + or by substituting the _previous_ release in: ``` - git log v0.X.Y.. --format="%aN" | sort -u + git log v0.X.Y-1.. --format="%aN" | sort -u ``` + NB: copying this output into a Google Groups form can cause + [issues](https://groups.google.com/forum/#!topic/xarray/hK158wAviPs) with line breaks, so take care Note on version numbering: We follow a rough approximation of semantic version. Only major releases (0.X.0) -show include breaking changes. Minor releases (0.X.Y) are for bug fixes and +should include breaking changes. Minor releases (0.X.Y) are for bug fixes and backwards compatible new features, but if a sufficient number of new features have arrived we will issue a major release even if there are no compatibility breaks. From ee3c87659d1687a86d406065a5af1b4b87beec17 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Tue, 24 Mar 2020 14:48:35 -0400 Subject: [PATCH 46/54] Use `fixes` in PR template (#3886) * Use `fixes` in PR template * whatsnew --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- doc/whats-new.rst | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index a921bddaa23..c30202ac046 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,6 +1,6 @@ - - [ ] Closes #xxxx + - [ ] Fixes #xxxx - [ ] Tests added - [ ] Passes `isort -rc . && black . && mypy . && flake8` - [ ] Fully documented, including `whats-new.rst` for all changes and `api.rst` for new API diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 10f6b23ca66..d40ca82ba85 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -36,6 +36,9 @@ Documentation Internal Changes ~~~~~~~~~~~~~~~~ +- Use ``Fixes`` rather than ``Closes`` in GitHub Pull Request template, allowing + linking to issues. + By `Maximilian Roos `_ .. 
_whats-new.0.15.1: From c10c9928d8800e32a4c127429b1fa11bdb68aca1 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Tue, 24 Mar 2020 15:24:42 -0400 Subject: [PATCH 47/54] xfail test_uamiv_format_write (#3885) * xfail test_uamiv_format_write * add reason kwarg * xfail test_dayofyear_after_cftime_range --- xarray/tests/test_backends.py | 1 + xarray/tests/test_cftime_offsets.py | 1 + 2 files changed, 2 insertions(+) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 5f8ba83c330..a4585985bdc 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3548,6 +3548,7 @@ def test_uamiv_format_mfread(self): assert_allclose(expected, actual) camxfile.close() + @pytest.mark.xfail(reason="Flaky; see GH3711") def test_uamiv_format_write(self): fmtkw = {"format": "uamiv"} diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 343e059f53c..2352f9e8cdd 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1176,6 +1176,7 @@ def test_dayofweek_after_cftime_range(freq): np.testing.assert_array_equal(result, expected) +@pytest.mark.xfail(reason="See GH3885") @pytest.mark.parametrize("freq", ["A", "M", "D"]) def test_dayofyear_after_cftime_range(freq): pytest.importorskip("cftime", minversion="1.0.2.1") From d8bb6204dc6a4bacdfca25b02ba62bb7f1bb5795 Mon Sep 17 00:00:00 2001 From: johnomotani Date: Tue, 24 Mar 2020 20:40:17 +0000 Subject: [PATCH 48/54] Control attrs of result in `merge()`, `concat()`, `combine_by_coords()` and `combine_nested()` (#3877) * Optionally promote attrs from DataArray to Dataset in to_dataset Adds option 'promote_attrs' to DataArray.to_dataset(). By default promote_attrs=False, maintaining current behaviour. If promote_attrs=True, the attrs of the DataArray are shallow-copied to the Dataset returned by to_dataset(). * utils.ordered_dict_union returns the union of two compatible dicts If the values of any shared key are not equivalent, then raises an error. * combine_attrs argument for merge() Provides several options for how to combine the attributes of the passed objects and give them to the returned Dataset. * combine_attrs argument for concat() Provides several options for how to combine the attributes of the passed objects and give them to the returned DataArray or Dataset. * combine_attrs argument for combine_by_coords() and combine_nested() Provides several options for how to combine the attributes of the passed objects and give them to the returned Dataset. * Add combine_attrs changes to whats-new.rst * Update docstrings to note default values Apply suggestions from code review Co-Authored-By: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> * First argument of update_safety_check and ordered_dict_union not mutable No need for these arguments to be MutableMapping rather than just Mapping. * Rename ordered_dict_union -> compat_dict_union Do not use OrderedDicts any more, so name did not make sense. 
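A minimal sketch of the ``combine_attrs`` behaviour this commit message describes, as applied to ``merge`` (the dataset attributes are illustrative; ``merge`` defaults to 'drop' under this patch):

import xarray as xr

a = xr.Dataset(attrs={"source": "model A", "res": "1deg"})
b = xr.Dataset(attrs={"source": "model B", "res": "1deg"})

xr.merge([a, b], combine_attrs="override").attrs  # attrs of the first object
xr.merge([a, b], combine_attrs="drop").attrs      # {}
# combine_attrs="no_conflicts" would raise here, since "source" differs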
* Move combine_attrs to v0.16.0 in whats-new.rst * Fix merge of whats-new.rst Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- doc/whats-new.rst | 4 ++ xarray/core/combine.py | 50 +++++++++++++++++-- xarray/core/concat.py | 34 ++++++++++--- xarray/core/dataarray.py | 19 ++++++-- xarray/core/dataset.py | 2 +- xarray/core/merge.py | 85 +++++++++++++++++++++++++++++--- xarray/core/utils.py | 34 ++++++++++++- xarray/tests/test_combine.py | 89 ++++++++++++++++++++++++++++++++++ xarray/tests/test_concat.py | 46 ++++++++++++++++++ xarray/tests/test_dataarray.py | 11 ++++- xarray/tests/test_merge.py | 60 +++++++++++++++++++++++ xarray/tests/test_utils.py | 11 ++++- 12 files changed, 420 insertions(+), 25 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d40ca82ba85..4515f552812 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -24,6 +24,10 @@ Breaking changes New Features ~~~~~~~~~~~~ +- Control over attributes of result in :py:func:`merge`, :py:func:`concat`, + :py:func:`combine_by_coords` and :py:func:`combine_nested` using + combine_attrs keyword argument. (:issue:`3865`, :pull:`3877`) + By `John Omotani `_ Bug fixes diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 1fa2df00352..1f990457798 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -155,6 +155,7 @@ def _combine_nd( compat="no_conflicts", fill_value=dtypes.NA, join="outer", + combine_attrs="drop", ): """ Combines an N-dimensional structure of datasets into one by applying a @@ -202,13 +203,21 @@ def _combine_nd( compat=compat, fill_value=fill_value, join=join, + combine_attrs=combine_attrs, ) (combined_ds,) = combined_ids.values() return combined_ds def _combine_all_along_first_dim( - combined_ids, dim, data_vars, coords, compat, fill_value=dtypes.NA, join="outer" + combined_ids, + dim, + data_vars, + coords, + compat, + fill_value=dtypes.NA, + join="outer", + combine_attrs="drop", ): # Group into lines of datasets which must be combined along dim @@ -223,7 +232,7 @@ def _combine_all_along_first_dim( combined_ids = dict(sorted(group)) datasets = combined_ids.values() new_combined_ids[new_id] = _combine_1d( - datasets, dim, compat, data_vars, coords, fill_value, join + datasets, dim, compat, data_vars, coords, fill_value, join, combine_attrs ) return new_combined_ids @@ -236,6 +245,7 @@ def _combine_1d( coords="different", fill_value=dtypes.NA, join="outer", + combine_attrs="drop", ): """ Applies either concat or merge to 1D list of datasets depending on value @@ -252,6 +262,7 @@ def _combine_1d( compat=compat, fill_value=fill_value, join=join, + combine_attrs=combine_attrs, ) except ValueError as err: if "encountered unexpected variable" in str(err): @@ -265,7 +276,13 @@ def _combine_1d( else: raise else: - combined = merge(datasets, compat=compat, fill_value=fill_value, join=join) + combined = merge( + datasets, + compat=compat, + fill_value=fill_value, + join=join, + combine_attrs=combine_attrs, + ) return combined @@ -284,6 +301,7 @@ def _nested_combine( ids, fill_value=dtypes.NA, join="outer", + combine_attrs="drop", ): if len(datasets) == 0: @@ -311,6 +329,7 @@ def _nested_combine( coords=coords, fill_value=fill_value, join=join, + combine_attrs=combine_attrs, ) return combined @@ -323,6 +342,7 @@ def combine_nested( coords="different", fill_value=dtypes.NA, join="outer", + combine_attrs="drop", ): """ Explicitly combine an N-dimensional grid of datasets into one by using a @@ -390,6 +410,16 @@ def combine_nested( - 'override': if indexes 
are of same size, rewrite indexes to be
          those of the first object with that dimension. Indexes for the same
          dimension must have the same size in all objects.
+    combine_attrs : {'drop', 'identical', 'no_conflicts', 'override'},
+                    default 'drop'
+        String indicating how to combine attrs of the objects being merged:
+
+        - 'drop': empty attrs on returned Dataset.
+        - 'identical': all attrs must be the same on every object.
+        - 'no_conflicts': attrs from all objects are combined, any that have
+          the same name must also have the same value.
+        - 'override': skip comparing and copy attrs from the first dataset to
+          the result.
 
     Returns
     -------
@@ -468,6 +498,7 @@ def combine_nested(
         ids=False,
         fill_value=fill_value,
         join=join,
+        combine_attrs=combine_attrs,
     )
 
 
@@ -482,6 +513,7 @@ def combine_by_coords(
     coords="different",
     fill_value=dtypes.NA,
     join="outer",
+    combine_attrs="no_conflicts",
 ):
     """
     Attempt to auto-magically combine the given datasets into one by using
@@ -557,6 +589,16 @@ def combine_by_coords(
         - 'override': if indexes are of same size, rewrite indexes to be those
          of the first object with that dimension. Indexes for the same
          dimension must have the same size in all objects.
+    combine_attrs : {'drop', 'identical', 'no_conflicts', 'override'},
+                    default 'no_conflicts'
+        String indicating how to combine attrs of the objects being merged:
+
+        - 'drop': empty attrs on returned Dataset.
+        - 'identical': all attrs must be the same on every object.
+        - 'no_conflicts': attrs from all objects are combined, any that have
+          the same name must also have the same value.
+        - 'override': skip comparing and copy attrs from the first dataset to
+          the result.
 
     Returns
     -------
@@ -700,6 +742,7 @@ def combine_by_coords(
             compat=compat,
             fill_value=fill_value,
             join=join,
+            combine_attrs=combine_attrs,
         )
 
     # Check the overall coordinates are monotonically increasing
@@ -717,6 +760,7 @@ def combine_by_coords(
         compat=compat,
         fill_value=fill_value,
         join=join,
+        combine_attrs=combine_attrs,
     )
 
 
diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index 96b4be15d1b..7741cbb826b 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -3,7 +3,7 @@
 from . import dtypes, utils
 from .alignment import align
 from .duck_array_ops import lazy_array_equiv
-from .merge import _VALID_COMPAT, unique_variable
+from .merge import _VALID_COMPAT, merge_attrs, unique_variable
 from .variable import IndexVariable, Variable, as_variable
 from .variable import concat as concat_vars
 
@@ -17,6 +17,7 @@ def concat(
     positions=None,
     fill_value=dtypes.NA,
     join="outer",
+    combine_attrs="override",
 ):
     """Concatenate xarray objects along a new or existing dimension.
 
@@ -92,15 +93,21 @@ def concat(
         - 'override': if indexes are of same size, rewrite indexes to be those
          of the first object with that dimension. Indexes for the same
          dimension must have the same size in all objects.
+    combine_attrs : {'drop', 'identical', 'no_conflicts', 'override'},
+                    default 'override'
+        String indicating how to combine attrs of the objects being merged:
+
+        - 'drop': empty attrs on returned Dataset.
+        - 'identical': all attrs must be the same on every object.
+        - 'no_conflicts': attrs from all objects are combined, any that have
+          the same name must also have the same value.
+        - 'override': skip comparing and copy attrs from the first dataset to
+          the result.
 
     Returns
     -------
     concatenated : type of objs
 
-    Notes
-    -----
-    Each concatenated Variable preserves corresponding ``attrs`` from the
-    first element of ``objs``.
- See also -------- merge @@ -132,7 +139,9 @@ def concat( "can only concatenate xarray Dataset and DataArray " "objects, got %s" % type(first_obj) ) - return f(objs, dim, data_vars, coords, compat, positions, fill_value, join) + return f( + objs, dim, data_vars, coords, compat, positions, fill_value, join, combine_attrs + ) def _calc_concat_dim_coord(dim): @@ -306,6 +315,7 @@ def _dataset_concat( positions, fill_value=dtypes.NA, join="outer", + combine_attrs="override", ): """ Concatenate a sequence of datasets along a new or existing dimension @@ -362,7 +372,7 @@ def _dataset_concat( result_vars.update(dim_coords) # assign attrs and encoding from first dataset - result_attrs = datasets[0].attrs + result_attrs = merge_attrs([ds.attrs for ds in datasets], combine_attrs) result_encoding = datasets[0].encoding # check that global attributes are fixed across all datasets if necessary @@ -425,6 +435,7 @@ def _dataarray_concat( positions, fill_value=dtypes.NA, join="outer", + combine_attrs="override", ): arrays = list(arrays) @@ -453,5 +464,12 @@ def _dataarray_concat( positions, fill_value=fill_value, join=join, + combine_attrs="drop", ) - return arrays[0]._from_temp_dataset(ds, name) + + merged_attrs = merge_attrs([da.attrs for da in arrays], combine_attrs) + + result = arrays[0]._from_temp_dataset(ds, name) + result.attrs = merged_attrs + + return result diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 324e7ccd290..232fb86144e 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -475,7 +475,13 @@ def _to_dataset_whole( dataset = Dataset._construct_direct(variables, coord_names, indexes=indexes) return dataset - def to_dataset(self, dim: Hashable = None, *, name: Hashable = None) -> Dataset: + def to_dataset( + self, + dim: Hashable = None, + *, + name: Hashable = None, + promote_attrs: bool = False, + ) -> Dataset: """Convert a DataArray to a Dataset. Parameters @@ -487,6 +493,8 @@ def to_dataset(self, dim: Hashable = None, *, name: Hashable = None) -> Dataset: name : hashable, optional Name to substitute for this array's name. Only valid if ``dim`` is not provided. + promote_attrs : bool, default False + Set to True to shallow copy attrs of DataArray to returned Dataset. Returns ------- @@ -500,9 +508,14 @@ def to_dataset(self, dim: Hashable = None, *, name: Hashable = None) -> Dataset: if dim is not None: if name is not None: raise TypeError("cannot supply both dim and name arguments") - return self._to_dataset_split(dim) + result = self._to_dataset_split(dim) else: - return self._to_dataset_whole(name) + result = self._to_dataset_whole(name) + + if promote_attrs: + result.attrs = dict(self.attrs) + + return result @property def name(self) -> Optional[Hashable]: diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index b7ce0ec4e1e..6f96e4f469c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -536,7 +536,7 @@ def __init__( if isinstance(coords, Dataset): coords = coords.variables - variables, coord_names, dims, indexes = merge_data_and_coords( + variables, coord_names, dims, indexes, _ = merge_data_and_coords( data_vars, coords, compat="broadcast_equals" ) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 1d1b8d39a20..fea94246471 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -20,7 +20,7 @@ from . 
import dtypes, pdcompat from .alignment import deep_align from .duck_array_ops import lazy_array_equiv -from .utils import Frozen, dict_equiv +from .utils import Frozen, compat_dict_union, dict_equiv from .variable import Variable, as_variable, assert_unique_multiindex_level_names if TYPE_CHECKING: @@ -491,17 +491,54 @@ def assert_valid_explicit_coords(variables, dims, explicit_coords): ) +def merge_attrs(variable_attrs, combine_attrs): + """Combine attributes from different variables according to combine_attrs + """ + if not variable_attrs: + # no attributes to merge + return None + + if combine_attrs == "drop": + return {} + elif combine_attrs == "override": + return variable_attrs[0] + elif combine_attrs == "no_conflicts": + result = dict(variable_attrs[0]) + for attrs in variable_attrs[1:]: + try: + result = compat_dict_union(result, attrs) + except ValueError: + raise MergeError( + "combine_attrs='no_conflicts', but some values are not " + "the same. Merging %s with %s" % (str(result), str(attrs)) + ) + return result + elif combine_attrs == "identical": + result = dict(variable_attrs[0]) + for attrs in variable_attrs[1:]: + if not dict_equiv(result, attrs): + raise MergeError( + "combine_attrs='identical', but attrs differ. First is %s " + ", other is %s." % (str(result), str(attrs)) + ) + return result + else: + raise ValueError("Unrecognised value for combine_attrs=%s" % combine_attrs) + + class _MergeResult(NamedTuple): variables: Dict[Hashable, Variable] coord_names: Set[Hashable] dims: Dict[Hashable, int] indexes: Dict[Hashable, pd.Index] + attrs: Dict[Hashable, Any] def merge_core( objects: Iterable["CoercibleMapping"], compat: str = "broadcast_equals", join: str = "outer", + combine_attrs: Optional[str] = "override", priority_arg: Optional[int] = None, explicit_coords: Optional[Sequence] = None, indexes: Optional[Mapping[Hashable, pd.Index]] = None, @@ -519,6 +556,8 @@ def merge_core( Compatibility checks to use when merging variables. join : {'outer', 'inner', 'left', 'right'}, optional How to combine objects with different indexes. + combine_attrs : {'drop', 'identical', 'no_conflicts', 'override'}, optional + How to combine attributes of objects priority_arg : integer, optional Optional argument in `objects` that takes precedence over the others. explicit_coords : set, optional @@ -536,12 +575,15 @@ def merge_core( Set of coordinate names. dims : dict Dictionary mapping from dimension names to sizes. + attrs : dict + Dictionary of attributes Raises ------ MergeError if the merge cannot be done successfully. """ - from .dataset import calculate_dimensions + from .dataarray import DataArray + from .dataset import Dataset, calculate_dimensions _assert_compat_valid(compat) @@ -571,7 +613,16 @@ def merge_core( "coordinates or not in the merged result: %s" % ambiguous_coords ) - return _MergeResult(variables, coord_names, dims, out_indexes) + attrs = merge_attrs( + [ + var.attrs + for var in coerced + if isinstance(var, Dataset) or isinstance(var, DataArray) + ], + combine_attrs, + ) + + return _MergeResult(variables, coord_names, dims, out_indexes, attrs) def merge( @@ -579,6 +630,7 @@ def merge( compat: str = "no_conflicts", join: str = "outer", fill_value: object = dtypes.NA, + combine_attrs: str = "drop", ) -> "Dataset": """Merge any number of xarray objects into a single Dataset as variables. @@ -614,6 +666,16 @@ def merge( dimension must have the same size in all objects. 
fill_value : scalar, optional Value to use for newly missing values + combine_attrs : {'drop', 'identical', 'no_conflicts', 'override'}, + default 'drop' + String indicating how to combine attrs of the objects being merged: + + - 'drop': empty attrs on returned Dataset. + - 'identical': all attrs must be the same on every object. + - 'no_conflicts': attrs from all objects are combined, any that have + the same name must also have the same value. + - 'override': skip comparing and copy attrs from the first dataset to + the result. Returns ------- @@ -787,10 +849,16 @@ def merge( "Dataset(s), DataArray(s), and dictionaries." ) - obj = obj.to_dataset() if isinstance(obj, DataArray) else obj + obj = obj.to_dataset(promote_attrs=True) if isinstance(obj, DataArray) else obj dict_like_objects.append(obj) - merge_result = merge_core(dict_like_objects, compat, join, fill_value=fill_value) + merge_result = merge_core( + dict_like_objects, + compat, + join, + combine_attrs=combine_attrs, + fill_value=fill_value, + ) merged = Dataset._construct_direct(**merge_result._asdict()) return merged @@ -861,4 +929,9 @@ def dataset_update_method( if coord_names: other[key] = value.drop_vars(coord_names) - return merge_core([dataset, other], priority_arg=1, indexes=dataset.indexes) + return merge_core( + [dataset, other], + priority_arg=1, + indexes=dataset.indexes, + combine_attrs="override", + ) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index e335365d5ca..5570f9e9a80 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -184,7 +184,7 @@ def peek_at(iterable: Iterable[T]) -> Tuple[T, Iterator[T]]: def update_safety_check( - first_dict: MutableMapping[K, V], + first_dict: Mapping[K, V], second_dict: Mapping[K, V], compat: Callable[[V, V], bool] = equivalent, ) -> None: @@ -361,6 +361,9 @@ def ordered_dict_intersection( Binary operator to determine if two values are compatible. By default, checks for equivalence. + # TODO: Rename to compat_dict_intersection, as we do not use OrderedDicts + # any more. + Returns ------- intersection : dict @@ -371,6 +374,35 @@ def ordered_dict_intersection( return new_dict +def compat_dict_union( + first_dict: Mapping[K, V], + second_dict: Mapping[K, V], + compat: Callable[[V, V], bool] = equivalent, +) -> MutableMapping[K, V]: + """Return the union of two dictionaries as a new dictionary. + + An exception is raised if any keys are found in both dictionaries and the + values are not compatible. + + Parameters + ---------- + first_dict, second_dict : dict-like + Mappings to merge. + compat : function, optional + Binary operator to determine if two values are compatible. By default, + checks for equivalence. + + Returns + ------- + union : dict + union of the contents. + """ + new_dict = dict(first_dict) + update_safety_check(first_dict, second_dict, compat) + new_dict.update(second_dict) + return new_dict + + class Frozen(Mapping[K, V]): """Wrapper around an object implementing the mapping interface to make it immutable. 
If you really want to modify the mapping, the mutable version is diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index eb2c6e1dbf7..c3f981f10d1 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -503,6 +503,49 @@ def test_auto_combine_2d(self): result = combine_nested(datasets, concat_dim=["dim1", "dim2"]) assert_equal(result, expected) + def test_auto_combine_2d_combine_attrs_kwarg(self): + ds = create_test_data + + partway1 = concat([ds(0), ds(3)], dim="dim1") + partway2 = concat([ds(1), ds(4)], dim="dim1") + partway3 = concat([ds(2), ds(5)], dim="dim1") + expected = concat([partway1, partway2, partway3], dim="dim2") + + expected_dict = {} + expected_dict["drop"] = expected.copy(deep=True) + expected_dict["drop"].attrs = {} + expected_dict["no_conflicts"] = expected.copy(deep=True) + expected_dict["no_conflicts"].attrs = { + "a": 1, + "b": 2, + "c": 3, + "d": 4, + "e": 5, + "f": 6, + } + expected_dict["override"] = expected.copy(deep=True) + expected_dict["override"].attrs = {"a": 1} + + datasets = [[ds(0), ds(1), ds(2)], [ds(3), ds(4), ds(5)]] + + datasets[0][0].attrs = {"a": 1} + datasets[0][1].attrs = {"a": 1, "b": 2} + datasets[0][2].attrs = {"a": 1, "c": 3} + datasets[1][0].attrs = {"a": 1, "d": 4} + datasets[1][1].attrs = {"a": 1, "e": 5} + datasets[1][2].attrs = {"a": 1, "f": 6} + + with raises_regex(ValueError, "combine_attrs='identical'"): + result = combine_nested( + datasets, concat_dim=["dim1", "dim2"], combine_attrs="identical" + ) + + for combine_attrs in expected_dict: + result = combine_nested( + datasets, concat_dim=["dim1", "dim2"], combine_attrs=combine_attrs + ) + assert_identical(result, expected_dict[combine_attrs]) + def test_combine_nested_missing_data_new_dim(self): # Your data includes "time" and "station" dimensions, and each year's # data has a different set of stations. 
@@ -642,6 +685,52 @@ def test_combine_coords_join_exact(self): with raises_regex(ValueError, "indexes along dimension"): combine_nested(objs, concat_dim="x", join="exact") + @pytest.mark.parametrize( + "combine_attrs, expected", + [ + ("drop", Dataset({"x": [0, 1], "y": [0, 1]}, attrs={})), + ( + "no_conflicts", + Dataset({"x": [0, 1], "y": [0, 1]}, attrs={"a": 1, "b": 2}), + ), + ("override", Dataset({"x": [0, 1], "y": [0, 1]}, attrs={"a": 1})), + ], + ) + def test_combine_coords_combine_attrs(self, combine_attrs, expected): + objs = [ + Dataset({"x": [0], "y": [0]}, attrs={"a": 1}), + Dataset({"x": [1], "y": [1]}, attrs={"a": 1, "b": 2}), + ] + actual = combine_nested( + objs, concat_dim="x", join="outer", combine_attrs=combine_attrs + ) + assert_identical(expected, actual) + + if combine_attrs == "no_conflicts": + objs[1].attrs["a"] = 2 + with raises_regex(ValueError, "combine_attrs='no_conflicts'"): + actual = combine_nested( + objs, concat_dim="x", join="outer", combine_attrs=combine_attrs + ) + + def test_combine_coords_combine_attrs_identical(self): + objs = [ + Dataset({"x": [0], "y": [0]}, attrs={"a": 1}), + Dataset({"x": [1], "y": [1]}, attrs={"a": 1}), + ] + expected = Dataset({"x": [0, 1], "y": [0, 1]}, attrs={"a": 1}) + actual = combine_nested( + objs, concat_dim="x", join="outer", combine_attrs="identical" + ) + assert_identical(expected, actual) + + objs[1].attrs["b"] = 2 + + with raises_regex(ValueError, "combine_attrs='identical'"): + actual = combine_nested( + objs, concat_dim="x", join="outer", combine_attrs="identical" + ) + def test_infer_order_from_coords(self): data = create_test_data() objs = [data.isel(dim2=slice(4, 9)), data.isel(dim2=slice(4))] diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 1a498496c03..e5038dd4af2 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -256,6 +256,28 @@ def test_concat_join_kwarg(self): ) assert_identical(actual, expected) + def test_concat_combine_attrs_kwarg(self): + ds1 = Dataset({"a": ("x", [0])}, coords={"x": [0]}, attrs={"b": 42}) + ds2 = Dataset({"a": ("x", [0])}, coords={"x": [1]}, attrs={"b": 42, "c": 43}) + + expected = {} + expected["drop"] = Dataset({"a": ("x", [0, 0])}, {"x": [0, 1]}) + expected["no_conflicts"] = Dataset( + {"a": ("x", [0, 0])}, {"x": [0, 1]}, {"b": 42, "c": 43} + ) + expected["override"] = Dataset({"a": ("x", [0, 0])}, {"x": [0, 1]}, {"b": 42}) + + with raises_regex(ValueError, "combine_attrs='identical'"): + actual = concat([ds1, ds2], dim="x", combine_attrs="identical") + with raises_regex(ValueError, "combine_attrs='no_conflicts'"): + ds3 = ds2.copy(deep=True) + ds3.attrs["b"] = 44 + actual = concat([ds1, ds3], dim="x", combine_attrs="no_conflicts") + + for combine_attrs in expected: + actual = concat([ds1, ds2], dim="x", combine_attrs=combine_attrs) + assert_identical(actual, expected[combine_attrs]) + def test_concat_promote_shape(self): # mixed dims within variables objs = [Dataset({}, {"x": 0}), Dataset({"x": [1]})] @@ -469,6 +491,30 @@ def test_concat_join_kwarg(self): actual = concat([ds1, ds2], join=join, dim="x") assert_equal(actual, expected[join].to_array()) + def test_concat_combine_attrs_kwarg(self): + da1 = DataArray([0], coords=[("x", [0])], attrs={"b": 42}) + da2 = DataArray([0], coords=[("x", [1])], attrs={"b": 42, "c": 43}) + + expected = {} + expected["drop"] = DataArray([0, 0], coords=[("x", [0, 1])]) + expected["no_conflicts"] = DataArray( + [0, 0], coords=[("x", [0, 1])], attrs={"b": 42, "c": 43} + ) + expected["override"] 
= DataArray( + [0, 0], coords=[("x", [0, 1])], attrs={"b": 42} + ) + + with raises_regex(ValueError, "combine_attrs='identical'"): + actual = concat([da1, da2], dim="x", combine_attrs="identical") + with raises_regex(ValueError, "combine_attrs='no_conflicts'"): + da3 = da2.copy(deep=True) + da3.attrs["b"] = 44 + actual = concat([da1, da3], dim="x", combine_attrs="no_conflicts") + + for combine_attrs in expected: + actual = concat([da1, da2], dim="x", combine_attrs=combine_attrs) + assert_identical(actual, expected[combine_attrs]) + @pytest.mark.parametrize("attr1", ({"a": {"meta": [10, 20, 30]}}, {"a": [1, 2, 3]}, {})) @pytest.mark.parametrize("attr2", ({"a": [1, 2, 3]}, {})) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index fbd9810f285..4f19dc2a9cf 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3750,9 +3750,16 @@ def test_to_dataset_whole(self): expected = Dataset({"foo": ("x", [1, 2])}) assert_identical(expected, actual) - named = DataArray([1, 2], dims="x", name="foo") + named = DataArray([1, 2], dims="x", name="foo", attrs={"y": "testattr"}) actual = named.to_dataset() - expected = Dataset({"foo": ("x", [1, 2])}) + expected = Dataset({"foo": ("x", [1, 2], {"y": "testattr"})}) + assert_identical(expected, actual) + + # Test promoting attrs + actual = named.to_dataset(promote_attrs=True) + expected = Dataset( + {"foo": ("x", [1, 2], {"y": "testattr"})}, attrs={"y": "testattr"} + ) assert_identical(expected, actual) with pytest.raises(TypeError): diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 6c8f3f65657..9057575b38c 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -3,6 +3,7 @@ import xarray as xr from xarray.core import dtypes, merge +from xarray.core.merge import MergeError from xarray.testing import assert_identical from . 
import raises_regex @@ -49,6 +50,65 @@ def test_merge_dataarray_unnamed(self): with raises_regex(ValueError, "without providing an explicit name"): xr.merge([data]) + def test_merge_arrays_attrs_default(self): + var1_attrs = {"a": 1, "b": 2} + var2_attrs = {"a": 1, "c": 3} + expected_attrs = {} + + data = create_test_data() + data.var1.attrs = var1_attrs + data.var2.attrs = var2_attrs + actual = xr.merge([data.var1, data.var2]) + expected = data[["var1", "var2"]] + expected.attrs = expected_attrs + assert actual.identical(expected) + + @pytest.mark.parametrize( + "combine_attrs, var1_attrs, var2_attrs, expected_attrs, " "expect_exception", + [ + ( + "no_conflicts", + {"a": 1, "b": 2}, + {"a": 1, "c": 3}, + {"a": 1, "b": 2, "c": 3}, + False, + ), + ("no_conflicts", {"a": 1, "b": 2}, {}, {"a": 1, "b": 2}, False), + ("no_conflicts", {}, {"a": 1, "c": 3}, {"a": 1, "c": 3}, False), + ( + "no_conflicts", + {"a": 1, "b": 2}, + {"a": 4, "c": 3}, + {"a": 1, "b": 2, "c": 3}, + True, + ), + ("drop", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {}, False), + ("identical", {"a": 1, "b": 2}, {"a": 1, "b": 2}, {"a": 1, "b": 2}, False), + ("identical", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {"a": 1, "b": 2}, True), + ( + "override", + {"a": 1, "b": 2}, + {"a": 4, "b": 5, "c": 3}, + {"a": 1, "b": 2}, + False, + ), + ], + ) + def test_merge_arrays_attrs( + self, combine_attrs, var1_attrs, var2_attrs, expected_attrs, expect_exception + ): + data = create_test_data() + data.var1.attrs = var1_attrs + data.var2.attrs = var2_attrs + if expect_exception: + with raises_regex(MergeError, "combine_attrs"): + actual = xr.merge([data.var1, data.var2], combine_attrs=combine_attrs) + else: + actual = xr.merge([data.var1, data.var2], combine_attrs=combine_attrs) + expected = data[["var1", "var2"]] + expected.attrs = expected_attrs + assert actual.identical(expected) + def test_merge_dicts_simple(self): actual = xr.merge([{"foo": 0}, {"bar": "one"}, {"baz": 3.5}]) expected = xr.Dataset({"foo": 0, "bar": "one", "baz": 3.5}) diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index af87b94393d..ddca6c57064 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -9,7 +9,7 @@ from xarray.core import duck_array_ops, utils from xarray.core.utils import either_dict_or_kwargs -from . import assert_array_equal, requires_cftime, requires_dask +from . import assert_array_equal, raises_regex, requires_cftime, requires_dask from .test_coding_times import _all_cftime_date_types @@ -124,6 +124,15 @@ def test_ordered_dict_intersection(self): assert {"b": "B"} == utils.ordered_dict_intersection(self.x, self.y) assert {} == utils.ordered_dict_intersection(self.x, self.z) + def test_compat_dict_union(self): + assert {"a": "A", "b": "B", "c": "C"} == utils.compat_dict_union(self.x, self.y) + with raises_regex( + ValueError, + "unsafe to merge dictionaries without " + "overriding values; conflicting key", + ): + utils.compat_dict_union(self.x, self.z) + def test_dict_equiv(self): x = {} x["a"] = 3 From 009aa66620b3437cf0de675013fa7d1ff231963c Mon Sep 17 00:00:00 2001 From: johnomotani Date: Tue, 24 Mar 2020 22:59:06 +0000 Subject: [PATCH 49/54] Rename ordered_dict_intersection -> compat_dict_intersection (#3887) Do not use OrderedDicts any more, so name did not make sense. 
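
The semantics of the two dict helpers are pinned down by the tests above; a minimal sketch using the same fixtures as ``test_utils.py`` (illustrative only, not part of this patch, and it assumes an xarray build that includes this rename):

```python
from xarray.core import utils

x = {"a": "A", "b": "B"}
y = {"c": "C", "b": "B"}
z = {"a": "Z"}

# Intersection keeps only keys whose values agree in both mappings.
assert utils.compat_dict_intersection(x, y) == {"b": "B"}

# Union merges the mappings but refuses to silently override a conflict.
assert utils.compat_dict_union(x, y) == {"a": "A", "b": "B", "c": "C"}
try:
    utils.compat_dict_union(x, z)  # "a" maps to "A" in x but "Z" in z
except ValueError:
    pass  # conflicting key -> unsafe to merge without overriding values
```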
--- xarray/core/utils.py | 5 +---- xarray/tests/test_utils.py | 6 +++--- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 5570f9e9a80..896ee31ab5c 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -343,7 +343,7 @@ def dict_equiv( return True -def ordered_dict_intersection( +def compat_dict_intersection( first_dict: Mapping[K, V], second_dict: Mapping[K, V], compat: Callable[[V, V], bool] = equivalent, @@ -361,9 +361,6 @@ def ordered_dict_intersection( Binary operator to determine if two values are compatible. By default, checks for equivalence. - # TODO: Rename to compat_dict_intersection, as we do not use OrderedDicts - # any more. - Returns ------- intersection : dict diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index ddca6c57064..5f8b1770bd3 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -120,9 +120,9 @@ def test_unsafe(self): with pytest.raises(ValueError): utils.update_safety_check(self.x, self.z) - def test_ordered_dict_intersection(self): - assert {"b": "B"} == utils.ordered_dict_intersection(self.x, self.y) - assert {} == utils.ordered_dict_intersection(self.x, self.z) + def test_compat_dict_intersection(self): + assert {"b": "B"} == utils.compat_dict_intersection(self.x, self.y) + assert {} == utils.compat_dict_intersection(self.x, self.z) def test_compat_dict_union(self): assert {"a": "A", "b": "B", "c": "C"} == utils.compat_dict_union(self.x, self.y) From f583ac7b125cf33f11dba9d948d6cfffac47317e Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Wed, 25 Mar 2020 11:34:31 -0400 Subject: [PATCH 50/54] misplaced quote in whatsnew (#3889) --- doc/whats-new.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4515f552812..594dcad5a19 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -789,12 +789,13 @@ Bug fixes - Plots in 2 dimensions (pcolormesh, contour) now allow to specify levels as numpy array (:issue:`3284`). By `Mathias Hauser `_. - Fixed bug in :meth:`DataArray.quantile` failing to keep attributes when - `keep_attrs` was True (:issue:`3304`). By David Huard ``_. + `keep_attrs` was True (:issue:`3304`). By `David Huard `_. Documentation ~~~~~~~~~~~~~ -- Created a `PR checklist `_ as a quick reference for tasks before creating a new PR +- Created a `PR checklist `_ + as a quick reference for tasks before creating a new PR or pushing new commits. By `Gregory Gundersen `_. 
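
Taken together, the attrs-handling patches above give ``merge`` and friends the following user-facing behaviour; a minimal usage sketch (illustrative only, not part of any patch, assuming an xarray build that includes them):

```python
import xarray as xr
from xarray.core.merge import MergeError

ds1 = xr.Dataset(attrs={"a": 1, "b": 2})
ds2 = xr.Dataset(attrs={"a": 1, "c": 3})

# merge still defaults to combine_attrs="drop": no attrs on the result.
assert xr.merge([ds1, ds2]).attrs == {}

# "no_conflicts" takes the union; shared keys must agree in value.
merged = xr.merge([ds1, ds2], combine_attrs="no_conflicts")
assert merged.attrs == {"a": 1, "b": 2, "c": 3}

# A genuine conflict raises a MergeError instead of silently overriding.
ds3 = xr.Dataset(attrs={"a": 99})
try:
    xr.merge([ds1, ds3], combine_attrs="no_conflicts")
except MergeError:
    pass
```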
From ec215daecec642db94102dc24156448f8440f52d Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 25 Mar 2020 13:17:44 -0400 Subject: [PATCH 51/54] Implementation of polyfit and polyval (#3733) * [WIP] Implementation of polyfit and polyval - minimum testing - no docs * Formatting with black, flake8 * Fix failing test * More intelligent skipna switching * Add docs | Change coeff order to fit numpy | move polyval * Move doc patching to class * conditional doc patching * Fix windows fail - more efficient nan skipping * Fix typo in least_squares * Move polyfit to dataset * Add more tests | fix some edge cases * Skip test without dask * Fix 1D case | add docs * skip polyval test without dask * Explicit docs | More restrictive polyval * Small typo in polyfit docstrings * Apply suggestions from code review Co-Authored-By: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> * Polyfit : fix style in docstring | add see also section * Clean up docstrings and documentation. * Move whats new entry to 0.16 | fix PEP8 issue in test_dataarray Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- doc/api.rst | 3 + doc/computation.rst | 26 ++++ doc/whats-new.rst | 2 + xarray/__init__.py | 3 +- xarray/core/computation.py | 32 +++++ xarray/core/dask_array_ops.py | 27 +++++ xarray/core/dataarray.py | 62 ++++++++++ xarray/core/dataset.py | 179 ++++++++++++++++++++++++++++ xarray/core/duck_array_ops.py | 9 ++ xarray/core/nputils.py | 33 +++++ xarray/tests/test_computation.py | 32 +++++ xarray/tests/test_dataarray.py | 50 ++++++++ xarray/tests/test_dataset.py | 13 ++ xarray/tests/test_duck_array_ops.py | 18 +++ 14 files changed, 488 insertions(+), 1 deletion(-) diff --git a/doc/api.rst b/doc/api.rst index b9c3e3bdd33..216f47f988f 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -30,6 +30,7 @@ Top-level functions zeros_like ones_like dot + polyval map_blocks show_versions set_options @@ -172,6 +173,7 @@ Computation Dataset.quantile Dataset.differentiate Dataset.integrate + Dataset.polyfit **Aggregation**: :py:attr:`~Dataset.all` @@ -352,6 +354,7 @@ Computation DataArray.quantile DataArray.differentiate DataArray.integrate + DataArray.polyfit DataArray.str **Aggregation**: diff --git a/doc/computation.rst b/doc/computation.rst index 5309f27e9b6..4b8014c4782 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -401,6 +401,32 @@ trapezoidal rule using their coordinates, and integration along multidimensional coordinate are not supported. +.. _compute.polyfit: + +Fitting polynomials +=================== + +Xarray objects provide an interface for performing linear or polynomial regressions +using the least-squares method. :py:meth:`~xarray.DataArray.polyfit` computes the +best fitting coefficients along a given dimension and for a given order, + +.. ipython:: python + + x = xr.DataArray(np.arange(10), dims=['x'], name='x') + a = xr.DataArray(3 + 4 * x, dims=['x'], coords={'x': x}) + out = a.polyfit(dim='x', deg=1, full=True) + out + +The method outputs a dataset containing the coefficients (and more if `full=True`). +The inverse operation is done with :py:meth:`~xarray.polyval`, + +.. ipython:: python + + xr.polyval(coord=x, coeffs=out.polyfit_coefficients) + +.. note:: + These methods replicate the behaviour of :py:func:`numpy.polyfit` and :py:func:`numpy.polyval`. + .. 
_compute.broadcasting: Broadcasting by dimension name diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 594dcad5a19..eebd04123d1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -24,6 +24,8 @@ Breaking changes New Features ~~~~~~~~~~~~ +- Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`) + By `Pascal Bourgault `_. - Control over attributes of result in :py:func:`merge`, :py:func:`concat`, :py:func:`combine_by_coords` and :py:func:`combine_nested` using combine_attrs keyword argument. (:issue:`3865`, :pull:`3877`) diff --git a/xarray/__init__.py b/xarray/__init__.py index 331d8ecb09a..0fead57e5fb 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -17,7 +17,7 @@ from .core.alignment import align, broadcast from .core.combine import auto_combine, combine_by_coords, combine_nested from .core.common import ALL_DIMS, full_like, ones_like, zeros_like -from .core.computation import apply_ufunc, dot, where +from .core.computation import apply_ufunc, dot, polyval, where from .core.concat import concat from .core.dataarray import DataArray from .core.dataset import Dataset @@ -65,6 +65,7 @@ "open_mfdataset", "open_rasterio", "open_zarr", + "polyval", "register_dataarray_accessor", "register_dataset_accessor", "save_mfdataset", diff --git a/xarray/core/computation.py b/xarray/core/computation.py index f2941a3d0ba..13bf6248331 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1306,3 +1306,35 @@ def where(cond, x, y): dataset_join="exact", dask="allowed", ) + + +def polyval(coord, coeffs, degree_dim="degree"): + """Evaluate a polynomial at specific values + + Parameters + ---------- + coord : DataArray + The 1D coordinate along which to evaluate the polynomial. + coeffs : DataArray + Coefficients of the polynomials. + degree_dim : str, default "degree" + Name of the polynomial degree dimension in `coeffs`. + + See also + -------- + xarray.DataArray.polyfit + numpy.polyval + """ + from .dataarray import DataArray + from .missing import get_clean_interp_index + + x = get_clean_interp_index(coord, coord.name) + + deg_coord = coeffs[degree_dim] + + lhs = DataArray( + np.vander(x, int(deg_coord.max()) + 1), + dims=(coord.name, degree_dim), + coords={coord.name: coord, degree_dim: np.arange(deg_coord.max() + 1)[::-1]}, + ) + return (lhs * coeffs).sum(degree_dim) diff --git a/xarray/core/dask_array_ops.py b/xarray/core/dask_array_ops.py index 37f261cc3ad..87f646352eb 100644 --- a/xarray/core/dask_array_ops.py +++ b/xarray/core/dask_array_ops.py @@ -95,3 +95,30 @@ def func(x, window, axis=-1): # crop boundary. index = (slice(None),) * axis + (slice(drop_size, drop_size + orig_shape[axis]),) return out[index] + + +def least_squares(lhs, rhs, rcond=None, skipna=False): + import dask.array as da + + lhs_da = da.from_array(lhs, chunks=(rhs.chunks[0], lhs.shape[1])) + if skipna: + added_dim = rhs.ndim == 1 + if added_dim: + rhs = rhs.reshape(rhs.shape[0], 1) + results = da.apply_along_axis( + nputils._nanpolyfit_1d, + 0, + rhs, + lhs_da, + dtype=float, + shape=(lhs.shape[1] + 1,), + rcond=rcond, + ) + coeffs = results[:-1, ...] + residuals = results[-1, ...] 
+        if added_dim:
+            coeffs = coeffs.reshape(coeffs.shape[0])
+            residuals = residuals.reshape(residuals.shape[0])
+    else:
+        coeffs, residuals, _, _ = da.linalg.lstsq(lhs_da, rhs)
+    return coeffs, residuals
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 232fb86144e..070886cfc34 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -3275,6 +3275,68 @@ def map_blocks(
 
         return map_blocks(func, self, args, kwargs)
 
+    def polyfit(
+        self,
+        dim: Hashable,
+        deg: int,
+        skipna: bool = None,
+        rcond: float = None,
+        w: Union[Hashable, Any] = None,
+        full: bool = False,
+        cov: bool = False,
+    ):
+        """
+        Least squares polynomial fit.
+
+        This replicates the behaviour of `numpy.polyfit` but differs by skipping
+        invalid values when `skipna = True`.
+
+        Parameters
+        ----------
+        dim : hashable
+            Coordinate along which to fit the polynomials.
+        deg : int
+            Degree of the fitting polynomial.
+        skipna : bool, optional
+            If True, removes all invalid values before fitting each 1D slice of
+            the array. Default is True if data is stored in a dask.array or if
+            there are any invalid values, False otherwise.
+        rcond : float, optional
+            Relative condition number to the fit.
+        w : Union[Hashable, Any], optional
+            Weights to apply to the y-coordinate of the sample points.
+            Can be an array-like object or the name of a coordinate in the dataset.
+        full : bool, optional
+            Whether to return the residuals, matrix rank and singular values in
+            addition to the coefficients.
+        cov : Union[bool, str], optional
+            Whether to return the covariance matrix in addition to the coefficients.
+            The matrix is not scaled if `cov='unscaled'`.
+
+        Returns
+        -------
+        polyfit_results : Dataset
+            A single dataset which contains:
+
+            polyfit_coefficients
+                The coefficients of the best fit.
+            polyfit_residuals
+                The residuals of the least-squares computation (only included if `full=True`)
+            [dim]_matrix_rank
+                The effective rank of the scaled Vandermonde coefficient matrix (only included if `full=True`)
+            [dim]_singular_values
+                The singular values of the scaled Vandermonde coefficient matrix (only included if `full=True`)
+            polyfit_covariance
+                The covariance matrix of the polynomial coefficient estimates (only included if `full=False` and `cov=True`)
+
+        See also
+        --------
+        numpy.polyfit
+        """
+        return self._to_temp_dataset().polyfit(
+            dim, deg, skipna=skipna, rcond=rcond, w=w, full=full, cov=cov
+        )
+
     def pad(
         self,
         pad_width: Mapping[Hashable, Union[int, Tuple[int, int]]] = None,
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 6f96e4f469c..c49694b1fc0 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -76,6 +76,7 @@
     merge_coordinates_without_align,
     merge_data_and_coords,
 )
+from .missing import get_clean_interp_index
 from .options import OPTIONS, _get_keep_attrs
 from .pycompat import dask_array_type
 from .utils import (
@@ -5748,6 +5749,184 @@ def map_blocks(
 
         return map_blocks(func, self, args, kwargs)
 
+    def polyfit(
+        self,
+        dim: Hashable,
+        deg: int,
+        skipna: bool = None,
+        rcond: float = None,
+        w: Union[Hashable, Any] = None,
+        full: bool = False,
+        cov: Union[bool, str] = False,
+    ):
+        """
+        Least squares polynomial fit.
+
+        This replicates the behaviour of `numpy.polyfit` but differs by skipping
+        invalid values when `skipna = True`.
+
+        Parameters
+        ----------
+        dim : hashable
+            Coordinate along which to fit the polynomials.
+        deg : int
+            Degree of the fitting polynomial.
+        skipna : bool, optional
+            If True, removes all invalid values before fitting each 1D slice of
+            the array. Default is True if data is stored in a dask.array or if
+            there are any invalid values, False otherwise.
+        rcond : float, optional
+            Relative condition number to the fit.
+        w : Union[Hashable, Any], optional
+            Weights to apply to the y-coordinate of the sample points.
+            Can be an array-like object or the name of a coordinate in the dataset.
+        full : bool, optional
+            Whether to return the residuals, matrix rank and singular values in
+            addition to the coefficients.
+        cov : Union[bool, str], optional
+            Whether to return the covariance matrix in addition to the coefficients.
+            The matrix is not scaled if `cov='unscaled'`.
+
+        Returns
+        -------
+        polyfit_results : Dataset
+            A single dataset which contains (for each "var" in the input dataset):
+
+            [var]_polyfit_coefficients
+                The coefficients of the best fit for each variable in this dataset.
+            [var]_polyfit_residuals
+                The residuals of the least-squares computation for each variable (only included if `full=True`)
+            [dim]_matrix_rank
+                The effective rank of the scaled Vandermonde coefficient matrix (only included if `full=True`)
+            [dim]_singular_values
+                The singular values of the scaled Vandermonde coefficient matrix (only included if `full=True`)
+            [var]_polyfit_covariance
+                The covariance matrix of the polynomial coefficient estimates (only included if `full=False` and `cov=True`)
+
+        See also
+        --------
+        numpy.polyfit
+        """
+        variables = {}
+        skipna_da = skipna
+
+        x = get_clean_interp_index(self, dim)
+        xname = "{}_".format(self[dim].name)
+        order = int(deg) + 1
+        lhs = np.vander(x, order)
+
+        if rcond is None:
+            rcond = x.shape[0] * np.core.finfo(x.dtype).eps
+
+        # Weights:
+        if w is not None:
+            if isinstance(w, Hashable):
+                w = self.coords[w]
+            w = np.asarray(w)
+            if w.ndim != 1:
+                raise TypeError("Expected a 1-d array for weights.")
+            if w.shape[0] != lhs.shape[0]:
+                raise TypeError("Expected w and {} to have the same length".format(dim))
+            lhs *= w[:, np.newaxis]
+
+        # Scaling
+        scale = np.sqrt((lhs * lhs).sum(axis=0))
+        lhs /= scale
+
+        degree_dim = utils.get_temp_dimname(self.dims, "degree")
+
+        rank = np.linalg.matrix_rank(lhs)
+        if rank != order and not full:
+            warnings.warn(
+                "Polyfit may be poorly conditioned", np.RankWarning, stacklevel=4
+            )
+
+        if full:
+            rank = xr.DataArray(rank, name=xname + "matrix_rank")
+            variables[rank.name] = rank
+            sing = np.linalg.svd(lhs, compute_uv=False)
+            sing = xr.DataArray(
+                sing,
+                dims=(degree_dim,),
+                coords={degree_dim: np.arange(order)[::-1]},
+                name=xname + "singular_values",
+            )
+            variables[sing.name] = sing
+
+        for name, da in self.data_vars.items():
+            if dim not in da.dims:
+                continue
+
+            if skipna is None:
+                if isinstance(da.data, dask_array_type):
+                    skipna_da = True
+                else:
+                    skipna_da = np.any(da.isnull())
+
+            dims_to_stack = [dimname for dimname in da.dims if dimname != dim]
+            stacked_coords = {}
+            if dims_to_stack:
+                stacked_dim = utils.get_temp_dimname(dims_to_stack, "stacked")
+                rhs = da.transpose(dim, *dims_to_stack).stack(
+                    {stacked_dim: dims_to_stack}
+                )
+                stacked_coords = {stacked_dim: rhs[stacked_dim]}
+                scale_da = scale[:, np.newaxis]
+            else:
+                rhs = da
+                scale_da = scale
+
+            if w is not None:
+                rhs *= w[:, np.newaxis]
+
+            coeffs, residuals = duck_array_ops.least_squares(
+                lhs, rhs.data, rcond=rcond, skipna=skipna_da
+            )
+
+            if isinstance(name, str):
+                name = "{}_".format(name)
+            else:
+                # Thus a ReprObject => polyfit was called on a DataArray
+                name = ""
+ + coeffs = xr.DataArray( + coeffs / scale_da, + dims=[degree_dim] + list(stacked_coords.keys()), + coords={degree_dim: np.arange(order)[::-1], **stacked_coords}, + name=name + "polyfit_coefficients", + ) + if dims_to_stack: + coeffs = coeffs.unstack(stacked_dim) + variables[coeffs.name] = coeffs + + if full or (cov is True): + residuals = xr.DataArray( + residuals if dims_to_stack else residuals.squeeze(), + dims=list(stacked_coords.keys()), + coords=stacked_coords, + name=name + "polyfit_residuals", + ) + if dims_to_stack: + residuals = residuals.unstack(stacked_dim) + variables[residuals.name] = residuals + + if cov: + Vbase = np.linalg.inv(np.dot(lhs.T, lhs)) + Vbase /= np.outer(scale, scale) + if cov == "unscaled": + fac = 1 + else: + if x.shape[0] <= order: + raise ValueError( + "The number of data points must exceed order to scale the covariance matrix." + ) + fac = residuals / (x.shape[0] - order) + covariance = xr.DataArray(Vbase, dims=("cov_i", "cov_j"),) * fac + variables[name + "polyfit_covariance"] = covariance + + return Dataset(data_vars=variables, attrs=self.attrs.copy()) + def pad( self, pad_width: Mapping[Hashable, Union[int, Tuple[int, int]]] = None, diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index ff2d0af63ed..4047a1e68e1 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -597,3 +597,12 @@ def rolling_window(array, axis, window, center, fill_value): return dask_array_ops.rolling_window(array, axis, window, center, fill_value) else: # np.ndarray return nputils.rolling_window(array, axis, window, center, fill_value) + + +def least_squares(lhs, rhs, rcond=None, skipna=False): + """Return the coefficients and residuals of a least-squares fit. + """ + if isinstance(rhs, dask_array_type): + return dask_array_ops.least_squares(lhs, rhs, rcond=rcond, skipna=skipna) + else: + return nputils.least_squares(lhs, rhs, rcond=rcond, skipna=skipna) diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index 5dd8219ebca..fa6df63e0ea 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -220,6 +220,39 @@ def f(values, axis=None, **kwargs): return f +def _nanpolyfit_1d(arr, x, rcond=None): + out = np.full((x.shape[1] + 1,), np.nan) + mask = np.isnan(arr) + if not np.all(mask): + out[:-1], out[-1], _, _ = np.linalg.lstsq(x[~mask, :], arr[~mask], rcond=rcond) + return out + + +def least_squares(lhs, rhs, rcond=None, skipna=False): + if skipna: + added_dim = rhs.ndim == 1 + if added_dim: + rhs = rhs.reshape(rhs.shape[0], 1) + nan_cols = np.any(np.isnan(rhs), axis=0) + out = np.empty((lhs.shape[1] + 1, rhs.shape[1])) + if np.any(nan_cols): + out[:, nan_cols] = np.apply_along_axis( + _nanpolyfit_1d, 0, rhs[:, nan_cols], lhs + ) + if np.any(~nan_cols): + out[:-1, ~nan_cols], out[-1, ~nan_cols], _, _ = np.linalg.lstsq( + lhs, rhs[:, ~nan_cols], rcond=rcond + ) + coeffs = out[:-1, :] + residuals = out[-1, :] + if added_dim: + coeffs = coeffs.reshape(coeffs.shape[0]) + residuals = residuals.reshape(residuals.shape[0]) + else: + coeffs, residuals, _, _ = np.linalg.lstsq(lhs, rhs, rcond=rcond) + return coeffs, residuals + + nanmin = _create_bottleneck_method("nanmin") nanmax = _create_bottleneck_method("nanmax") nanmean = _create_bottleneck_method("nanmean") diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 369903552ad..4eed464d2dc 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1120,3 +1120,35 @@ def test_where(): actual = xr.where(cond, 
1, 0) expected = xr.DataArray([1, 0], dims="x") assert_identical(expected, actual) + + +@pytest.mark.parametrize("use_dask", [True, False]) +@pytest.mark.parametrize("use_datetime", [True, False]) +def test_polyval(use_dask, use_datetime): + if use_dask and not has_dask: + pytest.skip("requires dask") + + if use_datetime: + xcoord = xr.DataArray( + pd.date_range("2000-01-01", freq="D", periods=10), dims=("x",), name="x" + ) + x = xr.core.missing.get_clean_interp_index(xcoord, "x") + else: + xcoord = x = np.arange(10) + + da = xr.DataArray( + np.stack((1.0 + x + 2.0 * x ** 2, 1.0 + 2.0 * x + 3.0 * x ** 2)), + dims=("d", "x"), + coords={"x": xcoord, "d": [0, 1]}, + ) + coeffs = xr.DataArray( + [[2, 1, 1], [3, 2, 1]], + dims=("d", "degree"), + coords={"d": [0, 1], "degree": [2, 1, 0]}, + ) + if use_dask: + coeffs = coeffs.chunk({"d": 2}) + + da_pv = xr.polyval(da.x, coeffs) + + xr.testing.assert_allclose(da, da_pv.T) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 4f19dc2a9cf..e23ff2f7e31 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -23,6 +23,7 @@ assert_array_equal, assert_equal, assert_identical, + has_dask, raises_regex, requires_bottleneck, requires_dask, @@ -4191,6 +4192,55 @@ def test_rank(self): y = DataArray([0.75, 0.25, np.nan, 0.5, 1.0], dims=("z",)) assert_equal(y.rank("z", pct=True), y) + @pytest.mark.parametrize("use_dask", [True, False]) + @pytest.mark.parametrize("use_datetime", [True, False]) + def test_polyfit(self, use_dask, use_datetime): + if use_dask and not has_dask: + pytest.skip("requires dask") + xcoord = xr.DataArray( + pd.date_range("1970-01-01", freq="D", periods=10), dims=("x",), name="x" + ) + x = xr.core.missing.get_clean_interp_index(xcoord, "x") + if not use_datetime: + xcoord = x + + da_raw = DataArray( + np.stack( + (10 + 1e-15 * x + 2e-28 * x ** 2, 30 + 2e-14 * x + 1e-29 * x ** 2) + ), + dims=("d", "x"), + coords={"x": xcoord, "d": [0, 1]}, + ) + + if use_dask: + da = da_raw.chunk({"d": 1}) + else: + da = da_raw + + out = da.polyfit("x", 2) + expected = DataArray( + [[2e-28, 1e-15, 10], [1e-29, 2e-14, 30]], + dims=("d", "degree"), + coords={"degree": [2, 1, 0], "d": [0, 1]}, + ).T + assert_allclose(out.polyfit_coefficients, expected, rtol=1e-3) + + # With NaN + da_raw[0, 1] = np.nan + if use_dask: + da = da_raw.chunk({"d": 1}) + else: + da = da_raw + out = da.polyfit("x", 2, skipna=True, cov=True) + assert_allclose(out.polyfit_coefficients, expected, rtol=1e-3) + assert "polyfit_covariance" in out + + # Skipna + Full output + out = da.polyfit("x", 2, skipna=True, full=True) + assert_allclose(out.polyfit_coefficients, expected, rtol=1e-3) + assert out.x_matrix_rank == 3 + np.testing.assert_almost_equal(out.polyfit_residuals, [0, 0]) + def test_pad_constant(self): ar = DataArray(np.arange(3 * 4 * 5).reshape(3, 4, 5)) actual = ar.pad(dim_0=(1, 3)) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 20b814a25c7..02698253e5d 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -5499,6 +5499,19 @@ def test_ipython_key_completion(self): ds.data_vars[item] # should not raise assert sorted(actual) == sorted(expected) + def test_polyfit_output(self): + ds = create_test_data(seed=1) + + out = ds.polyfit("dim2", 2, full=False) + assert "var1_polyfit_coefficients" in out + + out = ds.polyfit("dim1", 2, full=True) + assert "var1_polyfit_coefficients" in out + assert "dim1_matrix_rank" in out + + out = ds.polyfit("time", 2) + assert 
len(out.data_vars) == 0 + def test_pad(self): ds = create_test_data(seed=1) padded = ds.pad(dim2=(1, 1), constant_values=42) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 157cd16cba6..e61881cfce3 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -16,6 +16,7 @@ first, gradient, last, + least_squares, mean, np_timedelta64_to_float, pd_timedelta_to_float, @@ -761,3 +762,20 @@ def test_timedelta_to_numeric(td): out = timedelta_to_numeric(td, "ns") np.testing.assert_allclose(out, 86400 * 1e9) assert isinstance(out, float) + + +@pytest.mark.parametrize("use_dask", [True, False]) +@pytest.mark.parametrize("skipna", [True, False]) +def test_least_squares(use_dask, skipna): + if use_dask and not has_dask: + pytest.skip("requires dask") + lhs = np.array([[1, 2], [1, 2], [3, 2]]) + rhs = DataArray(np.array([3, 5, 7]), dims=("y",)) + + if use_dask: + rhs = rhs.chunk({"y": 1}) + + coeffs, residuals = least_squares(lhs, rhs.data, skipna=skipna) + + np.testing.assert_allclose(coeffs, [1.5, 1.25]) + np.testing.assert_allclose(residuals, [2.0]) From 6378a711d50ba7f1ba9b2a451d4d1f5e1fb37353 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Wed, 25 Mar 2020 20:47:42 -0400 Subject: [PATCH 52/54] Use drawstyle instead of linestyle in plot.step. (#3274) Mixing the two is deprecated in Matplotlib 3.1, and breaks the doc build if warnings are set to errors (which they are in new IPython sphinx extensions.) --- doc/whats-new.rst | 7 ++++++- xarray/plot/plot.py | 18 +++++++++--------- xarray/plot/utils.py | 4 ++-- xarray/tests/test_plot.py | 4 ++++ 4 files changed, 21 insertions(+), 12 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index eebd04123d1..14941228c88 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -20,7 +20,12 @@ v0.16.0 (unreleased) Breaking changes ~~~~~~~~~~~~~~~~ - +- Alternate draw styles for :py:meth:`plot.step` must be passed using the + ``drawstyle`` (or ``ds``) keyword argument, instead of the ``linestyle`` (or + ``ls``) keyword argument, in line with the `upstream change in Matplotlib + `_. 
+ (:pull:`3274`) + By `Elliott Sales de Andrade `_ New Features ~~~~~~~~~~~~ diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 98131887e28..302cac05b05 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -329,7 +329,7 @@ def line( return primitive -def step(darray, *args, where="pre", linestyle=None, ls=None, **kwargs): +def step(darray, *args, where="pre", drawstyle=None, ds=None, **kwargs): """ Step plot of DataArray index against values @@ -359,16 +359,16 @@ def step(darray, *args, where="pre", linestyle=None, ls=None, **kwargs): if where not in {"pre", "post", "mid"}: raise ValueError("'where' argument to step must be " "'pre', 'post' or 'mid'") - if ls is not None: - if linestyle is None: - linestyle = ls + if ds is not None: + if drawstyle is None: + drawstyle = ds else: - raise TypeError("ls and linestyle are mutually exclusive") - if linestyle is None: - linestyle = "" - linestyle = "steps-" + where + linestyle + raise TypeError("ds and drawstyle are mutually exclusive") + if drawstyle is None: + drawstyle = "" + drawstyle = "steps-" + where + drawstyle - return line(darray, *args, linestyle=linestyle, **kwargs) + return line(darray, *args, drawstyle=drawstyle, **kwargs) def hist( diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index cb3bef6d409..e6c15037cb8 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -465,7 +465,7 @@ def _resolve_intervals_1dplot(xval, yval, xlabel, ylabel, kwargs): """ # Is it a step plot? (see matplotlib.Axes.step) - if kwargs.get("linestyle", "").startswith("steps-"): + if kwargs.get("drawstyle", "").startswith("steps-"): # Convert intervals to double points if _valid_other_type(np.array([xval, yval]), [pd.Interval]): @@ -476,7 +476,7 @@ def _resolve_intervals_1dplot(xval, yval, xlabel, ylabel, kwargs): yval, xval = _interval_to_double_bound_points(yval, xval) # Remove steps-* to be sure that matplotlib is not confused - del kwargs["linestyle"] + del kwargs["drawstyle"] # Is it another kind of plot? 
        else:
diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py
index c1549c62038..7f3f1620133 100644
--- a/xarray/tests/test_plot.py
+++ b/xarray/tests/test_plot.py
@@ -591,6 +591,10 @@ def setUp(self):
     def test_step(self):
         self.darray[0, 0].plot.step()
 
+    @pytest.mark.parametrize("ds", ["pre", "post", "mid"])
+    def test_step_with_drawstyle(self, ds):
+        self.darray[0, 0].plot.step(drawstyle=ds)
+
     def test_coord_with_interval_step(self):
         """Test step plot with intervals."""
         bins = [-1, 0, 1, 2]

From b3066746efd412cbc9b6c6aafd64229f4c9122f3 Mon Sep 17 00:00:00 2001
From: Joe Hamman
Date: Fri, 27 Mar 2020 15:37:25 -0700
Subject: [PATCH 53/54] expose a few zarr backend functions as semi-public api
 (#3897)

* expose a few zarr backend functions as semi-public api

* black

* update equality check for chunks

---
 xarray/backends/zarr.py       | 44 +++++++++++++++++++++++---------
 xarray/tests/test_backends.py | 47 +++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+), 12 deletions(-)

diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index 2469a31a3d9..cdc74e06882 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -10,13 +10,20 @@
 from .common import AbstractWritableDataStore, BackendArray, _encode_variable_name
 
 # need some special secret attributes to tell us the dimensions
-_DIMENSION_KEY = "_ARRAY_DIMENSIONS"
+DIMENSION_KEY = "_ARRAY_DIMENSIONS"
 
 
-# zarr attributes have to be serializable as json
-# many xarray datasets / variables have numpy arrays and values
-# these functions handle encoding / decoding of such items
-def _encode_zarr_attr_value(value):
+def encode_zarr_attr_value(value):
+    """
+    Encode an attribute value as something that can be serialized as json
+
+    Many xarray datasets / variables have numpy arrays and values. This
+    function handles encoding / decoding of such items.
+
+    ndarray -> list
+    scalar array -> scalar
+    other -> other (no change)
+    """
     if isinstance(value, np.ndarray):
         encoded = value.tolist()
         # this checks if it's a scalar number
@@ -170,7 +177,20 @@ def _get_zarr_dims_and_attrs(zarr_obj, dimension_key):
     return dimensions, attributes
 
 
-def _extract_zarr_variable_encoding(variable, raise_on_invalid=False):
+def extract_zarr_variable_encoding(variable, raise_on_invalid=False):
+    """
+    Extract zarr encoding dictionary from xarray Variable
+
+    Parameters
+    ----------
+    variable : xarray.Variable
+    raise_on_invalid : bool, optional
+
+    Returns
+    -------
+    encoding : dict
+        Zarr encoding for `variable`
+    """
     encoding = variable.encoding.copy()
 
     valid_encodings = {"chunks", "compressor", "filters", "cache_metadata"}
@@ -271,7 +291,7 @@ def __init__(self, zarr_group, consolidate_on_close=False):
 
     def open_store_variable(self, name, zarr_array):
         data = indexing.LazilyOuterIndexedArray(ZarrArrayWrapper(name, self))
-        dimensions, attributes = _get_zarr_dims_and_attrs(zarr_array, _DIMENSION_KEY)
+        dimensions, attributes = _get_zarr_dims_and_attrs(zarr_array, DIMENSION_KEY)
         attributes = dict(attributes)
         encoding = {
             "chunks": zarr_array.chunks,
@@ -298,7 +318,7 @@ def get_dimensions(self):
         dimensions = {}
         for k, v in self.ds.arrays():
             try:
-                for d, s in zip(v.attrs[_DIMENSION_KEY], v.shape):
+                for d, s in zip(v.attrs[DIMENSION_KEY], v.shape):
                     if d in dimensions and dimensions[d] != s:
                         raise ValueError(
                             "found conflicting lengths for dimension %s "
@@ -310,7 +330,7 @@
                 raise KeyError(
                     "Zarr object is missing the attribute `%s`, "
                     "which is required for xarray to determine "
-                    "variable dimensions."
% (_DIMENSION_KEY) + "variable dimensions." % (DIMENSION_KEY) ) return dimensions @@ -328,7 +348,7 @@ def encode_variable(self, variable): return variable def encode_attribute(self, a): - return _encode_zarr_attr_value(a) + return encode_zarr_attr_value(a) def store( self, @@ -433,10 +453,10 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No writer.add(v.data, zarr_array, region=tuple(new_region)) else: # new variable - encoding = _extract_zarr_variable_encoding(v, raise_on_invalid=check) + encoding = extract_zarr_variable_encoding(v, raise_on_invalid=check) encoded_attrs = {} # the magic for storing the hidden dimension data - encoded_attrs[_DIMENSION_KEY] = dims + encoded_attrs[DIMENSION_KEY] = dims for k2, v2 in attrs.items(): encoded_attrs[k2] = self.encode_attribute(v2) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index a4585985bdc..82fe1b38149 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4498,3 +4498,50 @@ def test_invalid_netcdf_raises(engine): data = create_test_data() with raises_regex(ValueError, "unrecognized option 'invalid_netcdf'"): data.to_netcdf("foo.nc", engine=engine, invalid_netcdf=True) + + +@requires_zarr +def test_encode_zarr_attr_value(): + # array -> list + arr = np.array([1, 2, 3]) + expected = [1, 2, 3] + actual = backends.zarr.encode_zarr_attr_value(arr) + assert isinstance(actual, list) + assert actual == expected + + # scalar array -> scalar + sarr = np.array(1)[()] + expected = 1 + actual = backends.zarr.encode_zarr_attr_value(sarr) + assert isinstance(actual, int) + assert actual == expected + + # string -> string (no change) + expected = "foo" + actual = backends.zarr.encode_zarr_attr_value(expected) + assert isinstance(actual, str) + assert actual == expected + + +@requires_zarr +def test_extract_zarr_variable_encoding(): + + var = xr.Variable("x", [1, 2]) + actual = backends.zarr.extract_zarr_variable_encoding(var) + assert "chunks" in actual + assert actual["chunks"] is None + + var = xr.Variable("x", [1, 2], encoding={"chunks": (1,)}) + actual = backends.zarr.extract_zarr_variable_encoding(var) + assert actual["chunks"] == (1,) + + # does not raise on invalid + var = xr.Variable("x", [1, 2], encoding={"foo": (1,)}) + actual = backends.zarr.extract_zarr_variable_encoding(var) + + # raises on invalid + var = xr.Variable("x", [1, 2], encoding={"foo": (1,)}) + with raises_regex(ValueError, "unexpected encoding parameters"): + actual = backends.zarr.extract_zarr_variable_encoding( + var, raise_on_invalid=True + ) From acf7d4157ca44f05c85a92d1b914b68738988773 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Fri, 27 Mar 2020 22:22:11 -0400 Subject: [PATCH 54/54] Limit repr of arrays containing long strings (#3900) * limit repr of arrays containing long strings * whatsnew --- doc/whats-new.rst | 3 +++ xarray/core/formatting.py | 34 ++++++++++++++++++++++++--------- xarray/tests/test_formatting.py | 25 +++++++++++++++++------- 3 files changed, 46 insertions(+), 16 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 14941228c88..a138dee4128 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -35,6 +35,9 @@ New Features :py:func:`combine_by_coords` and :py:func:`combine_nested` using combine_attrs keyword argument. 
From acf7d4157ca44f05c85a92d1b914b68738988773 Mon Sep 17 00:00:00 2001
From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Date: Fri, 27 Mar 2020 22:22:11 -0400
Subject: [PATCH 54/54] Limit repr of arrays containing long strings (#3900)

* limit repr of arrays containing long strings

* whatsnew
---
 doc/whats-new.rst               |  3 +++
 xarray/core/formatting.py       | 34 ++++++++++++++++++++++++---------
 xarray/tests/test_formatting.py | 25 +++++++++++++++++-------
 3 files changed, 46 insertions(+), 16 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 14941228c88..a138dee4128 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -35,6 +35,9 @@ New Features
   :py:func:`combine_by_coords` and :py:func:`combine_nested` using
   combine_attrs keyword argument. (:issue:`3865`, :pull:`3877`)
   By `John Omotani `_
+- Limited the length of array items with long string reprs to a
+  reasonable width (:pull:`3900`)
+  By `Maximilian Roos `_


 Bug fixes

diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py
index 89246ff228d..534d253ecc8 100644
--- a/xarray/core/formatting.py
+++ b/xarray/core/formatting.py
@@ -4,6 +4,7 @@
 import functools
 from datetime import datetime, timedelta
 from itertools import zip_longest
+from typing import Hashable

 import numpy as np
 import pandas as pd
@@ -14,7 +15,7 @@
 from .pycompat import dask_array_type, sparse_array_type


-def pretty_print(x, numchars):
+def pretty_print(x, numchars: int):
     """Given an object `x`, call `str(x)` and format the returned string so
     that it is numchars long, padding with trailing spaces or truncating with
     ellipses as necessary
@@ -163,7 +164,7 @@ def format_items(x):
     return formatted


-def format_array_flat(array, max_width):
+def format_array_flat(array, max_width: int):
     """Return a formatted string for as many items in the flattened version of
     array that will fit within max_width characters.
     """
@@ -198,11 +199,20 @@ def format_array_flat(array, max_width):
     num_back = count - num_front
     # note that num_back is 0 <--> array.size is 0 or 1
     # <--> relevant_back_items is []
-    pprint_str = (
-        " ".join(relevant_front_items[:num_front])
-        + padding
-        + " ".join(relevant_back_items[-num_back:])
+    pprint_str = "".join(
+        [
+            " ".join(relevant_front_items[:num_front]),
+            padding,
+            " ".join(relevant_back_items[-num_back:]),
+        ]
     )
+
+    # As a final check, if it's still too long even with the limit on the
+    # number of values shown, replace the end with an ellipsis
+    # NB: this will still return a full 3-character ellipsis when max_width < 3
+    if len(pprint_str) > max_width:
+        pprint_str = pprint_str[: max(max_width - 3, 0)] + "..."
+
     return pprint_str


@@ -258,10 +268,16 @@ def inline_variable_array_repr(var, max_width):
         return "..."


-def summarize_variable(name, var, col_width, marker=" ", max_width=None):
+def summarize_variable(
+    name: Hashable, var, col_width: int, marker: str = " ", max_width: int = None
+):
     """Summarize a variable in one line, e.g., for the Dataset.__repr__."""
     if max_width is None:
-        max_width = OPTIONS["display_width"]
+        max_width_options = OPTIONS["display_width"]
+        if not isinstance(max_width_options, int):
+            raise TypeError(f"`max_width` value of `{max_width_options}` is not a valid int")
+        else:
+            max_width = max_width_options
     first_col = pretty_print(f"  {marker} {name} ", col_width)
     if var.dims:
         dims_str = "({}) ".format(", ".join(map(str, var.dims)))
@@ -295,7 +311,7 @@ def summarize_datavar(name, var, col_width):
     return summarize_variable(name, var.variable, col_width)


-def summarize_coord(name, var, col_width):
+def summarize_coord(name: Hashable, var, col_width: int):
     is_index = name in var.dims
     marker = "*" if is_index else " "
     if is_index:

diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py
index 61ecf46b79b..6881c0bc0ff 100644
--- a/xarray/tests/test_formatting.py
+++ b/xarray/tests/test_formatting.py
@@ -115,7 +115,7 @@ def test_format_items(self):

     def test_format_array_flat(self):
         actual = formatting.format_array_flat(np.arange(100), 2)
-        expected = "0 ... 99"
+        expected = "..."
         assert expected == actual

         actual = formatting.format_array_flat(np.arange(100), 9)
@@ -134,11 +134,13 @@
         expected = "0 1 2 ... 98 99"
         assert expected == actual

+        # NB: Probably not ideal; an alternative would be cutting after the
+        # first ellipsis
         actual = formatting.format_array_flat(np.arange(100.0), 11)
-        expected = "0.0 ... 99.0"
+        expected = "0.0 ... ..."
         assert expected == actual

-        actual = formatting.format_array_flat(np.arange(100.0), 1)
+        actual = formatting.format_array_flat(np.arange(100.0), 12)
         expected = "0.0 ... 99.0"
         assert expected == actual

         expected = ""
         assert expected == actual

-        actual = formatting.format_array_flat(np.arange(1), 0)
+        actual = formatting.format_array_flat(np.arange(1), 1)
         expected = "0"
         assert expected == actual

-        actual = formatting.format_array_flat(np.arange(2), 0)
+        actual = formatting.format_array_flat(np.arange(2), 3)
         expected = "0 1"
         assert expected == actual

-        actual = formatting.format_array_flat(np.arange(4), 0)
-        expected = "0 ... 3"
+        actual = formatting.format_array_flat(np.arange(4), 7)
+        expected = "0 1 2 3"
+        assert expected == actual
+
+        actual = formatting.format_array_flat(np.arange(5), 7)
+        expected = "0 ... 4"
+        assert expected == actual
+
+        long_str = [" ".join(["hello world" for _ in range(100)])]
+        actual = formatting.format_array_flat(np.asarray([long_str]), 21)
+        expected = "'hello world hello..."
         assert expected == actual

     def test_pretty_print(self):