diff --git a/.deepsource.toml b/.deepsource.toml new file mode 100644 index 00000000000..e37b41de303 --- /dev/null +++ b/.deepsource.toml @@ -0,0 +1,18 @@ +version = 1 + +test_patterns = [ + "*/tests/**", + "*/test_*.py" +] + +exclude_patterns = [ + "doc/**", + "ci/**" +] + +[[analyzers]] +name = "python" +enabled = true + + [analyzers.meta] + runtime_version = "3.x.x" \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 37dbcd2ebb0..c712cf27979 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -27,7 +27,7 @@ assignees: '' #### Versions -
Output of `xr.show_versions()` +
Output of xr.show_versions() diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index c30202ac046..a921bddaa23 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,6 +1,6 @@ - - [ ] Fixes #xxxx + - [ ] Closes #xxxx - [ ] Tests added - [ ] Passes `isort -rc . && black . && mypy . && flake8` - [ ] Fully documented, including `whats-new.rst` for all changes and `api.rst` for new API diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9df95648774..26bf4803ef6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,13 +5,14 @@ repos: rev: 4.3.21-2 hooks: - id: isort + files: .+\.py$ # https://github.com/python/black#version-control-integration - repo: https://github.com/python/black rev: stable hooks: - id: black - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v2.2.3 + - repo: https://gitlab.com/pycqa/flake8 + rev: 3.7.9 hooks: - id: flake8 - repo: https://github.com/pre-commit/mirrors-mypy diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 8d43de7b1d5..ff85501c555 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -20,6 +20,8 @@ jobs: conda_env: py37 py38: conda_env: py38 + py38-all-but-dask: + conda_env: py38-all-but-dask py38-upstream-dev: conda_env: py38 upstream_dev: true @@ -32,16 +34,15 @@ jobs: steps: - template: ci/azure/unit-tests.yml -# excluded while waiting for https://github.com/conda-forge/libwebp-feedstock/issues/26 -# - job: MacOSX -# strategy: -# matrix: -# py38: -# conda_env: py38 -# pool: -# vmImage: 'macOS-10.15' -# steps: -# - template: ci/azure/unit-tests.yml +- job: MacOSX + strategy: + matrix: + py38: + conda_env: py38 + pool: + vmImage: 'macOS-10.15' + steps: + - template: ci/azure/unit-tests.yml - job: Windows strategy: diff --git a/ci/requirements/py38-all-but-dask.yml b/ci/requirements/py38-all-but-dask.yml new file mode 100644 index 00000000000..a375d9e1e5a --- /dev/null +++ b/ci/requirements/py38-all-but-dask.yml @@ -0,0 +1,44 @@ +name: xarray-tests +channels: + - conda-forge +dependencies: + - python=3.8 + - black + - boto3 + - bottleneck + - cartopy + - cdms2 + - cfgrib + - cftime + - coveralls + - flake8 + - h5netcdf + - h5py + - hdf5 + - hypothesis + - isort + - lxml # Optional dep of pydap + - matplotlib + - mypy=0.761 # Must match .pre-commit-config.yaml + - nc-time-axis + - netcdf4 + - numba + - numpy + - pandas + - pint + - pip + - pseudonetcdf + - pydap + - pynio + - pytest + - pytest-cov + - pytest-env + - rasterio + - scipy + - seaborn + - setuptools + - sparse + - toolz + - zarr + - pip: + - numbagg diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index cc9517a98ba..313428c29d2 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -18,6 +18,8 @@ Dataset.any Dataset.argmax Dataset.argmin + Dataset.idxmax + Dataset.idxmin Dataset.max Dataset.min Dataset.mean @@ -160,6 +162,8 @@ DataArray.any DataArray.argmax DataArray.argmin + DataArray.idxmax + DataArray.idxmin DataArray.max DataArray.min DataArray.mean diff --git a/doc/api.rst b/doc/api.rst index fe01f495e24..8ec6843d24a 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -181,6 +181,8 @@ Computation :py:attr:`~Dataset.any` :py:attr:`~Dataset.argmax` :py:attr:`~Dataset.argmin` +:py:attr:`~Dataset.idxmax` +:py:attr:`~Dataset.idxmin` :py:attr:`~Dataset.max` :py:attr:`~Dataset.mean` :py:attr:`~Dataset.median` @@ -365,6 +367,8 @@ Computation :py:attr:`~DataArray.any` :py:attr:`~DataArray.argmax` :py:attr:`~DataArray.argmin` 
+:py:attr:`~DataArray.idxmax` +:py:attr:`~DataArray.idxmin` :py:attr:`~DataArray.max` :py:attr:`~DataArray.mean` :py:attr:`~DataArray.median` diff --git a/doc/combining.rst b/doc/combining.rst index 05b7f2efc50..ffc6575c579 100644 --- a/doc/combining.rst +++ b/doc/combining.rst @@ -4,11 +4,12 @@ Combining data -------------- .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) * For combining datasets or data arrays along a single dimension, see concatenate_. @@ -28,11 +29,10 @@ that dimension: .. ipython:: python - arr = xr.DataArray(np.random.randn(2, 3), - [('x', ['a', 'b']), ('y', [10, 20, 30])]) + arr = xr.DataArray(np.random.randn(2, 3), [("x", ["a", "b"]), ("y", [10, 20, 30])]) arr[:, :1] # this resembles how you would use np.concatenate - xr.concat([arr[:, :1], arr[:, 1:]], dim='y') + xr.concat([arr[:, :1], arr[:, 1:]], dim="y") In addition to combining along an existing dimension, ``concat`` can create a new dimension by stacking lower dimensional arrays together: @@ -41,7 +41,7 @@ new dimension by stacking lower dimensional arrays together: arr[0] # to combine these 1d arrays into a 2d array in numpy, you would use np.array - xr.concat([arr[0], arr[1]], 'x') + xr.concat([arr[0], arr[1]], "x") If the second argument to ``concat`` is a new dimension name, the arrays will be concatenated along that new dimension, which is always inserted as the first @@ -49,7 +49,7 @@ dimension: .. ipython:: python - xr.concat([arr[0], arr[1]], 'new_dim') + xr.concat([arr[0], arr[1]], "new_dim") The second argument to ``concat`` can also be an :py:class:`~pandas.Index` or :py:class:`~xarray.DataArray` object as well as a string, in which case it is @@ -57,14 +57,14 @@ used to label the values along the new dimension: .. ipython:: python - xr.concat([arr[0], arr[1]], pd.Index([-90, -100], name='new_dim')) + xr.concat([arr[0], arr[1]], pd.Index([-90, -100], name="new_dim")) Of course, ``concat`` also works on ``Dataset`` objects: .. ipython:: python - ds = arr.to_dataset(name='foo') - xr.concat([ds.sel(x='a'), ds.sel(x='b')], 'x') + ds = arr.to_dataset(name="foo") + xr.concat([ds.sel(x="a"), ds.sel(x="b")], "x") :py:func:`~xarray.concat` has a number of options which provide deeper control over which variables are concatenated and how it handles conflicting variables @@ -84,8 +84,8 @@ To combine variables and coordinates between multiple ``DataArray`` and/or .. ipython:: python - xr.merge([ds, ds.rename({'foo': 'bar'})]) - xr.merge([xr.DataArray(n, name='var%d' % n) for n in range(5)]) + xr.merge([ds, ds.rename({"foo": "bar"})]) + xr.merge([xr.DataArray(n, name="var%d" % n) for n in range(5)]) If you merge another dataset (or a dictionary including data array objects), by default the resulting dataset will be aligned on the **union** of all index @@ -93,7 +93,7 @@ coordinates: .. ipython:: python - other = xr.Dataset({'bar': ('x', [1, 2, 3, 4]), 'x': list('abcd')}) + other = xr.Dataset({"bar": ("x", [1, 2, 3, 4]), "x": list("abcd")}) xr.merge([ds, other]) This ensures that ``merge`` is non-destructive. ``xarray.MergeError`` is raised @@ -116,7 +116,7 @@ used in the :py:class:`~xarray.Dataset` constructor: .. ipython:: python - xr.Dataset({'a': arr[:-1], 'b': arr[1:]}) + xr.Dataset({"a": arr[:-1], "b": arr[1:]}) .. _combine: @@ -131,8 +131,8 @@ are filled with ``NaN``. For example: .. 
ipython:: python - ar0 = xr.DataArray([[0, 0], [0, 0]], [('x', ['a', 'b']), ('y', [-1, 0])]) - ar1 = xr.DataArray([[1, 1], [1, 1]], [('x', ['b', 'c']), ('y', [0, 1])]) + ar0 = xr.DataArray([[0, 0], [0, 0]], [("x", ["a", "b"]), ("y", [-1, 0])]) + ar1 = xr.DataArray([[1, 1], [1, 1]], [("x", ["b", "c"]), ("y", [0, 1])]) ar0.combine_first(ar1) ar1.combine_first(ar0) @@ -152,7 +152,7 @@ variables with new values: .. ipython:: python - ds.update({'space': ('space', [10.2, 9.4, 3.9])}) + ds.update({"space": ("space", [10.2, 9.4, 3.9])}) However, dimensions are still required to be consistent between different Dataset variables, so you cannot change the size of a dimension unless you @@ -170,7 +170,7 @@ syntax: .. ipython:: python - ds['baz'] = xr.DataArray([9, 9, 9, 9, 9], coords=[('x', list('abcde'))]) + ds["baz"] = xr.DataArray([9, 9, 9, 9, 9], coords=[("x", list("abcde"))]) ds.baz Equals and identical @@ -193,7 +193,7 @@ object: .. ipython:: python - arr.identical(arr.rename('bar')) + arr.identical(arr.rename("bar")) :py:attr:`~xarray.Dataset.broadcast_equals` does a more relaxed form of equality check that allows variables to have different dimensions, as long as values @@ -201,8 +201,8 @@ are constant along those new dimensions: .. ipython:: python - left = xr.Dataset(coords={'x': 0}) - right = xr.Dataset({'x': [0, 0, 0]}) + left = xr.Dataset(coords={"x": 0}) + right = xr.Dataset({"x": [0, 0, 0]}) left.broadcast_equals(right) Like pandas objects, two xarray objects are still equal or identical if they have @@ -231,9 +231,9 @@ coordinates as long as any non-missing values agree or are disjoint: .. ipython:: python - ds1 = xr.Dataset({'a': ('x', [10, 20, 30, np.nan])}, {'x': [1, 2, 3, 4]}) - ds2 = xr.Dataset({'a': ('x', [np.nan, 30, 40, 50])}, {'x': [2, 3, 4, 5]}) - xr.merge([ds1, ds2], compat='no_conflicts') + ds1 = xr.Dataset({"a": ("x", [10, 20, 30, np.nan])}, {"x": [1, 2, 3, 4]}) + ds2 = xr.Dataset({"a": ("x", [np.nan, 30, 40, 50])}, {"x": [2, 3, 4, 5]}) + xr.merge([ds1, ds2], compat="no_conflicts") Note that due to the underlying representation of missing values as floating point numbers (``NaN``), variable data type is not always preserved when merging @@ -273,10 +273,12 @@ datasets into a doubly-nested list, e.g: .. ipython:: python - arr = xr.DataArray(name='temperature', data=np.random.randint(5, size=(2, 2)), dims=['x', 'y']) + arr = xr.DataArray( + name="temperature", data=np.random.randint(5, size=(2, 2)), dims=["x", "y"] + ) arr ds_grid = [[arr, arr], [arr, arr]] - xr.combine_nested(ds_grid, concat_dim=['x', 'y']) + xr.combine_nested(ds_grid, concat_dim=["x", "y"]) :py:func:`~xarray.combine_nested` can also be used to explicitly merge datasets with different variables. For example if we have 4 datasets, which are divided @@ -286,10 +288,10 @@ we wish to use ``merge`` instead of ``concat``: .. 
ipython:: python - temp = xr.DataArray(name='temperature', data=np.random.randn(2), dims=['t']) - precip = xr.DataArray(name='precipitation', data=np.random.randn(2), dims=['t']) + temp = xr.DataArray(name="temperature", data=np.random.randn(2), dims=["t"]) + precip = xr.DataArray(name="precipitation", data=np.random.randn(2), dims=["t"]) ds_grid = [[temp, precip], [temp, precip]] - xr.combine_nested(ds_grid, concat_dim=['t', None]) + xr.combine_nested(ds_grid, concat_dim=["t", None]) :py:func:`~xarray.combine_by_coords` is for combining objects which have dimension coordinates which specify their relationship to and order relative to one @@ -302,8 +304,8 @@ coordinates, not on their position in the list passed to ``combine_by_coords``. .. ipython:: python :okwarning: - x1 = xr.DataArray(name='foo', data=np.random.randn(3), coords=[('x', [0, 1, 2])]) - x2 = xr.DataArray(name='foo', data=np.random.randn(3), coords=[('x', [3, 4, 5])]) + x1 = xr.DataArray(name="foo", data=np.random.randn(3), coords=[("x", [0, 1, 2])]) + x2 = xr.DataArray(name="foo", data=np.random.randn(3), coords=[("x", [3, 4, 5])]) xr.combine_by_coords([x2, x1]) These functions can be used by :py:func:`~xarray.open_mfdataset` to open many diff --git a/doc/computation.rst b/doc/computation.rst index 4b8014c4782..3660aed93ed 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -18,17 +18,19 @@ Arithmetic operations with a single DataArray automatically vectorize (like numpy) over all array values: .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) .. ipython:: python - arr = xr.DataArray(np.random.RandomState(0).randn(2, 3), - [('x', ['a', 'b']), ('y', [10, 20, 30])]) + arr = xr.DataArray( + np.random.RandomState(0).randn(2, 3), [("x", ["a", "b"]), ("y", [10, 20, 30])] + ) arr - 3 abs(arr) @@ -45,7 +47,7 @@ Use :py:func:`~xarray.where` to conditionally switch between values: .. ipython:: python - xr.where(arr > 0, 'positive', 'negative') + xr.where(arr > 0, "positive", "negative") Use `@` to perform matrix multiplication: @@ -73,14 +75,14 @@ methods for working with missing data from pandas: .. ipython:: python - x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=['x']) + x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=["x"]) x.isnull() x.notnull() x.count() - x.dropna(dim='x') + x.dropna(dim="x") x.fillna(-1) - x.ffill('x') - x.bfill('x') + x.ffill("x") + x.bfill("x") Like pandas, xarray uses the float value ``np.nan`` (not-a-number) to represent missing values. @@ -90,9 +92,12 @@ for filling missing values via 1D interpolation. .. ipython:: python - x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=['x'], - coords={'xx': xr.Variable('x', [0, 1, 1.1, 1.9, 3])}) - x.interpolate_na(dim='x', method='linear', use_coordinate='xx') + x = xr.DataArray( + [0, 1, np.nan, np.nan, 2], + dims=["x"], + coords={"xx": xr.Variable("x", [0, 1, 1.1, 1.9, 3])}, + ) + x.interpolate_na(dim="x", method="linear", use_coordinate="xx") Note that xarray slightly diverges from the pandas ``interpolate`` syntax by providing the ``use_coordinate`` keyword which facilitates a clear specification @@ -110,8 +115,8 @@ applied along particular dimension(s): .. ipython:: python - arr.sum(dim='x') - arr.std(['x', 'y']) + arr.sum(dim="x") + arr.std(["x", "y"]) arr.min() @@ -121,7 +126,7 @@ for wrapping code designed to work with numpy arrays), you can use the .. 
ipython:: python - arr.get_axis_num('y') + arr.get_axis_num("y") These operations automatically skip missing values, like in pandas: @@ -142,8 +147,7 @@ method supports rolling window aggregation: .. ipython:: python - arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), - dims=('x', 'y')) + arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), dims=("x", "y")) arr :py:meth:`~xarray.DataArray.rolling` is applied along one dimension using the @@ -194,8 +198,9 @@ We can also manually iterate through ``Rolling`` objects: .. code:: python - for label, arr_window in r: - # arr_window is a view of x + for label, arr_window in r: + # arr_window is a view of x + ... .. _comput.rolling_exp: @@ -222,9 +227,9 @@ windowed rolling, convolution, short-time FFT etc. .. ipython:: python # rolling with 2-point stride - rolling_da = r.construct('window_dim', stride=2) + rolling_da = r.construct("window_dim", stride=2) rolling_da - rolling_da.mean('window_dim', skipna=False) + rolling_da.mean("window_dim", skipna=False) Because the ``DataArray`` given by ``r.construct('window_dim')`` is a view of the original array, it is memory efficient. @@ -232,8 +237,8 @@ You can also use ``construct`` to compute a weighted rolling sum: .. ipython:: python - weight = xr.DataArray([0.25, 0.5, 0.25], dims=['window']) - arr.rolling(y=3).construct('window').dot(weight) + weight = xr.DataArray([0.25, 0.5, 0.25], dims=["window"]) + arr.rolling(y=3).construct("window").dot(weight) .. note:: numpy's Nan-aggregation functions such as ``nansum`` copy the original array. @@ -254,52 +259,52 @@ support weighted ``sum`` and weighted ``mean``. .. ipython:: python - coords = dict(month=('month', [1, 2, 3])) + coords = dict(month=("month", [1, 2, 3])) - prec = xr.DataArray([1.1, 1.0, 0.9], dims=('month', ), coords=coords) - weights = xr.DataArray([31, 28, 31], dims=('month', ), coords=coords) + prec = xr.DataArray([1.1, 1.0, 0.9], dims=("month",), coords=coords) + weights = xr.DataArray([31, 28, 31], dims=("month",), coords=coords) Create a weighted object: .. ipython:: python - weighted_prec = prec.weighted(weights) - weighted_prec + weighted_prec = prec.weighted(weights) + weighted_prec Calculate the weighted sum: .. ipython:: python - weighted_prec.sum() + weighted_prec.sum() Calculate the weighted mean: .. ipython:: python - weighted_prec.mean(dim="month") + weighted_prec.mean(dim="month") The weighted sum corresponds to: .. ipython:: python - weighted_sum = (prec * weights).sum() - weighted_sum + weighted_sum = (prec * weights).sum() + weighted_sum and the weighted mean to: .. ipython:: python - weighted_mean = weighted_sum / weights.sum() - weighted_mean + weighted_mean = weighted_sum / weights.sum() + weighted_mean However, the functions also take missing values in the data into account: .. ipython:: python - data = xr.DataArray([np.NaN, 2, 4]) - weights = xr.DataArray([8, 1, 1]) + data = xr.DataArray([np.NaN, 2, 4]) + weights = xr.DataArray([8, 1, 1]) - data.weighted(weights).mean() + data.weighted(weights).mean() Using ``(data * weights).sum() / weights.sum()`` would (incorrectly) result in 0.6. @@ -309,16 +314,16 @@ If the weights add up to to 0, ``sum`` returns 0: .. ipython:: python - data = xr.DataArray([1.0, 1.0]) - weights = xr.DataArray([-1.0, 1.0]) + data = xr.DataArray([1.0, 1.0]) + weights = xr.DataArray([-1.0, 1.0]) - data.weighted(weights).sum() + data.weighted(weights).sum() and ``mean`` returns ``NaN``: .. ipython:: python - data.weighted(weights).mean() + data.weighted(weights).mean() .. 
note:: @@ -336,18 +341,21 @@ methods. This supports the block aggregation along multiple dimensions, .. ipython:: python - x = np.linspace(0, 10, 300) - t = pd.date_range('15/12/1999', periods=364) - da = xr.DataArray(np.sin(x) * np.cos(np.linspace(0, 1, 364)[:, np.newaxis]), - dims=['time', 'x'], coords={'time': t, 'x': x}) - da + x = np.linspace(0, 10, 300) + t = pd.date_range("15/12/1999", periods=364) + da = xr.DataArray( + np.sin(x) * np.cos(np.linspace(0, 1, 364)[:, np.newaxis]), + dims=["time", "x"], + coords={"time": t, "x": x}, + ) + da In order to take a block mean for every 7 days along ``time`` dimension and every 2 points along ``x`` dimension, .. ipython:: python - da.coarsen(time=7, x=2).mean() + da.coarsen(time=7, x=2).mean() :py:meth:`~xarray.DataArray.coarsen` raises an ``ValueError`` if the data length is not a multiple of the corresponding window size. @@ -356,14 +364,14 @@ the excess entries or padding ``nan`` to insufficient entries, .. ipython:: python - da.coarsen(time=30, x=2, boundary='trim').mean() + da.coarsen(time=30, x=2, boundary="trim").mean() If you want to apply a specific function to coordinate, you can pass the function or method name to ``coord_func`` option, .. ipython:: python - da.coarsen(time=7, x=2, coord_func={'time': 'min'}).mean() + da.coarsen(time=7, x=2, coord_func={"time": "min"}).mean() .. _compute.using_coordinates: @@ -377,24 +385,25 @@ central finite differences using their coordinates, .. ipython:: python - a = xr.DataArray([0, 1, 2, 3], dims=['x'], coords=[[0.1, 0.11, 0.2, 0.3]]) + a = xr.DataArray([0, 1, 2, 3], dims=["x"], coords=[[0.1, 0.11, 0.2, 0.3]]) a - a.differentiate('x') + a.differentiate("x") This method can be used also for multidimensional arrays, .. ipython:: python - a = xr.DataArray(np.arange(8).reshape(4, 2), dims=['x', 'y'], - coords={'x': [0.1, 0.11, 0.2, 0.3]}) - a.differentiate('x') + a = xr.DataArray( + np.arange(8).reshape(4, 2), dims=["x", "y"], coords={"x": [0.1, 0.11, 0.2, 0.3]} + ) + a.differentiate("x") :py:meth:`~xarray.DataArray.integrate` computes integration based on trapezoidal rule using their coordinates, .. ipython:: python - a.integrate('x') + a.integrate("x") .. note:: These methods are limited to simple cartesian geometry. Differentiation @@ -412,9 +421,9 @@ best fitting coefficients along a given dimension and for a given order, .. ipython:: python - x = xr.DataArray(np.arange(10), dims=['x'], name='x') - a = xr.DataArray(3 + 4 * x, dims=['x'], coords={'x': x}) - out = a.polyfit(dim='x', deg=1, full=True) + x = xr.DataArray(np.arange(10), dims=["x"], name="x") + a = xr.DataArray(3 + 4 * x, dims=["x"], coords={"x": x}) + out = a.polyfit(dim="x", deg=1, full=True) out The method outputs a dataset containing the coefficients (and more if `full=True`). @@ -443,9 +452,9 @@ arrays with different sizes aligned along different dimensions: .. ipython:: python - a = xr.DataArray([1, 2], [('x', ['a', 'b'])]) + a = xr.DataArray([1, 2], [("x", ["a", "b"])]) a - b = xr.DataArray([-1, -2, -3], [('y', [10, 20, 30])]) + b = xr.DataArray([-1, -2, -3], [("y", [10, 20, 30])]) b With xarray, we can apply binary mathematical operations to these arrays, and @@ -460,7 +469,7 @@ appeared: .. ipython:: python - c = xr.DataArray(np.arange(6).reshape(3, 2), [b['y'], a['x']]) + c = xr.DataArray(np.arange(6).reshape(3, 2), [b["y"], a["x"]]) c a + c @@ -494,7 +503,7 @@ operations. The default result of a binary operation is by the *intersection* .. 
ipython:: python - arr = xr.DataArray(np.arange(3), [('x', range(3))]) + arr = xr.DataArray(np.arange(3), [("x", range(3))]) arr + arr[:-1] If coordinate values for a dimension are missing on either argument, all @@ -503,7 +512,7 @@ matching dimensions must have the same size: .. ipython:: :verbatim: - In [1]: arr + xr.DataArray([1, 2], dims='x') + In [1]: arr + xr.DataArray([1, 2], dims="x") ValueError: arguments without labels along dimension 'x' cannot be aligned because they have different dimension size(s) {2} than the size of the aligned dimension labels: 3 @@ -562,16 +571,20 @@ variables: .. ipython:: python - ds = xr.Dataset({'x_and_y': (('x', 'y'), np.random.randn(3, 5)), - 'x_only': ('x', np.random.randn(3))}, - coords=arr.coords) + ds = xr.Dataset( + { + "x_and_y": (("x", "y"), np.random.randn(3, 5)), + "x_only": ("x", np.random.randn(3)), + }, + coords=arr.coords, + ) ds > 0 Datasets support most of the same methods found on data arrays: .. ipython:: python - ds.mean(dim='x') + ds.mean(dim="x") abs(ds) Datasets also support NumPy ufuncs (requires NumPy v1.13 or newer), or @@ -594,7 +607,7 @@ Arithmetic between two datasets matches data variables of the same name: .. ipython:: python - ds2 = xr.Dataset({'x_and_y': 0, 'x_only': 100}) + ds2 = xr.Dataset({"x_and_y": 0, "x_only": 100}) ds - ds2 Similarly to index based alignment, the result has the intersection of all @@ -638,7 +651,7 @@ any additional arguments: .. ipython:: python squared_error = lambda x, y: (x - y) ** 2 - arr1 = xr.DataArray([0, 1, 2, 3], dims='x') + arr1 = xr.DataArray([0, 1, 2, 3], dims="x") xr.apply_ufunc(squared_error, arr1, 1) For using more complex operations that consider some array values collectively, @@ -658,21 +671,21 @@ to set ``axis=-1``. As an example, here is how we would wrap .. code-block:: python def vector_norm(x, dim, ord=None): - return xr.apply_ufunc(np.linalg.norm, x, - input_core_dims=[[dim]], - kwargs={'ord': ord, 'axis': -1}) + return xr.apply_ufunc( + np.linalg.norm, x, input_core_dims=[[dim]], kwargs={"ord": ord, "axis": -1} + ) .. ipython:: python - :suppress: + :suppress: def vector_norm(x, dim, ord=None): - return xr.apply_ufunc(np.linalg.norm, x, - input_core_dims=[[dim]], - kwargs={'ord': ord, 'axis': -1}) + return xr.apply_ufunc( + np.linalg.norm, x, input_core_dims=[[dim]], kwargs={"ord": ord, "axis": -1} + ) .. ipython:: python - vector_norm(arr1, dim='x') + vector_norm(arr1, dim="x") Because ``apply_ufunc`` follows a standard convention for ufuncs, it plays nicely with tools for building vectorized functions, like diff --git a/doc/contributing.rst b/doc/contributing.rst index f581bcd9741..51dba2bb0cc 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -261,13 +261,13 @@ Some other important things to know about the docs: .. ipython:: python x = 2 - x**3 + x ** 3 will be rendered as:: In [1]: x = 2 - In [2]: x**3 + In [2]: x ** 3 Out[2]: 8 Almost all code examples in the docs are run (and the output saved) during the @@ -467,7 +467,7 @@ typically find tests wrapped in a class. .. code-block:: python class TestReallyCoolFeature: - .... + ... Going forward, we are moving to a more *functional* style using the `pytest `__ framework, which offers a richer @@ -477,7 +477,7 @@ writing test classes, we will write test functions like this: .. code-block:: python def test_really_cool_feature(): - .... + ... 
Using ``pytest`` ~~~~~~~~~~~~~~~~ @@ -508,17 +508,23 @@ We would name this file ``test_cool_feature.py`` and put in an appropriate place from xarray.testing import assert_equal - @pytest.mark.parametrize('dtype', ['int8', 'int16', 'int32', 'int64']) + @pytest.mark.parametrize("dtype", ["int8", "int16", "int32", "int64"]) def test_dtypes(dtype): assert str(np.dtype(dtype)) == dtype - @pytest.mark.parametrize('dtype', ['float32', - pytest.param('int16', marks=pytest.mark.skip), - pytest.param('int32', marks=pytest.mark.xfail( - reason='to show how it works'))]) + @pytest.mark.parametrize( + "dtype", + [ + "float32", + pytest.param("int16", marks=pytest.mark.skip), + pytest.param( + "int32", marks=pytest.mark.xfail(reason="to show how it works") + ), + ], + ) def test_mark(dtype): - assert str(np.dtype(dtype)) == 'float32' + assert str(np.dtype(dtype)) == "float32" @pytest.fixture @@ -526,7 +532,7 @@ We would name this file ``test_cool_feature.py`` and put in an appropriate place return xr.DataArray([1, 2, 3]) - @pytest.fixture(params=['int8', 'int16', 'int32', 'int64']) + @pytest.fixture(params=["int8", "int16", "int32", "int64"]) def dtype(request): return request.param diff --git a/doc/dask.rst b/doc/dask.rst index f11edcdbcf0..f9d6769d856 100644 --- a/doc/dask.rst +++ b/doc/dask.rst @@ -56,19 +56,26 @@ argument to :py:func:`~xarray.open_dataset` or using the import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) np.set_printoptions(precision=3, linewidth=100, threshold=100, edgeitems=3) - ds = xr.Dataset({'temperature': (('time', 'latitude', 'longitude'), - np.random.randn(30, 180, 180)), - 'time': pd.date_range('2015-01-01', periods=30), - 'longitude': np.arange(180), - 'latitude': np.arange(89.5, -90.5, -1)}) - ds.to_netcdf('example-data.nc') + ds = xr.Dataset( + { + "temperature": ( + ("time", "latitude", "longitude"), + np.random.randn(30, 180, 180), + ), + "time": pd.date_range("2015-01-01", periods=30), + "longitude": np.arange(180), + "latitude": np.arange(89.5, -90.5, -1), + } + ) + ds.to_netcdf("example-data.nc") .. ipython:: python - ds = xr.open_dataset('example-data.nc', chunks={'time': 10}) + ds = xr.open_dataset("example-data.nc", chunks={"time": 10}) ds In this example ``latitude`` and ``longitude`` do not appear in the ``chunks`` @@ -106,7 +113,7 @@ usual way. .. ipython:: python - ds.to_netcdf('manipulated-example-data.nc') + ds.to_netcdf("manipulated-example-data.nc") By setting the ``compute`` argument to ``False``, :py:meth:`~xarray.Dataset.to_netcdf` will return a ``dask.delayed`` object that can be computed later. @@ -114,8 +121,9 @@ will return a ``dask.delayed`` object that can be computed later. .. ipython:: python from dask.diagnostics import ProgressBar + # or distributed.progress when using the distributed scheduler - delayed_obj = ds.to_netcdf('manipulated-example-data.nc', compute=False) + delayed_obj = ds.to_netcdf("manipulated-example-data.nc", compute=False) with ProgressBar(): results = delayed_obj.compute() @@ -141,8 +149,9 @@ Dask DataFrames do not support multi-indexes so the coordinate variables from th :suppress: import os - os.remove('example-data.nc') - os.remove('manipulated-example-data.nc') + + os.remove("example-data.nc") + os.remove("manipulated-example-data.nc") Using Dask with xarray ---------------------- @@ -199,7 +208,7 @@ Dask arrays using the :py:meth:`~xarray.Dataset.persist` method: .. 
ipython:: python - ds = ds.persist() + ds = ds.persist() :py:meth:`~xarray.Dataset.persist` is particularly useful when using a distributed cluster because the data will be loaded into distributed memory @@ -224,11 +233,11 @@ sizes of Dask arrays is done with the :py:meth:`~xarray.Dataset.chunk` method: .. ipython:: python :suppress: - ds = ds.chunk({'time': 10}) + ds = ds.chunk({"time": 10}) .. ipython:: python - rechunked = ds.chunk({'latitude': 100, 'longitude': 100}) + rechunked = ds.chunk({"latitude": 100, "longitude": 100}) You can view the size of existing chunks on an array by viewing the :py:attr:`~xarray.Dataset.chunks` attribute: @@ -256,6 +265,7 @@ lazy Dask arrays, in the :ref:`xarray.ufuncs ` module: .. ipython:: python import xarray.ufuncs as xu + xu.sin(rechunked) To access Dask arrays directly, use the new @@ -311,24 +321,32 @@ we use to calculate `Spearman's rank-correlation coefficient ` and @@ -453,15 +470,15 @@ dataset variables: .. ipython:: python - ds.rename({'temperature': 'temp', 'precipitation': 'precip'}) + ds.rename({"temperature": "temp", "precipitation": "precip"}) The related :py:meth:`~xarray.Dataset.swap_dims` method allows you do to swap dimension and non-dimension variables: .. ipython:: python - ds.coords['day'] = ('time', [6, 7, 8]) - ds.swap_dims({'time': 'day'}) + ds.coords["day"] = ("time", [6, 7, 8]) + ds.swap_dims({"time": "day"}) .. _coordinates: @@ -519,8 +536,8 @@ To convert back and forth between data and coordinates, you can use the .. ipython:: python ds.reset_coords() - ds.set_coords(['temperature', 'precipitation']) - ds['temperature'].reset_coords(drop=True) + ds.set_coords(["temperature", "precipitation"]) + ds["temperature"].reset_coords(drop=True) Notice that these operations skip coordinates with names given by dimensions, as used for indexing. This mostly because we are not entirely sure how to @@ -544,7 +561,7 @@ logic used for merging coordinates in arithmetic operations .. ipython:: python - alt = xr.Dataset(coords={'z': [10], 'lat': 0, 'lon': 0}) + alt = xr.Dataset(coords={"z": [10], "lat": 0, "lon": 0}) ds.coords.merge(alt.coords) The ``coords.merge`` method may be useful if you want to implement your own @@ -560,7 +577,7 @@ To convert a coordinate (or any ``DataArray``) into an actual .. ipython:: python - ds['time'].to_index() + ds["time"].to_index() A useful shortcut is the ``indexes`` property (on both ``DataArray`` and ``Dataset``), which lazily constructs a dictionary whose keys are given by each @@ -577,9 +594,10 @@ Xarray supports labeling coordinate values with a :py:class:`pandas.MultiIndex`: .. ipython:: python - midx = pd.MultiIndex.from_arrays([['R', 'R', 'V', 'V'], [.1, .2, .7, .9]], - names=('band', 'wn')) - mda = xr.DataArray(np.random.rand(4), coords={'spec': midx}, dims='spec') + midx = pd.MultiIndex.from_arrays( + [["R", "R", "V", "V"], [0.1, 0.2, 0.7, 0.9]], names=("band", "wn") + ) + mda = xr.DataArray(np.random.rand(4), coords={"spec": midx}, dims="spec") mda For convenience multi-index levels are directly accessible as "virtual" or @@ -587,8 +605,8 @@ For convenience multi-index levels are directly accessible as "virtual" or .. ipython:: python - mda['band'] - mda.wn + mda["band"] + mda.wn Indexing with multi-index levels is also possible using the ``sel`` method (see :ref:`multi-level indexing`). 
diff --git a/doc/examples/monthly-means.ipynb b/doc/examples/monthly-means.ipynb index fad40e019de..bc88f4a9fc9 100644 --- a/doc/examples/monthly-means.ipynb +++ b/doc/examples/monthly-means.ipynb @@ -29,89 +29,9 @@ "import numpy as np\n", "import pandas as pd\n", "import xarray as xr\n", - "from netCDF4 import num2date\n", "import matplotlib.pyplot as plt " ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Some calendar information so we can support any netCDF calendar. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2018-11-28T20:51:35.991620Z", - "start_time": "2018-11-28T20:51:35.960336Z" - } - }, - "outputs": [], - "source": [ - "dpm = {'noleap': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", - " '365_day': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", - " 'standard': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", - " 'gregorian': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", - " 'proleptic_gregorian': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", - " 'all_leap': [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", - " '366_day': [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", - " '360_day': [0, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30]} " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### A few calendar functions to determine the number of days in each month\n", - "If you were just using the standard calendar, it would be easy to use the `calendar.month_range` function." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2018-11-28T20:51:36.015151Z", - "start_time": "2018-11-28T20:51:35.994079Z" - } - }, - "outputs": [], - "source": [ - "def leap_year(year, calendar='standard'):\n", - " \"\"\"Determine if year is a leap year\"\"\"\n", - " leap = False\n", - " if ((calendar in ['standard', 'gregorian',\n", - " 'proleptic_gregorian', 'julian']) and\n", - " (year % 4 == 0)):\n", - " leap = True\n", - " if ((calendar == 'proleptic_gregorian') and\n", - " (year % 100 == 0) and\n", - " (year % 400 != 0)):\n", - " leap = False\n", - " elif ((calendar in ['standard', 'gregorian']) and\n", - " (year % 100 == 0) and (year % 400 != 0) and\n", - " (year < 1583)):\n", - " leap = False\n", - " return leap\n", - "\n", - "def get_dpm(time, calendar='standard'):\n", - " \"\"\"\n", - " return a array of days per month corresponding to the months provided in `months`\n", - " \"\"\"\n", - " month_length = np.zeros(len(time), dtype=np.int)\n", - " \n", - " cal_days = dpm[calendar]\n", - " \n", - " for i, (month, year) in enumerate(zip(time.month, time.year)):\n", - " month_length[i] = cal_days[month]\n", - " if leap_year(year, calendar=calendar) and month == 2:\n", - " month_length[i] += 1\n", - " return month_length" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -131,7 +51,7 @@ "outputs": [], "source": [ "ds = xr.tutorial.open_dataset('rasm').load()\n", - "print(ds)" + "ds" ] }, { @@ -143,7 +63,17 @@ "- calculate the month lengths for each monthly data record\n", "- calculate weights using `groupby('time.season')`\n", "\n", - "Finally, we just need to multiply our weights by the `Dataset` and sum allong the time dimension. " + "Finally, we just need to multiply our weights by the `Dataset` and sum allong the time dimension. Creating a `DataArray` for the month length is as easy as using the `days_in_month` accessor on the time coordinate. 
The calendar type, in this case `'noleap'`, is automatically considered in this operation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "month_length = ds.time.dt.days_in_month\n", + "month_length" ] }, { @@ -157,13 +87,8 @@ }, "outputs": [], "source": [ - "# Make a DataArray with the number of days in each month, size = len(time)\n", - "month_length = xr.DataArray(get_dpm(ds.time.to_index(), calendar='noleap'),\n", - " coords=[ds.time], name='month_length')\n", - "\n", "# Calculate the weights by grouping by 'time.season'.\n", - "# Conversion to float type ('astype(float)') only necessary for Python 2.x\n", - "weights = month_length.groupby('time.season') / month_length.astype(float).groupby('time.season').sum()\n", + "weights = month_length.groupby('time.season') / month_length.groupby('time.season').sum()\n", "\n", "# Test that the sum of the weights for each season is 1.0\n", "np.testing.assert_allclose(weights.groupby('time.season').sum().values, np.ones(4))\n", @@ -183,7 +108,7 @@ }, "outputs": [], "source": [ - "print(ds_weighted)" + "ds_weighted" ] }, { @@ -262,13 +187,9 @@ "source": [ "# Wrap it into a simple function\n", "def season_mean(ds, calendar='standard'):\n", - " # Make a DataArray of season/year groups\n", - " year_season = xr.DataArray(ds.time.to_index().to_period(freq='Q-NOV').to_timestamp(how='E'),\n", - " coords=[ds.time], name='year_season')\n", - "\n", " # Make a DataArray with the number of days in each month, size = len(time)\n", - " month_length = xr.DataArray(get_dpm(ds.time.to_index(), calendar=calendar),\n", - " coords=[ds.time], name='month_length')\n", + " month_length = ds.time.dt.days_in_month\n", + "\n", " # Calculate the weights by grouping by 'time.season'\n", " weights = month_length.groupby('time.season') / month_length.groupby('time.season').sum()\n", "\n", @@ -278,13 +199,6 @@ " # Calculate the weighted average\n", " return (ds * weights).groupby('time.season').sum(dim='time')" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -304,7 +218,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.7.3" }, "toc": { "base_numbering": 1, @@ -321,5 +235,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/doc/faq.rst b/doc/faq.rst index 576cec5c2b1..a2b8be47e06 100644 --- a/doc/faq.rst +++ b/doc/faq.rst @@ -4,11 +4,12 @@ Frequently Asked Questions ========================== .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) @@ -103,21 +104,21 @@ code fragment .. ipython:: python arr = xr.DataArray([1, 2, 3]) - pd.Series({'x': arr[0], 'mean': arr.mean(), 'std': arr.std()}) + pd.Series({"x": arr[0], "mean": arr.mean(), "std": arr.std()}) does not yield the pandas DataFrame we expected. We need to specify the type conversion ourselves: .. ipython:: python - pd.Series({'x': arr[0], 'mean': arr.mean(), 'std': arr.std()}, dtype=float) + pd.Series({"x": arr[0], "mean": arr.mean(), "std": arr.std()}, dtype=float) Alternatively, we could use the ``item`` method or the ``float`` constructor to convert values one at a time .. ipython:: python - pd.Series({'x': arr[0].item(), 'mean': float(arr.mean())}) + pd.Series({"x": arr[0].item(), "mean": float(arr.mean())}) .. 
_approach to metadata: diff --git a/doc/groupby.rst b/doc/groupby.rst index 927e192eb6c..c72a26c45ea 100644 --- a/doc/groupby.rst +++ b/doc/groupby.rst @@ -26,11 +26,12 @@ Split Let's create a simple example dataset: .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) .. ipython:: python @@ -47,20 +48,20 @@ use a DataArray directly), we get back a ``GroupBy`` object: .. ipython:: python - ds.groupby('letters') + ds.groupby("letters") This object works very similarly to a pandas GroupBy object. You can view the group indices with the ``groups`` attribute: .. ipython:: python - ds.groupby('letters').groups + ds.groupby("letters").groups You can also iterate over groups in ``(label, group)`` pairs: .. ipython:: python - list(ds.groupby('letters')) + list(ds.groupby("letters")) Just like in pandas, creating a GroupBy object is cheap: it does not actually split the data until you access particular values. @@ -75,8 +76,8 @@ a customized coordinate, but xarray facilitates this via the .. ipython:: python - x_bins = [0,25,50] - ds.groupby_bins('x', x_bins).groups + x_bins = [0, 25, 50] + ds.groupby_bins("x", x_bins).groups The binning is implemented via :func:`pandas.cut`, whose documentation details how the bins are assigned. As seen in the example above, by default, the bins are @@ -86,8 +87,8 @@ choose `float` labels which identify the bin centers: .. ipython:: python - x_bin_labels = [12.5,37.5] - ds.groupby_bins('x', x_bins, labels=x_bin_labels).groups + x_bin_labels = [12.5, 37.5] + ds.groupby_bins("x", x_bins, labels=x_bin_labels).groups Apply @@ -102,7 +103,8 @@ concatenated back together along the group axis: def standardize(x): return (x - x.mean()) / x.std() - arr.groupby('letters').map(standardize) + + arr.groupby("letters").map(standardize) GroupBy objects also have a :py:meth:`~xarray.core.groupby.DatasetGroupBy.reduce` method and methods like :py:meth:`~xarray.core.groupby.DatasetGroupBy.mean` as shortcuts for applying an @@ -110,14 +112,14 @@ aggregation function: .. ipython:: python - arr.groupby('letters').mean(dim='x') + arr.groupby("letters").mean(dim="x") Using a groupby is thus also a convenient shortcut for aggregating over all dimensions *other than* the provided one: .. ipython:: python - ds.groupby('x').std(...) + ds.groupby("x").std(...) .. note:: @@ -134,7 +136,7 @@ values for group along the grouped dimension: .. ipython:: python - ds.groupby('letters').first(...) + ds.groupby("letters").first(...) By default, they skip missing values (control this with ``skipna``). @@ -149,15 +151,15 @@ coordinates. For example: .. ipython:: python - alt = arr.groupby('letters').mean(...) + alt = arr.groupby("letters").mean(...) alt - ds.groupby('letters') - alt + ds.groupby("letters") - alt This last line is roughly equivalent to the following:: results = [] for label, group in ds.groupby('letters'): - results.append(group - alt.sel(x=label)) + results.append(group - alt.sel(letters=label)) xr.concat(results, dim='x') Squeezing @@ -169,11 +171,11 @@ the ``squeeze`` parameter: .. ipython:: python - next(iter(arr.groupby('x'))) + next(iter(arr.groupby("x"))) .. ipython:: python - next(iter(arr.groupby('x', squeeze=False))) + next(iter(arr.groupby("x", squeeze=False))) Although xarray will attempt to automatically :py:attr:`~xarray.DataArray.transpose` dimensions back into their original order @@ -197,13 +199,17 @@ __ http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#_two_dimen .. 
ipython:: python - da = xr.DataArray([[0,1],[2,3]], - coords={'lon': (['ny','nx'], [[30,40],[40,50]] ), - 'lat': (['ny','nx'], [[10,10],[20,20]] ),}, - dims=['ny','nx']) + da = xr.DataArray( + [[0, 1], [2, 3]], + coords={ + "lon": (["ny", "nx"], [[30, 40], [40, 50]]), + "lat": (["ny", "nx"], [[10, 10], [20, 20]]), + }, + dims=["ny", "nx"], + ) da - da.groupby('lon').sum(...) - da.groupby('lon').map(lambda x: x - x.mean(), shortcut=False) + da.groupby("lon").sum(...) + da.groupby("lon").map(lambda x: x - x.mean(), shortcut=False) Because multidimensional groups have the ability to generate a very large number of bins, coarse-binning via :py:meth:`~xarray.Dataset.groupby_bins` @@ -211,7 +217,7 @@ may be desirable: .. ipython:: python - da.groupby_bins('lon', [0,45,50]).sum() + da.groupby_bins("lon", [0, 45, 50]).sum() These methods group by `lon` values. It is also possible to groupby each cell in a grid, regardless of value, by stacking multiple dimensions, @@ -219,5 +225,5 @@ applying your function, and then unstacking the result: .. ipython:: python - stacked = da.stack(gridcell=['ny', 'nx']) - stacked.groupby('gridcell').sum(...).unstack('gridcell') + stacked = da.stack(gridcell=["ny", "nx"]) + stacked.groupby("gridcell").sum(...).unstack("gridcell") \ No newline at end of file diff --git a/doc/indexing.rst b/doc/indexing.rst index cfbb84a8343..af8e44fb80b 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -4,11 +4,12 @@ Indexing and selecting data =========================== .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) xarray offers extremely flexible indexing routines that combine the best @@ -60,9 +61,13 @@ DataArray: .. ipython:: python - da = xr.DataArray(np.random.rand(4, 3), - [('time', pd.date_range('2000-01-01', periods=4)), - ('space', ['IA', 'IL', 'IN'])]) + da = xr.DataArray( + np.random.rand(4, 3), + [ + ("time", pd.date_range("2000-01-01", periods=4)), + ("space", ["IA", "IL", "IN"]), + ], + ) da[:2] da[0, 0] da[:, [2, 1]] @@ -81,7 +86,7 @@ fast. To do label based indexing, use the :py:attr:`~xarray.DataArray.loc` attri .. ipython:: python - da.loc['2000-01-01':'2000-01-02', 'IA'] + da.loc["2000-01-01":"2000-01-02", "IA"] In this example, the selected is a subpart of the array in the range '2000-01-01':'2000-01-02' along the first coordinate `time` @@ -98,7 +103,7 @@ Setting values with label based indexing is also supported: .. ipython:: python - da.loc['2000-01-01', ['IL', 'IN']] = -10 + da.loc["2000-01-01", ["IL", "IN"]] = -10 da @@ -117,7 +122,7 @@ use them explicitly to slice data. There are two ways to do this: da[dict(space=0, time=slice(None, 2))] # index by dimension coordinate labels - da.loc[dict(time=slice('2000-01-01', '2000-01-02'))] + da.loc[dict(time=slice("2000-01-01", "2000-01-02"))] 2. Use the :py:meth:`~xarray.DataArray.sel` and :py:meth:`~xarray.DataArray.isel` convenience methods: @@ -128,7 +133,7 @@ use them explicitly to slice data. There are two ways to do this: da.isel(space=0, time=slice(None, 2)) # index by dimension coordinate labels - da.sel(time=slice('2000-01-01', '2000-01-02')) + da.sel(time=slice("2000-01-01", "2000-01-02")) The arguments to these methods can be any objects that could index the array along the dimension given by the keyword, e.g., labels for an individual value, @@ -156,16 +161,16 @@ enabling nearest neighbor (inexact) lookups by use of the methods ``'pad'``, .. 
ipython:: python - da = xr.DataArray([1, 2, 3], [('x', [0, 1, 2])]) - da.sel(x=[1.1, 1.9], method='nearest') - da.sel(x=0.1, method='backfill') - da.reindex(x=[0.5, 1, 1.5, 2, 2.5], method='pad') + da = xr.DataArray([1, 2, 3], [("x", [0, 1, 2])]) + da.sel(x=[1.1, 1.9], method="nearest") + da.sel(x=0.1, method="backfill") + da.reindex(x=[0.5, 1, 1.5, 2, 2.5], method="pad") Tolerance limits the maximum distance for valid matches with an inexact lookup: .. ipython:: python - da.reindex(x=[1.1, 1.5], method='nearest', tolerance=0.2) + da.reindex(x=[1.1, 1.5], method="nearest", tolerance=0.2) The method parameter is not yet supported if any of the arguments to ``.sel()`` is a ``slice`` object: @@ -173,7 +178,7 @@ to ``.sel()`` is a ``slice`` object: .. ipython:: :verbatim: - In [1]: da.sel(x=slice(1, 3), method='nearest') + In [1]: da.sel(x=slice(1, 3), method="nearest") NotImplementedError However, you don't need to use ``method`` to do inexact slicing. Slicing @@ -182,15 +187,15 @@ labels are monotonic increasing: .. ipython:: python - da.sel(x=slice(0.9, 3.1)) + da.sel(x=slice(0.9, 3.1)) Indexing axes with monotonic decreasing labels also works, as long as the ``slice`` or ``.loc`` arguments are also decreasing: .. ipython:: python - reversed_da = da[::-1] - reversed_da.loc[3.1:0.9] + reversed_da = da[::-1] + reversed_da.loc[3.1:0.9] .. note:: @@ -227,7 +232,7 @@ arrays). However, you can do normal indexing with dimension names: .. ipython:: python ds[dict(space=[0], time=[0])] - ds.loc[dict(time='2000-01-01')] + ds.loc[dict(time="2000-01-01")] Using indexing to *assign* values to a subset of dataset (e.g., ``ds[dict(space=0)] = 1``) is not yet supported. @@ -240,7 +245,7 @@ index labels along a dimension dropped: .. ipython:: python - ds.drop_sel(space=['IN', 'IL']) + ds.drop_sel(space=["IN", "IL"]) ``drop_sel`` is both a ``Dataset`` and ``DataArray`` method. @@ -249,7 +254,7 @@ Any variables with these dimensions are also dropped: .. ipython:: python - ds.drop_dims('time') + ds.drop_dims("time") .. _masking with where: @@ -263,7 +268,7 @@ xarray, use :py:meth:`~xarray.DataArray.where`: .. ipython:: python - da = xr.DataArray(np.arange(16).reshape(4, 4), dims=['x', 'y']) + da = xr.DataArray(np.arange(16).reshape(4, 4), dims=["x", "y"]) da.where(da.x + da.y < 4) This is particularly useful for ragged indexing of multi-dimensional data, @@ -296,7 +301,7 @@ multiple values, use :py:meth:`~xarray.DataArray.isin`: .. ipython:: python - da = xr.DataArray([1, 2, 3, 4, 5], dims=['x']) + da = xr.DataArray([1, 2, 3, 4, 5], dims=["x"]) da.isin([2, 4]) :py:meth:`~xarray.DataArray.isin` works particularly well with @@ -305,7 +310,7 @@ already labels of an array: .. ipython:: python - lookup = xr.DataArray([-1, -2, -3, -4, -5], dims=['x']) + lookup = xr.DataArray([-1, -2, -3, -4, -5], dims=["x"]) da.where(lookup.isin([-2, -4]), drop=True) However, some caution is in order: when done repeatedly, this type of indexing @@ -328,7 +333,6 @@ MATLAB, or after using the :py:func:`numpy.ix_` helper: .. ipython:: python - da = xr.DataArray( np.arange(12).reshape((3, 4)), dims=["x", "y"], @@ -344,8 +348,8 @@ dimensions: .. ipython:: python - ind_x = xr.DataArray([0, 1], dims=['x']) - ind_y = xr.DataArray([0, 1], dims=['y']) + ind_x = xr.DataArray([0, 1], dims=["x"]) + ind_y = xr.DataArray([0, 1], dims=["y"]) da[ind_x, ind_y] # orthogonal indexing da[ind_x, ind_x] # vectorized indexing @@ -364,7 +368,7 @@ indexers' dimension: .. 
ipython:: python - ind = xr.DataArray([[0, 1], [0, 1]], dims=['a', 'b']) + ind = xr.DataArray([[0, 1], [0, 1]], dims=["a", "b"]) da[ind] Similar to how NumPy's `advanced indexing`_ works, vectorized @@ -378,18 +382,18 @@ Vectorized indexing also works with ``isel``, ``loc``, and ``sel``: .. ipython:: python - ind = xr.DataArray([[0, 1], [0, 1]], dims=['a', 'b']) + ind = xr.DataArray([[0, 1], [0, 1]], dims=["a", "b"]) da.isel(y=ind) # same as da[:, ind] - ind = xr.DataArray([['a', 'b'], ['b', 'a']], dims=['a', 'b']) + ind = xr.DataArray([["a", "b"], ["b", "a"]], dims=["a", "b"]) da.loc[:, ind] # same as da.sel(y=ind) These methods may also be applied to ``Dataset`` objects .. ipython:: python - ds = da.to_dataset(name='bar') - ds.isel(x=xr.DataArray([0, 1, 2], dims=['points'])) + ds = da.to_dataset(name="bar") + ds.isel(x=xr.DataArray([0, 1, 2], dims=["points"])) .. tip:: @@ -476,8 +480,8 @@ Like ``numpy.ndarray``, value assignment sometimes works differently from what o .. ipython:: python - da = xr.DataArray([0, 1, 2, 3], dims=['x']) - ind = xr.DataArray([0, 0, 0], dims=['x']) + da = xr.DataArray([0, 1, 2, 3], dims=["x"]) + ind = xr.DataArray([0, 0, 0], dims=["x"]) da[ind] -= 1 da @@ -511,7 +515,7 @@ __ https://docs.scipy.org/doc/numpy/user/basics.indexing.html#assigning-values-t .. ipython:: python - da = xr.DataArray([0, 1, 2, 3], dims=['x']) + da = xr.DataArray([0, 1, 2, 3], dims=["x"]) # DO NOT do this da.isel(x=[0, 1, 2])[1] = -1 da @@ -581,15 +585,15 @@ To reindex a particular dimension, use :py:meth:`~xarray.DataArray.reindex`: .. ipython:: python - da.reindex(space=['IA', 'CA']) + da.reindex(space=["IA", "CA"]) The :py:meth:`~xarray.DataArray.reindex_like` method is a useful shortcut. To demonstrate, we will make a subset DataArray with new values: .. ipython:: python - foo = da.rename('foo') - baz = (10 * da[:2, :2]).rename('baz') + foo = da.rename("foo") + baz = (10 * da[:2, :2]).rename("baz") baz Reindexing ``foo`` with ``baz`` selects out the first two values along each @@ -611,8 +615,8 @@ The :py:func:`~xarray.align` function lets us perform more flexible database-lik .. ipython:: python - xr.align(foo, baz, join='inner') - xr.align(foo, baz, join='outer') + xr.align(foo, baz, join="inner") + xr.align(foo, baz, join="outer") Both ``reindex_like`` and ``align`` work interchangeably between :py:class:`~xarray.DataArray` and :py:class:`~xarray.Dataset` objects, and with any number of matching dimension names: @@ -621,7 +625,7 @@ Both ``reindex_like`` and ``align`` work interchangeably between ds ds.reindex_like(baz) - other = xr.DataArray(['a', 'b', 'c'], dims='other') + other = xr.DataArray(["a", "b", "c"], dims="other") # this is a no-op, because there are no shared dimension names ds.reindex_like(other) @@ -636,7 +640,7 @@ integer-based indexing as a fallback for dimensions without a coordinate label: .. ipython:: python - da = xr.DataArray([1, 2, 3], dims='x') + da = xr.DataArray([1, 2, 3], dims="x") da.sel(x=[0, -1]) Alignment between xarray objects where one or both do not have coordinate labels @@ -675,9 +679,9 @@ labels: .. ipython:: python - da = xr.DataArray([1, 2, 3], dims='x') + da = xr.DataArray([1, 2, 3], dims="x") da - da.get_index('x') + da.get_index("x") .. _copies_vs_views: @@ -721,7 +725,6 @@ pandas: .. ipython:: python - midx = pd.MultiIndex.from_product([list("abc"), [0, 1]], names=("one", "two")) mda = xr.DataArray(np.random.rand(6, 3), [("x", midx), ("y", range(3))]) mda @@ -732,20 +735,20 @@ a slice of tuples: .. 
ipython:: python - mda.sel(x=[('a', 0), ('b', 1)]) + mda.sel(x=[("a", 0), ("b", 1)]) Additionally, xarray supports dictionaries: .. ipython:: python - mda.sel(x={'one': 'a', 'two': 0}) + mda.sel(x={"one": "a", "two": 0}) For convenience, ``sel`` also accepts multi-index levels directly as keyword arguments: .. ipython:: python - mda.sel(one='a', two=0) + mda.sel(one="a", two=0) Note that using ``sel`` it is not possible to mix a dimension indexer with level indexers for that dimension @@ -757,7 +760,7 @@ multi-index is reduced to a single index. .. ipython:: python - mda.loc[{'one': 'a'}, ...] + mda.loc[{"one": "a"}, ...] Unlike pandas, xarray does not guess whether you provide index levels or dimensions when using ``loc`` in some ambiguous cases. For example, for diff --git a/doc/internals.rst b/doc/internals.rst index a4870f2316a..c23aab8c5d7 100644 --- a/doc/internals.rst +++ b/doc/internals.rst @@ -46,11 +46,12 @@ Extending xarray ---------------- .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) xarray is designed as a general purpose library, and hence tries to avoid @@ -87,11 +88,12 @@ defined that returns an instance of your class: .. code-block:: python - class Dataset: - ... - @property - def geo(self) - return GeoAccessor(self) + class Dataset: + ... + + @property + def geo(self): + return GeoAccessor(self) However, using the register accessor decorators is preferable to simply adding your own ad-hoc property (i.e., ``Dataset.geo = property(...)``), for several @@ -116,14 +118,13 @@ reasons: Back in an interactive IPython session, we can use these properties: .. ipython:: python - :suppress: + :suppress: - exec(open("examples/_code/accessor_example.py").read()) + exec(open("examples/_code/accessor_example.py").read()) .. ipython:: python - ds = xr.Dataset({'longitude': np.linspace(0, 10), - 'latitude': np.linspace(0, 20)}) + ds = xr.Dataset({"longitude": np.linspace(0, 10), "latitude": np.linspace(0, 20)}) ds.geo.center ds.geo.plot() diff --git a/doc/interpolation.rst b/doc/interpolation.rst index 63e9a7cd35e..c2922813e15 100644 --- a/doc/interpolation.rst +++ b/doc/interpolation.rst @@ -4,11 +4,12 @@ Interpolating data ================== .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) xarray offers flexible interpolation routines, which have a similar interface @@ -27,9 +28,10 @@ indexing of a :py:class:`~xarray.DataArray`, .. ipython:: python - da = xr.DataArray(np.sin(0.3 * np.arange(12).reshape(4, 3)), - [('time', np.arange(4)), - ('space', [0.1, 0.2, 0.3])]) + da = xr.DataArray( + np.sin(0.3 * np.arange(12).reshape(4, 3)), + [("time", np.arange(4)), ("space", [0.1, 0.2, 0.3])], + ) # label lookup da.sel(time=3) @@ -52,16 +54,17 @@ To interpolate data with a :py:doc:`numpy.datetime64 .. ipython:: python - da_dt64 = xr.DataArray([1, 3], - [('time', pd.date_range('1/1/2000', '1/3/2000', periods=2))]) - da_dt64.interp(time='2000-01-02') + da_dt64 = xr.DataArray( + [1, 3], [("time", pd.date_range("1/1/2000", "1/3/2000", periods=2))] + ) + da_dt64.interp(time="2000-01-02") The interpolated data can be merged into the original :py:class:`~xarray.DataArray` by specifying the time periods required. .. 
ipython:: python - da_dt64.interp(time=pd.date_range('1/1/2000', '1/3/2000', periods=3)) + da_dt64.interp(time=pd.date_range("1/1/2000", "1/3/2000", periods=3)) Interpolation of data indexed by a :py:class:`~xarray.CFTimeIndex` is also allowed. See :ref:`CFTimeIndex` for examples. @@ -108,9 +111,10 @@ different coordinates, .. ipython:: python - other = xr.DataArray(np.sin(0.4 * np.arange(9).reshape(3, 3)), - [('time', [0.9, 1.9, 2.9]), - ('space', [0.15, 0.25, 0.35])]) + other = xr.DataArray( + np.sin(0.4 * np.arange(9).reshape(3, 3)), + [("time", [0.9, 1.9, 2.9]), ("space", [0.15, 0.25, 0.35])], + ) it might be a good idea to first interpolate ``da`` so that it will stay on the same coordinates of ``other``, and then subtract it. @@ -118,9 +122,9 @@ same coordinates of ``other``, and then subtract it. .. ipython:: python - # interpolate da along other's coordinates - interpolated = da.interp_like(other) - interpolated + # interpolate da along other's coordinates + interpolated = da.interp_like(other) + interpolated It is now possible to safely compute the difference ``other - interpolated``. @@ -135,12 +139,15 @@ The interpolation method can be specified by the optional ``method`` argument. .. ipython:: python - da = xr.DataArray(np.sin(np.linspace(0, 2 * np.pi, 10)), dims='x', - coords={'x': np.linspace(0, 1, 10)}) + da = xr.DataArray( + np.sin(np.linspace(0, 2 * np.pi, 10)), + dims="x", + coords={"x": np.linspace(0, 1, 10)}, + ) - da.plot.line('o', label='original') - da.interp(x=np.linspace(0, 1, 100)).plot.line(label='linear (default)') - da.interp(x=np.linspace(0, 1, 100), method='cubic').plot.line(label='cubic') + da.plot.line("o", label="original") + da.interp(x=np.linspace(0, 1, 100)).plot.line(label="linear (default)") + da.interp(x=np.linspace(0, 1, 100), method="cubic").plot.line(label="cubic") @savefig interpolation_sample1.png width=4in plt.legend() @@ -149,9 +156,16 @@ Additional keyword arguments can be passed to scipy's functions. .. ipython:: python # fill 0 for the outside of the original coordinates. - da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={'fill_value': 0.0}) - # extrapolation - da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={'fill_value': 'extrapolate'}) + da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={"fill_value": 0.0}) + # 1-dimensional extrapolation + da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={"fill_value": "extrapolate"}) + # multi-dimensional extrapolation + da = xr.DataArray( + np.sin(0.3 * np.arange(12).reshape(4, 3)), + [("time", np.arange(4)), ("space", [0.1, 0.2, 0.3])], + ) + + da.interp(time=4, space=np.linspace(-0.1, 0.5, 10), kwargs={"fill_value": None}) Advanced Interpolation @@ -175,17 +189,18 @@ For example: .. 
ipython:: python - da = xr.DataArray(np.sin(0.3 * np.arange(20).reshape(5, 4)), - [('x', np.arange(5)), - ('y', [0.1, 0.2, 0.3, 0.4])]) + da = xr.DataArray( + np.sin(0.3 * np.arange(20).reshape(5, 4)), + [("x", np.arange(5)), ("y", [0.1, 0.2, 0.3, 0.4])], + ) # advanced indexing - x = xr.DataArray([0, 2, 4], dims='z') - y = xr.DataArray([0.1, 0.2, 0.3], dims='z') + x = xr.DataArray([0, 2, 4], dims="z") + y = xr.DataArray([0.1, 0.2, 0.3], dims="z") da.sel(x=x, y=y) # advanced interpolation - x = xr.DataArray([0.5, 1.5, 2.5], dims='z') - y = xr.DataArray([0.15, 0.25, 0.35], dims='z') + x = xr.DataArray([0.5, 1.5, 2.5], dims="z") + y = xr.DataArray([0.15, 0.25, 0.35], dims="z") da.interp(x=x, y=y) where values on the original coordinates @@ -197,9 +212,8 @@ If you want to add a coordinate to the new dimension ``z``, you can supply .. ipython:: python - x = xr.DataArray([0.5, 1.5, 2.5], dims='z', coords={'z': ['a', 'b','c']}) - y = xr.DataArray([0.15, 0.25, 0.35], dims='z', - coords={'z': ['a', 'b','c']}) + x = xr.DataArray([0.5, 1.5, 2.5], dims="z", coords={"z": ["a", "b", "c"]}) + y = xr.DataArray([0.15, 0.25, 0.35], dims="z", coords={"z": ["a", "b", "c"]}) da.interp(x=x, y=y) For the details of the advanced indexing, @@ -218,19 +232,18 @@ while other methods such as ``cubic`` or ``quadratic`` return all NaN arrays. .. ipython:: python - da = xr.DataArray([0, 2, np.nan, 3, 3.25], dims='x', - coords={'x': range(5)}) + da = xr.DataArray([0, 2, np.nan, 3, 3.25], dims="x", coords={"x": range(5)}) da.interp(x=[0.5, 1.5, 2.5]) - da.interp(x=[0.5, 1.5, 2.5], method='cubic') + da.interp(x=[0.5, 1.5, 2.5], method="cubic") To avoid this, you can drop NaN by :py:meth:`~xarray.DataArray.dropna`, and then make the interpolation .. ipython:: python - dropped = da.dropna('x') + dropped = da.dropna("x") dropped - dropped.interp(x=[0.5, 1.5, 2.5], method='cubic') + dropped.interp(x=[0.5, 1.5, 2.5], method="cubic") If NaNs are distributed randomly in your multidimensional array, dropping all the columns containing more than one NaNs by @@ -240,7 +253,7 @@ which is similar to :py:meth:`pandas.Series.interpolate`. .. ipython:: python - filled = da.interpolate_na(dim='x') + filled = da.interpolate_na(dim="x") filled This fills NaN by interpolating along the specified dimension. @@ -248,7 +261,7 @@ After filling NaNs, you can interpolate: .. ipython:: python - filled.interp(x=[0.5, 1.5, 2.5], method='cubic') + filled.interp(x=[0.5, 1.5, 2.5], method="cubic") For the details of :py:meth:`~xarray.DataArray.interpolate_na`, see :ref:`Missing values `. @@ -262,18 +275,18 @@ Let's see how :py:meth:`~xarray.DataArray.interp` works on real data. .. ipython:: python # Raw data - ds = xr.tutorial.open_dataset('air_temperature').isel(time=0) + ds = xr.tutorial.open_dataset("air_temperature").isel(time=0) fig, axes = plt.subplots(ncols=2, figsize=(10, 4)) ds.air.plot(ax=axes[0]) - axes[0].set_title('Raw data') + axes[0].set_title("Raw data") # Interpolated data - new_lon = np.linspace(ds.lon[0], ds.lon[-1], ds.dims['lon'] * 4) - new_lat = np.linspace(ds.lat[0], ds.lat[-1], ds.dims['lat'] * 4) + new_lon = np.linspace(ds.lon[0], ds.lon[-1], ds.dims["lon"] * 4) + new_lat = np.linspace(ds.lat[0], ds.lat[-1], ds.dims["lat"] * 4) dsi = ds.interp(lat=new_lat, lon=new_lon) dsi.air.plot(ax=axes[1]) @savefig interpolation_sample3.png width=8in - axes[1].set_title('Interpolated data') + axes[1].set_title("Interpolated data") Our advanced interpolation can be used to remap the data to the new coordinate. 
Consider the new coordinates x and z on the two dimensional plane. @@ -285,20 +298,23 @@ The remapping can be done as follows x = np.linspace(240, 300, 100) z = np.linspace(20, 70, 100) # relation between new and original coordinates - lat = xr.DataArray(z, dims=['z'], coords={'z': z}) - lon = xr.DataArray((x[:, np.newaxis]-270)/np.cos(z*np.pi/180)+270, - dims=['x', 'z'], coords={'x': x, 'z': z}) + lat = xr.DataArray(z, dims=["z"], coords={"z": z}) + lon = xr.DataArray( + (x[:, np.newaxis] - 270) / np.cos(z * np.pi / 180) + 270, + dims=["x", "z"], + coords={"x": x, "z": z}, + ) fig, axes = plt.subplots(ncols=2, figsize=(10, 4)) ds.air.plot(ax=axes[0]) # draw the new coordinate on the original coordinates. for idx in [0, 33, 66, 99]: - axes[0].plot(lon.isel(x=idx), lat, '--k') + axes[0].plot(lon.isel(x=idx), lat, "--k") for idx in [0, 33, 66, 99]: - axes[0].plot(*xr.broadcast(lon.isel(z=idx), lat.isel(z=idx)), '--k') - axes[0].set_title('Raw data') + axes[0].plot(*xr.broadcast(lon.isel(z=idx), lat.isel(z=idx)), "--k") + axes[0].set_title("Raw data") dsi = ds.interp(lon=lon, lat=lat) dsi.air.plot(ax=axes[1]) @savefig interpolation_sample4.png width=8in - axes[1].set_title('Remapped data') + axes[1].set_title("Remapped data") \ No newline at end of file diff --git a/doc/io.rst b/doc/io.rst index 6064aa3568a..738d8d2b7ab 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -9,11 +9,12 @@ simple :ref:`io.pickle` files to the more flexible :ref:`io.netcdf` format (recommended). .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) .. _io.netcdf: @@ -52,12 +53,16 @@ We can save a Dataset to disk using the .. ipython:: python - ds = xr.Dataset({'foo': (('x', 'y'), np.random.rand(4, 5))}, - coords={'x': [10, 20, 30, 40], - 'y': pd.date_range('2000-01-01', periods=5), - 'z': ('x', list('abcd'))}) + ds = xr.Dataset( + {"foo": (("x", "y"), np.random.rand(4, 5))}, + coords={ + "x": [10, 20, 30, 40], + "y": pd.date_range("2000-01-01", periods=5), + "z": ("x", list("abcd")), + }, + ) - ds.to_netcdf('saved_on_disk.nc') + ds.to_netcdf("saved_on_disk.nc") By default, the file is saved as netCDF4 (assuming netCDF4-Python is installed). You can control the format and engine used to write the file with @@ -76,7 +81,7 @@ We can load netCDF files to create a new Dataset using .. ipython:: python - ds_disk = xr.open_dataset('saved_on_disk.nc') + ds_disk = xr.open_dataset("saved_on_disk.nc") ds_disk Similarly, a DataArray can be saved to disk using the @@ -117,7 +122,7 @@ netCDF file. However, it's often cleaner to use a ``with`` statement: .. ipython:: python # this automatically closes the dataset after use - with xr.open_dataset('saved_on_disk.nc') as ds: + with xr.open_dataset("saved_on_disk.nc") as ds: print(ds.keys()) Although xarray provides reasonable support for incremental reads of files on @@ -171,7 +176,7 @@ You can view this encoding information (among others) in the .. 
ipython:: :verbatim: - In [1]: ds_disk['y'].encoding + In [1]: ds_disk["y"].encoding Out[1]: {'zlib': False, 'shuffle': False, @@ -281,7 +286,7 @@ library:: # glob expands paths with * to a list of files, like the unix shell paths = sorted(glob(files)) datasets = [xr.open_dataset(p) for p in paths] - combined = xr.concat(dataset, dim) + combined = xr.concat(datasets, dim) return combined combined = read_netcdfs('/all/my/files/*.nc', dim='time') @@ -469,7 +474,7 @@ and currently raises a warning unless ``invalid_netcdf=True`` is set: :okwarning: # Writing complex valued data - da = xr.DataArray([1.+1.j, 2.+2.j, 3.+3.j]) + da = xr.DataArray([1.0 + 1.0j, 2.0 + 2.0j, 3.0 + 3.0j]) da.to_netcdf("complex.nc", engine="h5netcdf", invalid_netcdf=True) # Reading it back @@ -479,7 +484,8 @@ and currently raises a warning unless ``invalid_netcdf=True`` is set: :suppress: import os - os.remove('complex.nc') + + os.remove("complex.nc") .. warning:: @@ -499,9 +505,11 @@ installed xarray can convert a ``DataArray`` into a ``Cube`` using .. ipython:: python - da = xr.DataArray(np.random.rand(4, 5), dims=['x', 'y'], - coords=dict(x=[10, 20, 30, 40], - y=pd.date_range('2000-01-01', periods=5))) + da = xr.DataArray( + np.random.rand(4, 5), + dims=["x", "y"], + coords=dict(x=[10, 20, 30, 40], y=pd.date_range("2000-01-01", periods=5)), + ) cube = da.to_iris() cube @@ -548,8 +556,9 @@ __ http://iri.columbia.edu/ :verbatim: In [3]: remote_data = xr.open_dataset( - ...: 'http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods', - ...: decode_times=False) + ...: "http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods", + ...: decode_times=False, + ...: ) In [4]: remote_data Out[4]: @@ -587,7 +596,7 @@ over the network until we look at particular values: .. ipython:: :verbatim: - In [4]: tmax = remote_data['tmax'][:500, ::3, ::3] + In [4]: tmax = remote_data["tmax"][:500, ::3, ::3] In [5]: tmax Out[5]: @@ -715,7 +724,8 @@ search indices or other automated data discovery tools. :suppress: import os - os.remove('saved_on_disk.nc') + + os.remove("saved_on_disk.nc") .. _io.rasterio: @@ -729,7 +739,7 @@ rasterio is installed. Here is an example of how to use .. ipython:: :verbatim: - In [7]: rio = xr.open_rasterio('RGB.byte.tif') + In [7]: rio = xr.open_rasterio("RGB.byte.tif") In [8]: rio Out[8]: @@ -769,7 +779,7 @@ GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIF In [1]: import rioxarray - In [2]: rds = rioxarray.open_rasterio('RGB.byte.tif') + In [2]: rds = rioxarray.open_rasterio("RGB.byte.tif") In [3]: rds Out[3]: @@ -799,7 +809,7 @@ GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIF In [6]: rds4326.rio.crs Out[6]: CRS.from_epsg(4326) - In [7]: rds4326.rio.to_raster('RGB.byte.4326.tif') + In [7]: rds4326.rio.to_raster("RGB.byte.4326.tif") .. _rasterio: https://rasterio.readthedocs.io/en/latest/ @@ -831,17 +841,21 @@ xarray. To write a dataset with zarr, we use the :py:attr:`Dataset.to_zarr` meth To write to a local directory, we pass a path to a directory .. ipython:: python - :suppress: + :suppress: ! rm -rf path/to/directory.zarr .. 
ipython:: python - ds = xr.Dataset({'foo': (('x', 'y'), np.random.rand(4, 5))}, - coords={'x': [10, 20, 30, 40], - 'y': pd.date_range('2000-01-01', periods=5), - 'z': ('x', list('abcd'))}) - ds.to_zarr('path/to/directory.zarr') + ds = xr.Dataset( + {"foo": (("x", "y"), np.random.rand(4, 5))}, + coords={ + "x": [10, 20, 30, 40], + "y": pd.date_range("2000-01-01", periods=5), + "z": ("x", list("abcd")), + }, + ) + ds.to_zarr("path/to/directory.zarr") (The suffix ``.zarr`` is optional--just a reminder that a zarr store lives there.) If the directory does not exist, it will be created. If a zarr @@ -854,22 +868,30 @@ It is also possible to append to an existing store. For that, set can be omitted as it will internally be set to ``'a'``. .. ipython:: python - :suppress: + :suppress: ! rm -rf path/to/directory.zarr .. ipython:: python - ds1 = xr.Dataset({'foo': (('x', 'y', 't'), np.random.rand(4, 5, 2))}, - coords={'x': [10, 20, 30, 40], - 'y': [1,2,3,4,5], - 't': pd.date_range('2001-01-01', periods=2)}) - ds1.to_zarr('path/to/directory.zarr') - ds2 = xr.Dataset({'foo': (('x', 'y', 't'), np.random.rand(4, 5, 2))}, - coords={'x': [10, 20, 30, 40], - 'y': [1,2,3,4,5], - 't': pd.date_range('2001-01-03', periods=2)}) - ds2.to_zarr('path/to/directory.zarr', append_dim='t') + ds1 = xr.Dataset( + {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, + coords={ + "x": [10, 20, 30, 40], + "y": [1, 2, 3, 4, 5], + "t": pd.date_range("2001-01-01", periods=2), + }, + ) + ds1.to_zarr("path/to/directory.zarr") + ds2 = xr.Dataset( + {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, + coords={ + "x": [10, 20, 30, 40], + "y": [1, 2, 3, 4, 5], + "t": pd.date_range("2001-01-03", periods=2), + }, + ) + ds2.to_zarr("path/to/directory.zarr", append_dim="t") To store variable length strings use ``dtype=object``. @@ -878,7 +900,7 @@ To read back a zarr dataset that has been created this way, we use the .. ipython:: python - ds_zarr = xr.open_zarr('path/to/directory.zarr') + ds_zarr = xr.open_zarr("path/to/directory.zarr") ds_zarr Cloud Storage Buckets @@ -912,15 +934,16 @@ These options can be passed to the ``to_zarr`` method as variable encoding. For example: .. ipython:: python - :suppress: + :suppress: ! rm -rf foo.zarr .. ipython:: python import zarr - compressor = zarr.Blosc(cname='zstd', clevel=3, shuffle=2) - ds.to_zarr('foo.zarr', encoding={'foo': {'compressor': compressor}}) + + compressor = zarr.Blosc(cname="zstd", clevel=3, shuffle=2) + ds.to_zarr("foo.zarr", encoding={"foo": {"compressor": compressor}}) .. note:: @@ -959,11 +982,12 @@ be done directly from zarr, as described in the .. _io.cfgrib: .. ipython:: python - :suppress: + :suppress: import shutil - shutil.rmtree('foo.zarr') - shutil.rmtree('path/to/directory.zarr') + + shutil.rmtree("foo.zarr") + shutil.rmtree("path/to/directory.zarr") GRIB format via cfgrib ---------------------- @@ -975,7 +999,7 @@ to :py:func:`open_dataset`: .. ipython:: :verbatim: - In [1]: ds_grib = xr.open_dataset('example.grib', engine='cfgrib') + In [1]: ds_grib = xr.open_dataset("example.grib", engine="cfgrib") We recommend installing ecCodes via conda:: diff --git a/doc/pandas.rst b/doc/pandas.rst index b0ec2a117dc..acf1d16b6ee 100644 --- a/doc/pandas.rst +++ b/doc/pandas.rst @@ -20,6 +20,7 @@ __ http://seaborn.pydata.org/ import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) Hierarchical and tidy data @@ -47,10 +48,15 @@ To convert any dataset to a ``DataFrame`` in tidy form, use the .. 
ipython:: python - ds = xr.Dataset({'foo': (('x', 'y'), np.random.randn(2, 3))}, - coords={'x': [10, 20], 'y': ['a', 'b', 'c'], - 'along_x': ('x', np.random.randn(2)), - 'scalar': 123}) + ds = xr.Dataset( + {"foo": (("x", "y"), np.random.randn(2, 3))}, + coords={ + "x": [10, 20], + "y": ["a", "b", "c"], + "along_x": ("x", np.random.randn(2)), + "scalar": 123, + }, + ) ds df = ds.to_dataframe() df @@ -91,7 +97,7 @@ DataFrames: .. ipython:: python - s = ds['foo'].to_series() + s = ds["foo"].to_series() s # or equivalently, with Series.to_xarray() xr.DataArray.from_series(s) @@ -117,8 +123,9 @@ available in pandas (i.e., a 1D array is converted to a .. ipython:: python - arr = xr.DataArray(np.random.randn(2, 3), - coords=[('x', [10, 20]), ('y', ['a', 'b', 'c'])]) + arr = xr.DataArray( + np.random.randn(2, 3), coords=[("x", [10, 20]), ("y", ["a", "b", "c"])] + ) df = arr.to_pandas() df @@ -136,9 +143,10 @@ preserve all use of multi-indexes: .. ipython:: python - index = pd.MultiIndex.from_arrays([['a', 'a', 'b'], [0, 1, 2]], - names=['one', 'two']) - df = pd.DataFrame({'x': 1, 'y': 2}, index=index) + index = pd.MultiIndex.from_arrays( + [["a", "a", "b"], [0, 1, 2]], names=["one", "two"] + ) + df = pd.DataFrame({"x": 1, "y": 2}, index=index) ds = xr.Dataset(df) ds @@ -175,9 +183,9 @@ Let's take a look: .. ipython:: python data = np.random.RandomState(0).rand(2, 3, 4) - items = list('ab') - major_axis = list('mno') - minor_axis = pd.date_range(start='2000', periods=4, name='date') + items = list("ab") + major_axis = list("mno") + minor_axis = pd.date_range(start="2000", periods=4, name="date") With old versions of pandas (prior to 0.25), this could stored in a ``Panel``: @@ -207,7 +215,7 @@ You can also easily convert this data into ``Dataset``: .. ipython:: python - array.to_dataset(dim='dim_0') + array.to_dataset(dim="dim_0") Here, there are two data variables, each representing a DataFrame on panel's ``items`` axis, and labeled as such. Each variable is a 2D array of the diff --git a/doc/plotting.rst b/doc/plotting.rst index f3d9c0213de..fb30417e2c6 100644 --- a/doc/plotting.rst +++ b/doc/plotting.rst @@ -56,6 +56,7 @@ Imports # Use defaults so we don't get gridlines in generated docs import matplotlib as mpl + mpl.rcdefaults() The following imports are necessary for all of the examples. @@ -71,7 +72,7 @@ For these examples we'll use the North American air temperature dataset. .. ipython:: python - airtemps = xr.tutorial.open_dataset('air_temperature') + airtemps = xr.tutorial.open_dataset("air_temperature") airtemps # Convert to celsius @@ -79,7 +80,7 @@ For these examples we'll use the North American air temperature dataset. # copy attributes to get nice figure labels and change Kelvin to Celsius air.attrs = airtemps.air.attrs - air.attrs['units'] = 'deg C' + air.attrs["units"] = "deg C" .. note:: Until :issue:`1614` is solved, you might need to copy over the metadata in ``attrs`` to get informative figure labels (as was done above). @@ -126,7 +127,7 @@ can be used: .. ipython:: python @savefig plotting_1d_additional_args.png width=4in - air1d[:200].plot.line('b-^') + air1d[:200].plot.line("b-^") .. note:: Not all xarray plotting methods support passing positional arguments @@ -138,7 +139,7 @@ Keyword arguments work the same way, and are more explicit. .. 
ipython:: python @savefig plotting_example_sin3.png width=4in - air1d[:200].plot.line(color='purple', marker='o') + air1d[:200].plot.line(color="purple", marker="o") ========================= Adding to Existing Axis @@ -219,7 +220,7 @@ plots to check the variation of air temperature at three different latitudes alo .. ipython:: python @savefig plotting_example_multiple_lines_x_kwarg.png - air.isel(lon=10, lat=[19,21,22]).plot.line(x='time') + air.isel(lon=10, lat=[19, 21, 22]).plot.line(x="time") It is required to explicitly specify either @@ -240,7 +241,7 @@ It is also possible to make line plots such that the data are on the x-axis and .. ipython:: python @savefig plotting_example_xy_kwarg.png - air.isel(time=10, lon=[10, 11]).plot(y='lat', hue='lon') + air.isel(time=10, lon=[10, 11]).plot(y="lat", hue="lon") ============ Step plots @@ -253,7 +254,7 @@ made using 1D data. :okwarning: @savefig plotting_example_step.png width=4in - air1d[:20].plot.step(where='mid') + air1d[:20].plot.step(where="mid") The argument ``where`` defines where the steps should be placed, options are ``'pre'`` (default), ``'post'``, and ``'mid'``. This is particularly handy @@ -261,15 +262,15 @@ when plotting data grouped with :py:meth:`Dataset.groupby_bins`. .. ipython:: python - air_grp = air.mean(['time','lon']).groupby_bins('lat',[0,23.5,66.5,90]) + air_grp = air.mean(["time", "lon"]).groupby_bins("lat", [0, 23.5, 66.5, 90]) air_mean = air_grp.mean() air_std = air_grp.std() air_mean.plot.step() - (air_mean + air_std).plot.step(ls=':') - (air_mean - air_std).plot.step(ls=':') - plt.ylim(-20,30) + (air_mean + air_std).plot.step(ls=":") + (air_mean - air_std).plot.step(ls=":") + plt.ylim(-20, 30) @savefig plotting_example_step_groupby.png width=4in - plt.title('Zonal mean temperature') + plt.title("Zonal mean temperature") In this case, the actual boundaries of the bins are used and the ``where`` argument is ignored. @@ -284,7 +285,9 @@ The keyword arguments ``xincrease`` and ``yincrease`` let you control the axes d .. ipython:: python @savefig plotting_example_xincrease_yincrease_kwarg.png - air.isel(time=10, lon=[10, 11]).plot.line(y='lat', hue='lon', xincrease=False, yincrease=False) + air.isel(time=10, lon=[10, 11]).plot.line( + y="lat", hue="lon", xincrease=False, yincrease=False + ) In addition, one can use ``xscale, yscale`` to set axes scaling; ``xticks, yticks`` to set axes ticks and ``xlim, ylim`` to set axes limits. These accept the same values as the matplotlib methods ``Axes.set_(x,y)scale()``, ``Axes.set_(x,y)ticks()``, ``Axes.set_(x,y)lim()`` respectively. @@ -348,7 +351,7 @@ produce plots with nonuniform coordinates. b = air2d.copy() # Apply a nonlinear transformation to one of the coords - b.coords['lat'] = np.log(b.coords['lat']) + b.coords["lat"] = np.log(b.coords["lat"]) @savefig plotting_nonuniform_coords.png width=4in b.plot() @@ -363,9 +366,9 @@ matplotlib is available. .. ipython:: python air2d.plot(cmap=plt.cm.Blues) - plt.title('These colors prove North America\nhas fallen in the ocean') - plt.ylabel('latitude') - plt.xlabel('longitude') + plt.title("These colors prove North America\nhas fallen in the ocean") + plt.ylabel("latitude") + plt.xlabel("longitude") plt.tight_layout() @savefig plotting_2d_call_matplotlib.png width=4in @@ -381,7 +384,7 @@ matplotlib is available. .. 
ipython:: python - plt.xlabel('Never gonna see this.') + plt.xlabel("Never gonna see this.") air2d.plot() @savefig plotting_2d_call_matplotlib2.png width=4in @@ -473,10 +476,10 @@ if using ``imshow`` or ``pcolormesh`` (but not with ``contour`` or ``contourf``, since levels are chosen automatically). .. ipython:: python - :okwarning: + :okwarning: @savefig plotting_seaborn_palette.png width=4in - air2d.plot(levels=10, cmap='husl') + air2d.plot(levels=10, cmap="husl") plt.draw() .. _plotting.faceting: @@ -520,14 +523,16 @@ arguments to the xarray plotting methods/functions. This returns a .. ipython:: python @savefig plot_facet_dataarray.png - g_simple = t.plot(x='lon', y='lat', col='time', col_wrap=3) + g_simple = t.plot(x="lon", y="lat", col="time", col_wrap=3) Faceting also works for line plots. .. ipython:: python @savefig plot_facet_dataarray_line.png - g_simple_line = t.isel(lat=slice(0,None,4)).plot(x='lon', hue='lat', col='time', col_wrap=3) + g_simple_line = t.isel(lat=slice(0, None, 4)).plot( + x="lon", hue="lat", col="time", col_wrap=3 + ) =============== 4 dimensional @@ -541,12 +546,12 @@ one were much hotter. .. ipython:: python t2 = t.isel(time=slice(0, 2)) - t4d = xr.concat([t2, t2 + 40], pd.Index(['normal', 'hot'], name='fourth_dim')) + t4d = xr.concat([t2, t2 + 40], pd.Index(["normal", "hot"], name="fourth_dim")) # This is a 4d array t4d.coords @savefig plot_facet_4d.png - t4d.plot(x='lon', y='lat', col='time', row='fourth_dim') + t4d.plot(x="lon", y="lat", col="time", row="fourth_dim") ================ Other features @@ -555,9 +560,9 @@ one were much hotter. Faceted plotting supports other arguments common to xarray 2d plots. .. ipython:: python - :suppress: + :suppress: - plt.close('all') + plt.close("all") .. ipython:: python @@ -566,9 +571,15 @@ Faceted plotting supports other arguments common to xarray 2d plots. hasoutliers[-1, -1, -1] = 400 @savefig plot_facet_robust.png - g = hasoutliers.plot.pcolormesh('lon', 'lat', col='time', col_wrap=3, - robust=True, cmap='viridis', - cbar_kwargs={'label': 'this has outliers'}) + g = hasoutliers.plot.pcolormesh( + "lon", + "lat", + col="time", + col_wrap=3, + robust=True, + cmap="viridis", + cbar_kwargs={"label": "this has outliers"}, + ) =================== FacetGrid Objects @@ -594,20 +605,20 @@ It's possible to select the :py:class:`xarray.DataArray` or .. ipython:: python - g.data.loc[g.name_dicts[0, 0]] + g.data.loc[g.name_dicts[0, 0]] Here is an example of using the lower level API and then modifying the axes after they have been plotted. .. ipython:: python - g = t.plot.imshow('lon', 'lat', col='time', col_wrap=3, robust=True) + g = t.plot.imshow("lon", "lat", col="time", col_wrap=3, robust=True) for i, ax in enumerate(g.axes.flat): - ax.set_title('Air Temperature %d' % i) + ax.set_title("Air Temperature %d" % i) bottomright = g.axes[-1, -1] - bottomright.annotate('bottom right', (240, 40)) + bottomright.annotate("bottom right", (240, 40)) @savefig plot_facet_iterator.png plt.draw() @@ -632,8 +643,8 @@ Consider this dataset .. ipython:: python - ds = xr.tutorial.scatter_example_dataset() - ds + ds = xr.tutorial.scatter_example_dataset() + ds Suppose we want to scatter ``A`` against ``B`` @@ -641,14 +652,14 @@ Suppose we want to scatter ``A`` against ``B`` .. ipython:: python @savefig ds_simple_scatter.png - ds.plot.scatter(x='A', y='B') + ds.plot.scatter(x="A", y="B") The ``hue`` kwarg lets you vary the color by variable value .. 
ipython:: python @savefig ds_hue_scatter.png - ds.plot.scatter(x='A', y='B', hue='w') + ds.plot.scatter(x="A", y="B", hue="w") When ``hue`` is specified, a colorbar is added for numeric ``hue`` DataArrays by default and a legend is added for non-numeric ``hue`` DataArrays (as above). @@ -659,21 +670,21 @@ Additionally, the boolean kwarg ``add_guide`` can be used to prevent the display ds = ds.assign(w=[1, 2, 3, 5]) @savefig ds_discrete_legend_hue_scatter.png - ds.plot.scatter(x='A', y='B', hue='w', hue_style='discrete') + ds.plot.scatter(x="A", y="B", hue="w", hue_style="discrete") The ``markersize`` kwarg lets you vary the point's size by variable value. You can additionally pass ``size_norm`` to control how the variable's values are mapped to point sizes. .. ipython:: python @savefig ds_hue_size_scatter.png - ds.plot.scatter(x='A', y='B', hue='z', hue_style='discrete', markersize='z') + ds.plot.scatter(x="A", y="B", hue="z", hue_style="discrete", markersize="z") Faceting is also possible .. ipython:: python @savefig ds_facet_scatter.png - ds.plot.scatter(x='A', y='B', col='x', row='z', hue='w', hue_style='discrete') + ds.plot.scatter(x="A", y="B", col="x", row="z", hue="w", hue_style="discrete") For more advanced scatter plots, we recommend converting the relevant data variables to a pandas DataFrame and using the extensive plotting capabilities of ``seaborn``. @@ -691,11 +702,15 @@ This script will plot the air temperature on a map. .. ipython:: python import cartopy.crs as ccrs - air = xr.tutorial.open_dataset('air_temperature').air + + air = xr.tutorial.open_dataset("air_temperature").air + ax = plt.axes(projection=ccrs.Orthographic(-80, 35)) - air.isel(time=0).plot.contourf(ax=ax, transform=ccrs.PlateCarree()); + air.isel(time=0).plot.contourf(ax=ax, transform=ccrs.PlateCarree()) + ax.set_global() + @savefig plotting_maps_cartopy.png width=100% - ax.set_global(); ax.coastlines(); + ax.coastlines() When faceting on maps, the projection can be transferred to the ``plot`` function using the ``subplot_kws`` keyword. The axes for the subplots created @@ -703,13 +718,16 @@ by faceting are accessible in the object returned by ``plot``: .. ipython:: python - p = air.isel(time=[0, 4]).plot(transform=ccrs.PlateCarree(), col='time', - subplot_kws={'projection': ccrs.Orthographic(-80, 35)}) + p = air.isel(time=[0, 4]).plot( + transform=ccrs.PlateCarree(), + col="time", + subplot_kws={"projection": ccrs.Orthographic(-80, 35)}, + ) for ax in p.axes.flat: ax.coastlines() ax.gridlines() @savefig plotting_maps_cartopy_facetting.png width=100% - plt.draw(); + plt.draw() Details @@ -732,6 +750,7 @@ These are provided for user convenience; they all call the same code. .. ipython:: python import xarray.plot as xplt + da = xr.DataArray(range(5)) fig, axes = plt.subplots(ncols=2, nrows=2) da.plot(ax=axes[0, 0]) @@ -766,8 +785,7 @@ read on. .. ipython:: python - a0 = xr.DataArray(np.zeros((4, 3, 2)), dims=('y', 'x', 'z'), - name='temperature') + a0 = xr.DataArray(np.zeros((4, 3, 2)), dims=("y", "x", "z"), name="temperature") a0[0, 0, 0] = 1 a = a0.isel(z=0) a @@ -801,14 +819,16 @@ instead of the default ones: .. 
ipython:: python lon, lat = np.meshgrid(np.linspace(-20, 20, 5), np.linspace(0, 30, 4)) - lon += lat/10 - lat += lon/10 - da = xr.DataArray(np.arange(20).reshape(4, 5), dims=['y', 'x'], - coords = {'lat': (('y', 'x'), lat), - 'lon': (('y', 'x'), lon)}) + lon += lat / 10 + lat += lon / 10 + da = xr.DataArray( + np.arange(20).reshape(4, 5), + dims=["y", "x"], + coords={"lat": (("y", "x"), lat), "lon": (("y", "x"), lon)}, + ) @savefig plotting_example_2d_irreg.png width=4in - da.plot.pcolormesh('lon', 'lat'); + da.plot.pcolormesh("lon", "lat") Note that in this case, xarray still follows the pixel centered convention. This might be undesirable in some cases, for example when your data is defined @@ -818,22 +838,25 @@ this convention when plotting on a map: .. ipython:: python import cartopy.crs as ccrs - ax = plt.subplot(projection=ccrs.PlateCarree()); - da.plot.pcolormesh('lon', 'lat', ax=ax); - ax.scatter(lon, lat, transform=ccrs.PlateCarree()); + + ax = plt.subplot(projection=ccrs.PlateCarree()) + da.plot.pcolormesh("lon", "lat", ax=ax) + ax.scatter(lon, lat, transform=ccrs.PlateCarree()) + ax.coastlines() @savefig plotting_example_2d_irreg_map.png width=4in - ax.coastlines(); ax.gridlines(draw_labels=True); + ax.gridlines(draw_labels=True) You can however decide to infer the cell boundaries and use the ``infer_intervals`` keyword: .. ipython:: python - ax = plt.subplot(projection=ccrs.PlateCarree()); - da.plot.pcolormesh('lon', 'lat', ax=ax, infer_intervals=True); - ax.scatter(lon, lat, transform=ccrs.PlateCarree()); + ax = plt.subplot(projection=ccrs.PlateCarree()) + da.plot.pcolormesh("lon", "lat", ax=ax, infer_intervals=True) + ax.scatter(lon, lat, transform=ccrs.PlateCarree()) + ax.coastlines() @savefig plotting_example_2d_irreg_map_infer.png width=4in - ax.coastlines(); ax.gridlines(draw_labels=True); + ax.gridlines(draw_labels=True) .. note:: The data model of xarray does not support datasets with `cell boundaries`_ @@ -847,6 +870,6 @@ One can also make line plots with multidimensional coordinates. In this case, `` .. ipython:: python f, ax = plt.subplots(2, 1) - da.plot.line(x='lon', hue='y', ax=ax[0]); + da.plot.line(x="lon", hue="y", ax=ax[0]) @savefig plotting_example_2d_hue_xy.png - da.plot.line(x='lon', hue='x', ax=ax[1]); + da.plot.line(x="lon", hue="x", ax=ax[1]) diff --git a/doc/quick-overview.rst b/doc/quick-overview.rst index 741b3d1a5fe..09b0d4c6fbb 100644 --- a/doc/quick-overview.rst +++ b/doc/quick-overview.rst @@ -22,16 +22,14 @@ array or list, with optional *dimensions* and *coordinates*: .. ipython:: python - data = xr.DataArray(np.random.randn(2, 3), - dims=('x', 'y'), - coords={'x': [10, 20]}) + data = xr.DataArray(np.random.randn(2, 3), dims=("x", "y"), coords={"x": [10, 20]}) data In this case, we have generated a 2D array, assigned the names *x* and *y* to the two dimensions respectively and associated two *coordinate labels* '10' and '20' with the two locations along the x dimension. If you supply a pandas :py:class:`~pandas.Series` or :py:class:`~pandas.DataFrame`, metadata is copied directly: .. ipython:: python - xr.DataArray(pd.Series(range(3), index=list('abc'), name='foo')) + xr.DataArray(pd.Series(range(3), index=list("abc"), name="foo")) Here are the key properties for a ``DataArray``: @@ -75,13 +73,13 @@ While you're setting up your DataArray, it's often a good idea to set metadata a .. 
ipython:: python - data.attrs['long_name'] = 'random velocity' - data.attrs['units'] = 'metres/sec' - data.attrs['description'] = 'A random variable created as an example.' - data.attrs['random_attribute'] = 123 + data.attrs["long_name"] = "random velocity" + data.attrs["units"] = "metres/sec" + data.attrs["description"] = "A random variable created as an example." + data.attrs["random_attribute"] = 123 data.attrs # you can add metadata to coordinates too - data.x.attrs['units'] = 'x units' + data.x.attrs["units"] = "x units" Computation @@ -102,15 +100,15 @@ numbers: .. ipython:: python - data.mean(dim='x') + data.mean(dim="x") Arithmetic operations broadcast based on dimension name. This means you don't need to insert dummy dimensions for alignment: .. ipython:: python - a = xr.DataArray(np.random.randn(3), [data.coords['y']]) - b = xr.DataArray(np.random.randn(4), dims='z') + a = xr.DataArray(np.random.randn(3), [data.coords["y"]]) + b = xr.DataArray(np.random.randn(4), dims="z") a b @@ -139,9 +137,9 @@ xarray supports grouped operations using a very similar API to pandas (see :ref: .. ipython:: python - labels = xr.DataArray(['E', 'F', 'E'], [data.coords['y']], name='labels') + labels = xr.DataArray(["E", "F", "E"], [data.coords["y"]], name="labels") labels - data.groupby(labels).mean('y') + data.groupby(labels).mean("y") data.groupby(labels).map(lambda x: x - x.min()) Plotting @@ -178,7 +176,7 @@ objects. You can think of it as a multi-dimensional generalization of the .. ipython:: python - ds = xr.Dataset({'foo': data, 'bar': ('x', [1, 2]), 'baz': np.pi}) + ds = xr.Dataset({"foo": data, "bar": ("x", [1, 2]), "baz": np.pi}) ds @@ -186,7 +184,7 @@ This creates a dataset with three DataArrays named ``foo``, ``bar`` and ``baz``. .. ipython:: python - ds['foo'] + ds["foo"] ds.foo @@ -216,14 +214,15 @@ You can directly read and write xarray objects to disk using :py:meth:`~xarray.D .. ipython:: python - ds.to_netcdf('example.nc') - xr.open_dataset('example.nc') + ds.to_netcdf("example.nc") + xr.open_dataset("example.nc") .. ipython:: python - :suppress: + :suppress: import os - os.remove('example.nc') + + os.remove("example.nc") It is common for datasets to be distributed across multiple files (commonly one file per timestep). xarray supports this use-case by providing the :py:meth:`~xarray.open_mfdataset` and the :py:meth:`~xarray.save_mfdataset` methods. For more, see :ref:`io`. diff --git a/doc/reshaping.rst b/doc/reshaping.rst index 465ca14dfc2..40de9ea799a 100644 --- a/doc/reshaping.rst +++ b/doc/reshaping.rst @@ -7,11 +7,12 @@ Reshaping and reorganizing data These methods allow you to reorganize .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) Reordering dimensions @@ -23,9 +24,9 @@ ellipsis (`...`) can be use to represent all other dimensions: .. ipython:: python - ds = xr.Dataset({'foo': (('x', 'y', 'z'), [[[42]]]), 'bar': (('y', 'z'), [[24]])}) - ds.transpose('y', 'z', 'x') - ds.transpose(..., 'x') # equivalent + ds = xr.Dataset({"foo": (("x", "y", "z"), [[[42]]]), "bar": (("y", "z"), [[24]])}) + ds.transpose("y", "z", "x") + ds.transpose(..., "x") # equivalent ds.transpose() # reverses all dimensions Expand and squeeze dimensions @@ -37,7 +38,7 @@ use :py:meth:`~xarray.DataArray.expand_dims` .. ipython:: python - expanded = ds.expand_dims('w') + expanded = ds.expand_dims("w") expanded This method attaches a new dimension with size 1 to all data variables. 
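As a quick illustration of the round-trip between :py:meth:`~xarray.Dataset.expand_dims` and :py:meth:`~xarray.Dataset.squeeze` described above, a minimal sketch reusing the same toy dataset (only ``xarray`` is assumed to be installed):

.. ipython:: python

    import xarray as xr

    ds = xr.Dataset({"foo": (("x", "y", "z"), [[[42]]]), "bar": (("y", "z"), [[24]])})
    expanded = ds.expand_dims("w")  # every data variable gains a new size-1 dimension "w"
    expanded.squeeze("w")  # dropping the size-1 dimension recovers the original dataset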
@@ -48,7 +49,7 @@ use :py:meth:`~xarray.DataArray.squeeze` .. ipython:: python - expanded.squeeze('w') + expanded.squeeze("w") Converting between datasets and arrays -------------------------------------- @@ -69,14 +70,14 @@ To convert back from a DataArray to a Dataset, use .. ipython:: python - arr.to_dataset(dim='variable') + arr.to_dataset(dim="variable") The broadcasting behavior of ``to_array`` means that the resulting array includes the union of data variable dimensions: .. ipython:: python - ds2 = xr.Dataset({'a': 0, 'b': ('x', [3, 4, 5])}) + ds2 = xr.Dataset({"a": 0, "b": ("x", [3, 4, 5])}) # the input dataset has 4 elements ds2 @@ -90,7 +91,7 @@ If you use ``to_dataset`` without supplying the ``dim`` argument, the DataArray .. ipython:: python - arr.to_dataset(name='combined') + arr.to_dataset(name="combined") .. _reshape.stack: @@ -103,11 +104,12 @@ implemented :py:meth:`~xarray.DataArray.stack` and .. ipython:: python - array = xr.DataArray(np.random.randn(2, 3), - coords=[('x', ['a', 'b']), ('y', [0, 1, 2])]) - stacked = array.stack(z=('x', 'y')) + array = xr.DataArray( + np.random.randn(2, 3), coords=[("x", ["a", "b"]), ("y", [0, 1, 2])] + ) + stacked = array.stack(z=("x", "y")) stacked - stacked.unstack('z') + stacked.unstack("z") As elsewhere in xarray, an ellipsis (`...`) can be used to represent all unlisted dimensions: @@ -128,15 +130,15 @@ possible levels. Missing levels are filled in with ``NaN`` in the resulting obje stacked2 = stacked[::2] stacked2 - stacked2.unstack('z') + stacked2.unstack("z") However, xarray's ``stack`` has an important difference from pandas: unlike pandas, it does not automatically drop missing values. Compare: .. ipython:: python - array = xr.DataArray([[np.nan, 1], [2, 3]], dims=['x', 'y']) - array.stack(z=('x', 'y')) + array = xr.DataArray([[np.nan, 1], [2, 3]], dims=["x", "y"]) + array.stack(z=("x", "y")) array.to_pandas().stack() We departed from pandas's behavior here because predictable shapes for new @@ -166,16 +168,15 @@ like this: .. ipython:: python - data = xr.Dataset( - data_vars={'a': (('x', 'y'), [[0, 1, 2], [3, 4, 5]]), - 'b': ('x', [6, 7])}, - coords={'y': ['u', 'v', 'w']} - ) - data - stacked = data.to_stacked_array("z", sample_dims=['x']) - stacked - unstacked = stacked.to_unstacked_dataset("z") - unstacked + data = xr.Dataset( + data_vars={"a": (("x", "y"), [[0, 1, 2], [3, 4, 5]]), "b": ("x", [6, 7])}, + coords={"y": ["u", "v", "w"]}, + ) + data + stacked = data.to_stacked_array("z", sample_dims=["x"]) + stacked + unstacked = stacked.to_unstacked_dataset("z") + unstacked In this example, ``stacked`` is a two dimensional array that we can easily pass to a scikit-learn or another generic numerical method. @@ -202,19 +203,23 @@ coordinates using :py:meth:`~xarray.DataArray.set_index`: .. ipython:: python - da = xr.DataArray(np.random.rand(4), - coords={'band': ('x', ['a', 'a', 'b', 'b']), - 'wavenumber': ('x', np.linspace(200, 400, 4))}, - dims='x') - da - mda = da.set_index(x=['band', 'wavenumber']) - mda + da = xr.DataArray( + np.random.rand(4), + coords={ + "band": ("x", ["a", "a", "b", "b"]), + "wavenumber": ("x", np.linspace(200, 400, 4)), + }, + dims="x", + ) + da + mda = da.set_index(x=["band", "wavenumber"]) + mda These coordinates can now be used for indexing, e.g., .. ipython:: python - mda.sel(band='a') + mda.sel(band="a") Conversely, you can use :py:meth:`~xarray.DataArray.reset_index` to extract multi-index levels as coordinates (this is mainly useful @@ -222,14 +227,14 @@ for serialization): .. 
ipython:: python - mda.reset_index('x') + mda.reset_index("x") :py:meth:`~xarray.DataArray.reorder_levels` allows changing the order of multi-index levels: .. ipython:: python - mda.reorder_levels(x=['wavenumber', 'band']) + mda.reorder_levels(x=["wavenumber", "band"]) As of xarray v0.9 coordinate labels for each dimension are optional. You can also use ``.set_index`` / ``.reset_index`` to add / remove @@ -237,12 +242,12 @@ labels for one or several dimensions: .. ipython:: python - array = xr.DataArray([1, 2, 3], dims='x') + array = xr.DataArray([1, 2, 3], dims="x") array - array['c'] = ('x', ['a', 'b', 'c']) - array.set_index(x='c') - array = array.set_index(x='c') - array = array.reset_index('x', drop=True) + array["c"] = ("x", ["a", "b", "c"]) + array.set_index(x="c") + array = array.set_index(x="c") + array = array.reset_index("x", drop=True) .. _reshape.shift_and_roll: @@ -254,9 +259,9 @@ To adjust coordinate labels, you can use the :py:meth:`~xarray.Dataset.shift` an .. ipython:: python - array = xr.DataArray([1, 2, 3, 4], dims='x') - array.shift(x=2) - array.roll(x=2, roll_coords=True) + array = xr.DataArray([1, 2, 3, 4], dims="x") + array.shift(x=2) + array.roll(x=2, roll_coords=True) .. _reshape.sort: @@ -269,17 +274,18 @@ One may sort a DataArray/Dataset via :py:meth:`~xarray.DataArray.sortby` and .. ipython:: python - ds = xr.Dataset({'A': (('x', 'y'), [[1, 2], [3, 4]]), - 'B': (('x', 'y'), [[5, 6], [7, 8]])}, - coords={'x': ['b', 'a'], 'y': [1, 0]}) - dax = xr.DataArray([100, 99], [('x', [0, 1])]) - day = xr.DataArray([90, 80], [('y', [0, 1])]) - ds.sortby([day, dax]) + ds = xr.Dataset( + {"A": (("x", "y"), [[1, 2], [3, 4]]), "B": (("x", "y"), [[5, 6], [7, 8]])}, + coords={"x": ["b", "a"], "y": [1, 0]}, + ) + dax = xr.DataArray([100, 99], [("x", [0, 1])]) + day = xr.DataArray([90, 80], [("y", [0, 1])]) + ds.sortby([day, dax]) As a shortcut, you can refer to existing coordinates by name: .. ipython:: python - ds.sortby('x') - ds.sortby(['y', 'x']) - ds.sortby(['y', 'x'], ascending=False) + ds.sortby("x") + ds.sortby(["y", "x"]) + ds.sortby(["y", "x"], ascending=False) \ No newline at end of file diff --git a/doc/roadmap.rst b/doc/roadmap.rst index e336b35b2bc..401dac779ad 100644 --- a/doc/roadmap.rst +++ b/doc/roadmap.rst @@ -219,6 +219,11 @@ Current core developers - Fabien Maussion - Keisuke Fujii - Maximilian Roos +- Deepak Cherian +- Spencer Clark +- Tom Nicholas +- Guido Imperiale +- Justus Magin NumFOCUS ~~~~~~~~ diff --git a/doc/time-series.rst b/doc/time-series.rst index d838dbbd4cd..96a2edc0ea5 100644 --- a/doc/time-series.rst +++ b/doc/time-series.rst @@ -10,11 +10,12 @@ data in pandas such a joy to xarray. In most cases, we rely on pandas for the core functionality. .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) Creating datetime64 data @@ -29,8 +30,8 @@ using :py:func:`pandas.to_datetime` and :py:func:`pandas.date_range`: .. ipython:: python - pd.to_datetime(['2000-01-01', '2000-02-02']) - pd.date_range('2000-01-01', periods=365) + pd.to_datetime(["2000-01-01", "2000-02-02"]) + pd.date_range("2000-01-01", periods=365) Alternatively, you can supply arrays of Python ``datetime`` objects. These get converted automatically when used as arguments in xarray objects: @@ -38,7 +39,8 @@ converted automatically when used as arguments in xarray objects: .. 
ipython:: python import datetime - xr.Dataset({'time': datetime.datetime(2000, 1, 1)}) + + xr.Dataset({"time": datetime.datetime(2000, 1, 1)}) When reading or writing netCDF files, xarray automatically decodes datetime and timedelta arrays using `CF conventions`_ (that is, by using a ``units`` @@ -62,8 +64,8 @@ You can manual decode arrays in this form by passing a dataset to .. ipython:: python - attrs = {'units': 'hours since 2000-01-01'} - ds = xr.Dataset({'time': ('time', [0, 1, 2, 3], attrs)}) + attrs = {"units": "hours since 2000-01-01"} + ds = xr.Dataset({"time": ("time", [0, 1, 2, 3], attrs)}) xr.decode_cf(ds) One unfortunate limitation of using ``datetime64[ns]`` is that it limits the @@ -87,10 +89,10 @@ items and with the `slice` object: .. ipython:: python - time = pd.date_range('2000-01-01', freq='H', periods=365 * 24) - ds = xr.Dataset({'foo': ('time', np.arange(365 * 24)), 'time': time}) - ds.sel(time='2000-01') - ds.sel(time=slice('2000-06-01', '2000-06-10')) + time = pd.date_range("2000-01-01", freq="H", periods=365 * 24) + ds = xr.Dataset({"foo": ("time", np.arange(365 * 24)), "time": time}) + ds.sel(time="2000-01") + ds.sel(time=slice("2000-06-01", "2000-06-10")) You can also select a particular time by indexing with a :py:class:`datetime.time` object: @@ -113,8 +115,8 @@ given ``DataArray`` can be quickly computed using a special ``.dt`` accessor. .. ipython:: python - time = pd.date_range('2000-01-01', freq='6H', periods=365 * 4) - ds = xr.Dataset({'foo': ('time', np.arange(365 * 4)), 'time': time}) + time = pd.date_range("2000-01-01", freq="6H", periods=365 * 4) + ds = xr.Dataset({"foo": ("time", np.arange(365 * 4)), "time": time}) ds.time.dt.hour ds.time.dt.dayofweek @@ -130,16 +132,16 @@ __ http://pandas.pydata.org/pandas-docs/stable/api.html#time-date-components .. ipython:: python - ds['time.month'] - ds['time.dayofyear'] + ds["time.month"] + ds["time.dayofyear"] For use as a derived coordinate, xarray adds ``'season'`` to the list of datetime components supported by pandas: .. ipython:: python - ds['time.season'] - ds['time'].dt.season + ds["time.season"] + ds["time"].dt.season The set of valid seasons consists of 'DJF', 'MAM', 'JJA' and 'SON', labeled by the first letters of the corresponding months. @@ -152,7 +154,7 @@ __ http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases .. ipython:: python - ds['time'].dt.floor('D') + ds["time"].dt.floor("D") The ``.dt`` accessor can also be used to generate formatted datetime strings for arrays utilising the same formatting as the standard `datetime.strftime`_. @@ -161,7 +163,7 @@ for arrays utilising the same formatting as the standard `datetime.strftime`_. .. ipython:: python - ds['time'].dt.strftime('%a, %b %d %H:%M') + ds["time"].dt.strftime("%a, %b %d %H:%M") .. _resampling: @@ -173,9 +175,9 @@ Datetime components couple particularly well with grouped operations (see calculate the mean by time of day: .. ipython:: python - :okwarning: + :okwarning: - ds.groupby('time.hour').mean() + ds.groupby("time.hour").mean() For upsampling or downsampling temporal resolutions, xarray offers a :py:meth:`~xarray.Dataset.resample` method building on the core functionality @@ -187,25 +189,25 @@ same api as ``resample`` `in pandas`_. For example, we can downsample our dataset from hourly to 6-hourly: .. ipython:: python - :okwarning: + :okwarning: - ds.resample(time='6H') + ds.resample(time="6H") This will create a specialized ``Resample`` object which saves information necessary for resampling. 
All of the reduction methods which work with ``Resample`` objects can also be used for resampling: .. ipython:: python - :okwarning: + :okwarning: - ds.resample(time='6H').mean() + ds.resample(time="6H").mean() You can also supply an arbitrary reduction function to aggregate over each resampling group: .. ipython:: python - ds.resample(time='6H').reduce(np.mean) + ds.resample(time="6H").reduce(np.mean) For upsampling, xarray provides six methods: ``asfreq``, ``ffill``, ``bfill``, ``pad``, ``nearest`` and ``interpolate``. ``interpolate`` extends ``scipy.interpolate.interp1d`` @@ -218,7 +220,7 @@ Data that has indices outside of the given ``tolerance`` are set to ``NaN``. .. ipython:: python - ds.resample(time='1H').nearest(tolerance='1H') + ds.resample(time="1H").nearest(tolerance="1H") For more examples of using grouped operations on a time dimension, see diff --git a/doc/weather-climate.rst b/doc/weather-climate.rst index 9e7c0f1d51d..1eb63d24630 100644 --- a/doc/weather-climate.rst +++ b/doc/weather-climate.rst @@ -4,7 +4,7 @@ Weather and climate data ======================== .. ipython:: python - :suppress: + :suppress: import xarray as xr @@ -56,11 +56,14 @@ coordinate with dates from a no-leap calendar and a .. ipython:: python - from itertools import product - from cftime import DatetimeNoLeap - dates = [DatetimeNoLeap(year, month, 1) for year, month in - product(range(1, 3), range(1, 13))] - da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'], name='foo') + from itertools import product + from cftime import DatetimeNoLeap + + dates = [ + DatetimeNoLeap(year, month, 1) + for year, month in product(range(1, 3), range(1, 13)) + ] + da = xr.DataArray(np.arange(24), coords=[dates], dims=["time"], name="foo") xarray also includes a :py:func:`~xarray.cftime_range` function, which enables creating a :py:class:`~xarray.CFTimeIndex` with regularly-spaced dates. For @@ -68,8 +71,8 @@ instance, we can create the same dates and DataArray we created above using: .. ipython:: python - dates = xr.cftime_range(start='0001', periods=24, freq='MS', calendar='noleap') - da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'], name='foo') + dates = xr.cftime_range(start="0001", periods=24, freq="MS", calendar="noleap") + da = xr.DataArray(np.arange(24), coords=[dates], dims=["time"], name="foo") With :py:meth:`~xarray.CFTimeIndex.strftime` we can also easily generate formatted strings from the datetime values of a :py:class:`~xarray.CFTimeIndex` directly or through the @@ -80,8 +83,8 @@ using the same formatting as the standard `datetime.strftime`_ convention . .. ipython:: python - dates.strftime('%c') - da['time'].dt.strftime('%Y%m%d') + dates.strftime("%c") + da["time"].dt.strftime("%Y%m%d") For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: @@ -90,72 +93,74 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: .. ipython:: python - da.sel(time='0001') - da.sel(time=slice('0001-05', '0002-02')) + da.sel(time="0001") + da.sel(time=slice("0001-05", "0002-02")) - Access of basic datetime components via the ``dt`` accessor (in this case just "year", "month", "day", "hour", "minute", "second", "microsecond", - "season", "dayofyear", and "dayofweek"): + "season", "dayofyear", "dayofweek", and "days_in_month"): .. 
ipython:: python - da.time.dt.year - da.time.dt.month - da.time.dt.season - da.time.dt.dayofyear - da.time.dt.dayofweek + da.time.dt.year + da.time.dt.month + da.time.dt.season + da.time.dt.dayofyear + da.time.dt.dayofweek + da.time.dt.days_in_month - Rounding of datetimes to fixed frequencies via the ``dt`` accessor: .. ipython:: python - da.time.dt.ceil('3D') - da.time.dt.floor('5D') - da.time.dt.round('2D') + da.time.dt.ceil("3D") + da.time.dt.floor("5D") + da.time.dt.round("2D") - Group-by operations based on datetime accessor attributes (e.g. by month of the year): .. ipython:: python - da.groupby('time.month').sum() + da.groupby("time.month").sum() - Interpolation using :py:class:`cftime.datetime` objects: .. ipython:: python - da.interp(time=[DatetimeNoLeap(1, 1, 15), DatetimeNoLeap(1, 2, 15)]) + da.interp(time=[DatetimeNoLeap(1, 1, 15), DatetimeNoLeap(1, 2, 15)]) - Interpolation using datetime strings: .. ipython:: python - da.interp(time=['0001-01-15', '0001-02-15']) + da.interp(time=["0001-01-15", "0001-02-15"]) - Differentiation: .. ipython:: python - da.differentiate('time') + da.differentiate("time") - Serialization: .. ipython:: python - da.to_netcdf('example-no-leap.nc') - xr.open_dataset('example-no-leap.nc') + da.to_netcdf("example-no-leap.nc") + xr.open_dataset("example-no-leap.nc") .. ipython:: python :suppress: import os - os.remove('example-no-leap.nc') + + os.remove("example-no-leap.nc") - And resampling along the time dimension for data indexed by a :py:class:`~xarray.CFTimeIndex`: .. ipython:: python - da.resample(time='81T', closed='right', label='right', base=3).mean() + da.resample(time="81T", closed="right", label="right", base=3).mean() .. note:: @@ -167,13 +172,13 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: method: .. ipython:: python - :okwarning: + :okwarning: - modern_times = xr.cftime_range('2000', periods=24, freq='MS', calendar='noleap') - da = xr.DataArray(range(24), [('time', modern_times)]) + modern_times = xr.cftime_range("2000", periods=24, freq="MS", calendar="noleap") + da = xr.DataArray(range(24), [("time", modern_times)]) da - datetimeindex = da.indexes['time'].to_datetimeindex() - da['time'] = datetimeindex + datetimeindex = da.indexes["time"].to_datetimeindex() + da["time"] = datetimeindex However in this case one should use caution to only perform operations which do not depend on differences between dates (e.g. differentiation, diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a138dee4128..051a41a57e5 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -4,13 +4,14 @@ What's New ========== .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xray import xarray import xarray as xr + np.random.seed(123456) .. _whats-new.0.16.0: @@ -35,25 +36,99 @@ New Features :py:func:`combine_by_coords` and :py:func:`combine_nested` using combine_attrs keyword argument. (:issue:`3865`, :pull:`3877`) By `John Omotani `_ +- 'missing_dims' argument to :py:meth:`Dataset.isel`, + `:py:meth:`DataArray.isel` and :py:meth:`Variable.isel` to allow replacing + the exception when a dimension passed to ``isel`` is not present with a + warning, or just ignore the dimension. (:issue:`3866`, :pull:`3923`) + By `John Omotani `_ - Limited the length of array items with long string reprs to a reasonable width (:pull:`3900`) By `Maximilian Roos `_ +- Implement :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`, + :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`. 
(:issue:`60`, :pull:`3871`) + By `Todd Jennings `_ +- More support for unit aware arrays with pint (:pull:`3643`) + By `Justus Magin `_. +- Allow plotting of boolean arrays. (:pull:`3766`) + By `Marek Jacob `_ +- A ``days_in_month`` accessor for :py:class:`xarray.CFTimeIndex`, analogous to + the ``days_in_month`` accessor for a :py:class:`pandas.DatetimeIndex`, which + returns the days in the month each datetime in the index. Now days in month + weights for both standard and non-standard calendars can be obtained using + the :py:class:`~core.accessor_dt.DatetimeAccessor` (:pull:`3935`). This + feature requires cftime version 1.1.0 or greater. By + `Spencer Clark `_. Bug fixes ~~~~~~~~~ - +- ``ValueError`` is raised when ``fill_value`` is not a scalar in :py:meth:`full_like`. (:issue`3977`) + By `Huite Bootsma `_. +- Fix wrong order in converting a ``pd.Series`` with a MultiIndex to ``DataArray``. (:issue:`3951`) + By `Keisuke Fujii `_. +- Fix renaming of coords when one or more stacked coords is not in + sorted order during stack+groupby+apply operations. (:issue:`3287`, + :pull:`3906`) By `Spencer Hill `_ +- Fix a regression where deleting a coordinate from a copied :py:class:`DataArray` + can affect the original :py:class:`DataArray`. (:issue:`3899`, :pull:`3871`) + By `Todd Jennings `_ +- Fix :py:class:`~xarray.plot.FacetGrid` plots with a single contour. (:issue:`3569`, :pull:`3915`). + By `Deepak Cherian `_ +- Use divergent colormap if ``levels`` spans 0. (:issue:`3524`) + By `Deepak Cherian `_ +- Fix :py:class:`~xarray.plot.FacetGrid` when ``vmin == vmax``. (:issue:`3734`) + By `Deepak Cherian `_ +- Fix bug where plotting line plots with 2D coordinates depended on dimension + order. (:issue:`3933`) + By `Tom Nicholas `_. +- Fix ``RasterioDeprecationWarning`` when using a ``vrt`` in ``open_rasterio``. (:issue:`3964`) + By `Taher Chegini `_. +- Fix ``AttributeError`` on displaying a :py:class:`Variable` + in a notebook context. (:issue:`3972`, :pull:`3973`) + By `Ian Castleden `_. +- Fix bug causing :py:meth:`DataArray.interpolate_na` to always drop attributes, + and added `keep_attrs` argument. (:issue:`3968`) + By `Tom Nicholas `_. +- Fix bug in time parsing failing to fall back to cftime. This was causing time + variables with a time unit of `'msecs'` to fail to parse. (:pull:`3998`) + By `Ryan May `_. Documentation ~~~~~~~~~~~~~ - +- update the docstring of :py:meth:`DataArray.assign_coords` : clarify how to + add a new coordinate to an existing dimension and illustrative example + (:issue:`3952`, :pull:`3958`) By + `Etienne Combrisson `_. +- update the docstring of :py:meth:`Dataset.diff` and + :py:meth:`DataArray.diff` so it does document the ``dim`` + parameter as required. (:issue:`1040`, :pull:`3909`) + By `Justus Magin `_. +- Updated :doc:`Calculating Seasonal Averages from Timeseries of Monthly Means + ` example notebook to take advantage of the new + ``days_in_month`` accessor for :py:class:`xarray.CFTimeIndex` + (:pull:`3935`). By `Spencer Clark `_. +- Updated the list of current core developers. (:issue:`3892`) + By `Tom Nicholas `_. +- Add example for multi-dimensional extrapolation and note different behavior + of ``kwargs`` in :py:meth:`Dataset.interp` and :py:meth:`DataArray.interp` + for 1-d and n-d interpolation (:pull:`3956`). + By `Matthias Riße `_. +- Apply ``black`` to all the code in the documentation (:pull:`4012`) + By `Justus Magin `_. 
Internal Changes ~~~~~~~~~~~~~~~~ -- Use ``Fixes`` rather than ``Closes`` in GitHub Pull Request template, allowing - linking to issues. - By `Maximilian Roos `_ - +- Raise more informative error messages for chunk size conflicts when writing to zarr files. + By `Deepak Cherian `_. +- Run the ``isort`` pre-commit hook only on python source files + and update the ``flake8`` version. (:issue:`3750`, :pull:`3711`) + By `Justus Magin `_. +- Add a CI job that runs the tests with every optional dependency + except ``dask``. (:issue:`3794`, :pull:`3919`) + By `Justus Magin `_. +- Use ``async`` / ``await`` for the asynchronous distributed + tests. (:issue:`3987`, :pull:`3989`) + By `Justus Magin `_. .. _whats-new.0.15.1: @@ -86,13 +161,13 @@ New Features - Added support for :py:class:`pandas.DatetimeIndex`-style rounding of ``cftime.datetime`` objects directly via a :py:class:`CFTimeIndex` or via the :py:class:`~core.accessor_dt.DatetimeAccessor`. - By `Spencer Clark `_ + By `Spencer Clark `_ - Support new h5netcdf backend keyword `phony_dims` (available from h5netcdf v0.8.0 for :py:class:`~xarray.backends.H5NetCDFStore`. By `Kai Mühlbauer `_. - Add partial support for unit aware arrays with pint. (:pull:`3706`, :pull:`3611`) By `Justus Magin `_. -- :py:meth:`Dataset.groupby` and :py:meth:`DataArray.groupby` now raise a +- :py:meth:`Dataset.groupby` and :py:meth:`DataArray.groupby` now raise a `TypeError` on multiple string arguments. Receiving multiple string arguments often means a user is attempting to pass multiple dimensions as separate arguments and should instead pass a single list of dimensions. @@ -110,7 +185,7 @@ New Features By `Maximilian Roos `_. - ``skipna`` is available in :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile`, :py:meth:`core.groupby.DatasetGroupBy.quantile`, :py:meth:`core.groupby.DataArrayGroupBy.quantile` - (:issue:`3843`, :pull:`3844`) + (:issue:`3843`, :pull:`3844`) By `Aaron Spring `_. Bug fixes @@ -140,13 +215,13 @@ Bug fixes - xarray now respects the over, under and bad colors if set on a provided colormap. (:issue:`3590`, :pull:`3601`) By `johnomotani `_. -- :py:func:`coarsen` now respects ``xr.set_options(keep_attrs=True)`` +- ``coarsen`` and ``rolling`` now respect ``xr.set_options(keep_attrs=True)`` to preserve attributes. :py:meth:`Dataset.coarsen` accepts a keyword argument ``keep_attrs`` to change this setting. (:issue:`3376`, :pull:`3801`) By `Andrew Thomas `_. - Delete associated indexes when deleting coordinate variables. (:issue:`3746`). By `Deepak Cherian `_. -- Fix :py:meth:`xarray.core.dataset.Dataset.to_zarr` when using `append_dim` and `group` +- Fix :py:meth:`Dataset.to_zarr` when using ``append_dim`` and ``group`` simultaneously. (:issue:`3170`). By `Matthias Meyer `_. - Fix html repr on :py:class:`Dataset` with non-string keys (:pull:`3807`). By `Maximilian Roos `_. @@ -184,7 +259,7 @@ Internal Changes By `Maximilian Roos `_ - Remove xfails for scipy 1.0.1 for tests that append to netCDF files (:pull:`3805`). By `Mathias Hauser `_. -- Remove conversion to :py:class:`pandas.Panel`, given its removal in pandas +- Remove conversion to ``pandas.Panel``, given its removal in pandas in favor of xarray's objects. By `Maximilian Roos `_ @@ -804,7 +879,7 @@ Bug fixes Documentation ~~~~~~~~~~~~~ -- Created a `PR checklist `_ +- Created a `PR checklist `_ as a quick reference for tasks before creating a new PR or pushing new commits. By `Gregory Gundersen `_. @@ -1896,8 +1971,8 @@ Enhancements .. 
ipython:: python - ds = xr.Dataset({'a': 1}) - np.sin(ds) + ds = xr.Dataset({"a": 1}) + np.sin(ds) This obliviates the need for the ``xarray.ufuncs`` module, which will be deprecated in the future when xarray drops support for older versions of @@ -1988,8 +2063,8 @@ Enhancements .. ipython:: python - da = xr.DataArray(np.array([True, False, np.nan], dtype=object), dims='x') - da.sum() + da = xr.DataArray(np.array([True, False, np.nan], dtype=object), dims="x") + da.sum() (:issue:`1866`) By `Keisuke Fujii `_. @@ -2143,7 +2218,7 @@ Breaking changes .. ipython:: :verbatim: - In [1]: ds.resample('24H', dim='time', how='max') + In [1]: ds.resample("24H", dim="time", how="max") Out[1]: [...] @@ -2153,7 +2228,7 @@ Breaking changes .. ipython:: :verbatim: - In [1]: ds.resample(time='24H').max() + In [1]: ds.resample(time="24H").max() Out[1]: [...] @@ -2223,9 +2298,9 @@ Enhancements In [1]: import xarray as xr - In [2]: arr = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=('x', 'y')) + In [2]: arr = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=("x", "y")) - In [3]: xr.where(arr % 2, 'even', 'odd') + In [3]: xr.where(arr % 2, "even", "odd") Out[3]: array([['even', 'odd', 'even'], @@ -2746,7 +2821,7 @@ Breaking changes .. ipython:: :verbatim: - In [1]: xr.Dataset({'foo': (('x', 'y'), [[1, 2]])}) + In [1]: xr.Dataset({"foo": (("x", "y"), [[1, 2]])}) Out[1]: Dimensions: (x: 1, y: 2) @@ -3203,10 +3278,10 @@ Enhancements .. ipython:: :verbatim: - In [1]: import xarray as xr; import numpy as np + In [1]: import xarray as xr + ...: import numpy as np - In [2]: arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), - dims=('x', 'y')) + In [2]: arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), dims=("x", "y")) In [3]: arr Out[3]: @@ -3345,7 +3420,7 @@ Breaking changes .. ipython:: :verbatim: - In [2]: xray.DataArray([4, 5, 6], dims='x', name='x') + In [2]: xray.DataArray([4, 5, 6], dims="x", name="x") Out[2]: array([4, 5, 6]) @@ -3357,7 +3432,7 @@ Breaking changes .. ipython:: :verbatim: - In [2]: xray.DataArray([4, 5, 6], dims='x', name='x') + In [2]: xray.DataArray([4, 5, 6], dims="x", name="x") Out[2]: array([4, 5, 6]) @@ -3380,13 +3455,11 @@ Enhancements .. ipython:: :verbatim: - In [7]: df = pd.DataFrame({'foo': range(3), - ...: 'x': ['a', 'b', 'b'], - ...: 'y': [0, 0, 1]}) + In [7]: df = pd.DataFrame({"foo": range(3), "x": ["a", "b", "b"], "y": [0, 0, 1]}) - In [8]: s = df.set_index(['x', 'y'])['foo'] + In [8]: s = df.set_index(["x", "y"])["foo"] - In [12]: arr = xray.DataArray(s, dims='z') + In [12]: arr = xray.DataArray(s, dims="z") In [13]: arr Out[13]: @@ -3395,13 +3468,13 @@ Enhancements Coordinates: * z (z) object ('a', 0) ('b', 0) ('b', 1) - In [19]: arr.indexes['z'] + In [19]: arr.indexes["z"] Out[19]: MultiIndex(levels=[[u'a', u'b'], [0, 1]], labels=[[0, 1, 1], [0, 0, 1]], names=[u'x', u'y']) - In [14]: arr.unstack('z') + In [14]: arr.unstack("z") Out[14]: array([[ 0., nan], @@ -3410,7 +3483,7 @@ Enhancements * x (x) object 'a' 'b' * y (y) int64 0 1 - In [26]: arr.unstack('z').stack(z=('x', 'y')) + In [26]: arr.unstack("z").stack(z=("x", "y")) Out[26]: array([ 0., nan, 1., 2.]) @@ -3438,9 +3511,9 @@ Enhancements for shifting/rotating datasets or arrays along a dimension: .. ipython:: python - :okwarning: + :okwarning: - array = xray.DataArray([5, 6, 7, 8], dims='x') + array = xray.DataArray([5, 6, 7, 8], dims="x") array.shift(x=2) array.roll(x=2) @@ -3455,8 +3528,8 @@ Enhancements .. 
ipython:: python - a = xray.DataArray([1, 2, 3], dims='x') - b = xray.DataArray([5, 6], dims='y') + a = xray.DataArray([1, 2, 3], dims="x") + b = xray.DataArray([5, 6], dims="y") a b a2, b2 = xray.broadcast(a, b) @@ -3526,9 +3599,9 @@ Enhancements .. ipython:: :verbatim: - In [5]: array = xray.DataArray([1, 2, 3], dims='x') + In [5]: array = xray.DataArray([1, 2, 3], dims="x") - In [6]: array.reindex(x=[0.9, 1.5], method='nearest', tolerance=0.2) + In [6]: array.reindex(x=[0.9, 1.5], method="nearest", tolerance=0.2) Out[6]: array([ 2., nan]) @@ -3608,10 +3681,11 @@ Enhancements .. ipython:: :verbatim: - In [1]: da = xray.DataArray(np.arange(56).reshape((7, 8)), - ...: coords={'x': list('abcdefg'), - ...: 'y': 10 * np.arange(8)}, - ...: dims=['x', 'y']) + In [1]: da = xray.DataArray( + ...: np.arange(56).reshape((7, 8)), + ...: coords={"x": list("abcdefg"), "y": 10 * np.arange(8)}, + ...: dims=["x", "y"], + ...: ) In [2]: da Out[2]: @@ -3628,7 +3702,7 @@ Enhancements * x (x) |S1 'a' 'b' 'c' 'd' 'e' 'f' 'g' # we can index by position along each dimension - In [3]: da.isel_points(x=[0, 1, 6], y=[0, 1, 0], dim='points') + In [3]: da.isel_points(x=[0, 1, 6], y=[0, 1, 0], dim="points") Out[3]: array([ 0, 9, 48]) @@ -3638,7 +3712,7 @@ Enhancements * points (points) int64 0 1 2 # or equivalently by label - In [9]: da.sel_points(x=['a', 'b', 'g'], y=[0, 10, 0], dim='points') + In [9]: da.sel_points(x=["a", "b", "g"], y=[0, 10, 0], dim="points") Out[9]: array([ 0, 9, 48]) @@ -3652,11 +3726,11 @@ Enhancements .. ipython:: python - ds = xray.Dataset(coords={'x': range(100), 'y': range(100)}) - ds['distance'] = np.sqrt(ds.x ** 2 + ds.y ** 2) + ds = xray.Dataset(coords={"x": range(100), "y": range(100)}) + ds["distance"] = np.sqrt(ds.x ** 2 + ds.y ** 2) - @savefig where_example.png width=4in height=4in - ds.distance.where(ds.distance < 100).plot() + @savefig where_example.png width=4in height=4in + ds.distance.where(ds.distance < 100).plot() - Added new methods ``xray.DataArray.diff`` and ``xray.Dataset.diff`` for finite difference calculations along a given axis. @@ -3666,9 +3740,9 @@ Enhancements .. ipython:: python - da = xray.DataArray(np.random.random_sample(size=(5, 4))) - da.where(da < 0.5) - da.where(da < 0.5).to_masked_array(copy=True) + da = xray.DataArray(np.random.random_sample(size=(5, 4))) + da.where(da < 0.5) + da.where(da < 0.5).to_masked_array(copy=True) - Added new flag "drop_variables" to ``xray.open_dataset`` for excluding variables from being parsed. This may be useful to drop @@ -3726,9 +3800,9 @@ Enhancements .. ipython:: :verbatim: - In [1]: years, datasets = zip(*ds.groupby('time.year')) + In [1]: years, datasets = zip(*ds.groupby("time.year")) - In [2]: paths = ['%s.nc' % y for y in years] + In [2]: paths = ["%s.nc" % y for y in years] In [3]: xray.save_mfdataset(datasets, paths) @@ -3801,9 +3875,9 @@ Backwards incompatible changes .. ipython:: :verbatim: - In [1]: ds = xray.Dataset({'x': 0}) + In [1]: ds = xray.Dataset({"x": 0}) - In [2]: xray.concat([ds, ds], dim='y') + In [2]: xray.concat([ds, ds], dim="y") Out[2]: Dimensions: () @@ -3815,13 +3889,13 @@ Backwards incompatible changes Now, the default always concatenates data variables: .. ipython:: python - :suppress: + :suppress: - ds = xray.Dataset({'x': 0}) + ds = xray.Dataset({"x": 0}) .. ipython:: python - xray.concat([ds, ds], dim='y') + xray.concat([ds, ds], dim="y") To obtain the old behavior, supply the argument ``concat_over=[]``. @@ -3834,17 +3908,20 @@ Enhancements .. 
ipython:: python - ds = xray.Dataset({'a': 1, 'b': ('x', [1, 2, 3])}, - coords={'c': 42}, attrs={'Conventions': 'None'}) + ds = xray.Dataset( + {"a": 1, "b": ("x", [1, 2, 3])}, + coords={"c": 42}, + attrs={"Conventions": "None"}, + ) ds.to_array() - ds.to_array().to_dataset(dim='variable') + ds.to_array().to_dataset(dim="variable") - New ``xray.Dataset.fillna`` method to fill missing values, modeled off the pandas method of the same name: .. ipython:: python - array = xray.DataArray([np.nan, 1, np.nan, 3], dims='x') + array = xray.DataArray([np.nan, 1, np.nan, 3], dims="x") array.fillna(0) ``fillna`` works on both ``Dataset`` and ``DataArray`` objects, and uses @@ -3857,9 +3934,9 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'y': ('x', [1, 2, 3])}) - ds.assign(z = lambda ds: ds.y ** 2) - ds.assign_coords(z = ('x', ['a', 'b', 'c'])) + ds = xray.Dataset({"y": ("x", [1, 2, 3])}) + ds.assign(z=lambda ds: ds.y ** 2) + ds.assign_coords(z=("x", ["a", "b", "c"])) These methods return a new Dataset (or DataArray) with updated data or coordinate variables. @@ -3872,7 +3949,7 @@ Enhancements .. ipython:: :verbatim: - In [12]: ds.sel(x=1.1, method='nearest') + In [12]: ds.sel(x=1.1, method="nearest") Out[12]: Dimensions: () @@ -3881,7 +3958,7 @@ Enhancements Data variables: y int64 2 - In [13]: ds.sel(x=[1.1, 2.1], method='pad') + In [13]: ds.sel(x=[1.1, 2.1], method="pad") Out[13]: Dimensions: (x: 2) @@ -3907,7 +3984,7 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'x': np.arange(1000)}) + ds = xray.Dataset({"x": np.arange(1000)}) with xray.set_options(display_width=40): print(ds) @@ -3945,42 +4022,42 @@ Enhancements need to supply the time dimension explicitly: .. ipython:: python - :verbatim: + :verbatim: - time = pd.date_range('2000-01-01', freq='6H', periods=10) - array = xray.DataArray(np.arange(10), [('time', time)]) - array.resample('1D', dim='time') + time = pd.date_range("2000-01-01", freq="6H", periods=10) + array = xray.DataArray(np.arange(10), [("time", time)]) + array.resample("1D", dim="time") You can specify how to do the resampling with the ``how`` argument and other options such as ``closed`` and ``label`` let you control labeling: .. ipython:: python - :verbatim: + :verbatim: - array.resample('1D', dim='time', how='sum', label='right') + array.resample("1D", dim="time", how="sum", label="right") If the desired temporal resolution is higher than the original data (upsampling), xray will insert missing values: .. ipython:: python - :verbatim: + :verbatim: - array.resample('3H', 'time') + array.resample("3H", "time") - ``first`` and ``last`` methods on groupby objects let you take the first or last examples from each group along the grouped axis: .. ipython:: python - :verbatim: + :verbatim: - array.groupby('time.day').first() + array.groupby("time.day").first() These methods combine well with ``resample``: .. ipython:: python - :verbatim: + :verbatim: - array.resample('1D', dim='time', how='first') + array.resample("1D", dim="time", how="first") - ``xray.Dataset.swap_dims`` allows for easily swapping one dimension @@ -3988,9 +4065,9 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'x': range(3), 'y': ('x', list('abc'))}) - ds - ds.swap_dims({'x': 'y'}) + ds = xray.Dataset({"x": range(3), "y": ("x", list("abc"))}) + ds + ds.swap_dims({"x": "y"}) This was possible in earlier versions of xray, but required some contortions. - ``xray.open_dataset`` and ``xray.Dataset.to_netcdf`` now @@ -4036,8 +4113,8 @@ Breaking changes .. 
ipython:: python - lhs = xray.DataArray([1, 2, 3], [('x', [0, 1, 2])]) - rhs = xray.DataArray([2, 3, 4], [('x', [1, 2, 3])]) + lhs = xray.DataArray([1, 2, 3], [("x", [0, 1, 2])]) + rhs = xray.DataArray([2, 3, 4], [("x", [1, 2, 3])]) lhs + rhs :ref:`For dataset construction and merging`, we align based on the @@ -4045,14 +4122,14 @@ Breaking changes .. ipython:: python - xray.Dataset({'foo': lhs, 'bar': rhs}) + xray.Dataset({"foo": lhs, "bar": rhs}) :ref:`For update and __setitem__`, we align based on the **original** object: .. ipython:: python - lhs.coords['rhs'] = rhs + lhs.coords["rhs"] = rhs lhs - Aggregations like ``mean`` or ``median`` now skip missing values by default: @@ -4075,8 +4152,8 @@ Breaking changes .. ipython:: python - a = xray.DataArray([1, 2], coords={'c': 0}, dims='x') - b = xray.DataArray([1, 2], coords={'c': ('x', [0, 0])}, dims='x') + a = xray.DataArray([1, 2], coords={"c": 0}, dims="x") + b = xray.DataArray([1, 2], coords={"c": ("x", [0, 0])}, dims="x") (a + b).coords This functionality can be controlled through the ``compat`` option, which @@ -4087,9 +4164,10 @@ Breaking changes .. ipython:: python - time = xray.DataArray(pd.date_range('2000-01-01', periods=365), - dims='time', name='time') - counts = time.groupby('time.month').count() + time = xray.DataArray( + pd.date_range("2000-01-01", periods=365), dims="time", name="time" + ) + counts = time.groupby("time.month").count() counts.sel(month=2) Previously, you would need to use something like @@ -4099,8 +4177,8 @@ Breaking changes .. ipython:: python - ds = xray.Dataset({'t': pd.date_range('2000-01-01', periods=12, freq='M')}) - ds['t.season'] + ds = xray.Dataset({"t": pd.date_range("2000-01-01", periods=12, freq="M")}) + ds["t.season"] Previously, it returned numbered seasons 1 through 4. - We have updated our use of the terms of "coordinates" and "variables". What @@ -4123,8 +4201,8 @@ Enhancements .. ipython:: python - data = xray.DataArray([1, 2, 3], [('x', range(3))]) - data.reindex(x=[0.5, 1, 1.5, 2, 2.5], method='pad') + data = xray.DataArray([1, 2, 3], [("x", range(3))]) + data.reindex(x=[0.5, 1, 1.5, 2, 2.5], method="pad") This will be especially useful once pandas 0.16 is released, at which point xray will immediately support reindexing with @@ -4143,15 +4221,15 @@ Enhancements makes it easy to drop explicitly listed variables or index labels: .. ipython:: python - :okwarning: + :okwarning: # drop variables - ds = xray.Dataset({'x': 0, 'y': 1}) - ds.drop('x') + ds = xray.Dataset({"x": 0, "y": 1}) + ds.drop("x") # drop index labels - arr = xray.DataArray([1, 2, 3], coords=[('x', list('abc'))]) - arr.drop(['a', 'c'], dim='x') + arr = xray.DataArray([1, 2, 3], coords=[("x", list("abc"))]) + arr.drop(["a", "c"], dim="x") - ``xray.Dataset.broadcast_equals`` has been added to correspond to the new ``compat`` option. @@ -4219,7 +4297,8 @@ Backwards incompatible changes .. ipython:: python from datetime import datetime - xray.Dataset({'t': [datetime(2000, 1, 1)]}) + + xray.Dataset({"t": [datetime(2000, 1, 1)]}) - xray now has support (including serialization to netCDF) for :py:class:`~pandas.TimedeltaIndex`. :py:class:`datetime.timedelta` objects @@ -4235,8 +4314,8 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'tmin': ([], 25, {'units': 'celsius'})}) - ds.tmin.units + ds = xray.Dataset({"tmin": ([], 25, {"units": "celsius"})}) + ds.tmin.units Tab-completion for these variables should work in editors such as IPython. 
However, setting variables or attributes in this fashion is not yet @@ -4246,7 +4325,7 @@ Enhancements .. ipython:: python - array = xray.DataArray(np.zeros(5), dims=['x']) + array = xray.DataArray(np.zeros(5), dims=["x"]) array[dict(x=slice(3))] = 1 array diff --git a/xarray/backends/rasterio_.py b/xarray/backends/rasterio_.py index d041e430db9..77beffd09b1 100644 --- a/xarray/backends/rasterio_.py +++ b/xarray/backends/rasterio_.py @@ -224,7 +224,7 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None, loc crs=vrt.crs.to_string(), resampling=vrt.resampling, src_nodata=vrt.src_nodata, - dst_nodata=vrt.dst_nodata, + nodata=vrt.nodata, tolerance=vrt.tolerance, transform=vrt.transform, width=vrt.width, diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index cdc74e06882..973c167911e 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -56,7 +56,7 @@ def __getitem__(self, key): return array[key.tuple] elif isinstance(key, indexing.VectorizedIndexer): return array.vindex[ - indexing._arrayize_vectorized_indexer(key.tuple, self.shape).tuple + indexing._arrayize_vectorized_indexer(key, self.shape).tuple ] else: assert isinstance(key, indexing.OuterIndexer) @@ -65,7 +65,7 @@ def __getitem__(self, key): # could possibly have a work-around for 0d data here -def _determine_zarr_chunks(enc_chunks, var_chunks, ndim): +def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name): """ Given encoding chunks (possibly None) and variable chunks (possibly None) """ @@ -88,15 +88,16 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim): if var_chunks and enc_chunks is None: if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks): raise ValueError( - "Zarr requires uniform chunk sizes except for final chunk." - " Variable dask chunks %r are incompatible. Consider " - "rechunking using `chunk()`." % (var_chunks,) + "Zarr requires uniform chunk sizes except for final chunk. " + f"Variable named {name!r} has incompatible dask chunks: {var_chunks!r}. " + "Consider rechunking using `chunk()`." ) if any((chunks[0] < chunks[-1]) for chunks in var_chunks): raise ValueError( "Final chunk of Zarr array must be the same size or smaller " - "than the first. Variable Dask chunks %r are incompatible. " - "Consider rechunking using `chunk()`." % var_chunks + f"than the first. Variable named {name!r} has incompatible Dask chunks {var_chunks!r}." + "Consider either rechunking using `chunk()` or instead deleting " + "or modifying `encoding['chunks']`." ) # return the first chunk for each dimension return tuple(chunk[0] for chunk in var_chunks) @@ -114,13 +115,15 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim): if len(enc_chunks_tuple) != ndim: # throw away encoding chunks, start over - return _determine_zarr_chunks(None, var_chunks, ndim) + return _determine_zarr_chunks(None, var_chunks, ndim, name) for x in enc_chunks_tuple: if not isinstance(x, int): raise TypeError( - "zarr chunks must be an int or a tuple of ints. " - "Instead found %r" % (enc_chunks_tuple,) + "zarr chunk sizes specified in `encoding['chunks']` " + "must be an int or a tuple of ints. " + f"Instead found encoding['chunks']={enc_chunks_tuple!r} " + f"for variable named {name!r}." ) # if there are chunks in encoding and the variable data is a numpy array, @@ -142,19 +145,22 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim): for dchunk in dchunks[:-1]: if dchunk % zchunk: raise NotImplementedError( - "Specified zarr chunks %r would overlap multiple dask " - "chunks %r. 
This is not implemented in xarray yet. " - " Consider rechunking the data using " - "`chunk()` or specifying different chunks in encoding." - % (enc_chunks_tuple, var_chunks) + f"Specified zarr chunks encoding['chunks']={enc_chunks_tuple!r} for " + f"variable named {name!r} would overlap multiple dask chunks {var_chunks!r}. " + "This is not implemented in xarray yet. " + "Consider either rechunking using `chunk()` or instead deleting " + "or modifying `encoding['chunks']`." ) if dchunks[-1] > zchunk: raise ValueError( "Final chunk of Zarr array must be the same size or " - "smaller than the first. The specified Zarr chunk " - "encoding is %r, but %r in variable Dask chunks %r is " - "incompatible. Consider rechunking using `chunk()`." - % (enc_chunks_tuple, dchunks, var_chunks) + "smaller than the first. " + f"Specified Zarr chunk encoding['chunks']={enc_chunks_tuple}, " + f"for variable named {name!r} " + f"but {dchunks} in the variable's Dask chunks {var_chunks} is " + "incompatible with this encoding. " + "Consider either rechunking using `chunk()` or instead deleting " + "or modifying `encoding['chunks']`." ) return enc_chunks_tuple @@ -177,7 +183,7 @@ def _get_zarr_dims_and_attrs(zarr_obj, dimension_key): return dimensions, attributes -def extract_zarr_variable_encoding(variable, raise_on_invalid=False): +def extract_zarr_variable_encoding(variable, raise_on_invalid=False, name=None): """ Extract zarr encoding dictionary from xarray Variable @@ -207,7 +213,7 @@ def extract_zarr_variable_encoding(variable, raise_on_invalid=False): del encoding[k] chunks = _determine_zarr_chunks( - encoding.get("chunks"), variable.chunks, variable.ndim + encoding.get("chunks"), variable.chunks, variable.ndim, name ) encoding["chunks"] = chunks return encoding @@ -453,7 +459,9 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No writer.add(v.data, zarr_array, region=tuple(new_region)) else: # new variable - encoding = extract_zarr_variable_encoding(v, raise_on_invalid=check) + encoding = extract_zarr_variable_encoding( + v, raise_on_invalid=check, name=vn + ) encoded_attrs = {} # the magic for storing the hidden dimension data encoded_attrs[DIMENSION_KEY] = dims @@ -586,6 +594,12 @@ def open_zarr( "Instead found %s. 
" % chunks ) + if chunks == "auto": + try: + import dask.array # noqa + except ImportError: + chunks = None + if not decode_cf: mask_and_scale = False decode_times = False diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 2e42702caac..6fc28d213dd 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -243,6 +243,9 @@ class CFTimeIndex(pd.Index): "dayofyr", "The ordinal day of year of the datetime", "1.0.2.1" ) dayofweek = _field_accessor("dayofwk", "The day of week of the datetime", "1.0.2.1") + days_in_month = _field_accessor( + "daysinmonth", "The number of days in the month of the datetime", "1.1.0.0" + ) date_type = property(get_date_type) def __new__(cls, data, name=None): diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 965ddd8f043..d923f1ad088 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -155,7 +155,7 @@ def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None): if use_cftime is None: try: dates = _decode_datetime_with_pandas(flat_num_dates, units, calendar) - except (OutOfBoundsDatetime, OverflowError): + except (KeyError, OutOfBoundsDatetime, OverflowError): dates = _decode_datetime_with_cftime( flat_num_dates.astype(np.float), units, calendar ) diff --git a/xarray/convert.py b/xarray/convert.py index 4974a55d8e2..0c86b090f34 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -229,11 +229,11 @@ def _iris_cell_methods_to_str(cell_methods_obj): """ cell_methods = [] for cell_method in cell_methods_obj: - names = "".join([f"{n}: " for n in cell_method.coord_names]) + names = "".join(f"{n}: " for n in cell_method.coord_names) intervals = " ".join( - [f"interval: {interval}" for interval in cell_method.intervals] + f"interval: {interval}" for interval in cell_method.intervals ) - comments = " ".join([f"comment: {comment}" for comment in cell_method.comments]) + comments = " ".join(f"comment: {comment}" for comment in cell_method.comments) extra = " ".join([intervals, comments]).strip() if extra: extra = f" ({extra})" diff --git a/xarray/core/common.py b/xarray/core/common.py index a003642076f..e343f342040 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -25,7 +25,7 @@ from .options import OPTIONS, _get_keep_attrs from .pycompat import dask_array_type from .rolling_exp import RollingExp -from .utils import Frozen, either_dict_or_kwargs +from .utils import Frozen, either_dict_or_kwargs, is_scalar # Used as a sentinel value to indicate a all dimensions ALL_DIMS = ... @@ -399,10 +399,14 @@ def assign_coords(self, coords=None, **coords_kwargs): Parameters ---------- coords : dict, optional - A dict with keys which are variables names. If the values are - callable, they are computed on this object and assigned to new - coordinate variables. If the values are not callable, - (e.g. a ``DataArray``, scalar, or array), they are simply assigned. + A dict where the keys are the names of the coordinates + with the new values to assign. If the values are callable, they are + computed on this object and assigned to new coordinate variables. + If the values are not callable, (e.g. a ``DataArray``, scalar, or + array), they are simply assigned. A new coordinate can also be + defined and attached to an existing dimension using a tuple with + the first element the dimension name and the second element the + values for this new coordinate. **coords_kwargs : keyword, value pairs, optional The keyword arguments form of ``coords``. 
@@ -440,10 +444,24 @@ def assign_coords(self, coords=None, **coords_kwargs): Coordinates: * lon (lon) int64 -2 -1 0 1 + New coordinate can also be attached to an existing dimension: + + >>> lon_2 = np.array([300, 289, 0, 1]) + >>> da.assign_coords(lon_2=("lon", lon_2)) + + array([0.28298 , 0.667347, 0.657938, 0.177683]) + Coordinates: + * lon (lon) int64 358 359 0 1 + lon_2 (lon) int64 300 289 0 1 + + Note that the same result can also be obtained with a dict e.g. + + >>> _ = da.assign_coords({"lon_2": ("lon", lon_2)}) + Notes ----- - Since ``coords_kwargs`` is a dictionary, the order of your arguments may - not be preserved, and so the order of the new variables is not well + Since ``coords_kwargs`` is a dictionary, the order of your arguments + may not be preserved, and so the order of the new variables is not well defined. Assigning multiple variables within the same ``assign_coords`` is possible, but you cannot reference other variables created within the same ``assign_coords`` call. @@ -1379,6 +1397,9 @@ def full_like(other, fill_value, dtype: DTypeLike = None): from .dataset import Dataset from .variable import Variable + if not is_scalar(fill_value): + raise ValueError(f"fill_value must be scalar. Received {fill_value} instead.") + if isinstance(other, Dataset): data_vars = { k: _full_like_variable(v, fill_value, dtype) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 13bf6248331..a3723ea9db9 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -23,9 +23,10 @@ import numpy as np -from . import duck_array_ops, utils +from . import dtypes, duck_array_ops, utils from .alignment import deep_align from .merge import merge_coordinates_without_align +from .nanops import dask_array from .options import OPTIONS from .pycompat import dask_array_type from .utils import is_dict_like @@ -1191,10 +1192,10 @@ def dot(*arrays, dims=None, **kwargs): # construct einsum subscripts, such as '...abc,...ab->...c' # Note: input_core_dims are always moved to the last position subscripts_list = [ - "..." + "".join([dim_map[d] for d in ds]) for ds in input_core_dims + "..." + "".join(dim_map[d] for d in ds) for ds in input_core_dims ] subscripts = ",".join(subscripts_list) - subscripts += "->..." + "".join([dim_map[d] for d in output_core_dims[0]]) + subscripts += "->..." + "".join(dim_map[d] for d in output_core_dims[0]) join = OPTIONS["arithmetic_join"] # using "inner" emulates `(a * b).sum()` for all joins (except "exact") @@ -1338,3 +1339,66 @@ def polyval(coord, coeffs, degree_dim="degree"): coords={coord.name: coord, degree_dim: np.arange(deg_coord.max() + 1)[::-1]}, ) return (lhs * coeffs).sum(degree_dim) + + +def _calc_idxminmax( + *, + array, + func: Callable, + dim: Hashable = None, + skipna: bool = None, + fill_value: Any = dtypes.NA, + keep_attrs: bool = None, +): + """Apply common operations for idxmin and idxmax.""" + # This function doesn't make sense for scalars so don't try + if not array.ndim: + raise ValueError("This function does not apply for scalars") + + if dim is not None: + pass # Use the dim if available + elif array.ndim == 1: + # it is okay to guess the dim if there is only 1 + dim = array.dims[0] + else: + # The dim is not specified and ambiguous. Don't guess. 
+ raise ValueError("Must supply 'dim' argument for multidimensional arrays") + + if dim not in array.dims: + raise KeyError(f'Dimension "{dim}" not in dimension') + if dim not in array.coords: + raise KeyError(f'Dimension "{dim}" does not have coordinates') + + # These are dtypes with NaN values argmin and argmax can handle + na_dtypes = "cfO" + + if skipna or (skipna is None and array.dtype.kind in na_dtypes): + # Need to skip NaN values since argmin and argmax can't handle them + allna = array.isnull().all(dim) + array = array.where(~allna, 0) + + # This will run argmin or argmax. + indx = func(array, dim=dim, axis=None, keep_attrs=keep_attrs, skipna=skipna) + + # Get the coordinate we want. + coordarray = array[dim] + + # Handle dask arrays. + if isinstance(array, dask_array_type): + res = dask_array.map_blocks(coordarray, indx, dtype=indx.dtype) + else: + res = coordarray[ + indx, + ] + + if skipna or (skipna is None and array.dtype.kind in na_dtypes): + # Put the NaN values back in after removing them + res = res.where(~allna, fill_value) + + # The dim is gone but we need to remove the corresponding coordinate. + del res.coords[dim] + + # Copy attributes from argmin/argmax, if any + res.attrs = indx.attrs + + return res diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index bc6c730c879..45eee2d89bf 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -930,7 +930,10 @@ def copy(self, deep: bool = True, data: Any = None) -> "DataArray": """ variable = self.variable.copy(deep=deep, data=data) coords = {k: v.copy(deep=deep) for k, v in self._coords.items()} - indexes = self._indexes + if self._indexes is None: + indexes = self._indexes + else: + indexes = {k: v.copy(deep=deep) for k, v in self._indexes.items()} return self._replace(variable, coords, indexes=indexes) def __copy__(self) -> "DataArray": @@ -1004,25 +1007,51 @@ def isel( self, indexers: Mapping[Hashable, Any] = None, drop: bool = False, + missing_dims: str = "raise", **indexers_kwargs: Any, ) -> "DataArray": """Return a new DataArray whose data is given by integer indexing along the specified dimension(s). + Parameters + ---------- + indexers : dict, optional + A dict with keys matching dimensions and values given + by integers, slice objects or arrays. + indexer can be a integer, slice, array-like or DataArray. + If DataArrays are passed as indexers, xarray-style indexing will be + carried out. See :ref:`indexing` for the details. + One of indexers or indexers_kwargs must be provided. + drop : bool, optional + If ``drop=True``, drop coordinates variables indexed by integers + instead of making them scalar. + missing_dims : {"raise", "warn", "ignore"}, default "raise" + What to do if dimensions that should be selected from are not present in the + DataArray: + - "exception": raise an exception + - "warning": raise a warning, and ignore the missing dimensions + - "ignore": ignore the missing dimensions + **indexers_kwargs : {dim: indexer, ...}, optional + The keyword arguments form of ``indexers``. 
+ See Also -------- Dataset.isel DataArray.sel """ + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel") + if any(is_fancy_indexer(idx) for idx in indexers.values()): - ds = self._to_temp_dataset()._isel_fancy(indexers, drop=drop) + ds = self._to_temp_dataset()._isel_fancy( + indexers, drop=drop, missing_dims=missing_dims + ) return self._from_temp_dataset(ds) # Much faster algorithm for when all indexers are ints, slices, one-dimensional # lists, or zero or one-dimensional np.ndarray's - variable = self._variable.isel(indexers) + variable = self._variable.isel(indexers, missing_dims=missing_dims) coords = {} for coord_name, coord_value in self._coords.items(): @@ -1337,7 +1366,9 @@ def interp( first. If True, x has to be an array of monotonically increasing values. kwargs: dictionary - Additional keyword passed to scipy's interpolator. + Additional keyword arguments passed to scipy's interpolator. Valid + options and their behavior depend on if 1-dimensional or + multi-dimensional interpolation is used. ``**coords_kwargs`` : {dim: coordinate, ...}, optional The keyword arguments form of ``coords``. One of coords or coords_kwargs must be provided. @@ -2069,6 +2100,7 @@ def interpolate_na( max_gap: Union[ int, float, str, pd.Timedelta, np.timedelta64, datetime.timedelta ] = None, + keep_attrs: bool = None, **kwargs: Any, ) -> "DataArray": """Fill in NaNs by interpolating according to different methods. @@ -2123,6 +2155,10 @@ def interpolate_na( * x (x) int64 0 1 2 3 4 5 6 7 8 The gap lengths are 3-0 = 3; 6-3 = 3; and 8-6 = 2 respectively + keep_attrs : bool, default True + If True, the dataarray's attributes (`attrs`) will be copied from + the original object to the new one. If False, the new + object will be returned without attributes. kwargs : dict, optional parameters passed verbatim to the underlying interpolation function @@ -2145,6 +2181,7 @@ def interpolate_na( limit=limit, use_coordinate=use_coordinate, max_gap=max_gap, + keep_attrs=keep_attrs, **kwargs, ) @@ -2696,7 +2733,7 @@ def diff(self, dim: Hashable, n: int = 1, label: Hashable = "upper") -> "DataArr Parameters ---------- - dim : hashable, optional + dim : hashable Dimension over which to calculate the finite difference. n : int, optional The number of times values are differenced. @@ -3462,17 +3499,18 @@ def pad( Examples -------- - >>> arr = xr.DataArray([5, 6, 7], coords=[("x", [0,1,2])]) - >>> arr.pad(x=(1,2), constant_values=0) + >>> arr = xr.DataArray([5, 6, 7], coords=[("x", [0, 1, 2])]) + >>> arr.pad(x=(1, 2), constant_values=0) array([0, 5, 6, 7, 0, 0]) Coordinates: * x (x) float64 nan 0.0 1.0 2.0 nan nan - >>> da = xr.DataArray([[0,1,2,3], [10,11,12,13]], - dims=["x", "y"], - coords={"x": [0,1], "y": [10, 20 ,30, 40], "z": ("x", [100, 200])} - ) + >>> da = xr.DataArray( + ... [[0, 1, 2, 3], [10, 11, 12, 13]], + ... dims=["x", "y"], + ... coords={"x": [0, 1], "y": [10, 20, 30, 40], "z": ("x", [100, 200])}, + ... ) >>> da.pad(x=1) array([[nan, nan, nan, nan], @@ -3509,6 +3547,200 @@ def pad( ) return self._from_temp_dataset(ds) + def idxmin( + self, + dim: Hashable = None, + skipna: bool = None, + fill_value: Any = dtypes.NA, + keep_attrs: bool = None, + ) -> "DataArray": + """Return the coordinate label of the minimum value along a dimension. + + Returns a new `DataArray` named after the dimension with the values of + the coordinate labels along that dimension corresponding to minimum + values along that dimension. 
+ + In comparison to :py:meth:`~DataArray.argmin`, this returns the + coordinate label while :py:meth:`~DataArray.argmin` returns the index. + + Parameters + ---------- + dim : str, optional + Dimension over which to apply `idxmin`. This is optional for 1D + arrays, but required for arrays with 2 or more dimensions. + skipna : bool or None, default None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for ``float``, ``complex``, and ``object`` + dtypes; other dtypes either do not have a sentinel missing value + (``int``) or ``skipna=True`` has not been implemented + (``datetime64`` or ``timedelta64``). + fill_value : Any, default NaN + Value to be filled in case all of the values along a dimension are + null. By default this is NaN. The fill value and result are + automatically converted to a compatible dtype if possible. + Ignored if ``skipna`` is False. + keep_attrs : bool, default False + If True, the attributes (``attrs``) will be copied from the + original object to the new one. If False (default), the new object + will be returned without attributes. + + Returns + ------- + reduced : DataArray + New `DataArray` object with `idxmin` applied to its data and the + indicated dimension removed. + + See also + -------- + Dataset.idxmin, DataArray.idxmax, DataArray.min, DataArray.argmin + + Examples + -------- + + >>> array = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) + >>> array.min() + + array(-2) + >>> array.argmin() + + array(4) + >>> array.idxmin() + + array('e', dtype='>> array = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": np.arange(5.0) ** 2}, + ... ) + >>> array.min(dim="x") + + array([-2., -4., 1.]) + Coordinates: + * y (y) int64 -1 0 1 + >>> array.argmin(dim="x") + + array([4, 0, 2]) + Coordinates: + * y (y) int64 -1 0 1 + >>> array.idxmin(dim="x") + + array([16., 0., 4.]) + Coordinates: + * y (y) int64 -1 0 1 + """ + return computation._calc_idxminmax( + array=self, + func=lambda x, *args, **kwargs: x.argmin(*args, **kwargs), + dim=dim, + skipna=skipna, + fill_value=fill_value, + keep_attrs=keep_attrs, + ) + + def idxmax( + self, + dim: Hashable = None, + skipna: bool = None, + fill_value: Any = dtypes.NA, + keep_attrs: bool = None, + ) -> "DataArray": + """Return the coordinate label of the maximum value along a dimension. + + Returns a new `DataArray` named after the dimension with the values of + the coordinate labels along that dimension corresponding to maximum + values along that dimension. + + In comparison to :py:meth:`~DataArray.argmax`, this returns the + coordinate label while :py:meth:`~DataArray.argmax` returns the index. + + Parameters + ---------- + dim : str, optional + Dimension over which to apply `idxmax`. This is optional for 1D + arrays, but required for arrays with 2 or more dimensions. + skipna : bool or None, default None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for ``float``, ``complex``, and ``object`` + dtypes; other dtypes either do not have a sentinel missing value + (``int``) or ``skipna=True`` has not been implemented + (``datetime64`` or ``timedelta64``). + fill_value : Any, default NaN + Value to be filled in case all of the values along a dimension are + null. By default this is NaN. 
The fill value and result are + automatically converted to a compatible dtype if possible. + Ignored if ``skipna`` is False. + keep_attrs : bool, default False + If True, the attributes (``attrs``) will be copied from the + original object to the new one. If False (default), the new object + will be returned without attributes. + + Returns + ------- + reduced : DataArray + New `DataArray` object with `idxmax` applied to its data and the + indicated dimension removed. + + See also + -------- + Dataset.idxmax, DataArray.idxmin, DataArray.max, DataArray.argmax + + Examples + -------- + + >>> array = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) + >>> array.max() + + array(2) + >>> array.argmax() + + array(1) + >>> array.idxmax() + + array('b', dtype='>> array = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": np.arange(5.0) ** 2}, + ... ) + >>> array.max(dim="x") + + array([2., 2., 1.]) + Coordinates: + * y (y) int64 -1 0 1 + >>> array.argmax(dim="x") + + array([0, 2, 2]) + Coordinates: + * y (y) int64 -1 0 1 + >>> array.idxmax(dim="x") + + array([0., 4., 4.]) + Coordinates: + * y (y) int64 -1 0 1 + """ + return computation._calc_idxminmax( + array=self, + func=lambda x, *args, **kwargs: x.argmax(*args, **kwargs), + dim=dim, + skipna=skipna, + fill_value=fill_value, + keep_attrs=keep_attrs, + ) + # this needs to be at the end, or mypy will confuse with `str` # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names str = property(StringAccessor) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index c1b02aa7969..2ff585acb7f 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6,6 +6,7 @@ from collections import defaultdict from html import escape from numbers import Number +from operator import methodcaller from pathlib import Path from typing import ( TYPE_CHECKING, @@ -86,6 +87,7 @@ _check_inplace, _default, decode_numpy_dict_values, + drop_dims_from_indexers, either_dict_or_kwargs, hashable, infix_dims, @@ -1053,9 +1055,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": structure of the original object, but with the new data. Original object is unaffected. - >>> ds.copy( - ... data={"foo": np.arange(6).reshape(2, 3), "bar": ["a", "b"]} - ... 
) + >>> ds.copy(data={"foo": np.arange(6).reshape(2, 3), "bar": ["a", "b"]}) Dimensions: (dim_0: 2, dim_1: 3, x: 2) Coordinates: @@ -1766,7 +1766,7 @@ def maybe_chunk(name, var, chunks): return self._replace(variables) def _validate_indexers( - self, indexers: Mapping[Hashable, Any] + self, indexers: Mapping[Hashable, Any], missing_dims: str = "raise", ) -> Iterator[Tuple[Hashable, Union[int, slice, np.ndarray, Variable]]]: """ Here we make sure + indexer has a valid keys @@ -1776,9 +1776,7 @@ def _validate_indexers( """ from .dataarray import DataArray - invalid = indexers.keys() - self.dims.keys() - if invalid: - raise ValueError("dimensions %r do not exist" % invalid) + indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims) # all indexers should be int, slice, np.ndarrays, or Variable for k, v in indexers.items(): @@ -1874,6 +1872,7 @@ def isel( self, indexers: Mapping[Hashable, Any] = None, drop: bool = False, + missing_dims: str = "raise", **indexers_kwargs: Any, ) -> "Dataset": """Returns a new dataset with each array indexed along the specified @@ -1895,6 +1894,12 @@ def isel( drop : bool, optional If ``drop=True``, drop coordinates variables indexed by integers instead of making them scalar. + missing_dims : {"raise", "warn", "ignore"}, default "raise" + What to do if dimensions that should be selected from are not present in the + Dataset: + - "exception": raise an exception + - "warning": raise a warning, and ignore the missing dimensions + - "ignore": ignore the missing dimensions **indexers_kwargs : {dim: indexer, ...}, optional The keyword arguments form of ``indexers``. One of indexers or indexers_kwargs must be provided. @@ -1917,13 +1922,11 @@ def isel( """ indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel") if any(is_fancy_indexer(idx) for idx in indexers.values()): - return self._isel_fancy(indexers, drop=drop) + return self._isel_fancy(indexers, drop=drop, missing_dims=missing_dims) # Much faster algorithm for when all indexers are ints, slices, one-dimensional # lists, or zero or one-dimensional np.ndarray's - invalid = indexers.keys() - self.dims.keys() - if invalid: - raise ValueError("dimensions %r do not exist" % invalid) + indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims) variables = {} dims: Dict[Hashable, Tuple[int, ...]] = {} @@ -1957,10 +1960,16 @@ def isel( file_obj=self._file_obj, ) - def _isel_fancy(self, indexers: Mapping[Hashable, Any], *, drop: bool) -> "Dataset": + def _isel_fancy( + self, + indexers: Mapping[Hashable, Any], + *, + drop: bool, + missing_dims: str = "raise", + ) -> "Dataset": # Note: we need to preserve the original indexers variable in order to merge the # coords below - indexers_list = list(self._validate_indexers(indexers)) + indexers_list = list(self._validate_indexers(indexers, missing_dims)) variables: Dict[Hashable, Variable] = {} indexes: Dict[Hashable, pd.Index] = {} @@ -2559,7 +2568,9 @@ def interp( coordinates are assumed to be an array of monotonically increasing values. kwargs: dictionary, optional - Additional keyword passed to scipy's interpolator. + Additional keyword arguments passed to scipy's interpolator. Valid + options and their behavior depend on if 1-dimensional or + multi-dimensional interpolation is used. **coords_kwargs : {dim: coordinate, ...}, optional The keyword arguments form of ``coords``. One of coords or coords_kwargs must be provided. 
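A quick sketch, not part of the diff, of the new ``missing_dims`` keyword
threaded through ``isel`` above; the toy dataset is made up, and only the
values ``"raise"`` (the default), ``"warn"`` and ``"ignore"`` are accepted::

    import xarray as xr

    ds = xr.Dataset({"a": ("x", [1, 2, 3])})

    # default behaviour is unchanged: indexing a dimension that does not
    # exist still raises a ValueError
    # ds.isel(y=0)

    # with missing_dims="ignore" the indexer on the unknown dimension is
    # silently dropped, so this returns a dataset identical to ds
    ds.isel(y=0, missing_dims="ignore")

    # missing_dims="warn" also drops it, but emits a warning naming "y"
    ds.isel(x=0, y=0, missing_dims="warn")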
@@ -4585,6 +4596,7 @@ def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> "Datas See also -------- xarray.DataArray.from_series + pandas.DataFrame.to_xarray """ # TODO: Add an option to remove dimensions along which the variables # are constant, to enable consistent serialization to/from a dataframe, @@ -4593,8 +4605,7 @@ def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> "Datas if not dataframe.columns.is_unique: raise ValueError("cannot convert DataFrame with non-unique columns") - idx = remove_unused_levels_categories(dataframe.index) - dataframe = dataframe.set_index(idx) + idx, dataframe = remove_unused_levels_categories(dataframe.index, dataframe) obj = cls() if isinstance(idx, pd.MultiIndex): @@ -4901,7 +4912,7 @@ def diff(self, dim, n=1, label="upper"): Parameters ---------- - dim : str, optional + dim : str Dimension over which to calculate the finite difference. n : int, optional The number of times values are differenced. @@ -6052,8 +6063,8 @@ def pad( Examples -------- - >>> ds = xr.Dataset({'foo': ('x', range(5))}) - >>> ds.pad(x=(1,2)) + >>> ds = xr.Dataset({"foo": ("x", range(5))}) + >>> ds.pad(x=(1, 2)) Dimensions: (x: 8) Dimensions without coordinates: x @@ -6097,5 +6108,201 @@ def pad( return self._replace_vars_and_dims(variables) + def idxmin( + self, + dim: Hashable = None, + skipna: bool = None, + fill_value: Any = dtypes.NA, + keep_attrs: bool = None, + ) -> "Dataset": + """Return the coordinate label of the minimum value along a dimension. + + Returns a new `Dataset` named after the dimension with the values of + the coordinate labels along that dimension corresponding to minimum + values along that dimension. + + In comparison to :py:meth:`~Dataset.argmin`, this returns the + coordinate label while :py:meth:`~Dataset.argmin` returns the index. + + Parameters + ---------- + dim : str, optional + Dimension over which to apply `idxmin`. This is optional for 1D + variables, but required for variables with 2 or more dimensions. + skipna : bool or None, default None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for ``float``, ``complex``, and ``object`` + dtypes; other dtypes either do not have a sentinel missing value + (``int``) or ``skipna=True`` has not been implemented + (``datetime64`` or ``timedelta64``). + fill_value : Any, default NaN + Value to be filled in case all of the values along a dimension are + null. By default this is NaN. The fill value and result are + automatically converted to a compatible dtype if possible. + Ignored if ``skipna`` is False. + keep_attrs : bool, default False + If True, the attributes (``attrs``) will be copied from the + original object to the new one. If False (default), the new object + will be returned without attributes. + + Returns + ------- + reduced : Dataset + New `Dataset` object with `idxmin` applied to its data and the + indicated dimension removed. + + See also + -------- + DataArray.idxmin, Dataset.idxmax, Dataset.min, Dataset.argmin + + Examples + -------- + + >>> array1 = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) + >>> array2 = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": ["a", "b", "c", "d", "e"]}, + ... 
) + >>> ds = xr.Dataset({"int": array1, "float": array2}) + >>> ds.min(dim="x") + + Dimensions: (y: 3) + Coordinates: + * y (y) int64 -1 0 1 + Data variables: + int int64 -2 + float (y) float64 -2.0 -4.0 1.0 + >>> ds.argmin(dim="x") + + Dimensions: (y: 3) + Coordinates: + * y (y) int64 -1 0 1 + Data variables: + int int64 4 + float (y) int64 4 0 2 + >>> ds.idxmin(dim="x") + + Dimensions: (y: 3) + Coordinates: + * y (y) int64 -1 0 1 + Data variables: + int "Dataset": + """Return the coordinate label of the maximum value along a dimension. + + Returns a new `Dataset` named after the dimension with the values of + the coordinate labels along that dimension corresponding to maximum + values along that dimension. + + In comparison to :py:meth:`~Dataset.argmax`, this returns the + coordinate label while :py:meth:`~Dataset.argmax` returns the index. + + Parameters + ---------- + dim : str, optional + Dimension over which to apply `idxmax`. This is optional for 1D + variables, but required for variables with 2 or more dimensions. + skipna : bool or None, default None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for ``float``, ``complex``, and ``object`` + dtypes; other dtypes either do not have a sentinel missing value + (``int``) or ``skipna=True`` has not been implemented + (``datetime64`` or ``timedelta64``). + fill_value : Any, default NaN + Value to be filled in case all of the values along a dimension are + null. By default this is NaN. The fill value and result are + automatically converted to a compatible dtype if possible. + Ignored if ``skipna`` is False. + keep_attrs : bool, default False + If True, the attributes (``attrs``) will be copied from the + original object to the new one. If False (default), the new object + will be returned without attributes. + + Returns + ------- + reduced : Dataset + New `Dataset` object with `idxmax` applied to its data and the + indicated dimension removed. + + See also + -------- + DataArray.idxmax, Dataset.idxmin, Dataset.max, Dataset.argmax + + Examples + -------- + + >>> array1 = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) + >>> array2 = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": ["a", "b", "c", "d", "e"]}, + ... ) + >>> ds = xr.Dataset({"int": array1, "float": array2}) + >>> ds.max(dim="x") + + Dimensions: (y: 3) + Coordinates: + * y (y) int64 -1 0 1 + Data variables: + int int64 2 + float (y) float64 2.0 2.0 1.0 + >>> ds.argmax(dim="x") + + Dimensions: (y: 3) + Coordinates: + * y (y) int64 -1 0 1 + Data variables: + int int64 1 + float (y) int64 0 2 2 + >>> ds.idxmax(dim="x") + + Dimensions: (y: 3) + Coordinates: + * y (y) int64 -1 0 1 + Data variables: + int Hashable: return new_dim +def drop_dims_from_indexers( + indexers: Mapping[Hashable, Any], + dims: Union[list, Mapping[Hashable, int]], + missing_dims: str, +) -> Mapping[Hashable, Any]: + """ Depending on the setting of missing_dims, drop any dimensions from indexers that + are not present in dims. + + Parameters + ---------- + indexers : dict + dims : sequence + missing_dims : {"raise", "warn", "ignore"} + """ + + if missing_dims == "raise": + invalid = indexers.keys() - set(dims) + if invalid: + raise ValueError( + f"dimensions {invalid} do not exist. 
Expected one or more of {dims}" + ) + + return indexers + + elif missing_dims == "warn": + + # don't modify input + indexers = dict(indexers) + + invalid = indexers.keys() - set(dims) + if invalid: + warnings.warn( + f"dimensions {invalid} do not exist. Expected one or more of {dims}" + ) + for key in invalid: + indexers.pop(key) + + return indexers + + elif missing_dims == "ignore": + return {key: val for key, val in indexers.items() if key in dims} + + else: + raise ValueError( + f"Unrecognised option {missing_dims} for missing_dims argument" + ) + + # Singleton type, as per https://github.com/python/typing/pull/240 class Default(Enum): token = 0 diff --git a/xarray/core/variable.py b/xarray/core/variable.py index c9addeefb04..68e823ca426 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -28,6 +28,7 @@ OrderedSet, _default, decode_numpy_dict_values, + drop_dims_from_indexers, either_dict_or_kwargs, ensure_us_time_resolution, infix_dims, @@ -1030,6 +1031,7 @@ def _to_dense(self): def isel( self: VariableType, indexers: Mapping[Hashable, Any] = None, + missing_dims: str = "raise", **indexers_kwargs: Any, ) -> VariableType: """Return a new array indexed along the specified dimension(s). @@ -1039,6 +1041,12 @@ def isel( **indexers : {dim: indexer, ...} Keyword arguments with names matching dimensions and values given by integers, slice objects or arrays. + missing_dims : {"raise", "warn", "ignore"}, default "raise" + What to do if dimensions that should be selected from are not present in the + DataArray: + - "exception": raise an exception + - "warning": raise a warning, and ignore the missing dimensions + - "ignore": ignore the missing dimensions Returns ------- @@ -1050,11 +1058,7 @@ def isel( """ indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel") - invalid = indexers.keys() - set(self.dims) - if invalid: - raise ValueError( - f"dimensions {invalid} do not exist. 
Expected one or more of {self.dims}" - ) + indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims) key = tuple(indexers.get(dim, slice(None)) for dim in self.dims) return self[key] diff --git a/xarray/plot/facetgrid.py b/xarray/plot/facetgrid.py index 4f3268c1203..819eded694e 100644 --- a/xarray/plot/facetgrid.py +++ b/xarray/plot/facetgrid.py @@ -273,7 +273,9 @@ def map_dataarray(self, func, x, y, **kwargs): # None is the sentinel value if d is not None: subset = self.data.loc[d] - mappable = func(subset, x=x, y=y, ax=ax, **func_kwargs) + mappable = func( + subset, x=x, y=y, ax=ax, **func_kwargs, _is_facetgrid=True + ) self._mappables.append(mappable) self._finalize_grid(x, y) diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 302cac05b05..4d6033bf00d 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -30,7 +30,7 @@ def _infer_line_data(darray, x, y, hue): error_msg = "must be either None or one of ({:s})".format( - ", ".join([repr(dd) for dd in darray.dims]) + ", ".join(repr(dd) for dd in darray.dims) ) ndims = len(darray.dims) @@ -93,6 +93,7 @@ def _infer_line_data(darray, x, y, hue): otherindex = 1 if darray.dims.index(huename) == 0 else 0 otherdim = darray.dims[otherindex] xplt = darray.transpose(otherdim, huename, transpose_coords=False) + yplt = yplt.transpose(otherdim, huename, transpose_coords=False) else: raise ValueError( "For 2D inputs, hue must be a dimension" @@ -689,10 +690,13 @@ def newplotfunc( xplt, xlab_extra = _resolve_intervals_2dplot(xval, plotfunc.__name__) yplt, ylab_extra = _resolve_intervals_2dplot(yval, plotfunc.__name__) - _ensure_plottable(xplt, yplt) + _ensure_plottable(xplt, yplt, zval) cmap_params, cbar_kwargs = _process_cmap_cbar_kwargs( - plotfunc, zval.data, **locals() + plotfunc, + zval.data, + **locals(), + _is_facetgrid=kwargs.pop("_is_facetgrid", False), ) if "contour" in plotfunc.__name__: diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index e6c15037cb8..c3512828888 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -153,6 +153,7 @@ def _determine_cmap_params( levels=None, filled=True, norm=None, + _is_facetgrid=False, ): """ Use some heuristics to set good defaults for colorbar and range. @@ -169,6 +170,9 @@ def _determine_cmap_params( """ import matplotlib as mpl + if isinstance(levels, Iterable): + levels = sorted(levels) + calc_data = np.ravel(plot_data[np.isfinite(plot_data)]) # Handle all-NaN input data gracefully @@ -216,8 +220,13 @@ def _determine_cmap_params( vlim = abs(vmax - center) if possibly_divergent: + levels_are_divergent = ( + isinstance(levels, Iterable) and levels[0] * levels[-1] < 0 + ) # kwargs not specific about divergent or not: infer defaults from data - divergent = ((vmin < 0) and (vmax > 0)) or not center_is_none + divergent = ( + ((vmin < 0) and (vmax > 0)) or not center_is_none or levels_are_divergent + ) else: divergent = False @@ -237,18 +246,14 @@ def _determine_cmap_params( norm.vmin = vmin else: if not vmin_was_none and vmin != norm.vmin: - raise ValueError( - "Cannot supply vmin and a norm" + " with a different vmin." - ) + raise ValueError("Cannot supply vmin and a norm with a different vmin.") vmin = norm.vmin if norm.vmax is None: norm.vmax = vmax else: if not vmax_was_none and vmax != norm.vmax: - raise ValueError( - "Cannot supply vmax and a norm" + " with a different vmax." 
- ) + raise ValueError("Cannot supply vmax and a norm with a different vmax.") vmax = norm.vmax # if BoundaryNorm, then set levels @@ -275,6 +280,10 @@ def _determine_cmap_params( levels = ticker.tick_values(vmin, vmax) vmin, vmax = levels[0], levels[-1] + # GH3734 + if vmin == vmax: + vmin, vmax = mpl.ticker.LinearLocator(2).tick_values(vmin, vmax) + if extend is None: extend = _determine_extend(calc_data, vmin, vmax) @@ -534,7 +543,7 @@ def _ensure_plottable(*args): Raise exception if there is anything in args that can't be plotted on an axis by matplotlib. """ - numpy_types = [np.floating, np.integer, np.timedelta64, np.datetime64] + numpy_types = [np.floating, np.integer, np.timedelta64, np.datetime64, np.bool_] other_types = [datetime] try: import cftime @@ -549,10 +558,10 @@ def _ensure_plottable(*args): or _valid_other_type(np.array(x), other_types) ): raise TypeError( - "Plotting requires coordinates to be numeric " - "or dates of type np.datetime64, " + "Plotting requires coordinates to be numeric, boolean, " + "or dates of type numpy.datetime64, " "datetime.datetime, cftime.datetime or " - "pd.Interval." + f"pandas.Interval. Received data of type {np.array(x).dtype} instead." ) if ( _valid_other_type(np.array(x), cftime_datetime) @@ -573,14 +582,15 @@ def _is_numeric(arr): def _add_colorbar(primitive, ax, cbar_ax, cbar_kwargs, cmap_params): - plt = import_matplotlib_pyplot() + cbar_kwargs.setdefault("extend", cmap_params["extend"]) if cbar_ax is None: cbar_kwargs.setdefault("ax", ax) else: cbar_kwargs.setdefault("cax", cbar_ax) - cbar = plt.colorbar(primitive, **cbar_kwargs) + fig = ax.get_figure() + cbar = fig.colorbar(primitive, **cbar_kwargs) return cbar @@ -727,6 +737,7 @@ def _process_cmap_cbar_kwargs( colors=None, cbar_kwargs: Union[Iterable[Tuple[str, Any]], Mapping[str, Any]] = None, levels=None, + _is_facetgrid=False, **kwargs, ): """ @@ -773,6 +784,12 @@ def _process_cmap_cbar_kwargs( cmap_args = getfullargspec(_determine_cmap_params).args cmap_kwargs.update((a, kwargs[a]) for a in cmap_args if a in kwargs) - cmap_params = _determine_cmap_params(**cmap_kwargs) + if not _is_facetgrid: + cmap_params = _determine_cmap_params(**cmap_kwargs) + else: + cmap_params = { + k: cmap_kwargs[k] + for k in ["vmin", "vmax", "cmap", "extend", "levels", "norm"] + } return cmap_params, cbar_kwargs diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index df86b5715e9..40c5cfa267c 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -62,6 +62,7 @@ def LooseVersion(vstring): has_pynio, requires_pynio = _importorskip("Nio") has_pseudonetcdf, requires_pseudonetcdf = _importorskip("PseudoNetCDF") has_cftime, requires_cftime = _importorskip("cftime") +has_cftime_1_1_0, requires_cftime_1_1_0 = _importorskip("cftime", minversion="1.1.0.0") has_dask, requires_dask = _importorskip("dask") has_bottleneck, requires_bottleneck = _importorskip("bottleneck") has_nc_time_axis, requires_nc_time_axis = _importorskip("nc_time_axis") diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 82fe1b38149..916c29ba7bd 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -581,6 +581,10 @@ def test_orthogonal_indexing(self): actual = on_disk.isel(**indexers) assert_identical(expected, actual) + @pytest.mark.xfail( + not has_dask, + reason="the code for indexing without dask handles negative steps in slices incorrectly", + ) def test_vectorized_indexing(self): in_memory = create_test_data() with self.roundtrip(in_memory) as 
on_disk: @@ -1539,6 +1543,7 @@ def test_roundtrip_consolidated(self): self.check_dtypes_roundtripped(expected, actual) assert_identical(expected, actual) + @requires_dask def test_auto_chunk(self): original = create_test_data().chunk() @@ -1556,6 +1561,7 @@ def test_auto_chunk(self): # chunk size should be the same as original assert v.chunks == original[k].chunks + @requires_dask @pytest.mark.filterwarnings("ignore:Specified Dask chunks") def test_manual_chunk(self): original = create_test_data().chunk({"dim1": 3, "dim2": 4, "dim3": 3}) @@ -1598,6 +1604,7 @@ def test_manual_chunk(self): assert_identical(actual, auto) assert_identical(actual.load(), auto.load()) + @requires_dask def test_warning_on_bad_chunks(self): original = create_test_data().chunk({"dim1": 4, "dim2": 3, "dim3": 5}) @@ -1620,6 +1627,7 @@ def test_warning_on_bad_chunks(self): assert v._in_memory == (k in actual.dims) assert len(record) == 0 + @requires_dask def test_deprecate_auto_chunk(self): original = create_test_data().chunk() with pytest.warns(FutureWarning): @@ -1638,6 +1646,7 @@ def test_deprecate_auto_chunk(self): # there should be no chunks assert v.chunks is None + @requires_dask def test_write_uneven_dask_chunks(self): # regression for GH#2225 original = create_test_data().chunk({"dim1": 3, "dim2": 4, "dim3": 3}) @@ -1662,6 +1671,7 @@ def test_chunk_encoding(self): with self.roundtrip(data) as actual: pass + @requires_dask def test_chunk_encoding_with_dask(self): # These datasets DO have dask chunks. Need to check for various # interactions between dask and zarr chunks @@ -1675,11 +1685,27 @@ def test_chunk_encoding_with_dask(self): # should fail if dask_chunks are irregular... ds_chunk_irreg = ds.chunk({"x": (5, 4, 3)}) - with pytest.raises(ValueError) as e_info: + with raises_regex(ValueError, "uniform chunk sizes."): with self.roundtrip(ds_chunk_irreg) as actual: pass - # make sure this error message is correct and not some other error - assert e_info.match("chunks") + + # should fail if encoding["chunks"] clashes with dask_chunks + badenc = ds.chunk({"x": 4}) + badenc.var1.encoding["chunks"] = (6,) + with raises_regex(NotImplementedError, "named 'var1' would overlap"): + with self.roundtrip(badenc) as actual: + pass + + badenc.var1.encoding["chunks"] = (2,) + with raises_regex(ValueError, "Specified Zarr chunk encoding"): + with self.roundtrip(badenc) as actual: + pass + + badenc = badenc.chunk({"x": (3, 3, 6)}) + badenc.var1.encoding["chunks"] = (3,) + with raises_regex(ValueError, "incompatible with this encoding"): + with self.roundtrip(badenc) as actual: + pass # ... 
except if the last chunk is smaller than the first ds_chunk_irreg = ds.chunk({"x": (5, 5, 2)}) @@ -1896,6 +1922,7 @@ def test_append_with_new_variable(self): combined["new_var"] = ds_with_new_var["new_var"] assert_identical(combined, xr.open_zarr(store_target)) + @requires_dask def test_to_zarr_compute_false_roundtrip(self): from dask.delayed import Delayed @@ -1915,6 +1942,7 @@ def test_to_zarr_compute_false_roundtrip(self): with self.open(store) as actual: assert_identical(original, actual) + @requires_dask def test_to_zarr_append_compute_false_roundtrip(self): from dask.delayed import Delayed @@ -1951,6 +1979,7 @@ def test_to_zarr_append_compute_false_roundtrip(self): with self.open(store) as actual: assert_identical(xr.concat([ds, ds_to_append], dim="time"), actual) + @requires_dask def test_encoding_chunksizes(self): # regression test for GH2278 # see also test_encoding_chunksizes_unlimited @@ -3513,6 +3542,7 @@ def test_uamiv_format_read(self): assert_allclose(expected, actual) camxfile.close() + @requires_dask def test_uamiv_format_mfread(self): """ Open a CAMx file and test data variables @@ -3939,6 +3969,9 @@ def test_chunks(self): ex = expected.sel(band=1).mean(dim="x") assert_allclose(ac, ex) + @pytest.mark.xfail( + not has_dask, reason="without dask, a non-serializable lock is used" + ) def test_pickle_rasterio(self): # regression test for https://github.com/pydata/xarray/issues/2121 with create_tmp_geotiff() as (tmp_file, expected): @@ -4012,6 +4045,7 @@ def test_geotiff_tags(self): with xr.open_rasterio(tmp_file) as rioda: assert isinstance(rioda.attrs["AREA_OR_POINT"], str) + @requires_dask def test_no_mftime(self): # rasterio can accept "filename" urguments that are actually urls, # including paths to remote files. @@ -4334,6 +4368,12 @@ def test_source_encoding_always_present(): assert ds.encoding["source"] == tmp +def _assert_no_dates_out_of_range_warning(record): + undesired_message = "dates out of range" + for warning in record: + assert undesired_message not in str(warning.message) + + @requires_scipy_or_netCDF4 @pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) def test_use_cftime_standard_calendar_default_in_range(calendar): @@ -4360,7 +4400,7 @@ def test_use_cftime_standard_calendar_default_in_range(calendar): with open_dataset(tmp_file) as ds: assert_identical(expected_x, ds.x) assert_identical(expected_time, ds.time) - assert not record + _assert_no_dates_out_of_range_warning(record) @requires_cftime @@ -4423,7 +4463,7 @@ def test_use_cftime_true(calendar, units_year): with open_dataset(tmp_file, use_cftime=True) as ds: assert_identical(expected_x, ds.x) assert_identical(expected_time, ds.time) - assert not record + _assert_no_dates_out_of_range_warning(record) @requires_scipy_or_netCDF4 @@ -4452,7 +4492,7 @@ def test_use_cftime_false_standard_calendar_in_range(calendar): with open_dataset(tmp_file, use_cftime=False) as ds: assert_identical(expected_x, ds.x) assert_identical(expected_time, ds.time) - assert not record + _assert_no_dates_out_of_range_warning(record) @requires_scipy_or_netCDF4 diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 2352f9e8cdd..343e059f53c 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1176,7 +1176,6 @@ def test_dayofweek_after_cftime_range(freq): np.testing.assert_array_equal(result, expected) -@pytest.mark.xfail(reason="See GH3885") @pytest.mark.parametrize("freq", ["A", "M", "D"]) def test_dayofyear_after_cftime_range(freq): 
pytest.importorskip("cftime", minversion="1.0.2.1") diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index d31bf9471ea..b30e32c92ad 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -15,7 +15,7 @@ ) from xarray.tests import assert_array_equal, assert_identical -from . import raises_regex, requires_cftime +from . import raises_regex, requires_cftime, requires_cftime_1_1_0 from .test_coding_times import ( _ALL_CALENDARS, _NON_STANDARD_CALENDARS, @@ -229,6 +229,13 @@ def test_cftimeindex_dayofweek_accessor(index): assert_array_equal(result, expected) +@requires_cftime_1_1_0 +def test_cftimeindex_days_in_month_accessor(index): + result = index.days_in_month + expected = [date.daysinmonth for date in index] + assert_array_equal(result, expected) + + @requires_cftime @pytest.mark.parametrize( ("string", "date_args", "reso"), diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 00c34940ce4..1efd4b02bf8 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -432,6 +432,18 @@ def test_decode_360_day_calendar(): assert_array_equal(actual, expected) +@requires_cftime +def test_decode_abbreviation(): + """Test making sure we properly fall back to cftime on abbreviated units.""" + import cftime + + val = np.array([1586628000000.0]) + units = "msecs since 1970-01-01T00:00:00Z" + actual = coding.times.decode_cf_datetime(val, units) + expected = coding.times.cftime_to_nptime(cftime.num2date(val, units)) + assert_array_equal(actual, expected) + + @arm_xfail @requires_cftime @pytest.mark.parametrize( diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index e23ff2f7e31..c3e5aafabfe 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -781,6 +781,19 @@ def test_isel(self): assert_identical(self.dv, self.dv.isel(x=slice(None))) assert_identical(self.dv[:3], self.dv.isel(x=slice(3))) assert_identical(self.dv[:3, :5], self.dv.isel(x=slice(3), y=slice(5))) + with raises_regex( + ValueError, + r"dimensions {'not_a_dim'} do not exist. Expected " + r"one or more of \('x', 'y'\)", + ): + self.dv.isel(not_a_dim=0) + with pytest.warns( + UserWarning, + match=r"dimensions {'not_a_dim'} do not exist. " + r"Expected one or more of \('x', 'y'\)", + ): + self.dv.isel(not_a_dim=0, missing_dims="warn") + assert_identical(self.dv, self.dv.isel(not_a_dim=0, missing_dims="ignore")) def test_isel_types(self): # regression test for #1405 @@ -1202,6 +1215,25 @@ def test_selection_multiindex_from_level(self): expected = data.isel(xy=[0, 1]).unstack("xy").squeeze("y").drop_vars("y") assert_equal(actual, expected) + def test_stack_groupby_unsorted_coord(self): + data = [[0, 1], [2, 3]] + data_flat = [0, 1, 2, 3] + dims = ["x", "y"] + y_vals = [2, 3] + + arr = xr.DataArray(data, dims=dims, coords={"y": y_vals}) + actual1 = arr.stack(z=dims).groupby("z").first() + midx1 = pd.MultiIndex.from_product([[0, 1], [2, 3]], names=dims) + expected1 = xr.DataArray(data_flat, dims=["z"], coords={"z": midx1}) + xr.testing.assert_equal(actual1, expected1) + + # GH: 3287. Note that y coord values are not in sorted order. 
+ arr = xr.DataArray(data, dims=dims, coords={"y": y_vals[::-1]}) + actual2 = arr.stack(z=dims).groupby("z").first() + midx2 = pd.MultiIndex.from_product([[0, 1], [3, 2]], names=dims) + expected2 = xr.DataArray(data_flat, dims=["z"], coords={"z": midx2}) + xr.testing.assert_equal(actual2, expected2) + def test_virtual_default_coords(self): array = DataArray(np.zeros((5,)), dims="x") expected = DataArray(range(5), dims="x", name="x") @@ -3483,6 +3515,14 @@ def test_to_and_from_series(self): expected_da, DataArray.from_series(actual).drop_vars(["x", "y"]) ) + def test_from_series_multiindex(self): + # GH:3951 + df = pd.DataFrame({"B": [1, 2, 3], "A": [4, 5, 6]}) + df = df.rename_axis("num").rename_axis("alpha", axis=1) + actual = df.stack("alpha").to_xarray() + assert (actual.sel(alpha="B") == [1, 2, 3]).all() + assert (actual.sel(alpha="A") == [4, 5, 6]).all() + @requires_sparse def test_from_series_sparse(self): import sparse @@ -4349,6 +4389,783 @@ def test_pad_reflect(self, mode, reflect_type): assert_identical(actual, expected) +class TestReduce: + @pytest.fixture(autouse=True) + def setup(self): + self.attrs = {"attr1": "value1", "attr2": 2929} + + +@pytest.mark.parametrize( + "x, minindex, maxindex, nanindex", + [ + (np.array([0, 1, 2, 0, -2, -4, 2]), 5, 2, None), + (np.array([0.0, 1.0, 2.0, 0.0, -2.0, -4.0, 2.0]), 5, 2, None), + (np.array([1.0, np.NaN, 2.0, np.NaN, -2.0, -4.0, 2.0]), 5, 2, 1), + ( + np.array([1.0, np.NaN, 2.0, np.NaN, -2.0, -4.0, 2.0]).astype("object"), + 5, + 2, + 1, + ), + (np.array([np.NaN, np.NaN]), np.NaN, np.NaN, 0), + ( + np.array( + ["2015-12-31", "2020-01-02", "2020-01-01", "2016-01-01"], + dtype="datetime64[ns]", + ), + 0, + 1, + None, + ), + ], +) +class TestReduce1D(TestReduce): + def test_min(self, x, minindex, maxindex, nanindex): + ar = xr.DataArray( + x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs + ) + + if np.isnan(minindex): + minindex = 0 + + expected0 = ar.isel(x=minindex, drop=True) + result0 = ar.min(keep_attrs=True) + assert_identical(result0, expected0) + + result1 = ar.min() + expected1 = expected0.copy() + expected1.attrs = {} + assert_identical(result1, expected1) + + result2 = ar.min(skipna=False) + if nanindex is not None and ar.dtype.kind != "O": + expected2 = ar.isel(x=nanindex, drop=True) + expected2.attrs = {} + else: + expected2 = expected1 + + assert_identical(result2, expected2) + + def test_max(self, x, minindex, maxindex, nanindex): + ar = xr.DataArray( + x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs + ) + + if np.isnan(minindex): + maxindex = 0 + + expected0 = ar.isel(x=maxindex, drop=True) + result0 = ar.max(keep_attrs=True) + assert_identical(result0, expected0) + + result1 = ar.max() + expected1 = expected0.copy() + expected1.attrs = {} + assert_identical(result1, expected1) + + result2 = ar.max(skipna=False) + if nanindex is not None and ar.dtype.kind != "O": + expected2 = ar.isel(x=nanindex, drop=True) + expected2.attrs = {} + else: + expected2 = expected1 + + assert_identical(result2, expected2) + + def test_argmin(self, x, minindex, maxindex, nanindex): + ar = xr.DataArray( + x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs + ) + indarr = xr.DataArray(np.arange(x.size, dtype=np.intp), dims=["x"]) + + if np.isnan(minindex): + with pytest.raises(ValueError): + ar.argmin() + return + + expected0 = indarr[minindex] + result0 = ar.argmin() + assert_identical(result0, expected0) + + result1 = ar.argmin(keep_attrs=True) + expected1 = expected0.copy() + 
expected1.attrs = self.attrs + assert_identical(result1, expected1) + + result2 = ar.argmin(skipna=False) + if nanindex is not None and ar.dtype.kind != "O": + expected2 = indarr.isel(x=nanindex, drop=True) + expected2.attrs = {} + else: + expected2 = expected0 + + assert_identical(result2, expected2) + + def test_argmax(self, x, minindex, maxindex, nanindex): + ar = xr.DataArray( + x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs + ) + indarr = xr.DataArray(np.arange(x.size, dtype=np.intp), dims=["x"]) + + if np.isnan(maxindex): + with pytest.raises(ValueError): + ar.argmax() + return + + expected0 = indarr[maxindex] + result0 = ar.argmax() + assert_identical(result0, expected0) + + result1 = ar.argmax(keep_attrs=True) + expected1 = expected0.copy() + expected1.attrs = self.attrs + assert_identical(result1, expected1) + + result2 = ar.argmax(skipna=False) + if nanindex is not None and ar.dtype.kind != "O": + expected2 = indarr.isel(x=nanindex, drop=True) + expected2.attrs = {} + else: + expected2 = expected0 + + assert_identical(result2, expected2) + + def test_idxmin(self, x, minindex, maxindex, nanindex): + ar0 = xr.DataArray( + x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs + ) + + # dim doesn't exist + with pytest.raises(KeyError): + ar0.idxmin(dim="spam") + + # Scalar Dataarray + with pytest.raises(ValueError): + xr.DataArray(5).idxmin() + + coordarr0 = xr.DataArray(ar0.coords["x"], dims=["x"]) + coordarr1 = coordarr0.copy() + + hasna = np.isnan(minindex) + if np.isnan(minindex): + minindex = 0 + + if hasna: + coordarr1[...] = 1 + fill_value_0 = np.NaN + else: + fill_value_0 = 1 + + expected0 = ( + (coordarr1 * fill_value_0).isel(x=minindex, drop=True).astype("float") + ) + expected0.name = "x" + + # Default fill value (NaN) + result0 = ar0.idxmin() + assert_identical(result0, expected0) + + # Manually specify NaN fill_value + result1 = ar0.idxmin(fill_value=np.NaN) + assert_identical(result1, expected0) + + # keep_attrs + result2 = ar0.idxmin(keep_attrs=True) + expected2 = expected0.copy() + expected2.attrs = self.attrs + assert_identical(result2, expected2) + + # skipna=False + if nanindex is not None and ar0.dtype.kind != "O": + expected3 = coordarr0.isel(x=nanindex, drop=True).astype("float") + expected3.name = "x" + expected3.attrs = {} + else: + expected3 = expected0.copy() + + result3 = ar0.idxmin(skipna=False) + assert_identical(result3, expected3) + + # fill_value should be ignored with skipna=False + result4 = ar0.idxmin(skipna=False, fill_value=-100j) + assert_identical(result4, expected3) + + # Float fill_value + if hasna: + fill_value_5 = -1.1 + else: + fill_value_5 = 1 + + expected5 = (coordarr1 * fill_value_5).isel(x=minindex, drop=True) + expected5.name = "x" + + result5 = ar0.idxmin(fill_value=-1.1) + assert_identical(result5, expected5) + + # Integer fill_value + if hasna: + fill_value_6 = -1 + else: + fill_value_6 = 1 + + expected6 = (coordarr1 * fill_value_6).isel(x=minindex, drop=True) + expected6.name = "x" + + result6 = ar0.idxmin(fill_value=-1) + assert_identical(result6, expected6) + + # Complex fill_value + if hasna: + fill_value_7 = -1j + else: + fill_value_7 = 1 + + expected7 = (coordarr1 * fill_value_7).isel(x=minindex, drop=True) + expected7.name = "x" + + result7 = ar0.idxmin(fill_value=-1j) + assert_identical(result7, expected7) + + def test_idxmax(self, x, minindex, maxindex, nanindex): + ar0 = xr.DataArray( + x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs + ) + + # dim doesn't exist + 
with pytest.raises(KeyError): + ar0.idxmax(dim="spam") + + # Scalar Dataarray + with pytest.raises(ValueError): + xr.DataArray(5).idxmax() + + coordarr0 = xr.DataArray(ar0.coords["x"], dims=["x"]) + coordarr1 = coordarr0.copy() + + hasna = np.isnan(maxindex) + if np.isnan(maxindex): + maxindex = 0 + + if hasna: + coordarr1[...] = 1 + fill_value_0 = np.NaN + else: + fill_value_0 = 1 + + expected0 = ( + (coordarr1 * fill_value_0).isel(x=maxindex, drop=True).astype("float") + ) + expected0.name = "x" + + # Default fill value (NaN) + result0 = ar0.idxmax() + assert_identical(result0, expected0) + + # Manually specify NaN fill_value + result1 = ar0.idxmax(fill_value=np.NaN) + assert_identical(result1, expected0) + + # keep_attrs + result2 = ar0.idxmax(keep_attrs=True) + expected2 = expected0.copy() + expected2.attrs = self.attrs + assert_identical(result2, expected2) + + # skipna=False + if nanindex is not None and ar0.dtype.kind != "O": + expected3 = coordarr0.isel(x=nanindex, drop=True).astype("float") + expected3.name = "x" + expected3.attrs = {} + else: + expected3 = expected0.copy() + + result3 = ar0.idxmax(skipna=False) + assert_identical(result3, expected3) + + # fill_value should be ignored with skipna=False + result4 = ar0.idxmax(skipna=False, fill_value=-100j) + assert_identical(result4, expected3) + + # Float fill_value + if hasna: + fill_value_5 = -1.1 + else: + fill_value_5 = 1 + + expected5 = (coordarr1 * fill_value_5).isel(x=maxindex, drop=True) + expected5.name = "x" + + result5 = ar0.idxmax(fill_value=-1.1) + assert_identical(result5, expected5) + + # Integer fill_value + if hasna: + fill_value_6 = -1 + else: + fill_value_6 = 1 + + expected6 = (coordarr1 * fill_value_6).isel(x=maxindex, drop=True) + expected6.name = "x" + + result6 = ar0.idxmax(fill_value=-1) + assert_identical(result6, expected6) + + # Complex fill_value + if hasna: + fill_value_7 = -1j + else: + fill_value_7 = 1 + + expected7 = (coordarr1 * fill_value_7).isel(x=maxindex, drop=True) + expected7.name = "x" + + result7 = ar0.idxmax(fill_value=-1j) + assert_identical(result7, expected7) + + +@pytest.mark.parametrize( + "x, minindex, maxindex, nanindex", + [ + ( + np.array( + [ + [0, 1, 2, 0, -2, -4, 2], + [1, 1, 1, 1, 1, 1, 1], + [0, 0, -10, 5, 20, 0, 0], + ] + ), + [5, 0, 2], + [2, 0, 4], + [None, None, None], + ), + ( + np.array( + [ + [2.0, 1.0, 2.0, 0.0, -2.0, -4.0, 2.0], + [-4.0, np.NaN, 2.0, np.NaN, -2.0, -4.0, 2.0], + [np.NaN] * 7, + ] + ), + [5, 0, np.NaN], + [0, 2, np.NaN], + [None, 1, 0], + ), + ( + np.array( + [ + [2.0, 1.0, 2.0, 0.0, -2.0, -4.0, 2.0], + [-4.0, np.NaN, 2.0, np.NaN, -2.0, -4.0, 2.0], + [np.NaN] * 7, + ] + ).astype("object"), + [5, 0, np.NaN], + [0, 2, np.NaN], + [None, 1, 0], + ), + ( + np.array( + [ + ["2015-12-31", "2020-01-02", "2020-01-01", "2016-01-01"], + ["2020-01-02", "2020-01-02", "2020-01-02", "2020-01-02"], + ["1900-01-01", "1-02-03", "1900-01-02", "1-02-03"], + ], + dtype="datetime64[ns]", + ), + [0, 0, 1], + [1, 0, 2], + [None, None, None], + ), + ], +) +class TestReduce2D(TestReduce): + def test_min(self, x, minindex, maxindex, nanindex): + ar = xr.DataArray( + x, + dims=["y", "x"], + coords={"x": np.arange(x.shape[1]) * 4, "y": 1 - np.arange(x.shape[0])}, + attrs=self.attrs, + ) + + minindex = [x if not np.isnan(x) else 0 for x in minindex] + expected0 = [ + ar.isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(minindex) + ] + expected0 = xr.concat(expected0, dim="y") + + result0 = ar.min(dim="x", keep_attrs=True) + assert_identical(result0, expected0) + + 
result1 = ar.min(dim="x") + expected1 = expected0 + expected1.attrs = {} + assert_identical(result1, expected1) + + result2 = ar.min(axis=1) + assert_identical(result2, expected1) + + minindex = [ + x if y is None or ar.dtype.kind == "O" else y + for x, y in zip(minindex, nanindex) + ] + expected2 = [ + ar.isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(minindex) + ] + expected2 = xr.concat(expected2, dim="y") + expected2.attrs = {} + + result3 = ar.min(dim="x", skipna=False) + + assert_identical(result3, expected2) + + def test_max(self, x, minindex, maxindex, nanindex): + ar = xr.DataArray( + x, + dims=["y", "x"], + coords={"x": np.arange(x.shape[1]) * 4, "y": 1 - np.arange(x.shape[0])}, + attrs=self.attrs, + ) + + maxindex = [x if not np.isnan(x) else 0 for x in maxindex] + expected0 = [ + ar.isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(maxindex) + ] + expected0 = xr.concat(expected0, dim="y") + + result0 = ar.max(dim="x", keep_attrs=True) + assert_identical(result0, expected0) + + result1 = ar.max(dim="x") + expected1 = expected0.copy() + expected1.attrs = {} + assert_identical(result1, expected1) + + result2 = ar.max(axis=1) + assert_identical(result2, expected1) + + maxindex = [ + x if y is None or ar.dtype.kind == "O" else y + for x, y in zip(maxindex, nanindex) + ] + expected2 = [ + ar.isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(maxindex) + ] + expected2 = xr.concat(expected2, dim="y") + expected2.attrs = {} + + result3 = ar.max(dim="x", skipna=False) + + assert_identical(result3, expected2) + + def test_argmin(self, x, minindex, maxindex, nanindex): + ar = xr.DataArray( + x, + dims=["y", "x"], + coords={"x": np.arange(x.shape[1]) * 4, "y": 1 - np.arange(x.shape[0])}, + attrs=self.attrs, + ) + indarr = np.tile(np.arange(x.shape[1], dtype=np.intp), [x.shape[0], 1]) + indarr = xr.DataArray(indarr, dims=ar.dims, coords=ar.coords) + + if np.isnan(minindex).any(): + with pytest.raises(ValueError): + ar.argmin(dim="x") + return + + expected0 = [ + indarr.isel(y=yi).isel(x=indi, drop=True) + for yi, indi in enumerate(minindex) + ] + expected0 = xr.concat(expected0, dim="y") + + result0 = ar.argmin(dim="x") + assert_identical(result0, expected0) + + result1 = ar.argmin(axis=1) + assert_identical(result1, expected0) + + result2 = ar.argmin(dim="x", keep_attrs=True) + expected1 = expected0.copy() + expected1.attrs = self.attrs + assert_identical(result2, expected1) + + minindex = [ + x if y is None or ar.dtype.kind == "O" else y + for x, y in zip(minindex, nanindex) + ] + expected2 = [ + indarr.isel(y=yi).isel(x=indi, drop=True) + for yi, indi in enumerate(minindex) + ] + expected2 = xr.concat(expected2, dim="y") + expected2.attrs = {} + + result3 = ar.argmin(dim="x", skipna=False) + + assert_identical(result3, expected2) + + def test_argmax(self, x, minindex, maxindex, nanindex): + ar = xr.DataArray( + x, + dims=["y", "x"], + coords={"x": np.arange(x.shape[1]) * 4, "y": 1 - np.arange(x.shape[0])}, + attrs=self.attrs, + ) + indarr = np.tile(np.arange(x.shape[1], dtype=np.intp), [x.shape[0], 1]) + indarr = xr.DataArray(indarr, dims=ar.dims, coords=ar.coords) + + if np.isnan(maxindex).any(): + with pytest.raises(ValueError): + ar.argmax(dim="x") + return + + expected0 = [ + indarr.isel(y=yi).isel(x=indi, drop=True) + for yi, indi in enumerate(maxindex) + ] + expected0 = xr.concat(expected0, dim="y") + + result0 = ar.argmax(dim="x") + assert_identical(result0, expected0) + + result1 = ar.argmax(axis=1) + assert_identical(result1, expected0) + + 
result2 = ar.argmax(dim="x", keep_attrs=True) + expected1 = expected0.copy() + expected1.attrs = self.attrs + assert_identical(result2, expected1) + + maxindex = [ + x if y is None or ar.dtype.kind == "O" else y + for x, y in zip(maxindex, nanindex) + ] + expected2 = [ + indarr.isel(y=yi).isel(x=indi, drop=True) + for yi, indi in enumerate(maxindex) + ] + expected2 = xr.concat(expected2, dim="y") + expected2.attrs = {} + + result3 = ar.argmax(dim="x", skipna=False) + + assert_identical(result3, expected2) + + def test_idxmin(self, x, minindex, maxindex, nanindex): + ar0 = xr.DataArray( + x, + dims=["y", "x"], + coords={"x": np.arange(x.shape[1]) * 4, "y": 1 - np.arange(x.shape[0])}, + attrs=self.attrs, + ) + + assert_identical(ar0, ar0) + + # No dimension specified + with pytest.raises(ValueError): + ar0.idxmin() + + # dim doesn't exist + with pytest.raises(KeyError): + ar0.idxmin(dim="Y") + + assert_identical(ar0, ar0) + + coordarr0 = xr.DataArray( + np.tile(ar0.coords["x"], [x.shape[0], 1]), dims=ar0.dims, coords=ar0.coords + ) + + hasna = [np.isnan(x) for x in minindex] + coordarr1 = coordarr0.copy() + coordarr1[hasna, :] = 1 + minindex0 = [x if not np.isnan(x) else 0 for x in minindex] + + nan_mult_0 = np.array([np.NaN if x else 1 for x in hasna])[:, None] + expected0 = [ + (coordarr1 * nan_mult_0).isel(y=yi).isel(x=indi, drop=True) + for yi, indi in enumerate(minindex0) + ] + expected0 = xr.concat(expected0, dim="y") + expected0.name = "x" + + # Default fill value (NaN) + result0 = ar0.idxmin(dim="x") + assert_identical(result0, expected0) + + # Manually specify NaN fill_value + result1 = ar0.idxmin(dim="x", fill_value=np.NaN) + assert_identical(result1, expected0) + + # keep_attrs + result2 = ar0.idxmin(dim="x", keep_attrs=True) + expected2 = expected0.copy() + expected2.attrs = self.attrs + assert_identical(result2, expected2) + + # skipna=False + minindex3 = [ + x if y is None or ar0.dtype.kind == "O" else y + for x, y in zip(minindex0, nanindex) + ] + expected3 = [ + coordarr0.isel(y=yi).isel(x=indi, drop=True) + for yi, indi in enumerate(minindex3) + ] + expected3 = xr.concat(expected3, dim="y") + expected3.name = "x" + expected3.attrs = {} + + result3 = ar0.idxmin(dim="x", skipna=False) + assert_identical(result3, expected3) + + # fill_value should be ignored with skipna=False + result4 = ar0.idxmin(dim="x", skipna=False, fill_value=-100j) + assert_identical(result4, expected3) + + # Float fill_value + nan_mult_5 = np.array([-1.1 if x else 1 for x in hasna])[:, None] + expected5 = [ + (coordarr1 * nan_mult_5).isel(y=yi).isel(x=indi, drop=True) + for yi, indi in enumerate(minindex0) + ] + expected5 = xr.concat(expected5, dim="y") + expected5.name = "x" + + result5 = ar0.idxmin(dim="x", fill_value=-1.1) + assert_identical(result5, expected5) + + # Integer fill_value + nan_mult_6 = np.array([-1 if x else 1 for x in hasna])[:, None] + expected6 = [ + (coordarr1 * nan_mult_6).isel(y=yi).isel(x=indi, drop=True) + for yi, indi in enumerate(minindex0) + ] + expected6 = xr.concat(expected6, dim="y") + expected6.name = "x" + + result6 = ar0.idxmin(dim="x", fill_value=-1) + assert_identical(result6, expected6) + + # Complex fill_value + nan_mult_7 = np.array([-5j if x else 1 for x in hasna])[:, None] + expected7 = [ + (coordarr1 * nan_mult_7).isel(y=yi).isel(x=indi, drop=True) + for yi, indi in enumerate(minindex0) + ] + expected7 = xr.concat(expected7, dim="y") + expected7.name = "x" + + result7 = ar0.idxmin(dim="x", fill_value=-5j) + assert_identical(result7, expected7) + + def 
test_idxmax(self, x, minindex, maxindex, nanindex): + ar0 = xr.DataArray( + x, + dims=["y", "x"], + coords={"x": np.arange(x.shape[1]) * 4, "y": 1 - np.arange(x.shape[0])}, + attrs=self.attrs, + ) + + # No dimension specified + with pytest.raises(ValueError): + ar0.idxmax() + + # dim doesn't exist + with pytest.raises(KeyError): + ar0.idxmax(dim="Y") + + ar1 = ar0.copy() + del ar1.coords["y"] + with pytest.raises(KeyError): + ar1.idxmax(dim="y") + + coordarr0 = xr.DataArray( + np.tile(ar0.coords["x"], [x.shape[0], 1]), dims=ar0.dims, coords=ar0.coords + ) + + hasna = [np.isnan(x) for x in maxindex] + coordarr1 = coordarr0.copy() + coordarr1[hasna, :] = 1 + maxindex0 = [x if not np.isnan(x) else 0 for x in maxindex] + + nan_mult_0 = np.array([np.NaN if x else 1 for x in hasna])[:, None] + expected0 = [ + (coordarr1 * nan_mult_0).isel(y=yi).isel(x=indi, drop=True) + for yi, indi in enumerate(maxindex0) + ] + expected0 = xr.concat(expected0, dim="y") + expected0.name = "x" + + # Default fill value (NaN) + result0 = ar0.idxmax(dim="x") + assert_identical(result0, expected0) + + # Manually specify NaN fill_value + result1 = ar0.idxmax(dim="x", fill_value=np.NaN) + assert_identical(result1, expected0) + + # keep_attrs + result2 = ar0.idxmax(dim="x", keep_attrs=True) + expected2 = expected0.copy() + expected2.attrs = self.attrs + assert_identical(result2, expected2) + + # skipna=False + maxindex3 = [ + x if y is None or ar0.dtype.kind == "O" else y + for x, y in zip(maxindex0, nanindex) + ] + expected3 = [ + coordarr0.isel(y=yi).isel(x=indi, drop=True) + for yi, indi in enumerate(maxindex3) + ] + expected3 = xr.concat(expected3, dim="y") + expected3.name = "x" + expected3.attrs = {} + + result3 = ar0.idxmax(dim="x", skipna=False) + assert_identical(result3, expected3) + + # fill_value should be ignored with skipna=False + result4 = ar0.idxmax(dim="x", skipna=False, fill_value=-100j) + assert_identical(result4, expected3) + + # Float fill_value + nan_mult_5 = np.array([-1.1 if x else 1 for x in hasna])[:, None] + expected5 = [ + (coordarr1 * nan_mult_5).isel(y=yi).isel(x=indi, drop=True) + for yi, indi in enumerate(maxindex0) + ] + expected5 = xr.concat(expected5, dim="y") + expected5.name = "x" + + result5 = ar0.idxmax(dim="x", fill_value=-1.1) + assert_identical(result5, expected5) + + # Integer fill_value + nan_mult_6 = np.array([-1 if x else 1 for x in hasna])[:, None] + expected6 = [ + (coordarr1 * nan_mult_6).isel(y=yi).isel(x=indi, drop=True) + for yi, indi in enumerate(maxindex0) + ] + expected6 = xr.concat(expected6, dim="y") + expected6.name = "x" + + result6 = ar0.idxmax(dim="x", fill_value=-1) + assert_identical(result6, expected6) + + # Complex fill_value + nan_mult_7 = np.array([-5j if x else 1 for x in hasna])[:, None] + expected7 = [ + (coordarr1 * nan_mult_7).isel(y=yi).isel(x=indi, drop=True) + for yi, indi in enumerate(maxindex0) + ] + expected7 = xr.concat(expected7, dim="y") + expected7.name = "x" + + result7 = ar0.idxmax(dim="x", fill_value=-5j) + assert_identical(result7, expected7) + + @pytest.fixture(params=[1]) def da(request): if request.param == 1: @@ -4974,3 +5791,28 @@ def test_weakref(): a = DataArray(1) r = ref(a) assert r() is a + + +def test_delete_coords(): + """Make sure that deleting a coordinate doesn't corrupt the DataArray. + See issue #3899. + + Also test that deleting succeeds and produces the expected output. 
+ """ + a0 = DataArray( + np.array([[1, 2, 3], [4, 5, 6]]), + dims=["y", "x"], + coords={"x": ["a", "b", "c"], "y": [-1, 1]}, + ) + assert_identical(a0, a0) + + a1 = a0.copy() + del a1.coords["y"] + + # This test will detect certain sorts of corruption in the DataArray + assert_identical(a0, a0) + + assert a0.dims == ("y", "x") + assert a1.dims == ("y", "x") + assert set(a0.coords.keys()) == {"x", "y"} + assert set(a1.coords.keys()) == {"x"} diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 02698253e5d..a1cb7361e77 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1023,6 +1023,21 @@ def test_isel(self): with pytest.raises(ValueError): data.isel(not_a_dim=slice(0, 2)) + with raises_regex( + ValueError, + r"dimensions {'not_a_dim'} do not exist. Expected " + r"one or more of " + r"[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*", + ): + data.isel(not_a_dim=slice(0, 2)) + with pytest.warns( + UserWarning, + match=r"dimensions {'not_a_dim'} do not exist. " + r"Expected one or more of " + r"[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*", + ): + data.isel(not_a_dim=slice(0, 2), missing_dims="warn") + assert_identical(data, data.isel(not_a_dim=slice(0, 2), missing_dims="ignore")) ret = data.isel(dim1=0) assert {"time": 20, "dim2": 9, "dim3": 10} == ret.dims @@ -4584,6 +4599,7 @@ def test_reduce_non_numeric(self): def test_reduce_strings(self): expected = Dataset({"x": "a"}) ds = Dataset({"x": ("y", ["a", "b"])}) + ds.coords["y"] = [-10, 10] actual = ds.min() assert_identical(expected, actual) @@ -4599,6 +4615,14 @@ def test_reduce_strings(self): actual = ds.argmax() assert_identical(expected, actual) + expected = Dataset({"x": -10}) + actual = ds.idxmin() + assert_identical(expected, actual) + + expected = Dataset({"x": 10}) + actual = ds.idxmax() + assert_identical(expected, actual) + expected = Dataset({"x": b"a"}) ds = Dataset({"x": ("y", np.array(["a", "b"], "S1"))}) actual = ds.min() diff --git a/xarray/tests/test_distributed.py b/xarray/tests/test_distributed.py index b3c0ce37a54..8011171d223 100644 --- a/xarray/tests/test_distributed.py +++ b/xarray/tests/test_distributed.py @@ -186,7 +186,7 @@ def test_dask_distributed_cfgrib_integration_test(loop): reason="Need recent distributed version to clean up get", ) @gen_cluster(client=True, timeout=None) -def test_async(c, s, a, b): +async def test_async(c, s, a, b): x = create_test_data() assert not dask.is_dask_collection(x) y = x.chunk({"dim2": 4}) + 10 @@ -206,7 +206,7 @@ def test_async(c, s, a, b): assert futures_of(z) future = c.compute(z) - w = yield future + w = await future assert not dask.is_dask_collection(w) assert_allclose(x + 10, w) @@ -218,7 +218,7 @@ def test_hdf5_lock(): @gen_cluster(client=True) -def test_serializable_locks(c, s, a, b): +async def test_serializable_locks(c, s, a, b): def f(x, lock=None): with lock: return x + 1 @@ -233,7 +233,7 @@ def f(x, lock=None): ]: futures = c.map(f, list(range(10)), lock=lock) - yield c.gather(futures) + await c.gather(futures) lock2 = pickle.loads(pickle.dumps(lock)) assert type(lock) == type(lock2) diff --git a/xarray/tests/test_formatting_html.py b/xarray/tests/test_formatting_html.py index 239f339208d..94653016416 100644 --- a/xarray/tests/test_formatting_html.py +++ b/xarray/tests/test_formatting_html.py @@ -137,3 +137,15 @@ def test_repr_of_dataset(dataset): ) assert "<U4" in formatted or ">U4" in formatted assert "<IA>" in formatted + + +def test_variable_repr_html(): + v = 
xr.Variable(["time", "x"], [[1, 2, 3], [4, 5, 6]], {"foo": "bar"}) + assert hasattr(v, "_repr_html_") + with xr.set_options(display_style="html"): + html = v._repr_html_().strip() + # We don't do a complete string identity since + # html output is probably subject to change, is long and... reasons. + # Just test that something reasonable was produced. + assert html.startswith("") + assert "xarray.Variable" in html diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 35c71c2854c..731cd165244 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -231,6 +231,17 @@ def test_interpolate_kwargs(): assert_equal(actual, expected) +def test_interpolate_keep_attrs(): + vals = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) + mvals = vals.copy() + mvals[2] = np.nan + missing = xr.DataArray(mvals, dims="x") + missing.attrs = {"test": "value"} + + actual = missing.interpolate_na(dim="x", keep_attrs=True) + assert actual.attrs == {"test": "value"} + + def test_interpolate(): vals = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 7f3f1620133..bf1f9ed60bb 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -139,6 +139,12 @@ def test1d(self): with raises_regex(ValueError, "None"): self.darray[:, 0, 0].plot(x="dim_1") + with raises_regex(TypeError, "complex128"): + (self.darray[:, 0, 0] + 1j).plot() + + def test_1d_bool(self): + xr.ones_like(self.darray[:, 0, 0], dtype=np.bool).plot() + def test_1d_x_y_kw(self): z = np.arange(10) da = DataArray(np.cos(z), dims=["z"], coords=[z], name="f") @@ -251,6 +257,17 @@ def test_2d_coords_line_plot(self): with pytest.raises(ValueError, match="For 2D inputs, hue must be a dimension"): da.plot.line(x="lon", hue="lat") + def test_2d_coord_line_plot_coords_transpose_invariant(self): + # checks for bug reported in GH #3933 + x = np.arange(10) + y = np.arange(20) + ds = xr.Dataset(coords={"x": x, "y": y}) + + for z in [ds.y + ds.x, ds.x + ds.y]: + ds = ds.assign_coords(z=z) + ds["v"] = ds.x + ds.y + ds["v"].plot.line(y="z", hue="x") + def test_2d_before_squeeze(self): a = DataArray(easy_array((1, 5))) a.plot() @@ -827,16 +844,22 @@ def test_divergentcontrol(self): assert cmap_params["vmax"] == 0.6 assert cmap_params["cmap"] == "viridis" + # regression test for GH3524 + # infer diverging colormap from divergent levels + cmap_params = _determine_cmap_params(pos, levels=[-0.1, 0, 1]) + # specifying levels makes cmap a Colormap object + assert cmap_params["cmap"].name == "RdBu_r" + def test_norm_sets_vmin_vmax(self): vmin = self.data.min() vmax = self.data.max() for norm, extend in zip( [ - mpl.colors.LogNorm(), - mpl.colors.LogNorm(vmin + 1, vmax - 1), - mpl.colors.LogNorm(None, vmax - 1), - mpl.colors.LogNorm(vmin + 1, None), + mpl.colors.Normalize(), + mpl.colors.Normalize(vmin + 0.1, vmax - 0.1), + mpl.colors.Normalize(None, vmax - 0.1), + mpl.colors.Normalize(vmin + 0.1, None), ], ["neither", "both", "max", "min"], ): @@ -989,6 +1012,13 @@ def test_1d_raises_valueerror(self): with raises_regex(ValueError, r"DataArray must be 2d"): self.plotfunc(self.darray[0, :]) + def test_bool(self): + xr.ones_like(self.darray, dtype=np.bool).plot() + + def test_complex_raises_typeerror(self): + with raises_regex(TypeError, "complex128"): + (self.darray + 1j).plot() + def test_3d_raises_valueerror(self): a = DataArray(easy_array((2, 3, 4))) if self.plotfunc.__name__ == "imshow": @@ -1752,6 +1782,13 @@ def test_can_set_vmin_vmax(self): clim = 
np.array(image.get_clim()) assert np.allclose(expected, clim) + @pytest.mark.slow + def test_vmin_vmax_equal(self): + # regression test for GH3734 + fg = self.g.map_dataarray(xplt.imshow, "x", "y", vmin=50, vmax=50) + for mappable in fg._mappables: + assert mappable.norm.vmin != mappable.norm.vmax + @pytest.mark.slow @pytest.mark.filterwarnings("ignore") def test_can_set_norm(self): @@ -2282,3 +2319,15 @@ def test_plot_transposes_properly(plotfunc): # pcolormesh returns 1D array but imshow returns a 2D array so it is necessary # to ravel() on the LHS assert np.all(hdl.get_array().ravel() == da.to_masked_array().ravel()) + + +@requires_matplotlib +def test_facetgrid_single_contour(): + # regression test for GH3569 + x, y = np.meshgrid(np.arange(12), np.arange(12)) + z = xr.DataArray(np.sqrt(x ** 2 + y ** 2)) + z2 = xr.DataArray(np.sqrt(x ** 2 + y ** 2) + 1) + ds = xr.concat([z, z2], dim="time") + ds["time"] = [0, 1] + + ds.plot.contour(col="time", levels=[4], colors=["k"]) diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index 09ab1be9af9..f3c09ba6a5f 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -837,6 +837,7 @@ def test_sparse_coords(self): ) +@requires_dask def test_chunk(): s = sparse.COO.from_numpy(np.array([0, 0, 1, 2])) a = DataArray(s) diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 2826dc2479c..5dd4a42cff0 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -1660,7 +1660,7 @@ def test_missing_value_fillna(self, unit, error): method("equals"), pytest.param( method("identical"), - marks=pytest.mark.skip(reason="behaviour of identical is unclear"), + marks=pytest.mark.skip(reason="behavior of identical is undecided"), ), ), ids=repr, @@ -1885,7 +1885,10 @@ def test_squeeze(self, dtype): method("coarsen", windows={"y": 2}, func=np.mean), pytest.param( method("quantile", q=[0.25, 0.75]), - marks=pytest.mark.xfail(reason="nanquantile not implemented"), + marks=pytest.mark.xfail( + LooseVersion(pint.__version__) < "0.12", + reason="quantile / nanquantile not implemented yet", + ), ), pytest.param( method("rank", dim="x"), @@ -2161,8 +2164,8 @@ class TestDataArray: "with_dims", marks=pytest.mark.xfail(reason="units in indexes are not supported"), ), - pytest.param("with_coords"), - pytest.param("without_coords"), + "with_coords", + "without_coords", ), ) def test_init(self, variant, dtype): @@ -2224,21 +2227,17 @@ def test_repr(self, func, variant, dtype): @pytest.mark.parametrize( "func", ( - pytest.param( - function("all"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), - pytest.param( - function("any"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), + function("all"), + function("any"), function("argmax"), function("argmin"), function("max"), function("mean"), pytest.param( function("median"), - marks=pytest.mark.xfail(reason="not implemented by xarray"), + marks=pytest.mark.xfail( + reason="median does not work with dataarrays yet" + ), ), function("min"), pytest.param( @@ -2249,18 +2248,9 @@ def test_repr(self, func, variant, dtype): function("std"), function("var"), function("cumsum"), - pytest.param( - function("cumprod"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), - pytest.param( - method("all"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), - pytest.param( - method("any"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), + function("cumprod"), + method("all"), + 
method("any"), method("argmax"), method("argmin"), method("max"), @@ -2269,18 +2259,13 @@ def test_repr(self, func, variant, dtype): method("min"), pytest.param( method("prod"), - marks=pytest.mark.xfail( - reason="comparison of quantity with ndarrays in nanops not implemented" - ), + marks=pytest.mark.xfail(reason="not implemented by pint yet"), ), method("sum"), method("std"), method("var"), method("cumsum"), - pytest.param( - method("cumprod"), - marks=pytest.mark.xfail(reason="pint does not implement cumprod yet"), - ), + method("cumprod"), ), ids=repr, ) @@ -2296,7 +2281,8 @@ def test_aggregation(self, func, dtype): expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_allclose(expected, actual) @pytest.mark.parametrize( "func", @@ -2314,7 +2300,8 @@ def test_unary_operations(self, func, dtype): expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -2333,7 +2320,8 @@ def test_binary_operations(self, func, dtype): expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "comparison", @@ -2383,7 +2371,8 @@ def test_comparison_operations(self, comparison, unit, error, dtype): strip_units(convert_units(to_compare_with, expected_units)), ) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "units,error", @@ -2411,9 +2400,10 @@ def test_univariate_ufunc(self, units, error, dtype): ) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) - @pytest.mark.xfail(reason="xarray's `np.maximum` strips units") + @pytest.mark.xfail(reason="needs the type register system for __array_ufunc__") @pytest.mark.parametrize( "unit,error", ( @@ -2422,7 +2412,12 @@ def test_univariate_ufunc(self, units, error, dtype): unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param( + unit_registry.mm, + None, + id="compatible_unit", + marks=pytest.mark.xfail(reason="pint converts to the wrong units"), + ), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @@ -2433,7 +2428,7 @@ def test_bivariate_ufunc(self, unit, error, dtype): if error is not None: with pytest.raises(error): - np.maximum(data_array, 0 * unit) + np.maximum(data_array, 1 * unit) return @@ -2441,16 +2436,18 @@ def test_bivariate_ufunc(self, unit, error, dtype): expected = attach_units( np.maximum( strip_units(data_array), - strip_units(convert_units(0 * unit, expected_units)), + strip_units(convert_units(1 * unit, expected_units)), ), expected_units, ) - actual = np.maximum(data_array, 0 * unit) - assert_equal_with_units(expected, actual) + actual = np.maximum(data_array, 1 * unit) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) - actual = np.maximum(0 * unit, data_array) - assert_equal_with_units(expected, actual) 
+ actual = np.maximum(1 * unit, data_array) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize("property", ("T", "imag", "real")) def test_numpy_properties(self, property, dtype): @@ -2466,7 +2463,8 @@ def test_numpy_properties(self, property, dtype): ) actual = getattr(data_array, property) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -2481,16 +2479,86 @@ def test_numpy_methods(self, func, dtype): expected = attach_units(strip_units(data_array), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) + + def test_item(self, dtype): + array = np.arange(10).astype(dtype) * unit_registry.m + data_array = xr.DataArray(data=array) + + func = method("item", 2) + + expected = func(strip_units(data_array)) * unit_registry.m + actual = func(data_array) + + np.testing.assert_allclose(expected, actual) + + @pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.cm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ) + @pytest.mark.parametrize( + "func", + ( + method("searchsorted", 5), + pytest.param( + function("searchsorted", 5), + marks=pytest.mark.xfail( + reason="xarray does not implement __array_function__" + ), + ), + ), + ids=repr, + ) + def test_searchsorted(self, func, unit, error, dtype): + array = np.arange(10).astype(dtype) * unit_registry.m + data_array = xr.DataArray(data=array) + + scalar_types = (int, float) + args = list(value * unit for value in func.args) + kwargs = { + key: (value * unit if isinstance(value, scalar_types) else value) + for key, value in func.kwargs.items() + } + + if error is not None: + with pytest.raises(error): + func(data_array, *args, **kwargs) + + return + + units = extract_units(data_array) + expected_units = extract_units(func(array, *args, **kwargs)) + stripped_args = [strip_units(convert_units(value, units)) for value in args] + stripped_kwargs = { + key: strip_units(convert_units(value, units)) + for key, value in kwargs.items() + } + expected = attach_units( + func(strip_units(data_array), *stripped_args, **stripped_kwargs), + expected_units, + ) + actual = func(data_array, *args, **kwargs) + + assert_units_equal(expected, actual) + np.testing.assert_allclose(expected, actual) @pytest.mark.parametrize( "func", ( method("clip", min=3, max=8), pytest.param( - method("searchsorted", v=5), + function("clip", a_min=3, a_max=8), marks=pytest.mark.xfail( - reason="searchsorted somehow requires a undocumented `keys` argument" + reason="xarray does not implement __array_function__" ), ), ), @@ -2513,28 +2581,32 @@ def test_numpy_methods_with_args(self, func, unit, error, dtype): data_array = xr.DataArray(data=array) scalar_types = (int, float) + args = list(value * unit for value in func.args) kwargs = { key: (value * unit if isinstance(value, scalar_types) else value) for key, value in func.kwargs.items() } if error is not None: with pytest.raises(error): - func(data_array, **kwargs) + func(data_array, *args, **kwargs) return units = extract_units(data_array) - expected_units = 
extract_units(func(array, **kwargs)) + expected_units = extract_units(func(array, *args, **kwargs)) + stripped_args = [strip_units(convert_units(value, units)) for value in args] stripped_kwargs = { key: strip_units(convert_units(value, units)) for key, value in kwargs.items() } expected = attach_units( - func(strip_units(data_array), **stripped_kwargs), expected_units + func(strip_units(data_array), *stripped_args, **stripped_kwargs), + expected_units, ) - actual = func(data_array, **kwargs) + actual = func(data_array, *args, **kwargs) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", (method("isnull"), method("notnull"), method("count")), ids=repr @@ -2551,15 +2623,13 @@ def test_missing_value_detection(self, func, dtype): ) * unit_registry.degK ) - x = np.arange(array.shape[0]) * unit_registry.m - y = np.arange(array.shape[1]) * unit_registry.m - - data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y")) + data_array = xr.DataArray(data=array) expected = func(strip_units(data_array)) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.xfail(reason="ffill and bfill lose units in data") @pytest.mark.parametrize("func", (method("ffill"), method("bfill")), ids=repr) @@ -2576,7 +2646,8 @@ def test_missing_value_filling(self, func, dtype): ) actual = func(data_array, dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", @@ -2586,12 +2657,7 @@ def test_missing_value_filling(self, func, dtype): unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param( - unit_registry.cm, - None, - id="compatible_unit", - marks=pytest.mark.xfail(reason="fillna converts to value's unit"), - ), + pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @@ -2629,7 +2695,8 @@ def test_fillna(self, fill_value, unit, error, dtype): ) actual = func(data_array, value=value) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) def test_dropna(self, dtype): array = ( @@ -2643,18 +2710,13 @@ def test_dropna(self, dtype): expected = attach_units(strip_units(data_array).dropna(dim="x"), units) actual = data_array.dropna(dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "unit", ( - pytest.param( - 1, - id="no_unit", - marks=pytest.mark.xfail( - reason="pint's isin implementation does not work well with mixed args" - ), - ), + pytest.param(1, id="no_unit"), pytest.param(unit_registry.dimensionless, id="dimensionless"), pytest.param(unit_registry.s, id="incompatible_unit"), pytest.param(unit_registry.cm, id="compatible_unit"), @@ -2677,22 +2739,11 @@ def test_isin(self, unit, dtype): ) & array.check(unit) actual = data_array.isin(values) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( - "variant", - ( - pytest.param( - "masking", - marks=pytest.mark.xfail(reason="array(nan) is not a 
quantity"), - ), - "replacing_scalar", - "replacing_array", - pytest.param( - "dropping", - marks=pytest.mark.xfail(reason="array(nan) is not a quantity"), - ), - ), + "variant", ("masking", "replacing_scalar", "replacing_array", "dropping") ) @pytest.mark.parametrize( "unit,error", @@ -2742,22 +2793,24 @@ def test_where(self, variant, unit, error, dtype): ) actual = data_array.where(**kwargs) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) - @pytest.mark.xfail(reason="interpolate strips units") - def test_interpolate_na(self, dtype): + @pytest.mark.xfail(reason="uses numpy.vectorize") + def test_interpolate_na(self): array = ( np.array([-1.03, 0.1, 1.4, np.nan, 2.3, np.nan, np.nan, 9.1]) * unit_registry.m ) x = np.arange(len(array)) - data_array = xr.DataArray(data=array, coords={"x": x}, dims="x").astype(dtype) + data_array = xr.DataArray(data=array, coords={"x": x}, dims="x") units = extract_units(data_array) expected = attach_units(strip_units(data_array).interpolate_na(dim="x"), units) actual = data_array.interpolate_na(dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", @@ -2767,18 +2820,8 @@ def test_interpolate_na(self, dtype): unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param( - unit_registry.cm, - None, - id="compatible_unit", - marks=pytest.mark.xfail(reason="depends on reindex"), - ), - pytest.param( - unit_registry.m, - None, - id="identical_unit", - marks=pytest.mark.xfail(reason="depends on reindex"), - ), + pytest.param(unit_registry.cm, None, id="compatible_unit",), + pytest.param(unit_registry.m, None, id="identical_unit",), ), ) def test_combine_first(self, unit, error, dtype): @@ -2807,7 +2850,8 @@ def test_combine_first(self, unit, error, dtype): ) actual = data_array.combine_first(other) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "unit", @@ -2829,7 +2873,17 @@ def test_combine_first(self, unit, error, dtype): "coords", ), ) - @pytest.mark.parametrize("func", (method("equals"), method("identical")), ids=repr) + @pytest.mark.parametrize( + "func", + ( + method("equals"), + pytest.param( + method("identical"), + marks=pytest.mark.skip(reason="the behavior of identical is undecided"), + ), + ), + ids=repr, + ) def test_comparisons(self, func, variation, unit, dtype): def is_compatible(a, b): a = a if a is not None else 1 @@ -2903,7 +2957,8 @@ def test_broadcast_like(self, unit, dtype): ) actual = arr1.broadcast_like(arr2) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "unit", @@ -2950,7 +3005,6 @@ def test_broadcast_equals(self, unit, dtype): method("reset_coords", names="x2"), method("copy"), method("astype", np.float32), - method("item", 1), ), ids=repr, ) @@ -2978,7 +3032,8 @@ def test_content_manipulation(self, func, dtype): expected = attach_units(func(strip_units(data_array), **stripped_kwargs), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", (pytest.param(method("copy", 
data=np.arange(20))),), ids=repr @@ -3004,7 +3059,9 @@ def test_content_manipulation_with_units(self, func, unit, dtype): ) actual = func(data_array, **kwargs) - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "indices", @@ -3024,7 +3081,8 @@ def test_isel(self, indices, dtype): ) actual = data_array.isel(x=indices) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -3067,7 +3125,9 @@ def test_sel(self, raw_values, unit, error, dtype): extract_units(data_array), ) actual = data_array.sel(x=values) - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -3110,7 +3170,9 @@ def test_loc(self, raw_values, unit, error, dtype): extract_units(data_array), ) actual = data_array.loc[{"x": values}] - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -3153,7 +3215,9 @@ def test_drop_sel(self, raw_values, unit, error, dtype): extract_units(data_array), ) actual = data_array.drop_sel(x=values) - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "shape", @@ -3181,7 +3245,9 @@ def test_squeeze(self, shape, dtype): strip_units(data_array).squeeze(), extract_units(data_array) ) actual = data_array.squeeze() - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) # try squeezing the dimensions separately names = tuple(dim for dim, coord in coords.items() if len(coord) == 1) @@ -3190,7 +3256,9 @@ def test_squeeze(self, shape, dtype): strip_units(data_array).squeeze(dim=name), extract_units(data_array) ) actual = data_array.squeeze(dim=name) - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3212,49 +3280,42 @@ def test_head_tail_thin(self, func, dtype): ) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) - @pytest.mark.xfail(reason="indexes don't support units") + @pytest.mark.parametrize("variant", ("data", "coords")) @pytest.mark.parametrize( - "unit,error", + "func", ( - pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( - unit_registry.dimensionless, DimensionalityError, id="dimensionless" + method("interp"), marks=pytest.mark.xfail(reason="uses scipy") ), - pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.cm, None, id="compatible_unit"), - pytest.param(unit_registry.m, None, id="identical_unit"), + method("reindex"), ), + ids=repr, ) - def test_interp(self, unit, error): - array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK - new_coords = (np.arange(10) + 0.5) * unit - coords = { - "x": np.arange(10) * unit_registry.m, - "y": np.arange(5) * unit_registry.m, + def test_interp_reindex(self, variant, func, dtype): + 
variants = { + "data": (unit_registry.m, 1), + "coords": (1, unit_registry.m), } + data_unit, coord_unit = variants.get(variant) - data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) + array = np.linspace(1, 2, 10).astype(dtype) * data_unit + y = np.arange(10) * coord_unit - if error is not None: - with pytest.raises(error): - data_array.interp(x=new_coords) - - return + x = np.arange(10) + new_x = np.arange(10) + 0.5 + data_array = xr.DataArray(array, coords={"x": x, "y": ("x", y)}, dims="x") units = extract_units(data_array) - expected = attach_units( - strip_units(data_array).interp( - x=strip_units(convert_units(new_coords, {None: unit_registry.m})) - ), - units, - ) - actual = data_array.interp(x=new_coords) + expected = attach_units(func(strip_units(data_array), x=new_x), units) + actual = func(data_array, x=new_x) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_allclose(expected, actual) - @pytest.mark.xfail(reason="indexes strip units") + @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( "unit,error", ( @@ -3267,79 +3328,66 @@ def test_interp(self, unit, error): pytest.param(unit_registry.m, None, id="identical_unit"), ), ) - def test_interp_like(self, unit, error): - array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK - coords = { - "x": (np.arange(10) + 0.3) * unit_registry.m, - "y": (np.arange(5) + 0.3) * unit_registry.m, - } - - data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) - other = xr.DataArray( - data=np.empty((20, 10)) * unit_registry.degK, - coords={"x": np.arange(20) * unit, "y": np.arange(10) * unit}, - dims=("x", "y"), - ) + @pytest.mark.parametrize( + "func", (method("interp"), method("reindex")), ids=repr, + ) + def test_interp_reindex_indexing(self, func, unit, error, dtype): + array = np.linspace(1, 2, 10).astype(dtype) + x = np.arange(10) * unit_registry.m + new_x = (np.arange(10) + 0.5) * unit + data_array = xr.DataArray(array, coords={"x": x}, dims="x") if error is not None: with pytest.raises(error): - data_array.interp_like(other) + func(data_array, x=new_x) return units = extract_units(data_array) expected = attach_units( - strip_units(data_array).interp_like( - strip_units(convert_units(other, units)) + func( + strip_units(data_array), + x=strip_units(convert_units(new_x, {None: unit_registry.m})), ), units, ) - actual = data_array.interp_like(other) + actual = func(data_array, x=new_x) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) - @pytest.mark.xfail(reason="indexes don't support units") + @pytest.mark.parametrize("variant", ("data", "coords")) @pytest.mark.parametrize( - "unit,error", + "func", ( - pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( - unit_registry.dimensionless, DimensionalityError, id="dimensionless" + method("interp_like"), marks=pytest.mark.xfail(reason="uses scipy") ), - pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.cm, None, id="compatible_unit"), - pytest.param(unit_registry.m, None, id="identical_unit"), + method("reindex_like"), ), + ids=repr, ) - def test_reindex(self, unit, error, dtype): - array = ( - np.linspace(1, 2, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK - ) - new_coords = (np.arange(10) + 0.5) * unit - coords = { - "x": np.arange(10) * unit_registry.m, - "y": np.arange(5) * unit_registry.m, + def 
test_interp_reindex_like(self, variant, func, dtype): + variants = { + "data": (unit_registry.m, 1), + "coords": (1, unit_registry.m), } + data_unit, coord_unit = variants.get(variant) - data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) - func = method("reindex") - - if error is not None: - with pytest.raises(error): - func(data_array, x=new_coords) + array = np.linspace(1, 2, 10).astype(dtype) * data_unit + coord = np.arange(10) * coord_unit - return + x = np.arange(10) + new_x = np.arange(-2, 2) + 0.5 + data_array = xr.DataArray(array, coords={"x": x, "y": ("x", coord)}, dims="x") + other = xr.DataArray(np.empty_like(new_x), coords={"x": new_x}, dims="x") - expected = attach_units( - func( - strip_units(data_array), - x=strip_units(convert_units(new_coords, {None: unit_registry.m})), - ), - {None: unit_registry.degK}, - ) - actual = func(data_array, x=new_coords) + units = extract_units(data_array) + expected = attach_units(func(strip_units(data_array), other), units) + actual = func(data_array, other) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_allclose(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -3354,38 +3402,35 @@ def test_reindex(self, unit, error, dtype): pytest.param(unit_registry.m, None, id="identical_unit"), ), ) - def test_reindex_like(self, unit, error, dtype): - array = ( - np.linspace(1, 2, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK - ) - coords = { - "x": (np.arange(10) + 0.3) * unit_registry.m, - "y": (np.arange(5) + 0.3) * unit_registry.m, - } + @pytest.mark.parametrize( + "func", (method("interp_like"), method("reindex_like")), ids=repr, + ) + def test_interp_reindex_like_indexing(self, func, unit, error, dtype): + array = np.linspace(1, 2, 10).astype(dtype) + x = np.arange(10) * unit_registry.m + new_x = (np.arange(-2, 2) + 0.5) * unit - data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) - other = xr.DataArray( - data=np.empty((20, 10)) * unit_registry.degK, - coords={"x": np.arange(20) * unit, "y": np.arange(10) * unit}, - dims=("x", "y"), - ) + data_array = xr.DataArray(array, coords={"x": x}, dims="x") + other = xr.DataArray(np.empty_like(new_x), {"x": new_x}, dims="x") if error is not None: with pytest.raises(error): - data_array.reindex_like(other) + func(data_array, other) return units = extract_units(data_array) expected = attach_units( - strip_units(data_array).reindex_like( - strip_units(convert_units(other, units)) + func( + strip_units(data_array), + strip_units(convert_units(other, {None: unit_registry.m})), ), units, ) - actual = data_array.reindex_like(other) + actual = func(data_array, other) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3407,7 +3452,8 @@ def test_stacking_stacked(self, func, dtype): expected = attach_units(func(strip_units(stacked)), {"data": unit_registry.m}) actual = func(stacked) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") def test_to_unstacked_dataset(self, dtype): @@ -3430,7 +3476,8 @@ def test_to_unstacked_dataset(self, dtype): ).rename({elem.magnitude: elem for elem in x}) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + 
xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3438,9 +3485,7 @@ def test_to_unstacked_dataset(self, dtype): method("transpose", "y", "x", "z"), method("stack", a=("x", "y")), method("set_index", x="x2"), - pytest.param( - method("shift", x=2), marks=pytest.mark.xfail(reason="strips units") - ), + method("shift", x=2), method("roll", x=2, roll_coords=False), method("sortby", "x2"), ), @@ -3466,7 +3511,8 @@ def test_stacking_reordering(self, func, dtype): expected = attach_units(func(strip_units(data_array)), {None: unit_registry.m}) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3476,16 +3522,13 @@ def test_stacking_reordering(self, func, dtype): method("integrate", dim="x"), pytest.param( method("quantile", q=[0.25, 0.75]), - marks=pytest.mark.xfail(reason="nanquantile not implemented"), - ), - method("reduce", func=np.sum, dim="x"), - pytest.param( - lambda x: x.dot(x), - id="method_dot", marks=pytest.mark.xfail( - reason="pint does not implement the dot method" + LooseVersion(pint.__version__) < "0.12", + reason="quantile / nanquantile not implemented yet", ), ), + method("reduce", func=np.sum, dim="x"), + pytest.param(lambda x: x.dot(x), id="method_dot"), ), ids=repr, ) @@ -3512,7 +3555,8 @@ def test_computation(self, func, dtype): expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3522,7 +3566,9 @@ def test_computation(self, func, dtype): method("coarsen", y=2), pytest.param( method("rolling", y=3), - marks=pytest.mark.xfail(reason="rolling strips units"), + marks=pytest.mark.xfail( + reason="numpy.lib.stride_tricks.as_strided converts to ndarray" + ), ), pytest.param( method("rolling_exp", y=3), @@ -3545,7 +3591,8 @@ def test_computation_objects(self, func, dtype): expected = attach_units(func(strip_units(data_array)).mean(), units) actual = func(data_array).mean() - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_allclose(expected, actual) def test_resample(self, dtype): array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m @@ -3559,7 +3606,8 @@ def test_resample(self, dtype): expected = attach_units(func(strip_units(data_array)).mean(), units) actual = func(data_array).mean() - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3569,7 +3617,10 @@ def test_resample(self, dtype): method("last"), pytest.param( method("quantile", q=[0.25, 0.5, 0.75], dim="x"), - marks=pytest.mark.xfail(reason="nanquantile not implemented"), + marks=pytest.mark.xfail( + LooseVersion(pint.__version__) < "0.12", + reason="quantile / nanquantile not implemented yet", + ), ), ), ids=repr, @@ -3598,7 +3649,8 @@ def test_grouped_operations(self, func, dtype): ) actual = func(data_array.groupby("y")) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) class TestDataset: diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 116466e112d..3003e0d66f3 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1254,8 +1254,19 @@ 
def test_isel(self): assert_identical(v.isel(x=0), v[:, 0]) assert_identical(v.isel(x=[0, 2]), v[:, [0, 2]]) assert_identical(v.isel(time=[]), v[[]]) - with raises_regex(ValueError, "do not exist"): + with raises_regex( + ValueError, + r"dimensions {'not_a_dim'} do not exist. Expected one or more of " + r"\('time', 'x'\)", + ): v.isel(not_a_dim=0) + with pytest.warns( + UserWarning, + match=r"dimensions {'not_a_dim'} do not exist. Expected one or more of " + r"\('time', 'x'\)", + ): + v.isel(not_a_dim=0, missing_dims="warn") + assert_identical(v, v.isel(not_a_dim=0, missing_dims="ignore")) def test_index_0d_numpy_string(self): # regression test to verify our work around for indexing 0d strings @@ -2202,6 +2213,10 @@ def test_full_like(self): assert expect.dtype == bool assert_identical(expect, full_like(orig, True, dtype=bool)) + # raise error on non-scalar fill_value + with raises_regex(ValueError, "must be scalar"): + full_like(orig, [1.0, 2.0]) + @requires_dask def test_full_like_dask(self): orig = Variable( diff --git a/xarray/util/print_versions.py b/xarray/util/print_versions.py index 6a0e62cc9dc..32051bb6843 100755 --- a/xarray/util/print_versions.py +++ b/xarray/util/print_versions.py @@ -9,7 +9,7 @@ def get_sys_info(): - "Returns system information as a dict" + """Returns system information as a dict""" blob = [] @@ -22,7 +22,7 @@ def get_sys_info(): stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) - so, serr = pipe.communicate() + so, _ = pipe.communicate() except Exception: pass else: @@ -37,7 +37,7 @@ def get_sys_info(): blob.append(("commit", commit)) try: - (sysname, nodename, release, version, machine, processor) = platform.uname() + (sysname, _nodename, release, _version, machine, processor) = platform.uname() blob.extend( [ ("python", sys.version), @@ -118,6 +118,7 @@ def show_versions(file=sys.stdout): ("cartopy", lambda mod: mod.__version__), ("seaborn", lambda mod: mod.__version__), ("numbagg", lambda mod: mod.__version__), + ("pint", lambda mod: mod.__version__), # xarray setup/test ("setuptools", lambda mod: mod.__version__), ("pip", lambda mod: mod.__version__),
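A short aside, not part of the patch: several hunks above replace an unconditional xfail on the quantile tests with one gated on the installed pint version. The standalone sketch below reproduces that pattern; the test name and body are invented purely for illustration.

from distutils.version import LooseVersion

import pint
import pytest


@pytest.mark.xfail(
    LooseVersion(pint.__version__) < "0.12",
    reason="quantile / nanquantile not implemented yet",
)
def test_quantile_with_units():
    # placeholder body; the real tests in the diff build pint-backed
    # DataArrays and compare against the unit-stripped computation
    assert True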
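Another aside, not part of the patch: the new test_isel assertions exercise the missing_dims argument of isel(). The sketch below walks through the three modes on a throwaway Variable; it assumes an xarray build that already contains this changeset, and the dimension names and data are made up.

import warnings

import numpy as np
import xarray as xr

v = xr.Variable(("time", "x"), np.arange(6).reshape(2, 3))

# default missing_dims="raise": indexing a dimension that does not exist is an error
try:
    v.isel(not_a_dim=0)
except ValueError as err:
    print("raised:", err)

# missing_dims="warn": the unknown dimension is ignored, but a UserWarning names it
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    warned = v.isel(not_a_dim=0, missing_dims="warn")
print(caught[0].category.__name__)  # UserWarning

# missing_dims="ignore": the unknown dimension is dropped silently
assert warned.identical(v.isel(not_a_dim=0, missing_dims="ignore"))
assert warned.identical(v)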