From 482ecf6ccee820f04484a3f983ead45624e0f969 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 28 Jul 2018 18:38:12 -0700 Subject: [PATCH 01/16] Fix some warnings. --- xarray/coding/times.py | 7 +++++-- xarray/core/formatting.py | 2 +- xarray/plot/plot.py | 5 +---- xarray/plot/utils.py | 4 +++- xarray/tests/test_backends.py | 1 + xarray/tests/test_coding_times.py | 3 ++- xarray/tests/test_dask.py | 7 +++++-- xarray/tests/test_dataarray.py | 6 ++++-- xarray/tests/test_dataset.py | 5 ++++- xarray/tests/test_plot.py | 10 ++++++++++ 10 files changed, 36 insertions(+), 14 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index d946e2ed378..6edbedce54c 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -183,8 +183,11 @@ def decode_cf_datetime(num_dates, units, calendar=None, # fixes: https://github.com/pydata/pandas/issues/14068 # these lines check if the the lowest or the highest value in dates # cause an OutOfBoundsDatetime (Overflow) error - pd.to_timedelta(flat_num_dates.min(), delta) + ref_date - pd.to_timedelta(flat_num_dates.max(), delta) + ref_date + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'invalid value encountered', + RuntimeWarning) + pd.to_timedelta(flat_num_dates.min(), delta) + ref_date + pd.to_timedelta(flat_num_dates.max(), delta) + ref_date # Cast input dates to integers of nanoseconds because `pd.to_datetime` # works much faster when dealing with integers diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 65f3c91ca26..042c8c5324d 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -183,7 +183,7 @@ def format_items(x): day_part = (x[~pd.isnull(x)] .astype('timedelta64[D]') .astype('timedelta64[ns]')) - time_needed = x != day_part + time_needed = x[~pd.isnull(x)] != day_part day_needed = day_part != np.timedelta64(0, 'ns') if np.logical_not(day_needed).all(): timedelta_format = 'time' diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 179f41e9e42..d51e6b7f908 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -479,9 +479,6 @@ def line(self, *args, **kwargs): def _rescale_imshow_rgb(darray, vmin, vmax, robust): assert robust or vmin is not None or vmax is not None - # There's a cyclic dependency via DataArray, so we can't import from - # xarray.ufuncs in global scope. - from xarray.ufuncs import maximum, minimum # Calculate vmin and vmax automatically for `robust=True` if robust: if vmax is None: @@ -507,7 +504,7 @@ def _rescale_imshow_rgb(darray, vmin, vmax, robust): # After scaling, downcast to 32-bit float. This substantially reduces # memory usage after we hand `darray` off to matplotlib. darray = ((darray.astype('f8') - vmin) / (vmax - vmin)).astype('f4') - return minimum(maximum(darray, 0), 1) + return np.minimum(np.maximum(darray, 0), 1) def _plot2d(plotfunc): diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 1ddb02352be..6221bfe9153 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -213,8 +213,10 @@ def _determine_cmap_params(plot_data, vmin=None, vmax=None, cmap=None, # Handle discrete levels if levels is not None: if is_scalar(levels): - if user_minmax or levels == 1: + if user_minmax: levels = np.linspace(vmin, vmax, levels) + elif levels == 1: + levels = np.asarray([(vmin + vmax) / 2]) else: # N in MaxNLocator refers to bins, not ticks ticker = mpl.ticker.MaxNLocator(levels - 1) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 3801225299f..8be5e69de1b 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1789,6 +1789,7 @@ def create_store(self): with create_tmp_file() as tmp_file: yield backends.H5NetCDFStore(tmp_file, 'w') + @pytest.mark.filterwarnings('ignore:complex dtypes are supported by h5py') def test_complex(self): expected = Dataset({'x': ('y', np.ones(5) + 1j * np.ones(5))}) with self.roundtrip(expected) as actual: diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index e763af4984c..7d3a4930b44 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -538,7 +538,8 @@ def test_cf_datetime_nan(num_dates, units, expected_list): with warnings.catch_warnings(): warnings.filterwarnings('ignore', 'All-NaN') actual = coding.times.decode_cf_datetime(num_dates, units) - expected = np.array(expected_list, dtype='datetime64[ns]') + # use pandas because numpy will deprecate timezone-aware conversions + expected = pd.to_datetime(expected_list) assert_array_equal(expected, actual) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index f6c47cce8d8..e64d114296d 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -24,7 +24,7 @@ class DaskTestCase(TestCase): def assertLazyAnd(self, expected, actual, test): - with dask.set_options(get=dask.get): + with dask.config.set(get=dask.get): test(actual, expected) if isinstance(actual, Dataset): for k, v in actual.variables.items(): @@ -196,11 +196,13 @@ def test_missing_methods(self): except NotImplementedError as err: assert 'dask' in str(err) + @pytest.mark.filterwarnings('ignore::PendingDeprecationWarning') def test_univariate_ufunc(self): u = self.eager_var v = self.lazy_var self.assertLazyAndAllClose(np.sin(u), xu.sin(v)) + @pytest.mark.filterwarnings('ignore::PendingDeprecationWarning') def test_bivariate_ufunc(self): u = self.eager_var v = self.lazy_var @@ -421,6 +423,7 @@ def duplicate_and_merge(array): actual = duplicate_and_merge(self.lazy_array) self.assertLazyAndEqual(expected, actual) + @pytest.mark.filterwarnings('ignore::PendingDeprecationWarning') def test_ufuncs(self): u = self.eager_array v = self.lazy_array @@ -821,7 +824,7 @@ def test_basic_compute(): dask.multiprocessing.get, dask.local.get_sync, None]: - with dask.set_options(get=get): + with dask.config.set(get=get): ds.compute() ds.foo.compute() ds.foo.variable.compute() diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 3619688d091..bd23f7928db 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -693,7 +693,7 @@ def test_isel_fancy(self): da.isel(time=(('points',), [1, 2]), x=(('points',), [2, 2]), y=(('points',), [3, 4])) np.testing.assert_allclose( - da.isel_points(time=[1], x=[2], y=[4]).values.squeeze(), + da.isel(time=[1], x=[2], y=[4]).values.squeeze(), np_array[1, 4, 2].squeeze()) da.isel(time=(('points', ), [1, 2])) y = [-1, 0] @@ -845,6 +845,7 @@ def test_isel_drop(self): selected = data.isel(x=0, drop=False) assert_identical(expected, selected) + @pytest.mark.filterwarnings("ignore:Dataset.isel_points") def test_isel_points(self): shape = (10, 5, 6) np_array = np.random.random(shape) @@ -1237,6 +1238,7 @@ def test_reindex_like_no_index(self): ValueError, 'different size for unlabeled'): foo.reindex_like(bar) + @pytest.mark.filterwarnings('ignore:Indexer has dimensions') def test_reindex_regressions(self): # regression test for #279 expected = DataArray(np.random.randn(5), coords=[("time", range(5))]) @@ -1286,7 +1288,7 @@ def test_swap_dims(self): def test_expand_dims_error(self): array = DataArray(np.random.randn(3, 4), dims=['x', 'dim_0'], - coords={'x': np.linspace(0.0, 1.0, 3.0)}, + coords={'x': np.linspace(0.0, 1.0, 3)}, attrs={'key': 'entry'}) with raises_regex(ValueError, 'dim should be str or'): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index d73632c10a7..9ad8b9c2baa 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1240,6 +1240,7 @@ def test_isel_drop(self): selected = data.isel(x=0, drop=False) assert_identical(expected, selected) + @pytest.mark.filterwarnings("ignore:Dataset.isel_points") def test_isel_points(self): data = create_test_data() @@ -1317,6 +1318,8 @@ def test_isel_points(self): dim2=stations['dim2s'], dim=np.array([4, 5, 6])) + @pytest.mark.filterwarnings("ignore:Dataset.sel_points") + @pytest.mark.filterwarnings("ignore:Dataset.isel_points") def test_sel_points(self): data = create_test_data() @@ -1419,7 +1422,7 @@ def test_sel_fancy(self): assert_identical(actual['b'].drop('y'), idx_y['b']) with pytest.raises(KeyError): - data.sel_points(x=[2.5], y=[2.0], method='pad', tolerance=1e-3) + data.sel(x=[2.5], y=[2.0], method='pad', tolerance=1e-3) def test_sel_method(self): data = create_test_data() diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 4e5ea8fc623..e7caf3d6ca2 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -267,6 +267,7 @@ def test_datetime_dimension(self): assert ax.has_data() @pytest.mark.slow + @pytest.mark.filterwarnings('ignore:tight_layout cannot') def test_convenient_facetgrid(self): a = easy_array((10, 15, 4)) d = DataArray(a, dims=['y', 'x', 'z']) @@ -328,6 +329,7 @@ def test_plot_size(self): self.darray.plot(aspect=1) @pytest.mark.slow + @pytest.mark.filterwarnings('ignore:tight_layout cannot') def test_convenient_facetgrid_4d(self): a = easy_array((10, 15, 2, 3)) d = DataArray(a, dims=['y', 'x', 'columns', 'rows']) @@ -775,10 +777,13 @@ def test_plot_nans(self): clim2 = self.plotfunc(x2).get_clim() assert clim1 == clim2 + @pytest.mark.filterwarnings('ignore::UserWarning') + @pytest.mark.filterwarnings('ignore:invalid value encountered') def test_can_plot_all_nans(self): # regression test for issue #1780 self.plotfunc(DataArray(np.full((2, 2), np.nan))) + @pytest.mark.filterwarnings('ignore: Attempting to set') def test_can_plot_axis_size_one(self): if self.plotfunc.__name__ not in ('contour', 'contourf'): self.plotfunc(DataArray(np.ones((1, 1)))) @@ -970,6 +975,7 @@ def test_2d_function_and_method_signature_same(self): del func_sig['darray'] assert func_sig == method_sig + @pytest.mark.filterwarnings('ignore:tight_layout cannot') def test_convenient_facetgrid(self): a = easy_array((10, 15, 4)) d = DataArray(a, dims=['y', 'x', 'z']) @@ -1001,6 +1007,7 @@ def test_convenient_facetgrid(self): else: assert '' == ax.get_xlabel() + @pytest.mark.filterwarnings('ignore:tight_layout cannot') def test_convenient_facetgrid_4d(self): a = easy_array((10, 15, 2, 3)) d = DataArray(a, dims=['y', 'x', 'columns', 'rows']) @@ -1279,6 +1286,7 @@ def test_imshow_rgb_values_in_valid_range(self): assert out.dtype == np.uint8 assert (out[..., :3] == da.values).all() # Compare without added alpha + @pytest.mark.filterwarnings('ignore:Several dimensions of this array') def test_regression_rgb_imshow_dim_size_one(self): # Regression: https://github.com/pydata/xarray/issues/1966 da = DataArray(easy_array((1, 3, 3), start=0.0, stop=1.0)) @@ -1511,6 +1519,7 @@ def test_facetgrid_polar(self): sharey=False) +@pytest.mark.filterwarnings('ignore:tight_layout cannot') class TestFacetGrid4d(PlotTestCase): def setUp(self): a = easy_array((10, 15, 3, 2)) @@ -1538,6 +1547,7 @@ def test_default_labels(self): assert substring_in_axes(label, ax) +@pytest.mark.filterwarnings('ignore:tight_layout cannot') class TestFacetedLinePlots(PlotTestCase): def setUp(self): self.darray = DataArray(np.random.randn(10, 6, 3, 4), From 6b46b159791ee0028b59d158302df8253129d3ee Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 28 Jul 2018 19:21:50 -0700 Subject: [PATCH 02/16] Make sure dask tests work with dask=0.16 --- xarray/tests/test_dask.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index e64d114296d..c92b14f754c 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -24,8 +24,11 @@ class DaskTestCase(TestCase): def assertLazyAnd(self, expected, actual, test): - with dask.config.set(get=dask.get): + + with (dask.config.set(get=dask.get) if hasattr(dask, 'config') + else dask.set_options(get=dask.get)): test(actual, expected) + if isinstance(actual, Dataset): for k, v in actual.variables.items(): if k in actual.dims: @@ -824,7 +827,8 @@ def test_basic_compute(): dask.multiprocessing.get, dask.local.get_sync, None]: - with dask.config.set(get=get): + with (dask.config.set(get=get) if hasattr(dask, 'config') + else dask.set_options(get=get)): ds.compute() ds.foo.compute() ds.foo.variable.compute() From 75cce6cb201aa52f5ff27e838ecf3ef6b7bda3e9 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 28 Jul 2018 22:59:08 -0700 Subject: [PATCH 03/16] Silence some pnetcdf warnings. --- xarray/tests/test_backends.py | 48 +++++++++++++++-------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 8be5e69de1b..8b469761ccd 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2528,6 +2528,7 @@ class PyNioTestAutocloseTrue(PyNioTest): @requires_pseudonetcdf +@pytest.mark.filterwarnings('ignore:IOAPI_ISPH is assumed to be 6370000') class PseudoNetCDFFormatTest(TestCase): autoclose = True @@ -2659,14 +2660,11 @@ def test_uamiv_format_read(self): """ Open a CAMx file and test data variables """ - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', category=UserWarning, - message=('IOAPI_ISPH is assumed to be ' + - '6370000.; consistent with WRF')) - camxfile = open_example_dataset('example.uamiv', - engine='pseudonetcdf', - autoclose=True, - backend_kwargs={'format': 'uamiv'}) + + camxfile = open_example_dataset('example.uamiv', + engine='pseudonetcdf', + autoclose=True, + backend_kwargs={'format': 'uamiv'}) data = np.arange(20, dtype='f').reshape(1, 1, 4, 5) expected = xr.Variable(('TSTEP', 'LAY', 'ROW', 'COL'), data, dict(units='ppm', long_name='O3'.ljust(16), @@ -2688,17 +2686,14 @@ def test_uamiv_format_mfread(self): """ Open a CAMx file and test data variables """ - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', category=UserWarning, - message=('IOAPI_ISPH is assumed to be ' + - '6370000.; consistent with WRF')) - camxfile = open_example_mfdataset( - ['example.uamiv', - 'example.uamiv'], - engine='pseudonetcdf', - autoclose=True, - concat_dim='TSTEP', - backend_kwargs={'format': 'uamiv'}) + + camxfile = open_example_mfdataset( + ['example.uamiv', + 'example.uamiv'], + engine='pseudonetcdf', + autoclose=True, + concat_dim='TSTEP', + backend_kwargs={'format': 'uamiv'}) data1 = np.arange(20, dtype='f').reshape(1, 1, 4, 5) data = np.concatenate([data1] * 2, axis=0) @@ -2721,19 +2716,18 @@ def test_uamiv_format_mfread(self): def test_uamiv_format_write(self): fmtkw = {'format': 'uamiv'} - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', category=UserWarning, - message=('IOAPI_ISPH is assumed to be ' + - '6370000.; consistent with WRF')) - expected = open_example_dataset('example.uamiv', - engine='pseudonetcdf', - autoclose=False, - backend_kwargs=fmtkw) + + expected = open_example_dataset('example.uamiv', + engine='pseudonetcdf', + autoclose=False, + backend_kwargs=fmtkw) with self.roundtrip(expected, save_kwargs=fmtkw, open_kwargs={'backend_kwargs': fmtkw}) as actual: assert_identical(expected, actual) + expected.close() + def save(self, dataset, path, **save_kwargs): import PseudoNetCDF as pnc pncf = pnc.PseudoNetCDFFile() From 7518bd7e13611a467d54a6b9dcc1a456f2b0887c Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 29 Jul 2018 11:28:59 -0700 Subject: [PATCH 04/16] fix sel_points, isel_points fancy indexing tests --- xarray/tests/test_dataarray.py | 3 ++- xarray/tests/test_dataset.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index bd23f7928db..5d20a6cfec3 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -672,6 +672,7 @@ def test_isel_types(self): assert_identical(da.isel(x=np.array([0], dtype="int64")), da.isel(x=np.array([0]))) + @pytest.mark.filterwarnings('ignore::DeprecationWarning') def test_isel_fancy(self): shape = (10, 7, 6) np_array = np.random.random(shape) @@ -693,7 +694,7 @@ def test_isel_fancy(self): da.isel(time=(('points',), [1, 2]), x=(('points',), [2, 2]), y=(('points',), [3, 4])) np.testing.assert_allclose( - da.isel(time=[1], x=[2], y=[4]).values.squeeze(), + da.isel_points(time=[1], x=[2], y=[4]).values.squeeze(), np_array[1, 4, 2].squeeze()) da.isel(time=(('points', ), [1, 2])) y = [-1, 0] diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 9ad8b9c2baa..8643a56bbec 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1350,6 +1350,7 @@ def test_sel_points(self): with pytest.raises(KeyError): data.sel_points(x=[2.5], y=[2.0], method='pad', tolerance=1e-3) + @pytest.mark.filterwarnings('ignore::DeprecationWarning') def test_sel_fancy(self): data = create_test_data() @@ -1422,7 +1423,7 @@ def test_sel_fancy(self): assert_identical(actual['b'].drop('y'), idx_y['b']) with pytest.raises(KeyError): - data.sel(x=[2.5], y=[2.0], method='pad', tolerance=1e-3) + data.sel_points(x=[2.5], y=[2.0], method='pad', tolerance=1e-3) def test_sel_method(self): data = create_test_data() From 71866440dcc38bff224a94596612d1cc58a97936 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 29 Jul 2018 11:55:33 -0700 Subject: [PATCH 05/16] Revert to using xr.ufuncs --- xarray/plot/plot.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index d51e6b7f908..0b3ab6f1bde 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -479,6 +479,11 @@ def line(self, *args, **kwargs): def _rescale_imshow_rgb(darray, vmin, vmax, robust): assert robust or vmin is not None or vmax is not None + # TODO: remove when min numpy version is bumped to 1.13 + # There's a cyclic dependency via DataArray, so we can't import from + # xarray.ufuncs in global scope. + from xarray.ufuncs import maximum, minimum + # Calculate vmin and vmax automatically for `robust=True` if robust: if vmax is None: @@ -504,7 +509,10 @@ def _rescale_imshow_rgb(darray, vmin, vmax, robust): # After scaling, downcast to 32-bit float. This substantially reduces # memory usage after we hand `darray` off to matplotlib. darray = ((darray.astype('f8') - vmin) / (vmax - vmin)).astype('f4') - return np.minimum(np.maximum(darray, 0), 1) + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'xarray.ufuncs', + PendingDeprecationWarning) + return minimum(maximum(darray, 0), 1) def _plot2d(plotfunc): From 1f1ec52707f8b2349461e41b68a7bc3918deb9f1 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 29 Jul 2018 12:40:31 -0700 Subject: [PATCH 06/16] Fix overflow/underflow warnings in interpolate_na These were being triggered by casting datetime64[ns] to float32. We now rescale the co-ordinate before interpolating, except for nearest-neighbour interpolation. The rescaling can change the nearest neighbour, and so is avoided in this case to maintain pandas compatibility. --- xarray/core/missing.py | 14 ++++++++++++-- xarray/tests/test_missing.py | 14 +++++++------- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 232fa185c07..e7c663db84d 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -153,7 +153,7 @@ def _apply_over_vars_with_dim(func, self, dim=None, **kwargs): return ds -def get_clean_interp_index(arr, dim, use_coordinate=True, **kwargs): +def get_clean_interp_index(arr, dim, method, use_coordinate=True, **kwargs): '''get index to use for x values in interpolation. If use_coordinate is True, the coordinate that shares the name of the @@ -176,6 +176,15 @@ def get_clean_interp_index(arr, dim, use_coordinate=True, **kwargs): # raise if index cannot be cast to a float (e.g. MultiIndex) try: index = index.values.astype(np.float64) + if method != 'nearest': + # rescale index to avoid overflow/underflow + # The division can change the nearest-neighbour + # when compared to pandas (which does not divide). + # Let's keep that compatitibility + index = (index - index.min()) + if len(index) > 1: + index /= index.std() + except (TypeError, ValueError): # pandas raises a TypeError # xarray/nuppy raise a ValueError @@ -202,7 +211,8 @@ def interp_na(self, dim=None, use_coordinate=True, method='linear', limit=None, valids = _get_valid_fill_mask(self, dim, limit) # method - index = get_clean_interp_index(self, dim, use_coordinate=use_coordinate, + index = get_clean_interp_index(self, dim, method=method, + use_coordinate=use_coordinate, **kwargs) interp_class, kwargs = _get_interpolator(method, **kwargs) interpolator = partial(func_interpolate_na, interp_class, **kwargs) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 5c7e384c789..446ed23363f 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -11,8 +11,8 @@ NumpyInterpolator, ScipyInterpolator, SplineInterpolator) from xarray.core.pycompat import dask_array_type from xarray.tests import ( - assert_array_equal, assert_equal, raises_regex, requires_bottleneck, - requires_dask, requires_scipy) + assert_array_equal, assert_equal, assert_allclose, raises_regex, + requires_bottleneck, requires_dask, requires_scipy) @pytest.fixture @@ -89,7 +89,7 @@ def test_interpolate_pd_compat(): # only checks that interpolated values are the same (not nans) expected.values[pd.isnull(actual.values)] = np.nan - np.testing.assert_allclose(actual.values, expected.values) + np.testing.assert_almost_equal(actual.values, expected.values) @requires_scipy @@ -244,7 +244,7 @@ def test_interpolate_limits(): expected = xr.DataArray(np.array([1, 2, 3, 4, np.nan, 6], dtype=np.float64), dims='x') - assert_equal(actual, expected) + assert_allclose(actual, expected) @requires_scipy @@ -284,17 +284,17 @@ def test_interpolate_use_coordinate(): # use_coordinate == False is same as using the default index actual = da.interpolate_na(dim='x', use_coordinate=False) expected = da.interpolate_na(dim='x') - assert_equal(actual, expected) + assert_allclose(actual, expected) # possible to specify non index coordinate actual = da.interpolate_na(dim='x', use_coordinate='xc') expected = da.interpolate_na(dim='x') - assert_equal(actual, expected) + assert_allclose(actual, expected) # possible to specify index coordinate by name actual = da.interpolate_na(dim='x', use_coordinate='x') expected = da.interpolate_na(dim='x') - assert_equal(actual, expected) + assert_allclose(actual, expected) @requires_dask From 9ac15ef677c4d21230b7aab40a65c1d7b0530ece Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 29 Jul 2018 12:45:36 -0700 Subject: [PATCH 07/16] Rescale datetime for interp() too. --- xarray/core/missing.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index e7c663db84d..26dd8c5e450 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -412,15 +412,16 @@ def _floatize_x(x, new_x): x = list(x) new_x = list(new_x) for i in range(len(x)): - if x[i].dtype.kind in 'Mm': - # Scipy casts coordinates to np.float64, which is not accurate - # enough for datetime64 (uses 64bit integer). - # We assume that the most of the bits are used to represent the - # offset (min(x)) and the variation (x - min(x)) can be - # represented by float. - xmin = np.min(x[i]) - x[i] = (x[i] - xmin).astype(np.float64) - new_x[i] = (new_x[i] - xmin).astype(np.float64) + # Scipy casts coordinates to np.float64, which is not accurate + # enough for datetime64 (uses 64bit integer). + # We assume that the most of the bits are used to represent the + # offset (min(x)) and the variation (x - min(x)) can be + # represented by float. + # Let's be defensive and always rescale (x) + xmin = np.min(x[i]) + xstd = np.std(x[i].astype(np.float64)) + x[i] = (x[i] - xmin).astype(np.float64) / xstd + new_x[i] = (new_x[i] - xmin).astype(np.float64) / xstd return x, new_x From 76f988f594aea23d3acde1d603db5460c9010c1e Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 29 Jul 2018 17:28:18 -0700 Subject: [PATCH 08/16] Better rescaling. --- xarray/core/missing.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 26dd8c5e450..a8bc876dab3 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -181,9 +181,9 @@ def get_clean_interp_index(arr, dim, method, use_coordinate=True, **kwargs): # The division can change the nearest-neighbour # when compared to pandas (which does not divide). # Let's keep that compatitibility - index = (index - index.min()) + index = (index - index.mean()) if len(index) > 1: - index /= index.std() + index /= np.max(np.abs(index)) except (TypeError, ValueError): # pandas raises a TypeError @@ -417,11 +417,13 @@ def _floatize_x(x, new_x): # We assume that the most of the bits are used to represent the # offset (min(x)) and the variation (x - min(x)) can be # represented by float. - # Let's be defensive and always rescale (x) - xmin = np.min(x[i]) - xstd = np.std(x[i].astype(np.float64)) - x[i] = (x[i] - xmin).astype(np.float64) / xstd - new_x[i] = (new_x[i] - xmin).astype(np.float64) / xstd + # Let's be defensive and always rescale x to be in [0, 1] + # Remove minimum instead of mean allows us to handle datetime + xfloat = (x[i] - np.min(x[i])).astype(np.float64) + newxfloat = (new_x[i] - np.min(x[i])).astype(np.float64) + xmax = np.max(xfloat) + x[i] = xfloat / xmax + new_x[i] = newxfloat / xmax return x, new_x From f605d6e0d8e210d0731dd7c2dab0e232e7efa3f5 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 20 Aug 2018 17:39:45 +0530 Subject: [PATCH 09/16] Revert "Better rescaling." This reverts commit 76f988f594aea23d3acde1d603db5460c9010c1e. --- xarray/core/missing.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index a8bc876dab3..26dd8c5e450 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -181,9 +181,9 @@ def get_clean_interp_index(arr, dim, method, use_coordinate=True, **kwargs): # The division can change the nearest-neighbour # when compared to pandas (which does not divide). # Let's keep that compatitibility - index = (index - index.mean()) + index = (index - index.min()) if len(index) > 1: - index /= np.max(np.abs(index)) + index /= index.std() except (TypeError, ValueError): # pandas raises a TypeError @@ -417,13 +417,11 @@ def _floatize_x(x, new_x): # We assume that the most of the bits are used to represent the # offset (min(x)) and the variation (x - min(x)) can be # represented by float. - # Let's be defensive and always rescale x to be in [0, 1] - # Remove minimum instead of mean allows us to handle datetime - xfloat = (x[i] - np.min(x[i])).astype(np.float64) - newxfloat = (new_x[i] - np.min(x[i])).astype(np.float64) - xmax = np.max(xfloat) - x[i] = xfloat / xmax - new_x[i] = newxfloat / xmax + # Let's be defensive and always rescale (x) + xmin = np.min(x[i]) + xstd = np.std(x[i].astype(np.float64)) + x[i] = (x[i] - xmin).astype(np.float64) / xstd + new_x[i] = (new_x[i] - xmin).astype(np.float64) / xstd return x, new_x From 9729f2905102f0fffe4fbf22b5000dbf3c4d5ef0 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 20 Aug 2018 17:39:50 +0530 Subject: [PATCH 10/16] Revert "Rescale datetime for interp() too." This reverts commit 9ac15ef677c4d21230b7aab40a65c1d7b0530ece. --- xarray/core/missing.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 26dd8c5e450..e7c663db84d 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -412,16 +412,15 @@ def _floatize_x(x, new_x): x = list(x) new_x = list(new_x) for i in range(len(x)): - # Scipy casts coordinates to np.float64, which is not accurate - # enough for datetime64 (uses 64bit integer). - # We assume that the most of the bits are used to represent the - # offset (min(x)) and the variation (x - min(x)) can be - # represented by float. - # Let's be defensive and always rescale (x) - xmin = np.min(x[i]) - xstd = np.std(x[i].astype(np.float64)) - x[i] = (x[i] - xmin).astype(np.float64) / xstd - new_x[i] = (new_x[i] - xmin).astype(np.float64) / xstd + if x[i].dtype.kind in 'Mm': + # Scipy casts coordinates to np.float64, which is not accurate + # enough for datetime64 (uses 64bit integer). + # We assume that the most of the bits are used to represent the + # offset (min(x)) and the variation (x - min(x)) can be + # represented by float. + xmin = np.min(x[i]) + x[i] = (x[i] - xmin).astype(np.float64) + new_x[i] = (new_x[i] - xmin).astype(np.float64) return x, new_x From cee7e2a587920575981d6eab99919db35cde1a1c Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 20 Aug 2018 17:39:51 +0530 Subject: [PATCH 11/16] Revert "Fix overflow/underflow warnings in interpolate_na" This reverts commit 1f1ec52707f8b2349461e41b68a7bc3918deb9f1. --- xarray/core/missing.py | 14 ++------------ xarray/tests/test_missing.py | 14 +++++++------- 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index e7c663db84d..232fa185c07 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -153,7 +153,7 @@ def _apply_over_vars_with_dim(func, self, dim=None, **kwargs): return ds -def get_clean_interp_index(arr, dim, method, use_coordinate=True, **kwargs): +def get_clean_interp_index(arr, dim, use_coordinate=True, **kwargs): '''get index to use for x values in interpolation. If use_coordinate is True, the coordinate that shares the name of the @@ -176,15 +176,6 @@ def get_clean_interp_index(arr, dim, method, use_coordinate=True, **kwargs): # raise if index cannot be cast to a float (e.g. MultiIndex) try: index = index.values.astype(np.float64) - if method != 'nearest': - # rescale index to avoid overflow/underflow - # The division can change the nearest-neighbour - # when compared to pandas (which does not divide). - # Let's keep that compatitibility - index = (index - index.min()) - if len(index) > 1: - index /= index.std() - except (TypeError, ValueError): # pandas raises a TypeError # xarray/nuppy raise a ValueError @@ -211,8 +202,7 @@ def interp_na(self, dim=None, use_coordinate=True, method='linear', limit=None, valids = _get_valid_fill_mask(self, dim, limit) # method - index = get_clean_interp_index(self, dim, method=method, - use_coordinate=use_coordinate, + index = get_clean_interp_index(self, dim, use_coordinate=use_coordinate, **kwargs) interp_class, kwargs = _get_interpolator(method, **kwargs) interpolator = partial(func_interpolate_na, interp_class, **kwargs) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 446ed23363f..5c7e384c789 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -11,8 +11,8 @@ NumpyInterpolator, ScipyInterpolator, SplineInterpolator) from xarray.core.pycompat import dask_array_type from xarray.tests import ( - assert_array_equal, assert_equal, assert_allclose, raises_regex, - requires_bottleneck, requires_dask, requires_scipy) + assert_array_equal, assert_equal, raises_regex, requires_bottleneck, + requires_dask, requires_scipy) @pytest.fixture @@ -89,7 +89,7 @@ def test_interpolate_pd_compat(): # only checks that interpolated values are the same (not nans) expected.values[pd.isnull(actual.values)] = np.nan - np.testing.assert_almost_equal(actual.values, expected.values) + np.testing.assert_allclose(actual.values, expected.values) @requires_scipy @@ -244,7 +244,7 @@ def test_interpolate_limits(): expected = xr.DataArray(np.array([1, 2, 3, 4, np.nan, 6], dtype=np.float64), dims='x') - assert_allclose(actual, expected) + assert_equal(actual, expected) @requires_scipy @@ -284,17 +284,17 @@ def test_interpolate_use_coordinate(): # use_coordinate == False is same as using the default index actual = da.interpolate_na(dim='x', use_coordinate=False) expected = da.interpolate_na(dim='x') - assert_allclose(actual, expected) + assert_equal(actual, expected) # possible to specify non index coordinate actual = da.interpolate_na(dim='x', use_coordinate='xc') expected = da.interpolate_na(dim='x') - assert_allclose(actual, expected) + assert_equal(actual, expected) # possible to specify index coordinate by name actual = da.interpolate_na(dim='x', use_coordinate='x') expected = da.interpolate_na(dim='x') - assert_allclose(actual, expected) + assert_equal(actual, expected) @requires_dask From fbdb20656aa82ff1ad55f27863d606731051f11d Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 20 Aug 2018 23:10:46 +0530 Subject: [PATCH 12/16] Silence overflow/underflow/invalid value warnings. --- xarray/core/missing.py | 19 ++++++++++++------- xarray/tests/test_missing.py | 16 ++++++++-------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 232fa185c07..90aa4ffaeda 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -3,6 +3,8 @@ from collections import Iterable from functools import partial +import warnings + import numpy as np import pandas as pd @@ -207,13 +209,16 @@ def interp_na(self, dim=None, use_coordinate=True, method='linear', limit=None, interp_class, kwargs = _get_interpolator(method, **kwargs) interpolator = partial(func_interpolate_na, interp_class, **kwargs) - arr = apply_ufunc(interpolator, index, self, - input_core_dims=[[dim], [dim]], - output_core_dims=[[dim]], - output_dtypes=[self.dtype], - dask='parallelized', - vectorize=True, - keep_attrs=True).transpose(*self.dims) + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'overflow', RuntimeWarning) + warnings.filterwarnings('ignore', 'invalid value', RuntimeWarning) + arr = apply_ufunc(interpolator, index, self, + input_core_dims=[[dim], [dim]], + output_core_dims=[[dim]], + output_dtypes=[self.dtype], + dask='parallelized', + vectorize=True, + keep_attrs=True).transpose(*self.dims) if limit is not None: arr = arr.where(valids) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 5c7e384c789..47224e55473 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -93,14 +93,14 @@ def test_interpolate_pd_compat(): @requires_scipy -def test_scipy_methods_function(): - for method in ['barycentric', 'krog', 'pchip', 'spline', 'akima']: - kwargs = {} - # Note: Pandas does some wacky things with these methods and the full - # integration tests wont work. - da, _ = make_interpolate_example_data((25, 25), 0.4, non_uniform=True) - actual = da.interpolate_na(method=method, dim='time', **kwargs) - assert (da.count('time') <= actual.count('time')).all() +@pytest.mark.parametrize('method', ['barycentric', 'krog', + 'pchip', 'spline', 'akima']) +def test_scipy_methods_function(method): + # Note: Pandas does some wacky things with these methods and the full + # integration tests wont work. + da, _ = make_interpolate_example_data((25, 25), 0.4, non_uniform=True) + actual = da.interpolate_na(method=method, dim='time') + assert (da.count('time') <= actual.count('time')).all() @requires_scipy From b9851275fdccd4c1cf8e662bffd5b1353b4ea048 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 20 Aug 2018 23:18:25 +0530 Subject: [PATCH 13/16] Silence a bottleneck warning. --- xarray/tests/test_dataarray.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 5d20a6cfec3..d63bc95f266 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3532,6 +3532,8 @@ def test_rolling_reduce(da, center, min_periods, window, name): @pytest.mark.parametrize('min_periods', (None, 1, 2, 3)) @pytest.mark.parametrize('window', (1, 2, 3, 4)) @pytest.mark.parametrize('name', ('sum', 'max')) +@pytest.mark.filterwarnings('ignore:Using a non-tuple sequence') +# root cause of the warning is bottleneck def test_rolling_reduce_nonnumeric(center, min_periods, window, name): da = DataArray([0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], dims='time').isnull() From d9e802458df4656ff1b32a5be4c7f95a1aaf4f27 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 3 Sep 2018 12:40:21 +0530 Subject: [PATCH 14/16] Revert "Silence a bottleneck warning." This reverts commit b9851275fdccd4c1cf8e662bffd5b1353b4ea048. --- xarray/tests/test_dataarray.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index d63bc95f266..5d20a6cfec3 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3532,8 +3532,6 @@ def test_rolling_reduce(da, center, min_periods, window, name): @pytest.mark.parametrize('min_periods', (None, 1, 2, 3)) @pytest.mark.parametrize('window', (1, 2, 3, 4)) @pytest.mark.parametrize('name', ('sum', 'max')) -@pytest.mark.filterwarnings('ignore:Using a non-tuple sequence') -# root cause of the warning is bottleneck def test_rolling_reduce_nonnumeric(center, min_periods, window, name): da = DataArray([0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], dims='time').isnull() From 2c0ed188e5c1f94cf60ec67e0d7d536d1d9cd9ca Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 4 Sep 2018 11:21:01 +0530 Subject: [PATCH 15/16] Dask: change from attribute check to version check. --- xarray/tests/test_dask.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index c92b14f754c..c4b23650df9 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -2,6 +2,7 @@ import pickle from textwrap import dedent +from distutils.version import LooseVersion import numpy as np import pandas as pd @@ -25,7 +26,8 @@ class DaskTestCase(TestCase): def assertLazyAnd(self, expected, actual, test): - with (dask.config.set(get=dask.get) if hasattr(dask, 'config') + with (dask.config.set(get=dask.get) + if dask.__version__ >= LooseVersion('0.18.0') else dask.set_options(get=dask.get)): test(actual, expected) @@ -827,7 +829,8 @@ def test_basic_compute(): dask.multiprocessing.get, dask.local.get_sync, None]: - with (dask.config.set(get=get) if hasattr(dask, 'config') + with (dask.config.set(get=get) + if dask.__version__ >= LooseVersion('0.18.0') else dask.set_options(get=get)): ds.compute() ds.foo.compute() From a74f4e0a109677aec34014a9b2b8bd20ae06681b Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 4 Sep 2018 17:10:03 +0530 Subject: [PATCH 16/16] Maybe this fixes python 2 failure? --- xarray/tests/test_dask.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index c4b23650df9..6ca83ab73ab 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -27,7 +27,7 @@ class DaskTestCase(TestCase): def assertLazyAnd(self, expected, actual, test): with (dask.config.set(get=dask.get) - if dask.__version__ >= LooseVersion('0.18.0') + if LooseVersion(dask.__version__) >= LooseVersion('0.18.0') else dask.set_options(get=dask.get)): test(actual, expected) @@ -830,7 +830,7 @@ def test_basic_compute(): dask.local.get_sync, None]: with (dask.config.set(get=get) - if dask.__version__ >= LooseVersion('0.18.0') + if LooseVersion(dask.__version__) >= LooseVersion('0.18.0') else dask.set_options(get=get)): ds.compute() ds.foo.compute()