From 4b88c735b28c4575d0b44dd2d0c39107de2543ae Mon Sep 17 00:00:00 2001 From: Keith Doore Date: Fri, 23 Feb 2024 09:50:00 -0600 Subject: [PATCH 1/5] Added mask to replace zeros outside of original data grid with NaNs --- src/xarray_regrid/methods/conservative.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/xarray_regrid/methods/conservative.py b/src/xarray_regrid/methods/conservative.py index 58db487..7f59722 100644 --- a/src/xarray_regrid/methods/conservative.py +++ b/src/xarray_regrid/methods/conservative.py @@ -90,7 +90,11 @@ def conservative_regrid_dataset( da_attrs = [da.attrs for da in dataarrays] coord_attrs = [data[coord].attrs for coord in data_coords] + mask = {} for coord in coords: + mask[coord] = ((coords[coord] <= data[coord].max()) + & (coords[coord] >= data[coord].min())) + target_coords = coords[coord].to_numpy() source_coords = data[coord].to_numpy() weights = get_weights(source_coords, target_coords) @@ -112,6 +116,10 @@ def conservative_regrid_dataset( da.attrs = attr regridded = xr.merge(dataarrays) + # Replace zeros outside of original data grid with NaNs + for coord in coords: + regridded = regridded.where(mask[coord]) + regridded.attrs = attrs new_coords = [regridded[coord] for coord in data_coords] @@ -133,6 +141,9 @@ def conservative_regrid_dataarray( coord_attrs = [data[coord].attrs for coord in data_coords] for coord in coords: + mask = ((coords[coord] <= data[coord].max()) + & (coords[coord] >= data[coord].min())) + if coord in data.coords: target_coords = coords[coord].to_numpy() source_coords = data[coord].to_numpy() @@ -150,6 +161,9 @@ def conservative_regrid_dataarray( data = data.transpose(coord, ...) 
data = apply_weights(data, weights, coord, target_coords) + # Replace zeros outside of original data grid with NaNs + data = data.where(mask) + new_coords = [data[coord] for coord in data_coords] for coord, attr in zip(new_coords, coord_attrs, strict=True): coord.attrs = attr From e09e09b1963af9ac33be53a98b3925a0a66b36ab Mon Sep 17 00:00:00 2001 From: Keith Doore Date: Fri, 23 Feb 2024 10:05:23 -0600 Subject: [PATCH 2/5] Masked regridded regions outside of data range with NaN --- src/xarray_regrid/methods/most_common.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/xarray_regrid/methods/most_common.py b/src/xarray_regrid/methods/most_common.py index b56ab1b..5b87800 100644 --- a/src/xarray_regrid/methods/most_common.py +++ b/src/xarray_regrid/methods/most_common.py @@ -190,6 +190,12 @@ def most_common(data: xr.Dataset, target_ds: xr.Dataset, time_dim: str) -> xr.Da ds_regrid = ds_regrid.rename({f"{coord}_bins": coord for coord in coords}) for coord in coords: ds_regrid[coord] = target_ds[coord] + + # Replace zeros outside of original data grid with NaNs + mask = ((target_ds[coord] <= data[coord].max()) + & (target_ds[coord] >= data[coord].min())) + ds_regrid = ds_regrid.where(mask) + ds_regrid[coord].attrs = coord_attrs[coord] return ds_regrid.transpose(*dim_order) From 81272efc7690650b036d4977453fa4b4affcf868 Mon Sep 17 00:00:00 2001 From: Keith Doore Date: Tue, 27 Feb 2024 09:19:29 -0600 Subject: [PATCH 3/5] Linting fixes --- src/xarray_regrid/methods/conservative.py | 17 ++++++++++------- src/xarray_regrid/methods/most_common.py | 7 ++++--- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/xarray_regrid/methods/conservative.py b/src/xarray_regrid/methods/conservative.py index 7f59722..c46bc51 100644 --- a/src/xarray_regrid/methods/conservative.py +++ b/src/xarray_regrid/methods/conservative.py @@ -90,10 +90,12 @@ def conservative_regrid_dataset( da_attrs = [da.attrs for da in dataarrays] coord_attrs = [data[coord].attrs for coord 
in data_coords] - mask = {} + # track which target coordinate values are not covered by the source grid + uncovered_target_grid = {} for coord in coords: - mask[coord] = ((coords[coord] <= data[coord].max()) - & (coords[coord] >= data[coord].min())) + uncovered_target_grid[coord] = (coords[coord] <= data[coord].max()) & ( + coords[coord] >= data[coord].min() + ) target_coords = coords[coord].to_numpy() source_coords = data[coord].to_numpy() @@ -118,7 +120,7 @@ def conservative_regrid_dataset( # Replace zeros outside of original data grid with NaNs for coord in coords: - regridded = regridded.where(mask[coord]) + regridded = regridded.where(uncovered_target_grid[coord]) regridded.attrs = attrs @@ -141,8 +143,9 @@ def conservative_regrid_dataarray( coord_attrs = [data[coord].attrs for coord in data_coords] for coord in coords: - mask = ((coords[coord] <= data[coord].max()) - & (coords[coord] >= data[coord].min())) + uncovered_target_grid = (coords[coord] <= data[coord].max()) & ( + coords[coord] >= data[coord].min() + ) if coord in data.coords: target_coords = coords[coord].to_numpy() @@ -162,7 +165,7 @@ def conservative_regrid_dataarray( data = apply_weights(data, weights, coord, target_coords) # Replace zeros outside of original data grid with NaNs - data = data.where(mask) + data = data.where(uncovered_target_grid) new_coords = [data[coord] for coord in data_coords] for coord, attr in zip(new_coords, coord_attrs, strict=True): diff --git a/src/xarray_regrid/methods/most_common.py b/src/xarray_regrid/methods/most_common.py index 5b87800..cd07c03 100644 --- a/src/xarray_regrid/methods/most_common.py +++ b/src/xarray_regrid/methods/most_common.py @@ -192,9 +192,10 @@ def most_common(data: xr.Dataset, target_ds: xr.Dataset, time_dim: str) -> xr.Da ds_regrid[coord] = target_ds[coord] # Replace zeros outside of original data grid with NaNs - mask = ((target_ds[coord] <= data[coord].max()) - & (target_ds[coord] >= data[coord].min())) - ds_regrid = ds_regrid.where(mask) + 
uncovered_target_grid = (target_ds[coord] <= data[coord].max()) & ( + target_ds[coord] >= data[coord].min() + ) + ds_regrid = ds_regrid.where(uncovered_target_grid) ds_regrid[coord].attrs = coord_attrs[coord] From 66c7a73342e319f138d8d7cfdf7e002b678624a3 Mon Sep 17 00:00:00 2001 From: Keith Doore Date: Tue, 27 Feb 2024 15:04:45 -0600 Subject: [PATCH 4/5] Most common NaN test --- tests/test_most_common.py | 45 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tests/test_most_common.py b/tests/test_most_common.py index d8a2a07..fd374ee 100644 --- a/tests/test_most_common.py +++ b/tests/test_most_common.py @@ -51,6 +51,19 @@ def dummy_target_grid(): return create_regridding_dataset(new_grid) +@pytest.fixture +def oversized_dummy_target_grid(): + new_grid = Grid( + north=48, + east=48, + south=-8, + west=-8, + resolution_lat=8, + resolution_lon=8, + ) + return create_regridding_dataset(new_grid) + + def test_most_common(dummy_lc_data, dummy_target_grid): expected_data = np.array( [ @@ -81,6 +94,38 @@ def test_most_common(dummy_lc_data, dummy_target_grid): ) +def test_oversized_most_common(dummy_lc_data, oversized_dummy_target_grid): + expected_data = np.array( + [ + [np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN], + [np.NaN, 2, 2, 0, 0, 0, 0, np.NaN], + [np.NaN, 0, 0, 0, 0, 0, 0, np.NaN], + [np.NaN, 0, 0, 0, 0, 0, 0, np.NaN], + [np.NaN, 0, 0, 0, 0, 0, 0, np.NaN], + [np.NaN, 0, 0, 0, 0, 0, 0, np.NaN], + [np.NaN, 3, 3, 0, 0, 0, 1, np.NaN], + [np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN], + ] + ) + + lat_coords = np.linspace(-8, 48, num=8) + lon_coords = np.linspace(-8, 48, num=8) + + expected = xr.Dataset( + data_vars={ + "lc": (["longitude", "latitude"], expected_data), + }, + coords={ + "longitude": (["longitude"], lon_coords), + "latitude": (["latitude"], lat_coords), + }, + ) + xr.testing.assert_equal( + dummy_lc_data.regrid.most_common(oversized_dummy_target_grid)["lc"], + expected["lc"], + ) + 
+ def test_attrs_dataarray(dummy_lc_data, dummy_target_grid): dummy_lc_data["lc"].attrs = {"test": "testing"} da_regrid = dummy_lc_data["lc"].regrid.most_common(dummy_target_grid) From d97d79724f3356a3e954d09b14a134ee6180d5a0 Mon Sep 17 00:00:00 2001 From: Keith Doore Date: Wed, 28 Feb 2024 10:27:56 -0600 Subject: [PATCH 5/5] Updated CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 392c589..506213e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/). Fixed: - Ensured all attributes are kept upon regridding (dataset, variable and coordinate attrs). + - Regridding to a larger grid now results in NaNs at locations outside of the starting data grid. Changed: - Moved to the Ruff formatter, instead of black.