From e98b7ab8972c1e6cfe07d64490d2071065be104b Mon Sep 17 00:00:00 2001 From: Tom Vo Date: Thu, 17 Nov 2022 11:43:04 -0800 Subject: [PATCH 1/3] Fix `swap_lon_axis()` breaking when sorting with singleton coords --- xcdat/axis.py | 98 +++++++++++++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 42 deletions(-) diff --git a/xcdat/axis.py b/xcdat/axis.py index 70aeef16..29d8265c 100644 --- a/xcdat/axis.py +++ b/xcdat/axis.py @@ -211,6 +211,12 @@ def swap_lon_axis( exists. Afterwards, it sorts longitude and longitude bounds values in ascending order. + Note, based on how datasets are chunked, swapping the longitude dimension + and sorting might produce ``PerformanceWarning: Slicing is producing a + large chunk. To accept the large chunk and silence this warning, set the + option...``. This function uses xarray's arithmetic to swap orientations, + so this warning seems potentially unavoidable. + Parameters ---------- dataset : xr.Dataset @@ -233,31 +239,31 @@ def swap_lon_axis( The Dataset with swapped lon axes orientation. """ ds = dataset.copy() - coord_keys = get_dim_coords(ds, "X").coords.keys() + coords = get_dim_coords(ds, "X").coords + coord_keys = list(coords.keys()) # Attempt to swap the orientation for longitude coordinates. 
- with xr.set_options(keep_attrs=True): - for key in coord_keys: - new_coord = _swap_lon_axis(ds.coords[key], to) + for key in coord_keys: + new_coord = _swap_lon_axis(ds.coords[key], to) - if ds.coords[key].identical(new_coord): - continue + if ds.coords[key].identical(new_coord): + continue - ds.coords[key] = new_coord + ds.coords[key] = new_coord - try: - bounds = ds.bounds.get_bounds("X") - except KeyError: - bounds = None + try: + bounds = ds.bounds.get_bounds("X") + except KeyError: + bounds = None - if isinstance(bounds, xr.DataArray): - ds = _swap_lon_bounds(ds, str(bounds.name), to) - elif isinstance(bounds, xr.Dataset): - for key in bounds.data_vars.keys(): - ds = _swap_lon_bounds(ds, str(key), to) + if isinstance(bounds, xr.DataArray): + ds = _swap_lon_bounds(ds, str(bounds.name), to) + elif isinstance(bounds, xr.Dataset): + for key in bounds.data_vars.keys(): + ds = _swap_lon_bounds(ds, str(key), to) if sort_ascending: - ds = ds.sortby(list(coord_keys), ascending=True) + ds = ds.sortby(list(coords.dims), ascending=True) return ds @@ -312,7 +318,9 @@ def _get_all_coord_keys( return list(set(keys)) -def _swap_lon_bounds(ds: xr.Dataset, key: str, to: Tuple[float, float]): +def _swap_lon_bounds(dataset: xr.Dataset, key: str, to: Tuple[float, float]): + ds = dataset.copy() + bounds = ds[key].copy() new_bounds = _swap_lon_axis(bounds, to) @@ -350,30 +358,35 @@ def _swap_lon_axis(coords: xr.DataArray, to: Tuple[float, float]) -> xr.DataArra coordinates are already on the specified axis orientation, the same coordinates are returned. """ - if to == (-180, 180): - new_coords = ((coords + 180) % 360) - 180 - elif to == (0, 360): - # Swap the coordinates. - # Example with 180 coords: [-180, -0, 179] -> [0, 180, 360] - # Example with 360 coords: [60, 150, 360] -> [60, 150, 0] - new_coords = coords % 360 - - # Check if the original coordinates contain an element with a value of - # 360. 
If this element exists, use its index to revert its swapped - # value of 0 (360 % 360 is 0) back to 360. This case usually happens - # if the coordinate are already on the (0, 360) axis orientation. - # Example with 360 coords: [60, 150, 0] -> [60, 150, 360] - index_with_360 = np.where(coords == 360) - - if len(index_with_360) > 0: - _if_multidim_dask_array_then_load(new_coords) - - new_coords[index_with_360] = 360 - else: - raise ValueError( - "Currently, only (-180, 180) and (0, 360) are supported longitude axis " - "orientations." - ) + with xr.set_options(keep_attrs=True): + if to == (-180, 180): + # FIXME: Performance warning produced after swapping and then sorting. + new_coords = ((coords + 180) % 360) - 180 + elif to == (0, 360): + # Swap the coordinates. + # Example with 180 coords: [-180, -0, 179] -> [0, 180, 360] + # Example with 360 coords: [60, 150, 360] -> [60, 150, 0] + # FIXME: Performance warning produced after swapping and then sorting. + new_coords = coords % 360 + + # Check if the original coordinates contain an element with a value of + # 360. If this element exists, use its index to revert its swapped + # value of 0 (360 % 360 is 0) back to 360. This case usually happens + # if the coordinate are already on the (0, 360) axis orientation. + # Example with 360 coords: [60, 150, 0] -> [60, 150, 360] + index_with_360 = np.where(coords == 360) + + if len(index_with_360) > 0: + _if_multidim_dask_array_then_load(new_coords) + + new_coords[index_with_360] = 360 + else: + raise ValueError( + "Currently, only (-180, 180) and (0, 360) are supported longitude axis " + "orientations." 
+ ) + + new_coords.encoding = coords.encoding return new_coords @@ -406,6 +419,7 @@ def _get_prime_meridian_index(lon_bounds: xr.DataArray) -> Optional[np.ndarray]: return None elif p_meridian_index.size > 1: raise ValueError("More than one grid cell spans prime meridian.") + return p_meridian_index From 1dfa259905676bba2c93ae2414a161add4cfa638 Mon Sep 17 00:00:00 2001 From: Tom Vo Date: Thu, 17 Nov 2022 14:08:02 -0800 Subject: [PATCH 2/3] Update comments --- xcdat/axis.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/xcdat/axis.py b/xcdat/axis.py index 29d8265c..7354d4b1 100644 --- a/xcdat/axis.py +++ b/xcdat/axis.py @@ -318,9 +318,7 @@ def _get_all_coord_keys( return list(set(keys)) -def _swap_lon_bounds(dataset: xr.Dataset, key: str, to: Tuple[float, float]): - ds = dataset.copy() - +def _swap_lon_bounds(ds: xr.Dataset, key: str, to: Tuple[float, float]): bounds = ds[key].copy() new_bounds = _swap_lon_axis(bounds, to) @@ -360,19 +358,21 @@ def _swap_lon_axis(coords: xr.DataArray, to: Tuple[float, float]) -> xr.DataArra """ with xr.set_options(keep_attrs=True): if to == (-180, 180): - # FIXME: Performance warning produced after swapping and then sorting. + # FIXME: Performance warning produced after swapping and then sorting + # based on how datasets are chunked. new_coords = ((coords + 180) % 360) - 180 elif to == (0, 360): - # Swap the coordinates. # Example with 180 coords: [-180, -0, 179] -> [0, 180, 360] # Example with 360 coords: [60, 150, 360] -> [60, 150, 0] - # FIXME: Performance warning produced after swapping and then sorting. + # FIXME: Performance warning produced after swapping and then sorting + # based on how datasets are chunked. new_coords = coords % 360 - # Check if the original coordinates contain an element with a value of - # 360. If this element exists, use its index to revert its swapped - # value of 0 (360 % 360 is 0) back to 360. 
This case usually happens - # if the coordinate are already on the (0, 360) axis orientation. + # Check if the original coordinates contain an element with a value + # of 360. If this element exists, use its index to revert its + # swapped value of 0 (360 % 360 is 0) back to 360. This case usually + # happens if the coordinates are already on the (0, 360) axis + # orientation. # Example with 360 coords: [60, 150, 0] -> [60, 150, 360] index_with_360 = np.where(coords == 360) From 580f5e92fb16f9a5124e88138e054491c440b458 Mon Sep 17 00:00:00 2001 From: Tom Vo Date: Thu, 17 Nov 2022 14:13:55 -0800 Subject: [PATCH 3/3] Update xcdat/axis.py --- xcdat/axis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xcdat/axis.py b/xcdat/axis.py index 7354d4b1..5b71d5b7 100644 --- a/xcdat/axis.py +++ b/xcdat/axis.py @@ -212,7 +212,7 @@ def swap_lon_axis( ascending order. Note, based on how datasets are chunked, swapping the longitude dimension - and sorting might produce ``PerformanceWarning: Slicing is producing a + and sorting might raise ``PerformanceWarning: Slicing is producing a large chunk. To accept the large chunk and silence this warning, set the option...``. This function uses xarray's arithmetic to swap orientations, so this warning seems potentially unavoidable.