Skip to content
This repository has been archived by the owner on Aug 29, 2023. It is now read-only.

Jg 541 subset spatial update #563

Merged
merged 31 commits into from
Mar 17, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
212e778
Subset long_polygons
Mar 8, 2018
64b1308
Fix animation
Mar 8, 2018
178114f
Fix subset tests
Mar 8, 2018
2486d9d
ds uses new spatial_subset_impl
Mar 8, 2018
541e1ee
Don't create dis-joint longitude
Mar 8, 2018
ae84985
Adjust attributes, fix tests
Mar 8, 2018
add09d6
Notebook to test #508
Mar 9, 2018
96e2dfc
Subset spatial optimizations
Mar 9, 2018
1021215
Fix tests
Mar 9, 2018
ae384ad
Use matplotlib bulk ray tracing algorithm
Mar 9, 2018
0d0e132
Merge branch 'master' into jg-541-subset-spatial-update
Mar 14, 2018
548470a
Fix merging issues
Mar 14, 2018
3766b91
More robust bounds handling
Mar 14, 2018
1654754
Add monitor to subset_spatial
Mar 14, 2018
740944a
Plot meshgrid, include crossed pixels
Mar 14, 2018
9f01369
Update subset tests
Mar 14, 2018
ec8f6ff
Select all crossed pixels when masking a polygon
Mar 14, 2018
05c3638
Add parameter for pixel or contour plot
Mar 15, 2018
9f9efdf
Default pixel mesh for animations
Mar 15, 2018
922786b
Handle selecting single pixels and 1D lines
Mar 15, 2018
ba13167
Fix more edge cases
Mar 15, 2018
95a34a3
Fix anomaly test
Mar 15, 2018
5ade4c2
Tests, clean-up
Mar 15, 2018
1863c86
Notebok showcasing subset_spatial
Mar 15, 2018
0b5d621
Fix tests, add allow_point parameter
Mar 15, 2018
c20d755
Merge branch 'master' into jg-541-subset-spatial-update
Mar 15, 2018
945a4d2
Fix ds implementations
Mar 15, 2018
17e74c6
Fix messed up automatic merge
Mar 16, 2018
b92fc86
Flake8 fixes
Mar 16, 2018
b7388c0
Update CHANGES.md
Mar 16, 2018
2aff50d
Merge branch 'master' into jg-541-subset-spatial-update
JanisGailis Mar 16, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@
## Version 2.0.0.dev5 (in dev)
## Version 2.0.0.dev5 (in dev)

* Select long rectangles with ``subset_spatial()``
[#541](https://github.com/CCI-Tools/cate/issues/541)
* Improve performance of ``subset_spatial()``, especially when masking complex polygons
[#508](https://github.com/CCI-Tools/cate/issues/508)
* Select all pixels that are crossed by the given polygon in ``subset_spatial()``
[#560](https://github.com/CCI-Tools/cate/issues/560)
* Enable ``subset_spatial()`` to work with all valid polygons, including sub-pixel ones.
[#507](https://github.com/CCI-Tools/cate/issues/507)
* By default ``plot_map()`` and ``animate_map()`` now produce colormesh (pixel) plots.
[#559](https://github.com/CCI-Tools/cate/issues/507)

## Version 2.0.0.dev4

Expand Down
240 changes: 190 additions & 50 deletions cate/core/opimpl.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,12 @@
import numpy as np
import xarray as xr
from jdcal import jd2gcal
from shapely.geometry import Point, box, LineString, Polygon
from shapely.geometry import box, LineString, Polygon
from matplotlib import path

from .types import PolygonLike
from ..util.misc import to_list
from ..util.monitor import Monitor


def normalize_impl(ds: xr.Dataset) -> xr.Dataset:
Expand Down Expand Up @@ -153,7 +158,7 @@ def _normalize_jd2datetime(ds: xr.Dataset) -> xr.Dataset:
return ds


def adjust_spatial_attrs_impl(ds: xr.Dataset) -> xr.Dataset:
def adjust_spatial_attrs_impl(ds: xr.Dataset, allow_point: bool) -> xr.Dataset:
"""
Adjust the global spatial attributes of the dataset by doing some
introspection of the dataset and adjusting the appropriate attributes
Expand All @@ -171,7 +176,7 @@ def adjust_spatial_attrs_impl(ds: xr.Dataset) -> xr.Dataset:
ds = ds.copy()

for dim in ('lon', 'lat'):
geo_spatial_attrs = _get_geo_spatial_attrs(ds, dim)
geo_spatial_attrs = _get_geo_spatial_attrs(ds, dim, allow_point=allow_point)

for key in geo_spatial_attrs:
if geo_spatial_attrs[key] is not None:
Expand Down Expand Up @@ -292,12 +297,32 @@ def _get_geo_spatial_attrs(ds: xr.Dataset, dim_name: str, allow_point: bool = Fa
# If 'bounds' attribute is missing, the bounds coordinate variable may be named "<dim>_bnds"
bnds_name = '%s_bnds' % dim_name

if bnds_name in ds:
if 'units' in coord_var.attrs:
dim_units = coord_var.attrs['units']
else:
dim_units = None

try:
bnds_var = ds[bnds_name]
dim_res = abs(bnds_var.values[0][1] - bnds_var.values[0][0])
dim_min = min(bnds_var.values[0][0], bnds_var.values[-1][1])
dim_max = max(bnds_var.values[0][0], bnds_var.values[-1][1])
elif len(coord_var.values) >= 2:

res_name = 'geospatial_{}_resolution'.format(dim_name)
min_name = 'geospatial_{}_min'.format(dim_name)
max_name = 'geospatial_{}_max'.format(dim_name)
units_name = 'geospatial_{}_units'.format(dim_name)
ret[res_name] = dim_res
ret[min_name] = dim_min
ret[max_name] = dim_max
ret[units_name] = dim_units
return ret

except (ValueError, KeyError):
# Can't determine values from a bounds variable, carry on
pass

if len(coord_var.values) >= 2:
dim_res = abs(coord_var.values[1] - coord_var.values[0])
dim_min = min(coord_var.values[0], coord_var.values[-1]) - 0.5 * dim_res
dim_max = max(coord_var.values[0], coord_var.values[-1]) + 0.5 * dim_res
Expand All @@ -309,11 +334,6 @@ def _get_geo_spatial_attrs(ds: xr.Dataset, dim_name: str, allow_point: bool = Fa
else:
raise ValueError('Cannot determine spatial extent for dimension "{}"'.format(dim_name))

if 'units' in coord_var.attrs:
dim_units = coord_var.attrs['units']
else:
dim_units = None

res_name = 'geospatial_{}_resolution'.format(dim_name)
min_name = 'geospatial_{}_min'.format(dim_name)
max_name = 'geospatial_{}_max'.format(dim_name)
Expand Down Expand Up @@ -395,9 +415,70 @@ def _lat_inverted(lat: xr.DataArray) -> bool:
return False


def get_extents(region: PolygonLike.TYPE):
"""
Get extents of a PolygonLike, handles explicitly given
coordinates.

:param region: PolygonLike to introspect
:return: ([lon_min, lat_min, lon_max, lat_max], boolean_explicit_coords)
"""
explicit_coords = False
try:
maybe_rectangle = to_list(region, dtype=float)
if maybe_rectangle is not None:
if len(maybe_rectangle) == 4:
lon_min, lat_min, lon_max, lat_max = maybe_rectangle
explicit_coords = True
except BaseException:
# The polygon must be convertible, but it's complex
polygon = PolygonLike.convert(region)
if not polygon.is_valid:
# Heal polygon, see #506 and Shapely User Manual
region = polygon.buffer(0)
else:
region = polygon
# Get the bounding box
lon_min, lat_min, lon_max, lat_max = region.bounds

return([lon_min, lat_min, lon_max, lat_max], explicit_coords)


def _pad_extents(ds: xr.Dataset, extents: tuple):
"""
Pad extents by half a pixel in both directions, to make sure subsetting
slices include all pixels crossed by the bounding box. Set extremes
to maximum valid geospatial extents.
"""
try:
lon_pixel = abs(ds.lon.values[1] - ds.lon.values[0])
lon_min = extents[0] - lon_pixel / 2
lon_max = extents[2] + lon_pixel / 2
except IndexError:
# 1D dimension, leave extents as they were
lon_min = extents[0]
lon_max = extents[2]

try:
lat_pixel = abs(ds.lat.values[1] - ds.lat.values[0])
lat_min = extents[1] - lat_pixel / 2
lat_max = extents[3] + lat_pixel / 2
except IndexError:
lat_min = extents[1]
lat_max = extents[3]

lon_min = -180 if lon_min < -180 else lon_min
lat_min = -90 if lat_min < -90 else lat_min
lon_max = 180 if lon_max > 180 else lon_max
lat_max = 90 if lat_max > 90 else lat_max

return (lon_min, lat_min, lon_max, lat_max)


def subset_spatial_impl(ds: xr.Dataset,
region: Polygon,
mask: bool = True) -> xr.Dataset:
region: PolygonLike.TYPE,
mask: bool = True,
monitor: Monitor = Monitor.NONE) -> xr.Dataset:
"""
Do a spatial subset of the dataset

Expand All @@ -407,45 +488,51 @@ def subset_spatial_impl(ds: xr.Dataset,
not the polygon itself be masked with NaN.
:return: Subset dataset
"""
monitor.start('Subset', 10)
# Validate input
try:
polygon = PolygonLike.convert(region)
except BaseException as e:
raise e

if not region.is_valid:
# Heal polygon, see #506 and Shapely User Manual
region = region.buffer(0)
extents, explicit_coords = get_extents(region)

# Get the bounding box
lon_min, lat_min, lon_max, lat_max = region.bounds
lon_min, lat_min, lon_max, lat_max = extents

# Validate the bounding box
if (not (-90 <= lat_min <= 90)) or \
(not (-90 <= lat_max <= 90)) or \
(not (-180 <= lon_min <= 180)) or \
(not (-180 <= lon_max <= 180)):
raise ValueError('Provided polygon extent outside of geospatial'
raise ValueError('Provided polygon extends outside of geospatial'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use "extent", see http://www.dictionary.com/browse/extent

Copy link
Member Author

@JanisGailis JanisGailis Mar 16, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah... I don't agree. :)
The depends if you want a statement, or a description. But even if it's a statement, just 'extent' would be wrong-ish. So it's either:
'The provided polygon extent is outside of bounds'

Or with a verb (http://www.dictionary.com/browse/extend)
'The provided polygon extends outside of bounds'

' bounds: latitude [-90;90], longitude [-180;180]')

simple_polygon = False
if region.equals(box(lon_min, lat_min, lon_max, lat_max)):
# Don't do the computationally intensive masking if the provided
# region is a simple box-polygon, for which there will be nothing to
# mask.
simple_polygon = True
# Don't do the computationally intensive masking if the provided
# region is a simple box-polygon, for which there will be nothing to
# mask.
simple_polygon = polygon.equals(box(lon_min, lat_min, lon_max, lat_max))

# Pad extents to include crossed pixels
lon_min, lat_min, lon_max, lat_max = _pad_extents(ds, extents)

crosses_antimeridian = _crosses_antimeridian(region)
crosses_antimeridian = (lon_min > lon_max) if explicit_coords else _crosses_antimeridian(polygon)
lat_inverted = _lat_inverted(ds.lat)
if lat_inverted:
lat_index = slice(lat_max, lat_min)
else:
lat_index = slice(lat_min, lat_max)

if crosses_antimeridian and not simple_polygon:
if crosses_antimeridian and not simple_polygon and mask:
# Unlikely but plausible
raise NotImplementedError('Spatial subsets crossing the anti-meridian'
' are currently implemented for simple,'
' rectangular polygons only.')

' rectangular polygons only, or complex polygons'
' without masking.')
monitor.progress(1)
if crosses_antimeridian:
# Shapely messes up longitudes if the polygon crosses the antimeridian
lon_min, lon_max = lon_max, lon_min
if not explicit_coords:
lon_min, lon_max = lon_max, lon_min

# Can't perform a simple selection with slice, hence we have to
# construct an appropriate longitude indexer for selection
Expand All @@ -457,33 +544,86 @@ def subset_spatial_impl(ds: xr.Dataset,
indexers = {'lon': lon_index, 'lat': lat_index}
retset = ds.sel(**indexers)

if mask:
# Preserve the original longitude dimension, masking elements that
# do not belong to the polygon with NaN.
# Preserve the original longitude dimension, masking elements that
# do not belong to the polygon with NaN.
with monitor.observing(8):
return retset.reindex_like(ds.lon)
else:
# Return the dataset with no NaNs and with a disjoint longitude
# dimension
return retset

if not mask or simple_polygon:
# The polygon doesn't cross the IDL, it is a simple box -> Use a simple slice
lon_slice = slice(lon_min, lon_max)
indexers = {'lat': lat_index, 'lon': lon_slice}
return ds.sel(**indexers)
lon_slice = slice(lon_min, lon_max)
indexers = {'lat': lat_index, 'lon': lon_slice}
retset = ds.sel(**indexers)

if len(retset.lat) == 0 or len(retset.lon) == 0:
# Empty return dataset => region out of dataset bounds
raise ValueError("Can not select a region outside dataset boundaries.")

if not mask or simple_polygon or explicit_coords:
# The polygon doesn't cross the anti-meridian, it is a simple box -> Use a simple slice
with monitor.observing(8):
return retset

# Create the mask array. The result of this is a lon/lat DataArray where
# all values falling in the region or on its boundary are denoted with True
# and all the rest with False
lonm, latm = np.meshgrid(ds.lon.values, ds.lat.values)
mask = np.array([Point(lon, lat).intersects(region) for lon, lat in
zip(lonm.ravel(), latm.ravel())], dtype=bool)
mask = xr.DataArray(mask.reshape(lonm.shape),
coords={'lon': ds.lon, 'lat': ds.lat},
# all pixels falling in the region or on its boundary are denoted with True
# and all the rest with False. Works on polygon exterior

polypath = path.Path(np.column_stack([polygon.exterior.coords.xy[0],
polygon.exterior.coords.xy[1]]))

# Handle also a single pixel and 1D edge cases
if len(retset.lat) == 1 or len(retset.lon) == 1:
# Create a mask directly on pixel centers
lonm, latm = np.meshgrid(retset.lon.values, retset.lat.values)
grid_points = [(lon, lat) for lon, lat in zip(lonm.ravel(), latm.ravel())]
mask = polypath.contains_points(grid_points)
mask = mask.reshape(lonm.shape)
mask = xr.DataArray(mask,
coords={'lon': retset.lon.values, 'lat': retset.lat.values},
dims=['lat', 'lon'])

with monitor.observing(5):
# Apply the mask to data
retset = retset.where(mask, drop=True)
return retset

# The normal case
# Create a grid of pixel vertices
lon_pixel = abs(ds.lon.values[1] - ds.lon.values[0])
lat_pixel = abs(ds.lat.values[1] - ds.lat.values[0])

lon_min = retset.lon.values[0] - lon_pixel / 2
lon_max = retset.lon.values[-1] + lon_pixel / 2
lat_min = retset.lat.values[0] - lat_pixel / 2
lat_max = retset.lat.values[-1] + lat_pixel / 2

lat_grid = np.linspace(lat_min, lat_max, len(retset.lat.values) + 1)
lon_grid = np.linspace(lon_min, lon_max, len(retset.lon.values) + 1)

lonm, latm = np.meshgrid(lon_grid, lat_grid)

# Mark all grid points falling within the polygon as True

monitor.progress(1)
grid_points = [(lon, lat) for lon, lat in zip(lonm.ravel(), latm.ravel())]

monitor.progress(1)
mask = polypath.contains_points(grid_points)

monitor.progress(1)

mask = mask.reshape(lonm.shape)
# Vectorized 'rolling window' numpy magic to go from pixel vertices to pixel centers
mask = mask[1:, 1:] + mask[1:, :-1] + mask[:-1, 1:] + mask[:-1, :-1]

mask = xr.DataArray(mask,
coords={'lon': retset.lon.values, 'lat': retset.lat.values},
dims=['lat', 'lon'])

# Mask values outside the polygon with NaN, crop the dataset
return ds.where(mask, drop=True)
with monitor.observing(5):
# Apply the mask to data
retset = retset.where(mask, drop=True)

monitor.done()
return retset


def _crosses_antimeridian(region: Polygon) -> bool:
Expand Down
3 changes: 2 additions & 1 deletion cate/core/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ def some_op(file: PathLike.TYPE) -> bool:
import xarray
from shapely.errors import ShapelyError

from .opimpl import adjust_temporal_attrs_impl
from ..util.misc import to_list, to_datetime_range, to_datetime
from ..util.safe import safe_eval

Expand Down Expand Up @@ -630,6 +629,8 @@ class DatasetLike(Like[xarray.Dataset]):
@classmethod
def convert(cls, value: Any) -> Optional[xarray.Dataset]:
# Can be optional
from cate.core.opimpl import adjust_temporal_attrs_impl

if value is None:
return None
if isinstance(value, xarray.Dataset):
Expand Down
Loading