Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Triangulation speed #163

Merged
merged 4 commits into from
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 36 additions & 15 deletions src/emsarray/conventions/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,23 +409,43 @@ def check_dataset(cls, dataset: xarray.Dataset) -> int | None:
return Specificity.LOW

def _make_polygons(self) -> numpy.ndarray:
    """
    Build one shapely polygon per grid cell from the 1D coordinate bounds,
    returning a flat ``numpy.ndarray`` of polygons with ``topology.size`` entries.
    """
    y_size, x_size = self.topology.shape
    lon_bounds = self.topology.longitude_bounds.values
    lat_bounds = self.topology.latitude_bounds.values

    # Make a bounds array as if this dataset had 2D coordinates.
    # 1D bounds are (lon, 2) and (lat, 2).
    # 2D bounds are (lat, lon, 4)
    # where the 4 points are (j-1, i-1), (j-1, i+1), (j+1, i+1), (j+1, i-1).
    # The bounds values are repeated as required, given a new dimension,
    # then broadcast along that new dimension.
    # They will come out as arrays with shape (y_size, x_size, 4).
    # numpy.broadcast_to returns a read-only view, avoiding the copy
    # that numpy.tile would make.

    lon_bounds_2d = numpy.stack([
        lon_bounds[:, 0],
        lon_bounds[:, 1],
        lon_bounds[:, 1],
        lon_bounds[:, 0],
    ], axis=-1)
    lon_bounds_2d = numpy.broadcast_to(
        numpy.expand_dims(lon_bounds_2d, 0), (y_size, x_size, 4))

    lat_bounds_2d = numpy.stack([
        lat_bounds[:, 0],
        lat_bounds[:, 0],
        lat_bounds[:, 1],
        lat_bounds[:, 1],
    ], axis=-1)
    # Broadcast along a leading x axis first, then swap to (y, x, 4)
    # so it lines up with lon_bounds_2d.
    lat_bounds_2d = numpy.broadcast_to(
        numpy.expand_dims(lat_bounds_2d, 0), (x_size, y_size, 4))
    lat_bounds_2d = numpy.transpose(lat_bounds_2d, (1, 0, 2))

    assert lon_bounds_2d.shape == lat_bounds_2d.shape == (y_size, x_size, 4)

    # points is a (topology.size, 4, 2) array of the corners of each cell
    points = numpy.stack([lon_bounds_2d, lat_bounds_2d], axis=-1).reshape((-1, 4, 2))

    polygons = utils.make_polygons_with_holes(points)

    return polygons

@cached_property
def face_centres(self) -> numpy.ndarray:
Expand Down Expand Up @@ -506,6 +526,9 @@ def _get_or_make_bounds(self, coordinate: xarray.DataArray) -> xarray.DataArray:
bound_by_nan = j_bound_by_nan | i_bound_by_nan
coordinate_values[bound_by_nan] = numpy.nan

# grid is a (y+1, x+1) shape array built by averaging the cell centres.
# Cells on the outside have been padded with `nan` neighbours.
#
# numpy.nanmean will return nan for an all-nan column.
# This is the exact behaviour that we want.
# numpy emits a warning that can not be silenced when this happens,
Expand All @@ -519,23 +542,21 @@ def _get_or_make_bounds(self, coordinate: xarray.DataArray) -> xarray.DataArray:
], axis=0)

y_size, x_size = self.shape
bounds = numpy.array([
[
[grid[y, x], grid[y, x + 1], grid[y + 1, x + 1], grid[y + 1, x]]
for x in range(x_size)
]
for y in range(y_size)
])
bounds = numpy.stack([
grid[:-1, :-1], grid[:-1, 1:], grid[1:, 1:], grid[1:, :-1],
], axis=-1)

# Set nan bounds for all cells that have any `nan` in its bounds.
cells_with_nans = numpy.isnan(bounds).any(axis=2)
bounds[cells_with_nans] = numpy.nan

return xarray.DataArray(
data_array = xarray.DataArray(
bounds,
dims=[self.y_dimension, self.x_dimension, 'bounds'],
)

return data_array

@cached_property
def longitude_bounds(self) -> xarray.DataArray:
return self._get_or_make_bounds(self.longitude)
Expand Down
30 changes: 19 additions & 11 deletions src/emsarray/conventions/ugrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import pathlib
import warnings
from collections import defaultdict
from collections.abc import Hashable, Iterable, Mapping, Sequence
from collections.abc import Hashable, Iterable, Sequence
from contextlib import suppress
from dataclasses import dataclass
from functools import cached_property
Expand Down Expand Up @@ -514,7 +514,8 @@ def _to_index_array(
else:
# If the value is still an integer then it had no fill value.
# Convert it to a mask array with no masked values.
values = numpy.ma.masked_array(values, mask=numpy.ma.nomask)
mask = numpy.full_like(values, fill_value=numpy.ma.nomask)
values = numpy.ma.masked_array(values, mask=mask)

# UGRID conventions allow for zero based or one based indexing.
# To be consistent we convert all indexes to zero based.
Expand Down Expand Up @@ -1078,24 +1079,31 @@ def grid_kinds(self) -> frozenset[UGridKind]:

def _make_polygons(self) -> numpy.ndarray:
    """Generate the list of Polygons, one per face, as an object array."""
    topology = self.topology
    # X,Y coords of each node
    node_x = topology.node_x.values
    node_y = topology.node_y.values
    # The nodes that each face is constructed from.
    # This is a masked array: trailing fill entries in each row are masked.
    face_node = topology.face_node_array

    # Preallocate an array to put the polygons in
    polygons = numpy.full(topology.face_count, None, dtype=numpy.object_)

    # `shapely.polygons` will make polygons with the same number of vertices.
    # UGRID polygons have arbitrary numbers of vertices.
    # Group polygons by how many vertices they have, then make them in bulk.
    # Polygon sizes can be derived from how many nodes are not masked per row.
    polygon_sizes = numpy.sum(~numpy.ma.getmaskarray(face_node), axis=1)
    unique_sizes = numpy.unique(polygon_sizes)

    # Make polygons in batches, one batch per distinct vertex count
    for unique_size in unique_sizes:
        # Extract the face node data for every polygon of this size.
        # Valid node indices occupy the first `unique_size` columns;
        # only padding beyond that is masked.
        indices = numpy.flatnonzero(polygon_sizes == unique_size)
        nodes = numpy.ma.getdata(face_node)[indices, :unique_size]
        coords = numpy.stack([node_x[nodes], node_y[nodes]], axis=-1)
        # Generate the polygons directly in to their correct locations
        shapely.polygons(coords, indices=indices, out=polygons)

    return polygons

Expand Down
Loading
Loading