From a2fe22945785ffb4e62c16c042ae22deb506a693 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20H=C3=BCgel?= Date: Thu, 31 Oct 2024 14:24:28 +0000 Subject: [PATCH] Add UnaryUnion trait and impl This makes the unary unary union algorithm available to anything that can produce an iterator of Polygons (MultiPolygon, containers of Polygon) and enables on-demand multi-threaded unary unions with a wrapper. --- geo-types/CHANGES.md | 3 + geo-types/Cargo.toml | 2 +- geo/CHANGES.md | 4 + geo/Cargo.toml | 5 +- geo/src/algorithm/bool_ops/mod.rs | 180 ++++++++++++++++++++++++++++ geo/src/algorithm/bool_ops/tests.rs | 21 +++- geo/src/algorithm/mod.rs | 2 +- geo/src/lib.rs | 55 ++++----- 8 files changed, 239 insertions(+), 33 deletions(-) diff --git a/geo-types/CHANGES.md b/geo-types/CHANGES.md index 3937d89e2..a4e0cff6e 100644 --- a/geo-types/CHANGES.md +++ b/geo-types/CHANGES.md @@ -2,6 +2,9 @@ ## Unreleased +- Bumps rstar minimum version from 0.12.0 to 0.12.2 + - + ## 0.7.14 - POSSIBLY BREAKING: Minimum supported version of Rust (MSRV) is now 1.75 diff --git a/geo-types/Cargo.toml b/geo-types/Cargo.toml index 35890c329..af4f517e0 100644 --- a/geo-types/Cargo.toml +++ b/geo-types/Cargo.toml @@ -34,7 +34,7 @@ rstar_0_8 = { package = "rstar", version = "0.8", optional = true } rstar_0_9 = { package = "rstar", version = "0.9", optional = true } rstar_0_10 = { package = "rstar", version = "0.10", optional = true } rstar_0_11 = { package = "rstar", version = "0.11", optional = true } -rstar_0_12 = { package = "rstar", version = "0.12", optional = true } +rstar_0_12 = { package = "rstar", version = "0.12.2", optional = true } serde = { version = "1", optional = true, default-features = false, features = ["alloc", "derive"] } [dev-dependencies] diff --git a/geo/CHANGES.md b/geo/CHANGES.md index 4d776d9bb..8f6b06074 100644 --- a/geo/CHANGES.md +++ b/geo/CHANGES.md @@ -2,6 +2,10 @@ ## Unreleased +- Add Unary Union algorithm for fast union ops on adjacent / overlapping geometries + - + - Adds an optional dependency on Rayon (previously depended on by i_overlay) + - Bumps minimum rstar version to 0.12.2 - Fix crash in `BoolOps` by updating `i_overlay` to 1.9.0. - diff --git a/geo/Cargo.toml b/geo/Cargo.toml index 1c40d91ff..5941cc308 100644 --- a/geo/Cargo.toml +++ b/geo/Cargo.toml @@ -17,9 +17,10 @@ default = ["earcutr", "spade", "multithreading"] use-proj = ["proj"] proj-network = ["use-proj", "proj/network"] use-serde = ["serde", "geo-types/serde"] -multithreading = ["i_overlay/allow_multithreading", "geo-types/multithreading"] +multithreading = ["i_overlay/allow_multithreading", "geo-types/multithreading", "dep:rayon"] [dependencies] +rayon = { version = "1.10.0", optional = true } earcutr = { version = "0.4.2", optional = true } spade = { version = "2.10.0", optional = true } float_next_after = "1.0.0" @@ -29,7 +30,7 @@ log = "0.4.11" num-traits = "0.2" proj = { version = "0.27.0", optional = true } robust = "1.1.0" -rstar = "0.12.0" +rstar = "0.12.2" serde = { version = "1.0", optional = true, features = ["derive"] } i_overlay = { version = "1.9.0, < 1.10.0", default-features = false } diff --git a/geo/src/algorithm/bool_ops/mod.rs b/geo/src/algorithm/bool_ops/mod.rs index e0debd80a..25bbab6e2 100644 --- a/geo/src/algorithm/bool_ops/mod.rs +++ b/geo/src/algorithm/bool_ops/mod.rs @@ -10,9 +10,15 @@ use i_overlay::core::fill_rule::FillRule; use i_overlay::float::clip::FloatClip; use i_overlay::float::single::SingleFloatOverlay; use i_overlay::string::clip::ClipRule; +#[cfg(feature = "multithreading")] +use rayon::prelude::*; + pub use i_overlay_integration::BoolOpsNum; use crate::geometry::{LineString, MultiLineString, MultiPolygon, Polygon}; +use rstar::{ + primitives::CachedEnvelope, primitives::ObjectRef, ParentNode, RTree, RTreeNode, RTreeObject, +}; /// Boolean Operations on geometry. /// @@ -34,6 +40,12 @@ use crate::geometry::{LineString, MultiLineString, MultiPolygon, Polygon}; /// In particular, taking `union` with an empty geom should remove degeneracies /// and fix invalid polygons as long the interior-exterior requirement above is /// satisfied. +/// +/// # Performance +/// +/// For union operations on a collection of overlapping and / or adjacent [`Polygon`]s +/// (e.g. contained in a `Vec` or a [`MultiPolygon`]), using [`UnaryUnion`] will +/// yield far better performance. pub trait BooleanOps { type Scalar: BoolOpsNum; @@ -109,6 +121,106 @@ pub enum OpType { Xor, } +// Recursive algorithms can benefit from grouping those parameters which are constant over +// the whole algorithm to reduce the overhead of the recursive calls, in this case the single- +// and multi-threaded unary union tree traversals +#[derive(Debug)] +struct Ops { + init: I, + fold: F, + reduce: R, +} + +/// Efficient [BooleanOps::union] of adjacent / overlapping geometries +/// +/// For geometries with a high degree of overlap or adjacency +/// (for instance, merging a large contiguous area made up of many adjacent polygons) +/// this method will be orders of magnitude faster than a manual iteration and union approach. +pub trait UnaryUnion { + type Scalar: BoolOpsNum; + + /// Construct a tree of all the input geometries and progressively union them from the "bottom up" + /// + /// This is considerably more efficient than using e.g. `fold()` over an iterator of Polygons. + /// # Examples + /// + /// ``` + /// use geo::{BooleanOps, UnaryUnion}; + /// use geo::{MultiPolygon, polygon}; + /// let poly1 = polygon![ + /// (x: 0.0, y: 0.0), + /// (x: 4.0, y: 0.0), + /// (x: 4.0, y: 4.0), + /// (x: 0.0, y: 4.0), + /// (x: 0.0, y: 0.0), + /// ]; + /// let poly2 = polygon![ + /// (x: 4.0, y: 0.0), + /// (x: 8.0, y: 0.0), + /// (x: 8.0, y: 4.0), + /// (x: 4.0, y: 4.0), + /// (x: 4.0, y: 0.0), + /// ]; + /// let merged = &poly1.union(&poly2); + /// let mp = MultiPolygon(vec![poly1, poly2]); + /// // A larger single rectangle + /// let combined = mp.unary_union(); + /// assert_eq!(&combined, merged); + /// ``` + fn unary_union(self) -> MultiPolygon; +} + +// This function carries out a full post-order traversal of the tree, building up MultiPolygons from inside to outside. +// Though the operation is carried out via fold() over the tree iterator, there are two actual nested operations: +// "fold" operations on leaf nodes build up output MultiPolygons by adding Polygons to them via union and +// "reduce" operations on parent nodes combine these output MultiPolygons from leaf operations by recursion +fn bottom_up_fold_reduce(ops: &Ops, parent: &ParentNode) -> S +where + // This operation only requires two types: output (S) and input (T) + T: RTreeObject, + // Because this is a fold operation, we need to initialise a "container" to which we'll be adding using union. + // The output of a union op is a MultiPolygon. + I: Fn() -> S, + // The meat of the fold op is unioning input borrowed leaf Polygons into an output MultiPolygon. + F: Fn(S, &T) -> S, + // Parent nodes require us to process their child nodes to produce a MultiPolygon. We do this recursively. + // This is a reduce op so there's no need for an init value and the two inputs must have the same type: MultiPolygon + R: Fn(S, S) -> S, +{ + parent + .children() + .iter() + .fold((ops.init)(), |accum, child| match child { + RTreeNode::Leaf(value) => (ops.fold)(accum, value), + RTreeNode::Parent(parent) => { + let value = bottom_up_fold_reduce(ops, parent); + (ops.reduce)(accum, value) + } + }) +} + +fn par_bottom_up_fold_reduce(ops: &Ops, parent: &ParentNode) -> S +where + T: RTreeObject, + RTreeNode: Send + Sync, + S: Send, + I: Fn() -> S + Send + Sync, + F: Fn(S, &T) -> S + Send + Sync, + R: Fn(S, S) -> S + Send + Sync, +{ + parent + .children() + .into_par_iter() + .fold(&ops.init, |accum, child| match child { + RTreeNode::Leaf(value) => (ops.fold)(accum, value), + RTreeNode::Parent(parent) => { + let value = par_bottom_up_fold_reduce(ops, parent); + (ops.reduce)(accum, value) + } + }) + .reduce(&ops.init, &ops.reduce) +} + impl BooleanOps for Polygon { type Scalar = T; @@ -124,3 +236,71 @@ impl BooleanOps for MultiPolygon { self.iter().flat_map(BooleanOps::rings) } } + +impl<'a, T, Boppable, BoppableCollection> UnaryUnion for &'a BoppableCollection +where + T: BoolOpsNum, + Boppable: BooleanOps + RTreeObject + 'a, + &'a BoppableCollection: IntoIterator, +{ + type Scalar = T; + + fn unary_union(self) -> MultiPolygon { + // these three functions drive the union operation + let init = || MultiPolygon::::new(vec![]); + let fold = |mut accum: MultiPolygon, + poly: &CachedEnvelope>| + -> MultiPolygon { + accum = accum.union(&***poly); + accum + }; + let reduce = |accum1: MultiPolygon, accum2: MultiPolygon| -> MultiPolygon { + accum1.union(&accum2) + }; + let rtree = RTree::bulk_load( + self.into_iter() + .map(|p| CachedEnvelope::new(ObjectRef::new(p))) + .collect(), + ); + let ops = Ops { init, fold, reduce }; + bottom_up_fold_reduce(&ops, rtree.root()) + } +} + +#[cfg(feature = "multithreading")] +/// Wrapper type which signals to algorithms operating on `T` that utilizing parallelism might be viable. +pub struct AllowMultithreading(pub T); + +#[cfg(feature = "multithreading")] +impl<'a, T, Boppable, BoppableCollection> UnaryUnion for AllowMultithreading<&'a BoppableCollection> +where + T: BoolOpsNum + Send, + Boppable: BooleanOps + RTreeObject + 'a + Send + Sync, + ::Envelope: Send + Sync, + &'a BoppableCollection: IntoParallelIterator, +{ + type Scalar = T; + + fn unary_union(self) -> MultiPolygon { + // these three functions drive the union operation + let init = || MultiPolygon::::new(vec![]); + let fold = |mut accum: MultiPolygon, + poly: &CachedEnvelope>| + -> MultiPolygon { + accum = accum.union(&***poly); + accum + }; + let reduce = |accum1: MultiPolygon, accum2: MultiPolygon| -> MultiPolygon { + accum1.union(&accum2) + }; + let rtree = RTree::bulk_load( + self.0 + .into_par_iter() + .map(|p| CachedEnvelope::new(ObjectRef::new(p))) + .collect(), + ); + + let ops = Ops { init, fold, reduce }; + par_bottom_up_fold_reduce(&ops, rtree.root()) + } +} diff --git a/geo/src/algorithm/bool_ops/tests.rs b/geo/src/algorithm/bool_ops/tests.rs index ffc4c059a..2951ad710 100644 --- a/geo/src/algorithm/bool_ops/tests.rs +++ b/geo/src/algorithm/bool_ops/tests.rs @@ -1,7 +1,24 @@ -use super::BooleanOps; -use crate::{wkt, Convert, MultiPolygon, Relate}; +use super::{BooleanOps, UnaryUnion}; +use crate::{wkt, Convert, MultiPolygon, Polygon, Relate}; use wkt::ToWkt; +#[test] +fn test_unary_union() { + let poly1: Polygon = wkt!(POLYGON((204.0 287.0,203.69670020700084 288.2213844497616,200.38308697914755 288.338793163584,204.0 287.0))); + let poly2: Polygon = wkt!(POLYGON((210.0 290.0,204.07584923592933 288.2701221108328,212.24082541367974 285.47846008552216,210.0 290.0))); + let poly3: Polygon = wkt!(POLYGON((211.0 292.0,202.07584923592933 288.2701221108328,212.24082541367974 285.47846008552216,210.0 290.0))); + + let polys = vec![poly1.clone(), poly2.clone(), poly3.clone()]; + let poly_union = polys.unary_union(); + assert_eq!(poly_union.0.len(), 1); + + let multi_poly_12 = MultiPolygon::new(vec![poly1.clone(), poly2.clone()]); + let multi_poly_3 = MultiPolygon::new(vec![poly3]); + let multi_polys = vec![multi_poly_12.clone(), multi_poly_3.clone()]; + let multi_poly_union = multi_polys.unary_union(); + assert_eq!(multi_poly_union.0.len(), 1); +} + #[test] fn jts_test_overlay_la_1() { // From TestOverlayLA.xml test case with description "mLmA - A and B complex, overlapping and touching #1" diff --git a/geo/src/algorithm/mod.rs b/geo/src/algorithm/mod.rs index 6f92320cf..7c1bf0d10 100644 --- a/geo/src/algorithm/mod.rs +++ b/geo/src/algorithm/mod.rs @@ -8,7 +8,7 @@ pub use area::Area; /// Boolean Ops such as union, xor, difference. pub mod bool_ops; -pub use bool_ops::{BooleanOps, OpType}; +pub use bool_ops::{BooleanOps, OpType, UnaryUnion}; /// Calculate the bounding rectangle of a `Geometry`. pub mod bounding_rect; diff --git a/geo/src/lib.rs b/geo/src/lib.rs index 0d1cde38f..c28ebe2a5 100644 --- a/geo/src/lib.rs +++ b/geo/src/lib.rs @@ -4,7 +4,7 @@ //! //! # Types //! -//! - **[`Coord`]**: A two-dimensional coordinate. All geometry types are composed of [`Coord`]s, though [`Coord`] itself is not a [`Geometry`] type. +//! - **[`Coord`]**: A two-dimensional coordinate. All geometry types are composed of [`Coord`]s, though [`Coord`] itself is not a [`Geometry`] type //! - **[`Point`]**: A single point represented by one [`Coord`] //! - **[`MultiPoint`]**: A collection of [`Point`]s //! - **[`Line`]**: A line segment represented by two [`Coord`]s @@ -67,7 +67,8 @@ //! //! ## Boolean Operations //! -//! - **[`BooleanOps`]**: combine or split (Multi)Polygons using intersecton, union, xor, or difference operations. +//! - **[`BooleanOps`]**: Combine or split (Multi)Polygons using intersecton, union, xor, or difference operations +//! - **[`UnaryUnion`]**: Efficient union of a collection of adjacent or overlapping [`Polygon`] or [`MultiPolygon`]s //! //! ## Outlier Detection //! @@ -86,7 +87,7 @@ //! - **[`ClosestPoint`]**: Find the point on a geometry //! closest to a given point //! - **[`HaversineClosestPoint`]**: Find the point on a geometry -//! closest to a given point on a sphere using spherical coordinates and lines being great arcs. +//! closest to a given point on a sphere using spherical coordinates and lines being great arcs //! - **[`IsConvex`]**: Calculate the convexity of a //! [`LineString`] //! - **[`LineInterpolatePoint`]**: @@ -105,14 +106,14 @@ //! - **[`Intersects`]**: Calculate if a geometry intersects //! another geometry //! - **[`line_intersection`]**: Calculates the -//! intersection, if any, between two lines. +//! intersection, if any, between two lines //! - **[`Relate`]**: Topologically relate two geometries based on -//! [DE-9IM](https://en.wikipedia.org/wiki/DE-9IM) semantics. -//! - **[`Within`]**: Calculate if a geometry lies completely within another geometry. +//! [DE-9IM](https://en.wikipedia.org/wiki/DE-9IM) semantics +//! - **[`Within`]**: Calculate if a geometry lies completely within another geometry //! //! ## Triangulation //! -//! - **[`TriangulateEarcut`](triangulate_earcut)**: Triangulate polygons using the earcut algorithm. Requires the `"earcutr"` feature, which is enabled by default. +//! - **[`TriangulateEarcut`](triangulate_earcut)**: Triangulate polygons using the earcut algorithm. Requires the `"earcutr"` feature, which is enabled by default //! //! ## Winding //! @@ -151,24 +152,24 @@ //! //! ## Conversion //! -//! - **[`Convert`]**: Convert (infalliby) the type of a geometry’s coordinate value -//! - **[`TryConvert`]**: Convert (falliby) the type of a geometry’s coordinate value -//! - **[`ToDegrees`]**: Radians to degrees coordinate transforms for a given geometry. -//! - **[`ToRadians`]**: Degrees to radians coordinate transforms for a given geometry. +//! - **[`Convert`]**: Convert (infalliby) the numeric type of a geometry’s coordinate value +//! - **[`TryConvert`]**: Convert (falliby) the numeric type of a geometry’s coordinate value +//! - **[`ToDegrees`]**: Radians to degrees coordinate transforms for a given geometry +//! - **[`ToRadians`]**: Degrees to radians coordinate transforms for a given geometry //! //! ## Miscellaneous //! //! - **[`Centroid`]**: Calculate the centroid of a geometry -//! - **[`ChaikinSmoothing`]**: Smoothen `LineString`, `Polygon`, `MultiLineString` and `MultiPolygon` using Chaikin's algorithm. +//! - **[`ChaikinSmoothing`]**: Smoothen `LineString`, `Polygon`, `MultiLineString` and `MultiPolygon` using Chaikin's algorithm //! - **[`proj`]**: Project geometries with the `proj` crate (requires the `use-proj` feature) -//! - **[`LineStringSegmentize`]**: Segment a LineString into `n` segments. -//! - **[`LineStringSegmentizeHaversine`]**: Segment a LineString using Haversine distance. -//! - **[`Transform`]**: Transform a geometry using Proj. -//! - **[`RemoveRepeatedPoints`]**: Remove repeated points from a geometry. +//! - **[`LineStringSegmentize`]**: Segment a LineString into `n` segments +//! - **[`LineStringSegmentizeHaversine`]**: Segment a LineString using Haversine distance +//! - **[`Transform`]**: Transform a geometry using Proj +//! - **[`RemoveRepeatedPoints`]**: Remove repeated points from a geometry //! //! # Spatial Indexing //! -//! `geo` geometries (`Point`, `Line`, `LineString`, `Polygon`) can be used with the [rstar](https://docs.rs/rstar/0.12.0/rstar/struct.RTree.html#usage) +//! `geo` geometries ([`Point`], [`Line`], [`LineString`], [`Polygon`], [`MultiPolygon`]) can be used with the [rstar](https://docs.rs/rstar/0.12.0/rstar/struct.RTree.html#usage) //! R*-tree crate for fast distance and nearest-neighbour queries. Multi- geometries can be added to the tree by iterating over //! their members and adding them. Note in particular the availability of the [`bulk_load`](https://docs.rs/rstar/0.12.0/rstar/struct.RTree.html#method.bulk_load) //! method and [`GeomWithData`](https://docs.rs/rstar/0.12.0/rstar/primitives/struct.GeomWithData.html) struct. @@ -178,22 +179,22 @@ //! The following optional [Cargo features] are available: //! //! - `earcutr`: -//! - Enables the `earcutr` crate, which provides triangulation of polygons using the earcut algorithm. -//! - ☑ Enabled by default. +//! - Enables the `earcutr` crate, which provides triangulation of polygons using the earcut algorithm +//! - ☑ Enabled by default //! - `proj-network`: -//! - Enables [network grid] support for the [`proj` crate]. +//! - Enables [network grid] support for the [`proj` crate] //! - After enabling this feature, [further configuration][proj crate file download] is required to use the network grid. -//! - ☐ Disabled by default. +//! - ☐ Disabled by default //! - `use-proj`: //! - Enables coordinate conversion and transformation of `Point` geometries using the [`proj` crate] -//! - ☐ Disabled by default. +//! - ☐ Disabled by default //! - `use-serde`: -//! - Allows geometry types to be serialized and deserialized with [Serde]. -//! - ☐ Disabled by default. +//! - Allows geometry types to be serialized and deserialized with [Serde] +//! - ☐ Disabled by default //! - `multithreading`: -//! - Enables multithreading support for the `i_overlay` crate (via Rayon), and activates the `multithreading` flag -//! in `geo-types`, enabling multi-threaded iteration over `Multi*` geometries. -//! - ☑ Enabled by default. +//! - Enables multithreading support (via Rayon), and activates the `multithreading` flag +//! in `geo-types`, enabling multi-threaded iteration over `Multi*` geometries +//! - ☑ Enabled by default //! //! # Ecosystem //!