Skip to content

Commit

Permalink
Add UnaryUnion trait and impl
Browse files Browse the repository at this point in the history
This makes the unary unary union algorithm available to anything that can produce an iterator of Polygons (MultiPolygon, containers of Polygon) and enables on-demand multi-threaded unary unions with a wrapper.
  • Loading branch information
urschrei committed Nov 26, 2024
1 parent 23d9eb3 commit b4b3e2b
Show file tree
Hide file tree
Showing 8 changed files with 240 additions and 33 deletions.
3 changes: 3 additions & 0 deletions geo-types/CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

## Unreleased

- Bumps rstar minimum version from 0.12.0 to 0.12.2
- <https://github.com/georust/geo/pull/1246>

## 0.7.14

- POSSIBLY BREAKING: Minimum supported version of Rust (MSRV) is now 1.75
Expand Down
2 changes: 1 addition & 1 deletion geo-types/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ rstar_0_8 = { package = "rstar", version = "0.8", optional = true }
rstar_0_9 = { package = "rstar", version = "0.9", optional = true }
rstar_0_10 = { package = "rstar", version = "0.10", optional = true }
rstar_0_11 = { package = "rstar", version = "0.11", optional = true }
rstar_0_12 = { package = "rstar", version = "0.12", optional = true }
rstar_0_12 = { package = "rstar", version = "0.12.2", optional = true }
serde = { version = "1", optional = true, default-features = false, features = ["alloc", "derive"] }

[dev-dependencies]
Expand Down
5 changes: 5 additions & 0 deletions geo/CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

## Unreleased

- Add Unary Union algorithm for fast union ops on adjacent / overlapping geometries
- <https://github.com/georust/geo/pull/1246>
- Adds an optional dependency on Rayon (previously depended on by i_overlay)
- Bumps minimum rstar version to 0.12.2

## 0.29.2 - 2024.11.15

- Pin `i_overlay` to < 1.8.0 to work around [recursion bug](https://github.com/georust/geo/issues/1270).
Expand Down
5 changes: 3 additions & 2 deletions geo/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@ default = ["earcutr", "spade", "multithreading"]
use-proj = ["proj"]
proj-network = ["use-proj", "proj/network"]
use-serde = ["serde", "geo-types/serde"]
multithreading = ["i_overlay/allow_multithreading", "geo-types/multithreading"]
multithreading = ["i_overlay/allow_multithreading", "geo-types/multithreading", "dep:rayon"]

[dependencies]
rayon = { version = "1.10.0", optional = true }
earcutr = { version = "0.4.2", optional = true }
spade = { version = "2.10.0", optional = true }
float_next_after = "1.0.0"
Expand All @@ -29,7 +30,7 @@ log = "0.4.11"
num-traits = "0.2"
proj = { version = "0.27.0", optional = true }
robust = "1.1.0"
rstar = "0.12.0"
rstar = "0.12.2"
serde = { version = "1.0", optional = true, features = ["derive"] }
i_overlay = { version = "1.7.2, < 1.8.0", default-features = false }

Expand Down
180 changes: 180 additions & 0 deletions geo/src/algorithm/bool_ops/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,15 @@ mod i_overlay_integration;
#[cfg(test)]
mod tests;

#[cfg(feature = "multithreading")]
use rayon::prelude::*;

pub use i_overlay_integration::BoolOpsNum;

use crate::geometry::{LineString, MultiLineString, MultiPolygon, Polygon};
use rstar::{
primitives::CachedEnvelope, primitives::ObjectRef, ParentNode, RTree, RTreeNode, RTreeObject,
};

/// Boolean Operations on geometry.
///
Expand All @@ -26,6 +32,12 @@ use crate::geometry::{LineString, MultiLineString, MultiPolygon, Polygon};
/// In particular, taking `union` with an empty geom should remove degeneracies
/// and fix invalid polygons as long the interior-exterior requirement above is
/// satisfied.
///
/// # Performance
///
/// For union operations on a collection of overlapping and / or adjacent [`Polygon`]s
/// (e.g. contained in a `Vec` or a [`MultiPolygon`]), using [`UnaryUnion`] will
/// yield far better performance.
pub trait BooleanOps {
type Scalar: BoolOpsNum;

Expand Down Expand Up @@ -125,6 +137,106 @@ pub enum OpType {
Xor,
}

// Recursive algorithms can benefit from grouping those parameters which are constant over
// the whole algorithm to reduce the overhead of the recursive calls, in this case the single-
// and multi-threaded unary union tree traversals
#[derive(Debug)]
struct Ops<I, F, R> {
init: I,
fold: F,
reduce: R,
}

/// Efficient [BooleanOps::union] of adjacent / overlapping geometries
///
/// For geometries with a high degree of overlap or adjacency
/// (for instance, merging a large contiguous area made up of many adjacent polygons)
/// this method will be orders of magnitude faster than a manual iteration and union approach.
pub trait UnaryUnion {
type Scalar: BoolOpsNum;

/// Construct a tree of all the input geometries and progressively union them from the "bottom up"
///
/// This is considerably more efficient than using e.g. `fold()` over an iterator of Polygons.
/// # Examples
///
/// ```
/// use geo::{BooleanOps, UnaryUnion};
/// use geo::{MultiPolygon, polygon};
/// let poly1 = polygon![
/// (x: 0.0, y: 0.0),
/// (x: 4.0, y: 0.0),
/// (x: 4.0, y: 4.0),
/// (x: 0.0, y: 4.0),
/// (x: 0.0, y: 0.0),
/// ];
/// let poly2 = polygon![
/// (x: 4.0, y: 0.0),
/// (x: 8.0, y: 0.0),
/// (x: 8.0, y: 4.0),
/// (x: 4.0, y: 4.0),
/// (x: 4.0, y: 0.0),
/// ];
/// let merged = &poly1.union(&poly2);
/// let mp = MultiPolygon(vec![poly1, poly2]);
/// // A larger single rectangle
/// let combined = mp.unary_union();
/// assert_eq!(&combined, merged);
/// ```
fn unary_union(self) -> MultiPolygon<Self::Scalar>;
}

// This function carries out a full post-order traversal of the tree, building up MultiPolygons from inside to outside.
// Though the operation is carried out via fold() over the tree iterator, there are two actual nested operations:
// "fold" operations on leaf nodes build up output MultiPolygons by adding Polygons to them via union and
// "reduce" operations on parent nodes combine these output MultiPolygons from leaf operations by recursion
fn bottom_up_fold_reduce<T, S, I, F, R>(ops: &Ops<I, F, R>, parent: &ParentNode<T>) -> S
where
// This operation only requires two types: output (S) and input (T)
T: RTreeObject,
// Because this is a fold operation, we need to initialise a "container" to which we'll be adding using union.
// The output of a union op is a MultiPolygon.
I: Fn() -> S,
// The meat of the fold op is unioning input borrowed leaf Polygons into an output MultiPolygon.
F: Fn(S, &T) -> S,
// Parent nodes require us to process their child nodes to produce a MultiPolygon. We do this recursively.
// This is a reduce op so there's no need for an init value and the two inputs must have the same type: MultiPolygon
R: Fn(S, S) -> S,
{
parent
.children()
.iter()
.fold((ops.init)(), |accum, child| match child {
RTreeNode::Leaf(value) => (ops.fold)(accum, value),
RTreeNode::Parent(parent) => {
let value = bottom_up_fold_reduce(ops, parent);
(ops.reduce)(accum, value)
}
})
}

fn par_bottom_up_fold_reduce<T, S, I, F, R>(ops: &Ops<I, F, R>, parent: &ParentNode<T>) -> S
where
T: RTreeObject,
RTreeNode<T>: Send + Sync,
S: Send,
I: Fn() -> S + Send + Sync,
F: Fn(S, &T) -> S + Send + Sync,
R: Fn(S, S) -> S + Send + Sync,
{
parent
.children()
.into_par_iter()
.fold(&ops.init, |accum, child| match child {
RTreeNode::Leaf(value) => (ops.fold)(accum, value),
RTreeNode::Parent(parent) => {
let value = par_bottom_up_fold_reduce(ops, parent);
(ops.reduce)(accum, value)
}
})
.reduce(&ops.init, &ops.reduce)
}

impl<T: BoolOpsNum> BooleanOps for Polygon<T> {
type Scalar = T;

Expand All @@ -140,3 +252,71 @@ impl<T: BoolOpsNum> BooleanOps for MultiPolygon<T> {
self.0.iter().flat_map(|p| p.rings())
}
}

impl<'a, T, Boppable, BoppableCollection> UnaryUnion for &'a BoppableCollection
where
T: BoolOpsNum,
Boppable: BooleanOps<Scalar = T> + RTreeObject + 'a,
&'a BoppableCollection: IntoIterator<Item = &'a Boppable>,
{
type Scalar = T;

fn unary_union(self) -> MultiPolygon<Self::Scalar> {
// these three functions drive the union operation
let init = || MultiPolygon::<T>::new(vec![]);
let fold = |mut accum: MultiPolygon<T>,
poly: &CachedEnvelope<ObjectRef<'a, Boppable>>|
-> MultiPolygon<T> {
accum = accum.union(&***poly);
accum
};
let reduce = |accum1: MultiPolygon<T>, accum2: MultiPolygon<T>| -> MultiPolygon<T> {
accum1.union(&accum2)
};
let rtree = RTree::bulk_load(
self.into_iter()
.map(|p| CachedEnvelope::new(ObjectRef::new(p)))
.collect(),
);
let ops = Ops { init, fold, reduce };
bottom_up_fold_reduce(&ops, rtree.root())
}
}

#[cfg(feature = "multithreading")]
/// Wrapper type which signals to algorithms operating on `T` that utilizing parallelism might be viable.
pub struct AllowMultithreading<T>(pub T);

#[cfg(feature = "multithreading")]
impl<'a, T, Boppable, BoppableCollection> UnaryUnion for AllowMultithreading<&'a BoppableCollection>
where
T: BoolOpsNum + Send,
Boppable: BooleanOps<Scalar = T> + RTreeObject + 'a + Send + Sync,
<Boppable as RTreeObject>::Envelope: Send + Sync,
&'a BoppableCollection: IntoParallelIterator<Item = &'a Boppable>,
{
type Scalar = T;

fn unary_union(self) -> MultiPolygon<Self::Scalar> {
// these three functions drive the union operation
let init = || MultiPolygon::<T>::new(vec![]);
let fold = |mut accum: MultiPolygon<T>,
poly: &CachedEnvelope<ObjectRef<'a, Boppable>>|
-> MultiPolygon<T> {
accum = accum.union(&***poly);
accum
};
let reduce = |accum1: MultiPolygon<T>, accum2: MultiPolygon<T>| -> MultiPolygon<T> {
accum1.union(&accum2)
};
let rtree = RTree::bulk_load(
self.0
.into_par_iter()
.map(|p| CachedEnvelope::new(ObjectRef::new(p)))
.collect(),
);

let ops = Ops { init, fold, reduce };
par_bottom_up_fold_reduce(&ops, rtree.root())
}
}
21 changes: 19 additions & 2 deletions geo/src/algorithm/bool_ops/tests.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,22 @@
use super::BooleanOps;
use crate::{wkt, Convert, MultiPolygon, Relate};
use super::{BooleanOps, UnaryUnion};
use crate::{wkt, Convert, MultiPolygon, Polygon, Relate};

#[test]
fn test_unary_union() {
let poly1: Polygon = wkt!(POLYGON((204.0 287.0,203.69670020700084 288.2213844497616,200.38308697914755 288.338793163584,204.0 287.0)));
let poly2: Polygon = wkt!(POLYGON((210.0 290.0,204.07584923592933 288.2701221108328,212.24082541367974 285.47846008552216,210.0 290.0)));
let poly3: Polygon = wkt!(POLYGON((211.0 292.0,202.07584923592933 288.2701221108328,212.24082541367974 285.47846008552216,210.0 290.0)));

let polys = vec![poly1.clone(), poly2.clone(), poly3.clone()];
let poly_union = polys.unary_union();
assert_eq!(poly_union.0.len(), 1);

let multi_poly_12 = MultiPolygon::new(vec![poly1.clone(), poly2.clone()]);
let multi_poly_3 = MultiPolygon::new(vec![poly3]);
let multi_polys = vec![multi_poly_12.clone(), multi_poly_3.clone()];
let multi_poly_union = multi_polys.unary_union();
assert_eq!(multi_poly_union.0.len(), 1);
}

#[test]
fn jts_test_overlay_la_1() {
Expand Down
2 changes: 1 addition & 1 deletion geo/src/algorithm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ pub use area::Area;

/// Boolean Ops such as union, xor, difference.
pub mod bool_ops;
pub use bool_ops::{BooleanOps, OpType};
pub use bool_ops::{BooleanOps, OpType, UnaryUnion};

/// Calculate the bounding rectangle of a `Geometry`.
pub mod bounding_rect;
Expand Down
Loading

0 comments on commit b4b3e2b

Please sign in to comment.