From 0460d7cb47c81997e1d1a46aa6a19ff6f6d28e8d Mon Sep 17 00:00:00 2001 From: georgios-ts Date: Mon, 27 Sep 2021 16:08:05 +0300 Subject: [PATCH] Add dfs-search Widely used graph libraries like Boost Graph Library and graph-tool provide the ability to insert callback functions at specified event points for many common graph search algorithms. petgraph has a similar concept in its `petgraph::visit::depth_first_search` function. This commit implements an iterative version of `depth_first_search` that will be used in a follow-up in the pending PRs #444, #445. At the same time it exposes this new functionality in Python by letting users subclass `retworkx.visit.DFSVisitor` and provide their own implementation for the appropriate callback functions. The benefit is less memory consumption since we avoid storing the results but rather let the user take the desired action at specified points. For example, if a user wants to process the nodes in dfs-order, we don't need to create a new list with all the graph nodes in dfs-order but rather the user can process a node on the fly. We can (probably) leverage this approach in other algorithms as an alternative for our inability to provide "real" Python iterators. 
--- docs/source/api.rst | 4 + .../notes/dfs-search-6083680bf62356b0.yaml | 28 +++ retworkx/__init__.py | 71 +++++++ retworkx/visit.py | 58 ++++++ src/lib.rs | 2 + src/traversal/dfs_visit.rs | 195 ++++++++++++++++++ src/traversal/mod.rs | 148 +++++++++++++ 7 files changed, 506 insertions(+) create mode 100644 releasenotes/notes/dfs-search-6083680bf62356b0.yaml create mode 100644 retworkx/visit.py create mode 100644 src/traversal/dfs_visit.rs diff --git a/docs/source/api.rst b/docs/source/api.rst index d217e075b4..51b094b0d6 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -56,6 +56,7 @@ Traversal :toctree: stubs retworkx.dfs_edges + retworkx.dfs_search retworkx.bfs_successors retworkx.topological_sort retworkx.lexicographical_topological_sort @@ -63,6 +64,7 @@ Traversal retworkx.ancestors retworkx.collect_runs retworkx.collect_bicolor_runs + retworkx.visit.DFSVisitor .. _dag-algorithms: @@ -238,6 +240,7 @@ the functions from the explicitly typed based on the data type. retworkx.digraph_all_pairs_dijkstra_path_lengths retworkx.digraph_k_shortest_path_lengths retworkx.digraph_dfs_edges + retworkx.digraph_dfs_search retworkx.digraph_find_cycle retworkx.digraph_transitivity retworkx.digraph_core_number @@ -281,6 +284,7 @@ typed API based on the data type. 
retworkx.graph_k_shortest_path_lengths retworkx.graph_all_pairs_dijkstra_path_lengths retworkx.graph_dfs_edges + retworkx.graph_dfs_search retworkx.graph_transitivity retworkx.graph_core_number retworkx.graph_complement diff --git a/releasenotes/notes/dfs-search-6083680bf62356b0.yaml b/releasenotes/notes/dfs-search-6083680bf62356b0.yaml new file mode 100644 index 0000000000..256f652472 --- /dev/null +++ b/releasenotes/notes/dfs-search-6083680bf62356b0.yaml @@ -0,0 +1,28 @@ +--- +features: + - | + Added a new :func:`~retworkx.dfs_search` (and its per-type variants + :func:`~retworkx.graph_dfs_search` and :func:`~retworkx.digraph_dfs_search`) + that traverses the graph in a depth-first manner and emits events at specified + points. The events are handled by a visitor object that subclasses + :class:`~retworkx.visit.DFSVisitor` through the appropriate callback functions. + For example: + + .. jupyter-execute:: + + import retworkx + from retworkx.visit import DFSVisitor + + class TreeEdgesRecorder(DFSVisitor): + + def __init__(self): + self.edges = [] + + def tree_edge(self, edge): + self.edges.append(edge) + + graph = retworkx.PyGraph() + graph.extend_from_edge_list([(1, 3), (0, 1), (2, 1), (0, 2)]) + vis = TreeEdgesRecorder() + retworkx.dfs_search(graph, 0, vis) + print('Tree edges:', vis.edges) diff --git a/retworkx/__init__.py b/retworkx/__init__.py index 784be4bc22..445f2efa28 100644 --- a/retworkx/__init__.py +++ b/retworkx/__init__.py @@ -1730,3 +1730,74 @@ def _graph_union( merge_edges=False, ): return graph_union(first, second, merge_nodes=False, merge_edges=False) + + +@functools.singledispatch +def dfs_search(graph, source, visitor): + """Depth-first traversal of a directed/undirected graph. + + The pseudo-code for the DFS algorithm is listed below, with the annotated + event points, for which the given visitor object will be called with the + appropriate method. 
+ + :: + + DFS(G) + for each vertex u in V + color[u] := WHITE initialize vertex u + end for + time := 0 + call DFS-VISIT(G, source) start vertex s + + DFS-VISIT(G, u) + color[u] := GRAY discover vertex u + for each v in Adj[u] examine edge (u,v) + if (color[v] = WHITE) (u,v) is a tree edge + call DFS-VISIT(G, v) + else if (color[v] = GRAY) (u,v) is a back edge + ... + else if (color[v] = BLACK) (u,v) is a cross or forward edge + ... + end for + color[u] := BLACK finish vertex u + + In the following example we keep track of the tree edges: + + .. jupyter-execute:: + + import retworkx + from retworkx.visit import DFSVisitor + + class TreeEdgesRecorder(DFSVisitor): + + def __init__(self): + self.edges = [] + + def tree_edge(self, edge): + self.edges.append(edge) + + graph = retworkx.PyGraph() + graph.extend_from_edge_list([(1, 3), (0, 1), (2, 1), (0, 2)]) + vis = TreeEdgesRecorder() + retworkx.dfs_search(graph, 0, vis) + print('Tree edges:', vis.edges) + + :param PyGraph graph: The graph to be used. + :param int source: An optional node index to use as the starting node + for the depth-first search. If this is not specified then a source + will be chosen arbitrarily and repeated until all components of the + graph are searched. + :param visitor: A visitor object that is invoked at the event points inside the + algorithm. This should be a subclass of :class:`~retworkx.visit.DFSVisitor`. + """ + raise TypeError("Invalid Input Type %s for graph" % type(graph)) + + +@dfs_search.register(PyDiGraph) +def _digraph_dfs_search(graph, source, visitor): + return digraph_dfs_search(graph, source, visitor) + + +@dfs_search.register(PyGraph) +def _graph_dfs_search(graph, source, visitor): + return graph_dfs_search(graph, source, visitor) diff --git a/retworkx/visit.py b/retworkx/visit.py new file mode 100644 index 0000000000..ed38ee5c19 --- /dev/null +++ b/retworkx/visit.py @@ -0,0 +1,58 @@ +# This code is licensed under the Apache License, Version 2.0. 
You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. + + +class DFSVisitor: + """A visitor object that is invoked at the event-points inside the + :func:`~retworkx.dfs_search` algorithm. By default, it performs no + action, and should be used as a base class in order to be useful. + """ + + def discover_vertex(self, v, t): + """ + This is invoked when a vertex is encountered for the first time. + The discover time `t` of vertex `v` is reported along with it. + """ + return + + def finish_vertex(self, v, t): + """ + This is invoked on vertex `v` after `finish_vertex` has been called for all + the vertices in the DFS-tree rooted at vertex `v`. If vertex `v` is a leaf in + the DFS-tree, then the `finish_vertex` function is called on `v` after all + the out-edges of `v` have been examined. The finish time `t` of vertex `v` + is reported along with it. + """ + return + + def tree_edge(self, e): + """ + This is invoked on each edge as it becomes a member of the edges + that form the search tree. + """ + return + + def back_edge(self, e): + """ + This is invoked on the back edges in the graph. + For an undirected graph there is some ambiguity between tree edges + and back edges since the edge :math:`(u, v)` and :math:`(v, u)` are the + same edge, but both the `tree_edge()` and `back_edge()` functions will be + invoked. One way to resolve this ambiguity is to record the tree edges, + and then disregard the back-edges that are already marked as tree edges. + An easy way to record tree edges is to record predecessors at the + `tree_edge` event point. + """ + return + + def forward_or_cross_edge(self, e): + """ + This is invoked on forward or cross edges in the graph. 
+ In an undirected graph this method is never called. + """ + return diff --git a/src/lib.rs b/src/lib.rs index f9ad455e30..1aa7735c34 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -292,6 +292,8 @@ fn retworkx(py: Python<'_>, m: &PyModule) -> PyResult<()> { ))?; m.add_wrapped(wrap_pyfunction!(metric_closure))?; m.add_wrapped(wrap_pyfunction!(steiner_tree))?; + m.add_wrapped(wrap_pyfunction!(digraph_dfs_search))?; + m.add_wrapped(wrap_pyfunction!(graph_dfs_search))?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/src/traversal/dfs_visit.rs b/src/traversal/dfs_visit.rs new file mode 100644 index 0000000000..5be50e49c4 --- /dev/null +++ b/src/traversal/dfs_visit.rs @@ -0,0 +1,195 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. + +// This module is an iterative implementation of the upstream petgraph +// ``depth_first_search`` function. +// https://github.com/petgraph/petgraph/blob/0.6.0/src/visit/dfsvisit.rs + +use pyo3::prelude::*; + +use petgraph::stable_graph::NodeIndex; +use petgraph::visit::{ + ControlFlow, DfsEvent, IntoNeighbors, Time, VisitMap, Visitable, +}; + +/// Return if the expression is a break value, execute the provided statement +/// if it is a prune value. +/// https://github.com/petgraph/petgraph/blob/0.6.0/src/visit/dfsvisit.rs#L27 +macro_rules! 
try_control { + ($e:expr, $p:stmt) => { + try_control!($e, $p, ()); + }; + ($e:expr, $p:stmt, $q:stmt) => { + match $e { + x => { + if x.should_break() { + return x; + } else if x.should_prune() { + $p + } else { + $q + } + } + } + }; +} + +/// An iterative depth first search. +/// +/// Starting points are the nodes in the iterator `starts` (specify just one +/// start vertex *x* by using `Some(x)`). +/// +/// The traversal emits discovery and finish events for each reachable vertex, +/// and edge classification of each reachable edge. `visitor` is called for each +/// event, see `petgraph::visit::DfsEvent` for possible values. +/// +/// The return value should implement the trait `ControlFlow`, and can be used to change +/// the control flow of the search. +/// +/// `Control` implements `ControlFlow` such that `Control::Continue` resumes the search. +/// `Control::Break` will stop the visit early, returning the contained value. +/// `Control::Prune` will stop traversing any additional edges from the current +/// node and proceed immediately to the `Finish` event. +/// +/// There are implementations of `ControlFlow` for `()`, and `Result<C, E>` where +/// `C: ControlFlow`. The implementation for `()` will continue until finished. +/// For `Result`, upon encountering an `E` it will break, otherwise acting the same as `C`. +/// +/// **Panics** if you attempt to prune a node from its `Finish` event. 
+pub fn depth_first_search(graph: G, starts: I, mut visitor: F) -> C +where + G: IntoNeighbors + Visitable, + I: IntoIterator, + F: FnMut(DfsEvent) -> C, + C: ControlFlow, +{ + let time = &mut Time(0); + let discovered = &mut graph.visit_map(); + let finished = &mut graph.visit_map(); + + for start in starts { + try_control!( + dfs_visitor(graph, start, &mut visitor, discovered, finished, time), + unreachable!() + ); + } + C::continuing() +} + +fn dfs_visitor( + graph: G, + u: G::NodeId, + visitor: &mut F, + discovered: &mut G::Map, + finished: &mut G::Map, + time: &mut Time, +) -> C +where + G: IntoNeighbors + Visitable, + F: FnMut(DfsEvent) -> C, + C: ControlFlow, +{ + if !discovered.visit(u) { + return C::continuing(); + } + + try_control!(visitor(DfsEvent::Discover(u, time_post_inc(time))), {}, { + let mut stack: Vec<(G::NodeId, ::Neighbors)> = + Vec::new(); + stack.push((u, graph.neighbors(u))); + + while let Some(elem) = stack.last_mut() { + let u = elem.0; + let neighbors = &mut elem.1; + let mut next = None; + + for v in neighbors { + if !discovered.is_visited(&v) { + try_control!(visitor(DfsEvent::TreeEdge(u, v)), continue); + discovered.visit(v); + try_control!( + visitor(DfsEvent::Discover(v, time_post_inc(time))), + continue + ); + next = Some(v); + break; + } else if !finished.is_visited(&v) { + try_control!(visitor(DfsEvent::BackEdge(u, v)), continue); + } else { + try_control!( + visitor(DfsEvent::CrossForwardEdge(u, v)), + continue + ); + } + } + + match next { + Some(v) => stack.push((v, graph.neighbors(v))), + None => { + let first_finish = finished.visit(u); + debug_assert!(first_finish); + try_control!( + visitor(DfsEvent::Finish(u, time_post_inc(time))), + panic!("Pruning on the `DfsEvent::Finish` is not supported!") + ); + stack.pop(); + } + }; + } + }); + + C::continuing() +} + +fn time_post_inc(x: &mut Time) -> Time { + let v = *x; + x.0 += 1; + v +} + +#[derive(FromPyObject)] +pub struct PyDfsVisitor { + discover_vertex: PyObject, + 
finish_vertex: PyObject, + tree_edge: PyObject, + back_edge: PyObject, + forward_or_cross_edge: PyObject, +} + +pub fn handler( + py: Python, + vis: &PyDfsVisitor, + event: DfsEvent, +) -> PyResult<()> { + match event { + DfsEvent::Discover(u, Time(t)) => { + vis.discover_vertex.call1(py, (u.index(), t))?; + } + DfsEvent::TreeEdge(u, v) => { + let edge = (u.index(), v.index()); + vis.tree_edge.call1(py, (edge,))?; + } + DfsEvent::BackEdge(u, v) => { + let edge = (u.index(), v.index()); + vis.back_edge.call1(py, (edge,))?; + } + DfsEvent::CrossForwardEdge(u, v) => { + let edge = (u.index(), v.index()); + vis.forward_or_cross_edge.call1(py, (edge,))?; + } + DfsEvent::Finish(u, Time(t)) => { + vis.finish_vertex.call1(py, (u.index(), t))?; + } + } + + Ok(()) +} diff --git a/src/traversal/mod.rs b/src/traversal/mod.rs index c4b0b0fa21..44712e4c04 100644 --- a/src/traversal/mod.rs +++ b/src/traversal/mod.rs @@ -13,8 +13,10 @@ #![allow(clippy::float_cmp)] mod dfs_edges; +pub mod dfs_visit; use super::{digraph, graph, iterators}; +use dfs_visit::{depth_first_search, handler, PyDfsVisitor}; use hashbrown::HashSet; @@ -167,3 +169,149 @@ fn descendants(graph: &digraph::PyDiGraph, node: usize) -> HashSet { out_set.remove(&node); out_set } + +/// Depth-first traversal of a directed graph. +/// +/// The pseudo-code for the DFS algorithm is listed below, with the annotated +/// event points, for which the given visitor object will be called with the +/// appropriate method. +/// +/// :: +/// +/// DFS(G) +/// for each vertex u in V +/// color[u] := WHITE initialize vertex u +/// end for +/// time := 0 +/// call DFS-VISIT(G, source) start vertex s +/// +/// DFS-VISIT(G, u) +/// color[u] := GRAY discover vertex u +/// for each v in Adj[u] examine edge (u,v) +/// if (color[v] = WHITE) (u,v) is a tree edge +/// call DFS-VISIT(G, v) +/// else if (color[v] = GRAY) (u,v) is a back edge +/// ... +/// else if (color[v] = BLACK) (u,v) is a cross or forward edge +/// ... 
+/// end for +/// color[u] := BLACK finish vertex u +/// +/// In the following example we keep track of the tree edges: +/// +/// .. jupyter-execute:: +/// +/// import retworkx +/// from retworkx.visit import DFSVisitor +/// +/// class TreeEdgesRecorder(DFSVisitor): +/// +/// def __init__(self): +/// self.edges = [] +/// +/// def tree_edge(self, edge): +/// self.edges.append(edge) +/// +/// graph = retworkx.PyGraph() +/// graph.extend_from_edge_list([(1, 3), (0, 1), (2, 1), (0, 2)]) +/// vis = TreeEdgesRecorder() +/// retworkx.dfs_search(graph, 0, vis) +/// print('Tree edges:', vis.edges) +/// +/// :param PyDiGraph graph: The graph to be used. +/// :param int source: An optional node index to use as the starting node +/// for the depth-first search. If this is not specified then a source +/// will be chosen arbitrarily and repeated until all components of the +/// graph are searched. +/// :param visitor: A visitor object that is invoked at the event points inside the +/// algorithm. This should be a subclass of :class:`~retworkx.visit.DFSVisitor`. +#[pyfunction] +#[pyo3(text_signature = "(graph, source, visitor)")] +pub fn digraph_dfs_search( + py: Python, + graph: &digraph::PyDiGraph, + source: Option, + visitor: PyDfsVisitor, +) -> PyResult<()> { + let starts = match source { + Some(nx) => vec![NodeIndex::new(nx)], + None => graph.graph.node_indices().collect(), + }; + + depth_first_search(&graph.graph, starts, |event| { + handler(py, &visitor, event) + }) +} + +/// Depth-first traversal of an undirected graph. +/// +/// The pseudo-code for the DFS algorithm is listed below, with the annotated +/// event points, for which the given visitor object will be called with the +/// appropriate method. 
+/// +/// :: +/// +/// DFS(G) +/// for each vertex u in V +/// color[u] := WHITE initialize vertex u +/// end for +/// time := 0 +/// call DFS-VISIT(G, source) start vertex s +/// +/// DFS-VISIT(G, u) +/// color[u] := GRAY discover vertex u +/// for each v in Adj[u] examine edge (u,v) +/// if (color[v] = WHITE) (u,v) is a tree edge +/// call DFS-VISIT(G, v) +/// else if (color[v] = GRAY) (u,v) is a back edge +/// ... +/// else if (color[v] = BLACK) (u,v) is a cross or forward edge +/// ... +/// end for +/// color[u] := BLACK finish vertex u +/// +/// In the following example we keep track of the tree edges: +/// +/// .. jupyter-execute:: +/// +/// import retworkx +/// from retworkx.visit import DFSVisitor +/// +/// class TreeEdgesRecorder(DFSVisitor): +/// +/// def __init__(self): +/// self.edges = [] +/// +/// def tree_edge(self, edge): +/// self.edges.append(edge) +/// +/// graph = retworkx.PyGraph() +/// graph.extend_from_edge_list([(1, 3), (0, 1), (2, 1), (0, 2)]) +/// vis = TreeEdgesRecorder() +/// retworkx.dfs_search(graph, 0, vis) +/// print('Tree edges:', vis.edges) +/// +/// :param PyGraph graph: The graph to be used. +/// :param int source: An optional node index to use as the starting node +/// for the depth-first search. If this is not specified then a source +/// will be chosen arbitrarily and repeated until all components of the +/// graph are searched. +/// :param visitor: A visitor object that is invoked at the event points inside the +/// algorithm. This should be a subclass of :class:`~retworkx.visit.DFSVisitor`. +#[pyfunction] +#[pyo3(text_signature = "(graph, source, visitor)")] +pub fn graph_dfs_search( + py: Python, + graph: &graph::PyGraph, + source: Option, + visitor: PyDfsVisitor, +) -> PyResult<()> { + let starts = match source { + Some(nx) => vec![NodeIndex::new(nx)], + None => graph.graph.node_indices().collect(), + }; + + depth_first_search(&graph.graph, starts, |event| { + handler(py, &visitor, event) + }) +}