From 316ff436bb5a3295cce9c1271132e64b116f6cfb Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 16 Oct 2024 11:43:08 +0200 Subject: [PATCH 01/10] Add `mode` argument to visualize to allow display of either tasks or data --- src/sciline/visualize.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/src/sciline/visualize.py b/src/sciline/visualize.py index 0e78c307..a6d7bb88 100644 --- a/src/sciline/visualize.py +++ b/src/sciline/visualize.py @@ -2,7 +2,7 @@ # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) from collections.abc import Hashable from dataclasses import dataclass -from typing import Any, get_args, get_origin +from typing import Any, Literal, get_args, get_origin import cyclebane from graphviz import Digraph @@ -31,6 +31,7 @@ class FormattedProvider: def to_graphviz( graph: Graph, compact: bool = False, + mode: Literal['data', 'task', 'both'] = 'both', cluster_generics: bool = True, cluster_color: str | None = '#f0f0ff', **kwargs: Any, @@ -69,7 +70,7 @@ def to_graphviz( dot_subgraph.attr(style='dotted') else: dot_subgraph.attr(style='filled', color=cluster_color) - _add_subgraph(subgraph, dot, dot_subgraph) + _add_subgraph(subgraph, dot, dot_subgraph, mode=mode) return dot @@ -82,7 +83,12 @@ def _to_subgraphs(graph: FormattedGraph) -> dict[str, FormattedGraph]: return subgraphs -def _add_subgraph(graph: FormattedGraph, dot: Digraph, subgraph: Digraph) -> None: +def _add_subgraph( + graph: FormattedGraph, + dot: Digraph, + subgraph: Digraph, + mode: Literal['data', 'task', 'both'] = 'both', +) -> None: for p, formatted_p in graph.items(): if formatted_p.kind == 'unsatisfied': subgraph.node( @@ -93,17 +99,26 @@ def _add_subgraph(graph: FormattedGraph, dot: Digraph, subgraph: Digraph) -> Non fontcolor='red', # Set text color to red style='dashed', ) - else: + elif mode != 'task' or formatted_p.kind == 'parameter': subgraph.node( formatted_p.ret.name, formatted_p.ret.name, shape='box3d' if formatted_p.ret.collapsed else 'rectangle', ) if formatted_p.kind == 'function': - dot.node(p, formatted_p.name, shape='ellipse') - for arg in formatted_p.args: - dot.edge(arg.name, p) - dot.edge(p, formatted_p.ret.name) + if mode == 'both': + dot.node(p, formatted_p.name, shape='ellipse') + for arg in formatted_p.args: + dot.edge(arg.name, p) + dot.edge(p, formatted_p.ret.name) + elif mode == 'task': + p = formatted_p.ret.name + dot.node(p, formatted_p.name, shape='ellipse') + for arg in formatted_p.args: + dot.edge(arg.name, p) + elif mode == 'data': + for arg in formatted_p.args: + dot.edge(arg.name, formatted_p.ret.name) # else: Do not draw dummy providers created by Pipeline when setting instances From 9cd1ed776d405663977ee885b75e15d0dc05b5e7 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 16 Oct 2024 11:44:14 +0200 Subject: [PATCH 02/10] Explicitly define keyword args to forward for better complete and docs --- src/sciline/pipeline.py | 38 +++++++++++++++++++++++++++++++++++--- src/sciline/task_graph.py | 30 +++++++++++++++++++++++++++--- 2 files changed, 62 insertions(+), 6 deletions(-) diff --git a/src/sciline/pipeline.py b/src/sciline/pipeline.py index 5f24c874..7b06b52b 100644 --- a/src/sciline/pipeline.py +++ b/src/sciline/pipeline.py @@ -5,7 +5,15 @@ from collections.abc import Callable, Hashable, Iterable, Sequence from itertools import chain from types import UnionType -from typing import TYPE_CHECKING, Any, TypeVar, get_args, get_type_hints, overload +from typing import ( + TYPE_CHECKING, + Any, + Literal, + TypeVar, + get_args, + get_type_hints, + overload, +) from ._provider import Provider, ToProvider from .data_graph import DataGraph, to_task_graph @@ -89,7 +97,15 @@ def compute(self, tp: type | Iterable[type] | UnionType, **kwargs: Any) -> Any: """ return self.get(tp, **kwargs).compute() - def visualize(self, tp: type | Iterable[type], **kwargs: Any) -> graphviz.Digraph: + def visualize( + self, + tp: type | Iterable[type], + compact: bool = False, + mode: Literal['data', 'task', 'both'] = 'both', + cluster_generics: bool = True, + cluster_color: str | None = '#f0f0ff', + **kwargs: Any, + ) -> graphviz.Digraph: """ Return a graphviz Digraph object representing the graph for the given keys. @@ -100,10 +116,26 @@ def visualize(self, tp: type | Iterable[type], **kwargs: Any) -> graphviz.Digrap tp: Type to visualize the graph for. Can be a single type or an iterable of types. + compact: + If True, parameter-table-dependent branches are collapsed into a single copy + of the branch. Recommended for large graphs with long parameter tables. + mode: + If 'data', only data nodes are shown. If 'task', only task nodes and input + data nodes are shown. If 'both', all nodes are shown. + cluster_generics: + If True, generic products are grouped into clusters. + cluster_color: + Background color of clusters. If None, clusters are dotted. kwargs: Keyword arguments passed to :py:class:`graphviz.Digraph`. """ - return self.get(tp, handler=HandleAsComputeTimeException()).visualize(**kwargs) + return self.get(tp, handler=HandleAsComputeTimeException()).visualize( + compact=compact, + mode=mode, + cluster_generics=cluster_generics, + cluster_color=cluster_color, + **kwargs, + ) def get( self, diff --git a/src/sciline/task_graph.py b/src/sciline/task_graph.py index 51f51ff5..750cde56 100644 --- a/src/sciline/task_graph.py +++ b/src/sciline/task_graph.py @@ -4,7 +4,7 @@ from collections.abc import Generator, Hashable, Sequence from html import escape -from typing import Any, TypeVar +from typing import Any, Literal, TypeVar from ._utils import key_name from .scheduler import DaskScheduler, NaiveScheduler, Scheduler @@ -126,18 +126,42 @@ def keys(self) -> Generator[Key, None, None]: """ yield from self._graph.keys() - def visualize(self, **kwargs: Any) -> graphviz.Digraph: # type: ignore[name-defined] # noqa: F821 + def visualize( + self, + compact: bool = False, + mode: Literal['data', 'task', 'both'] = 'both', + cluster_generics: bool = True, + cluster_color: str | None = '#f0f0ff', + **kwargs: Any, + ) -> graphviz.Digraph: # type: ignore[name-defined] # noqa: F821 """ Return a graphviz Digraph object representing the graph. Parameters ---------- + compact: + If True, parameter-table-dependent branches are collapsed into a single copy + of the branch. Recommended for large graphs with long parameter tables. + mode: + If 'data', only data nodes are shown. If 'task', only task nodes and input + data nodes are shown. If 'both', all nodes are shown. + cluster_generics: + If True, generic products are grouped into clusters. + cluster_color: + Background color of clusters. If None, clusters are dotted. kwargs: Keyword arguments passed to :py:class:`graphviz.Digraph`. """ from .visualize import to_graphviz - return to_graphviz(self._graph, **kwargs) + return to_graphviz( + self._graph, + compact=compact, + mode=mode, + cluster_generics=cluster_generics, + cluster_color=cluster_color, + **kwargs, + ) def serialize(self) -> dict[str, Json]: """Serialize the graph to JSON. From 053826c30f9893f0ab03735b4e0397ea06a556a5 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 18 Oct 2024 07:10:40 +0200 Subject: [PATCH 03/10] Several further vis improvements --- src/sciline/visualize.py | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/src/sciline/visualize.py b/src/sciline/visualize.py index a6d7bb88..171ac83f 100644 --- a/src/sciline/visualize.py +++ b/src/sciline/visualize.py @@ -46,6 +46,9 @@ def to_graphviz( compact: If True, parameter-table-dependent branches are collapsed into a single copy of the branch. Recommended for large graphs with long parameter tables. + mode: + If 'data', only data nodes are shown. If 'task', only task nodes and input data + nodes are shown. If 'both', all nodes are shown. cluster_generics: If True, generic products are grouped into clusters. cluster_color: @@ -54,6 +57,7 @@ def to_graphviz( Keyword arguments passed to :py:class:`graphviz.Digraph`. """ dot = Digraph(strict=True, **kwargs) + dot.graph_attr['compound'] = 'true' formatted_graph = _format_graph(graph, compact=compact) ordered_graph = dict( sorted(formatted_graph.items(), key=lambda item: item[1].ret.name) @@ -70,6 +74,8 @@ def to_graphviz( dot_subgraph.attr(style='dotted') else: dot_subgraph.attr(style='filled', color=cluster_color) + origin = next(iter(subgraph.values())).ret.name.split('[')[0] + dot_subgraph.attr(label=f'<{origin}>') _add_subgraph(subgraph, dot, dot_subgraph, mode=mode) return dot @@ -89,11 +95,26 @@ def _add_subgraph( subgraph: Digraph, mode: Literal['data', 'task', 'both'] = 'both', ) -> None: + cluster = subgraph.name is not None + cluster_connected = [] + common_provider = len({v.name for v in graph.values()}) == 1 for p, formatted_p in graph.items(): + split = formatted_p.ret.name.split('[', maxsplit=1) + name = ( + f'{split[1][:-1]}' + if cluster + else formatted_p.ret.name + ) + if mode == 'data' and formatted_p.kind == 'function': + # Show provider name in data mode + via = f'via:{formatted_p.name}' + name = f'<{name}
{via}>' + else: + name = f'<{name}>' if formatted_p.kind == 'unsatisfied': subgraph.node( formatted_p.ret.name, - formatted_p.ret.name, + name, shape='box3d' if formatted_p.ret.collapsed else 'rectangle', color='red', fontcolor='red', # Set text color to red @@ -102,7 +123,7 @@ def _add_subgraph( elif mode != 'task' or formatted_p.kind == 'parameter': subgraph.node( formatted_p.ret.name, - formatted_p.ret.name, + name, shape='box3d' if formatted_p.ret.collapsed else 'rectangle', ) if formatted_p.kind == 'function': @@ -118,7 +139,17 @@ def _add_subgraph( dot.edge(arg.name, p) elif mode == 'data': for arg in formatted_p.args: - dot.edge(arg.name, formatted_p.ret.name) + if cluster and common_provider and '[' not in arg.name: + # Avoid duplicate arrows to subnodes if all providers are the + # same and the argument is not a generic + if arg.name not in cluster_connected: + dot.edge( + arg.name, formatted_p.ret.name, lhead=subgraph.name + ) + cluster_connected.append(arg.name) + else: + dot.edge(arg.name, formatted_p.ret.name) + # else: Do not draw dummy providers created by Pipeline when setting instances From 70da1a9efd326ed32195bc0e4129e89e131799f9 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 18 Oct 2024 09:40:34 +0200 Subject: [PATCH 04/10] Tune attrs and clean up --- src/sciline/visualize.py | 72 +++++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 26 deletions(-) diff --git a/src/sciline/visualize.py b/src/sciline/visualize.py index 171ac83f..d7fd9cb4 100644 --- a/src/sciline/visualize.py +++ b/src/sciline/visualize.py @@ -57,6 +57,25 @@ def to_graphviz( Keyword arguments passed to :py:class:`graphviz.Digraph`. """ dot = Digraph(strict=True, **kwargs) + if dot.graph_attr.get('rankdir', 'TB') == 'LR': + # Significant horizontal space helps distinguishing edges + dot.graph_attr['ranksep'] = '1' + # Little vertical space + dot.graph_attr['nodesep'] = '0.05' + # Avoiding edges connecting to top/bottom reduces edge clutter in larger graphs + dot.edge_attr['tailport'] = 'e' + dot.edge_attr['headport'] = 'w' + else: + dot.graph_attr['ranksep'] = '0.5' + dot.graph_attr['nodesep'] = '0.1' + dot.edge_attr['tailport'] = 's' + # Nodes are wide in west-east direction, so *not* connecting to headport='n' + # looks better + dot.node_attr.update({'height': '0', 'width': '0'}) + # Ensure user can override defaults + dot.node_attr.update(kwargs.get('node_attr', {})) + dot.edge_attr.update(kwargs.get('edge_attr', {})) + dot.graph_attr.update(kwargs.get('graph_attr', {})) dot.graph_attr['compound'] = 'true' formatted_graph = _format_graph(graph, compact=compact) ordered_graph = dict( @@ -74,8 +93,10 @@ def to_graphviz( dot_subgraph.attr(style='dotted') else: dot_subgraph.attr(style='filled', color=cluster_color) + # For keys such as MyType[int] we show MyType only once as the cluster + # label. The nodes within the cluster will only show to bit inside []. origin = next(iter(subgraph.values())).ret.name.split('[')[0] - dot_subgraph.attr(label=f'<{origin}>') + dot_subgraph.attr(label=f'{origin}') _add_subgraph(subgraph, dot, dot_subgraph, mode=mode) return dot @@ -97,43 +118,45 @@ def _add_subgraph( ) -> None: cluster = subgraph.name is not None cluster_connected = [] - common_provider = len({v.name for v in graph.values()}) == 1 + common_provider = len(graph) > 1 and len({v.name for v in graph.values()}) == 1 for p, formatted_p in graph.items(): - split = formatted_p.ret.name.split('[', maxsplit=1) - name = ( - f'{split[1][:-1]}' - if cluster - else formatted_p.ret.name - ) + ret_name = formatted_p.ret.name + if cluster: + # Remove the origin from the name if we are in a cluster, as it is shown + # as the cluster label + split = ret_name[ret_name.index('[') :] + # The nodes within the cluster use slightly smaller text. + name = f'<{split}>' + else: + name = f'<{ret_name}>' if mode == 'data' and formatted_p.kind == 'function': # Show provider name in data mode via = f'via:{formatted_p.name}' - name = f'<{name}
{via}>' - else: - name = f'<{name}>' + if common_provider: + origin = ret_name.split('[')[0] + subgraph.attr(label=f'<{origin}
{via}>') + else: + name = f'{name[:-1]}
{via}>' + shape = 'box3d' if formatted_p.ret.collapsed else 'rectangle' if formatted_p.kind == 'unsatisfied': subgraph.node( - formatted_p.ret.name, + ret_name, name, - shape='box3d' if formatted_p.ret.collapsed else 'rectangle', + shape=shape, color='red', - fontcolor='red', # Set text color to red + fontcolor='red', style='dashed', ) elif mode != 'task' or formatted_p.kind == 'parameter': - subgraph.node( - formatted_p.ret.name, - name, - shape='box3d' if formatted_p.ret.collapsed else 'rectangle', - ) + subgraph.node(ret_name, name, shape=shape) if formatted_p.kind == 'function': if mode == 'both': dot.node(p, formatted_p.name, shape='ellipse') for arg in formatted_p.args: dot.edge(arg.name, p) - dot.edge(p, formatted_p.ret.name) + dot.edge(p, ret_name) elif mode == 'task': - p = formatted_p.ret.name + p = ret_name dot.node(p, formatted_p.name, shape='ellipse') for arg in formatted_p.args: dot.edge(arg.name, p) @@ -143,13 +166,10 @@ def _add_subgraph( # Avoid duplicate arrows to subnodes if all providers are the # same and the argument is not a generic if arg.name not in cluster_connected: - dot.edge( - arg.name, formatted_p.ret.name, lhead=subgraph.name - ) + dot.edge(arg.name, ret_name, lhead=subgraph.name) cluster_connected.append(arg.name) else: - dot.edge(arg.name, formatted_p.ret.name) - + dot.edge(arg.name, ret_name) # else: Do not draw dummy providers created by Pipeline when setting instances From 4f29d7e84b5f8e5a061817f848ef43376ce0598c Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 18 Oct 2024 09:44:26 +0200 Subject: [PATCH 05/10] Comments --- src/sciline/visualize.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/sciline/visualize.py b/src/sciline/visualize.py index d7fd9cb4..69147d8f 100644 --- a/src/sciline/visualize.py +++ b/src/sciline/visualize.py @@ -76,6 +76,7 @@ def to_graphviz( dot.node_attr.update(kwargs.get('node_attr', {})) dot.edge_attr.update(kwargs.get('edge_attr', {})) dot.graph_attr.update(kwargs.get('graph_attr', {})) + # Compound is required for connecting edges to clusters dot.graph_attr['compound'] = 'true' formatted_graph = _format_graph(graph, compact=compact) ordered_graph = dict( @@ -95,6 +96,8 @@ def to_graphviz( dot_subgraph.attr(style='filled', color=cluster_color) # For keys such as MyType[int] we show MyType only once as the cluster # label. The nodes within the cluster will only show to bit inside []. + # This save a lot of horizontal space in the graph in LR mode and + # duplication and clutter in general. origin = next(iter(subgraph.values())).ret.name.split('[')[0] dot_subgraph.attr(label=f'{origin}') _add_subgraph(subgraph, dot, dot_subgraph, mode=mode) From d739b99c50f1db0012198980bd5fea2f57b6d0d1 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 18 Oct 2024 10:54:55 +0200 Subject: [PATCH 06/10] Use thick pen --- src/sciline/visualize.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/sciline/visualize.py b/src/sciline/visualize.py index 69147d8f..42e07794 100644 --- a/src/sciline/visualize.py +++ b/src/sciline/visualize.py @@ -169,7 +169,13 @@ def _add_subgraph( # Avoid duplicate arrows to subnodes if all providers are the # same and the argument is not a generic if arg.name not in cluster_connected: - dot.edge(arg.name, ret_name, lhead=subgraph.name) + dot.edge( + arg.name, + ret_name, + lhead=subgraph.name, + # Thick pen to indicate multiple connections + penwidth='2.0', + ) cluster_connected.append(arg.name) else: dot.edge(arg.name, ret_name) From 6c2343308a48100c5c69c437ec900ab766e02cb8 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 18 Oct 2024 10:55:09 +0200 Subject: [PATCH 07/10] Default to mode='data' --- src/sciline/pipeline.py | 2 +- src/sciline/task_graph.py | 2 +- src/sciline/visualize.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/sciline/pipeline.py b/src/sciline/pipeline.py index 7b06b52b..2be01a93 100644 --- a/src/sciline/pipeline.py +++ b/src/sciline/pipeline.py @@ -101,7 +101,7 @@ def visualize( self, tp: type | Iterable[type], compact: bool = False, - mode: Literal['data', 'task', 'both'] = 'both', + mode: Literal['data', 'task', 'both'] = 'data', cluster_generics: bool = True, cluster_color: str | None = '#f0f0ff', **kwargs: Any, diff --git a/src/sciline/task_graph.py b/src/sciline/task_graph.py index 750cde56..27df1c92 100644 --- a/src/sciline/task_graph.py +++ b/src/sciline/task_graph.py @@ -129,7 +129,7 @@ def keys(self) -> Generator[Key, None, None]: def visualize( self, compact: bool = False, - mode: Literal['data', 'task', 'both'] = 'both', + mode: Literal['data', 'task', 'both'] = 'data', cluster_generics: bool = True, cluster_color: str | None = '#f0f0ff', **kwargs: Any, diff --git a/src/sciline/visualize.py b/src/sciline/visualize.py index 42e07794..3c2252de 100644 --- a/src/sciline/visualize.py +++ b/src/sciline/visualize.py @@ -31,7 +31,7 @@ class FormattedProvider: def to_graphviz( graph: Graph, compact: bool = False, - mode: Literal['data', 'task', 'both'] = 'both', + mode: Literal['data', 'task', 'both'] = 'data', cluster_generics: bool = True, cluster_color: str | None = '#f0f0ff', **kwargs: Any, @@ -117,7 +117,7 @@ def _add_subgraph( graph: FormattedGraph, dot: Digraph, subgraph: Digraph, - mode: Literal['data', 'task', 'both'] = 'both', + mode: Literal['data', 'task', 'both'], ) -> None: cluster = subgraph.name is not None cluster_connected = [] From f59214105f50d12073161eb41264b036a832e907 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 1 Nov 2024 07:12:02 +0100 Subject: [PATCH 08/10] Escape provider names since clashes with HTML --- src/sciline/visualize.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/sciline/visualize.py b/src/sciline/visualize.py index 3c2252de..0d6ff653 100644 --- a/src/sciline/visualize.py +++ b/src/sciline/visualize.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) +import html from collections.abc import Hashable from dataclasses import dataclass from typing import Any, Literal, get_args, get_origin @@ -134,7 +135,8 @@ def _add_subgraph( name = f'<{ret_name}>' if mode == 'data' and formatted_p.kind == 'function': # Show provider name in data mode - via = f'via:{formatted_p.name}' + via_name = html.escape(formatted_p.name) + via = f'via:{via_name}' if common_provider: origin = ret_name.split('[')[0] subgraph.attr(label=f'<{origin}
{via}>') From fda69b3ce6cb63b1d9abad476b67cca3484a1358 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 1 Nov 2024 07:33:04 +0100 Subject: [PATCH 09/10] Avoid some curved edges --- src/sciline/visualize.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/sciline/visualize.py b/src/sciline/visualize.py index 0d6ff653..8055a8ef 100644 --- a/src/sciline/visualize.py +++ b/src/sciline/visualize.py @@ -69,7 +69,8 @@ def to_graphviz( else: dot.graph_attr['ranksep'] = '0.5' dot.graph_attr['nodesep'] = '0.1' - dot.edge_attr['tailport'] = 's' + # With tailport='s' we get more curved edges, so we omit it. In larger graphs + # this still seems to happen though, may need revisiting. # Nodes are wide in west-east direction, so *not* connecting to headport='n' # looks better dot.node_attr.update({'height': '0', 'width': '0'}) From 1e8432b1bc518a37721ac59cfea22f33bbc11e03 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 1 Nov 2024 08:32:27 +0100 Subject: [PATCH 10/10] Fix broken merge --- src/sciline/pipeline.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/sciline/pipeline.py b/src/sciline/pipeline.py index a1ec5cc1..3107caa0 100644 --- a/src/sciline/pipeline.py +++ b/src/sciline/pipeline.py @@ -139,7 +139,6 @@ def visualize( cluster_color=cluster_color, **kwargs, ) - return self.get(tp, handler=HandleAsComputeTimeException()).visualize(**kwargs) def get( self,