From 534853839bf2bd7a0ce0d7cdbe6f8cbf98990167 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 18 Jun 2024 12:42:26 +0200 Subject: [PATCH 01/17] Add compute_series --- docs/api-reference/index.md | 10 ++++ requirements/base.in | 2 +- requirements/base.txt | 2 +- requirements/basetest.in | 1 + requirements/basetest.txt | 18 ++++++-- requirements/ci.txt | 4 +- requirements/dev.txt | 2 +- requirements/docs.txt | 6 +-- requirements/static.txt | 2 +- requirements/test-dask.txt | 2 +- src/sciline/__init__.py | 3 +- src/sciline/pipeline.py | 43 ++++++++++++++++- tests/pipeline_map_reduce_test.py | 77 ++++++++++++++++++++++++++++++- 13 files changed, 155 insertions(+), 17 deletions(-) diff --git a/docs/api-reference/index.md b/docs/api-reference/index.md index 0982117f..de7495b3 100644 --- a/docs/api-reference/index.md +++ b/docs/api-reference/index.md @@ -21,6 +21,16 @@ HandleAsComputeTimeException ``` +## Top-level functions + +```{eval-rst} +.. autosummary:: + :toctree: ../generated/functions + :recursive: + + compute_series +``` + ## Exceptions ```{eval-rst} diff --git a/requirements/base.in b/requirements/base.in index b3dae665..c7c541e8 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -2,4 +2,4 @@ # will not be touched by ``make_base.py`` # --- END OF CUSTOM SECTION --- # The following was generated by 'tox -e deps', DO NOT EDIT MANUALLY! -cyclebane >= 24.06.0 +cyclebane>=24.06.0 diff --git a/requirements/base.txt b/requirements/base.txt index 55c42b64..c41d150f 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,4 +1,4 @@ -# SHA1:44b03b700447e95874de7cd4d8b11558c0c972e5 +# SHA1:1b4246f703135629f3fb69e65829cfb99abca695 # # This file is autogenerated by pip-compile-multi # To update, run: diff --git a/requirements/basetest.in b/requirements/basetest.in index fb893e1e..03c54bd4 100644 --- a/requirements/basetest.in +++ b/requirements/basetest.in @@ -4,4 +4,5 @@ graphviz jsonschema numpy +pandas pytest diff --git a/requirements/basetest.txt b/requirements/basetest.txt index 97075b10..35f87c42 100644 --- a/requirements/basetest.txt +++ b/requirements/basetest.txt @@ -1,4 +1,4 @@ -# SHA1:f7d11f6aab1600c37d922ffb857a414368b800cd +# SHA1:fa9ebd4f58fe57db20baa224ad46fac634f3f046 # # This file is autogenerated by pip-compile-multi # To update, run: @@ -19,14 +19,22 @@ jsonschema==4.22.0 # via -r basetest.in jsonschema-specifications==2023.12.1 # via jsonschema -numpy==1.26.4 - # via -r basetest.in +numpy==2.0.0 + # via + # -r basetest.in + # pandas packaging==24.1 # via pytest +pandas==2.2.2 + # via -r basetest.in pluggy==1.5.0 # via pytest pytest==8.2.2 # via -r basetest.in +python-dateutil==2.9.0.post0 + # via pandas +pytz==2024.1 + # via pandas referencing==0.35.1 # via # jsonschema @@ -35,5 +43,9 @@ rpds-py==0.18.1 # via # jsonschema # referencing +six==1.16.0 + # via python-dateutil tomli==2.0.1 # via pytest +tzdata==2024.1 + # via pandas diff --git a/requirements/ci.txt b/requirements/ci.txt index 1469754f..5d631e1e 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -17,7 +17,7 @@ colorama==0.4.6 # via tox distlib==0.3.8 # via virtualenv -filelock==3.14.0 +filelock==3.15.1 # via # tox # virtualenv @@ -50,7 +50,7 @@ tomli==2.0.1 # tox tox==4.15.1 # via -r ci.in -urllib3==2.2.1 +urllib3==2.2.2 # via requests virtualenv==20.26.2 # via tox diff --git a/requirements/dev.txt b/requirements/dev.txt index c4f2e617..e3c4c6b9 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -92,7 +92,7 @@ prometheus-client==0.20.0 # via jupyter-server pycparser==2.22 # via cffi -pydantic==2.7.3 +pydantic==2.7.4 # via copier pydantic-core==2.18.4 # via pydantic diff --git a/requirements/docs.txt b/requirements/docs.txt index c770a50d..a641e5ac 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -48,7 +48,7 @@ exceptiongroup==1.2.1 # via ipython executing==2.0.1 # via stack-data -fastjsonschema==2.19.1 +fastjsonschema==2.20.0 # via nbformat graphviz==0.20.3 # via -r docs.in @@ -120,7 +120,7 @@ nbsphinx==0.9.4 # via -r docs.in nest-asyncio==1.6.0 # via ipykernel -numpy==1.26.4 +numpy==2.0.0 # via pandas packaging==24.1 # via @@ -241,7 +241,7 @@ typing-extensions==4.12.2 # pydata-sphinx-theme tzdata==2024.1 # via pandas -urllib3==2.2.1 +urllib3==2.2.2 # via requests wcwidth==0.2.13 # via prompt-toolkit diff --git a/requirements/static.txt b/requirements/static.txt index 8fb5c318..cb03fb85 100644 --- a/requirements/static.txt +++ b/requirements/static.txt @@ -9,7 +9,7 @@ cfgv==3.4.0 # via pre-commit distlib==0.3.8 # via virtualenv -filelock==3.14.0 +filelock==3.15.1 # via virtualenv identify==2.5.36 # via pre-commit diff --git a/requirements/test-dask.txt b/requirements/test-dask.txt index de448ec5..6629b827 100644 --- a/requirements/test-dask.txt +++ b/requirements/test-dask.txt @@ -10,7 +10,7 @@ click==8.1.7 # via dask cloudpickle==3.0.0 # via dask -dask==2024.5.2 +dask==2024.6.0 # via -r test-dask.in fsspec==2024.6.0 # via dask diff --git a/src/sciline/__init__.py b/src/sciline/__init__.py index 31158e57..1f09d700 100644 --- a/src/sciline/__init__.py +++ b/src/sciline/__init__.py @@ -17,7 +17,7 @@ HandleAsComputeTimeException, UnsatisfiedRequirement, ) -from .pipeline import Pipeline +from .pipeline import Pipeline, compute_series from .task_graph import TaskGraph __all__ = [ @@ -30,6 +30,7 @@ "UnsatisfiedRequirement", "HandleAsBuildTimeException", "HandleAsComputeTimeException", + "compute_series", ] del importlib diff --git a/src/sciline/pipeline.py b/src/sciline/pipeline.py index 6eaa68c3..a8583e57 100644 --- a/src/sciline/pipeline.py +++ b/src/sciline/pipeline.py @@ -2,10 +2,10 @@ # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) from __future__ import annotations -from collections.abc import Callable, Iterable +from collections.abc import Callable, Hashable, Iterable from itertools import chain from types import UnionType -from typing import Any, TypeVar, get_args, get_type_hints, overload +from typing import TYPE_CHECKING, Any, TypeVar, get_args, get_type_hints, overload from ._provider import Provider, ToProvider from .data_graph import DataGraph, to_task_graph @@ -15,6 +15,10 @@ from .task_graph import TaskGraph from .typing import Key +if TYPE_CHECKING: + import pandas + + T = TypeVar('T') KeyType = TypeVar('KeyType', bound=Key) @@ -194,3 +198,38 @@ def bind_and_call( def _repr_html_(self) -> str: nodes = ((key, data) for key, data in self._graph.nodes.items()) return pipeline_html_repr(nodes) + + +def compute_series(graph: Pipeline, key: Hashable) -> pandas.Series: + """ + Given a graph with key depending on mapped nodes, compute a series for the key. + + If the key depends on multiple indices, the series will be a multi-index series. + + Note that Pandas is not a dependency of Sciline and must be installed separately. + + Parameters + ---------- + graph: + The pipeline to compute the series from. + key: + The key to compute the series for. This key must depend on mapped nodes. + + Returns + ------- + : + The computed series. + """ + import pandas as pd + from cyclebane.graph import IndexValues, NodeName + + graph = graph[key] # Drops unrelated indices + indices = graph._cbgraph.indices + index_names = tuple(indices) + index = pd.MultiIndex.from_product(indices.values(), names=index_names) + keys = tuple(NodeName(key, IndexValues(index_names, idx)) for idx in index) + if index.nlevels == 1: # Avoid more complicate MultiIndex if unnecessary + index = index.get_level_values(0) + key_series = pd.Series(keys, index=index, name=key) + results = graph.compute(key_series) + return key_series.apply(lambda x: results[x]) diff --git a/tests/pipeline_map_reduce_test.py b/tests/pipeline_map_reduce_test.py index 7043f950..8819c2a8 100644 --- a/tests/pipeline_map_reduce_test.py +++ b/tests/pipeline_map_reduce_test.py @@ -1,8 +1,12 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) +from __future__ import annotations + from typing import NewType +import pandas as pd import pytest +from cyclebane.graph import IndexValues, NodeName import sciline as sl @@ -31,7 +35,6 @@ def test_map_returns_pipeline_that_can_compute_for_each_value() -> None: with pytest.raises(sl.UnsatisfiedRequirement): # B is not in the graph any more, since it has been duplicated mapped.compute(B) - from cyclebane.graph import IndexValues, NodeName for i in range(3): index = IndexValues(('dim_0',), (i,)) @@ -47,3 +50,75 @@ def test_reduce_returns_pipeline_passing_mapped_branches_to_reducing_func() -> N Result = NewType('Result', int) assert mapped.reduce(func=min, name=Result).compute(Result) == Result(1) assert mapped.reduce(func=max, name=Result).compute(Result) == Result(21) + + +def test_compute_series_single_index() -> None: + def ab_to_c(a: A, b: B) -> C: + return C(a + b) + + pl = sl.Pipeline((ab_to_c,)) + pl[B] = B(7) + paramsA = pd.DataFrame( + {A: [A(10 * i) for i in range(3)]}, index=['a', 'b', 'c'] + ).rename_axis('x') + mapped = pl.map(paramsA) + result = sl.compute_series(mapped, C) + assert result['a'] == C(7) + assert result['b'] == C(17) + assert result['c'] == C(27) + assert result.index.name == 'x' + assert result.name == C + + +def test_compute_series_single_index_with_no_name() -> None: + def ab_to_c(a: A, b: B) -> C: + return C(a + b) + + pl = sl.Pipeline((ab_to_c,)) + pl[B] = B(7) + paramsA = pd.DataFrame({A: [A(10 * i) for i in range(3)]}, index=['a', 'b', 'c']) + mapped = pl.map(paramsA) + result = sl.compute_series(mapped, C) + assert result['a'] == C(7) + assert result['b'] == C(17) + assert result['c'] == C(27) + assert result.index.name == 'dim_0' + assert result.name == C + + +def test_compute_series_from_mapped_with_implicit_index() -> None: + def ab_to_c(a: A, b: B) -> C: + return C(a + b) + + pl = sl.Pipeline((ab_to_c,)) + pl[B] = B(7) + mapped = pl.map({A: [A(10 * i) for i in range(3)]}) + result = sl.compute_series(mapped, C) + assert result[0] == C(7) + assert result[1] == C(17) + assert result[2] == C(27) + assert result.index.name == 'dim_0' + assert result.name == C + + +def test_compute_series_multiple_indices_creates_multiindex() -> None: + def ab_to_c(a: A, b: B) -> C: + return C(a + b) + + pl = sl.Pipeline((ab_to_c,)) + paramsA = pd.DataFrame( + {A: [A(10 * i) for i in range(3)]}, index=['a', 'b', 'c'] + ).rename_axis('x') + paramsB = pd.DataFrame( + {B: [B(i) for i in range(2)]}, index=['aa', 'bb'] + ).rename_axis('y') + mapped = pl.map(paramsA).map(paramsB) + result = sl.compute_series(mapped, C) + assert result['a', 'aa'] == C(0) + assert result['a', 'bb'] == C(1) + assert result['b', 'aa'] == C(10) + assert result['b', 'bb'] == C(11) + assert result['c', 'aa'] == C(20) + assert result['c', 'bb'] == C(21) + assert result.index.names == ['x', 'y'] + assert result.name == C From 6680c5c0e601b6cc60ba63059660b2130a30cda0 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 18 Jun 2024 13:40:49 +0200 Subject: [PATCH 02/17] Improve tests --- tests/pipeline_map_reduce_test.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tests/pipeline_map_reduce_test.py b/tests/pipeline_map_reduce_test.py index 8819c2a8..25d22cf1 100644 --- a/tests/pipeline_map_reduce_test.py +++ b/tests/pipeline_map_reduce_test.py @@ -120,5 +120,25 @@ def ab_to_c(a: A, b: B) -> C: assert result['b', 'bb'] == C(11) assert result['c', 'aa'] == C(20) assert result['c', 'bb'] == C(21) - assert result.index.names == ['x', 'y'] + assert result.index.names == list(mapped._cbgraph.index_names) assert result.name == C + + +def test_compute_series_ignores_unrelated_index() -> None: + def ab_to_c(a: A, b: B) -> C: + return C(a + b) + + pl = sl.Pipeline((ab_to_c,)) + paramsA = pd.DataFrame( + {A: [A(10 * i) for i in range(3)]}, index=['a', 'b', 'c'] + ).rename_axis('x') + paramsB = pd.DataFrame( + {B: [B(i) for i in range(2)]}, index=['aa', 'bb'] + ).rename_axis('y') + mapped = pl.map(paramsA).map(paramsB) + result = sl.compute_series(mapped, A) + assert result.index.name == 'x' + assert result['a'] == A(0) + assert result['b'] == A(10) + assert result['c'] == A(20) + assert result.name == A From 2041c3f95882f942609a778dc281a44d736e095a Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 18 Jun 2024 13:58:12 +0200 Subject: [PATCH 03/17] Update docs --- docs/api-reference/index.md | 1 + docs/user-guide/parameter-tables.ipynb | 27 +++++++++++++----- src/sciline/__init__.py | 3 +- src/sciline/pipeline.py | 38 ++++++++++++++++++++++---- 4 files changed, 55 insertions(+), 14 deletions(-) diff --git a/docs/api-reference/index.md b/docs/api-reference/index.md index de7495b3..2da1e36d 100644 --- a/docs/api-reference/index.md +++ b/docs/api-reference/index.md @@ -29,6 +29,7 @@ :recursive: compute_series + get_mapped_node_names ``` ## Exceptions diff --git a/docs/user-guide/parameter-tables.ipynb b/docs/user-guide/parameter-tables.ipynb index bb7c70b3..8bfd5bd2 100644 --- a/docs/user-guide/parameter-tables.ipynb +++ b/docs/user-guide/parameter-tables.ipynb @@ -146,8 +146,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Then we can compute `Result` for each index in the parameter table.\n", - "Currently there is no convenient way of accessing these, instead we manually define the target nodes to compute:" + "We can use the [compute_series](https://scipp.github.io/sciline/generated/functions/sciline.compute_series.html) function to compute `Result` for each index in the parameter table:" ] }, { @@ -156,10 +155,8 @@ "metadata": {}, "outputs": [], "source": [ - "from cyclebane.graph import NodeName, IndexValues\n", - "\n", - "targets = [NodeName(Result, IndexValues(('run_id',), (i,))) for i in run_ids]\n", - "pipeline.compute(targets)" + "results = sciline.compute_series(pipeline, Result)\n", + "pd.DataFrame(results) # DataFrame for HTML rendering" ] }, { @@ -168,7 +165,22 @@ "source": [ "Note the use of the `run_id` index.\n", "If the index axis of the DataFrame has no name then a default of `dim_0`, `dim_1`, etc. is used.\n", - "We can also visualize the task graph for computing the series of `Result` values:" + "\n", + "
\n", + "\n", + "**Note**\n", + "\n", + "[compute_series](https://scipp.github.io/sciline/generated/functions/sciline.compute_series.html) depends on Pandas, which is not a dependency of Sciline and must be installed separately, e.g., using pip:\n", + "\n", + "```bash\n", + "pip install pandas\n", + "```\n", + "\n", + "
\n", + "\n", + "We can also visualize the task graph for computing the series of `Result` values.\n", + "For this, we need to get all the node names derived from `Result` via the `map` operation.\n", + "The [get_mapped_node_names](https://scipp.github.io/sciline/generated/functions/sciline.get_mapped_node_names.html) function can be used to get a `pandas.Series` of these node names, which we can then visualize:" ] }, { @@ -177,6 +189,7 @@ "metadata": {}, "outputs": [], "source": [ + "targets = sciline.get_mapped_node_names(pipeline, Result)\n", "pipeline.visualize(targets)" ] }, diff --git a/src/sciline/__init__.py b/src/sciline/__init__.py index 1f09d700..a860fe78 100644 --- a/src/sciline/__init__.py +++ b/src/sciline/__init__.py @@ -17,7 +17,7 @@ HandleAsComputeTimeException, UnsatisfiedRequirement, ) -from .pipeline import Pipeline, compute_series +from .pipeline import Pipeline, compute_series, get_mapped_node_names from .task_graph import TaskGraph __all__ = [ @@ -31,6 +31,7 @@ "HandleAsBuildTimeException", "HandleAsComputeTimeException", "compute_series", + "get_mapped_node_names", ] del importlib diff --git a/src/sciline/pipeline.py b/src/sciline/pipeline.py index a8583e57..9371a706 100644 --- a/src/sciline/pipeline.py +++ b/src/sciline/pipeline.py @@ -200,10 +200,12 @@ def _repr_html_(self) -> str: return pipeline_html_repr(nodes) -def compute_series(graph: Pipeline, key: Hashable) -> pandas.Series: +def get_mapped_node_names(graph: Pipeline, key: Hashable) -> pandas.Series: """ - Given a graph with key depending on mapped nodes, compute a series for the key. + Given a graph with key depending on mapped nodes, return a series corresponding + mapped keys. + This is meant to be used in combination with :py:func:`Pipeline.map`. If the key depends on multiple indices, the series will be a multi-index series. Note that Pandas is not a dependency of Sciline and must be installed separately. @@ -211,14 +213,14 @@ def compute_series(graph: Pipeline, key: Hashable) -> pandas.Series: Parameters ---------- graph: - The pipeline to compute the series from. + The pipeline to get the mapped key names from. key: - The key to compute the series for. This key must depend on mapped nodes. + The key to get the mapped key names for. This key must depend on mapped nodes. Returns ------- : - The computed series. + The series of mapped key names. """ import pandas as pd from cyclebane.graph import IndexValues, NodeName @@ -230,6 +232,30 @@ def compute_series(graph: Pipeline, key: Hashable) -> pandas.Series: keys = tuple(NodeName(key, IndexValues(index_names, idx)) for idx in index) if index.nlevels == 1: # Avoid more complicate MultiIndex if unnecessary index = index.get_level_values(0) - key_series = pd.Series(keys, index=index, name=key) + return pd.Series(keys, index=index, name=key) + + +def compute_series(graph: Pipeline, key: Hashable) -> pandas.Series: + """ + Given a graph with key depending on mapped nodes, compute a series for the key. + + This is meant to be used in combination with :py:func:`Pipeline.map`. + If the key depends on multiple indices, the series will be a multi-index series. + + Note that Pandas is not a dependency of Sciline and must be installed separately. + + Parameters + ---------- + graph: + The pipeline to compute the series from. + key: + The key to compute the series for. This key must depend on mapped nodes. + + Returns + ------- + : + The computed series. + """ + key_series = get_mapped_node_names(graph, key) results = graph.compute(key_series) return key_series.apply(lambda x: results[x]) From 07b5e07f5ae566584a5547e3b874510a5a321180 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 18 Jun 2024 14:07:07 +0200 Subject: [PATCH 04/17] Satisfy mypy --- src/sciline/pipeline.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/sciline/pipeline.py b/src/sciline/pipeline.py index 9371a706..348a87cb 100644 --- a/src/sciline/pipeline.py +++ b/src/sciline/pipeline.py @@ -2,7 +2,7 @@ # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) from __future__ import annotations -from collections.abc import Callable, Hashable, Iterable +from collections.abc import Callable, Iterable from itertools import chain from types import UnionType from typing import TYPE_CHECKING, Any, TypeVar, get_args, get_type_hints, overload @@ -16,6 +16,7 @@ from .typing import Key if TYPE_CHECKING: + import graphviz import pandas @@ -88,7 +89,7 @@ def compute(self, tp: type | Iterable[type] | UnionType, **kwargs: Any) -> Any: """ return self.get(tp, **kwargs).compute() - def visualize(self, tp: type | Iterable[type], **kwargs: Any) -> graphviz.Digraph: # type: ignore[name-defined] # noqa: F821 + def visualize(self, tp: type | Iterable[type], **kwargs: Any) -> graphviz.Digraph: """ Return a graphviz Digraph object representing the graph for the given keys. @@ -200,7 +201,7 @@ def _repr_html_(self) -> str: return pipeline_html_repr(nodes) -def get_mapped_node_names(graph: Pipeline, key: Hashable) -> pandas.Series: +def get_mapped_node_names(graph: Pipeline, key: type) -> pandas.Series: """ Given a graph with key depending on mapped nodes, return a series corresponding mapped keys. @@ -235,7 +236,7 @@ def get_mapped_node_names(graph: Pipeline, key: Hashable) -> pandas.Series: return pd.Series(keys, index=index, name=key) -def compute_series(graph: Pipeline, key: Hashable) -> pandas.Series: +def compute_series(graph: Pipeline, key: type) -> pandas.Series: """ Given a graph with key depending on mapped nodes, compute a series for the key. From c7f95e257d6b4482b68105191f3529ab3829c6c6 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 18 Jun 2024 14:09:47 +0200 Subject: [PATCH 05/17] Add nicer exception --- src/sciline/pipeline.py | 2 ++ tests/pipeline_map_reduce_test.py | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/src/sciline/pipeline.py b/src/sciline/pipeline.py index 348a87cb..40f85cf2 100644 --- a/src/sciline/pipeline.py +++ b/src/sciline/pipeline.py @@ -228,6 +228,8 @@ def get_mapped_node_names(graph: Pipeline, key: type) -> pandas.Series: graph = graph[key] # Drops unrelated indices indices = graph._cbgraph.indices + if len(indices) == 0: + raise ValueError(f"'{key}' does not depend on any mapped nodes.") index_names = tuple(indices) index = pd.MultiIndex.from_product(indices.values(), names=index_names) keys = tuple(NodeName(key, IndexValues(index_names, idx)) for idx in index) diff --git a/tests/pipeline_map_reduce_test.py b/tests/pipeline_map_reduce_test.py index 25d22cf1..2bde7a91 100644 --- a/tests/pipeline_map_reduce_test.py +++ b/tests/pipeline_map_reduce_test.py @@ -142,3 +142,14 @@ def ab_to_c(a: A, b: B) -> C: assert result['b'] == A(10) assert result['c'] == A(20) assert result.name == A + + +def test_compute_series_raises_if_node_is_not_mapped() -> None: + def ab_to_c(a: A, b: B) -> C: + return C(a + b) + + pl = sl.Pipeline((ab_to_c,)) + pl[B] = B(7) + mapped = pl.map({A: [A(10 * i) for i in range(3)]}) + with pytest.raises(ValueError, match='does not depend on any mapped nodes'): + sl.compute_series(mapped, B) From e2c2ff2805d404f5520d18800720cc1c6efef475 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 18 Jun 2024 14:15:44 +0200 Subject: [PATCH 06/17] Add another test/example --- tests/pipeline_map_reduce_test.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/pipeline_map_reduce_test.py b/tests/pipeline_map_reduce_test.py index 2bde7a91..da1ab38c 100644 --- a/tests/pipeline_map_reduce_test.py +++ b/tests/pipeline_map_reduce_test.py @@ -153,3 +153,20 @@ def ab_to_c(a: A, b: B) -> C: mapped = pl.map({A: [A(10 * i) for i in range(3)]}) with pytest.raises(ValueError, match='does not depend on any mapped nodes'): sl.compute_series(mapped, B) + + +def test_can_compute_subset_of_get_mapped_node_names() -> None: + def ab_to_c(a: A, b: B) -> C: + return C(a + b) + + pl = sl.Pipeline((ab_to_c,)) + pl[B] = B(7) + paramsA = pd.DataFrame( + {A: [A(10 * i) for i in range(3)]}, index=['a', 'b', 'c'] + ).rename_axis('x') + mapped = pl.map(paramsA) + result = sl.get_mapped_node_names(mapped, C) + # We lose the convenience of compute_series which returns a nicely setup series + # but this is perfectly fine and possible. + assert mapped.compute(result[1]) == A(17) + assert mapped.compute(result.loc['b']) == A(17) From eaefb27687ed23a7d394aee3476388ea729b7175 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Tue, 18 Jun 2024 14:18:34 +0200 Subject: [PATCH 07/17] Avoid FutureWarning from Pandas --- tests/pipeline_map_reduce_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pipeline_map_reduce_test.py b/tests/pipeline_map_reduce_test.py index da1ab38c..8eb88fad 100644 --- a/tests/pipeline_map_reduce_test.py +++ b/tests/pipeline_map_reduce_test.py @@ -168,5 +168,5 @@ def ab_to_c(a: A, b: B) -> C: result = sl.get_mapped_node_names(mapped, C) # We lose the convenience of compute_series which returns a nicely setup series # but this is perfectly fine and possible. - assert mapped.compute(result[1]) == A(17) + assert mapped.compute(result.iloc[1]) == A(17) assert mapped.compute(result.loc['b']) == A(17) From 194beb50171f1c0d7e5a1b394fcecb46929822bd Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 19 Jun 2024 13:50:31 +0200 Subject: [PATCH 08/17] Rename to `compute_mapped` --- docs/api-reference/index.md | 2 +- src/sciline/__init__.py | 4 ++-- src/sciline/pipeline.py | 2 +- tests/pipeline_map_reduce_test.py | 12 ++++++------ 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/api-reference/index.md b/docs/api-reference/index.md index 2da1e36d..0135a0b8 100644 --- a/docs/api-reference/index.md +++ b/docs/api-reference/index.md @@ -28,7 +28,7 @@ :toctree: ../generated/functions :recursive: - compute_series + compute_mapped get_mapped_node_names ``` diff --git a/src/sciline/__init__.py b/src/sciline/__init__.py index a860fe78..c500f987 100644 --- a/src/sciline/__init__.py +++ b/src/sciline/__init__.py @@ -17,7 +17,7 @@ HandleAsComputeTimeException, UnsatisfiedRequirement, ) -from .pipeline import Pipeline, compute_series, get_mapped_node_names +from .pipeline import Pipeline, compute_mapped, get_mapped_node_names from .task_graph import TaskGraph __all__ = [ @@ -30,7 +30,7 @@ "UnsatisfiedRequirement", "HandleAsBuildTimeException", "HandleAsComputeTimeException", - "compute_series", + "compute_mapped", "get_mapped_node_names", ] diff --git a/src/sciline/pipeline.py b/src/sciline/pipeline.py index 40f85cf2..386829a1 100644 --- a/src/sciline/pipeline.py +++ b/src/sciline/pipeline.py @@ -238,7 +238,7 @@ def get_mapped_node_names(graph: Pipeline, key: type) -> pandas.Series: return pd.Series(keys, index=index, name=key) -def compute_series(graph: Pipeline, key: type) -> pandas.Series: +def compute_mapped(graph: Pipeline, key: type) -> pandas.Series: """ Given a graph with key depending on mapped nodes, compute a series for the key. diff --git a/tests/pipeline_map_reduce_test.py b/tests/pipeline_map_reduce_test.py index 8eb88fad..58d1837b 100644 --- a/tests/pipeline_map_reduce_test.py +++ b/tests/pipeline_map_reduce_test.py @@ -62,7 +62,7 @@ def ab_to_c(a: A, b: B) -> C: {A: [A(10 * i) for i in range(3)]}, index=['a', 'b', 'c'] ).rename_axis('x') mapped = pl.map(paramsA) - result = sl.compute_series(mapped, C) + result = sl.compute_mapped(mapped, C) assert result['a'] == C(7) assert result['b'] == C(17) assert result['c'] == C(27) @@ -78,7 +78,7 @@ def ab_to_c(a: A, b: B) -> C: pl[B] = B(7) paramsA = pd.DataFrame({A: [A(10 * i) for i in range(3)]}, index=['a', 'b', 'c']) mapped = pl.map(paramsA) - result = sl.compute_series(mapped, C) + result = sl.compute_mapped(mapped, C) assert result['a'] == C(7) assert result['b'] == C(17) assert result['c'] == C(27) @@ -93,7 +93,7 @@ def ab_to_c(a: A, b: B) -> C: pl = sl.Pipeline((ab_to_c,)) pl[B] = B(7) mapped = pl.map({A: [A(10 * i) for i in range(3)]}) - result = sl.compute_series(mapped, C) + result = sl.compute_mapped(mapped, C) assert result[0] == C(7) assert result[1] == C(17) assert result[2] == C(27) @@ -113,7 +113,7 @@ def ab_to_c(a: A, b: B) -> C: {B: [B(i) for i in range(2)]}, index=['aa', 'bb'] ).rename_axis('y') mapped = pl.map(paramsA).map(paramsB) - result = sl.compute_series(mapped, C) + result = sl.compute_mapped(mapped, C) assert result['a', 'aa'] == C(0) assert result['a', 'bb'] == C(1) assert result['b', 'aa'] == C(10) @@ -136,7 +136,7 @@ def ab_to_c(a: A, b: B) -> C: {B: [B(i) for i in range(2)]}, index=['aa', 'bb'] ).rename_axis('y') mapped = pl.map(paramsA).map(paramsB) - result = sl.compute_series(mapped, A) + result = sl.compute_mapped(mapped, A) assert result.index.name == 'x' assert result['a'] == A(0) assert result['b'] == A(10) @@ -152,7 +152,7 @@ def ab_to_c(a: A, b: B) -> C: pl[B] = B(7) mapped = pl.map({A: [A(10 * i) for i in range(3)]}) with pytest.raises(ValueError, match='does not depend on any mapped nodes'): - sl.compute_series(mapped, B) + sl.compute_mapped(mapped, B) def test_can_compute_subset_of_get_mapped_node_names() -> None: From 12442de96a13c3cc2a2fabf64b4ee7904558cd37 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 19 Jun 2024 14:07:56 +0200 Subject: [PATCH 09/17] Test and catch some more errors --- src/sciline/pipeline.py | 20 ++++++++++---- tests/pipeline_map_reduce_test.py | 46 +++++++++++++++++++++++++------ 2 files changed, 52 insertions(+), 14 deletions(-) diff --git a/src/sciline/pipeline.py b/src/sciline/pipeline.py index 386829a1..0d134a34 100644 --- a/src/sciline/pipeline.py +++ b/src/sciline/pipeline.py @@ -224,16 +224,24 @@ def get_mapped_node_names(graph: Pipeline, key: type) -> pandas.Series: The series of mapped key names. """ import pandas as pd - from cyclebane.graph import IndexValues, NodeName - - graph = graph[key] # Drops unrelated indices + from cyclebane.graph import IndexValues, MappedNode, NodeName + + underlying = graph._cbgraph.graph + candidates = [ + node + for node in underlying.nodes + if isinstance(node, MappedNode) and node.name == key + ] + if len(candidates) == 0: + raise ValueError(f"'{key}' is not a mapped node.") + if len(candidates) > 1: + raise ValueError(f"Multiple mapped nodes with name '{key}' found: {candidates}") + graph = graph[candidates[0]] # Drops unrelated indices indices = graph._cbgraph.indices - if len(indices) == 0: - raise ValueError(f"'{key}' does not depend on any mapped nodes.") index_names = tuple(indices) index = pd.MultiIndex.from_product(indices.values(), names=index_names) keys = tuple(NodeName(key, IndexValues(index_names, idx)) for idx in index) - if index.nlevels == 1: # Avoid more complicate MultiIndex if unnecessary + if index.nlevels == 1: # Avoid more complicated MultiIndex if unnecessary index = index.get_level_values(0) return pd.Series(keys, index=index, name=key) diff --git a/tests/pipeline_map_reduce_test.py b/tests/pipeline_map_reduce_test.py index 58d1837b..0bc68936 100644 --- a/tests/pipeline_map_reduce_test.py +++ b/tests/pipeline_map_reduce_test.py @@ -52,7 +52,7 @@ def test_reduce_returns_pipeline_passing_mapped_branches_to_reducing_func() -> N assert mapped.reduce(func=max, name=Result).compute(Result) == Result(21) -def test_compute_series_single_index() -> None: +def test_compute_mapped_single_index() -> None: def ab_to_c(a: A, b: B) -> C: return C(a + b) @@ -70,7 +70,7 @@ def ab_to_c(a: A, b: B) -> C: assert result.name == C -def test_compute_series_single_index_with_no_name() -> None: +def test_compute_mapped_single_index_with_no_name() -> None: def ab_to_c(a: A, b: B) -> C: return C(a + b) @@ -86,7 +86,7 @@ def ab_to_c(a: A, b: B) -> C: assert result.name == C -def test_compute_series_from_mapped_with_implicit_index() -> None: +def test_compute_mapped_from_mapped_with_implicit_index() -> None: def ab_to_c(a: A, b: B) -> C: return C(a + b) @@ -101,7 +101,7 @@ def ab_to_c(a: A, b: B) -> C: assert result.name == C -def test_compute_series_multiple_indices_creates_multiindex() -> None: +def test_compute_mapped_multiple_indices_creates_multiindex() -> None: def ab_to_c(a: A, b: B) -> C: return C(a + b) @@ -124,7 +124,7 @@ def ab_to_c(a: A, b: B) -> C: assert result.name == C -def test_compute_series_ignores_unrelated_index() -> None: +def test_compute_mapped_ignores_unrelated_index() -> None: def ab_to_c(a: A, b: B) -> C: return C(a + b) @@ -144,17 +144,31 @@ def ab_to_c(a: A, b: B) -> C: assert result.name == A -def test_compute_series_raises_if_node_is_not_mapped() -> None: +def test_compute_mapped_raises_if_node_is_not_mapped() -> None: def ab_to_c(a: A, b: B) -> C: return C(a + b) pl = sl.Pipeline((ab_to_c,)) pl[B] = B(7) mapped = pl.map({A: [A(10 * i) for i in range(3)]}) - with pytest.raises(ValueError, match='does not depend on any mapped nodes'): + with pytest.raises(ValueError, match='is not a mapped node'): sl.compute_mapped(mapped, B) +def test_compute_mapped_raises_if_node_depends_on_but_is_not_mapped() -> None: + def ab_to_c(a: A, b: B) -> C: + return C(a + b) + + pl = sl.Pipeline((ab_to_c,)) + pl[B] = B(7) + pl = pl.map({A: [A(10 * i) for i in range(3)]}).reduce(func=max, name=C) + pl.insert(c_to_d) + # Slightly different failure case from above: The relevant subgraph *does* have + # indices, but the node does not. + with pytest.raises(ValueError, match='is not a mapped node'): + sl.compute_mapped(pl, D) + + def test_can_compute_subset_of_get_mapped_node_names() -> None: def ab_to_c(a: A, b: B) -> C: return C(a + b) @@ -166,7 +180,23 @@ def ab_to_c(a: A, b: B) -> C: ).rename_axis('x') mapped = pl.map(paramsA) result = sl.get_mapped_node_names(mapped, C) - # We lose the convenience of compute_series which returns a nicely setup series + # We lose the convenience of compute_mapped which returns a nicely setup series # but this is perfectly fine and possible. assert mapped.compute(result.iloc[1]) == A(17) assert mapped.compute(result.loc['b']) == A(17) + + +def test_compute_mapped_raises_if_multiple_mapped_nodes_with_given_name() -> None: + def ab_to_c(a: A, b: B) -> C: + return C(a + b) + + pl = sl.Pipeline((ab_to_c,)) + paramsA = pd.DataFrame( + {A: [A(10 * i) for i in range(3)]}, index=['a', 'b', 'c'] + ).rename_axis('x') + paramsB = pd.DataFrame( + {B: [B(i) for i in range(2)]}, index=['aa', 'bb'] + ).rename_axis('y') + pl = pl.map(paramsA).map(paramsB).reduce(func=max, name=C, index='x') + with pytest.raises(ValueError, match='Multiple mapped nodes with name'): + sl.compute_mapped(pl, C) From 0df63008b417b68c1ce66ff310b58d8c31bb295c Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 19 Jun 2024 14:35:29 +0200 Subject: [PATCH 10/17] Add argument to disambiguate if multiple matches --- src/sciline/pipeline.py | 30 ++++++++++++++++++++++++------ tests/pipeline_map_reduce_test.py | 26 ++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 6 deletions(-) diff --git a/src/sciline/pipeline.py b/src/sciline/pipeline.py index 0d134a34..056e85a8 100644 --- a/src/sciline/pipeline.py +++ b/src/sciline/pipeline.py @@ -2,7 +2,7 @@ # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) from __future__ import annotations -from collections.abc import Callable, Iterable +from collections.abc import Callable, Hashable, Iterable, Sequence from itertools import chain from types import UnionType from typing import TYPE_CHECKING, Any, TypeVar, get_args, get_type_hints, overload @@ -201,7 +201,9 @@ def _repr_html_(self) -> str: return pipeline_html_repr(nodes) -def get_mapped_node_names(graph: Pipeline, key: type) -> pandas.Series: +def get_mapped_node_names( + graph: Pipeline, key: type, index_names: Sequence[Hashable] | None = None +) -> pandas.Series: """ Given a graph with key depending on mapped nodes, return a series corresponding mapped keys. @@ -217,6 +219,10 @@ def get_mapped_node_names(graph: Pipeline, key: type) -> pandas.Series: The pipeline to get the mapped key names from. key: The key to get the mapped key names for. This key must depend on mapped nodes. + index_names: + Specifies the names of the indices of the mapped node. If not given this is + inferred from the graph, but the argument may be required to disambiguate + multiple mapped nodes with the same name. Returns ------- @@ -226,19 +232,25 @@ def get_mapped_node_names(graph: Pipeline, key: type) -> pandas.Series: import pandas as pd from cyclebane.graph import IndexValues, MappedNode, NodeName - underlying = graph._cbgraph.graph candidates = [ node - for node in underlying.nodes + for node in graph._cbgraph.graph.nodes if isinstance(node, MappedNode) and node.name == key ] + if index_names is not None: + candidates = [ + node for node in candidates if set(node.indices) == set(index_names) + ] if len(candidates) == 0: raise ValueError(f"'{key}' is not a mapped node.") if len(candidates) > 1: raise ValueError(f"Multiple mapped nodes with name '{key}' found: {candidates}") graph = graph[candidates[0]] # Drops unrelated indices indices = graph._cbgraph.indices + if index_names is not None: + indices = {name: indices[name] for name in indices if name in index_names} index_names = tuple(indices) + index = pd.MultiIndex.from_product(indices.values(), names=index_names) keys = tuple(NodeName(key, IndexValues(index_names, idx)) for idx in index) if index.nlevels == 1: # Avoid more complicated MultiIndex if unnecessary @@ -246,7 +258,9 @@ def get_mapped_node_names(graph: Pipeline, key: type) -> pandas.Series: return pd.Series(keys, index=index, name=key) -def compute_mapped(graph: Pipeline, key: type) -> pandas.Series: +def compute_mapped( + graph: Pipeline, key: type, index_names: Sequence[Hashable] | None = None +) -> pandas.Series: """ Given a graph with key depending on mapped nodes, compute a series for the key. @@ -261,12 +275,16 @@ def compute_mapped(graph: Pipeline, key: type) -> pandas.Series: The pipeline to compute the series from. key: The key to compute the series for. This key must depend on mapped nodes. + index_names: + Specifies the names of the indices of the mapped node. If not given this is + inferred from the graph, but the argument may be required to disambiguate + multiple mapped nodes with the same name. Returns ------- : The computed series. """ - key_series = get_mapped_node_names(graph, key) + key_series = get_mapped_node_names(graph=graph, key=key, index_names=index_names) results = graph.compute(key_series) return key_series.apply(lambda x: results[x]) diff --git a/tests/pipeline_map_reduce_test.py b/tests/pipeline_map_reduce_test.py index 0bc68936..bdb26511 100644 --- a/tests/pipeline_map_reduce_test.py +++ b/tests/pipeline_map_reduce_test.py @@ -200,3 +200,29 @@ def ab_to_c(a: A, b: B) -> C: pl = pl.map(paramsA).map(paramsB).reduce(func=max, name=C, index='x') with pytest.raises(ValueError, match='Multiple mapped nodes with name'): sl.compute_mapped(pl, C) + + +def test_compute_mapped_indices_selects_between_multiple_candidates() -> None: + def ab_to_c(a: A, b: B) -> C: + return C(a + b) + + pl = sl.Pipeline((ab_to_c,)) + paramsA = pd.DataFrame( + {A: [A(10 * i) for i in range(3)]}, index=['a', 'b', 'c'] + ).rename_axis('x') + paramsB = pd.DataFrame( + {B: [B(i) for i in range(2)]}, index=['aa', 'bb'] + ).rename_axis('y') + mapped = pl.map(paramsA).map(paramsB) + pl = mapped.reduce(func=max, name=C, index='x') + result_y = sl.compute_mapped(pl, C, index_names=('y',)) + assert result_y['aa'] == C(20) + assert result_y['bb'] == C(21) + for index_names in [('x', 'y'), ('y', 'x')]: + result_xy = sl.compute_mapped(pl, C, index_names=index_names) + assert result_xy['a', 'aa'] == C(0) + assert result_xy['a', 'bb'] == C(1) + assert result_xy['b', 'aa'] == C(10) + assert result_xy['b', 'bb'] == C(11) + assert result_xy['c', 'aa'] == C(20) + assert result_xy['c', 'bb'] == C(21) From 303bfb70a0ce006320398c4916e4b81fbf884f13 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 19 Jun 2024 14:40:02 +0200 Subject: [PATCH 11/17] Fix mypy --- src/sciline/pipeline.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/sciline/pipeline.py b/src/sciline/pipeline.py index 056e85a8..f8affa99 100644 --- a/src/sciline/pipeline.py +++ b/src/sciline/pipeline.py @@ -245,7 +245,8 @@ def get_mapped_node_names( raise ValueError(f"'{key}' is not a mapped node.") if len(candidates) > 1: raise ValueError(f"Multiple mapped nodes with name '{key}' found: {candidates}") - graph = graph[candidates[0]] # Drops unrelated indices + # Drops unrelated indices + graph = graph[candidates[0]] # type: ignore[index] indices = graph._cbgraph.indices if index_names is not None: indices = {name: indices[name] for name in indices if name in index_names} From 1997df4a9602c9d130c26f27734279aa192a3112 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Wed, 19 Jun 2024 14:51:26 +0200 Subject: [PATCH 12/17] Fix docs --- docs/user-guide/parameter-tables.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/user-guide/parameter-tables.ipynb b/docs/user-guide/parameter-tables.ipynb index 8bfd5bd2..9557993d 100644 --- a/docs/user-guide/parameter-tables.ipynb +++ b/docs/user-guide/parameter-tables.ipynb @@ -146,7 +146,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We can use the [compute_series](https://scipp.github.io/sciline/generated/functions/sciline.compute_series.html) function to compute `Result` for each index in the parameter table:" + "We can use the [compute_mapped](https://scipp.github.io/sciline/generated/functions/sciline.compute_mapped.html) function to compute `Result` for each index in the parameter table:" ] }, { @@ -155,7 +155,7 @@ "metadata": {}, "outputs": [], "source": [ - "results = sciline.compute_series(pipeline, Result)\n", + "results = sciline.compute_mapped(pipeline, Result)\n", "pd.DataFrame(results) # DataFrame for HTML rendering" ] }, @@ -170,7 +170,7 @@ "\n", "**Note**\n", "\n", - "[compute_series](https://scipp.github.io/sciline/generated/functions/sciline.compute_series.html) depends on Pandas, which is not a dependency of Sciline and must be installed separately, e.g., using pip:\n", + "[compute_mapped](https://scipp.github.io/sciline/generated/functions/sciline.compute_mapped.html) depends on Pandas, which is not a dependency of Sciline and must be installed separately, e.g., using pip:\n", "\n", "```bash\n", "pip install pandas\n", From 4ae04f7466a06b0ab641997e5e1eb2f01cde5882 Mon Sep 17 00:00:00 2001 From: Simon Heybrock <12912489+SimonHeybrock@users.noreply.github.com> Date: Fri, 21 Jun 2024 10:14:54 +0200 Subject: [PATCH 13/17] Update src/sciline/pipeline.py Co-authored-by: Jan-Lukas Wynen --- src/sciline/pipeline.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/sciline/pipeline.py b/src/sciline/pipeline.py index f8affa99..c8e1e36b 100644 --- a/src/sciline/pipeline.py +++ b/src/sciline/pipeline.py @@ -208,6 +208,11 @@ def get_mapped_node_names( Given a graph with key depending on mapped nodes, return a series corresponding mapped keys. + This is meant to be used in combination with :py:func:`Pipeline.map`. + Return all mapped keys for a base key. + + Given a graph with mapped nodes and a key to a node, this function returns + the keys of mapped nodes that correspond to the key. This is meant to be used in combination with :py:func:`Pipeline.map`. If the key depends on multiple indices, the series will be a multi-index series. From 9d4bd3621474096478b01526e04d28bf8a7c669e Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 21 Jun 2024 10:13:28 +0200 Subject: [PATCH 14/17] Use relative links --- docs/user-guide/parameter-tables.ipynb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/user-guide/parameter-tables.ipynb b/docs/user-guide/parameter-tables.ipynb index 9557993d..c9c8b7e8 100644 --- a/docs/user-guide/parameter-tables.ipynb +++ b/docs/user-guide/parameter-tables.ipynb @@ -130,7 +130,7 @@ "Equivalently the table could be represented as a `dict`, where each key corresponds to a column header and each value is a list of values for that column, i.e., `{Filename: filenames}`.\n", "Specifying an index is currently not possible in this case, and it will default to a range index.\n", "\n", - "We can now use [Pipeline.map](https://scipp.github.io/sciline/generated/classes/sciline.Pipeline.html#sciline.Pipeline.map) to create a modified pipeline that processes each row in the parameter table:" + "We can now use [Pipeline.map](../generated/classes/sciline.Pipeline.html#sciline.Pipeline.map) to create a modified pipeline that processes each row in the parameter table:" ] }, { @@ -146,7 +146,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We can use the [compute_mapped](https://scipp.github.io/sciline/generated/functions/sciline.compute_mapped.html) function to compute `Result` for each index in the parameter table:" + "We can use the [compute_mapped](../generated/functions/sciline.compute_mapped.html) function to compute `Result` for each index in the parameter table:" ] }, { @@ -170,7 +170,7 @@ "\n", "**Note**\n", "\n", - "[compute_mapped](https://scipp.github.io/sciline/generated/functions/sciline.compute_mapped.html) depends on Pandas, which is not a dependency of Sciline and must be installed separately, e.g., using pip:\n", + "[compute_mapped](../generated/functions/sciline.compute_mapped.html) depends on Pandas, which is not a dependency of Sciline and must be installed separately, e.g., using pip:\n", "\n", "```bash\n", "pip install pandas\n", @@ -180,7 +180,7 @@ "\n", "We can also visualize the task graph for computing the series of `Result` values.\n", "For this, we need to get all the node names derived from `Result` via the `map` operation.\n", - "The [get_mapped_node_names](https://scipp.github.io/sciline/generated/functions/sciline.get_mapped_node_names.html) function can be used to get a `pandas.Series` of these node names, which we can then visualize:" + "The [get_mapped_node_names](../generated/functions/sciline.get_mapped_node_names.html) function can be used to get a `pandas.Series` of these node names, which we can then visualize:" ] }, { From 151fd4c88652c87ae27e847a9e332fea28ec1fc8 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 21 Jun 2024 10:20:28 +0200 Subject: [PATCH 15/17] More precise type hints and naming --- src/sciline/pipeline.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/sciline/pipeline.py b/src/sciline/pipeline.py index c8e1e36b..c5a5c376 100644 --- a/src/sciline/pipeline.py +++ b/src/sciline/pipeline.py @@ -202,7 +202,7 @@ def _repr_html_(self) -> str: def get_mapped_node_names( - graph: Pipeline, key: type, index_names: Sequence[Hashable] | None = None + graph: DataGraph, key: type, index_names: Sequence[Hashable] | None = None ) -> pandas.Series: """ Given a graph with key depending on mapped nodes, return a series corresponding @@ -221,7 +221,7 @@ def get_mapped_node_names( Parameters ---------- graph: - The pipeline to get the mapped key names from. + The data graph to get the mapped key names from. key: The key to get the mapped key names for. This key must depend on mapped nodes. index_names: @@ -265,10 +265,10 @@ def get_mapped_node_names( def compute_mapped( - graph: Pipeline, key: type, index_names: Sequence[Hashable] | None = None + pipeline: Pipeline, key: type, index_names: Sequence[Hashable] | None = None ) -> pandas.Series: """ - Given a graph with key depending on mapped nodes, compute a series for the key. + Given a pipeline with key depending on mapped nodes, compute a series for the key. This is meant to be used in combination with :py:func:`Pipeline.map`. If the key depends on multiple indices, the series will be a multi-index series. @@ -277,7 +277,7 @@ def compute_mapped( Parameters ---------- - graph: + pipeline: The pipeline to compute the series from. key: The key to compute the series for. This key must depend on mapped nodes. @@ -291,6 +291,6 @@ def compute_mapped( : The computed series. """ - key_series = get_mapped_node_names(graph=graph, key=key, index_names=index_names) - results = graph.compute(key_series) + key_series = get_mapped_node_names(graph=pipeline, key=key, index_names=index_names) + results = pipeline.compute(key_series) return key_series.apply(lambda x: results[x]) From c88585bd3649af2b1dfea2a97109287e37431d23 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 21 Jun 2024 10:27:44 +0200 Subject: [PATCH 16/17] Avoid mixing "key" and "name" --- src/sciline/pipeline.py | 61 ++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/src/sciline/pipeline.py b/src/sciline/pipeline.py index c5a5c376..424db3f4 100644 --- a/src/sciline/pipeline.py +++ b/src/sciline/pipeline.py @@ -202,28 +202,24 @@ def _repr_html_(self) -> str: def get_mapped_node_names( - graph: DataGraph, key: type, index_names: Sequence[Hashable] | None = None + graph: DataGraph, base_name: type, *, index_names: Sequence[Hashable] | None = None ) -> pandas.Series: """ - Given a graph with key depending on mapped nodes, return a series corresponding - mapped keys. + Given a graph with a mapped node with given base_name, return a series of + corresponding mapped names. - This is meant to be used in combination with :py:func:`Pipeline.map`. - Return all mapped keys for a base key. - - Given a graph with mapped nodes and a key to a node, this function returns - the keys of mapped nodes that correspond to the key. - This is meant to be used in combination with :py:func:`Pipeline.map`. - If the key depends on multiple indices, the series will be a multi-index series. + This is meant to be used in combination with :py:func:`DataGraph.map`. + If the mapped node depends on multiple indices, the index of the returned series + will have a multi-index. Note that Pandas is not a dependency of Sciline and must be installed separately. Parameters ---------- graph: - The data graph to get the mapped key names from. - key: - The key to get the mapped key names for. This key must depend on mapped nodes. + The data graph to get the mapped node names from. + base_name: + The base name of the mapped node to get the names for. index_names: Specifies the names of the indices of the mapped node. If not given this is inferred from the graph, but the argument may be required to disambiguate @@ -232,7 +228,7 @@ def get_mapped_node_names( Returns ------- : - The series of mapped key names. + The series of node names corresponding to the mapped node. """ import pandas as pd from cyclebane.graph import IndexValues, MappedNode, NodeName @@ -240,16 +236,18 @@ def get_mapped_node_names( candidates = [ node for node in graph._cbgraph.graph.nodes - if isinstance(node, MappedNode) and node.name == key + if isinstance(node, MappedNode) and node.name == base_name ] if index_names is not None: candidates = [ node for node in candidates if set(node.indices) == set(index_names) ] if len(candidates) == 0: - raise ValueError(f"'{key}' is not a mapped node.") + raise ValueError(f"'{base_name}' is not a mapped node.") if len(candidates) > 1: - raise ValueError(f"Multiple mapped nodes with name '{key}' found: {candidates}") + raise ValueError( + f"Multiple mapped nodes with name '{base_name}' found: {candidates}" + ) # Drops unrelated indices graph = graph[candidates[0]] # type: ignore[index] indices = graph._cbgraph.indices @@ -258,29 +256,34 @@ def get_mapped_node_names( index_names = tuple(indices) index = pd.MultiIndex.from_product(indices.values(), names=index_names) - keys = tuple(NodeName(key, IndexValues(index_names, idx)) for idx in index) + keys = tuple(NodeName(base_name, IndexValues(index_names, idx)) for idx in index) if index.nlevels == 1: # Avoid more complicated MultiIndex if unnecessary index = index.get_level_values(0) - return pd.Series(keys, index=index, name=key) + return pd.Series(keys, index=index, name=base_name) def compute_mapped( - pipeline: Pipeline, key: type, index_names: Sequence[Hashable] | None = None + pipeline: Pipeline, + base_name: type, + *, + index_names: Sequence[Hashable] | None = None, ) -> pandas.Series: """ - Given a pipeline with key depending on mapped nodes, compute a series for the key. + Given a graph with a mapped node with given base_name, return a series of computed + results. This is meant to be used in combination with :py:func:`Pipeline.map`. - If the key depends on multiple indices, the series will be a multi-index series. + If the mapped node depends on multiple indices, the index of the returned series + will have a multi-index. Note that Pandas is not a dependency of Sciline and must be installed separately. Parameters ---------- - pipeline: - The pipeline to compute the series from. - key: - The key to compute the series for. This key must depend on mapped nodes. + graph: + The data graph to get the mapped node names from. + base_name: + The base name of the mapped node to get the names for. index_names: Specifies the names of the indices of the mapped node. If not given this is inferred from the graph, but the argument may be required to disambiguate @@ -289,8 +292,10 @@ def compute_mapped( Returns ------- : - The computed series. + The series of computed results corresponding to the mapped node. """ - key_series = get_mapped_node_names(graph=pipeline, key=key, index_names=index_names) + key_series = get_mapped_node_names( + graph=pipeline, base_name=base_name, index_names=index_names + ) results = pipeline.compute(key_series) return key_series.apply(lambda x: results[x]) From f6d805878cf25a4c0936fe9b9aaa45d3e1a33868 Mon Sep 17 00:00:00 2001 From: Simon Heybrock Date: Fri, 21 Jun 2024 10:29:33 +0200 Subject: [PATCH 17/17] Move check --- src/sciline/pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sciline/pipeline.py b/src/sciline/pipeline.py index 424db3f4..6b6ff3e8 100644 --- a/src/sciline/pipeline.py +++ b/src/sciline/pipeline.py @@ -238,12 +238,12 @@ def get_mapped_node_names( for node in graph._cbgraph.graph.nodes if isinstance(node, MappedNode) and node.name == base_name ] + if len(candidates) == 0: + raise ValueError(f"'{base_name}' is not a mapped node.") if index_names is not None: candidates = [ node for node in candidates if set(node.indices) == set(index_names) ] - if len(candidates) == 0: - raise ValueError(f"'{base_name}' is not a mapped node.") if len(candidates) > 1: raise ValueError( f"Multiple mapped nodes with name '{base_name}' found: {candidates}"