From fc552711dd4baaa4278fcb41f00aff46be7ec605 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 12 Jan 2025 23:36:07 +0100 Subject: [PATCH 1/8] Add "did you mean" function --- xarray/core/utils.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 11f9ee49ca2..34938072223 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -114,6 +114,31 @@ def wrapper(*args, **kwargs): return wrapper +def did_you_mean(word: Hashable, possibilities: Iterable[Hashable]) -> str: + """ + Get a suggested word. + + Examples + -------- + >>> did_you_mean("bluch", ("blech", "gray_r", 1, None, (2, 56))) + "Did you mean one of ('blech',)?" + >>> did_you_mean(1, ("blech", "gray_r", 1, None, (2, 56))) + 'Did you mean one of (1,)?' + """ + import difflib + + possibilites_str: dict[str, Hashable] = {str(k): k for k in possibilities} + + msg = "" + if len( + best_str := difflib.get_close_matches(str(word), list(possibilites_str.keys())) + ): + best = tuple(possibilites_str[k] for k in best_str) + msg = f"Did you mean one of {best}?" + + return msg + + def get_valid_numpy_dtype(array: np.ndarray | pd.Index) -> np.dtype: """Return a numpy compatible dtype from either a numpy array or a pandas.Index. From ffc645e07190e20a84c6af5fea368c67421c1091 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 12 Jan 2025 23:36:39 +0100 Subject: [PATCH 2/8] improve error for wrong key in dataset --- xarray/core/dataset.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d6ffa7308a3..fb50bcf9976 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1611,6 +1611,11 @@ def __getitem__( return self._construct_dataarray(key) except KeyError as e: message = f"No variable named {key!r}. 
Variables on the dataset include {shorten_list_repr(list(self.variables.keys()), max_items=10)}" + + best_guess = utils.did_you_mean(key, self.variables.keys()) + if best_guess: + message += f" {best_guess}" + # If someone attempts `ds['foo' , 'bar']` instead of `ds[['foo', 'bar']]` if isinstance(key, tuple): message += f"\nHint: use a list to select multiple variables, for example `ds[{list(key)}]`" From 22cf0da732bf684e38fce07c8daef027643802ed Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 13 Jan 2025 00:55:43 +0100 Subject: [PATCH 3/8] Prioritize best guess --- xarray/core/dataset.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index fb50bcf9976..1e14e7565c8 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1610,11 +1610,13 @@ def __getitem__( try: return self._construct_dataarray(key) except KeyError as e: - message = f"No variable named {key!r}. Variables on the dataset include {shorten_list_repr(list(self.variables.keys()), max_items=10)}" + message = f"No variable named {key!r}." 
best_guess = utils.did_you_mean(key, self.variables.keys()) if best_guess: message += f" {best_guess}" + else: + message += f" Variables on the dataset include {shorten_list_repr(list(self.variables.keys()), max_items=10)}" # If someone attempts `ds['foo' , 'bar']` instead of `ds[['foo', 'bar']]` if isinstance(key, tuple): From bc07c206aa715563a944feb1a0710d6ef68f6791 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 13 Jan 2025 00:57:45 +0100 Subject: [PATCH 4/8] increase number of valid suggestions to match previous idea --- xarray/core/utils.py | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 34938072223..c939a7b037e 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -37,6 +37,7 @@ from __future__ import annotations import contextlib +import difflib import functools import importlib import inspect @@ -114,24 +115,39 @@ def wrapper(*args, **kwargs): return wrapper -def did_you_mean(word: Hashable, possibilities: Iterable[Hashable]) -> str: +def did_you_mean( + word: Hashable, possibilities: Iterable[Hashable], *, n: int = 10 +) -> str: """ - Get a suggested word. + Suggest a few correct words based on a list of possibilities. + + Parameters + ---------- + word : Hashable + Word to compare to a list of possibilities. + possibilities : Iterable of Hashable + The iterable of Hashable that contains the correct values. + n : int, default: 10 + Maximum number of suggestions to show. Examples -------- >>> did_you_mean("bluch", ("blech", "gray_r", 1, None, (2, 56))) "Did you mean one of ('blech',)?" - >>> did_you_mean(1, ("blech", "gray_r", 1, None, (2, 56))) - 'Did you mean one of (1,)?' - """ - import difflib + >>> did_you_mean("none", ("blech", "gray_r", 1, None, (2, 56))) + 'Did you mean one of (None,)?' 
+ See also + -------- + https://en.wikipedia.org/wiki/String_metric + """ possibilites_str: dict[str, Hashable] = {str(k): k for k in possibilities} msg = "" if len( - best_str := difflib.get_close_matches(str(word), list(possibilites_str.keys())) + best_str := difflib.get_close_matches( + str(word), list(possibilites_str.keys()), n=n + ) ): best = tuple(possibilites_str[k] for k in best_str) msg = f"Did you mean one of {best}?" From ed6d60065d2fbf577630d0bffa4ed0237e775a06 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 13 Jan 2025 01:05:41 +0100 Subject: [PATCH 5/8] Update dataset.py --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 1e14e7565c8..a943d9bfc57 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1616,7 +1616,7 @@ def __getitem__( if best_guess: message += f" {best_guess}" else: - message += f" Variables on the dataset include {shorten_list_repr(list(self.variables.keys()), max_items=10)}" + message += f" Variables on the dataset include {shorten_list_repr(list(self.variables.keys()), max_items=10)}" # If someone attempts `ds['foo' , 'bar']` instead of `ds[['foo', 'bar']]` if isinstance(key, tuple): From f26f9d87a609940f4507b9accf2e2919c69f5c79 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 13 Jan 2025 01:15:56 +0100 Subject: [PATCH 6/8] Update utils.py --- xarray/core/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index c939a7b037e..c3187b77722 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -141,6 +141,7 @@ def did_you_mean( -------- https://en.wikipedia.org/wiki/String_metric """ + # Convert all values to string, get_close_matches doesn't handle all hashables: possibilites_str: dict[str, Hashable] = {str(k): k for k in possibilities} msg = "" From 
a02987326dd1c6620bd73256cfab474638b2fa3f Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Fri, 17 Jan 2025 18:21:45 +0100 Subject: [PATCH 7/8] Update whats-new.rst --- doc/whats-new.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a19345b4ef6..7042d03d70a 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,7 +21,8 @@ v2025.01.2 (unreleased) New Features ~~~~~~~~~~~~ - +- Improve the error message raised when a key does not match any of the available variables in a dataset. (:pull:`9943`) + By `Jimmy Westling <https://github.com/illviljan>`_. Breaking changes ~~~~~~~~~~~~~~~~ From 86ce18b94703a6f2cbfa860878cbbd273df5b8f4 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Fri, 17 Jan 2025 18:26:03 +0100 Subject: [PATCH 8/8] Update whats-new.rst --- doc/whats-new.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 0a13e331b22..fe698bc358b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -50,10 +50,10 @@ eventually be deprecated. New Features ~~~~~~~~~~~~ -- Improve the error message raised when a key does not match any of the available variables in a dataset. (:pull:`9943`) - By `Jimmy Westling <https://github.com/illviljan>`_. - Relax nanosecond datetime restriction in CF time decoding (:issue:`7493`, :pull:`9618`). By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_ and `Spencer Clark <https://github.com/spencerkclark>`_. +- Improve the error message raised when a key does not match any of the available variables in a dataset. (:pull:`9943`) + By `Jimmy Westling <https://github.com/illviljan>`_. Breaking changes ~~~~~~~~~~~~~~~~