diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 467ef536a08..fe698bc358b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -52,6 +52,8 @@ New Features ~~~~~~~~~~~~ - Relax nanosecond datetime restriction in CF time decoding (:issue:`7493`, :pull:`9618`). By `Kai Mühlbauer `_ and `Spencer Clark `_. +- Improve the error message raised when a key does not match any of the available variables in a dataset. (:pull:`9943`) + By `Jimmy Westling `_. Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d6ffa7308a3..a943d9bfc57 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1610,7 +1610,14 @@ def __getitem__( try: return self._construct_dataarray(key) except KeyError as e: - message = f"No variable named {key!r}. Variables on the dataset include {shorten_list_repr(list(self.variables.keys()), max_items=10)}" + message = f"No variable named {key!r}." + + best_guess = utils.did_you_mean(key, self.variables.keys()) + if best_guess: + message += f" {best_guess}" + else: + message += f" Variables on the dataset include {shorten_list_repr(list(self.variables.keys()), max_items=10)}" + # If someone attempts `ds['foo' , 'bar']` instead of `ds[['foo', 'bar']]` if isinstance(key, tuple): message += f"\nHint: use a list to select multiple variables, for example `ds[{list(key)}]`" diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 11f9ee49ca2..c3187b77722 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -37,6 +37,7 @@ from __future__ import annotations import contextlib +import difflib import functools import importlib import inspect @@ -114,6 +115,47 @@ def wrapper(*args, **kwargs): return wrapper +def did_you_mean( + word: Hashable, possibilities: Iterable[Hashable], *, n: int = 10 +) -> str: + """ + Suggest a few correct words based on a list of possibilities + + Parameters + ---------- + word : Hashable + Word to compare to a list of possibilities. 
+ possibilities : Iterable of Hashable + The iterable of Hashable that contains the correct values. + n : int, default: 10 + Maximum number of suggestions to show. + + Examples + -------- + >>> did_you_mean("bluch", ("blech", "gray_r", 1, None, (2, 56))) + "Did you mean one of ('blech',)?" + >>> did_you_mean("none", ("blech", "gray_r", 1, None, (2, 56))) + 'Did you mean one of (None,)?' + + See also + -------- + https://en.wikipedia.org/wiki/String_metric + """ + # Convert all values to string, get_close_matches doesn't handle all hashables: + possibilites_str: dict[str, Hashable] = {str(k): k for k in possibilities} + + msg = "" + if len( + best_str := difflib.get_close_matches( + str(word), list(possibilites_str.keys()), n=n + ) + ): + best = tuple(possibilites_str[k] for k in best_str) + msg = f"Did you mean one of {best}?" + + return msg + + def get_valid_numpy_dtype(array: np.ndarray | pd.Index) -> np.dtype: """Return a numpy compatible dtype from either a numpy array or a pandas.Index.