Skip to content

Commit

Permalink
Suggest the correct name when no key matches in the dataset (#9943)
Browse files Browse the repository at this point in the history
* Add "did you mean" function

* improve error for wrong key in dataset

* Prioritize best guess

* increase number of valid suggestions to match previous idea

* Update dataset.py

* Update utils.py

* Update whats-new.rst

* Update whats-new.rst
  • Loading branch information
Illviljan authored Jan 17, 2025
1 parent 5761de6 commit 70997ef
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 1 deletion.
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ New Features
~~~~~~~~~~~~
- Relax nanosecond datetime restriction in CF time decoding (:issue:`7493`, :pull:`9618`).
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_ and `Spencer Clark <https://github.com/spencerkclark>`_.
- Improve the error message raised when a key does not match any of the available variables in a dataset. (:pull:`9943`)
By `Jimmy Westling <https://github.com/illviljan>`_.

Breaking changes
~~~~~~~~~~~~~~~~
Expand Down
9 changes: 8 additions & 1 deletion xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1610,7 +1610,14 @@ def __getitem__(
try:
return self._construct_dataarray(key)
except KeyError as e:
message = f"No variable named {key!r}. Variables on the dataset include {shorten_list_repr(list(self.variables.keys()), max_items=10)}"
message = f"No variable named {key!r}."

best_guess = utils.did_you_mean(key, self.variables.keys())
if best_guess:
message += f" {best_guess}"
else:
message += f" Variables on the dataset include {shorten_list_repr(list(self.variables.keys()), max_items=10)}"

# If someone attempts `ds['foo' , 'bar']` instead of `ds[['foo', 'bar']]`
if isinstance(key, tuple):
message += f"\nHint: use a list to select multiple variables, for example `ds[{list(key)}]`"
Expand Down
42 changes: 42 additions & 0 deletions xarray/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from __future__ import annotations

import contextlib
import difflib
import functools
import importlib
import inspect
Expand Down Expand Up @@ -114,6 +115,47 @@ def wrapper(*args, **kwargs):
return wrapper


def did_you_mean(
    word: Hashable, possibilities: Iterable[Hashable], *, n: int = 10
) -> str:
    """
    Build a "Did you mean ...?" hint from the candidates closest to ``word``.

    Parameters
    ----------
    word : Hashable
        The value that failed to match; it is compared by its ``str()`` form.
    possibilities : Iterable of Hashable
        The valid values to search for near matches.
    n : int, default: 10
        Maximum number of suggestions to include in the hint.

    Returns
    -------
    str
        A message listing the close matches as a tuple of the original
        (unconverted) values, best match first, or an empty string when
        nothing is close enough.

    Examples
    --------
    >>> did_you_mean("bluch", ("blech", "gray_r", 1, None, (2, 56)))
    "Did you mean one of ('blech',)?"
    >>> did_you_mean("none", ("blech", "gray_r", 1, None, (2, 56)))
    'Did you mean one of (None,)?'

    See also
    --------
    https://en.wikipedia.org/wiki/String_metric
    """
    # difflib only compares strings, so index every candidate by its str()
    # form and keep the original object to report back. Candidates sharing
    # the same str() form collapse into one entry (the last one wins).
    by_text: dict[str, Hashable] = {}
    for candidate in possibilities:
        by_text[str(candidate)] = candidate

    close_texts = difflib.get_close_matches(str(word), list(by_text), n=n)
    if not close_texts:
        return ""

    # Map the matched strings back to the original hashables, keeping
    # difflib's best-first ordering.
    best = tuple(by_text[text] for text in close_texts)
    return f"Did you mean one of {best}?"


def get_valid_numpy_dtype(array: np.ndarray | pd.Index) -> np.dtype:
"""Return a numpy compatible dtype from either
a numpy array or a pandas.Index.
Expand Down

0 comments on commit 70997ef

Please sign in to comment.