From 87e097275ce468db8f18c80ef8970310cfef1fe8 Mon Sep 17 00:00:00 2001 From: Michael Waskom Date: Sat, 24 Dec 2022 12:02:36 -0500 Subject: [PATCH] Improve robustness to empty data in Plot and Nominal (#3202) * Improve robustness to missing data in Plot and Nominal * Fix Plot.on tests to work with new logic --- doc/whatsnew/v0.12.2.rst | 2 ++ seaborn/_core/plot.py | 7 +++---- seaborn/_core/scales.py | 2 +- tests/_core/test_plot.py | 9 +++++---- tests/_core/test_scales.py | 6 ++++++ 5 files changed, 17 insertions(+), 9 deletions(-) diff --git a/doc/whatsnew/v0.12.2.rst b/doc/whatsnew/v0.12.2.rst index ad12a778e1..aa8def9a0b 100644 --- a/doc/whatsnew/v0.12.2.rst +++ b/doc/whatsnew/v0.12.2.rst @@ -8,6 +8,8 @@ v0.12.2 (Unreleased) - |Fix| Fixed a bug where legends for numeric variables with large values with be incorrectly shown (i.e. with a missing offset or exponent; :pr:`3187`). +- |Fix| Improve robustness to empty data in several components of the objects interface (:pr:`3202`). + - |Fix| Fixed a regression in v0.12.0 where manually-added labels could have duplicate legend entries (:pr:`3116`). - |Fix| Fixed a bug in :func:`histplot` with `kde=True` and `log_scale=True` where the curve was not scaled properly (:pr:`3173`). diff --git a/seaborn/_core/plot.py b/seaborn/_core/plot.py index 8915616b90..64f59cb239 100644 --- a/seaborn/_core/plot.py +++ b/seaborn/_core/plot.py @@ -1466,8 +1466,6 @@ def _setup_split_generator( self, grouping_vars: list[str], df: DataFrame, subplots: list[dict[str, Any]], ) -> Callable[[], Generator]: - allow_empty = False # TODO will need to recreate previous categorical plots - grouping_keys = [] grouping_vars = [ v for v in grouping_vars if v in df and v not in ["col", "row"] @@ -1506,7 +1504,8 @@ def split_generator(keep_na=False) -> Generator: subplot_keys[dim] = view[dim] if not grouping_vars or not any(grouping_keys): - yield subplot_keys, axes_df.copy(), view["ax"] + if not axes_df.empty: + yield subplot_keys, axes_df.copy(), view["ax"] continue grouped_df = axes_df.groupby(grouping_vars, sort=False, as_index=False) @@ -1526,7 +1525,7 @@ def split_generator(keep_na=False) -> Generator: # case this option could be removed df_subset = axes_df.loc[[]] - if df_subset.empty and not allow_empty: + if df_subset.empty: continue sub_vars = dict(zip(grouping_vars, key)) diff --git a/seaborn/_core/scales.py b/seaborn/_core/scales.py index c91f6fdc46..6c9ecbc902 100644 --- a/seaborn/_core/scales.py +++ b/seaborn/_core/scales.py @@ -163,7 +163,7 @@ def _setup( new = new.label() # TODO flexibility over format() which isn't great for numbers / dates - stringify = np.vectorize(format) + stringify = np.vectorize(format, otypes=["object"]) units_seed = categorical_order(data, new.order) diff --git a/tests/_core/test_plot.py b/tests/_core/test_plot.py index 2bff6bed17..506739624e 100644 --- a/tests/_core/test_plot.py +++ b/tests/_core/test_plot.py @@ -680,8 +680,9 @@ def test_matplotlib_object_creation(self): def test_empty(self): m = MockMark() - Plot().plot() + Plot().add(m).plot() assert m.n_splits == 0 + assert not m.passed_data def test_no_orient_variance(self): @@ -1086,7 +1087,7 @@ def test_on_axes(self): ax = mpl.figure.Figure().subplots() m = MockMark() - p = Plot().on(ax).add(m).plot() + p = Plot([1], [2]).on(ax).add(m).plot() assert m.passed_axes == [ax] assert p._figure is ax.figure @@ -1095,7 +1096,7 @@ def test_on_figure(self, facet): f = mpl.figure.Figure() m = MockMark() - p = Plot().on(f).add(m) + p = Plot([1, 2], [3, 4]).on(f).add(m) if facet: p = p.facet(["a", "b"]) p = p.plot() @@ -1112,7 +1113,7 @@ def test_on_subfigure(self, facet): sf1, sf2 = mpl.figure.Figure().subfigures(2) sf1.subplots() m = MockMark() - p = Plot().on(sf2).add(m) + p = Plot([1, 2], [3, 4]).on(sf2).add(m) if facet: p = p.facet(["a", "b"]) p = p.plot() diff --git a/tests/_core/test_scales.py b/tests/_core/test_scales.py index c4b39f5d34..5baf53ceaf 100644 --- a/tests/_core/test_scales.py +++ b/tests/_core/test_scales.py @@ -555,6 +555,12 @@ class MockProperty(IntervalProperty): s = Nominal((2, 4))._setup(x, MockProperty()) assert_array_equal(s(x), [4, np.sqrt(10), 2, np.sqrt(10)]) + def test_empty_data(self): + + x = pd.Series([], dtype=object, name="x") + s = Nominal()._setup(x, Coordinate()) + assert_array_equal(s(x), []) + class TestTemporal: