diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 7f44550f93..9930b6187c 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -16,7 +16,7 @@ If you think you've encountered a bug in seaborn, please report it on the [Githu - A clear explanation of why you think something is wrong - The specific versions of seaborn and matplotlib that you are working with -Bug reports are easiest to address if they can be demonstrated using one of the example datasets from the seaborn docs (i.e. with `seaborn.load_dataset`). Otherwise, it is preferable that your example generate synthetic data to reproduce the problem. If you can only demonstrate the issue with your actual dataset, you will need to share it, ideally as a csv. Note that you can upload a csv directly to a github issue thread, but it must have a `.txt` suffix. +Bug reports are easiest to address if they can be demonstrated using one of the example datasets from the seaborn docs (i.e. with `seaborn.load_dataset`). Otherwise, it is preferable that your example generate synthetic data to reproduce the problem. If you can only demonstrate the issue with your actual dataset, you will need to share it, ideally as a csv (do not share data as a pickle file). If you've encountered an error, searching the specific text of the message before opening a new issue can often help you solve the problem quickly and avoid making a duplicate report. 
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 74efcb54f9..11ab24f978 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -16,15 +16,16 @@ env: NB_KERNEL: python MPLBACKEND: Agg SEABORN_DATA: ${{ github.workspace }}/seaborn-data + PYDEVD_DISABLE_FILE_VALIDATION: 1 jobs: build-docs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - name: Setup Python 3.11 - uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0 + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 with: python-version: "3.11" @@ -35,7 +36,8 @@ jobs: - name: Install pandoc run: | - sudo apt-get install pandoc + wget https://github.com/jgm/pandoc/releases/download/3.1.11/pandoc-3.1.11-1-amd64.deb + sudo dpkg -i pandoc-3.1.11-1-amd64.deb - name: Cache datasets run: | @@ -56,7 +58,7 @@ jobs: strategy: matrix: - python: ["3.8", "3.9", "3.10", "3.11"] + python: ["3.8", "3.9", "3.10", "3.11", "3.12"] install: [full] deps: [latest] @@ -69,12 +71,13 @@ jobs: deps: latest steps: - - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - name: Setup Python ${{ matrix.python }} - uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0 + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 with: python-version: ${{ matrix.python }} + allow-prereleases: true - name: Install seaborn run: | @@ -97,10 +100,10 @@ jobs: steps: - name: Checkout - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - name: Setup Python - uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0 + uses: 
actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 - name: Install tools run: pip install mypy flake8 diff --git a/LICENSE.md b/LICENSE.md index b5ebba6263..86f5ad0986 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,4 +1,4 @@ -Copyright (c) 2012-2021, Michael L. Waskom +Copyright (c) 2012-2023, Michael L. Waskom All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/README.md b/README.md index f642e553f1..97603ede54 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ seaborn: statistical data visualization ======================================= [![PyPI Version](https://img.shields.io/pypi/v/seaborn.svg)](https://pypi.org/project/seaborn/) -[![License](https://img.shields.io/pypi/l/seaborn.svg)](https://github.com/mwaskom/seaborn/blob/master/LICENSE) +[![License](https://img.shields.io/pypi/l/seaborn.svg)](https://github.com/mwaskom/seaborn/blob/master/LICENSE.md) [![DOI](https://joss.theoj.org/papers/10.21105/joss.03021/status.svg)](https://doi.org/10.21105/joss.03021) [![Tests](https://github.com/mwaskom/seaborn/workflows/CI/badge.svg)](https://github.com/mwaskom/seaborn/actions) [![Code Coverage](https://codecov.io/gh/mwaskom/seaborn/branch/master/graph/badge.svg)](https://codecov.io/gh/mwaskom/seaborn) diff --git a/doc/_docstrings/barplot.ipynb b/doc/_docstrings/barplot.ipynb index e130ec4afa..bb1e6d193c 100644 --- a/doc/_docstrings/barplot.ipynb +++ b/doc/_docstrings/barplot.ipynb @@ -22,7 +22,7 @@ "id": "b53b65b8-5670-4905-aa39-36db04f4b813", "metadata": {}, "source": [ - "With long data, assign `x` and `y` to group by a categorical varaible and plot aggregated values, with confidence intervals:" + "With long data, assign `x` and `y` to group by a categorical variable and plot aggregated values, with confidence intervals:" ] }, { diff --git a/doc/_docstrings/histplot.ipynb b/doc/_docstrings/histplot.ipynb index 79b66364d4..b448f7a65a 100644 --- a/doc/_docstrings/histplot.ipynb +++ 
b/doc/_docstrings/histplot.ipynb @@ -312,7 +312,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Step functions, esepcially when unfilled, make it easy to compare cumulative histograms:" + "Step functions, especially when unfilled, make it easy to compare cumulative histograms:" ] }, { diff --git a/doc/_docstrings/objects.Est.ipynb b/doc/_docstrings/objects.Est.ipynb index 3dcac462e5..94aacfa902 100644 --- a/doc/_docstrings/objects.Est.ipynb +++ b/doc/_docstrings/objects.Est.ipynb @@ -109,12 +109,30 @@ "p.add(so.Range(), so.Est(seed=0))" ] }, + { + "cell_type": "markdown", + "id": "df807ef8-b5fb-4eac-b539-1bd4e797ddc2", + "metadata": {}, + "source": [ + "To compute a weighted estimate (and confidence interval), assign a `weight` variable in the layer where you use the stat:" + ] + }, { "cell_type": "code", "execution_count": null, "id": "5e4a0594-e1ee-4f72-971e-3763dd626e8b", "metadata": {}, "outputs": [], + "source": [ + "p.add(so.Range(), so.Est(), weight=\"price\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d0c34d7-fb76-44cf-9079-3ec7f45741d0", + "metadata": {}, + "outputs": [], "source": [] } ], diff --git a/doc/_docstrings/objects.Plot.layout.ipynb b/doc/_docstrings/objects.Plot.layout.ipynb index 755d6d3a28..021cf7296c 100644 --- a/doc/_docstrings/objects.Plot.layout.ipynb +++ b/doc/_docstrings/objects.Plot.layout.ipynb @@ -69,10 +69,28 @@ "p.facet([\"A\", \"B\"], [\"X\", \"Y\"]).layout(engine=\"constrained\")" ] }, + { + "cell_type": "markdown", + "id": "d61054d1-dcef-4e11-9802-394bcc633f9f", + "metadata": {}, + "source": [ + "With `extent`, you can control the size of the plot relative to the underlying figure. Because the notebook display adapts the figure background to the plot, this appears only to change the plot size in a notebook context. 
But it can be useful when saving or displaying through a `pyplot` GUI window:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1b5d5969-2925-474f-8e3c-99e4f90a7a2b", + "metadata": {}, + "outputs": [], + "source": [ + "p.layout(extent=[0, 0, .8, 1]).show()" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "781ff58c-b805-4e93-8cae-be0442e273ea", + "id": "e5c41b7d-a064-4406-8571-a544b194f3dc", "metadata": {}, "outputs": [], "source": [] diff --git a/doc/_docstrings/pointplot.ipynb b/doc/_docstrings/pointplot.ipynb index ace676b20d..efa792215e 100644 --- a/doc/_docstrings/pointplot.ipynb +++ b/doc/_docstrings/pointplot.ipynb @@ -22,7 +22,7 @@ "id": "f25d3647-9fad-47b2-b49d-db6f5b5c3795", "metadata": {}, "source": [ - "Group by a categorical varaible and plot aggregated values, with confidence intervals:" + "Group by a categorical variable and plot aggregated values, with confidence intervals:" ] }, { @@ -138,7 +138,7 @@ "id": "00273ada-cd12-410a-a268-38243d6514ae", "metadata": {}, "source": [ - "Dodge by a specific amount, relative to the width alloted for each level:" + "Dodge by a specific amount, relative to the width allotted for each level:" ] }, { @@ -164,7 +164,7 @@ "id": "e205e7c8-6b11-44e6-b43f-7416c427215d", "metadata": {}, "source": [ - "When variables are not explicity assigned and the dataset is two-dimensional, the plot will aggregate over each column:" + "When variables are not explicitly assigned and the dataset is two-dimensional, the plot will aggregate over each column:" ] }, { diff --git a/doc/_docstrings/scatterplot.ipynb b/doc/_docstrings/scatterplot.ipynb index 973a67d690..4b78f8eeab 100644 --- a/doc/_docstrings/scatterplot.ipynb +++ b/doc/_docstrings/scatterplot.ipynb @@ -167,7 +167,7 @@ "cell_type": "raw", "metadata": {}, "source": [ - "Control the range of marker areas with ``sizes``, and set ``lengend=\"full\"`` to force every unique value to appear in the legend:" + "Control the range of marker areas 
with ``sizes``, and set ``legend=\"full\"`` to force every unique value to appear in the legend:" ] }, { diff --git a/doc/_tutorial/color_palettes.ipynb b/doc/_tutorial/color_palettes.ipynb index a480740205..79029f421f 100644 --- a/doc/_tutorial/color_palettes.ipynb +++ b/doc/_tutorial/color_palettes.ipynb @@ -975,7 +975,7 @@ "source": [ "As you can see, there are many options for using color in your visualizations. Seaborn tries both to use good defaults and to offer a lot of flexibility.\n", "\n", - "This discussion is only the beginning, and there are a number of good resources for learning more about techniques for using color in visualizations. One great example is this `series of blog posts `_ from the NASA Earth Observatory. The matplotlib docs also have a `nice tutorial `_ that illustrates some of the perceptual properties of their colormaps." + "This discussion is only the beginning, and there are a number of good resources for learning more about techniques for using color in visualizations. One great example is this `series of blog posts `_ from the NASA Earth Observatory. The matplotlib docs also have a `nice tutorial `_ that illustrates some of the perceptual properties of their colormaps." ] } ], diff --git a/doc/_tutorial/data_structure.ipynb b/doc/_tutorial/data_structure.ipynb index 2d66370fa3..a474477002 100644 --- a/doc/_tutorial/data_structure.ipynb +++ b/doc/_tutorial/data_structure.ipynb @@ -19,7 +19,7 @@ "As a data visualization library, seaborn requires that you provide it with data. This chapter explains the various ways to accomplish that task. Seaborn supports several different dataset formats, and most functions accept data represented with objects from the `pandas `_ or `numpy `_ libraries as well as built-in Python types like lists and dictionaries. Understanding the usage patterns associated with these different options will help you quickly create useful visualizations for nearly any dataset.\n", "\n", ".. 
note::\n", - " As of current writing (v0.13.0), the full breadth of options covered here are supported by most, but not all, of the functions in seaborn. Namely, a few older functinos (e.g., :func:`lmplot` and :func:`regplot`) anre more limited in what they accept." + " As of current writing (v0.13.0), the full breadth of options covered here are supported by most, but not all, of the functions in seaborn. Namely, a few older functions (e.g., :func:`lmplot` and :func:`regplot`) are more limited in what they accept." ] }, { diff --git a/doc/_tutorial/properties.ipynb b/doc/_tutorial/properties.ipynb index fd064763d1..913cb5ac08 100644 --- a/doc/_tutorial/properties.ipynb +++ b/doc/_tutorial/properties.ipynb @@ -742,7 +742,7 @@ "\n", "The `pointsize` property is relevant to dot marks and to line marks that can show markers at individual data points. The units correspond to the diameter of the mark in points.\n", "\n", - "The `pointsize` scales with the square root of the data by default so that magnitude is represented by diameter rather than area:" + "Note that, while the parameterization corresponds to diameter, scales will be applied with a square root transform so that data values are linearly proportional to area:" ] }, { diff --git a/doc/conf.py b/doc/conf.py index 81d2c1b9ff..467527f3c4 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -88,7 +88,7 @@ """ # Define replacements (used in whatsnew bullets) -rst_epilog = """ +rst_epilog = r""" .. role:: raw-html(raw) :format: html diff --git a/doc/installing.rst b/doc/installing.rst index e1449d32ec..d28a65ee67 100644 --- a/doc/installing.rst +++ b/doc/installing.rst @@ -129,7 +129,7 @@ if you try to reproduce the issue in an example that uses only matplotlib, so that you can report it in the right place. But it is alright to skip this step if it's not obvious how to do it. 
-General support questions are more at home on either `stackoverflow +General support questions are more at home on `stackoverflow `_, where there is a larger audience of people who will see your post and may be able to offer assistance. Your chance of getting a quick answer will be higher if you include diff --git a/doc/whatsnew/index.rst b/doc/whatsnew/index.rst index 990321f495..5926f1b9e6 100644 --- a/doc/whatsnew/index.rst +++ b/doc/whatsnew/index.rst @@ -8,6 +8,7 @@ v0.13 .. toctree:: :maxdepth: 2 + v0.13.1 v0.13.0 v0.12 diff --git a/doc/whatsnew/v0.13.1.rst b/doc/whatsnew/v0.13.1.rst new file mode 100644 index 0000000000..f92c13f44e --- /dev/null +++ b/doc/whatsnew/v0.13.1.rst @@ -0,0 +1,22 @@ +v0.13.1 (December 2023) +----------------------- + +This is a minor release with some bug fixes and a couple new features. All users are encouraged to update. + +- |Feature| Added support for weighted mean estimation (with bootstrap CIs) in :func:`lineplot`, :func:`barplot`, :func:`pointplot`, and :class:`objects.Est` (:pr:`3580`, :pr:`3586`). + +- |Feature| Added the `extent` option to :meth:`objects.Plot.layout` (:pr:`3552`). + +- |Fix| Fixed a regression in v0.13.0 that triggered an exception when working with non-numpy data types (:pr:`3516`). + +- |Fix| Fixed a bug in :class:`objects.Plot` so that tick labels are shown for wrapped axes that aren't in the bottom-most row (:pr:`3600`). + +- |Fix| Fixed a bug in :func:`catplot` where a blank legend would be added when `hue` was redundantly assigned (:pr:`3540`). + +- |Fix| Fixed a bug in :func:`catplot` where the `edgecolor` parameter was ignored with `kind="bar"` (:pr:`3547`). + +- |Fix| Fixed a bug in :func:`boxplot` where an exception was raised when using the matplotlib `bootstrap` option (:pr:`3562`). + +- |Fix| Fixed a bug in :func:`lineplot` where an exception was raised when `hue` was assigned with an empty dataframe (:pr:`3569`). 
+ +- |Fix| Fixed a bug in multiple categorical plots that raised with `hue=None` and `dodge=True`; this now has no effect (:pr:`3605`). diff --git a/pyproject.toml b/pyproject.toml index 531c3770c6..0f8e7cfd01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ classifiers = [ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "License :: OSI Approved :: BSD License", "Topic :: Scientific/Engineering :: Visualization", "Topic :: Multimedia :: Graphics", @@ -25,7 +26,7 @@ requires-python = ">=3.8" dependencies = [ "numpy>=1.20,!=1.24.0", "pandas>=1.2", - "matplotlib>=3.3,!=3.6.1", + "matplotlib>=3.4,!=3.6.1", ] [project.optional-dependencies] diff --git a/seaborn/__init__.py b/seaborn/__init__.py index 63dfcf76b9..5a3d9eed10 100644 --- a/seaborn/__init__.py +++ b/seaborn/__init__.py @@ -18,4 +18,4 @@ _orig_rc_params = mpl.rcParams.copy() # Define the seaborn version -__version__ = "0.13.0" +__version__ = "0.13.1" diff --git a/seaborn/_base.py b/seaborn/_base.py index e312923f5d..e96954c07b 100644 --- a/seaborn/_base.py +++ b/seaborn/_base.py @@ -933,7 +933,8 @@ def iter_data( grouping_keys = [] for var in grouping_vars: - grouping_keys.append(levels.get(var, [])) + key = levels.get(var) + grouping_keys.append([] if key is None else key) iter_keys = itertools.product(*grouping_keys) if reverse: @@ -1159,11 +1160,7 @@ def _attach( # For categorical y, we want the "first" level to be at the top of the axis if self.var_types.get("y", None) == "categorical": for ax in ax_list: - try: - ax.yaxis.set_inverted(True) - except AttributeError: # mpl < 3.1 - if not ax.yaxis_inverted(): - ax.invert_yaxis() + ax.yaxis.set_inverted(True) # TODO -- Add axes labels @@ -1520,8 +1517,13 @@ def variable_type(vector, boolean_type="numeric"): warnings.simplefilter( action='ignore', category=(FutureWarning, DeprecationWarning) ) - if np.isin(vector, [0, 
1]).all(): - return VariableType(boolean_type) + try: + if np.isin(vector, [0, 1]).all(): + return VariableType(boolean_type) + except TypeError: + # .isin comparison is not guaranteed to be possible under NumPy + # casting rules, depending on the (unknown) dtype of 'vector' + pass # Defer to positive pandas tests if pd.api.types.is_numeric_dtype(vector): diff --git a/seaborn/_compat.py b/seaborn/_compat.py index 05a4a5f2c2..5427b7c843 100644 --- a/seaborn/_compat.py +++ b/seaborn/_compat.py @@ -1,24 +1,12 @@ +from __future__ import annotations +from typing import Literal + import numpy as np import matplotlib as mpl +from matplotlib.figure import Figure from seaborn.utils import _version_predates -def MarkerStyle(marker=None, fillstyle=None): - """ - Allow MarkerStyle to accept a MarkerStyle object as parameter. - - Supports matplotlib < 3.3.0 - https://github.com/matplotlib/matplotlib/pull/16692 - - """ - if isinstance(marker, mpl.markers.MarkerStyle): - if fillstyle is None: - return marker - else: - marker = marker.get_marker() - return mpl.markers.MarkerStyle(marker, fillstyle) - - def norm_from_scale(scale, norm): """Produce a Normalize object given a Scale and min/max domain limits.""" # This is an internal maplotlib function that simplifies things to access @@ -67,66 +55,6 @@ def __call__(self, value, clip=None): return new_norm -def scale_factory(scale, axis, **kwargs): - """ - Backwards compatability for creation of independent scales. - - Matplotlib scales require an Axis object for instantiation on < 3.4. - But the axis is not used, aside from extraction of the axis_name in LogScale. 
- - """ - modify_transform = False - if _version_predates(mpl, "3.4"): - if axis[0] in "xy": - modify_transform = True - axis = axis[0] - base = kwargs.pop("base", None) - if base is not None: - kwargs[f"base{axis}"] = base - nonpos = kwargs.pop("nonpositive", None) - if nonpos is not None: - kwargs[f"nonpos{axis}"] = nonpos - - if isinstance(scale, str): - class Axis: - axis_name = axis - axis = Axis() - - scale = mpl.scale.scale_factory(scale, axis, **kwargs) - - if modify_transform: - transform = scale.get_transform() - transform.base = kwargs.get("base", 10) - if kwargs.get("nonpositive") == "mask": - # Setting a private attribute, but we only get here - # on an old matplotlib, so this won't break going forwards - transform._clip = False - - return scale - - -def set_scale_obj(ax, axis, scale): - """Handle backwards compatability with setting matplotlib scale.""" - if _version_predates(mpl, "3.4"): - # The ability to pass a BaseScale instance to Axes.set_{}scale was added - # to matplotlib in version 3.4.0: GH: matplotlib/matplotlib/pull/19089 - # Workaround: use the scale name, which is restrictive only if the user - # wants to define a custom scale; they'll need to update the registry too. 
- if scale.name is None: - # Hack to support our custom Formatter-less CatScale - return - method = getattr(ax, f"set_{axis}scale") - kws = {} - if scale.name == "function": - trans = scale.get_transform() - kws["functions"] = (trans._forward, trans._inverse) - method(scale.name, **kws) - axis_obj = getattr(ax, f"{axis}axis") - scale.set_default_locators_and_formatters(axis_obj) - else: - ax.set(**{f"{axis}scale": scale}) - - def get_colormap(name): """Handle changes to matplotlib colormap interface in 3.6.""" try: @@ -144,19 +72,31 @@ def register_colormap(name, cmap): mpl.cm.register_cmap(name, cmap) -def set_layout_engine(fig, engine): +def set_layout_engine( + fig: Figure, + engine: Literal["constrained", "compressed", "tight", "none"], +) -> None: """Handle changes to auto layout engine interface in 3.6""" if hasattr(fig, "set_layout_engine"): fig.set_layout_engine(engine) else: # _version_predates(mpl, 3.6) if engine == "tight": - fig.set_tight_layout(True) + fig.set_tight_layout(True) # type: ignore # predates typing elif engine == "constrained": - fig.set_constrained_layout(True) + fig.set_constrained_layout(True) # type: ignore elif engine == "none": - fig.set_tight_layout(False) - fig.set_constrained_layout(False) + fig.set_tight_layout(False) # type: ignore + fig.set_constrained_layout(False) # type: ignore + + +def get_layout_engine(fig: Figure) -> mpl.layout_engine.LayoutEngine | None: + """Handle changes to auto layout engine interface in 3.6""" + if hasattr(fig, "get_layout_engine"): + return fig.get_layout_engine() + else: + # _version_predates(mpl, 3.6) + return None def share_axis(ax0, ax1, which): diff --git a/seaborn/_core/plot.py b/seaborn/_core/plot.py index d92b6ecb59..c432056baf 100644 --- a/seaborn/_core/plot.py +++ b/seaborn/_core/plot.py @@ -27,7 +27,7 @@ from seaborn._stats.base import Stat from seaborn._core.data import PlotData from seaborn._core.moves import Move -from seaborn._core.scales import Scale, Nominal +from 
seaborn._core.scales import Scale from seaborn._core.subplots import Subplots from seaborn._core.groupby import GroupBy from seaborn._core.properties import PROPERTIES, Property @@ -40,10 +40,9 @@ ) from seaborn._core.exceptions import PlotSpecError from seaborn._core.rules import categorical_order -from seaborn._compat import set_scale_obj, set_layout_engine +from seaborn._compat import get_layout_engine, set_layout_engine from seaborn.rcmod import axes_style, plotting_context from seaborn.palettes import color_palette -from seaborn.utils import _version_predates from typing import TYPE_CHECKING, TypedDict if TYPE_CHECKING: @@ -462,16 +461,12 @@ def on(self, target: Axes | SubFigure | Figure) -> Plot: """ accepted_types: tuple # Allow tuple of various length - if hasattr(mpl.figure, "SubFigure"): # Added in mpl 3.4 - accepted_types = ( - mpl.axes.Axes, mpl.figure.SubFigure, mpl.figure.Figure - ) - accepted_types_str = ( - f"{mpl.axes.Axes}, {mpl.figure.SubFigure}, or {mpl.figure.Figure}" - ) - else: - accepted_types = mpl.axes.Axes, mpl.figure.Figure - accepted_types_str = f"{mpl.axes.Axes} or {mpl.figure.Figure}" + accepted_types = ( + mpl.axes.Axes, mpl.figure.SubFigure, mpl.figure.Figure + ) + accepted_types_str = ( + f"{mpl.axes.Axes}, {mpl.figure.SubFigure}, or {mpl.figure.Figure}" + ) if not isinstance(target, accepted_types): err = ( @@ -815,6 +810,7 @@ def layout( *, size: tuple[float, float] | Default = default, engine: str | None | Default = default, + extent: tuple[float, float, float, float] | Default = default, ) -> Plot: """ Control the figure size and layout. @@ -830,9 +826,14 @@ def layout( size : (width, height) Size of the resulting figure, in inches. Size is inclusive of legend when using pyplot, but not otherwise. - engine : {{"tight", "constrained", None}} + engine : {{"tight", "constrained", "none"}} Name of method for automatically adjusting the layout to remove overlap. The default depends on whether :meth:`Plot.on` is used. 
+ extent : (left, bottom, right, top) + Boundaries of the plot layout, in fractions of the figure size. Takes + effect through the layout engine; exact results will vary across engines. + Note: the extent includes axis decorations when using a layout engine, + but it is exclusive of them when `engine="none"`. Examples -------- @@ -850,12 +851,14 @@ def layout( new._figure_spec["figsize"] = size if engine is not default: new._layout_spec["engine"] = engine + if extent is not default: + new._layout_spec["extent"] = extent return new # TODO def legend (ugh) - def theme(self, *args: dict[str, Any]) -> Plot: + def theme(self, config: dict[str, Any], /) -> Plot: """ Control the appearance of elements in the plot. @@ -877,13 +880,7 @@ def theme(self, *args: dict[str, Any]) -> Plot: """ new = self._clone() - # We can skip this whole block on Python 3.8+ with positional-only syntax - nargs = len(args) - if nargs != 1: - err = f"theme() takes 1 positional argument, but {nargs} were given" - raise TypeError(err) - - rc = mpl.RcParams(args[0]) + rc = mpl.RcParams(config) new._theme.update(rc) return new @@ -1174,6 +1171,8 @@ def _setup_figure(self, p: Plot, common: PlotData, layers: list[Layer]) -> None: ) ) for group in ("major", "minor"): + side = {"x": "bottom", "y": "left"}[axis] + axis_obj.set_tick_params(**{f"label{side}": show_tick_labels}) for t in getattr(axis_obj, f"get_{group}ticklabels")(): t.set_visible(show_tick_labels) @@ -1369,19 +1368,6 @@ def _setup_scales( share_state = self._subplots.subplot_spec[f"share{axis}"] subplots = [view for view in self._subplots if view[axis] == coord] - # Shared categorical axes are broken on matplotlib<3.4.0. - # https://github.com/matplotlib/matplotlib/pull/18308 - # This only affects us when sharing *paired* axes. This is a novel/niche - # behavior, so we will raise rather than hack together a workaround. 
- if axis is not None and _version_predates(mpl, "3.4"): - paired_axis = axis in p._pair_spec.get("structure", {}) - cat_scale = isinstance(scale, Nominal) - ok_dim = {"x": "col", "y": "row"}[axis] - shared_axes = share_state not in [False, "none", ok_dim] - if paired_axis and cat_scale and shared_axes: - err = "Sharing paired categorical axes requires matplotlib>=3.4.0" - raise RuntimeError(err) - if scale is None: self._scales[var] = Scale._identity() else: @@ -1407,7 +1393,7 @@ def _setup_scales( axis_obj = getattr(view["ax"], f"{axis}axis") seed_values = self._get_subplot_data(var_df, var, view, share_state) view_scale = scale._setup(seed_values, prop, axis=axis_obj) - set_scale_obj(view["ax"], axis, view_scale._matplotlib_scale) + view["ax"].set(**{f"{axis}scale": view_scale._matplotlib_scale}) for layer, new_series in zip(layers, transformed_data): layer_df = layer["data"].frame @@ -1811,12 +1797,32 @@ def _finalize_figure(self, p: Plot) -> None: if axis_key in self._scales: # TODO when would it not be? self._scales[axis_key]._finalize(p, axis_obj) - if (engine := p._layout_spec.get("engine", default)) is not default: + if (engine_name := p._layout_spec.get("engine", default)) is not default: # None is a valid arg for Figure.set_layout_engine, hence `default` - set_layout_engine(self._figure, engine) + set_layout_engine(self._figure, engine_name) elif p._target is None: # Don't modify the layout engine if the user supplied their own # matplotlib figure and didn't specify an engine through Plot # TODO switch default to "constrained"? # TODO either way, make configurable set_layout_engine(self._figure, "tight") + + if (extent := p._layout_spec.get("extent")) is not None: + engine = get_layout_engine(self._figure) + if engine is None: + self._figure.subplots_adjust(*extent) + else: + # Note the different parameterization for the layout engine rect... 
+ left, bottom, right, top = extent + width, height = right - left, top - bottom + try: + # The base LayoutEngine.set method doesn't have rect= so we need + # to avoid typechecking this statement. We also catch a TypeError + # as a plugin LayoutEngine may not support it either. + # Alternatively we could guard this with a check on the engine type, + # but that would make later-developed engines unusable. + engine.set(rect=[left, bottom, width, height]) # type: ignore + except TypeError: + # Should we warn / raise? Note that we don't expect to get here + # under any normal circumstances. + pass diff --git a/seaborn/_core/properties.py b/seaborn/_core/properties.py index 8658fd22c0..4e2df91b49 100644 --- a/seaborn/_core/properties.py +++ b/seaborn/_core/properties.py @@ -3,25 +3,20 @@ import warnings import numpy as np +from numpy.typing import ArrayLike from pandas import Series import matplotlib as mpl from matplotlib.colors import to_rgb, to_rgba, to_rgba_array +from matplotlib.markers import MarkerStyle from matplotlib.path import Path from seaborn._core.scales import Scale, Boolean, Continuous, Nominal, Temporal from seaborn._core.rules import categorical_order, variable_type -from seaborn._compat import MarkerStyle from seaborn.palettes import QUAL_PALETTES, color_palette, blend_palette from seaborn.utils import get_color_cycle from typing import Any, Callable, Tuple, List, Union, Optional -try: - from numpy.typing import ArrayLike -except ImportError: - # numpy<1.20.0 (Jan 2021) - ArrayLike = Any - RGBTuple = Tuple[float, float, float] RGBATuple = Tuple[float, float, float, float] ColorSpec = Union[RGBTuple, RGBATuple, str] diff --git a/seaborn/_core/rules.py b/seaborn/_core/rules.py index 5057221f65..de6c651d97 100644 --- a/seaborn/_core/rules.py +++ b/seaborn/_core/rules.py @@ -97,7 +97,12 @@ def variable_type( boolean_dtypes = ["bool"] boolean_vector = vector.dtype in boolean_dtypes else: - boolean_vector = bool(np.isin(vector, [0, 1]).all()) + try: 
+ boolean_vector = bool(np.isin(vector, [0, 1]).all()) + except TypeError: + # .isin comparison is not guaranteed to be possible under NumPy + # casting rules, depending on the (unknown) dtype of 'vector' + boolean_vector = False if boolean_vector: return VarType(boolean_type) diff --git a/seaborn/_core/scales.py b/seaborn/_core/scales.py index 8c597e126e..1e7bef8a5d 100644 --- a/seaborn/_core/scales.py +++ b/seaborn/_core/scales.py @@ -278,8 +278,6 @@ def _setup( # major_formatter = new._get_formatter(major_locator, **new._label_params) class CatScale(mpl.scale.LinearScale): - name = None # To work around mpl<3.4 compat issues - def set_default_locators_and_formatters(self, axis): ... # axis.set_major_locator(major_locator) diff --git a/seaborn/_core/subplots.py b/seaborn/_core/subplots.py index 83b8e136ad..287f441670 100644 --- a/seaborn/_core/subplots.py +++ b/seaborn/_core/subplots.py @@ -144,7 +144,7 @@ def init_figure( pair_spec: PairSpec, pyplot: bool = False, figure_kws: dict | None = None, - target: Axes | Figure | SubFigure = None, + target: Axes | Figure | SubFigure | None = None, ) -> Figure: """Initialize matplotlib objects and add seaborn-relevant metadata.""" # TODO reduce need to pass pair_spec here? @@ -158,11 +158,8 @@ def init_figure( err = " ".join([ "Cannot create multiple subplots after calling `Plot.on` with", f"a {mpl.axes.Axes} object.", + f" You may want to use a {mpl.figure.SubFigure} instead.", ]) - try: - err += f" You may want to use a {mpl.figure.SubFigure} instead." 
- except AttributeError: # SubFigure added in mpl 3.4 - pass raise RuntimeError(err) self._subplot_list = [{ @@ -179,10 +176,7 @@ def init_figure( self._figure = target.figure return self._figure - elif ( - hasattr(mpl.figure, "SubFigure") # Added in mpl 3.4 - and isinstance(target, mpl.figure.SubFigure) - ): + elif isinstance(target, mpl.figure.SubFigure): figure = target.figure elif isinstance(target, mpl.figure.Figure): figure = target diff --git a/seaborn/_marks/bar.py b/seaborn/_marks/bar.py index 4b1c072999..2aed6830a6 100644 --- a/seaborn/_marks/bar.py +++ b/seaborn/_marks/bar.py @@ -16,7 +16,6 @@ resolve_color, document_properties ) -from seaborn.utils import _version_predates from typing import TYPE_CHECKING if TYPE_CHECKING: @@ -170,11 +169,8 @@ def _plot(self, split_gen, scales, orient): ax.add_patch(bar) # Add a container which is useful for, e.g. Axes.bar_label - if _version_predates(mpl, "3.4"): - container_kws = {} - else: - orientation = {"x": "vertical", "y": "horizontal"}[orient] - container_kws = dict(datavalues=vals, orientation=orientation) + orientation = {"x": "vertical", "y": "horizontal"}[orient] + container_kws = dict(datavalues=vals, orientation=orientation) container = mpl.container.BarContainer(bars, **container_kws) ax.add_container(container) diff --git a/seaborn/_statistics.py b/seaborn/_statistics.py index c2f01ce7b5..40346b0269 100644 --- a/seaborn/_statistics.py +++ b/seaborn/_statistics.py @@ -25,6 +25,7 @@ class instantiation. """ from numbers import Number +from statistics import NormalDist import numpy as np import pandas as pd try: @@ -35,7 +36,7 @@ class instantiation. 
_no_scipy = True from .algorithms import bootstrap -from .utils import _check_argument, _normal_quantile_func +from .utils import _check_argument class KDE: @@ -466,7 +467,8 @@ def __init__(self, estimator, errorbar=None, **boot_kws): errorbar : string, (string, number) tuple, or callable Name of errorbar method (either "ci", "pi", "se", or "sd"), or a tuple with a method name and a level parameter, or a function that maps from a - vector to a (min, max) interval. + vector to a (min, max) interval, or None to hide errorbar. See the + :doc:`errorbar tutorial ` for more information. boot_kws Additional keywords are passed to bootstrap when error_method is "ci". @@ -518,6 +520,62 @@ def __call__(self, data, var): return pd.Series({var: estimate, f"{var}min": err_min, f"{var}max": err_max}) +class WeightedAggregator: + + def __init__(self, estimator, errorbar=None, **boot_kws): + """ + Data aggregator that produces a weighted estimate and error bar interval. + + Parameters + ---------- + estimator : string + Function (or method name) that maps a vector to a scalar. Currently + supports only "mean". + errorbar : string or (string, number) tuple + Name of errorbar method or a tuple with a method name and a level parameter. + Currently the only supported method is "ci". + boot_kws + Additional keywords are passed to bootstrap when error_method is "ci". + + """ + if estimator != "mean": + # Note that, while other weighted estimators may make sense (e.g. median), + # I'm not aware of an implementation in our dependencies. We can add one + # in seaborn later, if there is sufficient interest. For now, limit to mean. + raise ValueError(f"Weighted estimator must be 'mean', not {estimator!r}.") + self.estimator = estimator + + method, level = _validate_errorbar_arg(errorbar) + if method is not None and method != "ci": + # As with the estimator, weighted 'sd' or 'pi' error bars may make sense. + # But we'll keep things simple for now and limit to (bootstrap) CI. 
+ raise ValueError(f"Error bar method must be 'ci', not {method!r}.") + self.error_method = method + self.error_level = level + + self.boot_kws = boot_kws + + def __call__(self, data, var): + """Aggregate over `var` column of `data` with estimate and error interval.""" + vals = data[var] + weights = data["weight"] + + estimate = np.average(vals, weights=weights) + + if self.error_method == "ci" and len(data) > 1: + + def error_func(x, w): + return np.average(x, weights=w) + + boots = bootstrap(vals, weights, func=error_func, **self.boot_kws) + err_min, err_max = _percentile_interval(boots, self.error_level) + + else: + err_min = err_max = np.nan + + return pd.Series({var: estimate, f"{var}min": err_min, f"{var}max": err_max}) + + class LetterValues: def __init__(self, k_depth, outlier_prop, trust_alpha): @@ -570,7 +628,8 @@ def _compute_k(self, n): elif self.k_depth == "proportion": k = int(np.log2(n)) - int(np.log2(n * self.outlier_prop)) + 1 elif self.k_depth == "trustworthy": - point_conf = 2 * _normal_quantile_func(1 - self.trust_alpha / 2) ** 2 + normal_quantile_func = np.vectorize(NormalDist().inv_cdf) + point_conf = 2 * normal_quantile_func(1 - self.trust_alpha / 2) ** 2 k = int(np.log2(n / point_conf)) + 1 else: # Allow having k directly specified as input diff --git a/seaborn/_stats/aggregation.py b/seaborn/_stats/aggregation.py index d175273e78..7e7d60212a 100644 --- a/seaborn/_stats/aggregation.py +++ b/seaborn/_stats/aggregation.py @@ -8,7 +8,10 @@ from seaborn._core.scales import Scale from seaborn._core.groupby import GroupBy from seaborn._stats.base import Stat -from seaborn._statistics import EstimateAggregator +from seaborn._statistics import ( + EstimateAggregator, + WeightedAggregator, +) from seaborn._core.typing import Vector @@ -54,8 +57,14 @@ class Est(Stat): """ Calculate a point estimate and error bar interval. - For additional information about the various `errorbar` choices, see - the :doc:`errorbar tutorial `. 
+ For more information about the various `errorbar` choices, see the + :doc:`errorbar tutorial `. + + Additional variables: + + - **weight**: When passed to a layer that uses this stat, a weighted estimate + will be computed. Note that use of weights currently limits the choice of + function and error bar method to `"mean"` and `"ci"`, respectively. Parameters ---------- @@ -95,7 +104,10 @@ def __call__( ) -> DataFrame: boot_kws = {"n_boot": self.n_boot, "seed": self.seed} - engine = EstimateAggregator(self.func, self.errorbar, **boot_kws) + if "weight" in data: + engine = WeightedAggregator(self.func, self.errorbar, **boot_kws) + else: + engine = EstimateAggregator(self.func, self.errorbar, **boot_kws) var = {"x": "y", "y": "x"}[orient] res = ( diff --git a/seaborn/categorical.py b/seaborn/categorical.py index bb40d5d392..99a813b2a7 100644 --- a/seaborn/categorical.py +++ b/seaborn/categorical.py @@ -8,7 +8,9 @@ import pandas as pd import matplotlib as mpl +from matplotlib.cbook import normalize_kwargs from matplotlib.collections import PatchCollection +from matplotlib.markers import MarkerStyle from matplotlib.patches import Rectangle import matplotlib.pyplot as plt @@ -23,12 +25,14 @@ _default_color, _get_patch_legend_artist, _get_transform_functions, - _normalize_kwargs, _scatter_legend_artist, _version_predates, ) -from seaborn._compat import MarkerStyle -from seaborn._statistics import EstimateAggregator, LetterValues +from seaborn._statistics import ( + EstimateAggregator, + LetterValues, + WeightedAggregator, +) from seaborn.palettes import light_palette from seaborn.axisgrid import FacetGrid, _facet_docs @@ -388,6 +392,11 @@ def _dodge_needed(self): def _dodge(self, keys, data): """Apply a dodge transform to coordinates in place.""" + if "hue" not in self.variables: + # Short-circuit if hue variable was not assigned + # We could potentially warn when hue=None, dodge=True, user may be confused + # But I think it's fine to just treat it as a no-op. 
+ return hue_idx = self._hue_map.levels.index(keys["hue"]) n = len(self._hue_map.levels) data["width"] /= n @@ -409,12 +418,10 @@ def _invert_scale(self, ax, data, vars=("x", "y")): data[col] = inv(data[col]) def _configure_legend(self, ax, func, common_kws=None, semantic_kws=None): - if self.legend == "auto": show_legend = not self._redundant_hue and self.input_format != "wide" else: show_legend = bool(self.legend) - if show_legend: self.add_legend_data(ax, func, common_kws, semantic_kws=semantic_kws) handles, _ = ax.get_legend_handles_labels() @@ -598,11 +605,11 @@ def plot_boxes( value_var = {"x": "y", "y": "x"}[self.orient] def get_props(element, artist=mpl.lines.Line2D): - return _normalize_kwargs(plot_kws.pop(f"{element}props", {}), artist) + return normalize_kwargs(plot_kws.pop(f"{element}props", {}), artist) if not fill and linewidth is None: linewidth = mpl.rcParams["lines.linewidth"] - + bootstrap = plot_kws.pop("bootstrap", mpl.rcParams["boxplot.bootstrap"]) plot_kws.setdefault("shownotches", plot_kws.pop("notch", False)) box_artist = mpl.patches.Rectangle if fill else mpl.lines.Line2D @@ -628,7 +635,8 @@ def get_props(element, artist=mpl.lines.Line2D): grouped = sub_data.groupby(self.orient)[value_var] value_data = [x.to_numpy() for _, x in grouped] - stats = pd.DataFrame(mpl.cbook.boxplot_stats(value_data, whis=whis)) + stats = pd.DataFrame(mpl.cbook.boxplot_stats(value_data, whis=whis, + bootstrap=bootstrap)) positions = grouped.grouper.result_index.to_numpy(dtype=float) orig_width = width * self._native_width @@ -1167,7 +1175,7 @@ def plot_points( agg_var = {"x": "y", "y": "x"}[self.orient] iter_vars = ["hue"] - plot_kws = _normalize_kwargs(plot_kws, mpl.lines.Line2D) + plot_kws = normalize_kwargs(plot_kws, mpl.lines.Line2D) plot_kws.setdefault("linewidth", mpl.rcParams["lines.linewidth"] * 1.8) plot_kws.setdefault("markeredgewidth", plot_kws["linewidth"] * 0.75) plot_kws.setdefault("markersize", plot_kws["linewidth"] * np.sqrt(2 * np.pi)) @@ 
-1381,16 +1389,22 @@ class _CategoricalAggPlotter(_CategoricalPlotter): errorbar : string, (string, number) tuple, callable or None Name of errorbar method (either "ci", "pi", "se", or "sd"), or a tuple with a method name and a level parameter, or a function that maps from a - vector to a (min, max) interval, or None to hide errorbar. + vector to a (min, max) interval, or None to hide errorbar. See the + :doc:`errorbar tutorial ` for more information. .. versionadded:: v0.12.0 n_boot : int Number of bootstrap samples used to compute confidence intervals. + seed : int, `numpy.random.Generator`, or `numpy.random.RandomState` + Seed or random number generator for reproducible bootstrapping. units : name of variable in `data` or vector data Identifier of sampling units; used by the errorbar function to perform a multilevel bootstrap and account for repeated measures - seed : int, `numpy.random.Generator`, or `numpy.random.RandomState` - Seed or random number generator for reproducible bootstrapping.\ + weights : name of variable in `data` or vector data + Data values or column used to compute weighted statistics. + Note that the use of weights may limit other statistical options. + + .. 
versionadded:: v0.13.1\ """), ci=dedent("""\ ci : float @@ -2309,10 +2323,10 @@ def swarmplot( def barplot( data=None, *, x=None, y=None, hue=None, order=None, hue_order=None, - estimator="mean", errorbar=("ci", 95), n_boot=1000, units=None, seed=None, - orient=None, color=None, palette=None, saturation=.75, fill=True, hue_norm=None, - width=.8, dodge="auto", gap=0, log_scale=None, native_scale=False, formatter=None, - legend="auto", capsize=0, err_kws=None, + estimator="mean", errorbar=("ci", 95), n_boot=1000, seed=None, units=None, + weights=None, orient=None, color=None, palette=None, saturation=.75, + fill=True, hue_norm=None, width=.8, dodge="auto", gap=0, log_scale=None, + native_scale=False, formatter=None, legend="auto", capsize=0, err_kws=None, ci=deprecated, errcolor=deprecated, errwidth=deprecated, ax=None, **kwargs, ): @@ -2325,7 +2339,7 @@ def barplot( p = _CategoricalAggPlotter( data=data, - variables=dict(x=x, y=y, hue=hue, units=units), + variables=dict(x=x, y=y, hue=hue, units=units, weight=weights), order=order, orient=orient, color=color, @@ -2355,8 +2369,9 @@ def barplot( p.map_hue(palette=palette, order=hue_order, norm=hue_norm, saturation=saturation) color = _default_color(ax.bar, hue, color, kwargs, saturation=saturation) - aggregator = EstimateAggregator(estimator, errorbar, n_boot=n_boot, seed=seed) - err_kws = {} if err_kws is None else _normalize_kwargs(err_kws, mpl.lines.Line2D) + agg_cls = WeightedAggregator if "weight" in p.plot_data else EstimateAggregator + aggregator = agg_cls(estimator, errorbar, n_boot=n_boot, seed=seed) + err_kws = {} if err_kws is None else normalize_kwargs(err_kws, mpl.lines.Line2D) # Deprecations to remove in v0.15.0. 
err_kws, capsize = p._err_kws_backcompat(err_kws, errcolor, errwidth, capsize) @@ -2450,20 +2465,19 @@ def barplot( def pointplot( data=None, *, x=None, y=None, hue=None, order=None, hue_order=None, - estimator="mean", errorbar=("ci", 95), n_boot=1000, units=None, seed=None, - color=None, palette=None, hue_norm=None, markers=default, linestyles=default, - dodge=False, log_scale=None, native_scale=False, orient=None, capsize=0, - formatter=None, legend="auto", err_kws=None, + estimator="mean", errorbar=("ci", 95), n_boot=1000, seed=None, units=None, + weights=None, color=None, palette=None, hue_norm=None, markers=default, + linestyles=default, dodge=False, log_scale=None, native_scale=False, + orient=None, capsize=0, formatter=None, legend="auto", err_kws=None, ci=deprecated, errwidth=deprecated, join=deprecated, scale=deprecated, - ax=None, - **kwargs, + ax=None, **kwargs, ): errorbar = utils._deprecate_ci(errorbar, ci) p = _CategoricalAggPlotter( data=data, - variables=dict(x=x, y=y, hue=hue, units=units), + variables=dict(x=x, y=y, hue=hue, units=units, weight=weights), order=order, orient=orient, # Handle special backwards compatibility where pointplot originally @@ -2490,8 +2504,9 @@ def pointplot( p.map_hue(palette=palette, order=hue_order, norm=hue_norm) color = _default_color(ax.plot, hue, color, kwargs) - aggregator = EstimateAggregator(estimator, errorbar, n_boot=n_boot, seed=seed) - err_kws = {} if err_kws is None else _normalize_kwargs(err_kws, mpl.lines.Line2D) + agg_cls = WeightedAggregator if "weight" in p.plot_data else EstimateAggregator + aggregator = agg_cls(estimator, errorbar, n_boot=n_boot, seed=seed) + err_kws = {} if err_kws is None else normalize_kwargs(err_kws, mpl.lines.Line2D) # Deprecations to remove in v0.15.0. 
p._point_kwargs_backcompat(scale, join, kwargs) @@ -2730,12 +2745,12 @@ def countplot( def catplot( data=None, *, x=None, y=None, hue=None, row=None, col=None, kind="strip", - estimator="mean", errorbar=("ci", 95), n_boot=1000, units=None, seed=None, - order=None, hue_order=None, row_order=None, col_order=None, col_wrap=None, - height=5, aspect=1, log_scale=None, native_scale=False, formatter=None, - orient=None, color=None, palette=None, hue_norm=None, legend="auto", - legend_out=True, sharex=True, sharey=True, margin_titles=False, facet_kws=None, - ci=deprecated, **kwargs + estimator="mean", errorbar=("ci", 95), n_boot=1000, seed=None, units=None, + weights=None, order=None, hue_order=None, row_order=None, col_order=None, + col_wrap=None, height=5, aspect=1, log_scale=None, native_scale=False, + formatter=None, orient=None, color=None, palette=None, hue_norm=None, + legend="auto", legend_out=True, sharex=True, sharey=True, + margin_titles=False, facet_kws=None, ci=deprecated, **kwargs ): # Check for attempt to plot onto specific axes and warn @@ -2765,7 +2780,9 @@ def catplot( p = Plotter( data=data, - variables=dict(x=x, y=y, hue=hue, row=row, col=col, units=units), + variables=dict( + x=x, y=y, hue=hue, row=row, col=col, units=units, weight=weights + ), order=order, orient=orient, # Handle special backwards compatibility where pointplot originally @@ -2830,18 +2847,29 @@ def catplot( if saturation < 1: color = desaturate(color, saturation) - edgecolor = p._complement_color(kwargs.pop("edgecolor", default), color, p._hue_map) + if kind in ["strip", "swarm"]: + kwargs = normalize_kwargs(kwargs, mpl.collections.PathCollection) + kwargs["edgecolor"] = p._complement_color( + kwargs.pop("edgecolor", default), color, p._hue_map + ) width = kwargs.pop("width", 0.8) dodge = kwargs.pop("dodge", False if kind in undodged_kinds else "auto") if dodge == "auto": dodge = p._dodge_needed() + if "weight" in p.plot_data: + if kind not in ["bar", "point"]: + msg = f"The `weights` 
parameter has no effect with kind={kind!r}." + warnings.warn(msg, stacklevel=2) + agg_cls = WeightedAggregator + else: + agg_cls = EstimateAggregator + if kind == "strip": jitter = kwargs.pop("jitter", True) plot_kws = kwargs.copy() - plot_kws["edgecolor"] = edgecolor plot_kws.setdefault("zorder", 3) plot_kws.setdefault("linewidth", 0) if "s" not in plot_kws: @@ -2858,7 +2886,6 @@ def catplot( warn_thresh = kwargs.pop("warn_thresh", .05) plot_kws = kwargs.copy() - plot_kws["edgecolor"] = edgecolor plot_kws.setdefault("zorder", 3) if "s" not in plot_kws: plot_kws["s"] = plot_kws.pop("size", 5) ** 2 @@ -2988,9 +3015,7 @@ def catplot( elif kind == "point": - aggregator = EstimateAggregator( - estimator, errorbar, n_boot=n_boot, seed=seed - ) + aggregator = agg_cls(estimator, errorbar, n_boot=n_boot, seed=seed) markers = kwargs.pop("markers", default) linestyles = kwargs.pop("linestyles", default) @@ -2998,14 +3023,14 @@ def catplot( # Deprecations to remove in v0.15.0. # TODO Uncomment when removing deprecation backcompat # capsize = kwargs.pop("capsize", 0) - # err_kws = _normalize_kwargs(kwargs.pop("err_kws", {}), mpl.lines.Line2D) + # err_kws = normalize_kwargs(kwargs.pop("err_kws", {}), mpl.lines.Line2D) p._point_kwargs_backcompat( kwargs.pop("scale", deprecated), kwargs.pop("join", deprecated), kwargs ) err_kws, capsize = p._err_kws_backcompat( - _normalize_kwargs(kwargs.pop("err_kws", {}), mpl.lines.Line2D), + normalize_kwargs(kwargs.pop("err_kws", {}), mpl.lines.Line2D), None, errwidth=kwargs.pop("errwidth", deprecated), capsize=kwargs.pop("capsize", 0), @@ -3024,11 +3049,10 @@ def catplot( elif kind == "bar": - aggregator = EstimateAggregator( - estimator, errorbar, n_boot=n_boot, seed=seed - ) + aggregator = agg_cls(estimator, errorbar, n_boot=n_boot, seed=seed) + err_kws, capsize = p._err_kws_backcompat( - _normalize_kwargs(kwargs.pop("err_kws", {}), mpl.lines.Line2D), + normalize_kwargs(kwargs.pop("err_kws", {}), mpl.lines.Line2D), 
errcolor=kwargs.pop("errcolor", deprecated), errwidth=kwargs.pop("errwidth", deprecated), capsize=kwargs.pop("capsize", 0), @@ -3095,7 +3119,11 @@ def catplot( g._update_legend_data(ax) ax.legend_ = None - if legend and "hue" in p.variables and p.input_format == "long": + if legend == "auto": + show_legend = not p._redundant_hue and p.input_format != "wide" + else: + show_legend = bool(legend) + if show_legend: g.add_legend(title=p.variables.get("hue"), label_order=hue_order) if data is not None: diff --git a/seaborn/distributions.py b/seaborn/distributions.py index 4953f01d59..f8ec166cf4 100644 --- a/seaborn/distributions.py +++ b/seaborn/distributions.py @@ -10,6 +10,7 @@ import matplotlib as mpl import matplotlib.pyplot as plt import matplotlib.transforms as tx +from matplotlib.cbook import normalize_kwargs from matplotlib.colors import to_rgba from matplotlib.collections import LineCollection @@ -28,7 +29,6 @@ remove_na, _get_transform_functions, _kde_support, - _normalize_kwargs, _check_argument, _assign_default_kwargs, _default_color, @@ -171,7 +171,7 @@ def _artist_kws(self, kws, fill, element, multiple, color, alpha): """Handle differences between artists in filled/unfilled plots.""" kws = kws.copy() if fill: - kws = _normalize_kwargs(kws, mpl.collections.PolyCollection) + kws = normalize_kwargs(kws, mpl.collections.PolyCollection) kws.setdefault("facecolor", to_rgba(color, alpha)) if element == "bars": @@ -916,7 +916,7 @@ def plot_univariate_density( artist = mpl.collections.PolyCollection else: artist = mpl.lines.Line2D - plot_kws = _normalize_kwargs(plot_kws, artist) + plot_kws = normalize_kwargs(plot_kws, artist) # Input checking _check_argument("multiple", ["layer", "stack", "fill"], multiple) @@ -1593,7 +1593,7 @@ def kdeplot( # Handle (past) deprecation of `data2` if "data2" in kwargs: msg = "`data2` has been removed (replaced by `y`); please update your code." 
- TypeError(msg) + raise TypeError(msg) # Handle deprecation of `vertical` vertical = kwargs.pop("vertical", None) diff --git a/seaborn/rcmod.py b/seaborn/rcmod.py index 978dc175a4..de23832314 100644 --- a/seaborn/rcmod.py +++ b/seaborn/rcmod.py @@ -336,9 +336,8 @@ def plotting_context(context=None, font_scale=1, rc=None): """ Get the parameters that control the scaling of plot elements. - This affects things like the size of the labels, lines, and other elements - of the plot, but not the overall style. This is accomplished using the - matplotlib rcParams system. + These parameters correspond to label size, line thickness, etc. For more + information, see the :doc:`aesthetics tutorial <../tutorial/aesthetics>`. The base context is "notebook", and the other contexts are "paper", "talk", and "poster", which are version of the notebook parameters scaled by different @@ -437,9 +436,9 @@ def set_context(context=None, font_scale=1, rc=None): """ Set the parameters that control the scaling of plot elements. - This affects things like the size of the labels, lines, and other elements - of the plot, but not the overall style. This is accomplished using the - matplotlib rcParams system. + These parameters correspond to label size, line thickness, etc. + Calling this function modifies the global matplotlib `rcParams`. For more + information, see the :doc:`aesthetics tutorial <../tutorial/aesthetics>`. 
The base context is "notebook", and the other contexts are "paper", "talk", and "poster", which are version of the notebook parameters scaled by different diff --git a/seaborn/regression.py b/seaborn/regression.py index 5a16fc96ac..5e5503a422 100644 --- a/seaborn/regression.py +++ b/seaborn/regression.py @@ -574,7 +574,7 @@ def lineplot(self, ax, kws): def lmplot( - data=None, *, + data, *, x=None, y=None, hue=None, col=None, row=None, palette=None, col_wrap=None, height=5, aspect=1, markers="o", sharex=None, sharey=None, hue_order=None, col_order=None, row_order=None, diff --git a/seaborn/relational.py b/seaborn/relational.py index d4ade9d46a..bd5ecfdfd1 100644 --- a/seaborn/relational.py +++ b/seaborn/relational.py @@ -5,6 +5,7 @@ import pandas as pd import matplotlib as mpl import matplotlib.pyplot as plt +from matplotlib.cbook import normalize_kwargs from ._base import ( VectorPlotter, @@ -14,10 +15,9 @@ _default_color, _deprecate_ci, _get_transform_functions, - _normalize_kwargs, _scatter_legend_artist, ) -from ._statistics import EstimateAggregator +from ._statistics import EstimateAggregator, WeightedAggregator from .axisgrid import FacetGrid, _facet_docs from ._docstrings import DocstringComponents, _core_docs @@ -237,7 +237,7 @@ def plot(self, ax, kws): # gotten from the corresponding matplotlib function, and calling the # function will advance the axes property cycle. 
- kws = _normalize_kwargs(kws, mpl.lines.Line2D) + kws = normalize_kwargs(kws, mpl.lines.Line2D) kws.setdefault("markeredgewidth", 0.75) kws.setdefault("markeredgecolor", "w") @@ -252,7 +252,8 @@ def plot(self, ax, kws): raise ValueError(err.format(self.err_style)) # Initialize the aggregation object - agg = EstimateAggregator( + weighted = "weight" in self.plot_data + agg = (WeightedAggregator if weighted else EstimateAggregator)( self.estimator, self.errorbar, n_boot=self.n_boot, seed=self.seed, ) @@ -399,7 +400,7 @@ def plot(self, ax, kws): if data.empty: return - kws = _normalize_kwargs(kws, mpl.collections.PathCollection) + kws = normalize_kwargs(kws, mpl.collections.PathCollection) # Define the vectors of x and y positions empty = np.full(len(data), np.nan) @@ -464,7 +465,7 @@ def plot(self, ax, kws): def lineplot( data=None, *, - x=None, y=None, hue=None, size=None, style=None, units=None, + x=None, y=None, hue=None, size=None, style=None, units=None, weights=None, palette=None, hue_order=None, hue_norm=None, sizes=None, size_order=None, size_norm=None, dashes=True, markers=None, style_order=None, @@ -478,7 +479,9 @@ def lineplot( p = _LinePlotter( data=data, - variables=dict(x=x, y=y, hue=hue, size=size, style=style, units=units), + variables=dict( + x=x, y=y, hue=hue, size=size, style=style, units=units, weight=weights + ), estimator=estimator, n_boot=n_boot, seed=seed, errorbar=errorbar, sort=sort, orient=orient, err_style=err_style, err_kws=err_kws, legend=legend, @@ -536,6 +539,10 @@ def lineplot( and/or markers. Can have a numeric dtype but will always be treated as categorical. {params.rel.units} +weights : vector or key in `data` + Data values or column used to compute weighted estimation. + Note that use of weights currently limits the choice of statistics + to a 'mean' estimator and 'ci' errorbar. 
{params.core.palette} {params.core.hue_order} {params.core.hue_norm} @@ -687,7 +694,7 @@ def scatterplot( def relplot( data=None, *, - x=None, y=None, hue=None, size=None, style=None, units=None, + x=None, y=None, hue=None, size=None, style=None, units=None, weights=None, row=None, col=None, col_wrap=None, row_order=None, col_order=None, palette=None, hue_order=None, hue_norm=None, sizes=None, size_order=None, size_norm=None, @@ -725,9 +732,14 @@ def relplot( variables = dict(x=x, y=y, hue=hue, size=size, style=style) if kind == "line": variables["units"] = units - elif units is not None: - msg = "The `units` parameter of `relplot` has no effect with kind='scatter'" - warnings.warn(msg, stacklevel=2) + variables["weight"] = weights + else: + if units is not None: + msg = "The `units` parameter has no effect with kind='scatter'." + warnings.warn(msg, stacklevel=2) + if weights is not None: + msg = "The `weights` parameter has no effect with kind='scatter'." + warnings.warn(msg, stacklevel=2) p = Plotter( data=data, variables=variables, @@ -780,17 +792,18 @@ def relplot( # Add the grid semantics onto the plotter grid_variables = dict( - x=x, y=y, row=row, col=col, - hue=hue, size=size, style=style, + x=x, y=y, row=row, col=col, hue=hue, size=size, style=style, ) if kind == "line": - grid_variables["units"] = units + grid_variables.update(units=units, weights=weights) p.assign_variables(data, grid_variables) # Define the named variables for plotting on each facet # Rename the variables with a leading underscore to avoid # collisions with faceting variable names plot_variables = {v: f"_{v}" for v in variables} + if "weight" in plot_variables: + plot_variables["weights"] = plot_variables.pop("weight") plot_kws.update(plot_variables) # Pass the row/col variables to FacetGrid with their original @@ -918,6 +931,10 @@ def relplot( Grouping variable that will produce elements with different styles. Can have a numeric dtype but will always be treated as categorical. 
{params.rel.units} +weights : vector or key in `data` + Data values or column used to compute weighted estimation. + Note that use of weights currently limits the choice of statistics + to a 'mean' estimator and 'ci' errorbar. {params.facets.rowcol} {params.facets.col_wrap} row_order, col_order : lists of strings diff --git a/seaborn/utils.py b/seaborn/utils.py index 83527ba445..98720ba36d 100644 --- a/seaborn/utils.py +++ b/seaborn/utils.py @@ -55,29 +55,6 @@ def ci_to_errsize(cis, heights): return errsize -def _normal_quantile_func(q): - """ - Compute the quantile function of the standard normal distribution. - - This wrapper exists because we are dropping scipy as a mandatory dependency - but statistics.NormalDist was added to the standard library in 3.8. - - """ - try: - from statistics import NormalDist - qf = np.vectorize(NormalDist().inv_cdf) - except ImportError: - try: - from scipy.stats import norm - qf = norm.ppf - except ImportError: - msg = ( - "Standard normal quantile functions require either Python>=3.8 or scipy" - ) - raise RuntimeError(msg) - return qf(q) - - def _draw_figure(fig): """Force draw of a matplotlib figure, accounting for back-compat.""" # See https://github.com/matplotlib/matplotlib/issues/19197 for context @@ -110,7 +87,7 @@ def _default_color(method, hue, color, kws, saturation=1): elif method.__name__ == "plot": - color = _normalize_kwargs(kws, mpl.lines.Line2D).get("color") + color = normalize_kwargs(kws, mpl.lines.Line2D).get("color") scout, = method([], [], scalex=False, scaley=False, color=color) color = scout.get_color() scout.remove() @@ -155,7 +132,7 @@ def _default_color(method, hue, color, kws, saturation=1): elif method.__name__ == "fill_between": - kws = _normalize_kwargs(kws, mpl.collections.PolyCollection) + kws = normalize_kwargs(kws, mpl.collections.PolyCollection) scout = method([], [], **kws) facecolor = scout.get_facecolor() color = to_rgb(facecolor[0]) @@ -714,11 +691,7 @@ def get_view_interval(self): 
formatter.set_scientific(False) formatter.axis = dummy_axis() - # TODO: The following two lines should be replaced - # once pinned matplotlib>=3.1.0 with: - # formatted_levels = formatter.format_ticks(raw_levels) - formatter.set_locs(raw_levels) - formatted_levels = [formatter(x) for x in raw_levels] + formatted_levels = formatter.format_ticks(raw_levels) return raw_levels, formatted_levels @@ -774,26 +747,6 @@ def to_utf8(obj): return str(obj) -def _normalize_kwargs(kws, artist): - """Wrapper for mpl.cbook.normalize_kwargs that supports <= 3.2.1.""" - _alias_map = { - 'color': ['c'], - 'linewidth': ['lw'], - 'linestyle': ['ls'], - 'facecolor': ['fc'], - 'edgecolor': ['ec'], - 'markerfacecolor': ['mfc'], - 'markeredgecolor': ['mec'], - 'markeredgewidth': ['mew'], - 'markersize': ['ms'] - } - try: - kws = normalize_kwargs(kws, artist) - except AttributeError: - kws = normalize_kwargs(kws, _alias_map) - return kws - - def _check_argument(param, options, value, prefix=False): """Raise if value for param is not in options.""" if prefix and value is not None: @@ -905,7 +858,7 @@ def _version_predates(lib: ModuleType, version: str) -> bool: def _scatter_legend_artist(**kws): - kws = _normalize_kwargs(kws, mpl.collections.PathCollection) + kws = normalize_kwargs(kws, mpl.collections.PathCollection) edgecolor = kws.pop("edgecolor", None) rc = mpl.rcParams diff --git a/tests/_core/test_plot.py b/tests/_core/test_plot.py index f6b5cd0bec..5554ea650f 100644 --- a/tests/_core/test_plot.py +++ b/tests/_core/test_plot.py @@ -579,10 +579,6 @@ def test_pair_categories(self): assert_vector_equal(m.passed_data[0]["x"], pd.Series([0., 1.], [0, 1])) assert_vector_equal(m.passed_data[1]["x"], pd.Series([0., 1.], [0, 1])) - @pytest.mark.xfail( - _version_predates(mpl, "3.4.0"), - reason="Sharing paired categorical axes requires matplotlib>3.4.0" - ) def test_pair_categories_shared(self): data = [("a", "a"), ("b", "c")] @@ -938,7 +934,7 @@ def test_theme_params(self): def 
test_theme_error(self): p = Plot() - with pytest.raises(TypeError, match=r"theme\(\) takes 1 positional"): + with pytest.raises(TypeError, match=r"theme\(\) takes 2 positional"): p.theme("arg1", "arg2") def test_theme_validation(self): @@ -1095,6 +1091,32 @@ def test_layout_size(self): p = Plot().layout(size=size).plot() assert tuple(p._figure.get_size_inches()) == size + @pytest.mark.skipif( + _version_predates(mpl, "3.6"), + reason="mpl<3.6 does not have get_layout_engine", + ) + def test_layout_extent(self): + + p = Plot().layout(extent=(.1, .2, .6, 1)).plot() + assert p._figure.get_layout_engine().get()["rect"] == [.1, .2, .5, .8] + + @pytest.mark.skipif( + _version_predates(mpl, "3.6"), + reason="mpl<3.6 does not have get_layout_engine", + ) + def test_constrained_layout_extent(self): + + p = Plot().layout(engine="constrained", extent=(.1, .2, .6, 1)).plot() + assert p._figure.get_layout_engine().get()["rect"] == [.1, .2, .5, .8] + + def test_base_layout_extent(self): + + p = Plot().layout(engine=None, extent=(.1, .2, .6, 1)).plot() + assert p._figure.subplotpars.left == 0.1 + assert p._figure.subplotpars.right == 0.6 + assert p._figure.subplotpars.bottom == 0.2 + assert p._figure.subplotpars.top == 1 + def test_on_axes(self): ax = mpl.figure.Figure().subplots() @@ -1115,10 +1137,6 @@ def test_on_figure(self, facet): assert m.passed_axes == f.axes assert p._figure is f - @pytest.mark.skipif( - _version_predates(mpl, "3.4"), - reason="mpl<3.4 does not have SubFigure", - ) @pytest.mark.parametrize("facet", [True, False]) def test_on_subfigure(self, facet): @@ -1834,6 +1852,12 @@ def test_1d_column_wrapped(self): for s in subplots[1:]: ax = s["ax"] assert ax.xaxis.get_label().get_visible() + # mpl3.7 added a getter for tick params, but both yaxis and xaxis return + # the same entry of "labelleft" instead of "labelbottom" for xaxis + if not _version_predates(mpl, "3.7"): + assert ax.xaxis.get_tick_params()["labelleft"] + else: + assert len(ax.get_xticklabels()) > 
0 assert all(t.get_visible() for t in ax.get_xticklabels()) for s in subplots[1:-1]: @@ -1858,6 +1882,12 @@ def test_1d_row_wrapped(self): for s in subplots[-2:]: ax = s["ax"] assert ax.xaxis.get_label().get_visible() + # mpl3.7 added a getter for tick params, but both yaxis and xaxis return + # the same entry of "labelleft" instead of "labelbottom" for xaxis + if not _version_predates(mpl, "3.7"): + assert ax.xaxis.get_tick_params()["labelleft"] + else: + assert len(ax.get_xticklabels()) > 0 assert all(t.get_visible() for t in ax.get_xticklabels()) for s in subplots[:-2]: diff --git a/tests/_core/test_properties.py b/tests/_core/test_properties.py index b4764762eb..c87dd918d0 100644 --- a/tests/_core/test_properties.py +++ b/tests/_core/test_properties.py @@ -3,11 +3,11 @@ import pandas as pd import matplotlib as mpl from matplotlib.colors import same_color, to_rgb, to_rgba +from matplotlib.markers import MarkerStyle import pytest from numpy.testing import assert_array_equal -from seaborn.utils import _version_predates from seaborn._core.rules import categorical_order from seaborn._core.scales import Nominal, Continuous, Boolean from seaborn._core.properties import ( @@ -21,7 +21,7 @@ Marker, PointSize, ) -from seaborn._compat import MarkerStyle, get_colormap +from seaborn._compat import get_colormap from seaborn.palettes import color_palette @@ -250,9 +250,8 @@ def test_standardization(self): assert f("#123456") == to_rgb("#123456") assert f("#12345678") == to_rgba("#12345678") - if not _version_predates(mpl, "3.4.0"): - assert f("#123") == to_rgb("#123") - assert f("#1234") == to_rgba("#1234") + assert f("#123") == to_rgb("#123") + assert f("#1234") == to_rgba("#1234") class ObjectPropertyBase(DataFixtures): @@ -360,6 +359,17 @@ class TestMarker(ObjectPropertyBase): values = ["o", (5, 2, 0), MarkerStyle("^")] standardized_values = [MarkerStyle(x) for x in values] + def assert_equal(self, a, b): + a_path, b_path = a.get_path(), b.get_path() + 
assert_array_equal(a_path.vertices, b_path.vertices) + assert_array_equal(a_path.codes, b_path.codes) + assert a_path.simplify_threshold == b_path.simplify_threshold + assert a_path.should_simplify == b_path.should_simplify + + assert a.get_joinstyle() == b.get_joinstyle() + assert a.get_transform().to_values() == b.get_transform().to_values() + assert a.get_fillstyle() == b.get_fillstyle() + def unpack(self, x): return ( x.get_path(), diff --git a/tests/_core/test_rules.py b/tests/_core/test_rules.py index 714d5ead5c..161d2af292 100644 --- a/tests/_core/test_rules.py +++ b/tests/_core/test_rules.py @@ -52,6 +52,11 @@ def test_variable_type(): assert variable_type(s, boolean_type="categorical") == "categorical" assert variable_type(s, boolean_type="boolean") == "boolean" + # This should arguably be datetime, but we don't currently handle it correctly + # Test is mainly asserting that this doesn't fail on the boolean check. + s = pd.timedelta_range(1, periods=3, freq="D").to_series() + assert variable_type(s) == "categorical" + s_cat = s.astype("category") assert variable_type(s_cat, boolean_type="categorical") == "categorical" assert variable_type(s_cat, boolean_type="numeric") == "categorical" @@ -61,6 +66,9 @@ def test_variable_type(): assert variable_type(s, boolean_type="boolean") == "boolean" assert variable_type(s, boolean_type="boolean", strict_boolean=True) == "numeric" + s = pd.Series([1, 0, 0]) + assert variable_type(s, boolean_type="boolean") == "boolean" + s = pd.Series([pd.Timestamp(1), pd.Timestamp(2)]) assert variable_type(s) == "datetime" assert variable_type(s.astype(object)) == "datetime" diff --git a/tests/_core/test_scales.py b/tests/_core/test_scales.py index 8be674f86e..3218a8ac03 100644 --- a/tests/_core/test_scales.py +++ b/tests/_core/test_scales.py @@ -571,10 +571,6 @@ def test_empty_data(self): s = Nominal()._setup(x, Coordinate()) assert_array_equal(s(x), []) - @pytest.mark.skipif( - _version_predates(mpl, "3.4.0"), - reason="Test 
failing on older matplotlib for unclear reasons", - ) def test_finalize(self, x): ax = mpl.figure.Figure().subplots() diff --git a/tests/_stats/test_aggregation.py b/tests/_stats/test_aggregation.py index 08291d449b..b3a5d58aab 100644 --- a/tests/_stats/test_aggregation.py +++ b/tests/_stats/test_aggregation.py @@ -115,6 +115,17 @@ def test_median_pi(self, df): expected = est.assign(ymin=grouped.min()["y"], ymax=grouped.max()["y"]) assert_frame_equal(res, expected) + def test_weighted_mean(self, df, rng): + + weights = rng.uniform(0, 5, len(df)) + gb = self.get_groupby(df[["x", "y"]], "x") + df = df.assign(weight=weights) + res = Est("mean")(df, gb, "x", {}) + for _, res_row in res.iterrows(): + rows = df[df["x"] == res_row["x"]] + expected = np.average(rows["y"], weights=rows["weight"]) + assert res_row["y"] == expected + def test_seed(self, df): ori = "x" diff --git a/tests/test_base.py b/tests/test_base.py index eea967cf93..4dfb3edfb4 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -1508,6 +1508,11 @@ def test_variable_type(self): assert variable_type(s.to_numpy()) == "categorical" assert variable_type(s.to_list()) == "categorical" + # This should arguably be datetime, but we don't currently handle it correctly + # Test is mainly asserting that this doesn't fail on the boolean check. 
+ s = pd.timedelta_range(1, periods=3, freq="D").to_series() + assert variable_type(s) == "categorical" + s = pd.Series([True, False, False]) assert variable_type(s) == "numeric" assert variable_type(s, boolean_type="categorical") == "categorical" diff --git a/tests/test_categorical.py b/tests/test_categorical.py index 7031d0940c..eaca2e78de 100644 --- a/tests/test_categorical.py +++ b/tests/test_categorical.py @@ -999,6 +999,16 @@ def test_dodge_native_scale_log(self, long_df): widths.append(np.ptp(coords)) assert np.std(widths) == approx(0) + def test_dodge_without_hue(self, long_df): + + ax = boxplot(long_df, x="a", y="y", dodge=True) + bxp, = ax.containers + levels = categorical_order(long_df["a"]) + for i, level in enumerate(levels): + data = long_df.loc[long_df["a"] == level, "y"] + self.check_box(bxp[i], data, "x", i) + self.check_whiskers(bxp[i], data, "x", i) + @pytest.mark.parametrize("orient", ["x", "y"]) def test_log_data_scale(self, long_df, orient): @@ -2131,6 +2141,13 @@ def test_estimate_func(self, long_df): for i, bar in enumerate(ax.patches): assert bar.get_height() == approx(agg_df[order[i]]) + def test_weighted_estimate(self, long_df): + + ax = barplot(long_df, y="y", weights="x") + height = ax.patches[0].get_height() + expected = np.average(long_df["y"], weights=long_df["x"]) + assert height == expected + def test_estimate_log_transform(self, long_df): ax = mpl.figure.Figure().subplots() @@ -2307,7 +2324,7 @@ def test_err_kws(self, fill): dict(data="long", x="a", y="y", errorbar=("pi", 50)), dict(data="long", x="a", y="y", errorbar=None), dict(data="long", x="a", y="y", capsize=.3, err_kws=dict(c="k")), - dict(data="long", x="a", y="y", color="blue", ec="green", alpha=.5), + dict(data="long", x="a", y="y", color="blue", edgecolor="green", alpha=.5), ] ) def test_vs_catplot(self, long_df, wide_df, null_df, flat_series, kwargs): @@ -2490,6 +2507,13 @@ def test_estimate(self, long_df, estimator): for i, xy in enumerate(ax.lines[0].get_xydata()): 
assert tuple(xy) == approx((i, agg_df[order[i]])) + def test_weighted_estimate(self, long_df): + + ax = pointplot(long_df, y="y", weights="x") + val = ax.lines[0].get_ydata().item() + expected = np.average(long_df["y"], weights=long_df["x"]) + assert val == expected + def test_estimate_log_transform(self, long_df): ax = mpl.figure.Figure().subplots() @@ -3125,6 +3149,20 @@ def test_invalid_kind(self, long_df): with pytest.raises(ValueError, match="Invalid `kind`: 'wrong'"): catplot(long_df, kind="wrong") + def test_legend_with_auto(self): + + g1 = catplot(self.df, x="g", y="y", hue="g", legend='auto') + assert g1._legend is None + + g2 = catplot(self.df, x="g", y="y", hue="g", legend=True) + assert g2._legend is not None + + def test_weights_warning(self, long_df): + + with pytest.warns(UserWarning, match="The `weights` parameter"): + g = catplot(long_df, x="a", y="y", weights="z") + assert g.ax is not None + class TestBeeswarm: diff --git a/tests/test_distributions.py b/tests/test_distributions.py index e5f5c4aad8..0df1a15beb 100644 --- a/tests/test_distributions.py +++ b/tests/test_distributions.py @@ -920,6 +920,10 @@ def test_legend(self, long_df): assert ax.legend_ is None + def test_replaced_kws(self, long_df): + with pytest.raises(TypeError, match=r"`data2` has been removed"): + kdeplot(data=long_df, x="x", data2="y") + class TestKDEPlotBivariate: diff --git a/tests/test_matrix.py b/tests/test_matrix.py index 110f4f5c79..889e5da461 100644 --- a/tests/test_matrix.py +++ b/tests/test_matrix.py @@ -387,8 +387,6 @@ def test_heatmap_cbar(self): assert len(f.axes) == 2 plt.close(f) - @pytest.mark.xfail(mpl.__version__ == "3.1.1", - reason="matplotlib 3.1.1 bug") def test_heatmap_axes(self): ax = mat.heatmap(self.df_norm) @@ -443,10 +441,7 @@ def test_heatmap_inner_lines(self): def test_square_aspect(self): ax = mat.heatmap(self.df_norm, square=True) - obs_aspect = ax.get_aspect() - # mpl>3.3 returns 1 for setting "equal" aspect - # so test for the two possible 
equal outcomes - assert obs_aspect == "equal" or obs_aspect == 1 + npt.assert_equal(ax.get_aspect(), 1) def test_mask_validation(self): @@ -679,8 +674,6 @@ def test_dendrogram_plot(self): assert len(ax.collections[0].get_paths()) == len(d.dependent_coord) - @pytest.mark.xfail(mpl.__version__ == "3.1.1", - reason="matplotlib 3.1.1 bug") def test_dendrogram_rotate(self): kws = self.default_kws.copy() kws['rotate'] = True diff --git a/tests/test_regression.py b/tests/test_regression.py index 436759721c..368f6c50a6 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -16,7 +16,6 @@ _no_statsmodels = True from seaborn import regression as lm -from seaborn.utils import _version_predates from seaborn.palettes import color_palette rs = np.random.RandomState(0) @@ -611,8 +610,6 @@ def test_lmplot_scatter_kws(self): npt.assert_array_equal(red, red_scatter.get_facecolors()[0, :3]) npt.assert_array_equal(blue, blue_scatter.get_facecolors()[0, :3]) - @pytest.mark.skipif(_version_predates(mpl, "3.4"), - reason="MPL bug #15967") @pytest.mark.parametrize("sharex", [True, False]) def test_lmplot_facet_truncate(self, sharex): diff --git a/tests/test_relational.py b/tests/test_relational.py index 4c7ff43ee9..f4f97068a9 100644 --- a/tests/test_relational.py +++ b/tests/test_relational.py @@ -578,6 +578,15 @@ def test_relplot_styles(self, long_df): expected_paths = [paths[val] for val in grp_df["a"]] assert self.paths_equal(points.get_paths(), expected_paths) + def test_relplot_weighted_estimator(self, long_df): + + g = relplot(data=long_df, x="a", y="y", weights="x", kind="line") + ydata = g.ax.lines[0].get_ydata() + for i, level in enumerate(categorical_order(long_df["a"])): + pos_df = long_df[long_df["a"] == level] + expected = np.average(pos_df["y"], weights=pos_df["x"]) + assert ydata[i] == pytest.approx(expected) + def test_relplot_stringy_numerics(self, long_df): long_df["x_str"] = long_df["x"].astype(str) @@ -668,12 +677,16 @@ def 
test_facet_variable_collision(self, long_df): ) assert g.axes.shape == (1, len(col_data.unique())) - def test_relplot_scatter_units(self, long_df): + def test_relplot_scatter_unused_variables(self, long_df): with pytest.warns(UserWarning, match="The `units` parameter"): g = relplot(long_df, x="x", y="y", units="a") assert g.ax is not None + with pytest.warns(UserWarning, match="The `weights` parameter"): + g = relplot(long_df, x="x", y="y", weights="x") + assert g.ax is not None + def test_ax_kwarg_removal(self, long_df): f, ax = plt.subplots() @@ -1055,6 +1068,15 @@ def test_plot(self, long_df, repeated_df): ax.clear() p.plot(ax, {}) + def test_weights(self, long_df): + + ax = lineplot(long_df, x="a", y="y", weights="x") + vals = ax.lines[0].get_ydata() + for i, level in enumerate(categorical_order(long_df["a"])): + pos_df = long_df[long_df["a"] == level] + expected = np.average(pos_df["y"], weights=pos_df["x"]) + assert vals[i] == pytest.approx(expected) + def test_non_aggregated_data(self): x = [1, 2, 3, 4] @@ -1302,6 +1324,9 @@ def test_lineplot_smoke( lineplot(x="x", y="y", hue="f", size="s", data=object_df) ax.clear() + lineplot(x="x", y="y", hue="a", data=long_df.iloc[:0]) + ax.clear() + def test_ci_deprecation(self, long_df): axs = plt.figure().subplots(2) diff --git a/tests/test_statistics.py b/tests/test_statistics.py index c0d4e83cf0..ab6cc027f1 100644 --- a/tests/test_statistics.py +++ b/tests/test_statistics.py @@ -15,6 +15,7 @@ ECDF, EstimateAggregator, LetterValues, + WeightedAggregator, _validate_errorbar_arg, _no_scipy, ) @@ -632,6 +633,39 @@ def test_errorbar_validation(self): _validate_errorbar_arg(arg) +class TestWeightedAggregator: + + def test_weighted_mean(self, long_df): + + long_df["weight"] = long_df["x"] + est = WeightedAggregator("mean") + out = est(long_df, "y") + expected = np.average(long_df["y"], weights=long_df["weight"]) + assert_array_equal(out["y"], expected) + assert_array_equal(out["ymin"], np.nan) + 
assert_array_equal(out["ymax"], np.nan) + + def test_weighted_ci(self, long_df): + + long_df["weight"] = long_df["x"] + est = WeightedAggregator("mean", "ci") + out = est(long_df, "y") + expected = np.average(long_df["y"], weights=long_df["weight"]) + assert_array_equal(out["y"], expected) + assert (out["ymin"] <= out["y"]).all() + assert (out["ymax"] >= out["y"]).all() + + def test_limited_estimator(self): + + with pytest.raises(ValueError, match="Weighted estimator must be 'mean'"): + WeightedAggregator("median") + + def test_limited_ci(self): + + with pytest.raises(ValueError, match="Error bar method must be 'ci'"): + WeightedAggregator("mean", "sd") + + class TestLetterValues: @pytest.fixture diff --git a/tests/test_utils.py b/tests/test_utils.py index be3744cea2..be237d5a10 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -145,13 +145,13 @@ def test_despine(self): utils.despine() for side in self.outer_sides: - assert ~ax.spines[side].get_visible() + assert not ax.spines[side].get_visible() for side in self.inner_sides: assert ax.spines[side].get_visible() utils.despine(**dict(zip(self.sides, [True] * 4))) for side in self.sides: - assert ~ax.spines[side].get_visible() + assert not ax.spines[side].get_visible() def test_despine_specific_axes(self): f, (ax1, ax2) = plt.subplots(2, 1) @@ -162,7 +162,7 @@ def test_despine_specific_axes(self): assert ax1.spines[side].get_visible() for side in self.outer_sides: - assert ~ax2.spines[side].get_visible() + assert not ax2.spines[side].get_visible() for side in self.inner_sides: assert ax2.spines[side].get_visible()