-
-
Notifications
You must be signed in to change notification settings - Fork 18.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ENH: Add built-in function for Styler to format the text displayed for missing values #29118
Changes from 11 commits
c42de40
01632ce
53b0843
7a5dd65
da3cb43
bdfff98
b86bdc6
a1e9a9e
def71c9
af396b1
3d4cfd0
bd99db9
346eee6
7935359
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,7 +8,7 @@ | |
import copy | ||
from functools import partial | ||
from itertools import product | ||
from typing import Optional | ||
from typing import Any, Callable, DefaultDict, Dict, List, Optional, Sequence, Tuple | ||
from uuid import uuid1 | ||
|
||
import numpy as np | ||
|
@@ -71,6 +71,11 @@ class Styler: | |
The ``id`` takes the form ``T_<uuid>_row<num_row>_col<num_col>`` | ||
where ``<uuid>`` is the unique identifier, ``<num_row>`` is the row | ||
number and ``<num_col>`` is the column number. | ||
na_rep : str, optional | ||
Representation for missing values. | ||
If ``na_rep`` is None, no special formatting is applied. | ||
|
||
.. versionadded:: 1.0.0 | ||
|
||
Attributes | ||
---------- | ||
|
@@ -126,9 +131,10 @@ def __init__( | |
caption=None, | ||
table_attributes=None, | ||
cell_ids=True, | ||
na_rep: Optional[str] = None, | ||
): | ||
self.ctx = defaultdict(list) | ||
self._todo = [] | ||
self.ctx = defaultdict(list) # type: DefaultDict[Tuple[int, int], List[str]] | ||
self._todo = [] # type: List[Tuple[Callable, Tuple, Dict]] | ||
|
||
if not isinstance(data, (pd.Series, pd.DataFrame)): | ||
raise TypeError("``data`` must be a Series or DataFrame") | ||
|
@@ -149,19 +155,24 @@ def __init__( | |
self.precision = precision | ||
self.table_attributes = table_attributes | ||
self.hidden_index = False | ||
self.hidden_columns = [] | ||
self.hidden_columns = [] # type: Sequence[int] | ||
self.cell_ids = cell_ids | ||
self.na_rep = na_rep | ||
|
||
# display_funcs maps (row, col) -> formatting function | ||
|
||
def default_display_func(x): | ||
if is_float(x): | ||
if self.na_rep is not None and pd.isna(x): | ||
return self.na_rep | ||
elif is_float(x): | ||
display_format = "{0:.{precision}f}".format(x, precision=self.precision) | ||
return display_format | ||
else: | ||
return x | ||
|
||
self._display_funcs = defaultdict(lambda: default_display_func) | ||
self._display_funcs = defaultdict( | ||
lambda: default_display_func | ||
) # type: DefaultDict[Tuple[int, int], Callable[[Any], str]] | ||
|
||
def _repr_html_(self): | ||
""" | ||
|
@@ -416,16 +427,22 @@ def format_attr(pair): | |
table_attributes=table_attr, | ||
) | ||
|
||
def format(self, formatter, subset=None): | ||
def format(self, formatter, subset=None, na_rep: Optional[str] = None): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not required here, but if you wanted to annotate the other arguments in a follow-up, that would be super helpful. |
||
""" | ||
Format the text display value of cells. | ||
|
||
Parameters | ||
---------- | ||
formatter : str, callable, or dict | ||
formatter : str, callable, dict or None | ||
If ``formatter`` is None, the default formatter is used. | ||
subset : IndexSlice | ||
An argument to ``DataFrame.loc`` that restricts which elements | ||
``formatter`` is applied to. | ||
na_rep : str, optional | ||
Representation for missing values. | ||
If ``na_rep`` is None, no special formatting is applied. | ||
|
||
.. versionadded:: 1.0.0 | ||
|
||
Returns | ||
------- | ||
|
@@ -451,6 +468,10 @@ def format(self, formatter, subset=None): | |
>>> df['c'] = ['a', 'b', 'c', 'd'] | ||
>>> df.style.format({'c': str.upper}) | ||
""" | ||
if formatter is None: | ||
assert self._display_funcs.default_factory is not None | ||
formatter = self._display_funcs.default_factory() | ||
|
||
if subset is None: | ||
row_locs = range(len(self.data)) | ||
col_locs = range(len(self.data.columns)) | ||
|
@@ -466,16 +487,16 @@ def format(self, formatter, subset=None): | |
if is_dict_like(formatter): | ||
for col, col_formatter in formatter.items(): | ||
# formatter must be callable, so '{}' are converted to lambdas | ||
col_formatter = _maybe_wrap_formatter(col_formatter) | ||
col_formatter = _maybe_wrap_formatter(col_formatter, na_rep) | ||
col_num = self.data.columns.get_indexer_for([col])[0] | ||
|
||
for row_num in row_locs: | ||
self._display_funcs[(row_num, col_num)] = col_formatter | ||
else: | ||
# single scalar to format all cells with | ||
formatter = _maybe_wrap_formatter(formatter, na_rep) | ||
locs = product(*(row_locs, col_locs)) | ||
for i, j in locs: | ||
formatter = _maybe_wrap_formatter(formatter) | ||
self._display_funcs[(i, j)] = formatter | ||
return self | ||
|
||
|
@@ -553,6 +574,7 @@ def _copy(self, deepcopy=False): | |
caption=self.caption, | ||
uuid=self.uuid, | ||
table_styles=self.table_styles, | ||
na_rep=self.na_rep, | ||
) | ||
if deepcopy: | ||
styler.ctx = copy.deepcopy(self.ctx) | ||
|
@@ -896,6 +918,23 @@ def set_table_styles(self, table_styles): | |
self.table_styles = table_styles | ||
return self | ||
|
||
def set_na_rep(self, na_rep: str) -> "Styler": | ||
""" | ||
Set the missing data representation on a Styler. | ||
|
||
.. versionadded:: 1.0.0 | ||
|
||
Parameters | ||
---------- | ||
na_rep : str | ||
|
||
Returns | ||
------- | ||
self : Styler | ||
""" | ||
self.na_rep = na_rep | ||
return self | ||
|
||
def hide_index(self): | ||
""" | ||
Hide any indices from rendering. | ||
|
@@ -1487,14 +1526,22 @@ def _get_level_lengths(index, hidden_elements=None): | |
return non_zero_lengths | ||
|
||
|
||
def _maybe_wrap_formatter(formatter): | ||
def _maybe_wrap_formatter(formatter, na_rep: Optional[str]): | ||
if isinstance(formatter, str): | ||
return lambda x: formatter.format(x) | ||
formatter_func = lambda x: formatter.format(x) | ||
elif callable(formatter): | ||
return formatter | ||
formatter_func = formatter | ||
else: | ||
msg = ( | ||
"Expected a template string or callable, got {formatter} " | ||
"instead".format(formatter=formatter) | ||
) | ||
raise TypeError(msg) | ||
|
||
if na_rep is None: | ||
return formatter_func | ||
elif isinstance(na_rep, str): | ||
return lambda x: na_rep if pd.isna(x) else formatter_func(x) | ||
else: | ||
msg = "Expected a string, got {na_rep} instead".format(na_rep=na_rep) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add a test case that hits this? |
||
raise TypeError(msg) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1009,6 +1009,75 @@ def test_bar_bad_align_raises(self): | |
with pytest.raises(ValueError): | ||
df.style.bar(align="poorly", color=["#d65f5f", "#5fba7d"]) | ||
|
||
def test_format_with_na_rep(self): | ||
# GH 21527 28358 | ||
df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) | ||
|
||
ctx = df.style.format(None, na_rep="-")._translate() | ||
assert ctx["body"][0][1]["display_value"] == "-" | ||
assert ctx["body"][0][2]["display_value"] == "-" | ||
|
||
ctx = df.style.format("{:.2%}", na_rep="-")._translate() | ||
assert ctx["body"][0][1]["display_value"] == "-" | ||
assert ctx["body"][0][2]["display_value"] == "-" | ||
assert ctx["body"][1][1]["display_value"] == "110.00%" | ||
assert ctx["body"][1][2]["display_value"] == "120.00%" | ||
|
||
ctx = df.style.format("{:.2%}", na_rep="-", subset=["B"])._translate() | ||
assert ctx["body"][0][2]["display_value"] == "-" | ||
assert ctx["body"][1][2]["display_value"] == "120.00%" | ||
|
||
def test_init_with_na_rep(self): | ||
# GH 21527 28358 | ||
df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) | ||
|
||
ctx = Styler(df, na_rep="NA")._translate() | ||
assert ctx["body"][0][1]["display_value"] == "NA" | ||
assert ctx["body"][0][2]["display_value"] == "NA" | ||
|
||
def test_set_na_rep(self): | ||
# GH 21527 28358 | ||
df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) | ||
|
||
ctx = df.style.set_na_rep("NA")._translate() | ||
assert ctx["body"][0][1]["display_value"] == "NA" | ||
assert ctx["body"][0][2]["display_value"] == "NA" | ||
|
||
ctx = ( | ||
df.style.set_na_rep("NA") | ||
.format(None, na_rep="-", subset=["B"]) | ||
._translate() | ||
) | ||
assert ctx["body"][0][1]["display_value"] == "NA" | ||
assert ctx["body"][0][2]["display_value"] == "-" | ||
|
||
def test_format_non_numeric_na(self): | ||
# GH 21527 28358 | ||
df = pd.DataFrame( | ||
{ | ||
"object": [None, np.nan, "foo"], | ||
"datetime": [None, pd.NaT, pd.Timestamp("20120101")], | ||
} | ||
) | ||
|
||
ctx = df.style.set_na_rep("NA")._translate() | ||
assert ctx["body"][0][1]["display_value"] == "NA" | ||
assert ctx["body"][0][2]["display_value"] == "NA" | ||
assert ctx["body"][1][1]["display_value"] == "NA" | ||
assert ctx["body"][1][2]["display_value"] == "NA" | ||
|
||
ctx = df.style.format(None, na_rep="-")._translate() | ||
assert ctx["body"][0][1]["display_value"] == "-" | ||
assert ctx["body"][0][2]["display_value"] == "-" | ||
assert ctx["body"][1][1]["display_value"] == "-" | ||
assert ctx["body"][1][2]["display_value"] == "-" | ||
|
||
@pytest.mark.xfail | ||
def test_format_with_bad_na_rep(self): | ||
# GH 21527 28358 | ||
df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) | ||
df.style.format(None, na_rep=-1) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Instead of marking this test as xfail, you should use pytest.raises as a context manager (you'll find other examples throughout the tests). |
||
|
||
def test_highlight_null(self, null_color="red"): | ||
df = pd.DataFrame({"A": [0, np.nan]}) | ||
result = df.style.highlight_null()._compute().ctx | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it possible to add sub-types for Callable, Tuple, Dict here?