Skip to content

Commit

Permalink
add new custom formatter to readme
Browse files Browse the repository at this point in the history
  • Loading branch information
MrPowers committed Feb 19, 2024
1 parent 9199070 commit 0d98253
Show file tree
Hide file tree
Showing 8 changed files with 38 additions and 17 deletions.
33 changes: 26 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -303,23 +303,42 @@ nan2 = float('nan')
nan1 == nan2 # False
```

Pandas, a popular DataFrame library, does consider NaN values to be equal by default.

This library requires you to set a flag to consider two NaN values to be equal.
pandas considers NaN values to be equal by default, but this library requires you to set the `allow_nan_equality` flag to treat two NaN values as equal.

```python
assert_df_equality(df1, df2, allow_nan_equality=True)
```

### Underline differences within rows
## Customize formatting

*Available in chispa 0.10+*.

You can choose to underline columns within a row that are different by setting `underline_cells` to True, i.e.:
You can specify custom formats for the printed error messages as follows:

```python
assert_df_equality(df1, df2, underline_cells=True)
@dataclass
class MyFormats:
    mismatched_rows = ["light_yellow"]
    matched_rows = ["cyan", "bold"]
    mismatched_cells = ["purple"]
    matched_cells = ["blue"]

assert_basic_rows_equality(df1.collect(), df2.collect(), formats=MyFormats())
```

You can also define these formats in `conftest.py` and inject them via a fixture:

```python
@pytest.fixture()
def my_formats():
    return MyFormats()

def test_shows_assert_basic_rows_equality(my_formats):
    ...
    assert_basic_rows_equality(df1.collect(), df2.collect(), formats=my_formats)
```

![DfsNotEqualUnderlined](https://github.com/MrPowers/chispa/blob/main/images/df_not_equal_underlined.png)
![custom_formats](https://github.com/MrPowers/chispa/blob/main/images/custom_formats.png)

## Approximate column equality

Expand Down
16 changes: 8 additions & 8 deletions chispa/rows_comparer.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ def assert_basic_rows_equality(rows1, rows2, underline_cells=False, formats=Defa
for r1_field, r2_field in r_zipped:
if r1[r1_field] != r2[r2_field]:
all_rows_equal = False
r1_string.append(format_string(f"{r1_field}='{r1[r1_field]}'", formats.mismatched_cells))
r2_string.append(format_string(f"{r2_field}='{r2[r2_field]}'", formats.mismatched_cells))
r1_string.append(format_string(f"{r1_field}={r1[r1_field]}", formats.mismatched_cells))
r2_string.append(format_string(f"{r2_field}={r2[r2_field]}", formats.mismatched_cells))
else:
r1_string.append(format_string(f"{r1_field}='{r1[r1_field]}'", formats.matched_cells))
r2_string.append(format_string(f"{r2_field}='{r2[r2_field]}'", formats.matched_cells))
r1_string.append(format_string(f"{r1_field}={r1[r1_field]}", formats.matched_cells))
r2_string.append(format_string(f"{r2_field}={r2[r2_field]}", formats.matched_cells))
r1_res = ", ".join(r1_string)
r2_res = ", ".join(r2_string)

Expand Down Expand Up @@ -64,11 +64,11 @@ def assert_generic_rows_equality(rows1, rows2, row_equality_fun, row_equality_fu
for r1_field, r2_field in r_zipped:
if r1[r1_field] != r2[r2_field]:
all_rows_equal = False
r1_string.append(format_string(f"{r1_field}='{r1[r1_field]}'", formats.mismatched_cells))
r2_string.append(format_string(f"{r2_field}='{r2[r2_field]}'", formats.mismatched_cells))
r1_string.append(format_string(f"{r1_field}={r1[r1_field]}", formats.mismatched_cells))
r2_string.append(format_string(f"{r2_field}={r2[r2_field]}", formats.mismatched_cells))
else:
r1_string.append(format_string(f"{r1_field}='{r1[r1_field]}'", formats.matched_cells))
r2_string.append(format_string(f"{r2_field}='{r2[r2_field]}'", formats.matched_cells))
r1_string.append(format_string(f"{r1_field}={r1[r1_field]}", formats.matched_cells))
r2_string.append(format_string(f"{r2_field}={r2[r2_field]}", formats.matched_cells))
r1_res = ", ".join(r1_string)
r2_res = ", ".join(r2_string)

Expand Down
Binary file added images/custom_formats.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified images/dfs_not_equal_error.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/dfs_not_equal_error_old.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified images/ignore_row_order_false.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/ignore_row_order_false_old.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
6 changes: 4 additions & 2 deletions tests/test_readme_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def remove_non_word_characters(col):


def describe_column_equality():
def it_removes_non_word_characters_short():
def test_removes_non_word_characters_short():
data = [
("jo&&se", "jose"),
("**li**", "li"),
Expand All @@ -31,7 +31,7 @@ def it_removes_non_word_characters_short():
assert_column_equality(df, "clean_name", "expected_name")


def it_removes_non_word_characters_nice_error():
def test_remove_non_word_characters_nice_error():
data = [
("matt7", "matt"),
("bill&", "bill"),
Expand All @@ -40,6 +40,7 @@ def it_removes_non_word_characters_nice_error():
]
df = spark.createDataFrame(data, ["name", "expected_name"])\
.withColumn("clean_name", remove_non_word_characters(F.col("name")))
# assert_column_equality(df, "clean_name", "expected_name")
with pytest.raises(ColumnsNotEqualError) as e_info:
assert_column_equality(df, "clean_name", "expected_name")

Expand Down Expand Up @@ -95,6 +96,7 @@ def test_remove_non_word_characters_long_error():
def ignore_row_order():
df1 = spark.createDataFrame([(1,), (2,), (3,)], ["some_num"])
df2 = spark.createDataFrame([(2,), (1,), (3,)], ["some_num"])
# assert_df_equality(df1, df2)
assert_df_equality(df1, df2, ignore_row_order=True)


Expand Down

0 comments on commit 0d98253

Please sign in to comment.