add new custom formatter to readme

MrPowers · Feb 19, 2024 · 0d98253 · 0d98253
1 parent 9199070
commit 0d98253
Show file tree

Hide file tree

Showing 8 changed files with 38 additions and 17 deletions.
diff --git a/README.md b/README.md
@@ -303,23 +303,42 @@ nan2 = float('nan')
 nan1 == nan2 # False
 ```
 
-Pandas, a popular DataFrame library, does consider NaN values to be equal by default.
-
-This library requires you to set a flag to consider two NaN values to be equal.
+pandas treats NaN values as equal by default, but this library treats two NaN values as equal only when you set the `allow_nan_equality` flag.
 
 ```python
 assert_df_equality(df1, df2, allow_nan_equality=True)
 ```
 
-### Underline differences within rows
+## Customize formatting
+
+*Available in chispa 0.10+*.
 
-You can choose to underline columns within a row that are different by setting `underline_cells` to True, i.e.:
+You can specify custom formats for the printed error messages as follows:
 
 ```python
-assert_df_equality(df1, df2, underline_cells=True)
+@dataclass
+class MyFormats:
+    mismatched_rows = ["light_yellow"]
+    matched_rows = ["cyan", "bold"]
+    mismatched_cells = ["purple"]
+    matched_cells = ["blue"]
+
+assert_basic_rows_equality(df1.collect(), df2.collect(), formats=MyFormats())
+```
+
+You can also define these formats in `conftest.py` and inject them via a fixture:
+
+```python
+@pytest.fixture()
+def my_formats():
+    return MyFormats()
+
+def test_shows_assert_basic_rows_equality(my_formats):
+  ...
+  assert_basic_rows_equality(df1.collect(), df2.collect(), formats=my_formats)
 ```
 
-![DfsNotEqualUnderlined](https://github.com/MrPowers/chispa/blob/main/images/df_not_equal_underlined.png)
+![custom_formats](https://github.com/MrPowers/chispa/blob/main/images/custom_formats.png)
 
 ## Approximate column equality
 

diff --git a/chispa/rows_comparer.py b/chispa/rows_comparer.py
@@ -27,11 +27,11 @@ def assert_basic_rows_equality(rows1, rows2, underline_cells=False, formats=Defa
                 for r1_field, r2_field in r_zipped:
                     if r1[r1_field] != r2[r2_field]:
                         all_rows_equal = False
-                        r1_string.append(format_string(f"{r1_field}='{r1[r1_field]}'", formats.mismatched_cells))
-                        r2_string.append(format_string(f"{r2_field}='{r2[r2_field]}'", formats.mismatched_cells))
+                        r1_string.append(format_string(f"{r1_field}={r1[r1_field]}", formats.mismatched_cells))
+                        r2_string.append(format_string(f"{r2_field}={r2[r2_field]}", formats.mismatched_cells))
                     else:
-                        r1_string.append(format_string(f"{r1_field}='{r1[r1_field]}'", formats.matched_cells))
-                        r2_string.append(format_string(f"{r2_field}='{r2[r2_field]}'", formats.matched_cells))
+                        r1_string.append(format_string(f"{r1_field}={r1[r1_field]}", formats.matched_cells))
+                        r2_string.append(format_string(f"{r2_field}={r2[r2_field]}", formats.matched_cells))
                 r1_res = ", ".join(r1_string)
                 r2_res = ", ".join(r2_string)
 
@@ -64,11 +64,11 @@ def assert_generic_rows_equality(rows1, rows2, row_equality_fun, row_equality_fu
             for r1_field, r2_field in r_zipped:
                 if r1[r1_field] != r2[r2_field]:
                     all_rows_equal = False
-                    r1_string.append(format_string(f"{r1_field}='{r1[r1_field]}'", formats.mismatched_cells))
-                    r2_string.append(format_string(f"{r2_field}='{r2[r2_field]}'", formats.mismatched_cells))
+                    r1_string.append(format_string(f"{r1_field}={r1[r1_field]}", formats.mismatched_cells))
+                    r2_string.append(format_string(f"{r2_field}={r2[r2_field]}", formats.mismatched_cells))
                 else:
-                    r1_string.append(format_string(f"{r1_field}='{r1[r1_field]}'", formats.matched_cells))
-                    r2_string.append(format_string(f"{r2_field}='{r2[r2_field]}'", formats.matched_cells))
+                    r1_string.append(format_string(f"{r1_field}={r1[r1_field]}", formats.matched_cells))
+                    r2_string.append(format_string(f"{r2_field}={r2[r2_field]}", formats.matched_cells))
             r1_res = ", ".join(r1_string)
             r2_res = ", ".join(r2_string)
 

diff --git a/images/custom_formats.png b/images/custom_formats.png
diff --git a/images/dfs_not_equal_error.png b/images/dfs_not_equal_error.png
diff --git a/images/dfs_not_equal_error_old.png b/images/dfs_not_equal_error_old.png
diff --git a/images/ignore_row_order_false.png b/images/ignore_row_order_false.png
diff --git a/images/ignore_row_order_false_old.png b/images/ignore_row_order_false_old.png
diff --git a/tests/test_readme_examples.py b/tests/test_readme_examples.py
@@ -19,7 +19,7 @@ def remove_non_word_characters(col):
 
 
 def describe_column_equality():
-    def it_removes_non_word_characters_short():
+    def test_removes_non_word_characters_short():
         data = [
             ("jo&&se", "jose"),
             ("**li**", "li"),
@@ -31,7 +31,7 @@ def it_removes_non_word_characters_short():
         assert_column_equality(df, "clean_name", "expected_name")
 
 
-    def it_removes_non_word_characters_nice_error():
+    def test_remove_non_word_characters_nice_error():
         data = [
             ("matt7", "matt"),
             ("bill&", "bill"),
@@ -40,6 +40,7 @@ def it_removes_non_word_characters_nice_error():
         ]
         df = spark.createDataFrame(data, ["name", "expected_name"])\
             .withColumn("clean_name", remove_non_word_characters(F.col("name")))
+        # assert_column_equality(df, "clean_name", "expected_name")
         with pytest.raises(ColumnsNotEqualError) as e_info:
             assert_column_equality(df, "clean_name", "expected_name")
 
@@ -95,6 +96,7 @@ def test_remove_non_word_characters_long_error():
     def ignore_row_order():
         df1 = spark.createDataFrame([(1,), (2,), (3,)], ["some_num"])
         df2 = spark.createDataFrame([(2,), (1,), (3,)], ["some_num"])
+        # assert_df_equality(df1, df2)
         assert_df_equality(df1, df2, ignore_row_order=True)