diff --git a/dataprofiler/profilers/categorical_column_profile.py b/dataprofiler/profilers/categorical_column_profile.py index 70186c65..1148e26d 100644 --- a/dataprofiler/profilers/categorical_column_profile.py +++ b/dataprofiler/profilers/categorical_column_profile.py @@ -305,7 +305,10 @@ def diff(self, other_profile: CategoricalColumn, options: dict = None) -> dict: other_profile._categories.items(), key=itemgetter(1), reverse=True ) ) - self_cat_count, other_cat_count = self._preprocess_for_categorical_psi_calculation( + ( + self_cat_count, + other_cat_count, + ) = self._preprocess_for_categorical_psi_calculation( self_cat_count=cat_count1, other_cat_count=cat_count2, ) @@ -315,7 +318,9 @@ def diff(self, other_profile: CategoricalColumn, options: dict = None) -> dict: percent_self = self_cat_count[iter_key] / self.sample_size percent_other = other_cat_count[iter_key] / other_profile.sample_size try: - total_psi += (percent_other - percent_self) * math.log(percent_other / percent_self) + total_psi += (percent_other - percent_self) * math.log( + percent_other / percent_self + ) except Exception: total_psi += 0.0 differences["statistics"]["psi"] = total_psi @@ -430,7 +435,9 @@ def is_match(self) -> bool: is_match = True return is_match - def _preprocess_for_categorical_psi_calculation(self, self_cat_count, other_cat_count): + def _preprocess_for_categorical_psi_calculation( + self, self_cat_count, other_cat_count + ): super_set_categories = set(self_cat_count.keys()) | set(other_cat_count.keys()) for iter_key in super_set_categories: for iter_dictionary in [self_cat_count, other_cat_count]: diff --git a/dataprofiler/tests/profilers/test_column_profile_compilers.py b/dataprofiler/tests/profilers/test_column_profile_compilers.py index 6561b498..46b0212d 100644 --- a/dataprofiler/tests/profilers/test_column_profile_compilers.py +++ b/dataprofiler/tests/profilers/test_column_profile_compilers.py @@ -506,7 +506,7 @@ def test_column_stats_profile_compiler_stats_diff(self): "df": 2, "p-value": 0.3499377491111554, }, - "psi": 0.009815252971365292 + "psi": 0.009815252971365292, }, } self.assertDictEqual(expected_diff, compiler1.diff(compiler2))