Skip to content

Commit

Permalink
fix: Handle infinity and large values in input to avoid ValueError with log1p (#99)
Browse files Browse the repository at this point in the history

Signed-off-by: HimanshuRRai <[email protected]>
  • Loading branch information
HimanshuRRai authored Nov 14, 2024
1 parent db785da commit 1218fff
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 4 deletions.
6 changes: 4 additions & 2 deletions sapientml_core/templates/preprocessing_templates/log.py.jinja
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import numpy as np

NUMERIC_COLS_TO_SCALE = {{ columns }}
{{ train_dataset }}[NUMERIC_COLS_TO_SCALE] = np.log1p({{ train_dataset }}[NUMERIC_COLS_TO_SCALE])
{{ train_dataset }}[NUMERIC_COLS_TO_SCALE] = np.log1p({{ train_dataset }}[NUMERIC_COLS_TO_SCALE]).replace([np.inf, -np.inf], np.nan).fillna({{ train_dataset }}[NUMERIC_COLS_TO_SCALE].mean())


NUMERIC_COLS_TO_SCALE_FOR_TEST = list(set(test_dataset.columns) & set(NUMERIC_COLS_TO_SCALE))
{{ test_dataset }}[NUMERIC_COLS_TO_SCALE_FOR_TEST] = np.log1p({{ test_dataset }}[NUMERIC_COLS_TO_SCALE_FOR_TEST])
{{ test_dataset }}[NUMERIC_COLS_TO_SCALE_FOR_TEST] = np.log1p({{ test_dataset }}[NUMERIC_COLS_TO_SCALE_FOR_TEST]).replace([np.inf, -np.inf], np.nan).fillna({{ test_dataset }}[NUMERIC_COLS_TO_SCALE_FOR_TEST].mean())

Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ import numpy as np

NUMERIC_COLS_TO_SCALE = {{ columns }}
NUMERIC_COLS_TO_SCALE_FOR_TEST = list(set(test_dataset.columns) & set(NUMERIC_COLS_TO_SCALE))
{{ test_dataset }}[NUMERIC_COLS_TO_SCALE_FOR_TEST] = np.log1p({{ test_dataset }}[NUMERIC_COLS_TO_SCALE_FOR_TEST])
{{ test_dataset }}[NUMERIC_COLS_TO_SCALE_FOR_TEST] = np.log1p({{ test_dataset }}[NUMERIC_COLS_TO_SCALE_FOR_TEST]).replace([np.inf, -np.inf], np.nan).fillna({{ test_dataset }}[NUMERIC_COLS_TO_SCALE_FOR_TEST].mean())

Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np

NUMERIC_COLS_TO_SCALE = {{ columns }}
{{ train_dataset }}[NUMERIC_COLS_TO_SCALE] = np.log1p({{ train_dataset }}[NUMERIC_COLS_TO_SCALE])
{{ train_dataset }}[NUMERIC_COLS_TO_SCALE] = np.log1p({{ train_dataset }}[NUMERIC_COLS_TO_SCALE]).replace([np.inf, -np.inf], np.nan).fillna({{ train_dataset }}[NUMERIC_COLS_TO_SCALE].mean())

0 comments on commit 1218fff

Please sign in to comment.