From cc2ed432c700dde084eca087e851d125b8fa42dc Mon Sep 17 00:00:00 2001 From: zzccchen Date: Sat, 27 Jul 2024 11:03:15 +0800 Subject: [PATCH 1/4] Enhance cross-platform compatibility for loading PySRRegressor models --- pysr/sr.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/pysr/sr.py b/pysr/sr.py index 0054ce502..1f3b4a937 100644 --- a/pysr/sr.py +++ b/pysr/sr.py @@ -14,11 +14,12 @@ from multiprocessing import cpu_count from pathlib import Path from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union, cast +from pathlib import Path, PureWindowsPath, PurePosixPath import numpy as np import pandas as pd from numpy import ndarray -from numpy.typing import NDArray +from numpy.typing import ArrayLike, NDArray from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin from sklearn.utils import check_array, check_consistent_length, check_random_state from sklearn.utils.validation import _check_feature_names_in # type: ignore @@ -949,7 +950,7 @@ def __init__( @classmethod def from_file( cls, - equation_file: PathLike, + equation_file: Union[str, Path], *, binary_operators: Optional[List[str]] = None, unary_operators: Optional[List[str]] = None, @@ -997,6 +998,20 @@ def from_file( The model with fitted equations. """ + class CustomUnpickler(pkl.Unpickler): + def find_class(self, module, name): + if module == 'pathlib': + if name == 'PosixPath': + return PurePosixPath + elif name == 'WindowsPath': + return PureWindowsPath + return super().find_class(module, name) + + def path_to_str(path): + if isinstance(path, (PurePosixPath, PureWindowsPath)): + return str(path) + return path + pkl_filename = _csv_filename_to_pkl_filename(equation_file) # Try to load model from .pkl @@ -1007,11 +1022,10 @@ def from_file( assert unary_operators is None assert n_features_in is None with open(pkl_filename, "rb") as f: - model = pkl.load(f) - # Change equation_file_ to be in the same dir as the pickle file - base_dir = os.path.dirname(pkl_filename) - base_equation_file = os.path.basename(model.equation_file_) - model.equation_file_ = os.path.join(base_dir, base_equation_file) + unpickler = CustomUnpickler(f) + model = unpickler.load() + # Convert equation_file_ to string to ensure cross-platform compatibility + model.equation_file_ = path_to_str(model.equation_file_) # Update any parameters if necessary, such as # extra_sympy_mappings: From 8f984db381ae49f62ae67a1967df8ce43d649420 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 27 Jul 2024 03:08:44 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pysr/sr.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pysr/sr.py b/pysr/sr.py index 1f3b4a937..4cc30a19b 100644 --- a/pysr/sr.py +++ b/pysr/sr.py @@ -12,9 +12,8 @@ from datetime import datetime from io import StringIO from multiprocessing import cpu_count -from pathlib import Path +from pathlib import Path, PurePosixPath, PureWindowsPath from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union, cast -from pathlib import Path, PureWindowsPath, PurePosixPath import numpy as np import pandas as pd @@ -1000,10 +999,10 @@ def from_file( class CustomUnpickler(pkl.Unpickler): def find_class(self, module, name): - if module == 'pathlib': - if name == 'PosixPath': + if module == "pathlib": + if name == "PosixPath": return PurePosixPath - elif name == 'WindowsPath': + elif name == "WindowsPath": return PureWindowsPath return super().find_class(module, name) From 648921854e21dcb263c1d10adfc809c4d7397798 Mon Sep 17 00:00:00 2001 From: zzccchen Date: Mon, 29 Jul 2024 13:26:32 +0800 Subject: [PATCH 3/4] moved _path_to_str _CrossPlatformPathUnpickler to utils.py --- pysr/sr.py | 26 +++++++------------------- pysr/utils.py | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/pysr/sr.py b/pysr/sr.py index 4cc30a19b..158896e13 100644 --- a/pysr/sr.py +++ b/pysr/sr.py @@ -12,13 +12,13 @@ from datetime import datetime from io import StringIO from multiprocessing import cpu_count -from pathlib import Path, PurePosixPath, PureWindowsPath +from pathlib import Path from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union, cast import numpy as np import pandas as pd from numpy import ndarray -from numpy.typing import ArrayLike, NDArray +from numpy.typing import NDArray from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin from sklearn.utils import check_array, check_consistent_length, check_random_state from sklearn.utils.validation import _check_feature_names_in # type: ignore @@ -51,11 +51,13 @@ from .utils import ( ArrayLike, PathLike, + _CrossPlatformPathUnpickler, _csv_filename_to_pkl_filename, _preprocess_julia_floats, _safe_check_feature_names_in, _subscriptify, _suggest_keywords, + _path_to_str, ) ALREADY_RAN = False @@ -949,7 +951,7 @@ def __init__( @classmethod def from_file( cls, - equation_file: Union[str, Path], + equation_file: PathLike, *, binary_operators: Optional[List[str]] = None, unary_operators: Optional[List[str]] = None, @@ -997,20 +999,6 @@ def from_file( The model with fitted equations. """ - class CustomUnpickler(pkl.Unpickler): - def find_class(self, module, name): - if module == "pathlib": - if name == "PosixPath": - return PurePosixPath - elif name == "WindowsPath": - return PureWindowsPath - return super().find_class(module, name) - - def path_to_str(path): - if isinstance(path, (PurePosixPath, PureWindowsPath)): - return str(path) - return path - pkl_filename = _csv_filename_to_pkl_filename(equation_file) # Try to load model from .pkl @@ -1021,10 +1009,10 @@ def path_to_str(path): assert unary_operators is None assert n_features_in is None with open(pkl_filename, "rb") as f: - unpickler = CustomUnpickler(f) + unpickler = _CrossPlatformPathUnpickler(f) model = unpickler.load() # Convert equation_file_ to string to ensure cross-platform compatibility - model.equation_file_ = path_to_str(model.equation_file_) + model.equation_file_ = _path_to_str(model.equation_file_) # Update any parameters if necessary, such as # extra_sympy_mappings: diff --git a/pysr/utils.py b/pysr/utils.py index de7faf16e..ffe481747 100644 --- a/pysr/utils.py +++ b/pysr/utils.py @@ -3,7 +3,9 @@ import os import re from pathlib import Path +import pickle as pkl from typing import Any, List, TypeVar, Union +from pathlib import PurePosixPath, PureWindowsPath from numpy import ndarray from sklearn.utils.validation import _check_feature_names_in # type: ignore @@ -73,3 +75,19 @@ def _suggest_keywords(cls, k: str) -> List[str]: ] suggestions = difflib.get_close_matches(k, valid_keywords, n=3) return suggestions + + +class _CrossPlatformPathUnpickler(pkl.Unpickler): + def find_class(self, module, name): + if module == "pathlib": + if name == "PosixPath": + return PurePosixPath + elif name == "WindowsPath": + return PureWindowsPath + return super().find_class(module, name) + + +def _path_to_str(path): + if isinstance(path, (PurePosixPath, PureWindowsPath)): + return str(path) + return path From 5dd2e64a604c7002f3caf37e461f4c08289ee070 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 29 Jul 2024 05:26:44 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pysr/sr.py | 2 +- pysr/utils.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pysr/sr.py b/pysr/sr.py index 158896e13..60268fa34 100644 --- a/pysr/sr.py +++ b/pysr/sr.py @@ -53,11 +53,11 @@ PathLike, _CrossPlatformPathUnpickler, _csv_filename_to_pkl_filename, + _path_to_str, _preprocess_julia_floats, _safe_check_feature_names_in, _subscriptify, _suggest_keywords, - _path_to_str, ) ALREADY_RAN = False diff --git a/pysr/utils.py b/pysr/utils.py index ffe481747..40c5c58ca 100644 --- a/pysr/utils.py +++ b/pysr/utils.py @@ -1,11 +1,10 @@ import difflib import inspect import os -import re -from pathlib import Path import pickle as pkl +import re +from pathlib import Path, PurePosixPath, PureWindowsPath from typing import Any, List, TypeVar, Union -from pathlib import PurePosixPath, PureWindowsPath from numpy import ndarray from sklearn.utils.validation import _check_feature_names_in # type: ignore