Skip to content

Commit

Permalink
Fix pipeline clone
Browse files Browse the repository at this point in the history
  • Loading branch information
lucianolorenti committed Feb 24, 2024
1 parent fbcd5a3 commit 27165a8
Show file tree
Hide file tree
Showing 10 changed files with 990 additions and 110 deletions.
5 changes: 1 addition & 4 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,9 @@ jobs:
- name: Test with pytest
run: |
coverage run --source=. -m pytest
coverage run --source=. -m pytest --nbmake
coverage report -m
- name: Test of jupyter notebook
run: |
pytest --nbmake
- name: Coveralls
uses: AndreMiras/coveralls-python-action@develop
Expand Down
14 changes: 11 additions & 3 deletions ceruleo/dataset/ts_dataset.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from collections.abc import Iterable
from pathlib import Path
from re import S
from types import EllipsisType
from typing import Any, List, Optional, Tuple, Union

import numpy as np
Expand Down Expand Up @@ -101,6 +102,7 @@ def __call__(self, i):
def get_features_of_life(self, i: int) -> pd.DataFrame:
    """Return the entry stored at position ``i``.

    Thin alias for indexing: the lookup is delegated to ``self[i]``.

    Parameters:
        i: Index forwarded unchanged to ``__getitem__``.

    Returns:
        Whatever ``self[i]`` yields for that index.
    """
    features = self[i]
    return features


def __getitem__(
self, i: Union[int, Iterable]
) -> Union[pd.DataFrame, "FoldedDataset"]:
Expand All @@ -126,18 +128,24 @@ def __getitem__(
)
if TENSORFLOW_ENABLED and isinstance(i, tf.Tensor):
return self.get_time_series(i.ref())



if isinstance(i, Iterable):
if not all(isinstance(item, (int, np.integer)) for item in i):
raise ValueError("Invalid iterable index passed")
if len(i) == 2:
if not isinstance(i[1], EllipsisType):
raise ValueError("Invalid iterable index passed")
i = i[0]

return FoldedDataset(self, i)
else:
df = self.get_time_series(i)
return df

@property
def shape(self) -> Tuple[int, int]:
return (self.n_time_series, 1)
def shape(self) -> Tuple[int]:
return (self.n_time_series,)

def __len__(self) -> int:
"""
Expand Down
4 changes: 3 additions & 1 deletion ceruleo/transformation/features/scalers.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,10 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
try:
divisor = self.data_max - self.data_min



mask = np.abs((divisor)) > 1e-25
X = X.copy()
X = X.astype(float)
X.loc[:, mask] = (
(X.loc[:, mask] - self.data_min[mask])
/ divisor[mask]
Expand Down
1 change: 1 addition & 0 deletions ceruleo/transformation/features/selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def fit(self, df:pd.DataFrame, y=None):
features = list(set(df.columns))
self.features_computed_ = sorted(features)
return self
return X.loc[:, self.features_computed_].copy()

def transform(self, X:pd.DataFrame) -> pd.DataFrame:
"""
Expand Down
14 changes: 12 additions & 2 deletions ceruleo/transformation/functional/mixin.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
import hashlib
import uuid
from copy import deepcopy
from typing import List, Optional, Union

from numpy.lib.arraysetops import isin
from sklearn.base import BaseEstimator


class TransformerStepMixin(BaseEstimator):
name: Optional[str]
previous: List["TransformerStepMixin"]
next: List["TransformerStepMixin"]
uuid: str
prefer_partial_fit: bool

def __init__(self, *, name: Optional[str] = None, prefer_partial_fit:bool =True):
self.name_ = name
self.previous = []
Expand Down Expand Up @@ -65,3 +70,8 @@ def description(self):

def __str__(self):
    """Use the step's ``name`` attribute as its string representation."""
    # NOTE(review): ``name`` is annotated Optional[str] on the class; if it
    # can really be None here, ``str(step)`` would raise TypeError - confirm.
    label = self.name
    return label


def clear_connections(self):
    """Forget every link held by this step.

    Both ``previous`` and ``next`` are rebound to new, independent empty
    lists; the lists that were previously referenced are not mutated.
    """
    self.previous, self.next = [], []
26 changes: 20 additions & 6 deletions ceruleo/transformation/functional/pipeline/pipeline.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
import shelve
import uuid
from pathlib import Path
from typing import Iterable, List, Optional, Tuple, Union
from typing import Iterable, List, Union

import pandas as pd
from ceruleo import CACHE_PATH
from ceruleo.dataset.ts_dataset import AbstractPDMDataset
from ceruleo.transformation.functional.graph_utils import (
dfs_iterator,
Expand All @@ -14,7 +10,7 @@
from ceruleo.transformation.functional.pipeline.runner import CachedPipelineRunner
from ceruleo.transformation.functional.transformerstep import TransformerStep
from sklearn.base import BaseEstimator, TransformerMixin

from sklearn.base import clone as sklearn_clone

class Pipeline(BaseEstimator, TransformerMixin):
"""Transformation pipeline
Expand All @@ -24,6 +20,11 @@ class Pipeline(BaseEstimator, TransformerMixin):
cache_type: Cache storage mode
"""

final_step: TransformerStep
fitted_: bool
cache_type: CacheStoreType
runner: CachedPipelineRunner

def __init__(self, final_step, cache_type: CacheStoreType = CacheStoreType.MEMORY):
self.final_step = final_step
self.fitted_ = False
Expand Down Expand Up @@ -115,6 +116,19 @@ def get_params(self, deep: bool = False):
for k in p.keys():
params[f"{node.name}__{k}"] = p[k]
return params

def __sklearn_clone__(self):
    """Build a new ``Pipeline`` whose steps are clones of this one's steps.

    Every step yielded by ``dfs_iterator(self.final_step)`` is cloned with
    ``sklearn_clone``. The clones are then re-linked to mirror the ``next``
    connections of the original steps.

    Returns:
        A new ``Pipeline`` ending at the clone of ``self.final_step`` and
        using the same ``cache_type``.
    """
    # Map each original step to its clone.
    clones = {}
    for step in dfs_iterator(self.final_step):
        clones[step] = sklearn_clone(step)

    # Start every clone without links so only the edges added below remain.
    for cloned_step in clones.values():
        cloned_step.clear_connections()

    # Recreate each original step -> next edge between the matching clones.
    for step in dfs_iterator(self.final_step):
        for successor in step.next:
            clones[step].add_next(clones[successor])

    cloned_final_step = clones[self.final_step]
    return Pipeline(
        final_step=cloned_final_step,
        cache_type=self.cache_type,
    )


def make_pipeline(
Expand Down
12 changes: 6 additions & 6 deletions docs/dataset/analysis/Sensor Validation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -99,15 +99,15 @@
"outputs": [],
"source": [
"from ceruleo.dataset.analysis.numerical_features import analysis\n",
"from ceruleo.transformation.functional.transformers import Transformer\n",
"from ceruleo.transformation.features.selection import ByNameFeatureSelector, ByTypeFeatureSelector\n",
"from ceruleo.iterators.iterators import RelativeToEnd\n",
"from ceruleo.transformation.features.slicing import SliceRows\n",
"from ceruleo.transformation.functional.pipeline.pipeline import make_pipeline\n",
"from ceruleo.transformation.features.resamplers import IndexMeanResampler\n",
"from ceruleo.transformation.features.transformation import Clip\n",
"from ceruleo.transformation.features.selection import (\n",
" ByNameFeatureSelector,\n",
")\n",
"from ceruleo.transformation.features.slicing import SliceRows\n",
"from ceruleo.iterators.iterators import RelativeToEnd"
"from ceruleo.transformation.features.transformation import Clip\n",
"from ceruleo.transformation.functional.pipeline.pipeline import make_pipeline\n",
"from ceruleo.transformation.functional.transformers import Transformer"
]
},
{
Expand Down
Loading

0 comments on commit 27165a8

Please sign in to comment.