Skip to content

Commit

Permalink
Pipeline (#125)
Browse files Browse the repository at this point in the history
Added pipeline and transformers
  • Loading branch information
ivansmokovic authored Oct 9, 2019
1 parent 1cf5478 commit b9899d9
Show file tree
Hide file tree
Showing 18 changed files with 758 additions and 81 deletions.
3 changes: 2 additions & 1 deletion takepod/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
"models",
"preproc",
"storage",
"validation"]
"validation",
"pipeline"]


# Reference for initialization of logging scikit-learn
Expand Down
19 changes: 11 additions & 8 deletions takepod/examples/experiment_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@
from takepod.datasets.impl.pauza_dataset import PauzaHRDataset
from takepod.models.impl.fc_model import ScikitMLPClassifier
from takepod.models.impl.simple_trainers import SimpleTrainer
from takepod.models import Experiment
from takepod.models import Experiment, FeatureTransformer, SklearnTensorTransformerWrapper
from takepod.validation import k_fold_classification_metrics
from takepod.model_selection import grid_search
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler


def numericalize_pauza_rating(rating):
Expand Down Expand Up @@ -70,15 +71,17 @@ def train_iterator_provider(dataset):
embedding_matrix = vectorizer.get_embedding_matrix(
fields["Text"].vocab)

feature_transform = partial(feature_transform_mean_fun,
embedding_matrix=embedding_matrix)
feature_transform_fn = partial(feature_transform_mean_fun,
embedding_matrix=embedding_matrix)

tensor_transformer = SklearnTensorTransformerWrapper(StandardScaler())
feature_transformer = FeatureTransformer(feature_transform_fn, tensor_transformer)

experiment = Experiment(ScikitMLPClassifier,
trainer,
train_iterator_provider,
None,
feature_transform,
label_transform_fun)
trainer=trainer,
training_iterator_callable=train_iterator_provider,
feature_transformer=feature_transformer,
label_transform_fun=label_transform_fun)

_, model_params, train_params = \
grid_search(experiment,
Expand Down
5 changes: 4 additions & 1 deletion takepod/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@

from .model import AbstractFrameworkModel, AbstractSupervisedModel
from .batch_transform_functions import default_feature_transform, default_label_transform
from .transformers import FeatureTransformer, TensorTransformer, \
SklearnTensorTransformerWrapper
from .experiment import Experiment
from .trainer import AbstractTrainer

__all__ = ["AbstractFrameworkModel", "AbstractSupervisedModel",
"default_feature_transform", "default_label_transform", "Experiment",
"AbstractTrainer"]
"AbstractTrainer", "FeatureTransformer", "TensorTransformer",
"SklearnTensorTransformerWrapper"]
164 changes: 135 additions & 29 deletions takepod/models/experiment.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,29 @@
"""Modules defines an experiment - class used to combine iteration over data,
model training and prediction."""
from typing import Callable, NamedTuple, Dict, Type
from typing import Callable, NamedTuple, Dict, Type, Union
from inspect import isclass
import logging

import numpy as np

from takepod.datasets.dataset import Dataset
from takepod.datasets.iterator import Iterator, SingleBatchIterator
from takepod.models import AbstractSupervisedModel,\
default_feature_transform, default_label_transform
from takepod.models import AbstractSupervisedModel, \
default_feature_transform, default_label_transform, FeatureTransformer
from takepod.models.trainer import AbstractTrainer

_LOGGER = logging.getLogger(__name__)


class Experiment:
"""Class used to streamline model fitting and prediction."""

def __init__(self,
model_class: Type[AbstractSupervisedModel],
trainer: AbstractTrainer,
training_iterator_callable: Callable[[Dataset], Iterator],
model: Union[Type[AbstractSupervisedModel], AbstractSupervisedModel],
feature_transformer: FeatureTransformer = None,
trainer: AbstractTrainer = None,
training_iterator_callable: Callable[[Dataset], Iterator] = None,
prediction_iterator_callable: Callable[[Dataset], Iterator] = None,
feature_transform_fun:
Callable[[NamedTuple], np.ndarray] = None,
label_transform_fun:
Callable[[NamedTuple], np.ndarray] = None
):
Expand All @@ -29,8 +32,11 @@ def __init__(self,
Parameters
----------
model_class : class
Class of the Model to be fitted.
model : class or model instance
Class of the Model to be fitted or a pre-trained model.
If pre-trained model is passed and `fit` is called a new model instance will
be created. For fine-tuning of the passed model instance call
`partial_fit`.
Must be a subclass of Podium's `AbstractSupervisedModel`
trainer : AbstractTrainer
Expand All @@ -46,18 +52,24 @@ def __init__(self,
a single tensor before being returned. If passed None, a SingleBatchIterator
will be used as a default.
feature_transform_fun : Callable[[NamedTuple], np.ndarray]
Callable that transforms the input part of the batch returned by the iterator
into features that can be fed into the model.
feature_transformer : FeatureTransformer
FeatureTransformer that transforms the input part of the batch returned by the
iterator into features that can be fed into the model. Will also be fitted
during Experiment fitting.
label_transform_fun : Callable[[NamedTuple], np.ndarray]
Callable that transforms the target part of the batch returned by the iterator
into the same format the model prediction is. For a hypothetical perfect model
the prediction result of the model for some examples must be identical to the
result of this callable for those same examples.
"""
self.model_class = model_class
self.model = None
if isclass(model):
self.model_class = model
self.model = None
else:
self.model_class = model.__class__
self.model = model

self.trainer = trainer
self.training_iterator_callable = training_iterator_callable

Expand All @@ -72,9 +84,9 @@ def default_prediction_iterator_callable(dataset):
else:
self.prediction_iterator_callable = prediction_iterator_callable

self.feature_transform_fun = feature_transform_fun \
if feature_transform_fun is not None \
else default_feature_transform
self.feature_transformer = feature_transformer \
if feature_transformer is not None \
else FeatureTransformer(default_feature_transform)

self.label_transform_fun = label_transform_fun \
if label_transform_fun is not None \
Expand Down Expand Up @@ -107,7 +119,10 @@ def set_default_trainer_args(self, **kwargs):
def fit(self,
dataset: Dataset,
model_kwargs: Dict = None,
trainer_kwargs: Dict = None
trainer_kwargs: Dict = None,
feature_transformer: FeatureTransformer = None,
trainer: AbstractTrainer = None,
training_iterator_callable: Callable[[Dataset], Iterator] = None,
):
"""Fits the model to the provided Dataset. During fitting, the provided Iterator
and Trainer are used.
Expand All @@ -126,25 +141,108 @@ def fit(self,
Dict containing trainer arguments. Arguments passed to the trainer are the
default arguments defined with `set_default_trainer_args` updated/overridden
by 'trainer_kwargs'.
feature_transformer : FeatureTransformer, Optional
FeatureTransformer that transforms the input part of the batch returned by the
iterator into features that can be fed into the model. Will also be fitted
during Experiment fitting.
If None, the default FeatureTransformer provided in the constructor will be
used. Otherwise, this will overwrite the default feature transformer.
trainer : AbstractTrainer, Optional
Trainer used to fit the model. If None, the trainer provided in the
constructor will be used.
training_iterator_callable: Callable[[Dataset], Iterator]
Callable used to instantiate new instances of the Iterator used in fitting the
model. If None, the training_iterator_callable provided in the
constructor will be used.
"""

model_kwargs = {} if model_kwargs is None else model_kwargs
trainer_kwargs = {} if trainer_kwargs is None else trainer_kwargs

model_args = self.default_model_args.copy()
model_args.update(model_kwargs)

trainer_args = self.default_trainer_args.copy()
trainer_args.update(trainer_kwargs)
trainer = trainer if trainer is not None else self.trainer
if trainer is None:
errmsg = "No trainer provided. Trainer must be provided either in the " \
"constructor or as an argument to the fit method."
_LOGGER.error(errmsg)
raise RuntimeError(errmsg)

if feature_transformer is not None:
self.feature_transformer = feature_transformer

# Fit the feature transformer if it needs fitting
if self.feature_transformer.requires_fitting():
x_batch, y_batch = next(SingleBatchIterator(dataset).__iter__())
y = self.label_transform_fun(y_batch)
self.feature_transformer.fit(x_batch, y)

# Create new model instance
self.model = self.model_class(**model_args)
train_iterator = self.training_iterator_callable(dataset)

self.trainer.train(self.model,
train_iterator,
self.feature_transform_fun,
self.label_transform_fun,
**trainer_args)
# Train the model
self.partial_fit(dataset,
trainer_kwargs,
trainer,
training_iterator_callable)

def partial_fit(self,
                dataset: Dataset,
                trainer_kwargs: Dict = None,
                trainer: AbstractTrainer = None,
                training_iterator_callable: Callable[[Dataset], Iterator] = None):
    """Fits the model to the data without resetting the model.

    Unlike `fit`, no new model instance is created; the currently held model
    instance is trained further on the given dataset.

    Parameters
    ----------
    dataset : Dataset
        Dataset to fit the model to.

    trainer_kwargs : dict, Optional
        Dict containing trainer arguments. Arguments passed to the trainer are the
        default arguments defined with `set_default_trainer_args` updated/overridden
        by 'trainer_kwargs'.

    trainer : AbstractTrainer, Optional
        Trainer used to fit the model. If None, the trainer provided in the
        constructor will be used.

    training_iterator_callable : Callable[[Dataset], Iterator], Optional
        Callable used to instantiate new instances of the Iterator used in fitting
        the model. If None, the training_iterator_callable provided in the
        constructor will be used.

    Raises
    ------
    RuntimeError
        If no model instance is available, if no trainer was provided either in
        the constructor or as an argument, or if no training iterator callable was
        provided either in the constructor or as an argument.
    """
    self._check_if_model_exists()

    # Arguments passed to this call take precedence over constructor defaults.
    trainer = trainer if trainer is not None else self.trainer
    if trainer is None:
        errmsg = "No trainer provided. Trainer must be provided either in the " \
                 "constructor or as an argument to the partial_fit method."
        _LOGGER.error(errmsg)
        raise RuntimeError(errmsg)

    training_iterator_callable = training_iterator_callable \
        if training_iterator_callable is not None \
        else self.training_iterator_callable
    if training_iterator_callable is None:
        # Fail with an explicit message instead of the opaque
        # "'NoneType' object is not callable" raised by calling None below.
        errmsg = "No training iterator callable provided. It must be provided " \
                 "either in the constructor or as an argument to the " \
                 "partial_fit method."
        _LOGGER.error(errmsg)
        raise RuntimeError(errmsg)

    # Copy the defaults so per-call overrides never leak into later calls.
    trainer_args = self.default_trainer_args.copy()
    if trainer_kwargs is not None:
        trainer_args.update(trainer_kwargs)

    train_iterator = training_iterator_callable(dataset)

    trainer.train(self.model,
                  train_iterator,
                  self.feature_transformer,
                  self.label_transform_fun,
                  **trainer_args)

def predict(self,
dataset: Dataset,
Expand All @@ -167,13 +265,21 @@ def predict(self,
"""
# TODO: new method of providing examples must be defined.
# examples is taken in dataset form as proof-of-concept.
self._check_if_model_exists()

y = []

for x_batch, _ in self.prediction_iterator_callable(dataset):
x_batch_tensor = self.feature_transform_fun(x_batch)
x_batch_tensor = self.feature_transformer.transform(x_batch)
batch_prediction = self.model.predict(x_batch_tensor, **kwargs)
prediction_tensor = batch_prediction[AbstractSupervisedModel.PREDICTION_KEY]
y.append(prediction_tensor)

return np.concatenate(y)

def _check_if_model_exists(self):
    """Ensures a model instance is available before it is trained or used.

    Called by both `partial_fit` and `predict`, so the error message names both.

    Raises
    ------
    RuntimeError
        If `self.model` is None.
    """
    if self.model is None:
        errmsg = "Model instance not available. Please provide a model instance in " \
                 "the constructor or call `fit` before calling `partial_fit` or " \
                 "`predict`."
        _LOGGER.error(errmsg)
        raise RuntimeError(errmsg)
4 changes: 2 additions & 2 deletions takepod/models/impl/simple_trainers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ class SimpleTrainer(AbstractTrainer):
def train(self,
model,
iterator,
feature_transform_fun,
feature_transformer,
label_transform_fun,
**kwargs):
self._check_kwargs(**kwargs)
for _ in range(kwargs[SimpleTrainer.MAX_EPOCH_KEY]):
for x_batch, y_batch in iterator:
x = feature_transform_fun(x_batch)
x = feature_transformer.transform(x_batch)
y = label_transform_fun(y_batch)
model.fit(X=x, y=y)

Expand Down
7 changes: 3 additions & 4 deletions takepod/models/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import numpy as np

from takepod.models import AbstractSupervisedModel
from takepod.models import AbstractSupervisedModel, FeatureTransformer
from takepod.datasets import Iterator


Expand All @@ -15,8 +15,7 @@ class AbstractTrainer(ABC):
def train(self,
model: AbstractSupervisedModel,
iterator: Iterator,
feature_transform_fun:
Callable[[NamedTuple], np.ndarray],
feature_transformer: FeatureTransformer,
label_transform_fun:
Callable[[NamedTuple], np.ndarray],
**kwargs):
Expand All @@ -27,7 +26,7 @@ def train(self,
The model that needs to be trained.
iterator : Iterator
Iterator instance that provides data from a dataset
feature_transform_fun: Callable[[NamedTuple], np.ndarray]
feature_transformer: FeatureTransformer
FeatureTransformer whose `transform` method converts the input part of the batch
returned by the iterator into features that can be fed into the model.
label_transform_fun: Callable[[NamedTuple], np.ndarray]
Expand Down
Loading

0 comments on commit b9899d9

Please sign in to comment.