Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pipeline #125

Merged
merged 23 commits into from
Oct 9, 2019
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
1711cbe
WIP: pipeline
ivansmokovic Sep 17, 2019
cc2a5e4
Merge remote-tracking branch 'origin/master' into pipeline
ivansmokovic Sep 18, 2019
fff3eb2
Added ExampleFormat enum to ExampleFactory
ivansmokovic Sep 20, 2019
51604aa
Implemented FeatureTransformer
ivansmokovic Sep 20, 2019
849386d
Made podium use FeatureTransformer
ivansmokovic Sep 23, 2019
43b972a
WIP
ivansmokovic Sep 24, 2019
4522b44
implemented pipeline
ivansmokovic Sep 25, 2019
a09d683
Finished pipeline, added documentation
ivansmokovic Sep 26, 2019
838a90d
Added some documentation
ivansmokovic Sep 26, 2019
e0cb520
Added transformer test
ivansmokovic Sep 26, 2019
3b4e2fc
style fixes
ivansmokovic Sep 26, 2019
12c6729
Merge branch 'master' of github.com:FilipBolt/takepod into pipeline
ivansmokovic Sep 26, 2019
2cf1cd8
updated __init__
ivansmokovic Sep 26, 2019
ad28d4c
Implemented partial test for pipeline predict_raw
ivansmokovic Sep 27, 2019
20906aa
Style correction
ivansmokovic Sep 27, 2019
00383bb
Removed model_args and trainer_args from pipeline constructor
ivansmokovic Oct 7, 2019
48270a7
Removed duplicated code in fit and partial_fit
ivansmokovic Oct 7, 2019
6694d75
Added SklearnTensorTransformerWrapper
ivansmokovic Oct 7, 2019
2a17a47
Added Documentation, fixed tests
ivansmokovic Oct 7, 2019
35f2831
Added test for sklearn wrapper
ivansmokovic Oct 9, 2019
43c7060
Added test for sklearn wrapper
ivansmokovic Oct 9, 2019
d13ddc0
Improved tests
ivansmokovic Oct 9, 2019
34da938
addressed minor PR complaints
ivansmokovic Oct 9, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion takepod/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
"models",
"preproc",
"storage",
"validation"]
"validation",
"pipeline"]


# Reference for initialization of logging scikit-learn
Expand Down
19 changes: 11 additions & 8 deletions takepod/examples/experiment_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@
from takepod.datasets.impl.pauza_dataset import PauzaHRDataset
from takepod.models.impl.fc_model import ScikitMLPClassifier
from takepod.models.impl.simple_trainers import SimpleTrainer
from takepod.models import Experiment
from takepod.models import Experiment, FeatureTransformer, SklearnTensorTransformerWrapper
from takepod.validation import k_fold_classification_metrics
from takepod.model_selection import grid_search
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler


def numericalize_pauza_rating(rating):
Expand Down Expand Up @@ -70,15 +71,17 @@ def train_iterator_provider(dataset):
embedding_matrix = vectorizer.get_embedding_matrix(
fields["Text"].vocab)

feature_transform = partial(feature_transform_mean_fun,
embedding_matrix=embedding_matrix)
feature_transform_fn = partial(feature_transform_mean_fun,
embedding_matrix=embedding_matrix)

tensor_transformer = SklearnTensorTransformerWrapper(StandardScaler())
feature_transformer = FeatureTransformer(feature_transform_fn, tensor_transformer)

experiment = Experiment(ScikitMLPClassifier,
trainer,
train_iterator_provider,
None,
feature_transform,
label_transform_fun)
trainer=trainer,
training_iterator_callable=train_iterator_provider,
feature_transformer=feature_transformer,
label_transform_fun=label_transform_fun)

_, model_params, train_params = \
grid_search(experiment,
Expand Down
5 changes: 4 additions & 1 deletion takepod/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@

from .model import AbstractFrameworkModel, AbstractSupervisedModel
from .batch_transform_functions import default_feature_transform, default_label_transform
from .transformers import FeatureTransformer, TensorTransformer, \
SklearnTensorTransformerWrapper
from .experiment import Experiment
from .trainer import AbstractTrainer

__all__ = ["AbstractFrameworkModel", "AbstractSupervisedModel",
"default_feature_transform", "default_label_transform", "Experiment",
"AbstractTrainer"]
"AbstractTrainer", "FeatureTransformer", "TensorTransformer",
"SklearnTensorTransformerWrapper"]
164 changes: 135 additions & 29 deletions takepod/models/experiment.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,29 @@
"""Modules defines an experiment - class used to combine iteration over data,
model training and prediction."""
from typing import Callable, NamedTuple, Dict, Type
from typing import Callable, NamedTuple, Dict, Type, Union
from inspect import isclass
import logging

import numpy as np

from takepod.datasets.dataset import Dataset
from takepod.datasets.iterator import Iterator, SingleBatchIterator
from takepod.models import AbstractSupervisedModel,\
default_feature_transform, default_label_transform
from takepod.models import AbstractSupervisedModel, \
default_feature_transform, default_label_transform, FeatureTransformer
from takepod.models.trainer import AbstractTrainer

_LOGGER = logging.getLogger(__name__)


class Experiment:
"""Class used to streamline model fitting and prediction."""

def __init__(self,
model_class: Type[AbstractSupervisedModel],
trainer: AbstractTrainer,
training_iterator_callable: Callable[[Dataset], Iterator],
model: Union[Type[AbstractSupervisedModel], AbstractSupervisedModel],
feature_transformer: FeatureTransformer = None,
trainer: AbstractTrainer = None,
training_iterator_callable: Callable[[Dataset], Iterator] = None,
prediction_iterator_callable: Callable[[Dataset], Iterator] = None,
feature_transform_fun:
Callable[[NamedTuple], np.ndarray] = None,
label_transform_fun:
Callable[[NamedTuple], np.ndarray] = None
):
Expand All @@ -29,8 +32,11 @@ def __init__(self,

Parameters
----------
model_class : class
Class of the Model to be fitted.
model : class or model instance
Class of the Model to be fitted or a pre-trained model.
If a pre-trained model instance is passed and `fit` is called, a new model
instance will be created. To fine-tune the passed model instance, call
`partial_fit` instead.
Must be a subclass of Podium's `AbstractSupervisedModel`

trainer : AbstractTrainer
Expand All @@ -46,18 +52,24 @@ def __init__(self,
a single tensor before being returned. If passed None, a SingleBatchIterator
will be used as a default.

feature_transform_fun : Callable[[NamedTuple], np.ndarray]
Callable that transforms the input part of the batch returned by the iterator
into features that can be fed into the model.
feature_transformer : FeatureTransformer
FeatureTransformer that transforms the input part of the batch returned by the
iterator into features that can be fed into the model. Will also be fitted
during Experiment fitting.

label_transform_fun : Callable[[NamedTuple], np.ndarray]
Callable that transforms the target part of the batch returned by the iterator
into the same format the model prediction is. For a hypothetical perfect model
the prediction result of the model for some examples must be identical to the
result of this callable for those same examples.
"""
self.model_class = model_class
self.model = None
if isclass(model):
ivansmokovic marked this conversation as resolved.
Show resolved Hide resolved
self.model_class = model
self.model = None
else:
self.model_class = model.__class__
self.model = model

self.trainer = trainer
self.training_iterator_callable = training_iterator_callable

Expand All @@ -72,9 +84,9 @@ def default_prediction_iterator_callable(dataset):
else:
self.prediction_iterator_callable = prediction_iterator_callable

self.feature_transform_fun = feature_transform_fun \
if feature_transform_fun is not None \
else default_feature_transform
self.feature_transformer = feature_transformer \
if feature_transformer is not None \
else FeatureTransformer(default_feature_transform)

self.label_transform_fun = label_transform_fun \
if label_transform_fun is not None \
Expand Down Expand Up @@ -107,7 +119,10 @@ def set_default_trainer_args(self, **kwargs):
def fit(self,
dataset: Dataset,
model_kwargs: Dict = None,
trainer_kwargs: Dict = None
trainer_kwargs: Dict = None,
feature_transformer: FeatureTransformer = None,
trainer: AbstractTrainer = None,
training_iterator_callable: Callable[[Dataset], Iterator] = None,
):
"""Fits the model to the provided Dataset. During fitting, the provided Iterator
and Trainer are used.
Expand All @@ -126,25 +141,108 @@ def fit(self,
Dict containing trainer arguments. Arguments passed to the trainer are the
default arguments defined with `set_default_trainer_args` updated/overridden
by 'trainer_kwargs'.

feature_transformer : FeatureTransformer, Optional
FeatureTransformer that transforms the input part of the batch returned by the
iterator into features that can be fed into the model. Will also be fitted
during Experiment fitting.
If None, the default FeatureTransformer provided in the constructor will be
used. Otherwise, this will overwrite the default feature transformer.

trainer : AbstractTrainer, Optional
ivansmokovic marked this conversation as resolved.
Show resolved Hide resolved
Trainer used to fit the model. If None, the trainer provided in the
constructor will be used.

training_iterator_callable: Callable[[Dataset], Iterator]
Callable used to instantiate new instances of the Iterator used in fitting the
model. If None, the training_iterator_callable provided in the
constructor will be used.
"""

model_kwargs = {} if model_kwargs is None else model_kwargs
trainer_kwargs = {} if trainer_kwargs is None else trainer_kwargs

model_args = self.default_model_args.copy()
model_args.update(model_kwargs)

trainer_args = self.default_trainer_args.copy()
trainer_args.update(trainer_kwargs)
trainer = trainer if trainer is not None else self.trainer
if trainer is None:
ivansmokovic marked this conversation as resolved.
Show resolved Hide resolved
errmsg = "No trainer provided. Trainer must be provided either in the " \
"constructor or as an argument to the fit method."
_LOGGER.error(errmsg)
raise RuntimeError(errmsg)

if feature_transformer is not None:
self.feature_transformer = feature_transformer

# Fit the feature transformer if it needs fitting
if self.feature_transformer.requires_fitting():
x_batch, y_batch = next(SingleBatchIterator(dataset).__iter__())
y = self.label_transform_fun(y_batch)
self.feature_transformer.fit(x_batch, y)

# Create new model instance
self.model = self.model_class(**model_args)
train_iterator = self.training_iterator_callable(dataset)

self.trainer.train(self.model,
train_iterator,
self.feature_transform_fun,
self.label_transform_fun,
**trainer_args)
# Train the model
self.partial_fit(dataset,
trainer_kwargs,
trainer,
training_iterator_callable)

def partial_fit(self,
dataset: Dataset,
trainer_kwargs: Dict = None,
trainer: AbstractTrainer = None,
ivansmokovic marked this conversation as resolved.
Show resolved Hide resolved
training_iterator_callable: Callable[[Dataset], Iterator] = None):
"""Fits the model to the data without resetting the model.
ivansmokovic marked this conversation as resolved.
Show resolved Hide resolved

Parameters
----------
dataset : Dataset
Dataset to fit the model to.

trainer_kwargs : dict
Dict containing trainer arguments. Arguments passed to the trainer are the
default arguments defined with `set_default_trainer_args` updated/overridden
by 'trainer_kwargs'.

trainer : AbstractTrainer, Optional
Trainer used to fit the model. If None, the trainer provided in the
constructor will be used.

training_iterator_callable: Callable[[Dataset], Iterator]
Callable used to instantiate new instances of the Iterator used in fitting the
model. If None, the training_iterator_callable provided in the
constructor will be used.

Returns
-------

"""
self._check_if_model_exists()

trainer = trainer if trainer is not None else self.trainer
if trainer is None:
errmsg = "No trainer provided. Trainer must be provided either in the " \
"constructor or as an argument to the partial_fit method."
_LOGGER.error(errmsg)
raise RuntimeError(errmsg)

trainer_kwargs = {} if trainer_kwargs is None else trainer_kwargs
ivansmokovic marked this conversation as resolved.
Show resolved Hide resolved
trainer_args = self.default_trainer_args.copy()
trainer_args.update(trainer_kwargs)

training_iterator_callable = training_iterator_callable \
if training_iterator_callable is not None \
else self.training_iterator_callable

train_iterator = training_iterator_callable(dataset)

trainer.train(self.model,
train_iterator,
self.feature_transformer,
self.label_transform_fun,
**trainer_args)

def predict(self,
dataset: Dataset,
Expand All @@ -167,13 +265,21 @@ def predict(self,
"""
# TODO: new method of providing examples must be defined.
# examples is taken in dataset form as proof-of-concept.
self._check_if_model_exists()

y = []

for x_batch, _ in self.prediction_iterator_callable(dataset):
x_batch_tensor = self.feature_transform_fun(x_batch)
x_batch_tensor = self.feature_transformer.transform(x_batch)
batch_prediction = self.model.predict(x_batch_tensor, **kwargs)
prediction_tensor = batch_prediction[AbstractSupervisedModel.PREDICTION_KEY]
y.append(prediction_tensor)

return np.concatenate(y)

def _check_if_model_exists(self):
if self.model is None:
errmsg = "Model instance not available. Please provide a model instance in " \
"the constructor or call `fit` before calling `partial_fit.`"
_LOGGER.error(errmsg)
raise RuntimeError(errmsg)
ivansmokovic marked this conversation as resolved.
Show resolved Hide resolved
4 changes: 2 additions & 2 deletions takepod/models/impl/simple_trainers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ class SimpleTrainer(AbstractTrainer):
def train(self,
model,
iterator,
feature_transform_fun,
feature_transformer,
label_transform_fun,
**kwargs):
self._check_kwargs(**kwargs)
for _ in range(kwargs[SimpleTrainer.MAX_EPOCH_KEY]):
for x_batch, y_batch in iterator:
x = feature_transform_fun(x_batch)
x = feature_transformer.transform(x_batch)
y = label_transform_fun(y_batch)
model.fit(X=x, y=y)

Expand Down
7 changes: 3 additions & 4 deletions takepod/models/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import numpy as np

from takepod.models import AbstractSupervisedModel
from takepod.models import AbstractSupervisedModel, FeatureTransformer
from takepod.datasets import Iterator


Expand All @@ -15,8 +15,7 @@ class AbstractTrainer(ABC):
def train(self,
model: AbstractSupervisedModel,
iterator: Iterator,
feature_transform_fun:
Callable[[NamedTuple], np.ndarray],
feature_transformer: FeatureTransformer,
label_transform_fun:
Callable[[NamedTuple], np.ndarray],
**kwargs):
Expand All @@ -27,7 +26,7 @@ def train(self,
The model that needs to be trained.
iterator : Iterator
Iterator instance that provides data from a dataset
feature_transform_fun: Callable[[NamedTuple], np.ndarray]
feature_transformer: FeatureTransformer
FeatureTransformer whose `transform` method converts the input part of the batch
returned by the iterator into features that can be fed into the model.
label_transform_fun: Callable[[NamedTuple], np.ndarray]
Expand Down
Loading