Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sklearn adapter #70

Merged
merged 7 commits into from
Dec 29, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docs/libact.models.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ libact.models package
Submodules
----------

libact.models.sklearn_adapter module
----------------------------------------

.. automodule:: libact.models.sklearn_adapter
    :members:
    :undoc-members:
    :show-inheritance:

libact.models.logistic_regression module
----------------------------------------

Expand Down
4 changes: 4 additions & 0 deletions docs/overview.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ A :py:class:`libact.base.interfaces.Model` object represents a supervised classi

A :py:class:`libact.base.interfaces.ContinuousModel` object represents an algorithm that supports continuous outputs during predictions, which includes an additional predict_real method.

Note that there is a :py:class:`libact.models.sklearn_adapter.SklearnAdapter`
which takes a sklearn classifier instance and adapts it to the libact Model
interface.

Example Usage
-------------
Here is an example usage of `libact`:
Expand Down
27 changes: 27 additions & 0 deletions libact/base/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,30 @@ def predict_real(self, feature, *args, **kwargs):
combination.
"""
pass


class ProbabilisticModel(ContinuousModel):
    """Classification model that exposes per-class probability estimates.

    Subclasses implement :meth:`predict_proba`; :meth:`predict_real` is
    provided here and returns exactly what :meth:`predict_proba` returns.
    """

    def predict_real(self, feature, *args, **kwargs):
        """Return the result of :meth:`predict_proba` for ``feature``.

        All positional and keyword arguments are forwarded unchanged.
        """
        probabilities = self.predict_proba(feature, *args, **kwargs)
        return probabilities

    @abstractmethod
    def predict_proba(self, feature, *args, **kwargs):
        """Predict probability estimates for samples.

        Parameters
        ----------
        feature : array-like, shape (n_samples, n_features)
            The samples whose probability estimates are to be predicted.

        Returns
        -------
        X : array-like, shape (n_samples, n_classes)
            Each entry is the probability estimate for each class.
        """
1 change: 1 addition & 0 deletions libact/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
from .logistic_regression import LogisticRegression
from .perceptron import Perceptron
from .svm import SVM
from .sklearn_adapter import SklearnAdapter, SklearnProbaAdapter
100 changes: 100 additions & 0 deletions libact/models/sklearn_adapter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
"""scikit-learn classifier adapter
"""
from libact.base.interfaces import Model, ContinuousModel, ProbabilisticModel


class SklearnAdapter(Model):
    """Adapter exposing a scikit-learn classifier through the libact Model
    interface.

    Every method forwards to the method of the same role on the wrapped
    classifier (``fit``, ``predict``, ``score``).

    Parameters
    ----------
    clf : scikit-learn classifier object instance
        The classifier object that is intended to be used with libact.

    Examples
    --------
    Here is an example of using SklearnAdapter to classify the iris dataset:

    .. code-block:: python

        from sklearn import datasets
        from sklearn.model_selection import train_test_split
        from sklearn.linear_model import LogisticRegression

        from libact.base.dataset import Dataset
        from libact.models import SklearnAdapter

        iris = datasets.load_iris()
        X = iris.data
        y = iris.target
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

        adapter = SklearnAdapter(LogisticRegression(random_state=1126))

        adapter.train(Dataset(X_train, y_train))
        adapter.predict(X_test)
    """

    def __init__(self, clf):
        # The wrapped classifier; all calls below are delegated to it.
        self._model = clf

    def train(self, dataset, *args, **kwargs):
        """Fit the wrapped classifier on ``dataset``.

        The values returned by ``dataset.format_sklearn()`` are passed to
        ``clf.fit`` as leading positional arguments, followed by ``args``
        and ``kwargs``. Returns whatever ``clf.fit`` returns.
        """
        fit_args = dataset.format_sklearn() + args
        return self._model.fit(*fit_args, **kwargs)

    def predict(self, feature, *args, **kwargs):
        """Return ``clf.predict(feature, *args, **kwargs)``."""
        return self._model.predict(feature, *args, **kwargs)

    def score(self, testing_dataset, *args, **kwargs):
        """Score the wrapped classifier on ``testing_dataset``.

        The values returned by ``testing_dataset.format_sklearn()`` are
        passed to ``clf.score`` as leading positional arguments, followed by
        ``args`` and ``kwargs``. Returns whatever ``clf.score`` returns.
        """
        score_args = testing_dataset.format_sklearn() + args
        return self._model.score(*score_args, **kwargs)


class SklearnProbaAdapter(ProbabilisticModel):
    """Adapter exposing a scikit-learn classifier that implements
    ``predict_proba`` through the libact ProbabilisticModel interface.

    Every method forwards to the method of the same role on the wrapped
    classifier (``fit``, ``predict``, ``score``, ``predict_proba``).

    Parameters
    ----------
    clf : scikit-learn classifier object instance
        The classifier object that is intended to be used with libact. It
        should support the ``predict_proba`` method.

    Examples
    --------
    Here is an example of using SklearnProbaAdapter to classify the iris
    dataset:

    .. code-block:: python

        from sklearn import datasets
        from sklearn.model_selection import train_test_split
        from sklearn.linear_model import LogisticRegression

        from libact.base.dataset import Dataset
        from libact.models import SklearnProbaAdapter

        iris = datasets.load_iris()
        X = iris.data
        y = iris.target
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

        adapter = SklearnProbaAdapter(LogisticRegression(random_state=1126))

        adapter.train(Dataset(X_train, y_train))
        adapter.predict(X_test)
        adapter.predict_proba(X_test)
    """

    def __init__(self, clf):
        # The wrapped classifier; all calls below are delegated to it.
        self._model = clf

    def train(self, dataset, *args, **kwargs):
        """Fit the wrapped classifier on ``dataset``.

        The values returned by ``dataset.format_sklearn()`` are passed to
        ``clf.fit`` as leading positional arguments, followed by ``args``
        and ``kwargs``. Returns whatever ``clf.fit`` returns.
        """
        fit_args = dataset.format_sklearn() + args
        return self._model.fit(*fit_args, **kwargs)

    def predict(self, feature, *args, **kwargs):
        """Return ``clf.predict(feature, *args, **kwargs)``."""
        return self._model.predict(feature, *args, **kwargs)

    def score(self, testing_dataset, *args, **kwargs):
        """Score the wrapped classifier on ``testing_dataset``.

        The values returned by ``testing_dataset.format_sklearn()`` are
        passed to ``clf.score`` as leading positional arguments, followed by
        ``args`` and ``kwargs``. Returns whatever ``clf.score`` returns.
        """
        score_args = testing_dataset.format_sklearn() + args
        return self._model.score(*score_args, **kwargs)

    def predict_real(self, feature, *args, **kwargs):
        """Return the probability estimates as the real-valued output.

        Delegates to :meth:`predict_proba` so the two methods cannot drift
        apart.
        """
        return self.predict_proba(feature, *args, **kwargs)

    def predict_proba(self, feature, *args, **kwargs):
        """Return ``clf.predict_proba(feature, *args, **kwargs)``."""
        return self._model.predict_proba(feature, *args, **kwargs)
66 changes: 66 additions & 0 deletions libact/models/tests/test_sklearn_adapter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
""" Test sklearn adapter Model
"""
import unittest

from numpy.testing import assert_array_equal
from sklearn import datasets
from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier

from libact.base.dataset import Dataset
from libact.models import SklearnAdapter, SklearnProbaAdapter


class IrisTestCase(unittest.TestCase):
    """Check that the sklearn adapters behave like the classifier they wrap.

    Each test trains an adapter and an identically configured bare
    classifier on the same iris split and compares their outputs.
    """

    def setUp(self):
        iris = datasets.load_iris()
        X = iris.data
        y = iris.target
        # Fixed random_state keeps the split identical across runs.
        self.X_train, self.X_test, self.y_train, self.y_test = \
            train_test_split(X, y, test_size=0.3, random_state=1126)

    def check_functions(self, adapter, clf):
        """Assert that predict and score of ``adapter`` match ``clf``."""
        adapter.train(Dataset(self.X_train, self.y_train))
        clf.fit(self.X_train, self.y_train)

        assert_array_equal(
            adapter.predict(self.X_train), clf.predict(self.X_train))
        assert_array_equal(
            adapter.predict(self.X_test), clf.predict(self.X_test))
        self.assertEqual(
            adapter.score(Dataset(self.X_train, self.y_train)),
            clf.score(self.X_train, self.y_train))
        self.assertEqual(
            adapter.score(Dataset(self.X_test, self.y_test)),
            clf.score(self.X_test, self.y_test))

    def check_proba(self, adapter, clf):
        """Assert that predict_proba and predict_real of ``adapter`` both
        match ``clf.predict_proba``."""
        adapter.train(Dataset(self.X_train, self.y_train))
        clf.fit(self.X_train, self.y_train)

        assert_array_equal(adapter.predict_proba(self.X_train),
                           clf.predict_proba(self.X_train))
        assert_array_equal(adapter.predict_real(self.X_train),
                           clf.predict_proba(self.X_train))

    def test_adapt_logistic_regression(self):
        adapter = SklearnProbaAdapter(LogisticRegression(random_state=1126))
        clf = LogisticRegression(random_state=1126)
        self.check_functions(adapter, clf)
        # Previously check_proba was never called, leaving the probabilistic
        # half of SklearnProbaAdapter untested.
        self.check_proba(adapter, clf)

    def test_adapt_linear_svc(self):
        adapter = SklearnAdapter(LinearSVC(random_state=1126))
        clf = LinearSVC(random_state=1126)
        self.check_functions(adapter, clf)

    def test_adapt_knn(self):
        adapter = SklearnAdapter(KNeighborsClassifier())
        clf = KNeighborsClassifier()
        self.check_functions(adapter, clf)


if __name__ == '__main__':
unittest.main()
17 changes: 11 additions & 6 deletions libact/query_strategies/uncertainty_sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
"""
import numpy as np

from libact.base.interfaces import QueryStrategy, ContinuousModel
from libact.base.interfaces import QueryStrategy, ContinuousModel, \
ProbabilisticModel
from libact.utils import inherit_docstring_from, zip


Expand Down Expand Up @@ -71,9 +72,10 @@ def __init__(self, *args, **kwargs):
raise TypeError(
"__init__() missing required keyword-only argument: 'model'"
)
if not isinstance(self.model, ContinuousModel):
if not isinstance(self.model, ContinuousModel) and \
not isinstance(self.model, ProbabilisticModel):
raise TypeError(
"model has to be a ContinuousModel"
"model has to be a ContinuousModel or ProbabilisticModel"
)
self.model.train(self.dataset)

Expand All @@ -91,14 +93,17 @@ def make_query(self):

unlabeled_entry_ids, X_pool = zip(*dataset.get_unlabeled_entries())

if isinstance(self.model, ContinuousModel):
dvalue = self.model.predict_real(X_pool)
elif isinstance(self.model, ProbabilisticModel):
dvalue = self.model.predict_proba(X_pool)

if self.method == 'lc': # least confident
ask_id = np.argmin(
np.max(self.model.predict_real(X_pool), axis=1)
np.max(dvalue, axis=1)
)

elif self.method == 'sm': # smallest margin
dvalue = self.model.predict_real(X_pool)

if np.shape(dvalue)[1] > 2:
# Find 2 largest decision values
dvalue = -(np.partition(-dvalue, 2, axis=1)[:, :2])
Expand Down