From 399b655b2da3b53f7c823c22f74b44c04e6aa0d3 Mon Sep 17 00:00:00 2001 From: yangarbiter Date: Fri, 4 Nov 2016 13:43:51 +0800 Subject: [PATCH 1/7] sklearn adapter --- libact/models/__init__.py | 1 + libact/models/sklearn_adapter.py | 26 ++++++++++ libact/models/tests/test_sklearn_adapter.py | 57 +++++++++++++++++++++ 3 files changed, 84 insertions(+) create mode 100644 libact/models/sklearn_adapter.py create mode 100644 libact/models/tests/test_sklearn_adapter.py diff --git a/libact/models/__init__.py b/libact/models/__init__.py index 60a6f9d..f14d664 100644 --- a/libact/models/__init__.py +++ b/libact/models/__init__.py @@ -4,3 +4,4 @@ from .logistic_regression import LogisticRegression from .perceptron import Perceptron from .svm import SVM +from .sklearn_adapter import SklearnAdapter diff --git a/libact/models/sklearn_adapter.py b/libact/models/sklearn_adapter.py new file mode 100644 index 0000000..6323867 --- /dev/null +++ b/libact/models/sklearn_adapter.py @@ -0,0 +1,26 @@ +"""scikit-learn classifier adapter +""" +from libact.base.interfaces import Model + + +class SklearnAdapter(Model): + """Implementation of the scikit-learn classifier to libact model interface. 
+ + Parameters + ---------- + clf : scikit-learn classifier object instance + The classifier object that is intended to be use with libact + """ + + def __init__(self, clf): + self._model = clf + + def train(self, dataset, *args, **kwargs): + return self._model.fit(*(dataset.format_sklearn() + args), **kwargs) + + def predict(self, feature, *args, **kwargs): + return self._model.predict(feature, *args, **kwargs) + + def score(self, testing_dataset, *args, **kwargs): + return self._model.score(*(testing_dataset.format_sklearn() + args), + **kwargs) diff --git a/libact/models/tests/test_sklearn_adapter.py b/libact/models/tests/test_sklearn_adapter.py new file mode 100644 index 0000000..3e94091 --- /dev/null +++ b/libact/models/tests/test_sklearn_adapter.py @@ -0,0 +1,57 @@ +""" Test sklearn adapter Model +""" +import unittest + +from numpy.testing import assert_array_equal +from sklearn import datasets +from sklearn.cross_validation import train_test_split +from sklearn.linear_model import LogisticRegression +from sklearn.svm import LinearSVC +from sklearn.neighbors import KNeighborsClassifier + +from libact.base.dataset import Dataset +from libact.models import SklearnAdapter + + +class IrisTestCase(unittest.TestCase): + + def setUp(self): + iris = datasets.load_iris() + X = iris.data + y = iris.target + self.X_train, self.X_test, self.y_train, self.y_test = \ + train_test_split(X, y, test_size=0.3, random_state=1126) + + def check_functions(self, adapter, clf): + adapter.train(Dataset(self.X_train, self.y_train)) + clf.fit(self.X_train, self.y_train) + + assert_array_equal( + adapter.predict(self.X_train), clf.predict(self.X_train)) + assert_array_equal( + adapter.predict(self.X_test), clf.predict(self.X_test)) + self.assertEqual( + adapter.score(Dataset(self.X_train, self.y_train)), + clf.score(self.X_train, self.y_train)) + self.assertEqual( + adapter.score(Dataset(self.X_test, self.y_test)), + clf.score(self.X_test, self.y_test)) + + def 
test_adapt_logistic_regression(self): + adapter = SklearnAdapter(LogisticRegression(random_state=1126)) + clf = LogisticRegression(random_state=1126) + self.check_functions(adapter, clf) + + def test_adapt_linear_svc(self): + adapter = SklearnAdapter(LinearSVC(random_state=1126)) + clf = LinearSVC(random_state=1126) + self.check_functions(adapter, clf) + + def test_adapt_knn(self): + adapter = SklearnAdapter(KNeighborsClassifier()) + clf = KNeighborsClassifier() + self.check_functions(adapter, clf) + + +if __name__ == '__main__': + unittest.main() From b6c2f54acfd06d972102f693fbb90cc2aeac5f07 Mon Sep 17 00:00:00 2001 From: yangarbiter Date: Thu, 10 Nov 2016 11:21:54 +0800 Subject: [PATCH 2/7] added example for sklearn adapter --- libact/models/sklearn_adapter.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/libact/models/sklearn_adapter.py b/libact/models/sklearn_adapter.py index 6323867..1b1725f 100644 --- a/libact/models/sklearn_adapter.py +++ b/libact/models/sklearn_adapter.py @@ -10,6 +10,28 @@ class SklearnAdapter(Model): ---------- clf : scikit-learn classifier object instance The classifier object that is intended to be use with libact + + Examples + -------- + Here is an example of using SklearnAdapter to classify the iris dataset: + + .. 
code-block:: python + from sklearn import datasets + from sklearn.cross_validation import train_test_split + from sklearn.linear_model import LogisticRegression + + from libact.base.dataset import Dataset + from libact.models import SklearnAdapter + + iris = datasets.load_iris() + X = iris.data + y = iris.target + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) + + adapter = SklearnAdapter(LogisticRegression(random_state=1126)) + + adapter.train(Dataset(X_train, y_train)) + adapter.predict(X_test) """ def __init__(self, clf): From f6ebe5ada756688ad9b2e29438fd602753447093 Mon Sep 17 00:00:00 2001 From: yangarbiter Date: Thu, 10 Nov 2016 11:36:18 +0800 Subject: [PATCH 3/7] add SklearnAdapter to documentation --- docs/libact.models.rst | 8 ++++++++ docs/overview.rst | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/docs/libact.models.rst b/docs/libact.models.rst index e022cb4..5b2251c 100644 --- a/docs/libact.models.rst +++ b/docs/libact.models.rst @@ -4,6 +4,14 @@ libact.models package Submodules ---------- +libact.models.sklearn_adapter module +---------------------------------------- + +.. automodule:: libact.models.sklearn_adapter + :members: + :undoc-members: + :show-inheritance: + libact.models.logistic_regression module ---------------------------------------- diff --git a/docs/overview.rst b/docs/overview.rst index 939a599..fa694ff 100644 --- a/docs/overview.rst +++ b/docs/overview.rst @@ -41,6 +41,10 @@ A :py:class:`libact.base.interfaces.Model` object represents a supervised classi A :py:class:`libact.base.interfaces.ContinuousModel` object represents an algorithm that supports continuous outputs during predictions, which includes an additional predict_real method. +Note that there is a :py:class:`libact.models.sklearn_adapter.SklearnAdapter` which +takes a sklearn classifier instance and adapts it to the libact Model +interface.
+ Example Usage ------------- Here is an example usage of `libact`: From 9c48651f00f1db00e1eecd414b5f7359bdd7ce6b Mon Sep 17 00:00:00 2001 From: yangarbiter Date: Wed, 28 Dec 2016 09:22:51 +0800 Subject: [PATCH 4/7] add probabilisticModel interface for predict_proba --- libact/base/interfaces.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/libact/base/interfaces.py b/libact/base/interfaces.py index e9d0285..47d1c9a 100644 --- a/libact/base/interfaces.py +++ b/libact/base/interfaces.py @@ -163,3 +163,27 @@ def predict_real(self, feature, *args, **kwargs): combination. """ pass + + +class ProbabilisticModel(Model): + + """Classification Model with probability output + + A probabilistic classification model is able to output a real-valued vector + for each features provided. + """ + @abstractmethod + def predict_proba(self, feature, *args, **kwargs): + """Predict probability estimate for samples. + + Parameters + ---------- + feature : array-like, shape (n_samples, n_features) + The samples whose probability estimates are to be predicted. + + Returns + ------- + X : array-like, shape (n_samples, n_classes) + Each entry is the probability estimate for each class.
+ """ + pass From 9ed7eb87379a741822aeeda6caf9b6292ad98e60 Mon Sep 17 00:00:00 2001 From: yangarbiter Date: Wed, 28 Dec 2016 10:15:02 +0800 Subject: [PATCH 5/7] add SklearnProbaAdapter --- libact/models/__init__.py | 2 +- libact/models/sklearn_adapter.py | 54 ++++++++++++++++++++- libact/models/tests/test_sklearn_adapter.py | 13 ++++- 3 files changed, 65 insertions(+), 4 deletions(-) diff --git a/libact/models/__init__.py b/libact/models/__init__.py index f14d664..c45c374 100644 --- a/libact/models/__init__.py +++ b/libact/models/__init__.py @@ -4,4 +4,4 @@ from .logistic_regression import LogisticRegression from .perceptron import Perceptron from .svm import SVM -from .sklearn_adapter import SklearnAdapter +from .sklearn_adapter import SklearnAdapter, SklearnProbaAdapter diff --git a/libact/models/sklearn_adapter.py b/libact/models/sklearn_adapter.py index 1b1725f..c32b189 100644 --- a/libact/models/sklearn_adapter.py +++ b/libact/models/sklearn_adapter.py @@ -1,6 +1,6 @@ """scikit-learn classifier adapter """ -from libact.base.interfaces import Model +from libact.base.interfaces import Model, ContinuousModel, ProbabilisticModel class SklearnAdapter(Model): @@ -46,3 +46,55 @@ def predict(self, feature, *args, **kwargs): def score(self, testing_dataset, *args, **kwargs): return self._model.score(*(testing_dataset.format_sklearn() + args), **kwargs) + + +class SklearnProbaAdapter(ContinuousModel, ProbabilisticModel): + """Implementation of the scikit-learn classifier to libact model interface. + It should support predict_proba method. + + Parameters + ---------- + clf : scikit-learn classifier object instance + The classifier object that is intended to be use with libact + + Examples + -------- + Here is an example of using SklearnAdapter to classify the iris dataset: + + .. 
code-block:: python + from sklearn import datasets + from sklearn.cross_validation import train_test_split + from sklearn.linear_model import LogisticRegression + + from libact.base.dataset import Dataset + from libact.models import SklearnAdapter + + iris = datasets.load_iris() + X = iris.data + y = iris.target + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) + + adapter = SklearnAdapter(LogisticRegression(random_state=1126)) + + adapter.train(Dataset(X_train, y_train)) + adapter.predict(X_test) + """ + + def __init__(self, clf): + self._model = clf + + def train(self, dataset, *args, **kwargs): + return self._model.fit(*(dataset.format_sklearn() + args), **kwargs) + + def predict(self, feature, *args, **kwargs): + return self._model.predict(feature, *args, **kwargs) + + def score(self, testing_dataset, *args, **kwargs): + return self._model.score(*(testing_dataset.format_sklearn() + args), + **kwargs) + + def predict_real(self, feature, *args, **kwargs): + return self._model.predict_proba(feature, *args, **kwargs) + + def predict_proba(self, feature, *args, **kwargs): + return self._model.predict_proba(feature, *args, **kwargs) diff --git a/libact/models/tests/test_sklearn_adapter.py b/libact/models/tests/test_sklearn_adapter.py index 3e94091..97eeed1 100644 --- a/libact/models/tests/test_sklearn_adapter.py +++ b/libact/models/tests/test_sklearn_adapter.py @@ -10,7 +10,7 @@ from sklearn.neighbors import KNeighborsClassifier from libact.base.dataset import Dataset -from libact.models import SklearnAdapter +from libact.models import SklearnAdapter, SklearnProbaAdapter class IrisTestCase(unittest.TestCase): @@ -37,8 +37,17 @@ def check_functions(self, adapter, clf): adapter.score(Dataset(self.X_test, self.y_test)), clf.score(self.X_test, self.y_test)) + def check_proba(self, adapter, clf): + adapter.train(Dataset(self.X_train, self.y_train)) + clf.fit(self.X_train, self.y_train) + + assert_array_equal(adapter.predict_proba(self.X_train), + 
clf.predict_proba(self.X_train)) + assert_array_equal(adapter.predict_real(self.X_train), + clf.predict_proba(self.X_train)) + def test_adapt_logistic_regression(self): - adapter = SklearnAdapter(LogisticRegression(random_state=1126)) + adapter = SklearnProbaAdapter(LogisticRegression(random_state=1126)) clf = LogisticRegression(random_state=1126) self.check_functions(adapter, clf) From 321c3734c8ea1f643ee09da6e4d48c25d221a754 Mon Sep 17 00:00:00 2001 From: yangarbiter Date: Wed, 28 Dec 2016 10:20:26 +0800 Subject: [PATCH 6/7] supporting probabilistic model for uncertainty sampling --- libact/query_strategies/uncertainty_sampling.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/libact/query_strategies/uncertainty_sampling.py b/libact/query_strategies/uncertainty_sampling.py index 89b6f0c..e42f0f4 100644 --- a/libact/query_strategies/uncertainty_sampling.py +++ b/libact/query_strategies/uncertainty_sampling.py @@ -7,7 +7,8 @@ """ import numpy as np -from libact.base.interfaces import QueryStrategy, ContinuousModel +from libact.base.interfaces import QueryStrategy, ContinuousModel, \ + ProbabilisticModel from libact.utils import inherit_docstring_from, zip @@ -71,9 +72,10 @@ def __init__(self, *args, **kwargs): raise TypeError( "__init__() missing required keyword-only argument: 'model'" ) - if not isinstance(self.model, ContinuousModel): + if not isinstance(self.model, ContinuousModel) and \ + not isinstance(self.model, ProbabilisticModel): raise TypeError( - "model has to be a ContinuousModel" + "model has to be a ContinuousModel or ProbabilisticModel" ) self.model.train(self.dataset) @@ -91,14 +93,17 @@ def make_query(self): unlabeled_entry_ids, X_pool = zip(*dataset.get_unlabeled_entries()) + if isinstance(self.model, ContinuousModel): + dvalue = self.model.predict_real(X_pool) + elif isinstance(self.model, ProbabilisticModel): + dvalue = self.model.predict_proba(X_pool) + if self.method == 'lc': # least confident ask_id = 
np.argmin( - np.max(self.model.predict_real(X_pool), axis=1) + np.max(dvalue, axis=1) ) elif self.method == 'sm': # smallest margin - dvalue = self.model.predict_real(X_pool) - if np.shape(dvalue)[1] > 2: # Find 2 largest decision values dvalue = -(np.partition(-dvalue, 2, axis=1)[:, :2]) From a9736d0a4878593060345775c0ec8d829cc96303 Mon Sep 17 00:00:00 2001 From: yangarbiter Date: Thu, 29 Dec 2016 10:17:07 +0800 Subject: [PATCH 7/7] inherit ContinuousModel for ProbabilisticModel --- libact/base/interfaces.py | 5 ++++- libact/models/sklearn_adapter.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/libact/base/interfaces.py b/libact/base/interfaces.py index 47d1c9a..e6e5f48 100644 --- a/libact/base/interfaces.py +++ b/libact/base/interfaces.py @@ -165,13 +165,16 @@ def predict_real(self, feature, *args, **kwargs): pass -class ProbabilisticModel(Model): +class ProbabilisticModel(ContinuousModel): """Classification Model with probability output A probabilistic classification model is able to output a real-valued vector for each features provided. """ + def predict_real(self, feature, *args, **kwargs): + return self.predict_proba(feature, *args, **kwargs) + @abstractmethod def predict_proba(self, feature, *args, **kwargs): """Predict probability estimate for samples. diff --git a/libact/models/sklearn_adapter.py b/libact/models/sklearn_adapter.py index c32b189..6617fe9 100644 --- a/libact/models/sklearn_adapter.py +++ b/libact/models/sklearn_adapter.py @@ -48,7 +48,7 @@ def score(self, testing_dataset, *args, **kwargs): **kwargs) -class SklearnProbaAdapter(ContinuousModel, ProbabilisticModel): +class SklearnProbaAdapter(ProbabilisticModel): """Implementation of the scikit-learn classifier to libact model interface. It should support predict_proba method.