From a326d232038d9c318127cb774398bf614224fd02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bla=C5=BE?= Date: Mon, 4 Jul 2016 10:47:13 +0200 Subject: [PATCH] Revert "Bottlechest, bottleneck" --- Orange/__init__.py | 4 - Orange/base.py | 8 +- Orange/data/filter.py | 2 +- Orange/data/io.py | 2 +- Orange/data/table.py | 9 +- Orange/misc/bottlechest.py | 179 ------------------ Orange/preprocess/preprocess.py | 6 +- Orange/tests/test_contingency.py | 8 +- Orange/util.py | 16 -- .../tests/test_owlogisticregression.py | 2 +- codecov.yml | 2 +- requirements-core.txt | 2 +- scripts/build-osx-app.sh | 6 + scripts/windows/build-win-application.sh | 6 +- 14 files changed, 35 insertions(+), 217 deletions(-) delete mode 100644 Orange/misc/bottlechest.py diff --git a/Orange/__init__.py b/Orange/__init__.py index e2e3aff9e37..1c1a24f5b88 100644 --- a/Orange/__init__.py +++ b/Orange/__init__.py @@ -1,9 +1,5 @@ from .misc.lazy_module import _LazyModule from .misc.datasets import _DatasetInfo - -# bottlechest patches bottleneck -from .misc import bottlechest # pylint: disable=unused-import - from .version import \ short_version as __version__, git_revision as __git_version__ diff --git a/Orange/base.py b/Orange/base.py index 38d59e22f97..8efdcfa4619 100644 --- a/Orange/base.py +++ b/Orange/base.py @@ -2,12 +2,12 @@ import numpy as np import scipy +import bottlechest as bn from Orange.data import Table, Storage, Instance, Value from Orange.preprocess import (RemoveNaNClasses, Continuize, RemoveNaNColumns, SklImpute) from Orange.misc.wrapper_meta import WrapperMeta -from Orange.util import one_hot __all__ = ["Learner", "Model", "SklLearner", "SklModel"] @@ -157,9 +157,11 @@ def __call__(self, data, ret=Value): for c in self.domain.class_vars) probs = np.zeros(value.shape + (max_card,), float) for i, cvar in enumerate(self.domain.class_vars): - probs[:, i, :] = one_hot(value[:, i]) + probs[:, i, :], _ = bn.bincount(np.atleast_2d(value[:, i]), + max_card - 1) else: - probs = one_hot(value) + probs, _ = bn.bincount(np.atleast_2d(value), + len(self.domain.class_var.values) - 1) if ret == Model.ValueProbs: return value, probs else: diff --git a/Orange/data/filter.py b/Orange/data/filter.py index b98ad3d2d0d..86fb0c5f3ed 100644 --- a/Orange/data/filter.py +++ b/Orange/data/filter.py @@ -5,7 +5,7 @@ from ..misc.enum import Enum import numpy as np -import bottleneck as bn +import bottlechest as bn from Orange.data import Instance, Storage, Variable diff --git a/Orange/data/io.py b/Orange/data/io.py index a23dd86b8b6..fb862d5a51a 100644 --- a/Orange/data/io.py +++ b/Orange/data/io.py @@ -18,7 +18,7 @@ from urllib.parse import urlparse, unquote as urlunquote from urllib.request import urlopen -import bottleneck as bn +import bottlechest as bn import numpy as np from chardet.universaldetector import UniversalDetector diff --git a/Orange/data/table.py b/Orange/data/table.py index ebb8d00ed75..5dd0357d11c 100644 --- a/Orange/data/table.py +++ b/Orange/data/table.py @@ -13,7 +13,7 @@ from urllib.request import urlopen from urllib.error import URLError -import bottleneck as bn +import bottlechest as bn from scipy import sparse as sp from .instance import * @@ -935,7 +935,12 @@ def __determine_density(data): if data is None: return Storage.Missing if data is not None and sp.issparse(data): - return Storage.SPARSE_BOOL if (data.data == 1).all() else Storage.SPARSE + try: + if bn.bincount(data.data, 1)[0][0] == 0: + return Storage.SPARSE_BOOL + except ValueError as e: + pass + return Storage.SPARSE else: return Storage.DENSE diff --git a/Orange/misc/bottlechest.py b/Orange/misc/bottlechest.py deleted file mode 100644 index 3e5e3da64d9..00000000000 --- a/Orange/misc/bottlechest.py +++ /dev/null @@ -1,179 +0,0 @@ -""" -This module provides alternatives for the few additional functions found in -and once used from the bottlechest package (fork of bottleneck). - -It also patches bottleneck to contain these functions. -""" -import numpy as np -from scipy.sparse import issparse -import bottleneck as bn - - -def bincount(X, max_val=None, weights=None, minlength=None): - """Return counts of values in array X. - - Works exactly like np.bincount(), except that it also supports non-integer - arrays with nans. - """ - X = np.asanyarray(X) - if X.dtype.kind == 'f' and bn.anynan(X): - nonnan = ~np.isnan(X) - nans = (~nonnan).sum(axis=0) - X = X[nonnan] - if weights is not None: - weights = weights[nonnan] - else: - nans = 0 if X.ndim == 1 else np.zeros(X.shape[1]) - return (np.bincount(X.astype(np.int32, copy=False), - weights=weights, - minlength=minlength or max_val + 1), - nans) - - -def countnans(X, weights=None, axis=None, dtype=None, keepdims=False): - """ - Count the undefined elements in arr along given axis. - - Parameters - ---------- - X : array_like - weights : array_like - Weights to weight the nans with, before or after counting (depending - on the weights shape). - - Returns - ------- - counts - """ - X = np.asanyarray(X) - isnan = np.isnan(X) - if weights is not None and weights.shape == X.shape: - isnan = isnan * weights - counts = isnan.sum(axis=axis, dtype=dtype, keepdims=keepdims) - if weights is not None and weights.shape != X.shape: - counts = counts * weights - return counts - - -def contingency(X, y, max_X=None, max_y=None, weights=None, mask=None): - """ - Compute the contingency matrices for each column of X (excluding the masked) - versus the vector y. - - If the array is 1-dimensional, a 2d contingency matrix is returned. If the - array is 2d, the function returns a 3d array, with the first dimension - corresponding to column index (variable in the input array). - - The rows of contingency matrix correspond to values of variables, the - columns correspond to values in vector `y`. - (??? isn't it the other way around ???) - - Rows in the input array can be weighted (argument `weights`). A subset of - columns can be selected by additional argument `mask`. - - The function also returns a count of NaN values per each value of `y`. - - Parameters - ---------- - X : array_like - With values in columns. - y : 1d array - Vector of true values. - max_X : int - The maximal value in the array - max_y : int - The maximal value in `y` - weights : ... - mask : sequence - Discrete columns of X. - - Returns - ------- - contingencies: (m × ny × nx) array - m number of masked (used) columns (all if mask=None), i.e. - for each column of X; - ny number of uniques in y, - nx number of uniques in column of X. - nans : array_like - Number of nans in each column of X for each unique value of y. - """ - if weights is not None and np.any(weights) and np.unique(weights)[0] != 1: - raise ValueError('weights not yet supported') - - was_1d = False - if X.ndim == 1: - X = X[..., np.newaxis] - was_1d = True - - contingencies, nans = [], [] - ny = np.unique(y).size if max_y is None else max_y + 1 - for i in range(X.shape[1]): - if mask is not None and not mask[i]: - contingencies.append(np.zeros((ny, max_X + 1))) - nans.append(np.zeros(ny)) - continue - col = X[..., i] - nx = np.unique(col[~np.isnan(col)]).size if max_X is None else max_X + 1 - if issparse(col): - col = np.ravel(col.todense()) - contingencies.append( - bincount(y + ny * col, - minlength=ny * nx)[0].reshape(nx, ny).T) - nans.append( - bincount(y[np.isnan(col)], minlength=ny)[0]) - if was_1d: - return contingencies[0], nans[0] - return np.array(contingencies), np.array(nans) - - -def stats(X, weights=None, compute_variance=False): - """ - Compute min, max, #nans, mean and variance. - - Result is a tuple (min, max, mean, variance, #nans, #non-nans) or an - array of shape (len(X), 6). - - The mean and the number of nans and non-nans are weighted. - - Computation of variance requires an additional pass and is not enabled - by default. Zeros are filled in instead of variance. - - Parameters - ---------- - X : array_like, 1 or 2 dimensions - Input array. - weights : array_like, optional - Weights, array of the same length as `x`. - compute_variance : bool, optional - If set to True, the function also computes variance. - - Returns - ------- - out : a 6-element tuple or an array of shape (len(x), 6) - Computed (min, max, mean, variance or 0, #nans, #non-nans) - - Raises - ------ - ValueError - If the length of the weight vector does not match the length of the - array - """ - if weights is not None: - X = X * weights - is_numeric = np.issubdtype(X.dtype, np.number) - nans = (np.isnan(X) if is_numeric else X.astype(bool)).sum(axis=0) - variance = np.nanvar(X, axis=0) if compute_variance else np.zeros(X.shape[1]) - return np.column_stack((np.nanmin(X, axis=0) if is_numeric else np.inf, - np.nanmax(X, axis=0) if is_numeric else -np.inf, - np.nanmean(X, axis=0) if is_numeric else 0, - nans, - variance if is_numeric else 0, - X.shape[0] - nans)) - - -# Patch bottleneck to contain these additions -for func in (bincount, countnans, contingency, stats): - if getattr(bn, func.__name__, bincount).__module__ != func.__module__: - raise DeprecationWarning('bottleneck got its own {}();' - 'consider deprecating our own.'.format(func.__name__)) - setattr(bn, func.__name__, func) diff --git a/Orange/preprocess/preprocess.py b/Orange/preprocess/preprocess.py index 5bfe56dbed7..0cca3bb4025 100644 --- a/Orange/preprocess/preprocess.py +++ b/Orange/preprocess/preprocess.py @@ -5,7 +5,7 @@ """ import numpy as np import sklearn.preprocessing as skl_preprocessing -import bottleneck as bn +import bottlechest import Orange.data from Orange.data import Table @@ -198,8 +198,8 @@ def __call__(self, data): data : an input data set """ - oks = bn.nanmin(data.X, axis=0) != \ - bn.nanmax(data.X, axis=0) + oks = bottlechest.nanmin(data.X, axis=0) != \ + bottlechest.nanmax(data.X, axis=0) atts = [data.domain.attributes[i] for i, ok in enumerate(oks) if ok] domain = Orange.data.Domain(atts, data.domain.class_vars, data.domain.metas) diff --git a/Orange/tests/test_contingency.py b/Orange/tests/test_contingency.py index 17707e38359..a1824b6be01 100644 --- a/Orange/tests/test_contingency.py +++ b/Orange/tests/test_contingency.py @@ -163,7 +163,7 @@ def _construct_sparse(): def test_sparse(self): d = self._construct_sparse() cont = contingency.Discrete(d, 5) - np.testing.assert_almost_equal(cont[0], [2, 0, 0]) + np.testing.assert_almost_equal(cont[0], [1, 0, 0]) np.testing.assert_almost_equal(cont["b"], [0, 1, 1]) np.testing.assert_almost_equal(cont[2], [1, 0, 0]) @@ -193,7 +193,7 @@ def test_get_contingency(self): d = self._construct_sparse() cont = contingency.get_contingency(d, 5) self.assertIsInstance(cont, contingency.Discrete) - np.testing.assert_almost_equal(cont[0], [2, 0, 0]) + np.testing.assert_almost_equal(cont[0], [1, 0, 0]) np.testing.assert_almost_equal(cont["b"], [0, 1, 1]) np.testing.assert_almost_equal(cont[2], [1, 0, 0]) @@ -218,7 +218,7 @@ def test_get_contingencies(self): cont = conts[5] self.assertIsInstance(cont, contingency.Discrete) - np.testing.assert_almost_equal(cont[0], [2, 0, 0]) + np.testing.assert_almost_equal(cont[0], [1, 0, 0]) np.testing.assert_almost_equal(cont["b"], [0, 1, 1]) np.testing.assert_almost_equal(cont[2], [1, 0, 0]) @@ -240,7 +240,7 @@ def test_get_contingencies(self): self.assertEqual(len(conts), 10) cont = conts[5] self.assertIsInstance(cont, contingency.Discrete) - np.testing.assert_almost_equal(cont[0], [2, 0, 0]) + np.testing.assert_almost_equal(cont[0], [1, 0, 0]) np.testing.assert_almost_equal(cont["b"], [0, 1, 1]) np.testing.assert_almost_equal(cont[2], [1, 0, 0]) diff --git a/Orange/util.py b/Orange/util.py index d9700b20987..6d235a1cc19 100644 --- a/Orange/util.py +++ b/Orange/util.py @@ -90,22 +90,6 @@ def scale(values, min=0, max=1): return (-np.nanmin(values) + values) / ptp * (max - min) + min -def one_hot(values, dtype=float): - """Return a one-hot transform of values - - Parameters - ---------- - values : 1d array - Integer values (hopefully 0-max). - - Returns - ------- - result - 2d array with ones in respective indicator columns. - """ - return np.eye(np.max(values) + 1, dtype=dtype)[np.asanyarray(values, dtype=int)] - - class Registry(type): """Metaclass that registers subtypes.""" def __new__(cls, name, bases, attrs): diff --git a/Orange/widgets/classify/tests/test_owlogisticregression.py b/Orange/widgets/classify/tests/test_owlogisticregression.py index 447a2b90e7a..3c4f2e40eb0 100644 --- a/Orange/widgets/classify/tests/test_owlogisticregression.py +++ b/Orange/widgets/classify/tests/test_owlogisticregression.py @@ -1,6 +1,6 @@ import unittest -import bottleneck as bn +import bottlechest as bn from Orange.data import Table from Orange.classification import LogisticRegressionLearner diff --git a/codecov.yml b/codecov.yml index 20291a19670..545bde166be 100644 --- a/codecov.yml +++ b/codecov.yml @@ -4,7 +4,7 @@ coverage: status: patch: default: - target: '90' + target: '95' project: default: target: auto diff --git a/requirements-core.txt b/requirements-core.txt index 0c949fff0f8..e956e3ecb16 100644 --- a/requirements-core.txt +++ b/requirements-core.txt @@ -1,7 +1,7 @@ numpy>=1.9.0 scipy>=0.11.0 scikit-learn>=0.17 -bottleneck>=1.0.0 +bottlechest>=0.7.1 # Reading Excel files xlrd>=0.9.2 # Encoding detection diff --git a/scripts/build-osx-app.sh b/scripts/build-osx-app.sh index a8e295b39fe..3bb8048313b 100755 --- a/scripts/build-osx-app.sh +++ b/scripts/build-osx-app.sh @@ -113,6 +113,12 @@ echo "Installing/updating setuptools and pip" echo "======================================" "$PIP" install 'setuptools==18.*' 'pip==7.*' +echo "Installing Bottlechest" +echo "======================" +"$PIP" install --find-links http://orange.biolab.si/download/files/wheelhouse/ \ + --use-wheel --trusted-host orange.biolab.si \ + Bottlechest + echo "Installing orangeqt" echo "===================" FDIR=$TEMPLATE/Contents/Frameworks diff --git a/scripts/windows/build-win-application.sh b/scripts/windows/build-win-application.sh index 1c3dcc851e4..2f6f503cbb9 100755 --- a/scripts/windows/build-win-application.sh +++ b/scripts/windows/build-win-application.sh @@ -120,6 +120,10 @@ touch "$BUILDBASE"/requirements.txt echo " #:wheel: scikit-learn https://pypi.python.org/packages/b8/9a/02d5d76be66c57aaa9f917c87007b9b0bf486992cc7701512464d1ce11e9/scikit_learn-0.17.1-cp34-cp34m-win32.whl#md5=ab00daed7cdac4cb16ad0613b91be07e scikit-learn==0.17.1 + +#:wheel: Bottlecheset https://dl.dropboxusercontent.com/u/100248799/Bottlechest-0.7.1-cp34-none-win32.whl#md5=629ba2a148dfa784d0e6817497d42e97 +--find-links https://dl.dropboxusercontent.com/u/100248799/Bottlechest-0.7.1-cp34-none-win32.whl +Bottlechest==0.7.1 " > "$BUILDBASE"/requirements.txt function __download_url { @@ -287,7 +291,7 @@ function prepare_orange { bdist_wheel -d "$BUILDBASE/wheelhouse" # Ensure all install dependencies are available in the wheelhouse - prepare_req --only-binary numpy,scipy,scikit-learn . + prepare_req --only-binary numpy,scipy,scikit-learn,bottlechest . echo "# Orange " >> "$BUILDBASE/requirements.txt" echo "$name==$version" >> "$BUILDBASE/requirements.txt"