From a326d232038d9c318127cb774398bf614224fd02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bla=C5=BE?= <blaz.zupan@fri.uni-lj.si>
Date: Mon, 4 Jul 2016 10:47:13 +0200
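Subject: [PATCH] Revert "Bottlechest, bottleneck"

Go back to depending on the external bottlechest package instead of
bottleneck:

- remove the in-tree Orange/misc/bottlechest.py module that patched
  bottleneck, and Orange.util.one_hot
- import bottlechest again in Orange and in the tests, and use
  bottlechest's bincount in Orange/base.py and Orange/data/table.py
- require bottlechest>=0.7.1 in requirements-core.txt and restore the
  Bottlechest install steps in the OSX and Windows build scripts
- restore the previous expected values in the sparse contingency tests
  and the codecov patch target of 95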
---
 Orange/__init__.py                            |   4 -
 Orange/base.py                                |   8 +-
 Orange/data/filter.py                         |   2 +-
 Orange/data/io.py                             |   2 +-
 Orange/data/table.py                          |   9 +-
 Orange/misc/bottlechest.py                    | 179 ------------------
 Orange/preprocess/preprocess.py               |   6 +-
 Orange/tests/test_contingency.py              |   8 +-
 Orange/util.py                                |  16 --
 .../tests/test_owlogisticregression.py        |   2 +-
 codecov.yml                                   |   2 +-
 requirements-core.txt                         |   2 +-
 scripts/build-osx-app.sh                      |   6 +
 scripts/windows/build-win-application.sh      |   6 +-
 14 files changed, 35 insertions(+), 217 deletions(-)
 delete mode 100644 Orange/misc/bottlechest.py

diff --git a/Orange/__init__.py b/Orange/__init__.py
index e2e3aff9e37..1c1a24f5b88 100644
--- a/Orange/__init__.py
+++ b/Orange/__init__.py
@@ -1,9 +1,5 @@
 from .misc.lazy_module import _LazyModule
 from .misc.datasets import _DatasetInfo
-
-# bottlechest patches bottleneck
-from .misc import bottlechest  # pylint: disable=unused-import
-
 from .version import \
     short_version as __version__, git_revision as __git_version__
 
diff --git a/Orange/base.py b/Orange/base.py
index 38d59e22f97..8efdcfa4619 100644
--- a/Orange/base.py
+++ b/Orange/base.py
@@ -2,12 +2,12 @@
 
 import numpy as np
 import scipy
+import bottlechest as bn
 
 from Orange.data import Table, Storage, Instance, Value
 from Orange.preprocess import (RemoveNaNClasses, Continuize,
                                RemoveNaNColumns, SklImpute)
 from Orange.misc.wrapper_meta import WrapperMeta
-from Orange.util import one_hot
 
 __all__ = ["Learner", "Model", "SklLearner", "SklModel"]
 
@@ -157,9 +157,11 @@ def __call__(self, data, ret=Value):
                                for c in self.domain.class_vars)
                 probs = np.zeros(value.shape + (max_card,), float)
                 for i, cvar in enumerate(self.domain.class_vars):
-                    probs[:, i, :] = one_hot(value[:, i])
+                    probs[:, i, :], _ = bn.bincount(np.atleast_2d(value[:, i]),
+                                                    max_card - 1)
             else:
-                probs = one_hot(value)
+                probs, _ = bn.bincount(np.atleast_2d(value),
+                                       len(self.domain.class_var.values) - 1)
             if ret == Model.ValueProbs:
                 return value, probs
             else:
diff --git a/Orange/data/filter.py b/Orange/data/filter.py
index b98ad3d2d0d..86fb0c5f3ed 100644
--- a/Orange/data/filter.py
+++ b/Orange/data/filter.py
@@ -5,7 +5,7 @@
 
 from ..misc.enum import Enum
 import numpy as np
-import bottleneck as bn
+import bottlechest as bn
 from Orange.data import Instance, Storage, Variable
 
 
diff --git a/Orange/data/io.py b/Orange/data/io.py
index a23dd86b8b6..fb862d5a51a 100644
--- a/Orange/data/io.py
+++ b/Orange/data/io.py
@@ -18,7 +18,7 @@
 from urllib.parse import urlparse, unquote as urlunquote
 from urllib.request import urlopen
 
-import bottleneck as bn
+import bottlechest as bn
 import numpy as np
 from chardet.universaldetector import UniversalDetector
 
diff --git a/Orange/data/table.py b/Orange/data/table.py
index ebb8d00ed75..5dd0357d11c 100644
--- a/Orange/data/table.py
+++ b/Orange/data/table.py
@@ -13,7 +13,7 @@
 from urllib.request import urlopen
 from urllib.error import URLError
 
-import bottleneck as bn
+import bottlechest as bn
 from scipy import sparse as sp
 
 from .instance import *
@@ -935,7 +935,12 @@ def __determine_density(data):
         if data is None:
             return Storage.Missing
         if data is not None and sp.issparse(data):
-            return Storage.SPARSE_BOOL if (data.data == 1).all() else Storage.SPARSE
+            try:
+                if bn.bincount(data.data, 1)[0][0] == 0:
+                    return Storage.SPARSE_BOOL
+            except ValueError as e:
+                pass
+            return Storage.SPARSE
         else:
             return Storage.DENSE
 
diff --git a/Orange/misc/bottlechest.py b/Orange/misc/bottlechest.py
deleted file mode 100644
index 3e5e3da64d9..00000000000
--- a/Orange/misc/bottlechest.py
+++ /dev/null
@@ -1,179 +0,0 @@
-"""
-This module provides alternatives for the few additional functions found in
-and once used from the bottlechest package (fork of bottleneck).
-
-It also patches bottleneck to contain these functions.
-"""
-import numpy as np
-from scipy.sparse import issparse
-import bottleneck as bn
-
-
-def bincount(X, max_val=None, weights=None, minlength=None):
-    """Return counts of values in array X.
-
-    Works exactly like np.bincount(), except that it also supports non-integer
-    arrays with nans.
-    """
-    X = np.asanyarray(X)
-    if X.dtype.kind == 'f' and bn.anynan(X):
-        nonnan = ~np.isnan(X)
-        nans = (~nonnan).sum(axis=0)
-        X = X[nonnan]
-        if weights is not None:
-            weights = weights[nonnan]
-    else:
-        nans = 0 if X.ndim == 1 else np.zeros(X.shape[1])
-    return (np.bincount(X.astype(np.int32, copy=False),
-                        weights=weights,
-                        minlength=minlength or max_val + 1),
-            nans)
-
-
-def countnans(X, weights=None, axis=None, dtype=None, keepdims=False):
-    """
-    Count the undefined elements in arr along given axis.
-
-    Parameters
-    ----------
-    X : array_like
-    weights : array_like
-        Weights to weight the nans with, before or after counting (depending
-        on the weights shape).
-
-    Returns
-    -------
-    counts
-    """
-    X = np.asanyarray(X)
-    isnan = np.isnan(X)
-    if weights is not None and weights.shape == X.shape:
-        isnan = isnan * weights
-    counts = isnan.sum(axis=axis, dtype=dtype, keepdims=keepdims)
-    if weights is not None and weights.shape != X.shape:
-        counts = counts * weights
-    return counts
-
-
-def contingency(X, y, max_X=None, max_y=None, weights=None, mask=None):
-    """
-    Compute the contingency matrices for each column of X (excluding the masked)
-    versus the vector y.
-
-    If the array is 1-dimensional, a 2d contingency matrix is returned. If the
-    array is 2d, the function returns a 3d array, with the first dimension
-    corresponding to column index (variable in the input array).
-
-    The rows of contingency matrix correspond to values of variables, the
-    columns correspond to values in vector `y`.
-    (??? isn't it the other way around ???)
-
-    Rows in the input array can be weighted (argument `weights`). A subset of
-    columns can be selected by additional argument `mask`.
-
-    The function also returns a count of NaN values per each value of `y`.
-
-    Parameters
-    ----------
-    X : array_like
-        With values in columns.
-    y : 1d array
-        Vector of true values.
-    max_X : int
-        The maximal value in the array
-    max_y : int
-        The maximal value in `y`
-    weights : ...
-    mask : sequence
-        Discrete columns of X.
-
-    Returns
-    -------
-    contingencies: (m × ny × nx) array
-        m number of masked (used) columns (all if mask=None), i.e.
-        for each column of X;
-        ny number of uniques in y,
-        nx number of uniques in column of X.
-    nans : array_like
-        Number of nans in each column of X for each unique value of y.
-    """
-    if weights is not None and np.any(weights) and np.unique(weights)[0] != 1:
-        raise ValueError('weights not yet supported')
-
-    was_1d = False
-    if X.ndim == 1:
-        X = X[..., np.newaxis]
-        was_1d = True
-
-    contingencies, nans = [], []
-    ny = np.unique(y).size if max_y is None else max_y + 1
-    for i in range(X.shape[1]):
-        if mask is not None and not mask[i]:
-            contingencies.append(np.zeros((ny, max_X + 1)))
-            nans.append(np.zeros(ny))
-            continue
-        col = X[..., i]
-        nx = np.unique(col[~np.isnan(col)]).size if max_X is None else max_X + 1
-        if issparse(col):
-            col = np.ravel(col.todense())
-        contingencies.append(
-            bincount(y + ny * col,
-                     minlength=ny * nx)[0].reshape(nx, ny).T)
-        nans.append(
-            bincount(y[np.isnan(col)], minlength=ny)[0])
-    if was_1d:
-        return contingencies[0], nans[0]
-    return np.array(contingencies), np.array(nans)
-
-
-def stats(X, weights=None, compute_variance=False):
-    """
-    Compute min, max, #nans, mean and variance.
-
-    Result is a tuple (min, max, mean, variance, #nans, #non-nans) or an
-    array of shape (len(X), 6).
-
-    The mean and the number of nans and non-nans are weighted.
-
-    Computation of variance requires an additional pass and is not enabled
-    by default. Zeros are filled in instead of variance.
-
-    Parameters
-    ----------
-    X : array_like, 1 or 2 dimensions
-        Input array.
-    weights : array_like, optional
-        Weights, array of the same length as `x`.
-    compute_variance : bool, optional
-        If set to True, the function also computes variance.
-
-    Returns
-    -------
-    out : a 6-element tuple or an array of shape (len(x), 6)
-        Computed (min, max, mean, variance or 0, #nans, #non-nans)
-
-    Raises
-    ------
-    ValueError
-        If the length of the weight vector does not match the length of the
-        array
-    """
-    if weights is not None:
-        X = X * weights
-    is_numeric = np.issubdtype(X.dtype, np.number)
-    nans = (np.isnan(X) if is_numeric else X.astype(bool)).sum(axis=0)
-    variance = np.nanvar(X, axis=0) if compute_variance else np.zeros(X.shape[1])
-    return np.column_stack((np.nanmin(X, axis=0) if is_numeric else np.inf,
-                            np.nanmax(X, axis=0) if is_numeric else -np.inf,
-                            np.nanmean(X, axis=0) if is_numeric else 0,
-                            nans,
-                            variance if is_numeric else 0,
-                            X.shape[0] - nans))
-
-
-# Patch bottleneck to contain these additions
-for func in (bincount, countnans, contingency, stats):
-    if getattr(bn, func.__name__, bincount).__module__ != func.__module__:
-        raise DeprecationWarning('bottleneck got its own {}();'
-                                 'consider deprecating our own.'.format(func.__name__))
-    setattr(bn, func.__name__, func)
diff --git a/Orange/preprocess/preprocess.py b/Orange/preprocess/preprocess.py
index 5bfe56dbed7..0cca3bb4025 100644
--- a/Orange/preprocess/preprocess.py
+++ b/Orange/preprocess/preprocess.py
@@ -5,7 +5,7 @@
 """
 import numpy as np
 import sklearn.preprocessing as skl_preprocessing
-import bottleneck as bn
+import bottlechest
 
 import Orange.data
 from Orange.data import Table
@@ -198,8 +198,8 @@ def __call__(self, data):
         data : an input data set
         """
 
-        oks = bn.nanmin(data.X, axis=0) != \
-              bn.nanmax(data.X, axis=0)
+        oks = bottlechest.nanmin(data.X, axis=0) != \
+              bottlechest.nanmax(data.X, axis=0)
         atts = [data.domain.attributes[i] for i, ok in enumerate(oks) if ok]
         domain = Orange.data.Domain(atts, data.domain.class_vars,
                                     data.domain.metas)
diff --git a/Orange/tests/test_contingency.py b/Orange/tests/test_contingency.py
index 17707e38359..a1824b6be01 100644
--- a/Orange/tests/test_contingency.py
+++ b/Orange/tests/test_contingency.py
@@ -163,7 +163,7 @@ def _construct_sparse():
     def test_sparse(self):
         d = self._construct_sparse()
         cont = contingency.Discrete(d, 5)
-        np.testing.assert_almost_equal(cont[0], [2, 0, 0])
+        np.testing.assert_almost_equal(cont[0], [1, 0, 0])
         np.testing.assert_almost_equal(cont["b"], [0, 1, 1])
         np.testing.assert_almost_equal(cont[2], [1, 0, 0])
 
@@ -193,7 +193,7 @@ def test_get_contingency(self):
         d = self._construct_sparse()
         cont = contingency.get_contingency(d, 5)
         self.assertIsInstance(cont, contingency.Discrete)
-        np.testing.assert_almost_equal(cont[0], [2, 0, 0])
+        np.testing.assert_almost_equal(cont[0], [1, 0, 0])
         np.testing.assert_almost_equal(cont["b"], [0, 1, 1])
         np.testing.assert_almost_equal(cont[2], [1, 0, 0])
 
@@ -218,7 +218,7 @@ def test_get_contingencies(self):
 
         cont = conts[5]
         self.assertIsInstance(cont, contingency.Discrete)
-        np.testing.assert_almost_equal(cont[0], [2, 0, 0])
+        np.testing.assert_almost_equal(cont[0], [1, 0, 0])
         np.testing.assert_almost_equal(cont["b"], [0, 1, 1])
         np.testing.assert_almost_equal(cont[2], [1, 0, 0])
 
@@ -240,7 +240,7 @@ def test_get_contingencies(self):
         self.assertEqual(len(conts), 10)
         cont = conts[5]
         self.assertIsInstance(cont, contingency.Discrete)
-        np.testing.assert_almost_equal(cont[0], [2, 0, 0])
+        np.testing.assert_almost_equal(cont[0], [1, 0, 0])
         np.testing.assert_almost_equal(cont["b"], [0, 1, 1])
         np.testing.assert_almost_equal(cont[2], [1, 0, 0])
 
diff --git a/Orange/util.py b/Orange/util.py
index d9700b20987..6d235a1cc19 100644
--- a/Orange/util.py
+++ b/Orange/util.py
@@ -90,22 +90,6 @@ def scale(values, min=0, max=1):
     return (-np.nanmin(values) + values) / ptp * (max - min) + min
 
 
-def one_hot(values, dtype=float):
-    """Return a one-hot transform of values
-
-    Parameters
-    ----------
-    values : 1d array
-        Integer values (hopefully 0-max).
-
-    Returns
-    -------
-    result
-        2d array with ones in respective indicator columns.
-    """
-    return np.eye(np.max(values) + 1, dtype=dtype)[np.asanyarray(values, dtype=int)]
-
-
 class Registry(type):
     """Metaclass that registers subtypes."""
     def __new__(cls, name, bases, attrs):
diff --git a/Orange/widgets/classify/tests/test_owlogisticregression.py b/Orange/widgets/classify/tests/test_owlogisticregression.py
index 447a2b90e7a..3c4f2e40eb0 100644
--- a/Orange/widgets/classify/tests/test_owlogisticregression.py
+++ b/Orange/widgets/classify/tests/test_owlogisticregression.py
@@ -1,6 +1,6 @@
 import unittest
 
-import bottleneck as bn
+import bottlechest as bn
 
 from Orange.data import Table
 from Orange.classification import LogisticRegressionLearner
diff --git a/codecov.yml b/codecov.yml
index 20291a19670..545bde166be 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -4,7 +4,7 @@ coverage:
   status:
     patch:
       default:
-        target: '90'
+        target: '95'
     project:
       default:
         target: auto
diff --git a/requirements-core.txt b/requirements-core.txt
index 0c949fff0f8..e956e3ecb16 100644
--- a/requirements-core.txt
+++ b/requirements-core.txt
@@ -1,7 +1,7 @@
 numpy>=1.9.0
 scipy>=0.11.0
 scikit-learn>=0.17
-bottleneck>=1.0.0
+bottlechest>=0.7.1
 # Reading Excel files
 xlrd>=0.9.2
 # Encoding detection
diff --git a/scripts/build-osx-app.sh b/scripts/build-osx-app.sh
index a8e295b39fe..3bb8048313b 100755
--- a/scripts/build-osx-app.sh
+++ b/scripts/build-osx-app.sh
@@ -113,6 +113,12 @@ echo "Installing/updating setuptools and pip"
 echo "======================================"
 "$PIP" install 'setuptools==18.*' 'pip==7.*'
 
+echo "Installing Bottlechest"
+echo "======================"
+"$PIP" install --find-links http://orange.biolab.si/download/files/wheelhouse/ \
+               --use-wheel --trusted-host orange.biolab.si \
+               Bottlechest
+
 echo "Installing orangeqt"
 echo "==================="
 FDIR=$TEMPLATE/Contents/Frameworks
diff --git a/scripts/windows/build-win-application.sh b/scripts/windows/build-win-application.sh
index 1c3dcc851e4..2f6f503cbb9 100755
--- a/scripts/windows/build-win-application.sh
+++ b/scripts/windows/build-win-application.sh
@@ -120,6 +120,10 @@ touch "$BUILDBASE"/requirements.txt
 echo "
 #:wheel: scikit-learn https://pypi.python.org/packages/b8/9a/02d5d76be66c57aaa9f917c87007b9b0bf486992cc7701512464d1ce11e9/scikit_learn-0.17.1-cp34-cp34m-win32.whl#md5=ab00daed7cdac4cb16ad0613b91be07e
 scikit-learn==0.17.1
+
+#:wheel: Bottlechest https://dl.dropboxusercontent.com/u/100248799/Bottlechest-0.7.1-cp34-none-win32.whl#md5=629ba2a148dfa784d0e6817497d42e97
+--find-links https://dl.dropboxusercontent.com/u/100248799/Bottlechest-0.7.1-cp34-none-win32.whl
+Bottlechest==0.7.1
 " > "$BUILDBASE"/requirements.txt
 
 function __download_url {
@@ -287,7 +291,7 @@ function prepare_orange {
         bdist_wheel -d "$BUILDBASE/wheelhouse"
 
     # Ensure all install dependencies are available in the wheelhouse
-    prepare_req --only-binary numpy,scipy,scikit-learn .
+    prepare_req --only-binary numpy,scipy,scikit-learn,bottlechest .
 
     echo "# Orange " >> "$BUILDBASE/requirements.txt"
     echo "$name==$version" >> "$BUILDBASE/requirements.txt"