Skip to content

Commit

Permalink
Merge pull request GUDHI#348 from martinroyer/atol
Browse files Browse the repository at this point in the history
Introduction of ATOL in finite vectorisation method
  • Loading branch information
VincentRouvreau authored Jul 23, 2020
2 parents 444ec77 + 96eb09e commit 7bd8c85
Show file tree
Hide file tree
Showing 4 changed files with 168 additions and 4 deletions.
9 changes: 9 additions & 0 deletions biblio/bibliography.bib
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,15 @@ @misc{gmplib_cite
%TEMPORARY
%------------------------------------------------------------------
@misc{royer2019atol,
title={ATOL: Measure Vectorisation for Automatic Topologically-Oriented Learning},
author={Martin Royer and Frédéric Chazal and Clément Levrard and Yuichi Ike and Yuhei Umeda},
year={2019},
eprint={1909.13472},
archivePrefix={arXiv},
primaryClass={cs.CG}
}

@inproceedings{deSilva:2013:GSP:2493132.2462402,
author = {de Silva, Vin and Nanda, Vidit},
title = {Geometry in the space of persistence modules},
Expand Down
2 changes: 1 addition & 1 deletion src/python/doc/representations_sum.inc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
:widths: 30 40 30

+------------------------------------------------------------------+----------------------------------------------------------------+-------------------------------------------------------------+
| .. figure:: | Vectorizations, distances and kernels that work on persistence | :Author: Mathieu Carrière |
| .. figure:: | Vectorizations, distances and kernels that work on persistence | :Author: Mathieu Carrière, Martin Royer |
| img/sklearn-tda.png | diagrams, compatible with scikit-learn. | |
| | | :Since: GUDHI 3.1.0 |
| | | |
Expand Down
144 changes: 141 additions & 3 deletions src/python/gudhi/representations/vector_methods.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
# Author(s): Mathieu Carrière
# Author(s): Mathieu Carrière, Martin Royer
#
# Copyright (C) 2018-2019 Inria
# Copyright (C) 2018-2020 Inria
#
# Modification(s):
# - YYYY/MM Author: Description of the modification
# - 2020/06 Martin: ATOL integration

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler
from sklearn.neighbors import DistanceMetric
from sklearn.metrics import pairwise

from .preprocessing import DiagramScaler, BirthPersistenceTransform

Expand Down Expand Up @@ -574,3 +575,140 @@ def __call__(self, diag):
numpy array with shape (**threshold**): output complex vector of coefficients.
"""
return self.fit_transform([diag])[0,:]

def _lapl_contrast(measure, centers, inertias):
    """Laplacian contrast used by ATOL to vectorise `measure`.

    Evaluates ``exp(-d / inertias)`` where ``d`` holds the pairwise distances
    (as computed by ``sklearn.metrics.pairwise.pairwise_distances`` with its
    default metric) between each point of `measure` and each row of `centers`.
    `inertias` is broadcast against the columns (one scale per center).
    """
    dists = pairwise.pairwise_distances(measure, Y=centers)
    return np.exp(-dists / inertias)

def _gaus_contrast(measure, centers, inertias):
    """Gaussian contrast used by ATOL to vectorise `measure`.

    Evaluates ``exp(-d2 / inertias**2)`` where ``d2`` holds the pairwise
    distances between the points of `measure` and the rows of `centers`,
    requested with ``squared=True`` from
    ``sklearn.metrics.pairwise.pairwise_distances``.
    `inertias` is broadcast against the columns (one scale per center).
    """
    sq_dists = pairwise.pairwise_distances(measure, Y=centers, squared=True)
    return np.exp(-sq_dists / inertias**2)

def _indicator_contrast(diags, centers, inertias):
    """Indicator-like ("robe curve") contrast used by ATOL to vectorise `diags`.

    With ``d`` the pairwise distances between the points of `diags` and the
    rows of `centers`, returns ``clip(2 - d / inertias, 0, 1)``: for positive
    `inertias` this is 1 up to one inertia away from a center, decreases
    linearly, and is 0 from two inertias onwards.
    """
    scaled_dists = pairwise.pairwise_distances(diags, Y=centers) / inertias
    return np.clip(2 - scaled_dists, 0, 1)

def _cloud_weighting(measure):
"""automatic uniform weighting with mass 1 for `measure` in ATOL"""
return np.ones(shape=measure.shape[0])

def _iidproba_weighting(measure):
"""automatic uniform weighting with mass 1/N for `measure` in ATOL"""
return np.ones(shape=measure.shape[0]) / measure.shape[0]

class Atol(BaseEstimator, TransformerMixin):
    """
    This class allows to vectorise measures (e.g. point clouds, persistence diagrams, etc) after a quantisation step.

    ATOL paper: :cite:`royer2019atol`

    Example
    --------
    >>> from sklearn.cluster import KMeans
    >>> from gudhi.representations.vector_methods import Atol
    >>> import numpy as np
    >>> a = np.array([[1, 2, 4], [1, 4, 0], [1, 0, 4]])
    >>> b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]])
    >>> c = np.array([[3, 2, -1], [1, 2, -1]])
    >>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2, random_state=202006))
    >>> atol_vectoriser.fit(X=[a, b, c]).centers
    array([[ 2.        ,  0.66666667,  3.33333333],
           [ 2.6       ,  2.8       , -0.4       ]])
    >>> atol_vectoriser(a)
    array([1.18168665, 0.42375966])
    >>> atol_vectoriser(c)
    array([0.02062512, 1.25157463])
    >>> atol_vectoriser.transform(X=[a, b, c])
    array([[1.18168665, 0.42375966],
           [0.29861028, 1.06330156],
           [0.02062512, 1.25157463]])
    """
    def __init__(self, quantiser, weighting_method="cloud", contrast="gaussian"):
        """
        Constructor for the Atol measure vectorisation class.

        Parameters:
            quantiser (Object): Object with `fit` (sklearn API consistent) and `cluster_centers_` and `n_clusters`
                attributes, e.g. sklearn.cluster.KMeans. It will be fitted when the Atol object function `fit` is called.
            weighting_method (string or callable): constant generic function for weighting the measure points
                choose from {"cloud", "iidproba"}
                (default: constant function, i.e. the measure is seen as a point cloud by default).
                A callable mapping a measure to a 1-D array of weights is used as is.
                Any other unknown value falls back to "cloud".
                This will have no impact if weights are provided along with measures all the way: `fit` and `transform`.
            contrast (string or callable): constant function for evaluating proximity of a measure with respect to centers
                choose from {"gaussian", "laplacian", "indicator"}
                (default: gaussian contrast function, see page 3 in the ATOL paper).
                A callable with signature `(measure, centers, inertias)` is used as is.
                Any other unknown value falls back to "gaussian".
        """
        self.quantiser = quantiser
        # `self.contrast` and `self.weighting_method` hold the resolved functions, so
        # `get_params()` returns callables. Accepting callables unchanged lets those
        # values be fed back to the constructor (as scikit-learn's `clone` does)
        # without being silently replaced by the defaults.
        if callable(contrast):
            self.contrast = contrast
        else:
            self.contrast = {
                "gaussian": _gaus_contrast,
                "laplacian": _lapl_contrast,
                "indicator": _indicator_contrast,
            }.get(contrast, _gaus_contrast)
        if callable(weighting_method):
            self.weighting_method = weighting_method
        else:
            self.weighting_method = {
                "cloud": _cloud_weighting,
                "iidproba": _iidproba_weighting,
            }.get(weighting_method, _cloud_weighting)

    def fit(self, X, y=None, sample_weight=None):
        """
        Calibration step: fit centers to the sample measures and derive inertias between centers.

        Parameters:
            X (list N x d numpy arrays): input measures in R^d from which to learn center locations and inertias
                (measures can have different N).
            y: Ignored, present for API consistency by convention.
            sample_weight (list of numpy arrays): weights for each measure point in X, optional.
                If None, the object's weighting_method will be used.
                A flat sequence with one weight per concatenated point is also accepted.

        Returns:
            self

        Raises:
            TypeError: if the quantiser has no `fit` attribute.
        """
        if not hasattr(self.quantiser, 'fit'):
            raise TypeError("quantiser %s has no `fit` attribute." % (self.quantiser,))
        if sample_weight is None:
            sample_weight = [self.weighting_method(measure) for measure in X]

        measures_concat = np.concatenate(X)
        # The quantiser is fitted on the concatenated points, so the weights must be
        # flattened the same way whether they were generated above or supplied by the
        # caller. `atleast_1d` keeps an already-flat weight sequence working.
        weights_concat = np.concatenate([np.atleast_1d(weight) for weight in sample_weight])
        self.quantiser.fit(X=measures_concat, sample_weight=weights_concat)
        self.centers = self.quantiser.cluster_centers_
        if self.quantiser.n_clusters == 1:
            # No other center to compare with: use half the largest distance
            # between any two input points as the single inertia.
            dist_centers = pairwise.pairwise_distances(measures_concat)
            np.fill_diagonal(dist_centers, 0)
            self.inertias = np.array([np.max(dist_centers)/2])
        else:
            # Inertia of a center: half the distance to its closest distinct center.
            # Zero distances (the diagonal, or coinciding centers) are masked out.
            dist_centers = pairwise.pairwise_distances(self.centers)
            dist_centers[dist_centers == 0] = np.inf
            self.inertias = np.min(dist_centers, axis=0)/2
        return self

    def __call__(self, measure, sample_weight=None):
        """
        Apply measure vectorisation on a single measure.

        Parameters:
            measure (n x d numpy array): input measure in R^d.
            sample_weight (numpy array of size n): weights for each point of `measure`, optional.
                If None, the object's weighting_method will be used.

        Returns:
            numpy array in R^self.quantiser.n_clusters.
        """
        if sample_weight is None:
            sample_weight = self.weighting_method(measure)
        # The contrast is (n points) x (n centers); transpose it so the weights broadcast
        # along the points, then sum the weighted contributions for each center.
        return np.sum(sample_weight * self.contrast(measure, self.centers, self.inertias.T).T, axis=1)

    def transform(self, X, sample_weight=None):
        """
        Apply measure vectorisation on a list of measures.

        Parameters:
            X (list N x d numpy arrays): input measures in R^d to vectorise
                (measures can have different N).
            sample_weight (list of numpy arrays): weights for each measure point in X, optional.
                If None, the object's weighting_method will be used.

        Returns:
            numpy array with shape (number of measures) x (self.quantiser.n_clusters).
        """
        if sample_weight is None:
            sample_weight = [self.weighting_method(measure) for measure in X]
        return np.stack([self(measure, sample_weight=weight) for measure, weight in zip(X, sample_weight)])
17 changes: 17 additions & 0 deletions src/python/test/test_representations.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import numpy as np
import pytest

from sklearn.cluster import KMeans


def test_representations_examples():
# Disable graphics for testing purposes
Expand All @@ -15,6 +17,7 @@ def test_representations_examples():
return None


from gudhi.representations.vector_methods import Atol
from gudhi.representations.metrics import *
from gudhi.representations.kernel_methods import *

Expand All @@ -41,3 +44,17 @@ def test_multiple():
d2 = WassersteinDistance(order=2, internal_p=2, n_jobs=4).fit(l2).transform(l1)
print(d1.shape, d2.shape)
assert d1 == pytest.approx(d2, rel=.02)


def test_dummy_atol():
    """Run Atol with a single-center quantiser over every weighting/contrast combination.

    Checks that fitting, single-measure vectorisation and batch transformation
    run, produce finite outputs of the expected shapes, and agree with each other.
    """
    a = np.array([[1, 2, 4], [1, 4, 0], [1, 0, 4]])
    b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]])
    c = np.array([[3, 2, -1], [1, 2, -1]])

    for weighting_method in ["cloud", "iidproba"]:
        for contrast in ["gaussian", "laplacian", "indicator"]:
            atol_vectoriser = Atol(quantiser=KMeans(n_clusters=1, random_state=202006), weighting_method=weighting_method, contrast=contrast)
            atol_vectoriser.fit([a, b, c])

            # One coordinate per cluster center.
            single = atol_vectoriser(a)
            assert single.shape == (1,)
            assert np.all(np.isfinite(single))

            # One row per input measure, one column per cluster center.
            batch = atol_vectoriser.transform(X=[a, b, c])
            assert batch.shape == (3, 1)
            assert np.all(np.isfinite(batch))

            # `transform` must agree with vectorising each measure on its own.
            assert np.allclose(batch[0], single)

0 comments on commit 7bd8c85

Please sign in to comment.