Lightning-AI · Borda · Sep 11, 2020 · Sep 5, 2020 · Sep 5, 2020 · Sep 5, 2020
@@ -13,6 +13,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Added `LightningModule.to_torchscript` to support exporting as `ScriptModule` ([#3258](https://github.com/PyTorchLightning/pytorch-lightning/pull/3258/))
 
+- Added `EmbeddingSimilarity` metric:
+   * functional interface ([#3349](https://github.com/PyTorchLightning/pytorch-lightning/pull/3349))
+   * class based interface + tests ([#3358](https://github.com/PyTorchLightning/pytorch-lightning/pull/3358))
+
 ### Changed
 
 - Changed `LearningRateLogger` to `LearningRateMonitor` ([#3251](https://github.com/PyTorchLightning/pytorch-lightning/pull/3251))
@@ -134,7 +138,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Fixed adding val step argument to metrics ([#2986](https://github.com/PyTorchLightning/pytorch-lightning/pull/2986))
 - Fixed an issue that caused `Trainer.test()` to stall in ddp mode ([#2997](https://github.com/PyTorchLightning/pytorch-lightning/pull/2997))
 - Fixed gathering of results with tensors of varying shape ([#3020](https://github.com/PyTorchLightning/pytorch-lightning/pull/3020))
-- Fixed batch size auto-scaling feature to set the new value on the correct model attribute ([#3043](https://github.com/PyTorchLightning/pytorch-lightning/pull/3043)) 
+- Fixed batch size auto-scaling feature to set the new value on the correct model attribute ([#3043](https://github.com/PyTorchLightning/pytorch-lightning/pull/3043))
 - Fixed automatic batch scaling not working with half precision ([#3045](https://github.com/PyTorchLightning/pytorch-lightning/pull/3045))
 - Fixed setting device to root gpu ([#3042](https://github.com/PyTorchLightning/pytorch-lightning/pull/3042))
 

@@ -158,6 +158,12 @@ DiceCoefficient
 .. autoclass:: pytorch_lightning.metrics.classification.DiceCoefficient
     :noindex:
 
+EmbeddingSimilarity
+^^^^^^^^^^^^^^^^^^^
+
+.. autoclass:: pytorch_lightning.metrics.self_supervised.EmbeddingSimilarity
+    :noindex:
+
 F1
 ^^
 
@@ -629,4 +635,3 @@ MeanTweedieDeviance (sk)
 
 .. autofunction:: pytorch_lightning.metrics.sklearns.MeanTweedieDeviance
     :noindex:
-
@@ -1,59 +1,18 @@
-from pytorch_lightning.metrics.classification import (
-    Accuracy,
-    AveragePrecision,
-    ConfusionMatrix,
-    F1,
-    FBeta,
-    Recall,
-    ROC,
-    AUROC,
-    DiceCoefficient,
-    MulticlassPrecisionRecallCurve,
-    MulticlassROC,
-    Precision,
-    PrecisionRecallCurve,
-    IoU,
-)
 from pytorch_lightning.metrics.converters import numpy_metric, tensor_metric
-from pytorch_lightning.metrics.metric import Metric, TensorMetric, NumpyMetric
-from pytorch_lightning.metrics.nlp import BLEUScore
-from pytorch_lightning.metrics.regression import (
-    MAE,
-    MSE,
-    PSNR,
-    RMSE,
-    RMSLE,
-    SSIM
-)
-from pytorch_lightning.metrics.sklearns import (
-    AUC,
-    SklearnMetric,
-)
+from pytorch_lightning.metrics.metric import *
+from pytorch_lightning.metrics.metric import __all__ as __base_metrics
+from pytorch_lightning.metrics.classification import *
+from pytorch_lightning.metrics.classification import __all__ as __classification_metrics
+from pytorch_lightning.metrics.nlp import *
+from pytorch_lightning.metrics.nlp import __all__ as __nlp_metrics
+from pytorch_lightning.metrics.regression import *
+from pytorch_lightning.metrics.regression import __all__ as __regression_metrics
+from pytorch_lightning.metrics.self_supervised import *
+from pytorch_lightning.metrics.self_supervised import __all__ as __selfsupervised_metrics
 
-__classification_metrics = [
-    "AUC",
-    "AUROC",
-    "Accuracy",
-    "AveragePrecision",
-    "ConfusionMatrix",
-    "DiceCoefficient",
-    "F1",
-    "FBeta",
-    "MulticlassPrecisionRecallCurve",
-    "MulticlassROC",
-    "Precision",
-    "PrecisionRecallCurve",
-    "ROC",
-    "Recall",
-    "IoU",
-]
-__regression_metrics = [
-    "MAE",
-    "MSE",
-    "PSNR",
-    "RMSE",
-    "RMSLE",
-    "SSIM"
-]
-__sequence_metrics = ["BLEUScore"]
-__all__ = __regression_metrics + __classification_metrics + ["SklearnMetric"] + __sequence_metrics
+
+# Public API of the package: concatenation of the names each metrics submodule exports.
+__all__ = (
+    __classification_metrics
+    + __base_metrics
+    + __nlp_metrics
+    + __regression_metrics
+    + __selfsupervised_metrics
+)
diff --git a/pytorch_lightning/metrics/classification.py b/pytorch_lightning/metrics/classification.py
@@ -823,3 +823,21 @@ def forward(self, y_pred: torch.Tensor, y_true: torch.Tensor,
         Actual metric calculation.
         """
         return iou(y_pred, y_true, remove_bg=self.remove_bg, reduction=self.reduction)
+
+
+__all__ = [
+    "AUROC",
+    "Accuracy",
+    "AveragePrecision",
+    "ConfusionMatrix",
+    "DiceCoefficient",
+    "F1",
+    "FBeta",
+    "MulticlassPrecisionRecallCurve",
+    "MulticlassROC",
+    "Precision",
+    "PrecisionRecallCurve",
+    "ROC",
+    "Recall",
+    "IoU",
+]
@@ -40,10 +40,7 @@ def embedding_similarity(
     if reduction == 'mean':
         sqr_mtx = sqr_mtx.mean(dim=-1)
 
-    return sqr_mtx
-
+    if reduction == 'sum':
+        sqr_mtx = sqr_mtx.sum(dim=-1)
 
-if __name__ == '__main__':
-    a = torch.rand(3, 5)
-
-    print(embedding_similarity(a, 'cosine'))
+    return sqr_mtx
diff --git a/pytorch_lightning/metrics/metric.py b/pytorch_lightning/metrics/metric.py
@@ -287,3 +287,6 @@ def output_convert(self, data: Any, output: Any):
     def ddp_sync(self, data: Any, output: Any):
         return apply_to_collection(output, torch.Tensor, sync_ddp_if_available,
                                    self.reduce_group, self.reduce_op)
+
+
+__all__ = ["Metric", "TensorMetric", "NumpyMetric"]
diff --git a/pytorch_lightning/metrics/nlp.py b/pytorch_lightning/metrics/nlp.py
@@ -58,3 +58,6 @@ def forward(self, translate_corpus: list, reference_corpus: list) -> torch.Tenso
             n_gram=self.n_gram,
             smooth=self.smooth,
         ).to(self.device, self.dtype)
+
+
+__all__ = ["BLEUScore"]
diff --git a/pytorch_lightning/metrics/regression.py b/pytorch_lightning/metrics/regression.py
@@ -305,3 +305,13 @@ def forward(self, pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
             A Tensor with SSIM score.
         """
         return ssim(pred, target, self.kernel_size, self.sigma, self.reduction, self.data_range, self.k1, self.k2)
+
+
+__all__ = [
+    "MAE",
+    "MSE",
+    "PSNR",
+    "RMSE",
+    "RMSLE",
+    "SSIM"
+]
@@ -0,0 +1,76 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any
+
+import torch
+
+from pytorch_lightning.metrics.functional.self_supervised import embedding_similarity
+from pytorch_lightning.metrics.metric import TensorMetric
+
+
+class EmbeddingSimilarity(TensorMetric):
+    """
+    Computes similarity between embeddings
+
+    Class-based wrapper that forwards to the functional ``embedding_similarity``
+    using the options stored at construction time. The example below calls the
+    functional form directly.
+
+    Example:
+        >>> embeddings = torch.tensor([[1., 2., 3., 4.], [1., 2., 3., 4.], [4., 5., 6., 7.]])
+        >>> embedding_similarity(embeddings)
+        tensor([[0.0000, 1.0000, 0.9759],
+                [1.0000, 0.0000, 0.9759],
+                [0.9759, 0.9759, 0.0000]])
+
+    """
+    def __init__(
+            self,
+            similarity: str = 'cosine',
+            zero_diagonal: bool = True,
+            reduction: str = 'mean',
+            reduce_group: Any = None
+    ):
+        """
+        Args:
+            similarity: 'dot' or 'cosine'
+            reduction: 'none', 'sum', 'mean' (all along dim -1)
+            zero_diagonal: if True, the diagonals are set to zero
+            reduce_group: the process group to reduce metric results from DDP
+
+        Raises:
+            AssertionError: if ``similarity`` or ``reduction`` is not one of the
+                supported values, or ``zero_diagonal`` is not a bool
+
+        """
+        super().__init__(name='embedding_similarity',
+                         reduce_group=reduce_group)
+        assert similarity in ('dot', 'cosine'), \
+            f"similarity must be 'dot' or 'cosine', got {similarity!r}"
+        self.similarity = similarity
+        # the bare ``isinstance(...)`` expression discarded its result; assert it
+        assert isinstance(zero_diagonal, bool), \
+            f"zero_diagonal must be a bool, got {type(zero_diagonal).__name__}"
+        self.zero_diagonal = zero_diagonal
+        assert reduction in ('none', 'sum', 'mean'), \
+            f"reduction must be 'none', 'sum' or 'mean', got {reduction!r}"
+        self.reduction = reduction
+
+    def forward(self, batch: torch.Tensor) -> torch.Tensor:
+        """
+        Actual metric computation
+
+        Args:
+            batch: tensor containing embeddings with shape (batch_size, dim)
+
+        Return:
+            A square matrix (batch, batch) with the similarity scores between all elements
+            If sum or mean are used, the last dimension is reduced away, leaving one
+            value per row
+        """
+        return embedding_similarity(batch,
+                                    similarity=self.similarity,
+                                    zero_diagonal=self.zero_diagonal,
+                                    reduction=self.reduction)
+
+
+__all__ = ['EmbeddingSimilarity']
@@ -0,0 +1,35 @@
+import pytest
+import torch
+from sklearn.metrics import pairwise
+
+from pytorch_lightning.metrics.functional.self_supervised import embedding_similarity
+
+
+@pytest.mark.parametrize('similarity', ['cosine', 'dot'])
+@pytest.mark.parametrize('reduction', ['none', 'mean', 'sum'])
+def test_against_sklearn(similarity, reduction):
+    """Check ``embedding_similarity`` against the matching sklearn pairwise kernel."""
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+    # 5 samples in 10 dimensions
+    batch = torch.randn(5, 10, device=device)
+
+    def sklearn_embedding_distance(batch, similarity, reduction):
+        """Reference result computed with sklearn on a numpy array."""
+        kernels = {'cosine': pairwise.cosine_similarity,
+                   'dot': pairwise.linear_kernel}
+        scores = kernels[similarity](batch, batch)
+
+        if reduction == 'mean':
+            scores = scores.mean(axis=-1)
+        elif reduction == 'sum':
+            scores = scores.sum(axis=-1)
+        return scores
+
+    # The diagonal is kept so the result is directly comparable with sklearn's output.
+    actual = embedding_similarity(batch, similarity=similarity,
+                                  reduction=reduction, zero_diagonal=False)
+
+    reference = sklearn_embedding_distance(batch.cpu().detach().numpy(),
+                                           similarity=similarity, reduction=reduction)
+    expected = torch.tensor(reference, dtype=torch.float, device=device)
+
+    assert torch.allclose(expected, actual)