Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Metrics] class based embedding similarity + tests #3358

Merged
merged 16 commits into from
Sep 11, 2020
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Added `LightningModule.to_torchscript` to support exporting as `ScriptModule` ([#3258](https://github.com/PyTorchLightning/pytorch-lightning/pull/3258/))

- Added `EmbeddingSimilarity` metric:
* functional interface ([#3349](https://github.com/PyTorchLightning/pytorch-lightning/pull/3349))
* class based interface + tests ([#3358](https://github.com/PyTorchLightning/pytorch-lightning/pull/3358))

### Changed

- Changed `LearningRateLogger` to `LearningRateMonitor` ([#3251](https://github.com/PyTorchLightning/pytorch-lightning/pull/3251))
Expand Down Expand Up @@ -134,7 +138,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Fixed adding val step argument to metrics ([#2986](https://github.com/PyTorchLightning/pytorch-lightning/pull/2986))
- Fixed an issue that caused `Trainer.test()` to stall in ddp mode ([#2997](https://github.com/PyTorchLightning/pytorch-lightning/pull/2997))
- Fixed gathering of results with tensors of varying shape ([#3020](https://github.com/PyTorchLightning/pytorch-lightning/pull/3020))
- Fixed batch size auto-scaling feature to set the new value on the correct model attribute ([#3043](https://github.com/PyTorchLightning/pytorch-lightning/pull/3043))
- Fixed batch size auto-scaling feature to set the new value on the correct model attribute ([#3043](https://github.com/PyTorchLightning/pytorch-lightning/pull/3043))
- Fixed automatic batch scaling not working with half precision ([#3045](https://github.com/PyTorchLightning/pytorch-lightning/pull/3045))
- Fixed setting device to root gpu ([#3042](https://github.com/PyTorchLightning/pytorch-lightning/pull/3042))

Expand Down
7 changes: 6 additions & 1 deletion docs/source/metrics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,12 @@ DiceCoefficient
.. autoclass:: pytorch_lightning.metrics.classification.DiceCoefficient
:noindex:

EmbeddingSimilarity
^^^^^^^^^^^^^^^^^^^

.. autoclass:: pytorch_lightning.metrics.self_supervised.EmbeddingSimilarity
:noindex:

F1
^^

Expand Down Expand Up @@ -629,4 +635,3 @@ MeanTweedieDeviance (sk)

.. autofunction:: pytorch_lightning.metrics.sklearns.MeanTweedieDeviance
:noindex:

73 changes: 16 additions & 57 deletions pytorch_lightning/metrics/__init__.py
Original file line number Diff line number Diff line change
@@ -1,59 +1,18 @@
from pytorch_lightning.metrics.classification import (
Accuracy,
AveragePrecision,
ConfusionMatrix,
F1,
FBeta,
Recall,
ROC,
AUROC,
DiceCoefficient,
MulticlassPrecisionRecallCurve,
MulticlassROC,
Precision,
PrecisionRecallCurve,
IoU,
)
from pytorch_lightning.metrics.converters import numpy_metric, tensor_metric
from pytorch_lightning.metrics.metric import Metric, TensorMetric, NumpyMetric
from pytorch_lightning.metrics.nlp import BLEUScore
from pytorch_lightning.metrics.regression import (
MAE,
MSE,
PSNR,
RMSE,
RMSLE,
SSIM
)
from pytorch_lightning.metrics.sklearns import (
AUC,
SklearnMetric,
)
from pytorch_lightning.metrics.metric import *
from pytorch_lightning.metrics.metric import __all__ as __base_metrics
from pytorch_lightning.metrics.classification import *
from pytorch_lightning.metrics.classification import __all__ as __classification_metrics
from pytorch_lightning.metrics.nlp import *
from pytorch_lightning.metrics.nlp import __all__ as __nlp_metrics
from pytorch_lightning.metrics.regression import *
from pytorch_lightning.metrics.regression import __all__ as __regression_metrics
from pytorch_lightning.metrics.self_supervised import *
from pytorch_lightning.metrics.self_supervised import __all__ as __selfsupervised_metrics

__classification_metrics = [
"AUC",
"AUROC",
"Accuracy",
"AveragePrecision",
"ConfusionMatrix",
"DiceCoefficient",
"F1",
"FBeta",
"MulticlassPrecisionRecallCurve",
"MulticlassROC",
"Precision",
"PrecisionRecallCurve",
"ROC",
"Recall",
"IoU",
]
__regression_metrics = [
"MAE",
"MSE",
"PSNR",
"RMSE",
"RMSLE",
"SSIM"
]
__sequence_metrics = ["BLEUScore"]
__all__ = __regression_metrics + __classification_metrics + ["SklearnMetric"] + __sequence_metrics

# Public API of the metrics package: the concatenation of the ``__all__`` lists
# imported from each metrics submodule above.
__all__ = __classification_metrics \
+ __base_metrics \
+ __nlp_metrics \
+ __regression_metrics \
+ __selfsupervised_metrics
18 changes: 18 additions & 0 deletions pytorch_lightning/metrics/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -823,3 +823,21 @@ def forward(self, y_pred: torch.Tensor, y_true: torch.Tensor,
Actual metric calculation.
"""
return iou(y_pred, y_true, remove_bg=self.remove_bg, reduction=self.reduction)


# Names exported by ``from pytorch_lightning.metrics.classification import *``.
__all__ = [
"AUROC",
"Accuracy",
"AveragePrecision",
"ConfusionMatrix",
"DiceCoefficient",
"F1",
"FBeta",
"MulticlassPrecisionRecallCurve",
"MulticlassROC",
"Precision",
"PrecisionRecallCurve",
"ROC",
"Recall",
"IoU",
]
9 changes: 3 additions & 6 deletions pytorch_lightning/metrics/functional/self_supervised.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,7 @@ def embedding_similarity(
if reduction == 'mean':
sqr_mtx = sqr_mtx.mean(dim=-1)

return sqr_mtx

if reduction == 'sum':
sqr_mtx = sqr_mtx.sum(dim=-1)

if __name__ == '__main__':
a = torch.rand(3, 5)

print(embedding_similarity(a, 'cosine'))
return sqr_mtx
3 changes: 3 additions & 0 deletions pytorch_lightning/metrics/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,3 +287,6 @@ def output_convert(self, data: Any, output: Any):
def ddp_sync(self, data: Any, output: Any):
return apply_to_collection(output, torch.Tensor, sync_ddp_if_available,
self.reduce_group, self.reduce_op)


# Names exported by ``from pytorch_lightning.metrics.metric import *``.
__all__ = ["Metric", "TensorMetric", "NumpyMetric"]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is not a very common place for __all__

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

codefactor also complains about this, not sure why. I think it should be fine though.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the point is that it is very related to importing from packages when you do not want to import all functions
https://stackoverflow.com/questions/44834/can-someone-explain-all-in-python

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that it is not a common place for __all__, normally I would put it at the top of the file, but then codefactor complains about it. But I can move it to the top if that is better.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, it is not very common to have it in other files than __init__ so was there a reason to move it from the init?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was changed due to a comment from Justus at some point, but let's change it back since it is a very uncommon practice

3 changes: 3 additions & 0 deletions pytorch_lightning/metrics/nlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,6 @@ def forward(self, translate_corpus: list, reference_corpus: list) -> torch.Tenso
n_gram=self.n_gram,
smooth=self.smooth,
).to(self.device, self.dtype)


# Names exported by ``from pytorch_lightning.metrics.nlp import *``.
__all__ = ["BLEUScore"]
10 changes: 10 additions & 0 deletions pytorch_lightning/metrics/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,3 +305,13 @@ def forward(self, pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
A Tensor with SSIM score.
"""
return ssim(pred, target, self.kernel_size, self.sigma, self.reduction, self.data_range, self.k1, self.k2)


# Names exported by ``from pytorch_lightning.metrics.regression import *``.
__all__ = [
"MAE",
"MSE",
"PSNR",
"RMSE",
"RMSLE",
"SSIM"
]
76 changes: 76 additions & 0 deletions pytorch_lightning/metrics/self_supervised.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Any

import torch

from pytorch_lightning.metrics.functional.self_supervised import embedding_similarity
from pytorch_lightning.metrics.metric import TensorMetric


SkafteNicki marked this conversation as resolved.
Show resolved Hide resolved
class EmbeddingSimilarity(TensorMetric):
    """
    Computes similarity between embeddings

    The computation is delegated to the functional ``embedding_similarity``;
    the example below calls that function directly.

    Example:
        >>> embeddings = torch.tensor([[1., 2., 3., 4.], [1., 2., 3., 4.], [4., 5., 6., 7.]])
        >>> embedding_similarity(embeddings)
        tensor([[0.0000, 1.0000, 0.9759],
                [1.0000, 0.0000, 0.9759],
                [0.9759, 0.9759, 0.0000]])

    """
    def __init__(
            self,
            similarity: str = 'cosine',
            zero_diagonal: bool = True,
            reduction: str = 'mean',
            reduce_group: Any = None
    ):
        """
        Args:
            similarity: 'dot' or 'cosine'
            zero_diagonal: if True, the diagonals are set to zero
            reduction: 'none', 'sum', 'mean' (all along dim -1)
            reduce_group: the process group to reduce metric results from DDP

        Raises:
            AssertionError: if ``similarity`` or ``reduction`` is not one of the
                supported values, or ``zero_diagonal`` is not a bool

        """
        super().__init__(name='embedding_similarity',
                         reduce_group=reduce_group)
        # Validate eagerly so a bad option fails at construction time rather
        # than on the first forward call.
        assert similarity in ('dot', 'cosine'), \
            f"similarity must be 'dot' or 'cosine', got {similarity!r}"
        self.similarity = similarity
        # The original line evaluated isinstance(...) and discarded the result.
        assert isinstance(zero_diagonal, bool), \
            f"zero_diagonal must be a bool, got {type(zero_diagonal).__name__}"
        self.zero_diagonal = zero_diagonal
        assert reduction in ('none', 'sum', 'mean'), \
            f"reduction must be 'none', 'sum' or 'mean', got {reduction!r}"
        self.reduction = reduction

    def forward(self, batch: torch.Tensor) -> torch.Tensor:
        """
        Actual metric computation

        Args:
            batch: tensor containing embeddings with shape (batch_size, dim)

        Return:
            A square matrix (batch, batch) with the similarity scores between all elements.
            If sum or mean are used, the matrix is reduced along dim -1, leaving one
            value per row.
        """
        return embedding_similarity(batch,
                                    similarity=self.similarity,
                                    zero_diagonal=self.zero_diagonal,
                                    reduction=self.reduction)


# Names exported by ``from pytorch_lightning.metrics.self_supervised import *``.
__all__ = ['EmbeddingSimilarity']
35 changes: 35 additions & 0 deletions tests/metrics/functional/test_self_supervised.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import pytest
import torch
from sklearn.metrics import pairwise

from pytorch_lightning.metrics.functional.self_supervised import embedding_similarity


@pytest.mark.parametrize('similarity', ['cosine', 'dot'])
@pytest.mark.parametrize('reduction', ['none', 'mean', 'sum'])
def test_against_sklearn(similarity, reduction):
    """Check the PL embedding similarity against an sklearn-based reference."""
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # 5 samples, each embedded in 10 dimensions
    batch = torch.randn(5, 10, device=device)

    pl_dist = embedding_similarity(
        batch,
        similarity=similarity,
        reduction=reduction,
        zero_diagonal=False,
    )

    def sklearn_embedding_distance(batch, similarity, reduction):
        """Reference result computed with sklearn's pairwise kernels."""
        kernels = {
            'cosine': pairwise.cosine_similarity,
            'dot': pairwise.linear_kernel,
        }
        pairwise_scores = kernels[similarity](batch, batch)

        # Mirror the reduction over the last axis; anything else is unreduced.
        if reduction == 'mean':
            return pairwise_scores.mean(axis=-1)
        if reduction == 'sum':
            return pairwise_scores.sum(axis=-1)
        return pairwise_scores

    # sklearn works on numpy arrays, so move the batch to host memory first.
    reference = sklearn_embedding_distance(
        batch.cpu().detach().numpy(),
        similarity=similarity,
        reduction=reduction,
    )
    reference = torch.tensor(reference, dtype=torch.float, device=device)

    assert torch.allclose(reference, pl_dist)