Skip to content

Commit

Permalink
feat: Cleaned up scripts folder (#1144)
Browse files Browse the repository at this point in the history
* fix: Move benchmarks from script to benchmarks.py

- A few of the benchmarks weren't implemented in the benchmark list. The best choice seems to be to move them to benchmarks and remove the script (there were already a few issues), to ensure that they are maintained with the rest.
- Removed trivial (e.g. running one task) or outdated examples. Removing them ensures that we have less to maintain.
- moved mteb specific script into a mteb-folder.

* format

* fix: Convert arbitrary imports to absolute imports. (#1145)

This avoids loading in classes as modules (see PR ...)

* restructure scripts folder
  • Loading branch information
KennethEnevoldsen authored Aug 12, 2024
1 parent 0f562c2 commit ebe6def
Show file tree
Hide file tree
Showing 243 changed files with 482 additions and 1,003,658 deletions.
168 changes: 166 additions & 2 deletions mteb/benchmarks.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import Sequence

from mteb.abstasks.AbsTask import AbsTask
from mteb.overview import get_tasks


@dataclass
class Benchmark:
name: str
tasks: list[str]
tasks: Sequence[str] | Sequence[AbsTask]
description: str | None = None
reference: str | None = None
citation: str | None = None
Expand Down Expand Up @@ -197,7 +201,6 @@ def __getitem__(self, index):
}
""",
)

SEB = Benchmark(
name="MTEB(Scandinavian)",
tasks=[
Expand Down Expand Up @@ -269,3 +272,164 @@ def __getitem__(self, index):
url={https://arxiv.org/abs/2407.02883},
}""",
)

# French benchmark ("MTEB(fra)"): the tasks are obtained by calling get_tasks
# with languages=["fra"] and the explicit task names listed below, grouped by
# task category.
# NOTE(review): presumably the language filter narrows multilingual tasks to
# their French subsets -- confirm against mteb.overview.get_tasks.
MTEB_FRA = Benchmark(
name="MTEB(fra)",
tasks=get_tasks(
languages=["fra"],
tasks=[
# Classification
"AmazonReviewsClassification",
"MasakhaNEWSClassification",
"MassiveIntentClassification",
"MassiveScenarioClassification",
"MTOPDomainClassification",
"MTOPIntentClassification",
# Clustering
"AlloProfClusteringP2P",
"AlloProfClusteringS2S",
"HALClusteringS2S",
"MasakhaNEWSClusteringP2P",
"MasakhaNEWSClusteringS2S",
"MLSUMClusteringP2P",
"MLSUMClusteringS2S",
# Pair Classification
"OpusparcusPC",
"PawsXPairClassification",
# Reranking
"SyntecReranking",
"AlloprofReranking",
# Retrieval
"AlloprofRetrieval",
"BSARDRetrieval",
"SyntecRetrieval",
"XPQARetrieval",
"MintakaRetrieval",
# STS
# NOTE(review): "SummEvalFr" is listed under the STS heading although its
# name suggests a summarization task -- confirm the intended category.
"SummEvalFr",
"STSBenchmarkMultilingualSTS",
"STS22",
"SICKFr",
],
),
description="Main French benchmarks from MTEB",
# Reference and citation both point to arXiv:2405.20468 (MTEB-French).
reference="https://arxiv.org/abs/2405.20468",
citation="""@misc{ciancone2024mtebfrenchresourcesfrenchsentence,
title={MTEB-French: Resources for French Sentence Embedding Evaluation and Analysis},
author={Mathieu Ciancone and Imene Kerboua and Marion Schaeffer and Wissam Siblini},
year={2024},
eprint={2405.20468},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2405.20468},
}""",
)


# German benchmark ("MTEB(deu)"): the tasks are obtained by calling get_tasks
# with languages=["deu"] and the explicit task names listed below, grouped by
# task category.
# NOTE(review): presumably the language filter narrows multilingual tasks to
# their German subsets -- confirm against mteb.overview.get_tasks.
MTEB_DEU = Benchmark(
name="MTEB(deu)",
tasks=get_tasks(
languages=["deu"],
tasks=[
# Classification
"AmazonCounterfactualClassification",
"AmazonReviewsClassification",
"MTOPDomainClassification",
"MTOPIntentClassification",
"MassiveIntentClassification",
"MassiveScenarioClassification",
# Clustering
"BlurbsClusteringP2P",
"BlurbsClusteringS2S",
"TenKGnadClusteringP2P",
"TenKGnadClusteringS2S",
# Pair Classification
"FalseFriendsGermanEnglish",
"PawsXPairClassification",
# Reranking
"MIRACLReranking",
# Retrieval
"GermanQuAD-Retrieval",
"GermanDPR",
"XMarket",
"GerDaLIR",
# STS
"GermanSTSBenchmark",
"STS22",
],
),
description="Main German benchmarks from MTEB",
# NOTE(review): `reference` links the versioned HTML rendering (2401.02709v1)
# while the citation's url field uses the abs page for the same arXiv id --
# consider aligning the two.
reference="https://arxiv.org/html/2401.02709v1",
citation="""@misc{wehrli2024germantextembeddingclustering,
title={German Text Embedding Clustering Benchmark},
author={Silvan Wehrli and Bert Arnrich and Christopher Irrgang},
year={2024},
eprint={2401.02709},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2401.02709},
}""",
)


# Korean benchmark ("MTEB(kor)"): the tasks are obtained by calling get_tasks
# with languages=["kor"] and the short explicit task list below. No reference
# or citation is attached (both are None).
# NOTE(review): presumably the language filter narrows multilingual tasks to
# their Korean subsets -- confirm against mteb.overview.get_tasks.
MTEB_KOR = Benchmark(
name="MTEB(kor)",
tasks=get_tasks(
languages=["kor"],
tasks=[ # @KennethEnevoldsen: We could probably expand this to a more solid benchmark, but for now I have left it as is.
# Classification
"KLUE-TC",
# Reranking
"MIRACLReranking",
# Retrieval
"MIRACLRetrieval",
"Ko-StrategyQA",
# STS
"KLUE-STS",
"KorSTS",
],
),
description="Main Korean benchmarks from MTEB",
reference=None,
citation=None,
)


# Polish benchmark ("MTEB(pol)"): the tasks are obtained by calling get_tasks
# with languages=["pol"] and the explicit task names listed below, grouped by
# task category. Reference and citation both point to arXiv:2405.10138
# (PL-MTEB).
# NOTE(review): presumably the language filter narrows multilingual tasks to
# their Polish subsets -- confirm against mteb.overview.get_tasks.
#
# The constant is named MTEB_POL to match the upper-case naming of the sibling
# benchmark constants (MTEB_FRA, MTEB_DEU, MTEB_KOR).
MTEB_POL = Benchmark(
    name="MTEB(pol)",
    tasks=get_tasks(
        languages=["pol"],
        tasks=[
            # Classification
            "CBD",
            "PolEmo2.0-IN",
            "PolEmo2.0-OUT",
            "AllegroReviews",
            "PAC",
            "MassiveIntentClassification",
            "MassiveScenarioClassification",
            # Clustering
            "EightTagsClustering",
            "PlscClusteringS2S",
            "PlscClusteringP2P",
            # Pair Classification
            "SICK-E-PL",
            "PpcPC",
            "CDSC-E",
            "PSC",
            # STS
            "SICK-R-PL",
            "CDSC-R",
            "STS22",
            "STSBenchmarkMultilingualSTS",
        ],
    ),
    description="Main Polish benchmarks from MTEB",
    reference="https://arxiv.org/abs/2405.10138",
    citation="""@article{poswiata2024plmteb,
title={PL-MTEB: Polish Massive Text Embedding Benchmark},
author={Rafał Poświata and Sławomir Dadas and Michał Perełkiewicz},
journal={arXiv preprint arXiv:2405.10138},
year={2024}
}""",
)

# Backward-compatible alias: the benchmark was originally exposed under the
# lower-case name, so keep it importable for existing callers.
MTEB_pol = MTEB_POL
4 changes: 2 additions & 2 deletions mteb/models/google_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ def __init__(self, model_name: str, sep: str = " ", **kwargs) -> None:
def _embed(
self,
texts: list[str],
google_task_type: str = None,
google_task_type: str | None = None,
titles: list[str] | None = None,
dimensionality: Optional[int] = 768,
dimensionality: int | None = 768,
) -> list[list[float]]:
"""Embeds texts with a pre-trained, foundational model.
From https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings#generative-ai-get-text-embedding-python_vertex_ai_sdk
Expand Down
2 changes: 2 additions & 0 deletions mteb/models/gte_models.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

from functools import partial

from mteb.model_meta import ModelMeta
Expand Down
2 changes: 1 addition & 1 deletion mteb/tasks/BitextMining/dan/BornholmskBitextMining.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from mteb.abstasks import AbsTaskBitextMining
from mteb.abstasks.AbsTaskBitextMining import AbsTaskBitextMining
from mteb.abstasks.TaskMetadata import TaskMetadata


Expand Down
4 changes: 2 additions & 2 deletions mteb/tasks/BitextMining/kat/TbilisiCityHallBitextMining.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

from datasets import DatasetDict, load_dataset

from mteb.abstasks.AbsTaskBitextMining import AbsTaskBitextMining
from mteb.abstasks.MultilingualTask import MultilingualTask
from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks import AbsTaskBitextMining, MultilingualTask

_LANGUAGES = {
"kat_Geor": "ka",
"eng_Latn": "en",
Expand Down
4 changes: 2 additions & 2 deletions mteb/tasks/BitextMining/multilingual/BUCCBitextMining.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from __future__ import annotations

from mteb.abstasks.AbsTaskBitextMining import AbsTaskBitextMining
from mteb.abstasks.MultilingualTask import MultilingualTask
from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks import AbsTaskBitextMining, MultilingualTask

_LANGUAGES = {
"de-en": ["deu-Latn", "eng-Latn"],
"fr-en": ["fra-Latn", "eng-Latn"],
Expand Down
4 changes: 2 additions & 2 deletions mteb/tasks/BitextMining/multilingual/BUCCBitextMiningFast.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from __future__ import annotations

from mteb.abstasks.AbsTaskBitextMining import AbsTaskBitextMining
from mteb.abstasks.MultilingualTask import MultilingualTask
from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks import AbsTaskBitextMining, MultilingualTask

_LANGUAGES = {
"de-en": ["deu-Latn", "eng-Latn"],
"fr-en": ["fra-Latn", "eng-Latn"],
Expand Down
4 changes: 2 additions & 2 deletions mteb/tasks/BitextMining/multilingual/BibleNLPBitextMining.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

import datasets

from mteb.abstasks.AbsTaskBitextMining import AbsTaskBitextMining
from mteb.abstasks.MultilingualTask import MultilingualTask
from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks import AbsTaskBitextMining, MultilingualTask

_LANGUAGES = [
"aai_Latn", # Apinayé
"aak_Arab", # Ankave
Expand Down
4 changes: 2 additions & 2 deletions mteb/tasks/BitextMining/multilingual/DiaBLaBitextMining.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

import datasets

from mteb.abstasks.AbsTaskBitextMining import AbsTaskBitextMining
from mteb.abstasks.MultilingualTask import MultilingualTask
from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks import AbsTaskBitextMining, MultilingualTask


class DiaBLaBitextMining(AbsTaskBitextMining, MultilingualTask):
metadata = TaskMetadata(
Expand Down
4 changes: 2 additions & 2 deletions mteb/tasks/BitextMining/multilingual/FloresBitextMining.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

import datasets

from mteb.abstasks.AbsTaskBitextMining import AbsTaskBitextMining
from mteb.abstasks.MultilingualTask import MultilingualTask
from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks import AbsTaskBitextMining, MultilingualTask

_LANGUAGES = [
"ace_Arab",
"bam_Latn",
Expand Down
4 changes: 2 additions & 2 deletions mteb/tasks/BitextMining/multilingual/IN22ConvBitextMining.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

import datasets

from mteb.abstasks.AbsTaskBitextMining import AbsTaskBitextMining
from mteb.abstasks.MultilingualTask import MultilingualTask
from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks import AbsTaskBitextMining, MultilingualTask

_LANGUAGES = [
"asm_Beng",
"ben_Beng",
Expand Down
4 changes: 2 additions & 2 deletions mteb/tasks/BitextMining/multilingual/IN22GenBitextMining.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

import datasets

from mteb.abstasks.AbsTaskBitextMining import AbsTaskBitextMining
from mteb.abstasks.MultilingualTask import MultilingualTask
from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks import AbsTaskBitextMining, MultilingualTask

_LANGUAGES = [
"asm_Beng",
"ben_Beng",
Expand Down
4 changes: 2 additions & 2 deletions mteb/tasks/BitextMining/multilingual/IWSLT2017BitextMining.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

import datasets

from mteb.abstasks.AbsTaskBitextMining import AbsTaskBitextMining
from mteb.abstasks.MultilingualTask import MultilingualTask
from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks import AbsTaskBitextMining, MultilingualTask

_LANGUAGES = {
"ar-en": ["ara-Arab", "eng-Latn"],
"de-en": ["deu-Latn", "eng-Latn"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

import datasets

from mteb.abstasks.AbsTaskBitextMining import AbsTaskBitextMining
from mteb.abstasks.MultilingualTask import MultilingualTask
from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks import AbsTaskBitextMining, MultilingualTask

_LANGUAGES = [
"ben-Beng",
"guj_Gujr",
Expand Down
4 changes: 2 additions & 2 deletions mteb/tasks/BitextMining/multilingual/LinceMTBitextMining.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from __future__ import annotations

from mteb.abstasks.AbsTaskBitextMining import AbsTaskBitextMining
from mteb.abstasks.MultilingualTask import MultilingualTask
from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks import AbsTaskBitextMining, MultilingualTask

_LANGUAGES = {
"eng-eng_hin": ["eng-Latn", "hin-Latn"],
}
Expand Down
3 changes: 2 additions & 1 deletion mteb/tasks/BitextMining/multilingual/NTREXBitextMining.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

import datasets

from mteb.abstasks import AbsTaskBitextMining, MultilingualTask
from mteb.abstasks.AbsTaskBitextMining import AbsTaskBitextMining
from mteb.abstasks.MultilingualTask import MultilingualTask
from mteb.abstasks.TaskMetadata import TaskMetadata

_BRIDGE_LANGUAGES = (
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from __future__ import annotations

from mteb.abstasks.AbsTaskBitextMining import AbsTaskBitextMining
from mteb.abstasks.MultilingualTask import MultilingualTask
from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks import AbsTaskBitextMining, MultilingualTask

_LANGUAGES = {
"en-ha": ["eng-Latn", "hau-Latn"],
"en-ig": ["eng-Latn", "ibo-Latn"],
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from mteb.abstasks import AbsTaskBitextMining
from mteb.abstasks.AbsTaskBitextMining import AbsTaskBitextMining
from mteb.abstasks.TaskMetadata import TaskMetadata


Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from __future__ import annotations

from mteb.abstasks.AbsTaskBitextMining import AbsTaskBitextMining
from mteb.abstasks.MultilingualTask import MultilingualTask
from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks import AbsTaskBitextMining, MultilingualTask

_LANGUAGES = {
"ind-abs": ["ind-Latn", "abs-Latn"],
"ind-btk": ["ind-Latn", "bbc-Latn"],
Expand Down
Loading

0 comments on commit ebe6def

Please sign in to comment.