Skip to content

Commit

Permalink
Merge pull request #2711 from tomaarsen/model_cards/datasets_metadata
Browse files: browse the repository at this point in the history
[`model cards`] Also include HF datasets in the model card metadata
  • Loading branch information
tomaarsen authored Jun 4, 2024
2 parents 1e72d91 + 9aef3c4 commit 2224477
Showing 1 changed file with 5 additions and 0 deletions.
5 changes: 5 additions & 0 deletions sentence_transformers/model_card.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,7 @@ class SentenceTransformerModelCardData(CardData):
citations: Dict[str, str] = field(default_factory=dict, init=False)
best_model_step: Optional[int] = field(default=None, init=False)
trainer: Optional["SentenceTransformerTrainer"] = field(default=None, init=False, repr=False)
datasets: List[str] = field(default_factory=list, init=False, repr=False)

# Utility fields
first_save: bool = field(default=True, init=False)
Expand Down Expand Up @@ -357,6 +358,10 @@ def validate_datasets(self, dataset_list, infer_languages: bool = True) -> None:
if language not in self.language:
self.language.append(language)

# Track Hugging Face dataset IDs so they are included in the model card metadata
if info.id not in self.datasets:
self.datasets.append(info.id)

output_dataset_list.append(dataset)
return output_dataset_list

Expand Down

0 comments on commit 2224477

Please sign in to comment.