From 9b1c33f914dd11ef33177f01340c15b72cbd1c1f Mon Sep 17 00:00:00 2001 From: Khalid Alnajjar <2656139+mokha@users.noreply.github.com> Date: Mon, 18 Dec 2023 16:16:40 +0200 Subject: [PATCH] Accept word2vec formats too. (#1875) * Accept word2vec formats too. * Remove spaces in otherwise empty line --------- Co-authored-by: Tom Aarsen <37621491+tomaarsen@users.noreply.github.com> --- sentence_transformers/models/WordEmbeddings.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sentence_transformers/models/WordEmbeddings.py b/sentence_transformers/models/WordEmbeddings.py index 46f52d013..f0f909b67 100644 --- a/sentence_transformers/models/WordEmbeddings.py +++ b/sentence_transformers/models/WordEmbeddings.py @@ -103,6 +103,10 @@ def from_text_file(embeddings_file_path: str, update_embeddings: bool = False, i iterator = tqdm(fIn, desc="Load Word Embeddings", unit="Embeddings") for line in iterator: split = line.rstrip().split(item_separator) + + if not vocab and len(split) == 2: # Handle Word2vec format + continue + word = split[0] if embeddings_dimension == None: