From d65d6949a3a435b9596766928e7e03754cd17b2a Mon Sep 17 00:00:00 2001 From: Marlon May <77202149+Marlon154@users.noreply.github.com> Date: Tue, 28 Mar 2023 18:00:41 +0200 Subject: [PATCH 1/2] Add a progress bar for community detection --- sentence_transformers/util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sentence_transformers/util.py b/sentence_transformers/util.py index 6361ec9a5..4aec3a555 100644 --- a/sentence_transformers/util.py +++ b/sentence_transformers/util.py @@ -343,7 +343,7 @@ def import_from_string(dotted_path): raise ImportError(msg) -def community_detection(embeddings, threshold=0.75, min_community_size=10, batch_size=1024): +def community_detection(embeddings, threshold=0.75, min_community_size=10, batch_size=1024, show_progress_bar=False): """ Function for Fast Community Detection Finds in the embeddings all communities, i.e. embeddings that are close (closer than threshold). @@ -361,7 +361,7 @@ def community_detection(embeddings, threshold=0.75, min_community_size=10, batch min_community_size = min(min_community_size, len(embeddings)) sort_max_size = min(max(2 * min_community_size, 50), len(embeddings)) - for start_idx in range(0, len(embeddings), batch_size): + for start_idx in tqdm(range(0, len(embeddings), batch_size), desc="Find Cluster", disable=not show_progress_bar): # Compute cosine similarity scores cos_scores = cos_sim(embeddings[start_idx:start_idx + batch_size], embeddings) From a8ae01099bd860a0d1006edb0cd01b2b93efeaab Mon Sep 17 00:00:00 2001 From: Tom Aarsen <37621491+tomaarsen@users.noreply.github.com> Date: Tue, 12 Dec 2023 09:00:54 +0100 Subject: [PATCH 2/2] Find Cluster -> Finding clusters --- sentence_transformers/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentence_transformers/util.py b/sentence_transformers/util.py index 4aec3a555..e649ceb49 100644 --- a/sentence_transformers/util.py +++ b/sentence_transformers/util.py @@ -361,7 +361,7 @@ def community_detection(embeddings, threshold=0.75, min_community_size=10, batch min_community_size = min(min_community_size, len(embeddings)) sort_max_size = min(max(2 * min_community_size, 50), len(embeddings)) - for start_idx in tqdm(range(0, len(embeddings), batch_size), desc="Find Cluster", disable=not show_progress_bar): + for start_idx in tqdm(range(0, len(embeddings), batch_size), desc="Finding clusters", disable=not show_progress_bar): # Compute cosine similarity scores cos_scores = cos_sim(embeddings[start_idx:start_idx + batch_size], embeddings)