From 7aab2d1cdcd0c41b0ad7352d92c8b05c07dafc3d Mon Sep 17 00:00:00 2001 From: Yash Pratap Solanky <101447028+ysolanky@users.noreply.github.com> Date: Mon, 13 Jan 2025 09:56:59 -0500 Subject: [PATCH] fastembedder-fix (#1745) ## Description Updated `FastEmbedEmbedder.get_embedding_and_usage` to compute the embedding via `get_embedding` and return it together with `None` for usage, instead of delegating to `super().get_embedding_and_usage`. The changes to the other files in this patch are formatting-only (joined adjacent string literals, operator and slice spacing, a trailing comma, and trailing-whitespace cleanup). Fixes #1715 ## Type of change Please check the options that are relevant: - [ ] Bug fix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) - [ ] Model update - [ ] Infrastructure change ## Checklist - [ ] My code follows Phidata's style guidelines and best practices - [ ] I have performed a self-review of my code - [ ] I have added docstrings and comments for complex logic - [ ] My changes generate no new warnings or errors - [ ] I have added cookbook examples for my new addition (if needed) - [ ] I have updated requirements.txt/pyproject.toml (if needed) - [ ] I have verified my changes in a clean environment Notes: `FastEmbedEmbedder` currently does not provide usage information, so `get_embedding_and_usage` returns `None` as the usage value. --- cookbook/assistants/llms/groq/video_summary/app.py | 4 ++-- cookbook/assistants/llms/ollama/video_summary/app.py | 4 ++-- cookbook/assistants/tools/hackernews.py | 4 +--- cookbook/teams/02_news_reporter.py | 3 +-- cookbook/tools/hackernews.py | 4 +--- cookbook/workflows/content_creator_workflow/prompts.py | 2 +- phi/agent/agent.py | 3 +-- phi/assistant/assistant.py | 6 ++---- phi/cli/config.py | 2 +- phi/document/chunking/agentic.py | 2 +- phi/embedder/fastembed.py | 6 +++++- phi/memory/classifier.py | 3 +-- phi/memory/manager.py | 3 +-- phi/memory/summarizer.py | 3 +-- phi/tools/decorator.py | 2 +- phi/tools/github.py | 6 +++--- phi/tools/wikipedia.py | 2 +- phi/vectordb/chroma/chromadb.py | 2 +- phi/vectordb/milvus/milvus.py | 2 +- phi/vectordb/qdrant/qdrant.py | 2 +- 20 files changed, 29 insertions(+), 36 deletions(-) diff 
--git a/cookbook/assistants/llms/groq/video_summary/app.py b/cookbook/assistants/llms/groq/video_summary/app.py index bc4580d31f..42b4d8ea82 100644 --- a/cookbook/assistants/llms/groq/video_summary/app.py +++ b/cookbook/assistants/llms/groq/video_summary/app.py @@ -89,7 +89,7 @@ def main() -> None: if num_chunks > 1: chunk_summaries = [] for i in range(num_chunks): - with st.status(f"Summarizing chunk: {i+1}", expanded=False) as status: + with st.status(f"Summarizing chunk: {i + 1}", expanded=False) as status: chunk_summary = "" chunk_container = st.empty() chunk_summarizer = get_chunk_summarizer(model=llm_model) @@ -99,7 +99,7 @@ def main() -> None: chunk_summary += delta # type: ignore chunk_container.markdown(chunk_summary) chunk_summaries.append(chunk_summary) - status.update(label=f"Chunk {i+1} summarized", state="complete", expanded=False) + status.update(label=f"Chunk {i + 1} summarized", state="complete", expanded=False) with st.spinner("Generating Summary"): summary = "" diff --git a/cookbook/assistants/llms/ollama/video_summary/app.py b/cookbook/assistants/llms/ollama/video_summary/app.py index 135a1f781c..9049e23305 100644 --- a/cookbook/assistants/llms/ollama/video_summary/app.py +++ b/cookbook/assistants/llms/ollama/video_summary/app.py @@ -90,7 +90,7 @@ def main() -> None: if num_chunks > 1: chunk_summaries = [] for i in range(num_chunks): - with st.status(f"Summarizing chunk: {i+1}", expanded=False) as status: + with st.status(f"Summarizing chunk: {i + 1}", expanded=False) as status: chunk_summary = "" chunk_container = st.empty() chunk_summarizer = get_chunk_summarizer(model=llm_model) @@ -100,7 +100,7 @@ def main() -> None: chunk_summary += delta # type: ignore chunk_container.markdown(chunk_summary) chunk_summaries.append(chunk_summary) - status.update(label=f"Chunk {i+1} summarized", state="complete", expanded=False) + status.update(label=f"Chunk {i + 1} summarized", state="complete", expanded=False) with st.spinner("Generating Summary"): summary 
= "" diff --git a/cookbook/assistants/tools/hackernews.py b/cookbook/assistants/tools/hackernews.py index 3a65c2faf9..8cbd6bba1d 100644 --- a/cookbook/assistants/tools/hackernews.py +++ b/cookbook/assistants/tools/hackernews.py @@ -10,7 +10,5 @@ # debug_mode=True, ) hn_assistant.print_response( - "Write an engaging summary of the " - "users with the top 2 stories on hackernews. " - "Please mention the stories as well.", + "Write an engaging summary of the users with the top 2 stories on hackernews. Please mention the stories as well.", ) diff --git a/cookbook/teams/02_news_reporter.py b/cookbook/teams/02_news_reporter.py index 65f73c2351..6e813b90d6 100644 --- a/cookbook/teams/02_news_reporter.py +++ b/cookbook/teams/02_news_reporter.py @@ -18,8 +18,7 @@ role="Searches the top URLs for a topic", instructions=[ "Given a topic, first generate a list of 3 search terms related to that topic.", - "For each search term, search the web and analyze the results." - "Return the 10 most relevant URLs to the topic.", + "For each search term, search the web and analyze the results.Return the 10 most relevant URLs to the topic.", "You are writing for the New York Times, so the quality of the sources is important.", ], tools=[DuckDuckGo()], diff --git a/cookbook/tools/hackernews.py b/cookbook/tools/hackernews.py index 6667ee6935..079cdbc0b4 100644 --- a/cookbook/tools/hackernews.py +++ b/cookbook/tools/hackernews.py @@ -8,7 +8,5 @@ markdown=True, ) agent.print_response( - "Write an engaging summary of the " - "users with the top 2 stories on hackernews. " - "Please mention the stories as well.", + "Write an engaging summary of the users with the top 2 stories on hackernews. 
Please mention the stories as well.", ) diff --git a/cookbook/workflows/content_creator_workflow/prompts.py b/cookbook/workflows/content_creator_workflow/prompts.py index 9447ccc7f0..7c405102c6 100644 --- a/cookbook/workflows/content_creator_workflow/prompts.py +++ b/cookbook/workflows/content_creator_workflow/prompts.py @@ -102,7 +102,7 @@ "and business leaders while maintaining technical accuracy.\n\n" ), "expected_output": ( - "A LinkedIn post plan containing:\n" "- content\n" "- a main blog URL that is associated with the post\n\n" + "A LinkedIn post plan containing:\n- content\n- a main blog URL that is associated with the post\n\n" ), }, } diff --git a/phi/agent/agent.py b/phi/agent/agent.py index c172de1b12..6411f3600d 100644 --- a/phi/agent/agent.py +++ b/phi/agent/agent.py @@ -493,8 +493,7 @@ def update_model(self) -> None: except ModuleNotFoundError as e: logger.exception(e) logger.error( - "phidata uses `openai` as the default model provider. " - "Please provide a `model` or install `openai`." + "phidata uses `openai` as the default model provider. Please provide a `model` or install `openai`." ) exit(1) self.model = OpenAIChat() # We default to OpenAIChat as a base model diff --git a/phi/assistant/assistant.py b/phi/assistant/assistant.py index 040cd8019f..9725443588 100644 --- a/phi/assistant/assistant.py +++ b/phi/assistant/assistant.py @@ -275,9 +275,7 @@ def update_llm(self) -> None: from phi.llm.openai import OpenAIChat except ModuleNotFoundError as e: logger.exception(e) - logger.error( - "phidata uses `openai` as the default LLM. " "Please provide an `llm` or install `openai`." - ) + logger.error("phidata uses `openai` as the default LLM. Please provide an `llm` or install `openai`.") exit(1) self.llm = OpenAIChat() @@ -662,7 +660,7 @@ def get_system_prompt(self) -> Optional[str]: ) ) for i, instruction in enumerate(instructions): - system_prompt_lines.append(f"{i+1}. {instruction}") + system_prompt_lines.append(f"{i + 1}. 
{instruction}") system_prompt_lines.append("") # The add the expected output to the system prompt diff --git a/phi/cli/config.py b/phi/cli/config.py index 72b5780773..744d59b85d 100644 --- a/phi/cli/config.py +++ b/phi/cli/config.py @@ -269,7 +269,7 @@ def print_to_cli(self, show_all: bool = False): else: print_info("No active workspace found.") print_info( - "Please create a workspace using `phi ws create` " "or setup existing workspace using `phi ws setup`" + "Please create a workspace using `phi ws create` or setup existing workspace using `phi ws setup`" ) if show_all and len(self.ws_config_map) > 0: diff --git a/phi/document/chunking/agentic.py b/phi/document/chunking/agentic.py index e9b5ef7be8..7cd1970ab4 100644 --- a/phi/document/chunking/agentic.py +++ b/phi/document/chunking/agentic.py @@ -30,7 +30,7 @@ def chunk(self, document: Document) -> List[Document]: Consider semantic completeness, paragraph boundaries, and topic transitions. Return only the character position number of where to break the text: - {remaining_text[:self.max_chunk_size]}""" + {remaining_text[: self.max_chunk_size]}""" try: response = self.model.response([Message(role="user", content=prompt)]) diff --git a/phi/embedder/fastembed.py b/phi/embedder/fastembed.py index 8a02c2a5d5..f74bb1749e 100644 --- a/phi/embedder/fastembed.py +++ b/phi/embedder/fastembed.py @@ -27,4 +27,8 @@ def get_embedding(self, text: str) -> List[float]: return [] def get_embedding_and_usage(self, text: str) -> Tuple[List[float], Optional[Dict]]: - return super().get_embedding_and_usage(text) + embedding = self.get_embedding(text=text) + # Currently, FastEmbed does not provide usage information + usage = None + + return embedding, usage diff --git a/phi/memory/classifier.py b/phi/memory/classifier.py index fc12ad226a..001a842a75 100644 --- a/phi/memory/classifier.py +++ b/phi/memory/classifier.py @@ -23,8 +23,7 @@ def update_model(self) -> None: except ModuleNotFoundError as e: logger.exception(e) logger.error( - 
"phidata uses `openai` as the default model provider. " - "Please provide a `model` or install `openai`." + "phidata uses `openai` as the default model provider. Please provide a `model` or install `openai`." ) exit(1) self.model = OpenAIChat() diff --git a/phi/memory/manager.py b/phi/memory/manager.py index 53dc4f4333..60ecab8316 100644 --- a/phi/memory/manager.py +++ b/phi/memory/manager.py @@ -31,8 +31,7 @@ def update_model(self) -> None: except ModuleNotFoundError as e: logger.exception(e) logger.error( - "phidata uses `openai` as the default model provider. " - "Please provide a `model` or install `openai`." + "phidata uses `openai` as the default model provider. Please provide a `model` or install `openai`." ) exit(1) self.model = OpenAIChat() diff --git a/phi/memory/summarizer.py b/phi/memory/summarizer.py index 6b4799addd..5820d02409 100644 --- a/phi/memory/summarizer.py +++ b/phi/memory/summarizer.py @@ -21,8 +21,7 @@ def update_model(self) -> None: except ModuleNotFoundError as e: logger.exception(e) logger.error( - "phidata uses `openai` as the default model provider. " - "Please provide a `model` or install `openai`." + "phidata uses `openai` as the default model provider. Please provide a `model` or install `openai`." ) exit(1) self.model = OpenAIChat() diff --git a/phi/tools/decorator.py b/phi/tools/decorator.py index 0b612b4c7a..04a13f7dba 100644 --- a/phi/tools/decorator.py +++ b/phi/tools/decorator.py @@ -74,7 +74,7 @@ def another_function(): invalid_kwargs = set(kwargs.keys()) - VALID_KWARGS if invalid_kwargs: raise ValueError( - f"Invalid tool configuration arguments: {invalid_kwargs}. " f"Valid arguments are: {sorted(VALID_KWARGS)}" + f"Invalid tool configuration arguments: {invalid_kwargs}. 
Valid arguments are: {sorted(VALID_KWARGS)}" ) def decorator(func: F) -> Function: diff --git a/phi/tools/github.py b/phi/tools/github.py index d69a86936e..cb7f1bf313 100644 --- a/phi/tools/github.py +++ b/phi/tools/github.py @@ -24,7 +24,7 @@ def __init__( get_pull_request_changes: bool = True, create_issue: bool = True, create_repository: bool = True, - get_repository_languages: bool = True + get_repository_languages: bool = True, ): super().__init__(name="github") @@ -136,7 +136,7 @@ def create_repository( logger.debug(f"Creating repository: {name}") try: description = description if description is not None else "" - + if organization: logger.debug(f"Creating in organization: {organization}") org = self.g.get_organization(organization) @@ -153,7 +153,7 @@ def create_repository( description=description, auto_init=auto_init, ) - + repo_info = { "name": repo.full_name, "url": repo.html_url, diff --git a/phi/tools/wikipedia.py b/phi/tools/wikipedia.py index 4804005d9d..abe147abbe 100644 --- a/phi/tools/wikipedia.py +++ b/phi/tools/wikipedia.py @@ -47,7 +47,7 @@ def search_wikipedia(self, query: str) -> str: import wikipedia # noqa: F401 except ImportError: raise ImportError( - "The `wikipedia` package is not installed. " "Please install it via `pip install wikipedia`." + "The `wikipedia` package is not installed. Please install it via `pip install wikipedia`." ) logger.info(f"Searching wikipedia for: {query}") diff --git a/phi/vectordb/chroma/chromadb.py b/phi/vectordb/chroma/chromadb.py index 43eb280f8d..16564421b6 100644 --- a/phi/vectordb/chroma/chromadb.py +++ b/phi/vectordb/chroma/chromadb.py @@ -9,7 +9,7 @@ from chromadb.api.types import QueryResult, GetResult except ImportError: - raise ImportError("The `chromadb` package is not installed. " "Please install it via `pip install chromadb`.") + raise ImportError("The `chromadb` package is not installed. 
Please install it via `pip install chromadb`.") from phi.document import Document from phi.embedder import Embedder diff --git a/phi/vectordb/milvus/milvus.py b/phi/vectordb/milvus/milvus.py index c2c56697fe..3e95c97ec1 100644 --- a/phi/vectordb/milvus/milvus.py +++ b/phi/vectordb/milvus/milvus.py @@ -4,7 +4,7 @@ try: from pymilvus import MilvusClient # type: ignore except ImportError: - raise ImportError("The `pymilvus` package is not installed. " "Please install it via `pip install pymilvus`.") + raise ImportError("The `pymilvus` package is not installed. Please install it via `pip install pymilvus`.") from phi.document import Document from phi.embedder import Embedder diff --git a/phi/vectordb/qdrant/qdrant.py b/phi/vectordb/qdrant/qdrant.py index c84c458d93..4acbeb067e 100644 --- a/phi/vectordb/qdrant/qdrant.py +++ b/phi/vectordb/qdrant/qdrant.py @@ -6,7 +6,7 @@ from qdrant_client.http import models except ImportError: raise ImportError( - "The `qdrant-client` package is not installed. " "Please install it via `pip install qdrant-client`." + "The `qdrant-client` package is not installed. Please install it via `pip install qdrant-client`." ) from phi.document import Document