From e29ae0d99647bec66e24aefa07b4e6d45d79920f Mon Sep 17 00:00:00 2001
From: Manthan Gupta
Date: Wed, 11 Dec 2024 18:33:49 +0530
Subject: [PATCH] Feat: CSV Url Knowledgebase

---
 phi/document/reader/csv_reader.py | 32 ++++++++++++++++++++++++++++++++
 phi/knowledge/csv.py              | 16 +++++++++++++++-
 2 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/phi/document/reader/csv_reader.py b/phi/document/reader/csv_reader.py
index 1b6516c127..7a61c11328 100644
--- a/phi/document/reader/csv_reader.py
+++ b/phi/document/reader/csv_reader.py
@@ -48,3 +48,35 @@ def read(self, file: Union[Path, IO[Any]], delimiter: str = ",", quotechar: str
         except Exception as e:
             logger.error(f"Error reading: {file.name if isinstance(file, IO) else file}: {e}")
             return []
+
+
+class CSVUrlReader(Reader):
+    """Reader that downloads a CSV file from a URL and parses it with CSVReader."""
+
+    def read(self, url: str) -> List[Document]:
+        """Download the CSV at `url` and return the documents parsed from it.
+
+        Raises ValueError when `url` is empty and ImportError when `httpx` is missing.
+        Returns an empty list when the download fails, as CSVReader.read does on error.
+        """
+        if not url:
+            raise ValueError("No URL provided")
+
+        try:
+            import httpx
+        except ImportError:
+            raise ImportError("`httpx` not installed")
+
+        logger.info(f"Reading: {url}")
+        try:
+            # httpx does not follow redirects by default; enable it so redirecting hosts work.
+            response = httpx.get(url, follow_redirects=True)
+            # Reject non-2xx responses so an error page is never parsed as CSV rows.
+            response.raise_for_status()
+        except httpx.HTTPError as e:
+            logger.error(f"Error reading: {url}: {e}")
+            return []
+
+        # NOTE(review): CSVReader.read's handling of file objects is not shown in this patch;
+        # confirm it accepts an in-memory text stream that has no `.name` attribute.
+        return CSVReader().read(file=io.StringIO(response.text))
diff --git a/phi/knowledge/csv.py b/phi/knowledge/csv.py
index bade2649dc..f9327b8de9 100644
--- a/phi/knowledge/csv.py
+++ b/phi/knowledge/csv.py
@@ -2,7 +2,7 @@
 from typing import Union, List, Iterator
 
 from phi.document import Document
-from phi.document.reader.csv_reader import CSVReader
+from phi.document.reader.csv_reader import CSVReader, CSVUrlReader
 from phi.knowledge.agent import AgentKnowledge
 
 
@@ -26,3 +26,17 @@ def document_lists(self) -> Iterator[List[Document]]:
                 yield self.reader.read(file=_csv)
         elif _csv_path.exists() and _csv_path.is_file() and _csv_path.suffix == ".csv":
             yield self.reader.read(file=_csv_path)
+
+
+class CSVUrlKnowledgeBase(AgentKnowledge):
+    """Knowledge base built from CSV files that are downloaded from URLs."""
+
+    # URLs of the CSV files to load; each one is read by `reader`.
+    url: List[str]
+    reader: CSVUrlReader = CSVUrlReader()
+
+    @property
+    def document_lists(self) -> Iterator[List[Document]]:
+        """Yield the list of documents read from each URL, in order."""
+        for url in self.url:
+            yield self.reader.read(url=url)