"""Plain-text file readers.

``TxtReader`` loads a whole UTF-8 text file into a single ``Document``;
``LineTxtReader`` produces one ``Document`` per line of the file.
"""
# Provenance (recovered from the git patch series this module came from):
#   [PATCH 1/2] 61542e86dbf3362ac6578ca82a86eb8fedfc9569
#       ia_fin, Sat, 29 Jun 2024 10:05:34 +0800 -- "Create txt_reader"
#       Read a txt file; documents can be generated line by line.
#   [PATCH 2/2] d5132818e36b9ec492d430d86a47099465951de4
#       Jerry Z H, Thu, 11 Jul 2024 15:46:04 +0800 -- "refactor: update text_reader"
#       Renamed agentuniverse/agent/action/knowledge/reader/txt_reader to
#       agentuniverse/agent/action/knowledge/reader/file/txt_reader.py.

from pathlib import Path
from typing import List, Optional, Dict

from agentuniverse.agent.action.knowledge.reader.reader import Reader
from agentuniverse.agent.action.knowledge.store.document import Document


def _build_metadata(file_name, ext_info: Optional[Dict] = None) -> Dict:
    """Return the metadata for one file: its name plus any caller-supplied extras."""
    metadata = {"file_name": file_name}
    if ext_info is not None:
        # Caller-supplied entries win on key collision, including "file_name".
        metadata.update(ext_info)
    return metadata


class LineTxtReader(Reader):
    """Txt reader that emits one document per line."""

    def load_data(self, fpath: Path, ext_info: Optional[Dict] = None) -> List[Document]:
        """Read a UTF-8 text file and return one ``Document`` per line.

        Lines are passed through exactly as the file iterator yields them, so
        each ``text`` keeps its trailing line terminator and blank lines still
        produce a document. An empty file yields an empty list.

        Args:
            fpath: Path of the text file to read.
            ext_info: Optional extra metadata merged over the default
                ``{"file_name": ...}`` entry of every document.

        Returns:
            The documents, in file order.

        Raises:
            OSError: If the file cannot be opened.
            UnicodeDecodeError: If the file content is not valid UTF-8.
        """
        with open(fpath, 'r', encoding='utf-8') as file:
            metadata = _build_metadata(file.name, ext_info)
            # Give every document its own copy of the metadata so that mutating
            # one document's metadata cannot leak into the others.
            return [Document(text=line, metadata=dict(metadata)) for line in file]


class TxtReader(Reader):
    """Txt reader."""

    def load_data(self, fpath: Path, ext_info: Optional[Dict] = None) -> List[Document]:
        """Read a UTF-8 text file and return it as a single ``Document``.

        Args:
            fpath: Path of the text file to read.
            ext_info: Optional extra metadata merged over the default
                ``{"file_name": ...}`` entry.

        Returns:
            A one-element list holding a document with the full file content.

        Raises:
            OSError: If the file cannot be opened.
            UnicodeDecodeError: If the file content is not valid UTF-8.
        """
        with open(fpath, 'r', encoding='utf-8') as file:
            metadata = _build_metadata(file.name, ext_info)
            txt = file.read()

        return [Document(text=txt, metadata=metadata)]