From 0867dad4d7b66ad098934dd9d39b3cdb261c0c00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=8E=98=E6=9D=83=20=E9=A9=AC?= Date: Thu, 7 Mar 2024 17:04:44 +0800 Subject: [PATCH] fixbug: gbk UnicodeEncodeError --- metagpt/learn/skill_loader.py | 5 ++--- metagpt/utils/common.py | 16 +++++++++++---- metagpt/utils/dependency_file.py | 7 ++----- metagpt/utils/file_repository.py | 7 ++----- metagpt/utils/mermaid.py | 8 ++------ setup.py | 2 +- .../metagpt/roles/test_tutorial_assistant.py | 7 +++---- tests/metagpt/utils/test_common.py | 20 ++++++++++++++----- tests/metagpt/utils/test_git_repository.py | 5 ++--- tests/metagpt/utils/test_s3.py | 8 ++------ 10 files changed, 43 insertions(+), 42 deletions(-) diff --git a/metagpt/learn/skill_loader.py b/metagpt/learn/skill_loader.py index bcf28bb878..e98f73cf98 100644 --- a/metagpt/learn/skill_loader.py +++ b/metagpt/learn/skill_loader.py @@ -9,11 +9,11 @@ from pathlib import Path from typing import Dict, List, Optional -import aiofiles import yaml from pydantic import BaseModel, Field from metagpt.context import Context +from metagpt.utils.common import aread class Example(BaseModel): @@ -68,8 +68,7 @@ class SkillsDeclaration(BaseModel): async def load(skill_yaml_file_name: Path = None) -> "SkillsDeclaration": if not skill_yaml_file_name: skill_yaml_file_name = Path(__file__).parent.parent.parent / "docs/.well-known/skills.yaml" - async with aiofiles.open(str(skill_yaml_file_name), mode="r") as reader: - data = await reader.read(-1) + data = await aread(filename=skill_yaml_file_name) skill_data = yaml.safe_load(data) return SkillsDeclaration(**skill_data) diff --git a/metagpt/utils/common.py b/metagpt/utils/common.py index 015902c3d6..aba75fbecf 100644 --- a/metagpt/utils/common.py +++ b/metagpt/utils/common.py @@ -29,6 +29,7 @@ from typing import Any, Callable, List, Tuple, Union import aiofiles +import chardet import loguru import requests from PIL import Image @@ -587,14 +588,21 @@ async def wrapper(self, *args, **kwargs): @handle_exception -async def aread(filename: str | Path, encoding=None) -> str: +async def aread(filename: str | Path, encoding="utf-8") -> str: """Read file asynchronously.""" - async with aiofiles.open(str(filename), mode="r", encoding=encoding) as reader: - content = await reader.read() + try: + async with aiofiles.open(str(filename), mode="r", encoding=encoding) as reader: + content = await reader.read() + except UnicodeDecodeError: + async with aiofiles.open(str(filename), mode="rb") as reader: + raw = await reader.read() + result = chardet.detect(raw) + detected_encoding = result["encoding"] + content = raw.decode(detected_encoding) return content -async def awrite(filename: str | Path, data: str, encoding=None): +async def awrite(filename: str | Path, data: str, encoding="utf-8"): """Write file asynchronously.""" pathname = Path(filename) pathname.parent.mkdir(parents=True, exist_ok=True) diff --git a/metagpt/utils/dependency_file.py b/metagpt/utils/dependency_file.py index d3add11712..0a375051c2 100644 --- a/metagpt/utils/dependency_file.py +++ b/metagpt/utils/dependency_file.py @@ -13,9 +13,7 @@ from pathlib import Path from typing import Set -import aiofiles - -from metagpt.utils.common import aread +from metagpt.utils.common import aread, awrite from metagpt.utils.exceptions import handle_exception @@ -45,8 +43,7 @@ async def load(self): async def save(self): """Save dependencies to the file asynchronously.""" data = json.dumps(self._dependencies) - async with aiofiles.open(str(self._filename), mode="w") as writer: - await writer.write(data) + await awrite(filename=self._filename, data=data) async def update(self, filename: Path | str, dependencies: Set[Path | str], persist=True): """Update dependencies for a file asynchronously. diff --git a/metagpt/utils/file_repository.py b/metagpt/utils/file_repository.py index d2a06963aa..d19f2b7052 100644 --- a/metagpt/utils/file_repository.py +++ b/metagpt/utils/file_repository.py @@ -14,11 +14,9 @@ from pathlib import Path from typing import Dict, List, Set -import aiofiles - from metagpt.logs import logger from metagpt.schema import Document -from metagpt.utils.common import aread +from metagpt.utils.common import aread, awrite from metagpt.utils.json_to_markdown import json_to_markdown @@ -55,8 +53,7 @@ async def save(self, filename: Path | str, content, dependencies: List[str] = No pathname = self.workdir / filename pathname.parent.mkdir(parents=True, exist_ok=True) content = content if content else "" # avoid `argument must be str, not None` to make it continue - async with aiofiles.open(str(pathname), mode="w") as writer: - await writer.write(content) + await awrite(filename=str(pathname), data=content) logger.info(f"save to: {str(pathname)}") if dependencies is not None: diff --git a/metagpt/utils/mermaid.py b/metagpt/utils/mermaid.py index ae3c5118fe..e1d140e849 100644 --- a/metagpt/utils/mermaid.py +++ b/metagpt/utils/mermaid.py @@ -9,11 +9,9 @@ import os from pathlib import Path -import aiofiles - from metagpt.config2 import config from metagpt.logs import logger -from metagpt.utils.common import check_cmd_exists +from metagpt.utils.common import awrite, check_cmd_exists async def mermaid_to_file(engine, mermaid_code, output_file_without_suffix, width=2048, height=2048) -> int: @@ -30,9 +28,7 @@ async def mermaid_to_file(engine, mermaid_code, output_file_without_suffix, widt if dir_name and not os.path.exists(dir_name): os.makedirs(dir_name) tmp = Path(f"{output_file_without_suffix}.mmd") - async with aiofiles.open(tmp, "w", encoding="utf-8") as f: - await f.write(mermaid_code) - # tmp.write_text(mermaid_code, encoding="utf-8") + await awrite(filename=tmp, data=mermaid_code) if engine == "nodejs": if check_cmd_exists(config.mermaid.path) != 0: diff --git a/setup.py b/setup.py index 072237e8cb..813d2410cf 100644 --- a/setup.py +++ b/setup.py @@ -57,7 +57,7 @@ def run(self): setup( name="metagpt", - version="0.7.5", + version="0.7.6", description="The Multi-Agent Framework", long_description=long_description, long_description_content_type="text/markdown", diff --git a/tests/metagpt/roles/test_tutorial_assistant.py b/tests/metagpt/roles/test_tutorial_assistant.py index c12c2b26ec..732f346fd0 100644 --- a/tests/metagpt/roles/test_tutorial_assistant.py +++ b/tests/metagpt/roles/test_tutorial_assistant.py @@ -6,11 +6,11 @@ @File : test_tutorial_assistant.py """ -import aiofiles import pytest from metagpt.const import TUTORIAL_PATH from metagpt.roles.tutorial_assistant import TutorialAssistant +from metagpt.utils.common import aread @pytest.mark.asyncio @@ -20,9 +20,8 @@ async def test_tutorial_assistant(language: str, topic: str, context): msg = await role.run(topic) assert TUTORIAL_PATH.exists() filename = msg.content - async with aiofiles.open(filename, mode="r", encoding="utf-8") as reader: - content = await reader.read() - assert "pip" in content + content = await aread(filename=filename) + assert "pip" in content if __name__ == "__main__": diff --git a/tests/metagpt/utils/test_common.py b/tests/metagpt/utils/test_common.py index 9b1fa878e1..7c59b80723 100644 --- a/tests/metagpt/utils/test_common.py +++ b/tests/metagpt/utils/test_common.py @@ -13,7 +13,6 @@ from pathlib import Path from typing import Any, Set -import aiofiles import pytest from pydantic import BaseModel @@ -125,9 +124,7 @@ class Input(BaseModel): async def test_parse_data_exception(self, filename, want): pathname = Path(__file__).parent.parent.parent / "data/output_parser" / filename assert pathname.exists() - async with aiofiles.open(str(pathname), mode="r") as reader: - data = await reader.read() - + data = await aread(filename=pathname) result = OutputParser.parse_data(data=data) assert want in result @@ -198,12 +195,25 @@ async def test_read_file_block(self): @pytest.mark.asyncio async def test_read_write(self): - pathname = Path(__file__).parent / uuid.uuid4().hex / "test.tmp" + pathname = Path(__file__).parent / f"../../../workspace/unittest/{uuid.uuid4().hex}" / "test.tmp" await awrite(pathname, "ABC") data = await aread(pathname) assert data == "ABC" pathname.unlink(missing_ok=True) + @pytest.mark.asyncio + async def test_read_write_error_charset(self): + pathname = Path(__file__).parent / f"../../../workspace/unittest/{uuid.uuid4().hex}" / "test.txt" + content = "中国abc123\u27f6" + await awrite(filename=pathname, data=content) + data = await aread(filename=pathname) + assert data == content + + content = "GB18030 是中国国家标准局发布的新一代中文字符集标准,是 GBK 的升级版,支持更广泛的字符范围。" + await awrite(filename=pathname, data=content, encoding="gb2312") + data = await aread(filename=pathname, encoding="utf-8") + assert data == content + if __name__ == "__main__": pytest.main([__file__, "-s"]) diff --git a/tests/metagpt/utils/test_git_repository.py b/tests/metagpt/utils/test_git_repository.py index ea28b8f0b8..480a22e24e 100644 --- a/tests/metagpt/utils/test_git_repository.py +++ b/tests/metagpt/utils/test_git_repository.py @@ -10,15 +10,14 @@ import shutil from pathlib import Path -import aiofiles import pytest +from metagpt.utils.common import awrite from metagpt.utils.git_repository import GitRepository async def mock_file(filename, content=""): - async with aiofiles.open(str(filename), mode="w") as file: - await file.write(content) + await awrite(filename=filename, data=content) async def mock_repo(local_path) -> (GitRepository, Path): diff --git a/tests/metagpt/utils/test_s3.py b/tests/metagpt/utils/test_s3.py index b26ebe94d9..c1a85f4fff 100644 --- a/tests/metagpt/utils/test_s3.py +++ b/tests/metagpt/utils/test_s3.py @@ -9,7 +9,6 @@ from pathlib import Path import aioboto3 -import aiofiles import pytest from metagpt.config2 import Config @@ -37,7 +36,7 @@ async def test_s3(mocker): conn = S3(s3) object_name = "unittest.bak" await conn.upload_file(bucket=s3.bucket, local_path=__file__, object_name=object_name) - pathname = (Path(__file__).parent / uuid.uuid4().hex).with_suffix(".bak") + pathname = (Path(__file__).parent / "../../../workspace/unittest" / uuid.uuid4().hex).with_suffix(".bak") pathname.unlink(missing_ok=True) await conn.download_file(bucket=s3.bucket, object_name=object_name, local_path=str(pathname)) assert pathname.exists() @@ -45,8 +44,7 @@ async def test_s3(mocker): assert url bin_data = await conn.get_object(bucket=s3.bucket, object_name=object_name) assert bin_data - async with aiofiles.open(__file__, mode="r", encoding="utf-8") as reader: - data = await reader.read() + data = await aread(filename=__file__) res = await conn.cache(data, ".bak", "script") assert "http" in res @@ -60,8 +58,6 @@ async def test_s3(mocker): except Exception: pass - await reader.close() - if __name__ == "__main__": pytest.main([__file__, "-s"])