Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Read tool fix #1236

Merged
merged 9 commits into from
Sep 14, 2023
27 changes: 27 additions & 0 deletions superagi/helper/validate_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import csv
import pandas as pd
import chardet
from superagi.lib.logger import logger

def correct_csv_encoding(file_path):
    """Re-encode the file at *file_path* to UTF-8 in place, if needed.

    The encoding is guessed with ``chardet`` from the raw bytes. When the
    guess is already UTF-8 compatible the file is left untouched; otherwise
    the bytes are decoded with the detected encoding and written back as
    UTF-8. Only the byte encoding changes: delimiters, quoting, line endings
    and row contents are preserved exactly.

    Args:
        file_path: Path of the file to check and, if necessary, rewrite.

    Raises:
        OSError: If the file cannot be read or written.
        UnicodeDecodeError: If the bytes cannot be decoded with the detected
            encoding (the file is not modified in that case).
        LookupError: If Python has no codec for the detected encoding name.
    """
    with open(file_path, 'rb') as f:
        raw = f.read()

    encoding = chardet.detect(raw)['encoding']

    # chardet reports None when it cannot make a guess (e.g. an empty file);
    # there is nothing sensible to convert from, so leave the file as it is.
    if encoding is None:
        logger.info("Could not detect file encoding; file is left unchanged.")
        return

    # Compare case-insensitively, and treat ASCII as UTF-8: every ASCII byte
    # sequence is already valid UTF-8, so rewriting would be pointless.
    if encoding.lower() in ('utf-8', 'ascii'):
        logger.info("File is already in utf-8 encoding.")
        return

    # Decode fully before opening for writing so that a decode failure
    # cannot truncate the original file.
    text = raw.decode(encoding)

    # Write bytes rather than text so no newline translation takes place.
    with open(file_path, 'wb') as f:
        f.write(text.encode('utf-8'))
    logger.info("File is converted to utf-8 encoding.")
6 changes: 6 additions & 0 deletions superagi/tools/file/read_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from pydantic import BaseModel, Field
from ebooklib import epub
from superagi.helper.validate_csv import correct_csv_encoding

from superagi.helper.resource_helper import ResourceHelper
from superagi.helper.s3_helper import S3Helper
Expand All @@ -17,6 +18,7 @@
from superagi.types.storage_types import StorageType
from superagi.config.config import get_config
from unstructured.partition.auto import partition
from superagi.lib.logger import logger

class ReadFileSchema(BaseModel):
"""Input for CopyFileTool."""
Expand Down Expand Up @@ -89,7 +91,11 @@ def _execute(self, file_name: str):

content = "\n".join(content)
else:
logger.info(final_path)
if final_path.endswith('.csv'):
correct_csv_encoding(final_path)
elements = partition(final_path)
logger.info(elements)
content = "\n\n".join([str(el) for el in elements])

if temporary_file_path is not None:
Expand Down