From 5d8772d3e89a7d01417aa7904451bf3f6032af96 Mon Sep 17 00:00:00 2001
From: Rounak Bhatia <rounak@contlo.com>
Date: Thu, 14 Sep 2023 14:27:17 +0530
Subject: [PATCH] read_tool_fix

---
 superagi/helper/validate_csv.py  | 27 +++++++++++++++++++++++++++
 superagi/tools/file/read_file.py |  6 ++++++
 2 files changed, 33 insertions(+)
 create mode 100644 superagi/helper/validate_csv.py

diff --git a/superagi/helper/validate_csv.py b/superagi/helper/validate_csv.py
new file mode 100644
index 000000000..5e1c6fd46
--- /dev/null
+++ b/superagi/helper/validate_csv.py
@@ -0,0 +1,27 @@
+import csv
+import pandas as pd
+import chardet
+from superagi.lib.logger import logger
+
+def correct_csv_encoding(file_path):
+    """Re-encode the CSV at ``file_path`` to UTF-8 in place when chardet detects another encoding.
+
+    ASCII or undetected encodings are left untouched, as is a file that fails to decode or parse.
+    A converted file is read as ';'-delimited and rewritten by pandas with its default header row.
+    """
+    with open(file_path, 'rb') as f:
+        # chardet reports None when it cannot detect anything; lower-case for a stable comparison.
+        encoding = (chardet.detect(f.read())['encoding'] or '').lower()
+
+    if encoding in ('', 'ascii', 'utf-8'):
+        # ASCII bytes are already valid UTF-8, and an unknown encoding cannot be decoded safely.
+        logger.info("File is already in utf-8 encoding.")
+        return
+    try:
+        # newline='' lets the csv module handle line breaks embedded in quoted fields itself.
+        with open(file_path, 'r', encoding=encoding, newline='') as f:
+            data = list(csv.reader(f, delimiter=';', quotechar='"'))
+    except (UnicodeError, LookupError, csv.Error) as e:
+        logger.error(f"An error occurred while processing the file: {e}")
+        return  # Best effort: keep the original file rather than writing a partial result.
+    pd.DataFrame(data).to_csv(file_path, encoding='utf-8', index=False)
+    logger.info("File is converted to utf-8 encoding.")
\ No newline at end of file
diff --git a/superagi/tools/file/read_file.py b/superagi/tools/file/read_file.py
index 7c4177438..c984a7637 100644
--- a/superagi/tools/file/read_file.py
+++ b/superagi/tools/file/read_file.py
@@ -7,6 +7,7 @@
 
 from pydantic import BaseModel, Field
 from ebooklib import epub
+from superagi.helper.validate_csv import correct_csv_encoding
 
 from superagi.helper.resource_helper import ResourceHelper
 from superagi.helper.s3_helper import S3Helper
@@ -17,6 +18,7 @@
 from superagi.types.storage_types import StorageType
 from superagi.config.config import get_config
 from unstructured.partition.auto import partition
+from superagi.lib.logger import logger
 
 class ReadFileSchema(BaseModel):
     """Input for CopyFileTool."""
@@ -89,7 +91,11 @@ def _execute(self, file_name: str):
 
             content = "\n".join(content)
         else:
+            logger.info(final_path)
+            if final_path.endswith('.csv'):
+                correct_csv_encoding(final_path)
             elements = partition(final_path)
+            logger.info(elements)
             content = "\n\n".join([str(el) for el in elements])
 
         if temporary_file_path is not None: