Skip to content

Commit

Permalink
Merge pull request #30 from K3A-Team/feat/aziz/folder-upload
Browse files Browse the repository at this point in the history
added folder upload and fixed ai_description problem
  • Loading branch information
4zz0u4k authored Sep 17, 2024
2 parents 870b91f + bb6b216 commit b87cd6d
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 16 deletions.
14 changes: 13 additions & 1 deletion code/Routers/foldersRouter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@
from Core.Shared.ErrorResponses import *
from fastapi import UploadFile
from fastapi import File
from typing import List
from dotenv import load_dotenv
from Models.Requests.FolderRequestsModels import CreateFolderRequest
from handlers.storageHandlers.foldersHandlers import createFolderHandler , getFolderHandler ,deleteFolderHandler , restoreFolderHandler , restoreFileHandler
from handlers.storageHandlers.foldersHandlers import createFolderHandler , getFolderHandler ,deleteFolderHandler , restoreFolderHandler , restoreFileHandler , uploadFolderHandler
from handlers.storageHandlers.filesHandlers import createFileHandler , deleteFileHandler

load_dotenv()
Expand Down Expand Up @@ -82,3 +83,14 @@ async def createFile(
except Exception as e:
return {"success": False, "message": str(e)}

@foldersRouter.post("/{folderId}/uploadFolder",status_code=status.HTTP_201_CREATED)
async def uploadFolder(folderId : str,
files: List[UploadFile] = File(...),
userID: str = Depends(LoginProtected)):
try:

result = await uploadFolderHandler(userID,files,folderId)
return {"success": True, "folder": result}

except Exception as e:
return {"success": False, "message": str(e)}
16 changes: 11 additions & 5 deletions code/handlers/storageHandlers/filesHandlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import datetime
from Core.Shared.Database import db

async def createFileHandler(userID:str, folderId: str , file: UploadFile = File(...), force: bool | None = None):
async def createFileHandler(userID:str, folderId: str , file: UploadFile = File(...), force: bool | None = None, dir_name : str | None = None , valid_dir_name : bool = False):
"""
Creates a file in the specified folder and stores it in Firebase Storage.
Args:
Expand Down Expand Up @@ -59,7 +59,10 @@ async def createFileHandler(userID:str, folderId: str , file: UploadFile = File(



name, ext = os.path.splitext(file.filename)
if (not valid_dir_name):
name, ext = os.path.splitext(file.filename)
else:
name, ext = os.path.splitext(dir_name)
saved_name = name

duplicate_check = await is_file_duplicate(file_hash, folderId)
Expand All @@ -78,7 +81,10 @@ async def createFileHandler(userID:str, folderId: str , file: UploadFile = File(
except ValueError:
pass
else:
name = os.path.splitext(file.filename)[0]
if (not valid_dir_name):
name = os.path.splitext(file.filename)[0]
else:
name = os.path.splitext(dir_name)[0]
else:
raise HTTPException(status_code=400, detail="No last duplicate found")

Expand Down Expand Up @@ -118,8 +124,8 @@ async def createFileHandler(userID:str, folderId: str , file: UploadFile = File(
ai_description=ai_description
)

#ai_generated_tags,ai_generated_description = await process_and_upsert_service(file=file,name=name,file_id=fileObj.id,url=url,userID=userID,saved_name=saved_name)
ai_generated_tags,ai_generated_description = ['example-tag'] , 'example-description'
ai_generated_tags,ai_generated_description = await process_and_upsert_service(file=file,name=name,file_id=fileObj.id,url=url,userID=userID,saved_name=saved_name)
# ai_generated_tags,ai_generated_description = ['example-tag'] , 'example-description'

fileObj.tags.extend(ai_generated_tags)
fileObj.ai_description = ai_generated_description
Expand Down
28 changes: 28 additions & 0 deletions code/handlers/storageHandlers/foldersHandlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from Core.Shared.Utils import *
from Core.Shared.ErrorResponses import *
from Models.Entities.Folder import Folder
from typing import List
from handlers.storageHandlers.filesHandlers import createFileHandler
import datetime
from Core.Shared.Database import db

Expand Down Expand Up @@ -222,3 +224,29 @@ async def restoreFileHandler(userID:str,fileId: str):
file = await Database.edit("files", file["id"], file)

return folder

async def uploadFolderHandler(userID : str,files : List[UploadFile],folderId : str):
    """
    Recreate an uploaded directory tree under the folder ``folderId``.

    Each uploaded file's ``filename`` is expected to carry its relative path
    (``"root/sub/file.txt"``), as produced by a browser directory upload.
    Missing ancestor folders are created outermost-first, then the file is
    stored in its parent folder.

    Args:
        userID: ID of the uploading user.
        files: Flat list of uploaded files with path-qualified filenames.
        folderId: ID of the existing folder the tree is created under.

    Returns:
        The formatted representation of the uploaded tree's root folder
        (falls back to ``folderId`` when no folder was created).
    """
    # Maps a relative directory path (e.g. "root/sub") to its created folder ID.
    # Keyed by the FULL path — not just the folder name — so same-named
    # directories at different depths no longer collide.
    path_to_id = {}
    root_folder_id = None
    for file in files:
        parts = file.filename.split('/')
        file_name = parts[-1]
        parent_id = folderId  # files with no directory component land directly in the target folder
        # Create every missing ancestor, outermost first, so deeply nested
        # files work even when no shallower file preceded them.
        for depth in range(1, len(parts)):
            dir_path = '/'.join(parts[:depth])
            if dir_path not in path_to_id:
                created = await createFolderHandler(userID=userID, folderName=parts[depth - 1], parentFolderID=parent_id)
                path_to_id[dir_path] = created['id']
                if root_folder_id is None:
                    # First folder created is the root of the uploaded tree.
                    root_folder_id = created['id']
            parent_id = path_to_id[dir_path]
        # Store the file in its (possibly just created) parent folder.
        await createFileHandler(userID=userID, folderId=parent_id, file=file, force=True, dir_name=file_name, valid_dir_name=True)
    if root_folder_id is None:
        # Empty upload or bare filenames only: return the target folder itself
        # instead of crashing on an unbound variable.
        root_folder_id = folderId
    # NOTE(review): "getFodlerFormatted" looks misspelled but matches the
    # existing Database API name used elsewhere in this module — kept as-is.
    return await Database.getFodlerFormatted(root_folder_id)
20 changes: 10 additions & 10 deletions code/services/upsertService.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
#-----------------------------------------------------------------------------------------------------------------------

# Reading style sheet values as rows (CSV,XLSX)
async def read_style_sheet(file: UploadFile):
async def read_style_sheet(file: UploadFile,name : str):
"""
Read and parse a style sheet file (CSV or XLSX) and return its content as rows.
Expand All @@ -75,13 +75,13 @@ async def read_style_sheet(file: UploadFile):
Raises:
ValueError: If the file format is not supported.
"""
match os.path.splitext(file.filename)[1].lower():
match os.path.splitext(name)[1].lower():
case '.csv':
df = pd.read_csv(file.file)
case '.xlsx':
df = pd.read_excel(file.file)
case _ :
ValueError(f"Unsupported file format for formating : {os.path.splitext(file.filename)[1].lower()}")
ValueError(f"Unsupported file format for formating : {os.path.splitext(name)[1].lower()}")
loader = DataFrameLoader(data_frame=df,page_content_column=df.columns[0])
rows = loader.load()
return [rows,df.columns[0]]
Expand All @@ -108,7 +108,7 @@ def split_rows(rows,page_content):
return chunks

# Reading file values as text (TXT,PDF)
async def read_text(file: UploadFile,url):
async def read_text(file: UploadFile,url,name : str):
"""
Split rows into chunks and combine metadata with content.
Expand All @@ -124,7 +124,7 @@ async def read_text(file: UploadFile,url):
list: A list of stringified chunks, where each chunk is a list of combined metadata and content.
"""
text = ''
match os.path.splitext(file.filename)[1].lower():
match os.path.splitext(name)[1].lower():
case '.pdf':
loader = PyPDFLoader(url)
pages = loader.load()
Expand All @@ -142,7 +142,7 @@ async def read_text(file: UploadFile,url):
content = await file.read()
return content.decode('utf-8')
case _ :
ValueError(f"Unsupported file format for formating : {os.path.splitext(file.filename)[1].lower()}")
ValueError(f"Unsupported file format for formating : {os.path.splitext(name)[1].lower()}")

# Spliting text
def split_text(text):
Expand Down Expand Up @@ -295,20 +295,20 @@ async def process_and_upsert_service(file,name,file_id,url,userID,saved_name):
upsert_name_to_pinecone(name,file_id,userID)

# Upserting the file's content
file_ext = os.path.splitext(file.filename)[1].lower()
file_ext = os.path.splitext(name)[1]
if (file_ext not in SUPPORTED_EXTENSIONS):
# No upserting (for the moment)
return []
return [] , ''
if(file_ext == '.csv' or file_ext == '.xlsx' ):
# Upserting rows
rows,page_content = await read_style_sheet(file)
rows,page_content = await read_style_sheet(file,name)
chunks = split_rows(rows,page_content=page_content)
upsert_content_to_pinecone(chunks,name,file_id,50,userID)
# Generating tags and description
extracted_content = ' '.join(chunks[:min(TAGS_GENERATION_CHUNKS, len(chunks))])
else:
# Upserting text
text = await read_text(file,url)
text = await read_text(file,url,name)
chunks = split_text(text)
upsert_content_to_pinecone(chunks,name,file_id,100,userID)
# Generating tags and description
Expand Down

0 comments on commit b87cd6d

Please sign in to comment.