Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Multiple file upload #917

Merged
merged 1 commit into from
Apr 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 35 additions & 21 deletions application/api/user/routes.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import uuid
import shutil
from flask import Blueprint, request, jsonify
from urllib.parse import urlparse
import requests
Expand Down Expand Up @@ -136,30 +137,43 @@
return {"status": "no name"}
job_name = secure_filename(request.form["name"])
# check if the post request has the file part
if "file" not in request.files:
print("No file part")
return {"status": "no file"}
file = request.files["file"]
if file.filename == "":
files = request.files.getlist("file")

Check warning on line 140 in application/api/user/routes.py

View check run for this annotation

Codecov / codecov/patch

application/api/user/routes.py#L140

Added line #L140 was not covered by tests

if not files or all(file.filename == '' for file in files):

Check warning on line 142 in application/api/user/routes.py

View check run for this annotation

Codecov / codecov/patch

application/api/user/routes.py#L142

Added line #L142 was not covered by tests
return {"status": "no file name"}

if file:
filename = secure_filename(file.filename)
# save dir
save_dir = os.path.join(current_dir, settings.UPLOAD_FOLDER, user, job_name)
# create dir if not exists
if not os.path.exists(save_dir):
os.makedirs(save_dir)

file.save(os.path.join(save_dir, filename))
task = ingest.delay(settings.UPLOAD_FOLDER, [".rst", ".md", ".pdf", ".txt", ".docx",
".csv", ".epub", ".html", ".mdx"],
job_name, filename, user)
# task id
task_id = task.id
return {"status": "ok", "task_id": task_id}
# Directory where files will be saved
save_dir = os.path.join(current_dir, settings.UPLOAD_FOLDER, user, job_name)
os.makedirs(save_dir, exist_ok=True)

Check warning on line 147 in application/api/user/routes.py

View check run for this annotation

Codecov / codecov/patch

application/api/user/routes.py#L146-L147

Added lines #L146 - L147 were not covered by tests

if len(files) > 1:

Check warning on line 149 in application/api/user/routes.py

View check run for this annotation

Codecov / codecov/patch

application/api/user/routes.py#L149

Added line #L149 was not covered by tests
# Multiple files; prepare them for zip
temp_dir = os.path.join(save_dir, "temp")
os.makedirs(temp_dir, exist_ok=True)

Check warning on line 152 in application/api/user/routes.py

View check run for this annotation

Codecov / codecov/patch

application/api/user/routes.py#L151-L152

Added lines #L151 - L152 were not covered by tests

for file in files:
filename = secure_filename(file.filename)
file.save(os.path.join(temp_dir, filename))

Check warning on line 156 in application/api/user/routes.py

View check run for this annotation

Codecov / codecov/patch

application/api/user/routes.py#L154-L156

Added lines #L154 - L156 were not covered by tests

# Use shutil.make_archive to zip the temp directory
zip_path = shutil.make_archive(base_name=os.path.join(save_dir, job_name), format='zip', root_dir=temp_dir)
final_filename = os.path.basename(zip_path)

Check warning on line 160 in application/api/user/routes.py

View check run for this annotation

Codecov / codecov/patch

application/api/user/routes.py#L159-L160

Added lines #L159 - L160 were not covered by tests

# Clean up the temporary directory after zipping
shutil.rmtree(temp_dir)

Check warning on line 163 in application/api/user/routes.py

View check run for this annotation

Codecov / codecov/patch

application/api/user/routes.py#L163

Added line #L163 was not covered by tests
else:
return {"status": "error"}
# Single file
file = files[0]
final_filename = secure_filename(file.filename)
file_path = os.path.join(save_dir, final_filename)
file.save(file_path)

Check warning on line 169 in application/api/user/routes.py

View check run for this annotation

Codecov / codecov/patch

application/api/user/routes.py#L166-L169

Added lines #L166 - L169 were not covered by tests

# Call ingest with the single file or zipped file
task = ingest.delay(settings.UPLOAD_FOLDER, [".rst", ".md", ".pdf", ".txt", ".docx",

Check warning on line 172 in application/api/user/routes.py

View check run for this annotation

Codecov / codecov/patch

application/api/user/routes.py#L172

Added line #L172 was not covered by tests
".csv", ".epub", ".html", ".mdx"],
job_name, final_filename, user)

return {"status": "ok", "task_id": task.id}

Check warning on line 176 in application/api/user/routes.py

View check run for this annotation

Codecov / codecov/patch

application/api/user/routes.py#L176

Added line #L176 was not covered by tests

@user.route("/api/remote", methods=["POST"])
def upload_remote():
Expand Down
36 changes: 31 additions & 5 deletions application/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,32 @@
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)

def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5):
    """
    Recursively extract zip files with a limit on recursion depth.

    The archive at ``zip_path`` is extracted into ``extract_to`` and then
    deleted. Every ``.zip`` file subsequently found under ``extract_to`` is
    extracted in the same way into the directory that contains it.

    Args:
        zip_path (str): Path to the zip file to be extracted.
        extract_to (str): Destination path for extracted files.
        current_depth (int): Current depth of recursion.
        max_depth (int): Maximum allowed depth of recursion to prevent infinite loops.

    Returns:
        None. When ``current_depth`` exceeds ``max_depth`` a message is
        printed and the archive is left on disk untouched.
    """
    if current_depth > max_depth:
        print(f"Reached maximum recursion depth of {max_depth}")
        return

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to)
    # Remove the zip file after extracting (done once the handle is closed).
    os.remove(zip_path)

    # Check for nested zip files and extract them
    for root, _dirs, files in os.walk(extract_to):
        for file in files:
            if not file.endswith(".zip"):
                continue
            file_path = os.path.join(root, file)
            # `files` is a snapshot taken before any nested call ran. A nested
            # call walks this same directory and may already have extracted
            # and deleted a sibling archive, so skip entries that are gone
            # instead of failing with FileNotFoundError.
            if not os.path.isfile(file_path):
                continue
            # If a nested zip file is found, extract it recursively
            extract_zip_recursive(file_path, root, current_depth + 1, max_depth)

Check warning on line 63 in application/worker.py

View check run for this annotation

Codecov / codecov/patch

application/worker.py#L62-L63

Added lines #L62 - L63 were not covered by tests


# Define the main function for ingesting and processing documents.
def ingest_worker(self, directory, formats, name_job, filename, user):
Expand Down Expand Up @@ -66,9 +92,11 @@
token_check = True
min_tokens = 150
max_tokens = 1250
full_path = directory + "/" + user + "/" + name_job
recursion_depth = 2
full_path = os.path.join(directory, user, name_job)

Check warning on line 96 in application/worker.py

View check run for this annotation

Codecov / codecov/patch

application/worker.py#L95-L96

Added lines #L95 - L96 were not covered by tests
import sys


print(full_path, file=sys.stderr)
# check if API_URL env variable is set
file_data = {"name": name_job, "file": filename, "user": user}
Expand All @@ -81,14 +109,12 @@

if not os.path.exists(full_path):
os.makedirs(full_path)
with open(full_path + "/" + filename, "wb") as f:
with open(os.path.join(full_path, filename), "wb") as f:

Check warning on line 112 in application/worker.py

View check run for this annotation

Codecov / codecov/patch

application/worker.py#L112

Added line #L112 was not covered by tests
f.write(file)

# check if file is .zip and extract it
if filename.endswith(".zip"):
with zipfile.ZipFile(full_path + "/" + filename, "r") as zip_ref:
zip_ref.extractall(full_path)
os.remove(full_path + "/" + filename)
extract_zip_recursive(os.path.join(full_path, filename), full_path, 0, recursion_depth)

Check warning on line 117 in application/worker.py

View check run for this annotation

Codecov / codecov/patch

application/worker.py#L117

Added line #L117 was not covered by tests

self.update_state(state="PROGRESS", meta={"current": 1})

Expand Down
2 changes: 1 addition & 1 deletion frontend/src/upload/Upload.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ export default function Upload({

const { getRootProps, getInputProps, isDragActive } = useDropzone({
onDrop,
multiple: false,
multiple: true,
onDragEnter: doNothing,
onDragOver: doNothing,
onDragLeave: doNothing,
Expand Down
Loading