Skip to content

Commit

Permalink
Redirect client to download media directly from Amazon S3 / Azure Sto…
Browse files Browse the repository at this point in the history
…rage (#2713)

* download EntityList directly from S3

* fix failing test

* fix failing test

* use text/csv mimetype for EntityList CSV download

* refactor code

* add test

* fix failing test

* add docstring

* download export directly from s3

* remove pdb break

* fix failing tests

* enhance response_with_mimetype_and_name to redirect to s3

* revert changes

* fix lint error logging-fstring-interpolation

* fix failing tests

* fix failing tests

* fix failing tests

* fix failing tests

* enable commented code

* update tests

* add comment
  • Loading branch information
kelvin-muchiri authored Oct 14, 2024
1 parent 19a52eb commit d17dd40
Show file tree
Hide file tree
Showing 16 changed files with 331 additions and 272 deletions.
8 changes: 4 additions & 4 deletions onadata/apps/api/tests/viewsets/test_dataview_viewset.py
Original file line number Diff line number Diff line change
Expand Up @@ -756,7 +756,7 @@ def test_csv_export_dataview(self):
content_disposition = headers["Content-Disposition"]
filename = filename_from_disposition(content_disposition)
_basename, ext = os.path.splitext(filename)
self.assertEqual(ext, ".csv")
self.assertEqual(ext, '.csv"')

content = get_response_content(response)
test_file_path = os.path.join(
Expand Down Expand Up @@ -838,7 +838,7 @@ def test_zip_export_dataview(self):
content_disposition = headers["Content-Disposition"]
filename = filename_from_disposition(content_disposition)
_basename, ext = os.path.splitext(filename)
self.assertEqual(ext, ".zip")
self.assertEqual(ext, '.zip"')

# pylint: disable=invalid-name
@override_settings(CELERY_TASK_ALWAYS_EAGER=True)
Expand Down Expand Up @@ -1574,7 +1574,7 @@ def test_export_dataview_not_affected_by_normal_exports(self):
content_disposition = headers["Content-Disposition"]
filename = filename_from_disposition(content_disposition)
_basename, ext = os.path.splitext(filename)
self.assertEqual(ext, ".csv")
self.assertEqual(ext, '.csv"')

content = get_response_content(response)

Expand Down Expand Up @@ -1957,7 +1957,7 @@ def test_csv_export_dataview_date_filter(self):
content_disposition = headers["Content-Disposition"]
filename = filename_from_disposition(content_disposition)
_basename, ext = os.path.splitext(filename)
self.assertEqual(ext, ".csv")
self.assertEqual(ext, '.csv"')

content = get_response_content(response)
self.assertEqual(content, "name,age,gender\nDennis Wambua,28,male\n")
Expand Down
46 changes: 40 additions & 6 deletions onadata/apps/api/tests/viewsets/test_entity_list_viewset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import sys
import uuid
from datetime import datetime, timezone as dtz
from unittest.mock import patch
from unittest.mock import patch, MagicMock

from django.core.cache import cache
from django.test import override_settings
Expand All @@ -18,6 +18,7 @@
from onadata.libs.models.share_project import ShareProject
from onadata.libs.pagination import StandardPageNumberPagination
from onadata.libs.permissions import ROLES, OwnerRole
from onadata.apps.viewer.models.export import GenericExport
from onadata.libs.utils.user_auth import get_user_default_project


Expand Down Expand Up @@ -542,15 +543,15 @@ def test_render_csv(self):
response = self.view(request, pk=self.entity_list.pk, format="csv")
self.assertEqual(response.status_code, 200)
self.assertEqual(
response.get("Content-Disposition"), "attachment; filename=trees.csv"
response.get("Content-Disposition"), 'attachment; filename="trees.csv"'
)
self.assertEqual(response["Content-Type"], "application/csv")
# Using `Accept` header
request = self.factory.get("/", HTTP_ACCEPT="text/csv", **self.extra)
response = self.view(request, pk=self.entity_list.pk)
self.assertEqual(response.status_code, 200)
self.assertEqual(
response.get("Content-Disposition"), "attachment; filename=trees.csv"
response.get("Content-Disposition"), 'attachment; filename="trees.csv"'
)
self.assertEqual(response["Content-Type"], "application/csv")

Expand Down Expand Up @@ -1709,23 +1710,23 @@ def test_download(self):
response = self.view(request, pk=self.entity_list.pk)
self.assertEqual(response.status_code, 200)
self.assertEqual(
response["Content-Disposition"], "attachment; filename=trees.csv"
response["Content-Disposition"], 'attachment; filename="trees.csv"'
)
self.assertEqual(response["Content-Type"], "application/csv")
# Using `.csv` suffix
request = self.factory.get("/", **self.extra)
response = self.view(request, pk=self.entity_list.pk, format="csv")
self.assertEqual(response.status_code, 200)
self.assertEqual(
response["Content-Disposition"], "attachment; filename=trees.csv"
response["Content-Disposition"], 'attachment; filename="trees.csv"'
)
self.assertEqual(response["Content-Type"], "application/csv")
# Using `Accept` header
request = self.factory.get("/", HTTP_ACCEPT="text/csv", **self.extra)
response = self.view(request, pk=self.entity_list.pk)
self.assertEqual(response.status_code, 200)
self.assertEqual(
response.get("Content-Disposition"), "attachment; filename=trees.csv"
response.get("Content-Disposition"), 'attachment; filename="trees.csv"'
)
self.assertEqual(response["Content-Type"], "application/csv")
# Unsupported suffix
Expand Down Expand Up @@ -1786,3 +1787,36 @@ def test_soft_deleted(self):
request = self.factory.get("/", **self.extra)
response = self.view(request, pk=self.entity_list.pk)
self.assertEqual(response.status_code, 404)

@patch("onadata.libs.utils.logger_tools.get_storage_class")
@patch("onadata.libs.utils.logger_tools.boto3.client")
def test_download_from_s3(self, mock_presigned_urls, mock_get_storage_class):
"""EntityList dataset is downloaded from Amazon S3"""
expected_url = (
"https://testing.s3.amazonaws.com/bob/exports/"
"trees/csv/trees_2024_06_21_07_47_24_026998.csv?"
"response-content-disposition=attachment%3Bfilename%trees.csv&"
"response-content-type=application%2Foctet-stream&"
"AWSAccessKeyId=AKIAJ3XYHHBIJDL7GY7A"
"&Signature=aGhiK%2BLFVeWm%2Fmg3S5zc05g8%3D&Expires=1615554960"
)
mock_presigned_urls().generate_presigned_url = MagicMock(
return_value=expected_url
)
mock_get_storage_class()().bucket.name = "onadata"
request = self.factory.get("/", **self.extra)
response = self.view(request, pk=self.entity_list.pk)
self.assertEqual(response.status_code, 302)
self.assertEqual(response.url, expected_url)
self.assertTrue(mock_presigned_urls.called)
export = GenericExport.objects.first()
mock_presigned_urls().generate_presigned_url.assert_called_with(
"get_object",
Params={
"Bucket": "onadata",
"Key": export.filepath,
"ResponseContentDisposition": 'attachment; filename="trees.csv"',
"ResponseContentType": "application/octet-stream",
},
ExpiresIn=3600,
)
117 changes: 60 additions & 57 deletions onadata/apps/api/tests/viewsets/test_export_viewset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""
import os
from tempfile import NamedTemporaryFile
from unittest.mock import MagicMock, patch

from django.conf import settings
from django.utils.dateparse import parse_datetime
Expand Down Expand Up @@ -32,24 +33,32 @@ def setUp(self):
self.formats = ["csv", "csvzip", "kml", "osm", "savzip", "xls", "xlsx", "zip"]
self.view = ExportViewSet.as_view({"get": "retrieve"})

def _create_export(self):
# Create a temporary file in the 'exports' directory and
# prevent it from being deleted automatically
temp_dir = os.path.join(settings.MEDIA_ROOT, "exports")
os.makedirs(temp_dir, exist_ok=True)
dummy_export_file = NamedTemporaryFile(
suffix=".xlsx", dir=temp_dir, delete=False
)
dummy_export_file.close() # Explicitly close the file
filename = os.path.basename(dummy_export_file.name)
export = Export.objects.create(
xform=self.xform, filename=filename, filedir="exports"
)
return export

def test_export_response(self):
"""
Test ExportViewSet retrieve has the correct headers in response.
"""
self._create_user_and_login()
self._publish_transportation_form()
temp_dir = settings.MEDIA_ROOT
dummy_export_file = NamedTemporaryFile(suffix=".xlsx", dir=temp_dir)
filename = os.path.basename(dummy_export_file.name)
filedir = os.path.dirname(dummy_export_file.name)
export = Export.objects.create(
xform=self.xform, filename=filename, filedir=filedir
)
export.save()
export = self._create_export()
request = self.factory.get("/export")
force_authenticate(request, user=self.user)
response = self.view(request, pk=export.pk)
self.assertIn(filename, response.get("Content-Disposition"))
self.assertIn(export.filename, response.get("Content-Disposition"))

def test_export_formats_present(self):
"""
Expand Down Expand Up @@ -92,14 +101,7 @@ def test_export_list_public(self):
self._publish_transportation_form()
self.xform.shared_data = True
self.xform.save()
temp_dir = settings.MEDIA_ROOT
dummy_export_file = NamedTemporaryFile(suffix=".xlsx", dir=temp_dir)
filename = os.path.basename(dummy_export_file.name)
filedir = os.path.dirname(dummy_export_file.name)
export = Export.objects.create(
xform=self.xform, filename=filename, filedir=filedir
)
export.save()
self._create_export()
view = ExportViewSet.as_view({"get": "list"})

# Should be empty list when no xform filter is provided
Expand All @@ -123,14 +125,7 @@ def test_export_list_public_form(self):
self._publish_transportation_form()
self.xform.shared_data = True
self.xform.save()
temp_dir = settings.MEDIA_ROOT
dummy_export_file = NamedTemporaryFile(suffix=".xlsx", dir=temp_dir)
filename = os.path.basename(dummy_export_file.name)
filedir = os.path.dirname(dummy_export_file.name)
export = Export.objects.create(
xform=self.xform, filename=filename, filedir=filedir
)
export.save()
self._create_export()
view = ExportViewSet.as_view({"get": "list"})
request = self.factory.get("/export", {"xform": self.xform.pk})
force_authenticate(request, user=user_mosh)
Expand Down Expand Up @@ -220,14 +215,7 @@ def test_export_list_on_user(self):
"""
self._create_user_and_login()
self._publish_transportation_form()
temp_dir = settings.MEDIA_ROOT
dummy_export_file = NamedTemporaryFile(suffix=".xlsx", dir=temp_dir)
filename = os.path.basename(dummy_export_file.name)
filedir = os.path.dirname(dummy_export_file.name)
exports = [
Export.objects.create(xform=self.xform, filename=filename, filedir=filedir)
]
exports[0].save()
exports = [self._create_export()]
view = ExportViewSet.as_view({"get": "list"})
request = self.factory.get("/export", data={"xform": self.xform.id})
force_authenticate(request, user=self.user)
Expand All @@ -242,14 +230,7 @@ def test_export_list_on_with_different_users(self):
"""
self._create_user_and_login()
self._publish_transportation_form()
temp_dir = settings.MEDIA_ROOT
dummy_export_file = NamedTemporaryFile(suffix=".xlsx", dir=temp_dir)
filename = os.path.basename(dummy_export_file.name)
filedir = os.path.dirname(dummy_export_file.name)
export = Export.objects.create(
xform=self.xform, filename=filename, filedir=filedir
)
export.save()
self._create_export()
view = ExportViewSet.as_view({"get": "list"})
request = self.factory.get("/export", data={"xform": self.xform.id})
self._create_user_and_login(username="mary", password="password1")
Expand Down Expand Up @@ -546,14 +527,7 @@ def test_export_retrieval_authentication(self):
"""
self._create_user_and_login()
self._publish_transportation_form()
temp_dir = settings.MEDIA_ROOT
dummy_export_file = NamedTemporaryFile(suffix=".xlsx", dir=temp_dir)
filename = os.path.basename(dummy_export_file.name)
filedir = os.path.dirname(dummy_export_file.name)
export = Export.objects.create(
xform=self.xform, filename=filename, filedir=filedir
)
export.save()
export = self._create_export()
extra = {"HTTP_AUTHORIZATION": f"Token {self.user.auth_token.key}"}

request = self.factory.get("/export", **extra)
Expand Down Expand Up @@ -589,14 +563,7 @@ def test_export_failure_reason_returned(self):
def test_export_are_downloadable_to_all_users_when_public_form(self):
self._create_user_and_login()
self._publish_transportation_form()
temp_dir = settings.MEDIA_ROOT
dummy_export_file = NamedTemporaryFile(suffix=".xlsx", dir=temp_dir)
filename = os.path.basename(dummy_export_file.name)
filedir = os.path.dirname(dummy_export_file.name)
export = Export.objects.create(
xform=self.xform, filename=filename, filedir=filedir
)
export.save()
export = self._create_export()

user_alice = self._create_user("alice", "alice")
# create user profile and set require_auth to false for tests
Expand All @@ -622,3 +589,39 @@ def test_export_are_downloadable_to_all_users_when_public_form(self):
request = self.factory.get("/export", **alices_extra)
response = self.view(request, pk=export.pk)
self.assertEqual(response.status_code, 200)

@patch("onadata.libs.utils.logger_tools.get_storage_class")
@patch("onadata.libs.utils.logger_tools.boto3.client")
def test_download_from_s3(self, mock_presigned_urls, mock_get_storage_class):
"""Export is downloaded from Amazon S3"""
expected_url = (
"https://testing.s3.amazonaws.com/bob/exports/"
"trees/csv/trees_2024_06_21_07_47_24_026998.csv?"
"response-content-disposition=attachment%3Bfilename%trees.csv&"
"response-content-type=application%2Foctet-stream&"
"AWSAccessKeyId=AKIAJ3XYHHBIJDL7GY7A"
"&Signature=aGhiK%2BLFVeWm%2Fmg3S5zc05g8%3D&Expires=1615554960"
)
mock_presigned_urls().generate_presigned_url = MagicMock(
return_value=expected_url
)
mock_get_storage_class()().bucket.name = "onadata"
self._create_user_and_login()
self._publish_transportation_form()
export = self._create_export()
request = self.factory.get("/export")
force_authenticate(request, user=self.user)
response = self.view(request, pk=export.pk)
self.assertEqual(response.status_code, 302)
self.assertEqual(response.url, expected_url)
self.assertTrue(mock_presigned_urls.called)
mock_presigned_urls().generate_presigned_url.assert_called_with(
"get_object",
Params={
"Bucket": "onadata",
"Key": export.filepath,
"ResponseContentDisposition": f'attachment; filename="{export.filename}"',
"ResponseContentType": "application/octet-stream",
},
ExpiresIn=3600,
)
Loading

0 comments on commit d17dd40

Please sign in to comment.