Skip to content

Commit

Permalink
Return 404s for any files not found in the zip, and have BadZipfile e…
Browse files Browse the repository at this point in the history
…rrors report the name, size, mode and download state to help us narrow down problems with opening. Also, tests. (#942)
  • Loading branch information
kollivier authored Sep 7, 2018
1 parent 764b50e commit 50943b2
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 51 deletions.
22 changes: 22 additions & 0 deletions contentcuration/contentcuration/tests/base.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import datetime

from django.conf import settings
from django.core.files.uploadedfile import SimpleUploadedFile
from django.core.management import call_command
from django.core.urlresolvers import reverse_lazy
from django.test import TestCase

from contentcuration.models import User
from contentcuration.utils import minio_utils
from contentcuration.utils.policies import get_latest_policies
from rest_framework.authtoken.models import Token
Expand Down Expand Up @@ -42,6 +45,8 @@ class StudioTestCase(TestCase, BucketTestMixin):
def setUpClass(cls):
super(StudioTestCase, cls).setUpClass()
call_command('loadconstants')
cls.url = "http://127.0.0.1:8000"
cls.admin_user = User.objects.create_superuser('big_shot', '[email protected]', 'password')

def setUp(self):
if not self.persist_bucket:
Expand All @@ -51,6 +56,22 @@ def tearDown(self):
if not self.persist_bucket:
self.delete_bucket()

def admin_client(self):
client = APIClient()
client.force_authenticate(self.admin_user)
return client

def upload_temp_file(self, data, ext='.txt'):
"""
Uploads a file to the server using an authorized client.
"""
fileobj_temp = testdata.create_temp_file(data, ext=ext)
name = fileobj_temp['name']

f = SimpleUploadedFile(name, data)
file_upload_url = self.url + str(reverse_lazy('api_file_upload'))
return fileobj_temp, self.admin_client().post(file_upload_url, {"file": f})


class StudioAPITestCase(APITestCase, BucketTestMixin):

Expand All @@ -67,6 +88,7 @@ def tearDown(self):
if not self.persist_bucket:
self.delete_bucket()


class BaseTestCase(StudioTestCase):
def setUp(self):
super(BaseTestCase, self).setUp()
Expand Down
25 changes: 3 additions & 22 deletions contentcuration/contentcuration/tests/test_createchannel.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,11 @@
from mixer.backend.django import mixer
from contentcuration import models

from django.core.files.uploadedfile import SimpleUploadedFile
from django.core.urlresolvers import reverse_lazy

from rest_framework.test import APIClient

from base import StudioTestCase
from base import BaseTestCase
from testdata import create_temp_file
from contentcuration import models as cc

Expand Down Expand Up @@ -52,15 +51,12 @@ def add_field_defaults_to_node(node):
# Tests
###

class CreateChannelTestCase(StudioTestCase):
class CreateChannelTestCase(BaseTestCase):

@classmethod
def setUpClass(cls):
super(CreateChannelTestCase, cls).setUpClass()

cls.url = "http://127.0.0.1:8000"
cls.admin_user = models.User.objects.create_superuser('big_shot', '[email protected]', 'password')

cls.channel_metadata = {
"name": "Aron's cool channel",
"id": "fasdfada",
Expand All @@ -78,21 +74,6 @@ def setUp(self):
self.fileobj_document = create_temp_file("ghi", 'document', 'pdf', 'application/pdf')
self.fileobj_exercise = create_temp_file("jkl", 'exercise', 'perseus', 'application/perseus')

def admin_client(self):
client = APIClient()
client.force_authenticate(self.admin_user)
return client

def upload_file(self):
"""
Uploads a file to the server using an authorized client.
"""
fileobj_temp = create_temp_file(":)")
name = fileobj_temp['name']
file_upload_url = self.url + str(reverse_lazy('api_file_upload'))
f = SimpleUploadedFile(name, fileobj_temp['data'])
return self.admin_client().post(file_upload_url, {"file": f})

def create_channel(self):
create_channel_url = self.url + str(reverse_lazy('api_create_channel'))
payload = {
Expand All @@ -103,7 +84,7 @@ def create_channel(self):
return response

def test_api_file_upload_status(self):
response = self.upload_file()
fileobj, response = self.upload_temp_file(":)")
assert response.status_code == requests.codes.ok

def test_channel_create_channel_created(self):
Expand Down
74 changes: 74 additions & 0 deletions contentcuration/contentcuration/tests/test_zipcontentview.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import os
import shutil
import tempfile
import zipfile

from base import BaseTestCase


class ZipFileTestCase(BaseTestCase):
def setUp(self):
super(ZipFileTestCase, self).setUp()
self.zipfile_url = '/zipcontent/'

self.temp_files = []

def tearDown(self):
for temp_file in self.temp_files:
os.remove(temp_file)

def do_create_zip(self):
zip_handle, zip_filename = tempfile.mkstemp(suffix='.zip')
self.temp_files.append(zip_filename)
os.close(zip_handle)

with zipfile.ZipFile(zip_filename, 'w') as zip:
zip.writestr("index.html", "<html><head></head><body><p>Hello World!</p></body></html>")

return zip_filename

def test_invalid_zip(self):
temp_file, response = self.upload_temp_file("Hello!", ext="zip")
url = '{}{}/'.format(self.zipfile_url, temp_file['name'])
response = self.get(url)
assert response.status_code == 500

def test_valid_zipfile(self):
myzip = self.do_create_zip()

self.sign_in()
temp_file, response = self.upload_temp_file(open(myzip, 'rb').read(), ext='.zip')
assert response.status_code == 200
url = '{}{}/'.format(self.zipfile_url, temp_file['name'])
response = self.get(url)
assert response.status_code == 200

def test_valid_zipfile_file_access(self):
myzip = self.do_create_zip()

self.sign_in()
temp_file, response = self.upload_temp_file(open(myzip, 'rb').read(), ext='.zip')
assert response.status_code == 200
url = '{}{}/index.html'.format(self.zipfile_url, temp_file['name'])
response = self.get(url)
assert response.status_code == 200

def test_valid_zipfile_missing_file(self):
myzip = self.do_create_zip()

self.sign_in()
temp_file, response = self.upload_temp_file(open(myzip, 'rb').read(), ext='.zip')
assert response.status_code == 200
url = '{}{}/iamjustanillusion.txt'.format(self.zipfile_url, temp_file['name'])
response = self.get(url)
assert response.status_code == 404

def test_valid_zipfile_access_outside_zip_fails(self):
myzip = self.do_create_zip()

self.sign_in()
temp_file, response = self.upload_temp_file(open(myzip, 'rb').read(), ext='.zip')
assert response.status_code == 200
url = '{}{}/../outsidejson.js'.format(self.zipfile_url, temp_file['name'])
response = self.get(url)
assert response.status_code == 404
6 changes: 5 additions & 1 deletion contentcuration/contentcuration/utils/gcs_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,10 @@ def open(self, name, mode="rb"):
# compatible as a filesystem name. Change it to hex.
tmp_filename = tmp_filename.decode("base64").encode("hex")

is_new = True
if os.path.exists(tmp_filename):
f = open(tmp_filename)
is_new = False

# If there's no such file, then download that file
# from GCS.
Expand All @@ -51,7 +53,9 @@ def open(self, name, mode="rb"):
# reopen the file we just wrote, this time in read mode
f = open(tmp_filename)

return File(f)
django_file = File(f)
django_file.just_downloaded = is_new
return django_file

def exists(self, name):
blob = self.bucket.get_blob(name)
Expand Down
64 changes: 36 additions & 28 deletions contentcuration/contentcuration/views/zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
import re
import zipfile

from raven.contrib.django.raven_compat.models import client

from django.core.files.storage import default_storage
from django.http import Http404, HttpResponse, HttpResponseNotFound
from django.http import Http404, HttpResponse, HttpResponseNotFound, HttpResponseServerError
from django.http.response import FileResponse, HttpResponseNotModified
from django.utils.http import http_date
from django.views.decorators.clickjacking import xframe_options_exempt
Expand Down Expand Up @@ -52,7 +54,8 @@ def get(self, request, zipped_filename, embedded_filepath):
"""
Handles GET requests and serves a static file from within the zip file.
"""
assert VALID_STORAGE_FILENAME.match(zipped_filename), "'{}' is not a valid content storage filename".format(zipped_filename)
if not VALID_STORAGE_FILENAME.match(zipped_filename):
return HttpResponseNotFound("'{}' is not a valid URL for this zip file".format(zipped_filename))

storage = default_storage

Expand All @@ -73,32 +76,37 @@ def get(self, request, zipped_filename, embedded_filepath):

zf_obj = storage.open(zipped_path)

with zipfile.ZipFile(zf_obj) as zf:
# if no path, or a directory, is being referenced, look for an index.html file
if not embedded_filepath or embedded_filepath.endswith("/"):
embedded_filepath += "index.html"

# get the details about the embedded file, and ensure it exists
try:
info = zf.getinfo(embedded_filepath)
except KeyError:
return HttpResponseNotFound('"{}" does not exist inside "{}"'.format(embedded_filepath, zipped_filename))

# try to guess the MIME type of the embedded file being referenced
content_type = mimetypes.guess_type(embedded_filepath)[0] or 'application/octet-stream'

if not os.path.splitext(embedded_filepath)[1] == '.json':
# generate a streaming response object, pulling data from within the zip file
response = FileResponse(zf.open(info), content_type=content_type)
file_size = info.file_size
else:
# load the stream from json file into memory, replace the path_place_holder.
content = zf.open(info).read()
str_to_be_replaced = ('$' + exercises.IMG_PLACEHOLDER).encode()
zipcontent = ('/' + request.resolver_match.url_name + "/" + zipped_filename).encode()
content_with_path = content.replace(str_to_be_replaced, zipcontent)
response = HttpResponse(content_with_path, content_type=content_type)
file_size = len(content_with_path)
try:
with zipfile.ZipFile(zf_obj) as zf:
# if no path, or a directory, is being referenced, look for an index.html file
if not embedded_filepath or embedded_filepath.endswith("/"):
embedded_filepath += "index.html"

# get the details about the embedded file, and ensure it exists
try:
info = zf.getinfo(embedded_filepath)
except KeyError:
return HttpResponseNotFound('"{}" does not exist inside "{}"'.format(embedded_filepath, zipped_filename))

# try to guess the MIME type of the embedded file being referenced
content_type = mimetypes.guess_type(embedded_filepath)[0] or 'application/octet-stream'

if not os.path.splitext(embedded_filepath)[1] == '.json':
# generate a streaming response object, pulling data from within the zip file
response = FileResponse(zf.open(info), content_type=content_type)
file_size = info.file_size
else:
# load the stream from json file into memory, replace the path_place_holder.
content = zf.open(info).read()
str_to_be_replaced = ('$' + exercises.IMG_PLACEHOLDER).encode()
zipcontent = ('/' + request.resolver_match.url_name + "/" + zipped_filename).encode()
content_with_path = content.replace(str_to_be_replaced, zipcontent)
response = HttpResponse(content_with_path, content_type=content_type)
file_size = len(content_with_path)
except zipfile.BadZipfile:
just_downloaded = getattr(zf_obj, 'just_downloaded', "Unknown (Most likely local file)")
client.captureMessage("Unable to open zip file. File info: name={}, size={}, mode={}, just_downloaded={}".format(zf_obj.name, zf_obj.size, zf_obj.mode, just_downloaded))
return HttpResponseServerError("Attempt to open zip file failed. Please try again, and if you continue to receive this message, please check that the zip file is valid.")

# set the last-modified header to the date marked on the embedded file
if info.date_time:
Expand Down

0 comments on commit 50943b2

Please sign in to comment.