Skip to content

Commit

Permalink
documents: handle several file formats
Browse files Browse the repository at this point in the history
This commit allows all kinds of files to be added to documents. Depending on the type of the file, a preview may or may not be available.
Additionally, the way files are displayed has been completely refactored, to be consistent with the frontend views.

* Configures preview max size for images and JSON/XML files.
* Configures extensions which allow to show a preview.
* Adds a border to generated file thumbnail.
* Keeps the ratio when generating file thumbnail.
* Appends the original extension to the filename when generating system files, to avoid conflicts between files having the same name.
* Adds default metadata values for a file, only if the file is not deleted.
* Moves some functions related to documents from `views.py` to `utils.py`, to use them in JSON serialization.
* Adds `restriction`, `thumbnail` and `links` properties to file, in document detail view and serialization.
* Avoids removing the `restriction` property during data pre-loading, because this property is flagged as `dump_only`.
* Removes the call to `replace_refs()` in the document detail view, as no `$ref` property is used in the template.
* Changes `restricted` image for a better one.
* Adds a default image if no thumbnail is available.
* Refactors the macro for displaying thumbnails, as more information is now present in file metadata.
* Closes #322.
* Closes #339.

Co-Authored-by: Sébastien Délèze <[email protected]>
  • Loading branch information
Sébastien Délèze committed Nov 12, 2020
1 parent cf240e0 commit 4c2f105
Show file tree
Hide file tree
Showing 18 changed files with 504 additions and 422 deletions.
4 changes: 4 additions & 0 deletions sonar/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,3 +652,7 @@ def _(x):
# PREVIEW
# =======
# Settings prefixed with PREVIEWER_; both size limits below evaluate to
# 5 * 1024 * 1024 = 5242880 bytes (5 MiB).
PREVIEWER_BASE_TEMPLATE = 'sonar/preview/base.html'
PREVIEWER_MAX_IMAGE_SIZE_BYTES = 5 * 1024 * 1024
"""Maximum file size in bytes for image files."""
PREVIEWER_MAX_FILE_SIZE_BYTES = 5 * 1024 * 1024
"""Maximum file size in bytes for JSON/XML files."""
5 changes: 5 additions & 0 deletions sonar/modules/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,8 @@
'user': ('sonar.modules.users.serializers.schemas.export:'
'ExportSchemaV1'),
}

# Extensions are written in lower case and without the leading dot.
SONAR_APP_FILE_PREVIEW_EXTENSIONS = [
'jpeg', 'jpg', 'gif', 'png', 'pdf', 'json', 'xml', 'csv', 'zip', 'md'
]
"""List of extensions for which files can be previewed."""
20 changes: 13 additions & 7 deletions sonar/modules/documents/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,19 +181,25 @@ def sync_files(self, file, deleted=False):
:param file: File object.
:param deleted: Whether the given file has been deleted or not.
"""
# For documents, a thumbnail and a fulltext file is generated.
# Synchronise files between bucket and record.
self.files.flush()

# If file is not deleted, a thumbnail and a fulltext file is generated.
if not deleted:
self.create_fulltext_file(self.files[file.key])
self.create_thumbnail(self.files[file.key])

if not self.files[file.key].get('type'):
self.files[file.key]['type'] = 'file'
# Default type is `file`
if not self.files[file.key].get('type'):
self.files[file.key]['type'] = 'file'

if not self.files[file.key].get('label'):
self.files[file.key]['label'] = file.key
# Default label is `file.key`
if not self.files[file.key].get('label'):
self.files[file.key]['label'] = file.key

if not self.files[file.key].get('order'):
self.files[file.key]['order'] = self.get_next_file_order()
# Order is calculated with other files
if not self.files[file.key].get('order'):
self.files[file.key]['order'] = self.get_next_file_order()

super(DocumentRecord, self).sync_files(file, deleted)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,15 +78,16 @@
},
"order": {
"title": "Position",
"description": "Position of the file, The lowest position means file is the main file.",
"description": "Position of the file, the first file in the list is the main file.",
"type": "integer",
"default": 1,
"minimum": 1
},
"external_url": {
"title": "URL to file",
"title": "External URL",
"type": "string",
"minLength": 1,
"format": "uri",
"pattern": "^https?://.*"
},
"restricted": {
Expand All @@ -101,6 +102,7 @@
"description": "Example: 2019-05-05",
"type": "string",
"minLength": 1,
"format": "date",
"pattern": "^[0-9]{4}-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])$"
}
},
Expand Down
41 changes: 20 additions & 21 deletions sonar/modules/documents/marshmallow/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,14 @@
from invenio_records_rest.schemas import Nested, StrictKeysMixin
from invenio_records_rest.schemas.fields import GenFunction, \
PersistentIdentifier, SanitizedUnicode
from marshmallow import fields, pre_dump, pre_load
from marshmallow import EXCLUDE, fields, pre_dump, pre_load

from sonar.modules.documents.api import DocumentRecord
from sonar.modules.documents.permissions import DocumentPermission
from sonar.modules.documents.utils import has_external_urls_for_files, \
populate_files_properties
from sonar.modules.documents.views import contribution_text, \
create_publication_statement, dissertation, is_file_restricted, \
part_of_format
create_publication_statement, dissertation, part_of_format
from sonar.modules.serializers import schema_from_context
from sonar.modules.users.api import current_user_record

Expand All @@ -43,6 +44,14 @@
class FileSchemaV1(StrictKeysMixin):
"""File schema."""

class Meta:
"""Meta for file schema."""

# Specifically exclude unknown fields, as in the new version of
# marshmallow, dump_only fields are treated as included.
# https://github.com/marshmallow-code/marshmallow/issues/875
unknown = EXCLUDE

bucket = SanitizedUnicode()
file_id = SanitizedUnicode()
version_id = SanitizedUnicode()
Expand All @@ -56,12 +65,8 @@ class FileSchemaV1(StrictKeysMixin):
restricted = SanitizedUnicode()
embargo_date = SanitizedUnicode()
restriction = fields.Dict(dump_only=True)

@pre_load
def remove_restriction(self, data, **kwargs):
"""Remove restriction information before saving."""
data.pop('restriction', None)
return data
links = fields.Dict(dump_only=True)
thumbnail = SanitizedUnicode(dump_only=True)


class DocumentMetadataSchemaV1(StrictKeysMixin):
Expand Down Expand Up @@ -103,26 +108,20 @@ class DocumentMetadataSchemaV1(StrictKeysMixin):
permalink = SanitizedUnicode(dump_only=True)

@pre_dump
def process_files(self, item, **kwargs):
"""Add restrictions to file before dumping data.
def populate_files_properties(self, item, **kwargs):
"""Add some customs properties to file before dumping it.
:param item: Item object to process
:returns: Modified item
"""
if not item.get('_files'):
return item

# Add restrictions
for key, file in enumerate(item['_files']):
if file.get('type') == 'file':
restricted = is_file_restricted(file, item)

# Format date before serialization
if restricted.get('date'):
restricted['date'] = restricted['date'].strftime(
'%Y-%m-%d')
# Check if organisation record forces to point file to an external url
item['external_url'] = has_external_urls_for_files(item)

item['_files'][key]['restriction'] = restricted
# Add restriction, link and thumbnail to files
populate_files_properties(item)

# Sort files to have the main file in first position
item['_files'] = sorted(item['_files'],
Expand Down
11 changes: 7 additions & 4 deletions sonar/modules/documents/templates/documents/record.html
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
{% from 'sonar/macros/macro.html' import thumbnail %}

{%- block body %}
{% set record = record.replace_refs() %}
{% set title = record.title[0] | title_format(current_i18n.language) %}
{% set files = record.get_files_list() %}

Expand All @@ -29,8 +28,10 @@
<div class="col-lg-3 text-center">
{% if files and files | length > 0 %}
<div class="mb-4">
{{ thumbnail('documents', record, files | first) }}
{{ thumbnail(record, files | first) }}
</div>
{% else %}
<img src="{{ url_for('static', filename='images/no-image.png') }}" alt="{{ title }}" class="img-fluid">
{% endif %}

<!-- DOCUMENT TYPE -->
Expand Down Expand Up @@ -77,8 +78,8 @@ <h1 class="text-primary">{{ title }}</h1>
{% if record.contribution | length > 3 %}
<a href="#" id="show-more-contributors">{{ _('Show more') }}&hellip;</a>
{% endif %}
{% endif %}
</div>
{% endif %}

<!-- PUBLICATION STATEMENT -->
{% for provision_activity in record.provisionActivity %}
Expand Down Expand Up @@ -248,7 +249,9 @@ <h5 id="other_files" class="mt-5">{{ _('Other files') }}</h5>
<div class="row">
{% for file in files %}
{% if loop.index > 1 %}
{{ thumbnail('documents', record, file, 'col-lg-2 mb-4') }}
<div class="col-lg-2 mb-5">
{{ thumbnail(record, file) }}
</div>
{% endif %}
{% endfor %}
</div>
Expand Down
170 changes: 169 additions & 1 deletion sonar/modules/documents/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,14 @@

from __future__ import absolute_import, print_function

from sonar.modules.utils import format_date, remove_trailing_punctuation
import re
from datetime import datetime

from flask import current_app, g, request

from sonar.modules.api import SonarRecord
from sonar.modules.utils import change_filename_extension, format_date, \
remove_trailing_punctuation


def publication_statement_text(provision_activity):
Expand Down Expand Up @@ -63,3 +70,164 @@ def publication_statement_text(provision_activity):
value = remove_trailing_punctuation(value)
statement_with_language[key] = value
return statement_with_language


def get_current_organisation_code():
    """Resolve the organisation code that applies to the current request.

    The lookup order is: the ``view`` query parameter, then the organisation
    stored in the Flask globals, then the configured default organisation.

    :returns: The organisation code, or the value of the
        ``SONAR_APP_DEFAULT_ORGANISATION`` setting when nothing else is set.
    """
    # The query parameter comes first, which is useful for API calls.
    from_query = request.args.get('view')
    if from_query:
        return from_query

    # Next candidate: the code of the organisation stored in globals.
    code = g.get('organisation', {}).get('code')
    if code:
        return code

    # Nothing found, fall back to the default organisation.
    return current_app.config.get('SONAR_APP_DEFAULT_ORGANISATION')


def get_file_links(file, record):
    """Return link data to preview and/or download the file.

    :param file: File dict. It must contain a ``key`` and the ``restriction``
        dict, whose ``restricted`` flag is read here.
    :param record: Record the file belongs to. Its ``external_url`` flag and
        its ``pid`` are read here.
    :returns: Dict with the keys ``external``, ``preview`` and ``download``,
        each one holding a URL or ``None``.
    """
    links = {'external': None, 'preview': None, 'download': None}

    # File is restricted, no link.
    if file['restriction']['restricted']:
        return links

    # External URLs are enabled for the record and the file provides one.
    # A record without the `external_url` flag is treated as not enabled.
    if record.get('external_url') and file.get('external_url'):
        links['external'] = file['external_url']
        return links

    # The extension is the text following the LAST dot, so that a key such
    # as `report.v2.pdf` resolves to `pdf` and not to `v2.pdf`.
    _, dot, extension = file['key'].rpartition('.')
    if not dot:
        # No extension at all: no download nor preview link.
        return links

    links['download'] = '/documents/{pid}/files/{key}'.format(
        pid=record['pid'], key=file['key'])

    # Configured extensions are lower case, compare case-insensitively so
    # that `SCAN.PDF` can be previewed as well.
    previewable = current_app.config.get('SONAR_APP_FILE_PREVIEW_EXTENSIONS',
                                         [])
    if extension.lower() not in previewable:
        return links

    links['preview'] = '/documents/{pid}/preview/{key}'.format(
        pid=record['pid'], key=file['key'])
    return links


def get_file_restriction(file, record):
    """Check if current file can be displayed.

    :param file: File dict. The optional keys ``embargo_date`` (formatted as
        ``YYYY-MM-DD``) and ``restricted`` are read.
    :param record: Current record.
    :returns: Dict with a ``restricted`` boolean and a ``date`` holding the
        embargo date formatted as ``DD/MM/YYYY``, or ``None`` when the file
        is not restricted or has no embargo date in the future.
    """

    def is_restricted_by_scope(file):
        """File is restricted by scope (internal, rero or organisation).

        :param file: File object.
        :returns: True if the restriction applies to the current request.
        """
        # File is restricted by internal IPs. Without configured IPs, no
        # address is considered internal and the file stays restricted.
        if file['restricted'] == 'internal':
            return request.remote_addr not in current_app.config.get(
                'SONAR_APP_INTERNAL_IPS', [])

        # File is restricted by organisation
        organisation = get_current_organisation_code()

        # We are in global organisation, so restriction is active
        if organisation == current_app.config.get(
                'SONAR_APP_DEFAULT_ORGANISATION'):
            return True

        # No organisation in record, restriction is active
        if not record.get('organisation', {}).get('pid'):
            return True

        # Record organisation is different from current organisation
        if organisation != record['organisation']['pid']:
            return True

        return False

    restricted = {'restricted': False, 'date': None}

    try:
        embargo_date = datetime.strptime(file.get('embargo_date'), '%Y-%m-%d')
    except (TypeError, ValueError):
        # TypeError: no embargo date (None) or a value which is not a string.
        # ValueError: the string does not match the expected format.
        embargo_date = None

    # Store embargo date if greater than now
    if embargo_date and embargo_date > datetime.now():
        restricted['restricted'] = True
        restricted['date'] = embargo_date.strftime('%d/%m/%Y')

    # File has a restriction scope: the result of the scope check replaces
    # the flag computed from the embargo date.
    if file.get('restricted'):
        restricted['restricted'] = is_restricted_by_scope(file)

    # The date is only meaningful while the file is restricted.
    if not restricted['restricted']:
        restricted['date'] = None

    return restricted


def has_external_urls_for_files(record):
    """Tell whether the files of the record point to an external website.

    :param record: Current record.
    :returns: True if the PID of the record's organisation is listed in the
        ``SONAR_DOCUMENTS_ORGANISATIONS_EXTERNAL_FILES`` setting, False if
        the record has no organisation.
    """
    organisation = record.get('organisation', {})
    if not organisation:
        return False

    # The organisation is referenced either by a `$ref` link, from which the
    # PID is extracted, or directly by its PID.
    if organisation.get('$ref'):
        organisation_pid = SonarRecord.get_pid_by_ref_link(
            organisation['$ref'])
    else:
        organisation_pid = organisation['pid']

    external_organisations = current_app.config.get(
        'SONAR_DOCUMENTS_ORGANISATIONS_EXTERNAL_FILES')
    return organisation_pid in external_organisations


def get_thumbnail(file, record):
    """Return the URL of the image to display as thumbnail for a file.

    A restricted file gets the `restricted` image. Otherwise the key of the
    thumbnail is computed with ``change_filename_extension(key, 'jpg')`` and
    searched in the files of the record; if it is not found, the default
    image is returned.

    :param file: Dict of file from which thumbnail will be returned.
    :param record: Record object.
    :returns: URL to thumbnail file.
    """
    if file['restriction']['restricted']:
        return 'static/images/restricted.png'

    thumbnail_key = change_filename_extension(file['key'], 'jpg')

    # Collect the keys of all the files attached to the record.
    existing_keys = [item['key'] for item in record['_files']]
    if thumbnail_key not in existing_keys:
        return 'static/images/no-image.png'

    return '/documents/{pid}/files/{key}'.format(pid=record['pid'],
                                                 key=thumbnail_key)


def populate_files_properties(record):
    """Add restriction, thumbnail and links to the files of a record.

    Only the entries of ``record['_files']`` having the type ``file`` are
    processed. They are modified in place and nothing is returned.

    :param record: Record object
    """
    for entry in record['_files']:
        if entry.get('type') != 'file':
            continue

        # Restriction comes first, as thumbnail and links are built from it.
        entry['restriction'] = get_file_restriction(entry, record)
        entry['thumbnail'] = get_thumbnail(entry, record)
        entry['links'] = get_file_links(entry, record)
Loading

0 comments on commit 4c2f105

Please sign in to comment.