Skip to content

Commit

Permalink
file manager: copy from other bucket
Browse files Browse the repository at this point in the history
* closes zenodo/rdm-project#508

Co-authored-by: Javier Romero Castro <[email protected]>
Co-authored-by: Karolina Przerwa <[email protected]>
  • Loading branch information
2 people authored and zzacharo committed Nov 21, 2023
1 parent 63b4fce commit dc64686
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 20 deletions.
11 changes: 6 additions & 5 deletions invenio_records_resources/records/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,14 @@ def get_by_key(cls, record_id, key):
@classmethod
def list_by_record(cls, record_id, with_deleted=False):
    """List all record files by record ID.

    :param record_id: ID of the record whose file rows are listed.
    :param with_deleted: When false (the default), rows flagged as deleted
        are filtered out of the query.
    :returns: A generator yielding one ``cls`` instance per matching row,
        built from the row's ``data`` and bound to the row as ``model``.
    """
    # The query runs with autoflush disabled so that iterating it does not
    # flush pending session state.
    # NOTE(review): because this is a generator, the ``no_autoflush`` context
    # stays active while the generator is suspended between yields, and is
    # only left once it is exhausted or closed -- confirm callers always
    # consume it fully.
    with db.session.no_autoflush:
        query = cls.model_cls.query.filter(cls.model_cls.record_id == record_id)

        if not with_deleted:
            # ``!= True`` is deliberate: it builds a SQL expression on the
            # column; ``is not True`` would be evaluated in Python instead.
            query = query.filter(cls.model_cls.is_deleted != True)  # noqa: E712

        for obj in query:
            yield cls(obj.data, model=obj)

@property
def file(self):
Expand Down
2 changes: 1 addition & 1 deletion invenio_records_resources/records/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def object_version_id(cls):
@declared_attr
def object_version(cls):
    """Object version connected to the record file."""
    # TODO: investigate whether this relationship can be cached.
    return db.relationship(ObjectVersion)

@declared_attr
def __table_args__(cls):
Expand Down
67 changes: 54 additions & 13 deletions invenio_records_resources/records/systemfields/files/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,13 @@
}
"""

import uuid
from collections.abc import MutableMapping
from datetime import datetime
from functools import wraps
from sqlalchemy import insert

from invenio_db import db
from invenio_files_rest.errors import (
BucketLockedError,
InvalidKeyError,
Expand Down Expand Up @@ -266,24 +270,61 @@ def teardown(self, full=True):
self._order = []

def copy(self, src_files, copy_obj=True):
    """Copy from another file manager.

    Two strategies are used:

    * Bulk copy -- when ``copy_obj`` is true and ``self.bucket`` contains no
      object versions, all object versions of the source bucket are copied
      in one call and the record file rows are created with a single bulk
      insert.
    * Per-file copy -- otherwise, each source file is copied (or linked,
      when ``copy_obj`` is false) one at a time through ``self[key] = ...``.

    In both cases ``enabled``, ``default_preview`` and ``order`` are taken
    over from ``src_files``. Nothing is copied when the source file manager
    is disabled.

    :param src_files: The file manager to copy from.
    :param copy_obj: When true, object versions are copied into
        ``self.bucket``; when false, the source object versions are linked
        as they are.
    """
    self.enabled = src_files.enabled

    if not self.enabled:
        return

    # The bulk path always creates new object versions, so it may only be
    # taken when the caller asked for copies. The count is skipped entirely
    # when ``copy_obj`` is false.
    use_bulk_copy = copy_obj and (
        ObjectVersion.query.filter_by(bucket_id=self.bucket_id).count() == 0
    )

    if use_bulk_copy:
        # bucket is empty
        # copy all object versions to self.bucket
        # NOTE(review): ``copy_from`` is assumed to return one mapping per
        # copied object with at least "key" and "version_id" -- confirm
        # against invenio-files-rest.
        objs = ObjectVersion.copy_from(src_files.bucket_id, self.bucket_id)
        ovs_by_key = {obj["key"]: obj for obj in objs}

        record_id = self.record.id
        # Taken once so "created" and "updated" are identical on every
        # inserted row.
        now = datetime.utcnow()
        rf_to_bulk_insert = [
            {
                "id": uuid.uuid4(),
                "created": now,
                "updated": now,
                "key": key,
                "record_id": record_id,
                # presumably the initial row version counter -- TODO confirm
                "version_id": 1,
                "object_version_id": ovs_by_key[key]["version_id"],
                "json": rf.metadata or {},
            }
            for key, rf in src_files.items()
        ]

        if rf_to_bulk_insert:
            db.session.execute(insert(self.file_cls.model_cls), rf_to_bulk_insert)
            # we need to populate entries from DB so we store the record file
            # model instance
            if not self._entries:
                self._entries = {}
            for rf in self.file_cls.list_by_record(record_id):
                self._entries[rf.key] = rf
    else:
        # if bucket is not empty (or objects must be linked rather than
        # copied) then we fallback to the slow process of copying files
        for key, rf in src_files.items():
            # Copy object version or link existing?
            if copy_obj:
                dst_obj = rf.object_version.copy(bucket=self.bucket)
            else:
                dst_obj = rf.object_version

            # Copy file record
            if rf.metadata is not None:
                self[key] = dst_obj, rf.metadata
            else:
                self[key] = dst_obj

    self.default_preview = src_files.default_preview
    self.order = src_files.order
Expand Down
2 changes: 1 addition & 1 deletion tests/records/test_systemfield_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ def test_record_files_dump(base_app, db, location):
"entries": [
{
"uuid": str(rf.id),
"version_id": 3,
"version_id": 2,
"metadata": {"description": "Test file"},
"checksum": "md5:8bc944dbd052ef51652e70a5104492e3",
"key": "f1.txt",
Expand Down

0 comments on commit dc64686

Please sign in to comment.