manager: improve copy func #542
@@ -50,15 +50,19 @@
 }
 """

+import uuid
 from collections.abc import MutableMapping
+from datetime import datetime
 from functools import wraps

 from invenio_db import db
 from invenio_files_rest.errors import (
     BucketLockedError,
     InvalidKeyError,
     InvalidOperationError,
 )
 from invenio_files_rest.models import Bucket, FileInstance, ObjectVersion
+from sqlalchemy import insert


 def ensure_enabled(func):
@@ -266,24 +270,61 @@ def teardown(self, full=True):
         self._order = []

     def copy(self, src_files, copy_obj=True):
-        """Copy from another file manager."""
+        """Copy from another file manager.
+
+        This method will copy all object versions to ``self.bucket``, assuming
+        that the latter is a new, empty bucket.
+        """
         self.enabled = src_files.enabled

         if not self.enabled:
             return

-        for key, rf in src_files.items():
-            # Copy the object version or link the existing one?
-            if copy_obj:
-                dst_obj = rf.object_version.copy(bucket=self.bucket)
-            else:
-                dst_obj = rf.object_version
-
-            # Copy the file record
-            if rf.metadata is not None:
-                self[key] = dst_obj, rf.metadata
-            else:
-                self[key] = dst_obj
+        bucket_objects = ObjectVersion.query.filter_by(bucket_id=self.bucket_id).count()
+        if bucket_objects == 0:
+            # The bucket is empty, so we can copy all object versions to
+            # self.bucket in bulk.
+            objs = ObjectVersion.copy_from(src_files.bucket_id, self.bucket_id)
+            ovs_by_key = {obj["key"]: obj for obj in objs}
+            rf_to_bulk_insert = []
+
+            record_id = self.record.id
+            for key, rf in src_files.items():
+                new_rf = {
+                    "id": uuid.uuid4(),
+                    "created": datetime.utcnow(),
+                    "updated": datetime.utcnow(),
+                    "key": key,
+                    "record_id": record_id,
+                    "version_id": 1,
+                    "object_version_id": ovs_by_key[key]["version_id"],
+                    "json": rf.metadata or {},
+                }
+                rf_to_bulk_insert.append(new_rf)
+
+            if rf_to_bulk_insert:
+                db.session.execute(insert(self.file_cls.model_cls), rf_to_bulk_insert)
+                # We need to populate the entries from the DB so that we store
+                # the record file model instances.
+                if not self._entries:
+                    self._entries = {}
+                for rf in self.file_cls.list_by_record(self.record.id):
+                    self._entries[rf.key] = rf
+        else:
+            # If the bucket is not empty, we fall back to the slow process of
+            # copying files one by one.
+            for key, rf in src_files.items():
+                # Copy the object version or link the existing one?
+                if copy_obj:
+                    dst_obj = rf.object_version.copy(bucket=self.bucket)
+                else:
+                    dst_obj = rf.object_version
+
+                # Copy the file record
+                if rf.metadata is not None:
+                    self[key] = dst_obj, rf.metadata
+                else:
+                    self[key] = dst_obj

         self.default_preview = src_files.default_preview
         self.order = src_files.order

Review comment: Could we split this `if` statement into two separate methods instead? We know that the block above is used only while editing an already published record, so we could use it directly.

Reply: I agree, but the point was not to change the existing chain of calls, i.e. `draft_files.copy(record_files)`. I can do it if we align with @slint; as we had discussed IRL, this could be done later.
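For reference, the split discussed above could look roughly like the sketch below: the bulk path and the per-file fallback become two private helpers, and `copy()` only dispatches between them. It reuses the names from the diff (`ObjectVersion.copy_from`, `file_cls`, `_entries`, `list_by_record`); the helper names `_bulk_copy` and `_copy_one_by_one` are invented here for illustration and are not part of this PR. The methods belong to the same file manager class as in the diff.

```python
# Illustrative sketch only, not the merged implementation.
def copy(self, src_files, copy_obj=True):
    """Copy from another file manager."""
    self.enabled = src_files.enabled
    if not self.enabled:
        return

    bucket_is_empty = (
        ObjectVersion.query.filter_by(bucket_id=self.bucket_id).count() == 0
    )
    if bucket_is_empty:
        # Fast path, e.g. when editing an already published record into a new draft.
        self._bulk_copy(src_files)
    else:
        self._copy_one_by_one(src_files, copy_obj=copy_obj)

    self.default_preview = src_files.default_preview
    self.order = src_files.order

def _bulk_copy(self, src_files):
    """Bulk-copy all object versions and file records into the empty bucket."""
    objs = ObjectVersion.copy_from(src_files.bucket_id, self.bucket_id)
    ovs_by_key = {obj["key"]: obj for obj in objs}
    rows = [
        {
            "id": uuid.uuid4(),
            "created": datetime.utcnow(),
            "updated": datetime.utcnow(),
            "key": key,
            "record_id": self.record.id,
            "version_id": 1,
            "object_version_id": ovs_by_key[key]["version_id"],
            "json": rf.metadata or {},
        }
        for key, rf in src_files.items()
    ]
    if rows:
        db.session.execute(insert(self.file_cls.model_cls), rows)
        # Re-read the rows so the manager stores record file model instances.
        if not self._entries:
            self._entries = {}
        for rf in self.file_cls.list_by_record(self.record.id):
            self._entries[rf.key] = rf

def _copy_one_by_one(self, src_files, copy_obj=True):
    """Copy (or link) each object version and its file record individually."""
    for key, rf in src_files.items():
        # Copy the object version or link the existing one?
        if copy_obj:
            dst_obj = rf.object_version.copy(bucket=self.bucket)
        else:
            dst_obj = rf.object_version
        # Copy the file record, with metadata when present.
        if rf.metadata is not None:
            self[key] = dst_obj, rf.metadata
        else:
            self[key] = dst_obj
```

The external call chain would stay `draft_files.copy(record_files)`, which is the constraint mentioned in the reply above.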
@@ -54,7 +54,7 @@ tests =
     invenio-db[postgresql,mysql,versioning]>=1.0.14,<2.0.0
     pytest-invenio>=2.1.0,<3.0.0
     pytest-mock>=1.6.0
-    sphinx>=4.2.0,<5
+    sphinx>=5,<6
 elasticsearch7 =
     invenio-search[elasticsearch7]>=2.1.0,<3.0.0
 opensearch1 =

Review comment: I had to update this in order for the tests to pass; the error was caused by one of the extensions.
Review comment: I am not sure about this change, could you explain how it helps?

Reply: I think it was a leftover from @jrcastro2, but in principle this is the fix we apply in other places to instruct SQLAlchemy not to trigger UPDATE queries when fetching from the DB. Is that the question?

Reply: It helps very little; concretely, it prevents 2 more queries to the DB every time this is called. `no_autoflush` is useful when we are only querying, without executing any modifications on the data; otherwise it can lead to inconsistent data. We can remove it if it's too complicated, as the real gains are very small in this case.
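As a minimal illustration of the `no_autoflush` point (generic SQLAlchemy usage, not code from this PR; the function and its arguments are hypothetical): wrapping a pure read in `db.session.no_autoflush` stops the session from flushing pending changes, and therefore from issuing extra UPDATE statements, before the SELECT runs. It is only safe when the query does not depend on unflushed modifications.

```python
from invenio_db import db


def fetch_file_record(file_model_cls, record_id, key):
    """Read-only lookup; skipping autoflush avoids extra UPDATEs for pending objects."""
    # Safe only because nothing queried here depends on unflushed, modified objects.
    with db.session.no_autoflush:
        return (
            db.session.query(file_model_cls)
            .filter_by(record_id=record_id, key=key)
            .one_or_none()
        )
```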