Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: PATCH collection curator API redesign #3814

Merged
merged 7 commits into from
Dec 21, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions backend/layers/business/business.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,6 @@ def update_collection_version(self, version_id: CollectionVersionId, body: Colle
it should also update its publisher metadata
"""

# TODO: we could collect all the changes and issue a single update at the end
# TODO: CollectionMetadataUpdate should probably be used for collection creation as well
# TODO: link.type should DEFINITELY move to an enum. pylance will help with the refactor

Expand All @@ -221,13 +220,17 @@ def update_collection_version(self, version_id: CollectionVersionId, body: Colle
else:
new_doi = next((link.uri for link in body.links if link.type == "DOI"), None)

# Determine if publisher metadata should be unset, ignored or set at the end of the method.
# Note: the update needs to be done at the end to ensure atomicity
unset_publisher_metadata = False
set_publisher_metadata = None

if old_doi and new_doi is None:
# If the DOI was deleted, remove the publisher_metadata field
self.database_provider.save_collection_publisher_metadata(version_id, None)
unset_publisher_metadata = True
elif (new_doi is not None) and new_doi != old_doi:
# If the DOI has changed, fetch and update the metadata
publisher_metadata = self._get_publisher_metadata(new_doi, errors)
self.database_provider.save_collection_publisher_metadata(version_id, publisher_metadata)
set_publisher_metadata = self._get_publisher_metadata(new_doi, errors)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: set_publisher_metadata sounds more like a flag name next to the unset_publisher_metadata flag. Maybe this can be updated_publisher_metadata instead? But I don't feel super strongly, just an idea

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I went for publisher_metadata_to_set. Let me know if that makes sense


if errors:
raise CollectionUpdateException(errors)
Expand All @@ -242,6 +245,11 @@ def update_collection_version(self, version_id: CollectionVersionId, body: Colle
value.strip_fields()
setattr(new_metadata, field, value)

# Issue all updates
if unset_publisher_metadata:
self.database_provider.save_collection_publisher_metadata(version_id, None)
if set_publisher_metadata is not None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: i believe its not possible for both to be true as written, but its easier to quickly understand the intention if we make this an elif

self.database_provider.save_collection_publisher_metadata(version_id, set_publisher_metadata)
self.database_provider.save_collection_metadata(version_id, new_metadata)

def _assert_collection_version_unpublished(
Expand Down
6 changes: 5 additions & 1 deletion backend/layers/common/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,11 @@ def _verify_collection_metadata_fields(

def check(key):
value = getattr(metadata, key)
if check_existence and not value:
if check_existence and value is None:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not a fan of this method, but this should check two things:

  1. The field must exist if check_existence is true (namely, if this is a collection creation where all the fields are required)
  2. If a field exists, it cannot be falsey (namely, an empty string).

Maybe it would be better to split this behavior into two separate methods, rather than two if conditions.

# if checks_existence is true, value cannot be None since it must be required
errors.append({"name": key, "reason": "Cannot be blank."})
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lets give this error reason a different value than the one in the next block (or vice-versa). We should distinguish the error paths

elif value is not None and not value:
# In any case, if a value is defined, it cannot be falsey (aka blank)
errors.append({"name": key, "reason": "Cannot be blank."})
elif value is not None and key == "name" and control_char_re.search(value):
errors.append({"name": key, "reason": "Invalid characters detected."})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,31 +41,55 @@ def get(collection_id: str, token_info: dict) -> Response:

def patch(collection_id: str, body: dict, token_info: dict) -> Response:
user_info = UserInfo(token_info)

if "links" in body:
if not body["links"]:
raise InvalidParametersHTTPException(detail="If provided, the 'links' array may not be empty")

collection_version = get_infered_collection_version_else_forbidden(collection_id)
is_owner_or_allowed_else_forbidden(collection_version, user_info)
if collection_version.published_at:
if collection_version.published_at is not None:
raise MethodNotAllowedException(
detail="Directly editing a public Collection is not allowed; you must create a revision."
)

# Build CollectionMetadataUpdate object
body["links"] = [Link(link.get("link_name"), link["link_type"], link["link_url"]) for link in body.get("links", [])]
collection_metadata = CollectionMetadataUpdate(**body)

# Update the collection
errors = []
if doi_url := body.get("doi"):
# Verify DOI
if doi_url := body.pop("doi", None):
if doi_url := doi.curation_get_normalized_doi_url(doi_url, errors):
links = body.get("links", [])
links.append({"link_type": ProjectLinkType.DOI.name, "link_url": doi_url})
body["links"] = links

# TODO: dedup
def _link_from_request(body: dict):
return Link(
body.get("link_name"),
body["link_type"],
body["link_url"],
)

if body.get("links") is not None:
update_links = [_link_from_request(node) for node in body["links"]]
else:
update_links = None

# Build CollectionMetadataUpdate object
collection_metadata = CollectionMetadataUpdate(
body.get("name"),
body.get("description"),
body.get("contact_name"),
body.get("contact_email"),
update_links,
)

# Update the collection
try:
get_business_logic().update_collection_version(collection_version.version_id, collection_metadata)
except CollectionUpdateException as ex:
errors.extend(ex.errors)
if errors:
raise InvalidParametersHTTPException(ext=dict(invalid_parameters=errors))

# Make Response
updated_collection_version = get_business_logic().get_collection_version(collection_version.version_id)

Expand Down
108 changes: 46 additions & 62 deletions tests/unit/backend/layers/api/test_curation_api.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import copy
import json
from dataclasses import asdict
import unittest
from unittest.mock import Mock, patch
import uuid

Expand All @@ -15,9 +14,11 @@
DatasetStatusKey,
DatasetUploadStatus,
DatasetValidationStatus,
Link,
OntologyTermId,
)
from backend.portal.api.curation.v1.curation.collections.common import EntityColumns
from tests.unit.backend.layers.api.test_portal_api import generate_mock_publisher_metadata
from tests.unit.backend.layers.common.base_test import DatasetArtifactUpdate, DatasetStatusUpdate
from tests.unit.backend.layers.common.base_api_test import BaseAPIPortalTest

Expand Down Expand Up @@ -715,7 +716,6 @@ def test__get_collection_with_x_approximate_distribution_none__OK(self):
self.assertIsNone(res.json["datasets"][0]["x_approximate_distribution"])


@unittest.skip("Trent will work on this")
class TestPatchCollectionID(BaseAPIPortalTest):
def setUp(self):
super().setUp()
Expand All @@ -738,26 +738,11 @@ def test__update_collection__OK(self):
self.assertEqual(200, response.status_code)

def test__update_collection_partial_data__OK(self):
description = "a description"
contact_name = "first last"
contact_email = "[email protected]"
links = [
{"link_name": "name", "link_type": "RAW_DATA", "link_url": "http://test_link.place"},
]
publisher_metadata = {
"authors": [{"name": "First Last", "given": "First", "family": "Last"}],
"journal": "Journal of Anamolous Results",
"published_year": 2022,
"published_month": 1,
}
collection_id = self.generate_collection(
description=description,
contact_name=contact_name,
contact_email=contact_email,
links=links,
publisher_metadata=publisher_metadata,
visibility="PRIVATE",
).collection_id
links = [Link("name", "RAW_DATA", "http://test_link.place")]
self.crossref_provider.fetch_metadata = Mock(return_value=generate_mock_publisher_metadata())
collection = self.generate_unpublished_collection(links=links)
collection_id = collection.collection_id

new_name = "A new name, and only a new name"
metadata = {"name": new_name}
response = self.app.patch(
Expand All @@ -766,13 +751,18 @@ def test__update_collection_partial_data__OK(self):
headers=self.make_owner_header(),
)
self.assertEqual(200, response.status_code)

response = self.app.get(f"curation/v1/collections/{collection_id}")
print(response.json)
self.assertEqual(new_name, response.json["name"])
self.assertEqual(description, response.json["description"])
self.assertEqual(contact_name, response.json["contact_name"])
self.assertEqual(contact_email, response.json["contact_email"])
self.assertEqual(links, response.json["links"])
self.assertEqual(publisher_metadata, response.json["publisher_metadata"])
self.assertEqual(collection.metadata.description, response.json["description"])
self.assertEqual(collection.metadata.contact_name, response.json["contact_name"])
self.assertEqual(collection.metadata.contact_email, response.json["contact_email"])
self.assertEqual(
[{"link_name": "name", "link_type": "RAW_DATA", "link_url": "http://test_link.place"}],
response.json["links"],
)
self.assertEqual(collection.publisher_metadata, response.json["publisher_metadata"])

def test_update_collection_with_empty_required_fields(self):
tests = [dict(description=""), dict(contact_name=""), dict(contact_email=""), dict(name="")]
Expand Down Expand Up @@ -818,9 +808,9 @@ def test__update_collection__links__OK(self):
self.assertEqual(expected_links, response.json["links"])

def test__update_collection__doi__OK(self):
initial_doi = "12.3456/doi_curie_reference"
initial_doi = "10.2020"
links = [
{"link_name": "new doi", "link_type": "DOI", "link_url": initial_doi},
{"link_name": "new doi", "link_type": "DOI", "link_url": "http://doi.org/10.2020"},
]
new_doi = "10.1016" # a real DOI (CURIE reference)
collection_id = self.generate_collection(links=links, visibility="PRIVATE").collection_id
Expand Down Expand Up @@ -861,38 +851,32 @@ def test__update_collection__doi_does_not_exist__BAD_REQUEST(self):
new_links = [
{"link_name": "new link", "link_type": "RAW_DATA", "link_url": "http://brand_new_link.place"},
]
publisher_metadata = {
"authors": [{"name": "First Last", "given": "First", "family": "Last"}],
"journal": "Journal of Anamolous Results",
"published_year": 2022,
"published_month": 1,
}
with patch(
"backend.common.providers.crossref_provider.CrossrefProvider.fetch_metadata",
side_effect=CrossrefDOINotFoundException(),
):
collection = self.generate_collection(
links=links, publisher_metadata=publisher_metadata, visibility="PRIVATE"
)
collection_id = collection.collection_id
original_collection = self.app.get(f"curation/v1/collections/{collection_id}").json

# Only compare to first item in links list because "DOI" type gets removed from Curator API response
self.assertEqual(links[:1], original_collection["links"])
metadata = {"links": new_links, "doi": "10.1016/bad_doi"}
response = self.app.patch(
f"/curation/v1/collections/{collection_id}",
data=json.dumps(metadata),
headers=self.make_owner_header(),
)
self.assertEqual(400, response.status_code)
original_collection_unchanged = self.app.get(f"curation/v1/collections/{collection_id}").json
# Only compare to first item in links list because "DOI" type gets removed from Curator API response
self.assertEqual(links[:1], original_collection_unchanged["links"])
self.assertEqual(publisher_metadata, original_collection_unchanged["publisher_metadata"])
mock_publisher_metadata = generate_mock_publisher_metadata()
self.crossref_provider.fetch_metadata = Mock(return_value=mock_publisher_metadata)
collection = self.generate_collection(links=links, visibility="PRIVATE")
self.assertIsNotNone(collection.publisher_metadata)
collection_id = collection.collection_id
original_collection = self.app.get(f"curation/v1/collections/{collection_id}").json

self.crossref_provider.fetch_metadata = Mock(side_effect=CrossrefDOINotFoundException())

# Only compare to first item in links list because "DOI" type gets removed from Curator API response
self.assertEqual(links[:1], original_collection["links"])
metadata = {"links": new_links, "doi": "10.1016/bad_doi"}
response = self.app.patch(
f"/curation/v1/collections/{collection_id}",
data=json.dumps(metadata),
headers=self.make_owner_header(),
)
self.assertEqual(400, response.status_code)
original_collection_unchanged = self.app.get(f"curation/v1/collections/{collection_id}").json

# Only compare to first item in links list because "DOI" type gets removed from Curator API response
self.assertEqual(links[:1], original_collection_unchanged["links"])
self.assertEqual(mock_publisher_metadata, original_collection_unchanged["publisher_metadata"])

def test__update_collection__Not_Owner(self):
collection_id = self.generate_unpublished_collection(owner="someone else")
collection_id = self.generate_unpublished_collection(owner="someone else").collection_id
response = self.app.patch(
f"/curation/v1/collections/{collection_id}",
data=json.dumps(self.test_collection),
Expand All @@ -901,23 +885,23 @@ def test__update_collection__Not_Owner(self):
self.assertEqual(403, response.status_code)

def test__update_collection__Super_Curator(self):
collection_id = self.generate_unpublished_collection()
collection_id = self.generate_unpublished_collection().collection_id
headers = self.make_super_curator_header()
response = self.app.patch(
f"/curation/v1/collections/{collection_id}", data=json.dumps(self.test_collection), headers=headers
)
self.assertEqual(200, response.status_code)

def test__update_public_collection_owner__405(self):
collection_id = self.generate_published_collection()
collection_id = self.generate_published_collection().collection_id
headers = self.make_super_curator_header()
response = self.app.patch(
f"/curation/v1/collections/{collection_id}", data=json.dumps(self.test_collection), headers=headers
)
self.assertEqual(405, response.status_code)
self.assertEqual(
"Directly editing a public Collection is not allowed; you must create a revision.",
json.loads(response.text)["detail"],
response.json["detail"],
)


Expand Down
2 changes: 1 addition & 1 deletion tests/unit/backend/layers/common/base_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def generate_unpublished_collection(
add_datasets: int = 0,
metadata=None,
) -> CollectionVersion:
links = links if links else []
links = links or []
if not metadata:
metadata = copy.deepcopy(self.sample_collection_metadata)
metadata.links = links
Expand Down