Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: curator API fix tests #3782

Merged
merged 52 commits into from
Dec 19, 2022
Merged
Show file tree
Hide file tree
Changes from 39 commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
d8ae6f2
Merge branch 'main' into tsmith/3616-curation-api
Bento007 Nov 28, 2022
8f5cccc
fix typo
Bento007 Nov 28, 2022
f75c82c
Merge remote-tracking branch 'origin/tsmith/3616-curation-api' into t…
Bento007 Dec 3, 2022
63d18bf
resolve conflicts
Bento007 Dec 5, 2022
a0d83d6
Merge branch 'ebezzi/business-layer-unit-tests' into tsmith/3616-cura…
Bento007 Dec 5, 2022
7e9cb8c
Merge branch 'tsmith/revise-at-for-curation-api' into tsmith/3616-cur…
Bento007 Dec 5, 2022
d7bf076
curation api WIP
Bento007 Dec 7, 2022
5d02dcb
Merge branch 'ebezzi/business-layer-unit-tests' into tsmith/3616-cura…
Bento007 Dec 8, 2022
2d94fb5
curation_api
Bento007 Dec 8, 2022
9e0a2cb
curation_api
Bento007 Dec 8, 2022
fcc66bb
consolidate tests
Bento007 Dec 8, 2022
0a8e04e
resolve DOIs
Bento007 Dec 9, 2022
541dbb0
Fix s3_upload_credentials.py
Bento007 Dec 9, 2022
e90f561
Fix some get collection id tests
Bento007 Dec 10, 2022
51eac97
Merge branch 'ebezzi/business-layer-unit-tests' into tsmith/3616-cura…
Bento007 Dec 12, 2022
8bf68ce
Fix post Revisions
Bento007 Dec 12, 2022
00fd5f1
Fix Get Collections public test
Bento007 Dec 12, 2022
a05e666
Fix get collections tests
Bento007 Dec 13, 2022
f798571
Merge branch 'ebezzi/business-layer-unit-tests' into tsmith/3616-cura…
Bento007 Dec 13, 2022
e67750c
Fix get collection tests
Bento007 Dec 14, 2022
8151e66
Merge branch 'ebezzi/business-layer-unit-tests' into tsmith/3616-cura…
Bento007 Dec 14, 2022
418cc68
Fix get collection tests
Bento007 Dec 15, 2022
d1b5c45
feat: curation API for datasets (#3708)
ebezzi Dec 15, 2022
a26b427
fixing patch tests
Bento007 Dec 15, 2022
99163ea
consolidating tests
Bento007 Dec 15, 2022
a18c455
fixing get portal api index
Bento007 Dec 15, 2022
af4f001
Merge branch 'ebezzi/business-layer-unit-tests' into tsmith/3616-cura…
Bento007 Dec 16, 2022
000655a
Update tests/unit/backend/layers/business/test_business.py
Bento007 Dec 16, 2022
8b7078a
Update backend/layers/business/business.py
Bento007 Dec 16, 2022
1696ca9
fix typos
Bento007 Dec 16, 2022
34047f0
Merge remote-tracking branch 'origin/tsmith/3616-curation-api' into t…
Bento007 Dec 16, 2022
457a216
init
ebezzi Dec 16, 2022
633a52d
merge curator name
ebezzi Dec 16, 2022
e381d8f
rollback
ebezzi Dec 16, 2022
a0037f4
checkpoint
ebezzi Dec 16, 2022
ba5daa0
checkpoint
ebezzi Dec 16, 2022
71dbf18
checkpoint
ebezzi Dec 16, 2022
200f249
checkpoint
ebezzi Dec 16, 2022
e131d03
merge
ebezzi Dec 16, 2022
b97c5c5
more tests
ebezzi Dec 16, 2022
ea188a6
Asset tests
ebezzi Dec 16, 2022
0e4f484
more test fixes
ebezzi Dec 16, 2022
0984314
more test fixes
ebezzi Dec 16, 2022
0b2fc2e
comment
ebezzi Dec 16, 2022
f6a04b4
Linter
ebezzi Dec 16, 2022
35b2647
fix
ebezzi Dec 16, 2022
76a8e52
linter
ebezzi Dec 17, 2022
f9a3115
fixes
ebezzi Dec 19, 2022
d8866ce
improve tests
ebezzi Dec 19, 2022
a24de08
tests
ebezzi Dec 19, 2022
40a9766
remove monkeypatch
ebezzi Dec 19, 2022
9747f6c
Update backend/portal/api/curation/v1/curation/collections/actions.py
ebezzi Dec 19, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 29 additions & 28 deletions backend/layers/business/business.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,8 @@ def predicate(version: CollectionVersion):
or (filter.is_published is False and version.published_at is None)
)
owner = filter.owner is None or filter.owner == version.owner
return published and owner
curator = filter.curator_name is None or filter.curator_name == version.curator_name
return published and owner and curator

for collection_version in iterable:
if predicate(collection_version):
Expand Down Expand Up @@ -404,40 +405,40 @@ def get_dataset_status(self, dataset_version_id: DatasetVersionId) -> DatasetSta
def update_dataset_version_status(
self,
dataset_version_id: DatasetVersionId,
status_key: Optional[DatasetStatusKey] = None,
new_dataset_status: Optional[DatasetStatusGeneric] = None,
status_key: DatasetStatusKey,
new_dataset_status: DatasetStatusGeneric,
validation_message: Optional[str] = None,
) -> None:
"""
TODO: split into two methods, one for updating validation_message, and the other for statuses.
Updates the status of a dataset version.
status_key can be one of: [upload, validation, cxg, rds, h5ad, processing]
"""
if all([status_key, new_dataset_status]):
if status_key == DatasetStatusKey.UPLOAD and isinstance(new_dataset_status, DatasetUploadStatus):
self.database_provider.update_dataset_upload_status(dataset_version_id, new_dataset_status)
elif status_key == DatasetStatusKey.PROCESSING and isinstance(new_dataset_status, DatasetProcessingStatus):
self.database_provider.update_dataset_processing_status(dataset_version_id, new_dataset_status)
elif status_key == DatasetStatusKey.VALIDATION and isinstance(new_dataset_status, DatasetValidationStatus):
self.database_provider.update_dataset_validation_status(dataset_version_id, new_dataset_status)
elif status_key == DatasetStatusKey.CXG and isinstance(new_dataset_status, DatasetConversionStatus):
self.database_provider.update_dataset_conversion_status(
dataset_version_id, "cxg_status", new_dataset_status
)
elif status_key == DatasetStatusKey.RDS and isinstance(new_dataset_status, DatasetConversionStatus):
self.database_provider.update_dataset_conversion_status(
dataset_version_id, "rds_status", new_dataset_status
)
elif status_key == DatasetStatusKey.H5AD and isinstance(new_dataset_status, DatasetConversionStatus):
self.database_provider.update_dataset_conversion_status(
dataset_version_id, "h5ad_status", new_dataset_status
)
else:
raise DatasetUpdateException(
f"Invalid status update for dataset {dataset_version_id}: cannot set {status_key} to "
f"{new_dataset_status}"
)
elif validation_message is not None:
if status_key == DatasetStatusKey.UPLOAD and isinstance(new_dataset_status, DatasetUploadStatus):
self.database_provider.update_dataset_upload_status(dataset_version_id, new_dataset_status)
elif status_key == DatasetStatusKey.PROCESSING and isinstance(new_dataset_status, DatasetProcessingStatus):
self.database_provider.update_dataset_processing_status(dataset_version_id, new_dataset_status)
elif status_key == DatasetStatusKey.VALIDATION and isinstance(new_dataset_status, DatasetValidationStatus):
self.database_provider.update_dataset_validation_status(dataset_version_id, new_dataset_status)
elif status_key == DatasetStatusKey.CXG and isinstance(new_dataset_status, DatasetConversionStatus):
self.database_provider.update_dataset_conversion_status(
dataset_version_id, "cxg_status", new_dataset_status
)
elif status_key == DatasetStatusKey.RDS and isinstance(new_dataset_status, DatasetConversionStatus):
self.database_provider.update_dataset_conversion_status(
dataset_version_id, "rds_status", new_dataset_status
)
elif status_key == DatasetStatusKey.H5AD and isinstance(new_dataset_status, DatasetConversionStatus):
self.database_provider.update_dataset_conversion_status(
dataset_version_id, "h5ad_status", new_dataset_status
)
else:
raise DatasetUpdateException(
f"Invalid status update for dataset {dataset_version_id}: cannot set {status_key} to "
f"{new_dataset_status}"
)

if validation_message is not None:
self.database_provider.update_dataset_validation_message(dataset_version_id, validation_message)

def add_dataset_artifact(
Expand Down
10 changes: 5 additions & 5 deletions backend/layers/common/entities.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from pydantic import Field
from pydantic.dataclasses import dataclass
# from pydantic import Field
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pydantic.dataclass should work as long as we aren't mixing class members with and without defaults.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's work on pydantic on a separate PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

then remove these imports

# from pydantic.dataclasses import dataclass
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from typing import List, Optional
Expand Down Expand Up @@ -244,14 +245,13 @@ class CollectionVersionBase:
published_at: Optional[datetime]
created_at: datetime
canonical_collection: CanonicalCollection
curator_name: Optional[str] = ""


@dataclass
class CollectionVersion(CollectionVersionBase):
datasets: List[DatasetVersionId] = Field(default_factory=list)
datasets: List[DatasetVersionId]


@dataclass
class CollectionVersionWithDatasets(CollectionVersionBase):
datasets: List[DatasetVersion] = Field(default_factory=list)
datasets: List[DatasetVersion]
5 changes: 1 addition & 4 deletions backend/layers/persistence/persistence_mock.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,7 @@ def create_canonical_collection(
) -> CollectionVersion:
collection_id = CollectionId(self._generate_id())
version_id = CollectionVersionId(self._generate_id())
canonical = CanonicalCollection(
id=collection_id, version_id=version_id, originally_published_at=None, tombstoned=False
)
canonical = CanonicalCollection(collection_id, None, None, False)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why don't we set the version_id here? Do we only add the version_id if the version is published? If this is the case then if a collection is not published, what happens if the user goes to the collection page using the canonical collection id?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because the canonical -> version mapping only needs to exist for published collections. A canonical collection can have multiple versions, but only one can be actively published at the same time - hence this mapping. @atolopko-czi has a diagram that might explain this better.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what happens if the user goes to an unpublished collection page using the canonical collection id?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a use case where a user would like to use the canonical id before it is published so they can reference it in their publications.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will add a ticket to make sure this behavior is supported.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

version = CollectionVersion(
collection_id=collection_id,
version_id=version_id,
Expand All @@ -84,7 +82,6 @@ def create_canonical_collection(
datasets=[],
)
self.collections_versions[version_id.id] = version
self.collections[collection_id.id] = canonical
# Don't set mappings here - those will be set when publishing the collection!
return copy.deepcopy(version)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ def get(visibility: str, token_info: dict, curator: str = None):


def post(body: dict, user: str):

# Extract DOI into link
errors = []
if doi_url := body.get("doi"):
Expand All @@ -61,7 +60,7 @@ def post(body: dict, user: str):
metadata = CollectionMetadata(body["name"], body["description"], body["contact_name"], body["contact_email"], links)

try:
version = get_business_logic().create_collection(user, metadata)
version = get_business_logic().create_collection(user, body.get("curator_name", ""), metadata)
ebezzi marked this conversation as resolved.
Show resolved Hide resolved
except CollectionCreationException as ex:
errors.extend(ex.errors)
if errors:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
def get(collection_id: str, token_info: dict):
config = CorporaConfig()
user_info = UserInfo(token_info)
# TODO: Since this method only works on private collections, I think we should just accept the version_id here
collection_version = get_infered_collection_version_else_forbidden(collection_id)
is_owner_or_allowed_else_forbidden(collection_version, user_info)
if collection_version.published_at:
Expand Down
12 changes: 7 additions & 5 deletions backend/portal/api/curation/v1/curation/collections/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,15 +84,17 @@ def reshape_for_curation_api(

# build response
doi, links = extract_doi_from_links(collection_version.metadata.links)
revised_at = business_logic.get_published_collection_version(
collection_version.canonical_collection.id
).published_at
published_version = business_logic.get_published_collection_version(collection_version.canonical_collection.id)
if published_version is not None:
revised_at = published_version.published_at
else:
revised_at = None
response = dict(
collection_url=f"{CorporaConfig().collections_base_url}/collections/{collection_id.id}",
contact_email=collection_version.metadata.contact_email,
contact_name=collection_version.metadata.contact_name,
created_at=collection_version.created_at,
curator_name=collection_version.owner,
curator_name=collection_version.curator_name,
datasets=response_datasets,
description=collection_version.metadata.description,
doi=doi,
Expand Down Expand Up @@ -278,7 +280,7 @@ def get_collection_level_processing_status(datasets: List[DatasetVersion]) -> st
return return_status


def get_infered_collection_version_else_forbidden(collection_id: str) -> Optional[CollectionVersionWithDatasets]:
def get_infered_collection_version_else_forbidden(collection_id: str) -> CollectionVersionWithDatasets:
"""
Infer the collection version from either a CollectionId or a CollectionVersionId and return the CollectionVersion.
:param collection_id: identifies the collection version
Expand Down
22 changes: 16 additions & 6 deletions tests/unit/backend/layers/api/test_curation_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from backend.portal.api.curation.v1.curation.collections.common import EntityColumns
from tests.unit.backend.fixtures.mock_aws_test_case import CorporaTestCaseUsingMockAWS
from tests.unit.backend.layers.common.base_test import DatasetArtifactUpdate, DatasetStatusUpdate
from unit.backend.layers.common.base_api_test import BaseAPIPortalTest
from tests.unit.backend.layers.common.base_api_test import BaseAPIPortalTest


class TestAsset(CorporaTestCaseUsingMockAWS):
Expand Down Expand Up @@ -138,8 +138,18 @@ def test__delete_tombstone_collection(self):


class TestS3Credentials(BaseAPIPortalTest):
@patch("backend.common.corpora_config.CorporaConfig.__getattr__")
@patch("backend.portal.api.curation.v1.curation.collections.collection_id.s3_upload_credentials.sts_client")
def test__generate_s3_credentials__OK(self, sts_client: Mock):
def test__generate_s3_credentials__OK(self, sts_client: Mock, mock_config: Mock):

def mock_config_fn(name):
if name == "curator_role_arn":
return "test_role_arn"
if name == "submission_bucket":
return "cellxgene-dataset-submissions-test"

mock_config.side_effect = mock_config_fn

def _test(token, is_super_curator: bool = False):
sts_client.assume_role_with_web_identity = Mock(
return_value={
Expand All @@ -150,17 +160,17 @@ def _test(token, is_super_curator: bool = False):
}
}
)
collection_id = self.generate_unpublished_collection().collection_id
version_id = self.generate_unpublished_collection().version_id
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

need to test with both the version_id and collection_id

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should now work with both, I'll add the double tests.

headers = {"Authorization": f"Bearer {token}"}

response = self.app.get(f"/curation/v1/collections/{collection_id}/s3-upload-credentials", headers=headers)
response = self.app.get(f"/curation/v1/collections/{version_id}/s3-upload-credentials", headers=headers)
self.assertEqual(200, response.status_code)
token_sub = self._mock_assert_authorized_token(token)["sub"]
self.assertEqual(response.json["Bucket"], "cellxgene-dataset-submissions-test")
if is_super_curator:
self.assertEqual(response.json["UploadKeyPrefix"], f"super/{collection_id}/")
self.assertEqual(response.json["UploadKeyPrefix"], f"super/{version_id}/")
else:
self.assertEqual(response.json["UploadKeyPrefix"], f"{token_sub}/{collection_id}/")
self.assertEqual(response.json["UploadKeyPrefix"], f"{token_sub}/{version_id}/")

with self.subTest("collection owner"):
_test("owner")
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/backend/layers/api/test_portal_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def test__get_collection_id__ok(self):
"contact_email": "[email protected]",
"contact_name": "john doe",
"created_at": mock.ANY,
"curator_name": "Test User",
"curator_name": "Jane Smith",
"data_submission_policy_version": "1.0",
"datasets": [
{
Expand Down
4 changes: 4 additions & 0 deletions tests/unit/backend/layers/common/base_api_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ class BaseAPIPortalTest(BaseAuthAPITest, BaseTest):
def setUp(self):
super().setUp()

# TODO: this can be improved, but the current authorization method requires it
self.mock = patch("backend.common.corpora_config.CorporaAuthConfig.__getattr__", return_value="mock_audience")
self.mock.start()

self.cloudfront_provider = CDNProviderInterface()
pa = PortalApi(self.business_logic, self.cloudfront_provider)

Expand Down
7 changes: 4 additions & 3 deletions tests/unit/backend/layers/common/base_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,17 +152,18 @@ def generate_unpublished_collection(

return self.business_logic.get_collection_version(collection.version_id)

# Public collections need to have at least one dataset!
# Public collections need to have at least one dataset!
def generate_published_collection(
self,
owner="test_user_id",
links: List[Link] = [],
add_datasets: int = 1,
curator_name: str = "Test User",
curator_name: str = "Jane Smith",
metadata=None,
) -> CollectionVersion:
unpublished_collection = self.generate_unpublished_collection(
owner, links, curator_name=curator_name, add_datasets=add_datasets, metadata=metadata
owner, curator_name, links, add_datasets=add_datasets, metadata=metadata
)
self.business_logic.publish_collection_version(unpublished_collection.version_id)
return self.business_logic.get_collection_version(unpublished_collection.version_id)
Expand Down Expand Up @@ -192,7 +193,7 @@ def generate_dataset(
)
if not metadata:
metadata = copy.deepcopy(self.sample_dataset_metadata)
if name:
if name is not None:
metadata.name = name
self.business_logic.set_dataset_metadata(dataset_version_id, metadata)
for status in statuses:
Expand Down