From 01dfbce6cc1d634df89aafc99b53e5bed00c010c Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Mon, 12 Dec 2022 10:55:42 -0500 Subject: [PATCH 1/3] linting Signed-off-by: nayib-jose-gloria --- backend/layers/api/portal_api.py | 2 +- backend/layers/business/business.py | 2 +- backend/layers/persistence/persistence.py | 4 +- .../layers/persistence/persistence_mock.py | 5 ++- backend/layers/processing/process_cxg.py | 7 +++- .../processing/process_download_validate.py | 11 ++++- backend/layers/processing/process_logic.py | 2 +- backend/layers/processing/process_seurat.py | 7 +++- .../backend/layers/api/test_portal_api.py | 42 +++++++++++++------ .../backend/layers/business/test_business.py | 2 +- .../backend/layers/common/base_api_test.py | 22 +++++----- 11 files changed, 72 insertions(+), 34 deletions(-) diff --git a/backend/layers/api/portal_api.py b/backend/layers/api/portal_api.py index 9fb398a657649..3557ac401de74 100644 --- a/backend/layers/api/portal_api.py +++ b/backend/layers/api/portal_api.py @@ -555,7 +555,7 @@ def get_dataset_identifiers(self, url: str): """ try: path = urlparse(url).path - id = [segment for segment in path.split("/") if segment][-1].strip(".cxg") + id = [segment for segment in path.split("/") if segment][-1].removesuffix(".cxg") except Exception: raise ServerErrorHTTPException("Cannot parse URL") diff --git a/backend/layers/business/business.py b/backend/layers/business/business.py index 96e52e42e5e01..1c4b486860f81 100644 --- a/backend/layers/business/business.py +++ b/backend/layers/business/business.py @@ -47,7 +47,7 @@ DatasetValidationStatus, DatasetVersion, DatasetVersionId, - Link + Link, ) from backend.layers.persistence.persistence_interface import DatabaseProviderInterface from backend.layers.thirdparty.crossref_provider import CrossrefProviderInterface diff --git a/backend/layers/persistence/persistence.py b/backend/layers/persistence/persistence.py index e4539131085b8..fa01602d1d9dd 100644 --- a/backend/layers/persistence/persistence.py +++ b/backend/layers/persistence/persistence.py @@ -440,7 +440,9 @@ def get_dataset_version(self, dataset_version_id: DatasetVersionId) -> DatasetVe Returns a dataset version by id. """ with self._manage_session() as session: - dataset_version = session.query(DatasetVersionTable).filter_by(version_id=dataset_version_id.id).one_or_none() + dataset_version = ( + session.query(DatasetVersionTable).filter_by(version_id=dataset_version_id.id).one_or_none() + ) if dataset_version is None: return None return self._hydrate_dataset_version(dataset_version) diff --git a/backend/layers/persistence/persistence_mock.py b/backend/layers/persistence/persistence_mock.py index f274d5ebf8fc1..9184dd46876a5 100644 --- a/backend/layers/persistence/persistence_mock.py +++ b/backend/layers/persistence/persistence_mock.py @@ -82,8 +82,9 @@ def create_canonical_collection(self, owner: str, collection_metadata: Collectio # Don't set mappings here - those will be set when publishing the collection! return copy.deepcopy(version) - def _update_version_with_canonical(self, version: Union[CollectionVersion, CollectionVersionWithDatasets], - update_datasets: bool = False): + def _update_version_with_canonical( + self, version: Union[CollectionVersion, CollectionVersionWithDatasets], update_datasets: bool = False + ): """ Private method that returns a version updated with the canonical collection. This is equivalent to a database double lookup (or join). 
diff --git a/backend/layers/processing/process_cxg.py b/backend/layers/processing/process_cxg.py index d514ac47bd1bf..a55c89a258f29 100644 --- a/backend/layers/processing/process_cxg.py +++ b/backend/layers/processing/process_cxg.py @@ -1,6 +1,11 @@ #!/usr/bin/env python3 from backend.layers.business.business_interface import BusinessLogicInterface -from backend.layers.common.entities import DatasetArtifactType, DatasetConversionStatus, DatasetStatusKey, DatasetVersionId +from backend.layers.common.entities import ( + DatasetArtifactType, + DatasetConversionStatus, + DatasetStatusKey, + DatasetVersionId, +) from backend.layers.processing.process_logic import ProcessingLogic from backend.layers.thirdparty.s3_provider import S3ProviderInterface from backend.layers.thirdparty.uri_provider import UriProviderInterface diff --git a/backend/layers/processing/process_download_validate.py b/backend/layers/processing/process_download_validate.py index 30c256bab6898..cab6f6a3bef2d 100644 --- a/backend/layers/processing/process_download_validate.py +++ b/backend/layers/processing/process_download_validate.py @@ -235,8 +235,15 @@ def process(self, dataset_id: DatasetVersionId, dropbox_url: str, artifact_bucke bucket_prefix = self.get_bucket_prefix(dataset_id.id) # Upload the original dataset to the artifact bucket - self.create_artifact(local_filename, DatasetArtifactType.H5AD, bucket_prefix, dataset_id, artifact_bucket, DatasetStatusKey.H5AD) + self.create_artifact( + local_filename, DatasetArtifactType.H5AD, bucket_prefix, dataset_id, artifact_bucket, DatasetStatusKey.H5AD + ) # Upload the labeled dataset to the artifact bucket self.create_artifact( - file_with_labels, DatasetArtifactType.H5AD, bucket_prefix, dataset_id, artifact_bucket, DatasetStatusKey.H5AD + file_with_labels, + DatasetArtifactType.H5AD, + bucket_prefix, + dataset_id, + artifact_bucket, + DatasetStatusKey.H5AD, ) diff --git a/backend/layers/processing/process_logic.py b/backend/layers/processing/process_logic.py index eca898ef86db2..1b39f55778d24 100644 --- a/backend/layers/processing/process_logic.py +++ b/backend/layers/processing/process_logic.py @@ -62,7 +62,7 @@ def create_artifact( self, file_name: str, artifact_type: str, - bucket_prefix: str, + bucket_prefix: str, dataset_id: DatasetVersionId, artifact_bucket: str, processing_status_key: DatasetStatusKey, diff --git a/backend/layers/processing/process_seurat.py b/backend/layers/processing/process_seurat.py index 270a5e5dd5655..b9ef41a0dbd8c 100644 --- a/backend/layers/processing/process_seurat.py +++ b/backend/layers/processing/process_seurat.py @@ -2,7 +2,12 @@ import subprocess from backend.layers.business.business_interface import BusinessLogicInterface -from backend.layers.common.entities import DatasetArtifactType, DatasetConversionStatus, DatasetStatusKey, DatasetVersionId +from backend.layers.common.entities import ( + DatasetArtifactType, + DatasetConversionStatus, + DatasetStatusKey, + DatasetVersionId, +) from backend.layers.processing.process_logic import ProcessingLogic from backend.layers.thirdparty.s3_provider import S3ProviderInterface from backend.layers.thirdparty.uri_provider import UriProviderInterface diff --git a/tests/unit/backend/layers/api/test_portal_api.py b/tests/unit/backend/layers/api/test_portal_api.py index 8f44a6fafdddb..55c0e40e18f5d 100644 --- a/tests/unit/backend/layers/api/test_portal_api.py +++ b/tests/unit/backend/layers/api/test_portal_api.py @@ -1,4 +1,5 @@ import gevent.monkey + gevent.monkey.patch_all() import dataclasses @@ -824,7 
+825,9 @@ def test_tombstone_published_collection_with_revision__ok(self): headers = {"host": "localhost", "Content-Type": "application/json", "Cookie": self.get_cxguser_token()} # Verify private collections exist - test_private_url = furl(path=f"/dp/v1/collections/{revision.version_id}", query_params=dict(visibility="PRIVATE")) + test_private_url = furl( + path=f"/dp/v1/collections/{revision.version_id}", query_params=dict(visibility="PRIVATE") + ) response = self.app.get(test_private_url.url, headers=headers) self.assertEqual(200, response.status_code) body = json.loads(response.data) @@ -832,7 +835,9 @@ def test_tombstone_published_collection_with_revision__ok(self): self.assertIn(dataset_rev.dataset_version_id, dataset_ids) # Verify public collections exist - test_public_url = furl(path=f"/dp/v1/collections/{collection.collection_id}", query_params=dict(visibility="PUBLIC")) + test_public_url = furl( + path=f"/dp/v1/collections/{collection.collection_id}", query_params=dict(visibility="PUBLIC") + ) response = self.app.get(test_public_url.url, headers=headers) self.assertEqual(200, response.status_code) body = json.loads(response.data) @@ -884,7 +889,7 @@ def test_delete_collection__public__ok(self): self.assertEqual(body, "") def test_delete_collection__not_owner(self): - collection = self.generate_unpublished_collection(owner='not_test_user_id') + collection = self.generate_unpublished_collection(owner="not_test_user_id") test_url = furl(path=f"/dp/v1/collections/{collection.version_id}") headers = {"host": "localhost", "Content-Type": "application/json", "Cookie": self.get_cxguser_token()} response = self.app.delete(test_url.url, headers=headers) @@ -909,7 +914,9 @@ def test_deleted_collection_does_not_appear_in_collection_lists(self): self.assertIn(public_collection.collection_id.id, collection_ids) self.assertIn(collection_to_delete.version_id.id, collection_ids) - test_url = furl(path=f"/dp/v1/collections/{collection_to_delete.version_id.id}", query_params=dict(visibility="PRIVATE")) + test_url = furl( + path=f"/dp/v1/collections/{collection_to_delete.version_id.id}", query_params=dict(visibility="PRIVATE") + ) response = self.app.delete(test_url.url, headers=headers) self.assertEqual(response.status_code, 204) @@ -1103,12 +1110,15 @@ def test__update_collection_same_doi_does_not_update_metadata(self): class TestCollectionsCurators(NewBaseTest): - def test_view_non_owned_private_collection__ok(self): # Generate test collection collection = self.generate_unpublished_collection(owner="another_test_user_id") - headers = {"host": "localhost", "Content-Type": "application/json", "Cookie": self.get_cxguser_token(user='not_owner')} + headers = { + "host": "localhost", + "Content-Type": "application/json", + "Cookie": self.get_cxguser_token(user="not_owner"), + } test_url = furl(path=f"/dp/v1/collections/{collection.version_id}", query_params=dict(visibility="PRIVATE")) response = self.app.get(test_url.url, headers=headers) @@ -1237,9 +1247,13 @@ class TestDataset(NewBaseTest): # ✅ def test__post_dataset_asset__OK(self): self.business_logic.get_dataset_artifact_download_data = Mock( - return_value=DatasetArtifactDownloadData("asset.h5ad", DatasetArtifactType.H5AD, 1000, "http://presigned.url") + return_value=DatasetArtifactDownloadData( + "asset.h5ad", DatasetArtifactType.H5AD, 1000, "http://presigned.url" + ) + ) + version = self.generate_dataset( + artifacts=[DatasetArtifactUpdate(DatasetArtifactType.H5AD, "http://mock.uri/asset.h5ad")] ) - version = 
self.generate_dataset(artifacts=[DatasetArtifactUpdate(DatasetArtifactType.H5AD, "http://mock.uri/asset.h5ad")]) dataset_version_id = version.dataset_version_id artifact_id = version.artifact_ids[0] @@ -1258,7 +1272,9 @@ def test__post_dataset_asset__file_SERVER_ERROR(self): """ `post_dataset_asset` should throw 500 if presigned_url or file_size aren't returned from the server """ - version = self.generate_dataset(artifacts=[DatasetArtifactUpdate(DatasetArtifactType.H5AD, "http://mock.uri/asset.h5ad")]) + version = self.generate_dataset( + artifacts=[DatasetArtifactUpdate(DatasetArtifactType.H5AD, "http://mock.uri/asset.h5ad")] + ) dataset_version_id = version.dataset_version_id artifact_id = version.artifact_ids[0] @@ -2199,16 +2215,16 @@ def test__bad_link__400(self): with self.subTest("Bad Dropbox link"): self.uri_provider.validate = Mock(return_value=True) - self.uri_provider.get_file_info.side_effect = FileInfoException("The URL provided causes an error with Dropbox.") + self.uri_provider.get_file_info.side_effect = FileInfoException( + "The URL provided causes an error with Dropbox." + ) headers = {"host": "localhost", "Content-Type": "application/json", "Cookie": self.get_cxguser_token()} body = {"url": self.dummy_link} test_url = furl(path=path) response = self.app.post(test_url.url, headers=headers, data=json.dumps(body)) self.assertEqual(400, response.status_code) print(json.loads(response.data)["detail"]) - self.assertTrue( - "The URL provided causes an error with Dropbox." == json.loads(response.data)["detail"] - ) + self.assertTrue("The URL provided causes an error with Dropbox." == json.loads(response.data)["detail"]) # ✅ def test__oversized__413(self): diff --git a/tests/unit/backend/layers/business/test_business.py b/tests/unit/backend/layers/business/test_business.py index 89ae2ccccf65d..738674c72f81e 100644 --- a/tests/unit/backend/layers/business/test_business.py +++ b/tests/unit/backend/layers/business/test_business.py @@ -50,7 +50,7 @@ class BaseBusinessLogicTestCase(unittest.TestCase): @classmethod def setUpClass(cls) -> None: - cls.run_as_integration = True if os.environ.get('INTEGRATION_TEST', 'false').lower() == 'true' else False + cls.run_as_integration = True if os.environ.get("INTEGRATION_TEST", "false").lower() == "true" else False if cls.run_as_integration: cls.database_provider = DatabaseProvider(database_uri="postgresql://postgres:secret@localhost") cls.database_provider._drop_schema("persistence_schema") diff --git a/tests/unit/backend/layers/common/base_api_test.py b/tests/unit/backend/layers/common/base_api_test.py index f88cda8d84d9a..9b4ad0442c402 100644 --- a/tests/unit/backend/layers/common/base_api_test.py +++ b/tests/unit/backend/layers/common/base_api_test.py @@ -100,7 +100,7 @@ class NewBaseTest(BaseAuthAPITest): @classmethod def setUpClass(cls) -> None: - cls.run_as_integration = True if os.environ.get('INTEGRATION_TEST', 'false').lower() == 'true' else False + cls.run_as_integration = True if os.environ.get("INTEGRATION_TEST", "false").lower() == "true" else False if cls.run_as_integration: cls.database_provider = DatabaseProvider(database_uri="postgresql://postgres:secret@localhost") cls.database_provider._drop_schema("persistence_schema") @@ -195,7 +195,7 @@ def generate_unpublished_collection( # Public collections need to have at least one dataset! 
def generate_published_collection( - self, owner="test_user_id", links: List[Link] = [], add_datasets: int = 1 + self, owner="test_user_id", links: List[Link] = [], add_datasets: int = 1 ) -> CollectionVersion: unpublished_collection = self.generate_unpublished_collection(owner, links, add_datasets=add_datasets) self.business_logic.publish_collection_version(unpublished_collection.version_id) @@ -206,11 +206,11 @@ def generate_revision(self, collection_id: str) -> CollectionVersion: return self.business_logic.get_collection_version(revision.version_id) def generate_dataset( - self, - owner: str = "test_user_id", + self, + owner: str = "test_user_id", collection_version: Optional[CollectionVersion] = None, - metadata: Optional[DatasetMetadata] = None, - statuses: List[DatasetStatusUpdate] = [], + metadata: Optional[DatasetMetadata] = None, + statuses: List[DatasetStatusUpdate] = [], artifacts: List[DatasetArtifactUpdate] = [], publish: bool = False, ) -> DatasetData: @@ -219,7 +219,9 @@ def generate_dataset( """ if not collection_version: collection_version = self.generate_unpublished_collection(owner) - dataset_version_id, dataset_id = self.business_logic.ingest_dataset(collection_version.version_id, "http://fake.url", None) + dataset_version_id, dataset_id = self.business_logic.ingest_dataset( + collection_version.version_id, "http://fake.url", None + ) if not metadata: metadata = copy.deepcopy(self.sample_dataset_metadata) self.business_logic.set_dataset_metadata(dataset_version_id, metadata) @@ -234,9 +236,9 @@ def generate_dataset( self.business_logic.publish_collection_version(collection_version.version_id) explorer_url = f"http://base.url/{dataset_version_id.id}.cxg/" return DatasetData( - dataset_version_id.id, - dataset_id.id, - explorer_url, + dataset_version_id.id, + dataset_id.id, + explorer_url, collection_version.version_id.id, collection_version.collection_id.id, [a.id for a in artifact_ids], From deb7a4f8c732129734e199dea671a01dafa868e6 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Tue, 13 Dec 2022 14:02:16 -0500 Subject: [PATCH 2/3] feat: support canonical collection tombstoning Signed-off-by: nayib-jose-gloria --- backend/common/utils/http_exceptions.py | 6 + backend/layers/api/portal_api.py | 35 ++++-- backend/layers/business/business.py | 12 +- backend/layers/persistence/orm.py | 1 - backend/layers/persistence/persistence.py | 33 +++-- .../persistence/persistence_interface.py | 4 + .../layers/persistence/persistence_mock.py | 17 ++- .../backend/layers/api/test_portal_api.py | 114 ++++++++---------- .../backend/layers/business/test_business.py | 12 ++ 9 files changed, 128 insertions(+), 106 deletions(-) diff --git a/backend/common/utils/http_exceptions.py b/backend/common/utils/http_exceptions.py index 56215cfb7562a..73aeae4feebc1 100644 --- a/backend/common/utils/http_exceptions.py +++ b/backend/common/utils/http_exceptions.py @@ -59,3 +59,9 @@ def __init__(self, detail: str = "Method not allowed.", *args, **kwargs) -> None class ConflictException(ProblemException): def __init__(self, detail: str = "A duplicate resource already exists.", *args, **kwargs) -> None: super().__init__(status=requests.codes.conflict, title="Conflict", detail=detail, *args, **kwargs) + + +class GoneHTTPException(ProblemException): + def __init__(self, detail: str = "Resource has been removed", *args, **kwargs) -> None: + super().__init__(status=requests.codes.gone, title="Gone", detail=detail, *args, **kwargs) + diff --git a/backend/layers/api/portal_api.py 
b/backend/layers/api/portal_api.py index 3557ac401de74..af582c09e941d 100644 --- a/backend/layers/api/portal_api.py +++ b/backend/layers/api/portal_api.py @@ -6,6 +6,7 @@ from backend.common.utils.http_exceptions import ( ConflictException, + GoneHTTPException, ForbiddenHTTPException, InvalidParametersHTTPException, MethodNotAllowedException, @@ -141,7 +142,7 @@ def remove_none(self, body: dict): return {k: v for k, v in body.items() if v is not None} # Note: `metadata` can be none while the dataset is uploading - def _dataset_to_response(self, dataset: DatasetVersion): + def _dataset_to_response(self, dataset: DatasetVersion, is_tombstoned): return self.remove_none( { "assay": None @@ -186,7 +187,7 @@ def _dataset_to_response(self, dataset: DatasetVersion): "tissue": None if dataset.metadata is None else self._ontology_term_ids_to_response(dataset.metadata.tissue), - "tombstone": False, # TODO + "tombstone": is_tombstoned, "updated_at": dataset.created_at, # Legacy: datasets can't be updated anymore "x_approximate_distribution": None if dataset.metadata is None @@ -196,6 +197,7 @@ def _dataset_to_response(self, dataset: DatasetVersion): def _collection_to_response(self, collection: CollectionVersion, access_type: str): collection_id = collection.collection_id.id if collection.published_at is not None else collection.version_id.id + is_tombstoned = collection.canonical_collection.tombstoned return self.remove_none( { "access_type": access_type, @@ -204,7 +206,7 @@ def _collection_to_response(self, collection: CollectionVersion, access_type: st "created_at": collection.created_at, "curator_name": "", # TODO "data_submission_policy_version": "1.0", # TODO - "datasets": [self._dataset_to_response(ds) for ds in collection.datasets], + "datasets": [self._dataset_to_response(ds, is_tombstoned=is_tombstoned) for ds in collection.datasets], "description": collection.metadata.description, "id": collection_id, "links": [self._link_to_response(link) for link in collection.metadata.links], @@ -225,13 +227,16 @@ def get_collection_details(self, collection_id: str, token_info: dict): version = self.business_logic.get_published_collection_version(CollectionId(collection_id)) if version is None: version = self.business_logic.get_collection_version(CollectionVersionId(collection_id)) + else: + if version.canonical_collection.tombstoned: + raise GoneHTTPException() + if version is None: raise ForbiddenHTTPException() # TODO: maybe remake this exception - # TODO: handle tombstoning + user_info = UserInfo(token_info) - print(token_info, user_info.user_id, version.owner) access_type = "WRITE" if user_info.is_user_owner_or_allowed(version.owner) else "READ" response = self._collection_to_response(version, access_type) @@ -329,15 +334,25 @@ def get_collection_index(self): def delete_collection(self, collection_id: str, token_info: dict): """ - Deletes a collection version from the persistence store. + Deletes a collection version from the persistence store, or tombstones a canonical collection. 
""" - version = self.business_logic.get_collection_version(CollectionVersionId(collection_id)) + resource_id = CollectionVersionId(collection_id) + version = self.business_logic.get_collection_version(resource_id) if version is None: - raise ForbiddenHTTPException() + resource_id = CollectionId(collection_id) + version = self.business_logic.get_collection_version_from_canonical(resource_id) + if version is None: + raise ForbiddenHTTPException() if not UserInfo(token_info).is_user_owner_or_allowed(version.owner): raise ForbiddenHTTPException() - self.business_logic.delete_collection_version(CollectionVersionId(collection_id)) + if isinstance(resource_id, CollectionVersionId): + try: + self.business_logic.delete_collection_version(resource_id) + except CollectionIsPublishedException: + raise ForbiddenHTTPException() + elif isinstance(resource_id, CollectionId): + self.business_logic.tombstone_collection(resource_id) def update_collection(self, collection_id: str, body: dict, token_info: dict): """ @@ -523,7 +538,7 @@ def get_datasets_index(self): response = [] for dataset in self.business_logic.get_all_published_datasets(): - payload = self._dataset_to_response(dataset) + payload = self._dataset_to_response(dataset, is_tombstoned=False) enrich_dataset_with_ancestors( payload, "development_stage", ontology_mappings.development_stage_ontology_mapping ) diff --git a/backend/layers/business/business.py b/backend/layers/business/business.py index 1c4b486860f81..16004ac34631f 100644 --- a/backend/layers/business/business.py +++ b/backend/layers/business/business.py @@ -49,7 +49,7 @@ DatasetVersionId, Link, ) -from backend.layers.persistence.persistence_interface import DatabaseProviderInterface +from backend.layers.persistence.persistence_interface import DatabaseProviderInterface, PersistenceException from backend.layers.thirdparty.crossref_provider import CrossrefProviderInterface from backend.layers.thirdparty.s3_provider import S3Provider from backend.layers.thirdparty.step_function_provider import StepFunctionProviderInterface @@ -154,6 +154,8 @@ def get_collection_version_from_canonical( return published_version else: all_versions = self.get_collection_versions_from_canonical(collection_id) + if not all_versions: + return None return next(v for v in all_versions if v.published_at is None) def get_collections(self, filter: CollectionQueryFilter) -> Iterable[CollectionVersion]: @@ -423,9 +425,8 @@ def create_collection_version(self, collection_id: CollectionId) -> CollectionVe Also ensures that the collection does not have any active, unpublished version """ - try: - all_versions = self.database_provider.get_all_versions_for_collection(collection_id) - except Exception: # TODO: maybe add a RecordNotFound exception for finer grained exceptions + all_versions = self.database_provider.get_all_versions_for_collection(collection_id) + if not all_versions: raise CollectionVersionException(f"Collection {collection_id} cannot be found") if any(v for v in all_versions if v.published_at is None): @@ -437,6 +438,9 @@ def create_collection_version(self, collection_id: CollectionId) -> CollectionVe def delete_collection_version(self, version_id: CollectionVersionId) -> None: self.database_provider.delete_collection_version(version_id) + def tombstone_collection(self, collection_id: CollectionId) -> None: + self.database_provider.delete_canonical_collection(collection_id) + def publish_collection_version(self, version_id: CollectionVersionId) -> None: version = 
self.database_provider.get_collection_version(version_id) diff --git a/backend/layers/persistence/orm.py b/backend/layers/persistence/orm.py index 39908b1b85929..d75ba62733f43 100644 --- a/backend/layers/persistence/orm.py +++ b/backend/layers/persistence/orm.py @@ -63,7 +63,6 @@ class DatasetVersion: Column("created_at", DateTime), Column("metadata", JSON), Column("artifacts", ARRAY(UUID(as_uuid=True))), - Column("created_at", DateTime), Column("status", JSON), ) diff --git a/backend/layers/persistence/persistence.py b/backend/layers/persistence/persistence.py index fa01602d1d9dd..d2c89bb06815d 100644 --- a/backend/layers/persistence/persistence.py +++ b/backend/layers/persistence/persistence.py @@ -31,6 +31,7 @@ DatasetVersion, DatasetVersionId, ) +from backend.layers.business.exceptions import CollectionIsPublishedException from backend.layers.persistence.orm import ( Collection as CollectionTable, CollectionVersion as CollectionVersionTable, @@ -38,15 +39,11 @@ DatasetArtifact as DatasetArtifactTable, DatasetVersion as DatasetVersionTable, ) -from backend.layers.persistence.persistence_interface import DatabaseProviderInterface +from backend.layers.persistence.persistence_interface import DatabaseProviderInterface, PersistenceException logger = logging.getLogger(__name__) -class PersistenceException(Exception): - pass - - class DatabaseProvider(DatabaseProviderInterface): def __init__(self, database_uri: str = None, schema_name: str = "persistence_schema") -> None: if not database_uri: @@ -160,7 +157,9 @@ def _hydrate_dataset_version(self, dataset_version: DatasetVersionTable) -> Data def get_canonical_collection(self, collection_id: CollectionId) -> CanonicalCollection: with self._manage_session() as session: - collection = session.query(CollectionTable).filter_by(id=collection_id.id).one() + collection = session.query(CollectionTable).filter_by(id=collection_id.id).one_or_none() + if collection is None: + return None return CanonicalCollection( CollectionId(str(collection.id)), None if collection.version_id is None else CollectionVersionId(str(collection.version_id)), @@ -170,18 +169,11 @@ def get_canonical_collection(self, collection_id: CollectionId) -> CanonicalColl def get_canonical_dataset(self, dataset_id: DatasetId) -> CanonicalDataset: with self._manage_session() as session: - dataset = session.query(DatasetTable).filter_by(dataset_id=dataset_id.id).one() + dataset = session.query(DatasetTable).filter_by(dataset_id=dataset_id.id).one_or_none() + if dataset is None: + return None return CanonicalDataset(dataset_id, DatasetVersionId(str(dataset.dataset_version_id)), dataset.published_at) - # from typing import TypeVar, Generic - # T = TypeVar('T') - - # @staticmethod - # def parse_id(id: Any, id_type: T) -> Optional[T]: - # if id is None: - # return None - # return type(id_type)(str(id)) - def create_canonical_collection(self, owner: str, collection_metadata: CollectionMetadata) -> CollectionVersion: """ Creates a new canonical collection, generating a canonical collection_id and a new version_id. @@ -325,8 +317,9 @@ def delete_canonical_collection(self, collection_id: CollectionId) -> None: Deletes (tombstones) a canonical collection. 
""" with self._manage_session() as session: - canonical_collection = session.query(CollectionTable).filter_by(id=collection_id).one() - canonical_collection.tombstoned = True + canonical_collection = session.query(CollectionTable).filter_by(id=collection_id.id).one_or_none() + if canonical_collection: + canonical_collection.tombstoned = True def save_collection_metadata( self, version_id: CollectionVersionId, collection_metadata: CollectionMetadata @@ -377,7 +370,9 @@ def delete_collection_version(self, version_id: CollectionVersionId) -> None: """ with self._manage_session() as session: version = session.query(CollectionVersionTable).filter_by(version_id=version_id.id).one_or_none() - if version and version.published_at is None: + if version: + if version.published_at: + raise CollectionIsPublishedException(f'Published Collection Version {version_id} cannot be deleted') session.delete(version) def finalize_collection_version( diff --git a/backend/layers/persistence/persistence_interface.py b/backend/layers/persistence/persistence_interface.py index 8ee77882cc424..fff860e621a63 100644 --- a/backend/layers/persistence/persistence_interface.py +++ b/backend/layers/persistence/persistence_interface.py @@ -20,6 +20,10 @@ ) +class PersistenceException(Exception): + pass + + class DatabaseProviderInterface: def create_canonical_collection(self, owner: str, collection_metadata: CollectionMetadata) -> CollectionVersion: """ diff --git a/backend/layers/persistence/persistence_mock.py b/backend/layers/persistence/persistence_mock.py index 9184dd46876a5..210fa963e16b8 100644 --- a/backend/layers/persistence/persistence_mock.py +++ b/backend/layers/persistence/persistence_mock.py @@ -23,7 +23,8 @@ DatasetVersion, DatasetVersionId, ) -from backend.layers.persistence.persistence import DatabaseProviderInterface +from backend.layers.persistence.persistence import DatabaseProviderInterface, PersistenceException +from backend.layers.business.exceptions import CollectionIsPublishedException class DatabaseProviderMock(DatabaseProviderInterface): @@ -120,10 +121,14 @@ def get_all_collections_versions(self) -> Iterable[CollectionVersion]: # TODO: def get_all_mapped_collection_versions(self) -> Iterable[CollectionVersion]: # TODO: add filters if needed for version_id, collection_version in self.collections_versions.items(): if version_id in [c.version_id.id for c in self.collections.values()]: + collection_id = collection_version.collection_id.id + if self.collections[collection_id].tombstoned: + continue yield self._update_version_with_canonical(collection_version) - def delete_collection(self, collection_id: CollectionId) -> None: - del self.collections[collection_id.id] + def delete_canonical_collection(self, collection_id: CollectionId) -> None: + collection = self.collections[collection_id.id] + collection.tombstoned = True def save_collection_metadata( self, version_id: CollectionVersionId, collection_metadata: CollectionMetadata @@ -160,7 +165,9 @@ def add_collection_version(self, collection_id: CollectionId) -> CollectionVersi return new_version_id def delete_collection_version(self, version_id: CollectionVersionId) -> None: - # Can only delete an unpublished collection + collection_version = self.collections_versions.get(version_id.id) + if collection_version.published_at: + raise CollectionIsPublishedException("Can only delete unpublished collections") del self.collections_versions[version_id.id] def get_collection_version(self, version_id: CollectionVersionId) -> CollectionVersion: @@ -174,8 +181,6 
@@ def get_all_versions_for_collection(self, collection_id: CollectionId) -> Iterab for collection_version in self.collections_versions.values(): if collection_version.collection_id == collection_id: versions.append(self._update_version_with_canonical(collection_version, update_datasets=True)) - if not versions: - raise ValueError("Could not find matching collection Id") return versions def get_collection_version_with_datasets(self, version_id: CollectionVersionId) -> CollectionVersionWithDatasets: diff --git a/tests/unit/backend/layers/api/test_portal_api.py b/tests/unit/backend/layers/api/test_portal_api.py index 55c0e40e18f5d..db780aa6e4bac 100644 --- a/tests/unit/backend/layers/api/test_portal_api.py +++ b/tests/unit/backend/layers/api/test_portal_api.py @@ -691,9 +691,13 @@ def test__get_all_collections_for_index(self): """ collection = self.generate_published_collection() + collection_to_tombstone = self.generate_published_collection() private_collection = self.generate_unpublished_collection() - # TODO: a tombstoned collection should not be returned as well + tombstone_url = furl(path=f"/dp/v1/collections/{collection_to_tombstone.collection_id}") + headers = {"host": "localhost", "Content-Type": "application/json", "Cookie": self.get_cxguser_token()} + response = self.app.delete(tombstone_url.url, headers=headers) + self.assertEqual(204, response.status_code) test_url = furl(path="/dp/v1/collections/index") headers = {"host": "localhost", "Content-Type": "application/json"} @@ -705,6 +709,8 @@ def test__get_all_collections_for_index(self): self.assertIn(collection.collection_id.id, ids) self.assertNotIn(private_collection.collection_id.id, ids) self.assertNotIn(private_collection.version_id.id, ids) + self.assertNotIn(collection_to_tombstone.collection_id.id, ids) + self.assertNotIn(collection_to_tombstone.version_id.id, ids) actual_collection = body[-1] # last added collection self.assertEqual(actual_collection["id"], collection.collection_id.id) @@ -756,7 +762,6 @@ def test__create_collection__InvalidParameters_DOI(self): self.assertIn(error, response.json["detail"]) -# 🔴 TODO: This should be reviewed. 
Collection deletion is a weird beast class TestCollectionDeletion(NewBaseTest): def test_delete_private_collection_version__ok(self): # Generate test collection @@ -809,84 +814,61 @@ def test_delete_collection_revision__ok(self): dataset_ids = [dataset["id"] for dataset in body["datasets"]] self.assertNotIn(dataset.dataset_version_id, dataset_ids) - @unittest.skip("finish once tombstoning logic is done") - def test_tombstone_published_collection_with_revision__ok(self): - """Both the published and revised collections are tombstoned.""" - # Generate the public collection + def test_delete_collection_version__public__403(self): collection = self.generate_published_collection() - # Generate test collection - revision = self.generate_revision(collection.collection_id) - - processing_status = DatasetStatusUpdate(status_key=DatasetStatusKey.UPLOAD, status=DatasetUploadStatus.UPLOADED) - dataset_rev = self.generate_dataset(collection_version=revision, statuses=[processing_status]) - dataset_pub = collection.datasets[0] - dataset_pub_id = dataset_pub.version_id.id + test_url = furl(path=f"/dp/v1/collections/{collection.version_id}") headers = {"host": "localhost", "Content-Type": "application/json", "Cookie": self.get_cxguser_token()} + response = self.app.delete(test_url.url, headers=headers) + self.assertEqual(response.status_code, 403) - # Verify private collections exist - test_private_url = furl( - path=f"/dp/v1/collections/{revision.version_id}", query_params=dict(visibility="PRIVATE") - ) - response = self.app.get(test_private_url.url, headers=headers) - self.assertEqual(200, response.status_code) - body = json.loads(response.data) - dataset_ids = [dataset["id"] for dataset in body["datasets"]] - self.assertIn(dataset_rev.dataset_version_id, dataset_ids) - - # Verify public collections exist + def test_delete_published_collection__ok(self): + """Published collections are tombstoned.""" + # Generate the public collection + collection = self.generate_published_collection() + headers = {"host": "localhost", "Content-Type": "application/json", "Cookie": self.get_cxguser_token()} test_public_url = furl( path=f"/dp/v1/collections/{collection.collection_id}", query_params=dict(visibility="PUBLIC") ) - response = self.app.get(test_public_url.url, headers=headers) - self.assertEqual(200, response.status_code) - body = json.loads(response.data) - dataset_ids = [dataset["id"] for dataset in body["datasets"]] - self.assertIn(dataset_pub_id, dataset_ids) - - # TODO: TEST tombstoned logic - - # delete public collection - # response = self.app.delete(test_public_url.url, headers=headers) - # self.assertEqual(response.status_code, 204) - - # check collection revision and datasets are gone - # response = self.app.get(test_private_url.url, headers=headers) - # self.assertEqual(response.status_code, 403) + # tombstone public collection + response = self.app.delete(test_public_url.url, headers=headers) + self.assertEqual(response.status_code, 204) - # check public collection is tombstoned and datasets deleted. 
-        # response = self.app.get(test_public_url.url, headers=headers)
-        # self.assertEqual(response.status_code, 410)
+        # check collection is gone
+        response = self.app.get(test_public_url.url, headers=headers)
+        self.assertEqual(response.status_code, 410)

-    @unittest.skip("finish once tombstoning logic is done")
-    def test_delete_collection__already_tombstoned__ok(self):
-        # Generate the public collection
-        collection = self.generate_published_collection()
-        # Generate test collection
-        revision = self.generate_revision(collection.collection_id)
+        # check collection version details--datasets are tombstoned
+        test_version_url = furl(
+            path=f"/dp/v1/collections/{collection.version_id}", query_params=dict(visibility="PUBLIC")
+        )
+        response = self.app.get(test_version_url.url, headers=headers)
+        body = json.loads(response.data)
+        datasets_tombstoned = [dataset['tombstone'] for dataset in body['datasets']]
+        self.assertTrue(all(datasets_tombstoned))

-        test_url = furl(path=f"/dp/v1/collections/{collection.collection_id}", query_params=dict(visibility="PRIVATE"))
-        headers = {"host": "localhost", "Content-Type": "application/json", "Cookie": self.get_cxguser_token()}
-        response = self.app.delete(test_url.url, headers=headers)
-        self.assertEqual(response.status_code, 403)
+        # check that tombstoned collection doesn't appear in collections list endpoint
+        test_list_url = furl(path="/dp/v1/collections")
+        response = self.app.get(test_list_url.url, headers=headers)
+        body = json.loads(response.data)
+        collection_ids = [c['id'] for c in body['collections']]
+        self.assertNotIn(collection.collection_id.id, collection_ids)
+        self.assertNotIn(collection.version_id.id, collection_ids)

-    @unittest.skip("finish once tombstoning logic is done")
-    def test_delete_collection__public__ok(self):
-        collection = self.generate_published_collection()
+    def test_delete_collection_version__already_deleted__403(self):
+        collection = self.generate_unpublished_collection()

-        test_urls = [
-            furl(path=f"/dp/v1/collections/{collection.collection_id}"),
-        ]
+        # delete private collection
+        test_private_url = furl(
+            path=f"/dp/v1/collections/{collection.version_id}", query_params=dict(visibility="PRIVATE")
+        )
         headers = {"host": "localhost", "Content-Type": "application/json", "Cookie": self.get_cxguser_token()}
-        for test_url in test_urls:
-            with self.subTest(test_url.url):
-                response = self.app.delete(test_url.url, headers=headers)
-                self.assertEqual(response.status_code, 204)
+        response = self.app.delete(test_private_url.url, headers=headers)
+        self.assertEqual(response.status_code, 204)

-                response = self.app.get(test_url.url, headers=headers)
-                body = json.loads(response.data)
-                self.assertEqual(response.status_code, 410)
-                self.assertEqual(body, "")
+        # check that DELETE on an already deleted collection is forbidden
+        response = self.app.delete(test_private_url.url, headers=headers)
+        self.assertEqual(response.status_code, 403)

     def test_delete_collection__not_owner(self):
         collection = self.generate_unpublished_collection(owner="not_test_user_id")
diff --git a/tests/unit/backend/layers/business/test_business.py b/tests/unit/backend/layers/business/test_business.py
index 738674c72f81e..29c702be722c3 100644
--- a/tests/unit/backend/layers/business/test_business.py
+++ b/tests/unit/backend/layers/business/test_business.py
@@ -16,6 +16,7 @@
     CollectionPublishException,
     CollectionUpdateException,
     CollectionVersionException,
+    CollectionNotFoundException,
     DatasetIngestException,
     DatasetNotFoundException,
 )
@@ -842,6 +843,17 @@ def test_delete_collection_version_ok(self):
         self.assertEqual(version.version_id, published_collection.version_id)
         self.assertNotEqual(version.version_id, new_version.version_id)

+    def test_tombstone_collection_ok(self):
+        """
+        A collection can be marked as tombstoned using 'tombstone_collection'.
+        """
+        published_collection = self.initialize_published_collection()
+        self.business_logic.tombstone_collection(published_collection.collection_id)
+
+        # The version's canonical collection should now be marked as tombstoned
+        collection_version = self.business_logic.get_collection_version(published_collection.version_id)
+        self.assertTrue(collection_version.canonical_collection.tombstoned)
+
     def test_publish_version_fails_on_published_collection(self):
         """
         `publish_collection_version` should fail if called on a collection version that is already published.

From f9923332f31c33cf5aae7ea36dc9465de632e754 Mon Sep 17 00:00:00 2001
From: nayib-jose-gloria
Date: Tue, 13 Dec 2022 16:39:41 -0500
Subject: [PATCH 3/3] linting + pr feedback

Signed-off-by: nayib-jose-gloria
---
 backend/common/utils/http_exceptions.py          |  1 -
 backend/layers/api/portal_api.py                 | 14 ++++++--------
 backend/layers/persistence/persistence.py        |  2 +-
 backend/layers/persistence/persistence_mock.py   |  2 +-
 tests/unit/backend/layers/api/test_portal_api.py | 11 ++++++-----
 5 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/backend/common/utils/http_exceptions.py b/backend/common/utils/http_exceptions.py
index 73aeae4feebc1..4c0bf02d3026c 100644
--- a/backend/common/utils/http_exceptions.py
+++ b/backend/common/utils/http_exceptions.py
@@ -64,4 +64,3 @@ def __init__(self, detail: str = "A duplicate resource already exists.", *args,
 class GoneHTTPException(ProblemException):
     def __init__(self, detail: str = "Resource has been removed", *args, **kwargs) -> None:
         super().__init__(status=requests.codes.gone, title="Gone", detail=detail, *args, **kwargs)
-
diff --git a/backend/layers/api/portal_api.py b/backend/layers/api/portal_api.py
index af582c09e941d..0c2960f403e18 100644
--- a/backend/layers/api/portal_api.py
+++ b/backend/layers/api/portal_api.py
@@ -142,7 +142,7 @@ def remove_none(self, body: dict):
         return {k: v for k, v in body.items() if v is not None}

     # Note: `metadata` can be none while the dataset is uploading
-    def _dataset_to_response(self, dataset: DatasetVersion, is_tombstoned):
+    def _dataset_to_response(self, dataset: DatasetVersion, is_tombstoned: bool):
         return self.remove_none(
             {
                 "assay": None
@@ -227,14 +227,11 @@ def get_collection_details(self, collection_id: str, token_info: dict):
         version = self.business_logic.get_published_collection_version(CollectionId(collection_id))
         if version is None:
             version = self.business_logic.get_collection_version(CollectionVersionId(collection_id))
-        else:
-            if version.canonical_collection.tombstoned:
-                raise GoneHTTPException()
-
-        if version is None:
-            raise ForbiddenHTTPException()  # TODO: maybe remake this exception
-
+        if version is None:
+            raise ForbiddenHTTPException()
+        if version.canonical_collection.tombstoned:
+            raise GoneHTTPException()
         user_info = UserInfo(token_info)
         access_type = "WRITE" if user_info.is_user_owner_or_allowed(version.owner) else "READ"
@@ -343,6 +340,7 @@ def delete_collection(self, collection_id: str, token_info: dict):
             version = self.business_logic.get_collection_version_from_canonical(resource_id)
             if version is None:
                 raise ForbiddenHTTPException()
+
         if not UserInfo(token_info).is_user_owner_or_allowed(version.owner):
             raise ForbiddenHTTPException()
diff --git a/backend/layers/persistence/persistence.py b/backend/layers/persistence/persistence.py
index d2c89bb06815d..ff4cfbb59e206 100644
--- a/backend/layers/persistence/persistence.py
+++ b/backend/layers/persistence/persistence.py
@@ -372,7 +372,7 @@ def delete_collection_version(self, version_id: CollectionVersionId) -> None:
             version = session.query(CollectionVersionTable).filter_by(version_id=version_id.id).one_or_none()
             if version:
                 if version.published_at:
-                    raise CollectionIsPublishedException(f'Published Collection Version {version_id} cannot be deleted')
+                    raise CollectionIsPublishedException(f"Published Collection Version {version_id} cannot be deleted")
                 session.delete(version)

     def finalize_collection_version(
diff --git a/backend/layers/persistence/persistence_mock.py b/backend/layers/persistence/persistence_mock.py
index 210fa963e16b8..79f006c19dc53 100644
--- a/backend/layers/persistence/persistence_mock.py
+++ b/backend/layers/persistence/persistence_mock.py
@@ -166,7 +166,7 @@ def add_collection_version(self, collection_id: CollectionId) -> CollectionVersi
     def delete_collection_version(self, version_id: CollectionVersionId) -> None:
         collection_version = self.collections_versions.get(version_id.id)
-        if collection_version.published_at:
+        if collection_version.published_at is not None:
             raise CollectionIsPublishedException("Can only delete unpublished collections")
         del self.collections_versions[version_id.id]
diff --git a/tests/unit/backend/layers/api/test_portal_api.py b/tests/unit/backend/layers/api/test_portal_api.py
index db780aa6e4bac..92e283f49867e 100644
--- a/tests/unit/backend/layers/api/test_portal_api.py
+++ b/tests/unit/backend/layers/api/test_portal_api.py
@@ -838,20 +838,21 @@ def test_delete_published_collection__ok(self):
         response = self.app.get(test_public_url.url, headers=headers)
         self.assertEqual(response.status_code, 410)

-        # check collection version details--datasets are tombstoned
+        # check collection version also returns 'gone'
         test_version_url = furl(
             path=f"/dp/v1/collections/{collection.version_id}", query_params=dict(visibility="PUBLIC")
         )
         response = self.app.get(test_version_url.url, headers=headers)
-        body = json.loads(response.data)
-        datasets_tombstoned = [dataset['tombstone'] for dataset in body['datasets']]
-        self.assertTrue(all(datasets_tombstoned))
+        self.assertEqual(response.status_code, 410)
+        # body = json.loads(response.data)
+        # datasets_tombstoned = [dataset["tombstone"] for dataset in body["datasets"]]
+        # self.assertTrue(all(datasets_tombstoned))

         # check that tombstoned collection doesn't appear in collections list endpoint
         test_list_url = furl(path="/dp/v1/collections")
         response = self.app.get(test_list_url.url, headers=headers)
         body = json.loads(response.data)
-        collection_ids = [c['id'] for c in body['collections']]
+        collection_ids = [c["id"] for c in body["collections"]]
         self.assertNotIn(collection.collection_id.id, collection_ids)
         self.assertNotIn(collection.version_id.id, collection_ids)
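
Usage note: a minimal sketch of the end-to-end tombstone flow this series introduces, written against the NewBaseTest fixtures from tests/unit/backend/layers/common/base_api_test.py (self.app, self.business_logic, generate_published_collection); it mirrors the assertions in test_delete_published_collection__ok and test_tombstone_collection_ok. Illustrative only, not part of the patches, and the method name is hypothetical.

    # Sketch only: assumes the NewBaseTest fixtures shown in the patches above.
    def test_tombstone_flow__sketch(self):
        collection = self.generate_published_collection()
        headers = {"host": "localhost", "Content-Type": "application/json", "Cookie": self.get_cxguser_token()}

        # DELETE with the *canonical* collection id tombstones the collection (204)
        # instead of hard-deleting a version.
        response = self.app.delete(f"/dp/v1/collections/{collection.collection_id}", headers=headers)
        self.assertEqual(204, response.status_code)

        # The tombstone flag is persisted on the canonical collection ...
        version = self.business_logic.get_collection_version(collection.version_id)
        self.assertTrue(version.canonical_collection.tombstoned)

        # ... and subsequent reads return 410 Gone via GoneHTTPException.
        response = self.app.get(f"/dp/v1/collections/{collection.collection_id}", headers=headers)
        self.assertEqual(410, response.status_code)

By contrast, DELETE with a collection *version* id still hard-deletes an unpublished version, and delete_collection_version now raises CollectionIsPublishedException (surfaced as 403) for published ones, so the two branches of delete_collection stay distinct.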