Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add canonical collection tombstoning to portal redesign #3740

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions backend/common/utils/http_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,9 @@ def __init__(self, detail: str = "Method not allowed.", *args, **kwargs) -> None
class ConflictException(ProblemException):
    """HTTP 409 Conflict: raised when a request would create a resource that already exists."""

    def __init__(self, detail: str = "A duplicate resource already exists.", *args, **kwargs) -> None:
        # status resolves to 409 via requests.codes.conflict
        super().__init__(status=requests.codes.conflict, title="Conflict", detail=detail, *args, **kwargs)


class GoneHTTPException(ProblemException):
    """HTTP 410 Gone: raised when the requested resource has been removed (e.g. a tombstoned collection)."""

    def __init__(self, detail: str = "Resource has been removed", *args, **kwargs) -> None:
        # status resolves to 410 via requests.codes.gone
        super().__init__(status=requests.codes.gone, title="Gone", detail=detail, *args, **kwargs)

37 changes: 26 additions & 11 deletions backend/layers/api/portal_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from backend.common.utils.http_exceptions import (
ConflictException,
GoneHTTPException,
ForbiddenHTTPException,
InvalidParametersHTTPException,
MethodNotAllowedException,
Expand Down Expand Up @@ -141,7 +142,7 @@ def remove_none(self, body: dict):
return {k: v for k, v in body.items() if v is not None}

# Note: `metadata` can be none while the dataset is uploading
def _dataset_to_response(self, dataset: DatasetVersion):
def _dataset_to_response(self, dataset: DatasetVersion, is_tombstoned):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def _dataset_to_response(self, dataset: DatasetVersion, is_tombstoned):
def _dataset_to_response(self, dataset: DatasetVersion, is_tombstoned: bool):

return self.remove_none(
{
"assay": None
Expand Down Expand Up @@ -186,7 +187,7 @@ def _dataset_to_response(self, dataset: DatasetVersion):
"tissue": None
if dataset.metadata is None
else self._ontology_term_ids_to_response(dataset.metadata.tissue),
"tombstone": False, # TODO
"tombstone": is_tombstoned,
"updated_at": dataset.created_at, # Legacy: datasets can't be updated anymore
"x_approximate_distribution": None
if dataset.metadata is None
Expand All @@ -196,6 +197,7 @@ def _dataset_to_response(self, dataset: DatasetVersion):

def _collection_to_response(self, collection: CollectionVersion, access_type: str):
collection_id = collection.collection_id.id if collection.published_at is not None else collection.version_id.id
is_tombstoned = collection.canonical_collection.tombstoned
return self.remove_none(
{
"access_type": access_type,
Expand All @@ -204,7 +206,7 @@ def _collection_to_response(self, collection: CollectionVersion, access_type: st
"created_at": collection.created_at,
"curator_name": "", # TODO
"data_submission_policy_version": "1.0", # TODO
"datasets": [self._dataset_to_response(ds) for ds in collection.datasets],
"datasets": [self._dataset_to_response(ds, is_tombstoned=is_tombstoned) for ds in collection.datasets],
"description": collection.metadata.description,
"id": collection_id,
"links": [self._link_to_response(link) for link in collection.metadata.links],
Expand All @@ -225,13 +227,16 @@ def get_collection_details(self, collection_id: str, token_info: dict):
version = self.business_logic.get_published_collection_version(CollectionId(collection_id))
if version is None:
version = self.business_logic.get_collection_version(CollectionVersionId(collection_id))
else:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: else is probably not needed here

if version.canonical_collection.tombstoned:
raise GoneHTTPException()

if version is None:
raise ForbiddenHTTPException() # TODO: maybe remake this exception

# TODO: handle tombstoning


user_info = UserInfo(token_info)
print(token_info, user_info.user_id, version.owner)
access_type = "WRITE" if user_info.is_user_owner_or_allowed(version.owner) else "READ"

response = self._collection_to_response(version, access_type)
Expand Down Expand Up @@ -329,15 +334,25 @@ def get_collection_index(self):

def delete_collection(self, collection_id: str, token_info: dict):
"""
Deletes a collection version from the persistence store.
Deletes a collection version from the persistence store, or tombstones a canonical collection.
"""
version = self.business_logic.get_collection_version(CollectionVersionId(collection_id))
resource_id = CollectionVersionId(collection_id)
version = self.business_logic.get_collection_version(resource_id)
if version is None:
raise ForbiddenHTTPException()
resource_id = CollectionId(collection_id)
version = self.business_logic.get_collection_version_from_canonical(resource_id)
if version is None:
raise ForbiddenHTTPException()
if not UserInfo(token_info).is_user_owner_or_allowed(version.owner):
raise ForbiddenHTTPException()

self.business_logic.delete_collection_version(CollectionVersionId(collection_id))
if isinstance(resource_id, CollectionVersionId):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure about using isinstance in this context. That said, I can't come up with a better solution myself.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can replace with a flag if you prefer.

Or we can rework the underlying business logic method to fetch the collection/collection version before deleting and handle the logic there. The only issue is we'd be introducing a 2nd set of lookups, since we need the lookup here to get the version owner for the auth logic handling

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's leave it as is

try:
self.business_logic.delete_collection_version(resource_id)
except CollectionIsPublishedException:
raise ForbiddenHTTPException()
elif isinstance(resource_id, CollectionId):
self.business_logic.tombstone_collection(resource_id)

def update_collection(self, collection_id: str, body: dict, token_info: dict):
"""
Expand Down Expand Up @@ -523,7 +538,7 @@ def get_datasets_index(self):

response = []
for dataset in self.business_logic.get_all_published_datasets():
payload = self._dataset_to_response(dataset)
payload = self._dataset_to_response(dataset, is_tombstoned=False)
enrich_dataset_with_ancestors(
payload, "development_stage", ontology_mappings.development_stage_ontology_mapping
)
Expand Down Expand Up @@ -555,7 +570,7 @@ def get_dataset_identifiers(self, url: str):
"""
try:
path = urlparse(url).path
id = [segment for segment in path.split("/") if segment][-1].strip(".cxg")
id = [segment for segment in path.split("/") if segment][-1].removesuffix(".cxg")
except Exception:
raise ServerErrorHTTPException("Cannot parse URL")

Expand Down
14 changes: 9 additions & 5 deletions backend/layers/business/business.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,9 @@
DatasetValidationStatus,
DatasetVersion,
DatasetVersionId,
Link
Link,
)
from backend.layers.persistence.persistence_interface import DatabaseProviderInterface
from backend.layers.persistence.persistence_interface import DatabaseProviderInterface, PersistenceException
from backend.layers.thirdparty.crossref_provider import CrossrefProviderInterface
from backend.layers.thirdparty.s3_provider import S3Provider
from backend.layers.thirdparty.step_function_provider import StepFunctionProviderInterface
Expand Down Expand Up @@ -154,6 +154,8 @@ def get_collection_version_from_canonical(
return published_version
else:
all_versions = self.get_collection_versions_from_canonical(collection_id)
if not all_versions:
return None
return next(v for v in all_versions if v.published_at is None)

def get_collections(self, filter: CollectionQueryFilter) -> Iterable[CollectionVersion]:
Expand Down Expand Up @@ -423,9 +425,8 @@ def create_collection_version(self, collection_id: CollectionId) -> CollectionVe
Also ensures that the collection does not have any active, unpublished version
"""

try:
all_versions = self.database_provider.get_all_versions_for_collection(collection_id)
except Exception: # TODO: maybe add a RecordNotFound exception for finer grained exceptions
all_versions = self.database_provider.get_all_versions_for_collection(collection_id)
if not all_versions:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

raise CollectionVersionException(f"Collection {collection_id} cannot be found")

if any(v for v in all_versions if v.published_at is None):
Expand All @@ -437,6 +438,9 @@ def create_collection_version(self, collection_id: CollectionId) -> CollectionVe
def delete_collection_version(self, version_id: CollectionVersionId) -> None:
    """Delete a single collection version from the persistence store.

    Delegates to the database provider, which raises
    CollectionIsPublishedException if the version has already been published.
    """
    self.database_provider.delete_collection_version(version_id)

def tombstone_collection(self, collection_id: CollectionId) -> None:
    """Tombstone (soft-delete) a canonical collection.

    Delegates to the database provider, which sets the canonical
    collection's `tombstoned` flag rather than removing the row.
    """
    self.database_provider.delete_canonical_collection(collection_id)

def publish_collection_version(self, version_id: CollectionVersionId) -> None:
version = self.database_provider.get_collection_version(version_id)

Expand Down
1 change: 0 additions & 1 deletion backend/layers/persistence/orm.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ class DatasetVersion:
Column("created_at", DateTime),
Column("metadata", JSON),
Column("artifacts", ARRAY(UUID(as_uuid=True))),
Column("created_at", DateTime),
Column("status", JSON),
)

Expand Down
37 changes: 17 additions & 20 deletions backend/layers/persistence/persistence.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,19 @@
DatasetVersion,
DatasetVersionId,
)
from backend.layers.business.exceptions import CollectionIsPublishedException
from backend.layers.persistence.orm import (
Collection as CollectionTable,
CollectionVersion as CollectionVersionTable,
Dataset as DatasetTable,
DatasetArtifact as DatasetArtifactTable,
DatasetVersion as DatasetVersionTable,
)
from backend.layers.persistence.persistence_interface import DatabaseProviderInterface
from backend.layers.persistence.persistence_interface import DatabaseProviderInterface, PersistenceException

logger = logging.getLogger(__name__)


class PersistenceException(Exception):
pass


class DatabaseProvider(DatabaseProviderInterface):
def __init__(self, database_uri: str = None, schema_name: str = "persistence_schema") -> None:
if not database_uri:
Expand Down Expand Up @@ -160,7 +157,9 @@ def _hydrate_dataset_version(self, dataset_version: DatasetVersionTable) -> Data

def get_canonical_collection(self, collection_id: CollectionId) -> CanonicalCollection:
with self._manage_session() as session:
collection = session.query(CollectionTable).filter_by(id=collection_id.id).one()
collection = session.query(CollectionTable).filter_by(id=collection_id.id).one_or_none()
if collection is None:
return None
return CanonicalCollection(
CollectionId(str(collection.id)),
None if collection.version_id is None else CollectionVersionId(str(collection.version_id)),
Expand All @@ -170,18 +169,11 @@ def get_canonical_collection(self, collection_id: CollectionId) -> CanonicalColl

def get_canonical_dataset(self, dataset_id: DatasetId) -> CanonicalDataset:
with self._manage_session() as session:
dataset = session.query(DatasetTable).filter_by(dataset_id=dataset_id.id).one()
dataset = session.query(DatasetTable).filter_by(dataset_id=dataset_id.id).one_or_none()
if dataset is None:
return None
return CanonicalDataset(dataset_id, DatasetVersionId(str(dataset.dataset_version_id)), dataset.published_at)

# from typing import TypeVar, Generic
# T = TypeVar('T')

# @staticmethod
# def parse_id(id: Any, id_type: T) -> Optional[T]:
# if id is None:
# return None
# return type(id_type)(str(id))

def create_canonical_collection(self, owner: str, collection_metadata: CollectionMetadata) -> CollectionVersion:
"""
Creates a new canonical collection, generating a canonical collection_id and a new version_id.
Expand Down Expand Up @@ -325,8 +317,9 @@ def delete_canonical_collection(self, collection_id: CollectionId) -> None:
Deletes (tombstones) a canonical collection.
"""
with self._manage_session() as session:
canonical_collection = session.query(CollectionTable).filter_by(id=collection_id).one()
canonical_collection.tombstoned = True
canonical_collection = session.query(CollectionTable).filter_by(id=collection_id.id).one_or_none()
if canonical_collection:
canonical_collection.tombstoned = True

def save_collection_metadata(
self, version_id: CollectionVersionId, collection_metadata: CollectionMetadata
Expand Down Expand Up @@ -377,7 +370,9 @@ def delete_collection_version(self, version_id: CollectionVersionId) -> None:
"""
with self._manage_session() as session:
version = session.query(CollectionVersionTable).filter_by(version_id=version_id.id).one_or_none()
if version and version.published_at is None:
if version:
if version.published_at:
raise CollectionIsPublishedException(f'Published Collection Version {version_id} cannot be deleted')
session.delete(version)

def finalize_collection_version(
Expand Down Expand Up @@ -440,7 +435,9 @@ def get_dataset_version(self, dataset_version_id: DatasetVersionId) -> DatasetVe
Returns a dataset version by id.
"""
with self._manage_session() as session:
dataset_version = session.query(DatasetVersionTable).filter_by(version_id=dataset_version_id.id).one_or_none()
dataset_version = (
session.query(DatasetVersionTable).filter_by(version_id=dataset_version_id.id).one_or_none()
)
if dataset_version is None:
return None
return self._hydrate_dataset_version(dataset_version)
Expand Down
4 changes: 4 additions & 0 deletions backend/layers/persistence/persistence_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
)


class PersistenceException(Exception):
    """Base exception raised by persistence-layer providers when a storage operation fails."""

    pass


class DatabaseProviderInterface:
def create_canonical_collection(self, owner: str, collection_metadata: CollectionMetadata) -> CollectionVersion:
"""
Expand Down
22 changes: 14 additions & 8 deletions backend/layers/persistence/persistence_mock.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
DatasetVersion,
DatasetVersionId,
)
from backend.layers.persistence.persistence import DatabaseProviderInterface
from backend.layers.persistence.persistence import DatabaseProviderInterface, PersistenceException
from backend.layers.business.exceptions import CollectionIsPublishedException


class DatabaseProviderMock(DatabaseProviderInterface):
Expand Down Expand Up @@ -82,8 +83,9 @@ def create_canonical_collection(self, owner: str, collection_metadata: Collectio
# Don't set mappings here - those will be set when publishing the collection!
return copy.deepcopy(version)

def _update_version_with_canonical(self, version: Union[CollectionVersion, CollectionVersionWithDatasets],
update_datasets: bool = False):
def _update_version_with_canonical(
self, version: Union[CollectionVersion, CollectionVersionWithDatasets], update_datasets: bool = False
):
"""
Private method that returns a version updated with the canonical collection.
This is equivalent to a database double lookup (or join).
Expand Down Expand Up @@ -119,10 +121,14 @@ def get_all_collections_versions(self) -> Iterable[CollectionVersion]: # TODO:
def get_all_mapped_collection_versions(self) -> Iterable[CollectionVersion]: # TODO: add filters if needed
for version_id, collection_version in self.collections_versions.items():
if version_id in [c.version_id.id for c in self.collections.values()]:
collection_id = collection_version.collection_id.id
if self.collections[collection_id].tombstoned:
continue
yield self._update_version_with_canonical(collection_version)

def delete_collection(self, collection_id: CollectionId) -> None:
del self.collections[collection_id.id]
def delete_canonical_collection(self, collection_id: CollectionId) -> None:
    """Tombstone (soft-delete) a canonical collection in the mock store.

    An unknown collection id is a no-op, mirroring the real
    DatabaseProvider.delete_canonical_collection, which looks the row up
    with `one_or_none()` and only sets the flag when a row is found.
    The original mock indexed `self.collections[...]` directly and would
    raise KeyError where the real provider silently returns.
    """
    collection = self.collections.get(collection_id.id)
    if collection is not None:
        collection.tombstoned = True

def save_collection_metadata(
self, version_id: CollectionVersionId, collection_metadata: CollectionMetadata
Expand Down Expand Up @@ -159,7 +165,9 @@ def add_collection_version(self, collection_id: CollectionId) -> CollectionVersi
return new_version_id

def delete_collection_version(self, version_id: CollectionVersionId) -> None:
    """Delete an unpublished collection version from the mock store.

    Raises:
        CollectionIsPublishedException: if the version has been published.

    Notes:
        - `.get()` may return None for an unknown version id; the original
          then crashed with AttributeError on `.published_at`. Treat a
          missing version as a no-op instead, matching the real provider's
          `one_or_none()` guard.
        - Compare `published_at is not None` rather than truthiness, so a
          falsy-but-set timestamp (epoch) still counts as published
          (defect flagged in review).
    """
    collection_version = self.collections_versions.get(version_id.id)
    if collection_version is None:
        return
    if collection_version.published_at is not None:
        raise CollectionIsPublishedException("Can only delete unpublished collections")
    del self.collections_versions[version_id.id]

def get_collection_version(self, version_id: CollectionVersionId) -> CollectionVersion:
Expand All @@ -173,8 +181,6 @@ def get_all_versions_for_collection(self, collection_id: CollectionId) -> Iterab
for collection_version in self.collections_versions.values():
if collection_version.collection_id == collection_id:
versions.append(self._update_version_with_canonical(collection_version, update_datasets=True))
if not versions:
raise ValueError("Could not find matching collection Id")
return versions

def get_collection_version_with_datasets(self, version_id: CollectionVersionId) -> CollectionVersionWithDatasets:
Expand Down
7 changes: 6 additions & 1 deletion backend/layers/processing/process_cxg.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
#!/usr/bin/env python3
from backend.layers.business.business_interface import BusinessLogicInterface
from backend.layers.common.entities import DatasetArtifactType, DatasetConversionStatus, DatasetStatusKey, DatasetVersionId
from backend.layers.common.entities import (
DatasetArtifactType,
DatasetConversionStatus,
DatasetStatusKey,
DatasetVersionId,
)
from backend.layers.processing.process_logic import ProcessingLogic
from backend.layers.thirdparty.s3_provider import S3ProviderInterface
from backend.layers.thirdparty.uri_provider import UriProviderInterface
Expand Down
11 changes: 9 additions & 2 deletions backend/layers/processing/process_download_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,8 +235,15 @@ def process(self, dataset_id: DatasetVersionId, dropbox_url: str, artifact_bucke

bucket_prefix = self.get_bucket_prefix(dataset_id.id)
# Upload the original dataset to the artifact bucket
self.create_artifact(local_filename, DatasetArtifactType.H5AD, bucket_prefix, dataset_id, artifact_bucket, DatasetStatusKey.H5AD)
self.create_artifact(
local_filename, DatasetArtifactType.H5AD, bucket_prefix, dataset_id, artifact_bucket, DatasetStatusKey.H5AD
)
# Upload the labeled dataset to the artifact bucket
self.create_artifact(
file_with_labels, DatasetArtifactType.H5AD, bucket_prefix, dataset_id, artifact_bucket, DatasetStatusKey.H5AD
file_with_labels,
DatasetArtifactType.H5AD,
bucket_prefix,
dataset_id,
artifact_bucket,
DatasetStatusKey.H5AD,
)
2 changes: 1 addition & 1 deletion backend/layers/processing/process_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def create_artifact(
self,
file_name: str,
artifact_type: str,
bucket_prefix: str,
bucket_prefix: str,
dataset_id: DatasetVersionId,
artifact_bucket: str,
processing_status_key: DatasetStatusKey,
Expand Down
7 changes: 6 additions & 1 deletion backend/layers/processing/process_seurat.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@
import subprocess

from backend.layers.business.business_interface import BusinessLogicInterface
from backend.layers.common.entities import DatasetArtifactType, DatasetConversionStatus, DatasetStatusKey, DatasetVersionId
from backend.layers.common.entities import (
DatasetArtifactType,
DatasetConversionStatus,
DatasetStatusKey,
DatasetVersionId,
)
from backend.layers.processing.process_logic import ProcessingLogic
from backend.layers.thirdparty.s3_provider import S3ProviderInterface
from backend.layers.thirdparty.uri_provider import UriProviderInterface
Expand Down
Loading