From e3303dd8fffb13e1362167a68a0f855730f7c0c3 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Fri, 23 Dec 2022 13:25:58 -0500 Subject: [PATCH 01/22] refactor: persistence layer clean-up Signed-off-by: nayib-jose-gloria --- DEV_ENV.md | 11 +- backend/layers/common/entities.py | 39 ++--- backend/layers/persistence/orm.py | 96 +++++------ backend/layers/persistence/persistence.py | 150 ++++++++---------- .../layers/persistence/persistence_mock.py | 20 +-- scripts/cxg_admin_scripts/migrate.py | 26 ++- .../backend/layers/api/test_portal_api.py | 16 +- .../backend/layers/business/test_business.py | 2 +- tests/unit/backend/layers/common/base_test.py | 3 - 9 files changed, 156 insertions(+), 207 deletions(-) diff --git a/DEV_ENV.md b/DEV_ENV.md index e5e7fbc6966ad..4a41422023e72 100644 --- a/DEV_ENV.md +++ b/DEV_ENV.md @@ -51,11 +51,12 @@ The dev environment is initialized with AWS Secrets/S3 data in the [scripts/setu ### Make targets for running tests in dev -| Command | Description | Notes | -| ---------------------------- | --------------------------------------------- | ----- | -| `make local-unit-test` | Run backend tests in the dev environment | | -| `make local-functional-test` | Run functional tests in the dev environment | | -| `make local-smoke-test` | Run frontend/e2e tests in the dev environment | | +| Command | Description | Notes | +|------------------------------|----------------------------------------------------------------------------------------------| ----- | +| `make local-unit-test` | Run backend tests in the local dockerized environment, against mock of persistence layer | | +| `make local-integration-test` | Run backend tests in the local dockerized environment, against dockerized database instance. | | +| `make local-functional-test` | Run functional tests in local dockerized environment | | +| `make local-smoke-test` | Run frontend/e2e tests in the local dockerized environment | | ### External dependencies diff --git a/backend/layers/common/entities.py b/backend/layers/common/entities.py index 86ff2a531cc48..bdd18f167842d 100644 --- a/backend/layers/common/entities.py +++ b/backend/layers/common/entities.py @@ -3,6 +3,8 @@ from enum import Enum from typing import List, Optional +import uuid + from dataclasses_json import dataclass_json @@ -98,43 +100,34 @@ def empty(): @dataclass -class CollectionId: +class EntityId: id: str - def __repr__(self) -> str: - return self.id - - -@dataclass -class CollectionVersionId: - id: str + def __init__(self, entity_id: str = None): + self.id = str(entity_id) if entity_id is not None else str(uuid.uuid4()) def __repr__(self) -> str: return self.id -@dataclass -class DatasetId: - id: str +class CollectionId(EntityId): + pass - def __repr__(self) -> str: - return self.id +class CollectionVersionId(EntityId): + pass -@dataclass -class DatasetVersionId: - id: str - def __repr__(self) -> str: - return self.id +class DatasetId(EntityId): + pass -@dataclass -class DatasetArtifactId: - id: str +class DatasetVersionId(EntityId): + pass - def __repr__(self) -> str: - return self.id + +class DatasetArtifactId(EntityId): + pass @dataclass diff --git a/backend/layers/persistence/orm.py b/backend/layers/persistence/orm.py index b8a997efd31c9..c6867a97e7ae6 100644 --- a/backend/layers/persistence/orm.py +++ b/backend/layers/persistence/orm.py @@ -10,71 +10,61 @@ @mapper_registry.mapped -class Collection: +class CollectionTable: - __table__ = Table( - "Collection", - mapper_registry.metadata, - Column("id", UUID(as_uuid=True), primary_key=True), 
- Column("version_id", UUID(as_uuid=True)), - Column("originally_published_at", DateTime), - Column("tombstoned", BOOLEAN), - ) + __tablename__ = "Collection" + + id = Column(UUID(as_uuid=True), primary_key=True) + version_id = Column(UUID(as_uuid=True)) + originally_published_at = Column(DateTime) + tombstone = Column(BOOLEAN) @mapper_registry.mapped -class CollectionVersion: - - __table__ = Table( - "CollectionVersion", - mapper_registry.metadata, - Column("version_id", UUID(as_uuid=True), primary_key=True), - Column("collection_id", UUID(as_uuid=True)), - Column("metadata", JSON), - Column("owner", String), - Column("curator_name", String), - Column("publisher_metadata", JSON), - Column("published_at", DateTime), - Column("created_at", DateTime), - Column("datasets", ARRAY(UUID(as_uuid=True))), - ) +class CollectionVersionTable: + + __tablename__ = "CollectionVersion" + + id = Column(UUID(as_uuid=True), primary_key=True) + collection_id = Column(UUID(as_uuid=True)) + collection_metadata = Column(JSON) + owner = Column(String) + curator_name = Column(String) + publisher_metadata = Column(JSON) + published_at = Column(DateTime) + created_at = Column(DateTime) + datasets = Column(ARRAY(UUID(as_uuid=True))) @mapper_registry.mapped -class Dataset: +class DatasetTable: + + __tablename__ = "Dataset" - __table__ = Table( - "Dataset", - mapper_registry.metadata, - Column("dataset_id", UUID(as_uuid=True), primary_key=True), - Column("dataset_version_id", UUID(as_uuid=True)), - Column("published_at", DateTime), - ) + id = Column(UUID(as_uuid=True), primary_key=True) + version_id = Column(UUID(as_uuid=True)) + published_at = Column(DateTime) @mapper_registry.mapped -class DatasetVersion: +class DatasetVersionTable: - __table__ = Table( - "DatasetVersion", - mapper_registry.metadata, - Column("version_id", UUID(as_uuid=True), primary_key=True), - Column("dataset_id", UUID(as_uuid=True), ForeignKey("Dataset.dataset_id")), - Column("collection_id", UUID(as_uuid=True)), - Column("created_at", DateTime), - Column("metadata", JSON), - Column("artifacts", ARRAY(UUID(as_uuid=True))), - Column("status", JSON), - ) + __tablename__ = "DatasetVersion" + + id = Column(UUID(as_uuid=True), primary_key=True) + dataset_id = Column(UUID(as_uuid=True), ForeignKey("Dataset.id")) + collection_id = Column(UUID(as_uuid=True)) + created_at = Column(DateTime) + dataset_metadata = Column(JSON) + artifacts = Column(ARRAY(UUID(as_uuid=True))) + status = Column(JSON) @mapper_registry.mapped -class DatasetArtifact: - - __table__ = Table( - "DatasetArtifact", - mapper_registry.metadata, - Column("id", UUID(as_uuid=True), primary_key=True), - Column("type", Enum(DatasetArtifactType)), - Column("uri", String), - ) +class DatasetArtifactTable: + + __tablename__ = "DatasetArtifact" + + id = Column(UUID(as_uuid=True), primary_key=True) + type = Column(Enum(DatasetArtifactType)) + uri = Column(String) diff --git a/backend/layers/persistence/persistence.py b/backend/layers/persistence/persistence.py index 30fdd0030c221..cb40d18e7cbaf 100644 --- a/backend/layers/persistence/persistence.py +++ b/backend/layers/persistence/persistence.py @@ -33,11 +33,11 @@ ) from backend.layers.business.exceptions import CollectionIsPublishedException from backend.layers.persistence.orm import ( - Collection as CollectionTable, - CollectionVersion as CollectionVersionTable, - Dataset as DatasetTable, - DatasetArtifact as DatasetArtifactTable, - DatasetVersion as DatasetVersionTable, + CollectionTable, + CollectionVersionTable, + DatasetTable, + 
DatasetArtifactTable, + DatasetVersionTable, ) from backend.layers.persistence.persistence_interface import DatabaseProviderInterface, PersistenceException @@ -88,17 +88,13 @@ def _manage_session(self, **kwargs): if session is not None: session.close() - @staticmethod - def _generate_id(): - return str(uuid.uuid4()) - def _row_to_collection_version(self, row: Any, canonical_collection: CanonicalCollection) -> CollectionVersion: return CollectionVersion( collection_id=CollectionId(str(row.collection_id)), version_id=CollectionVersionId(str(row.version_id)), owner=row.owner, curator_name=row.curator_name, - metadata=CollectionMetadata.from_json(row.metadata), + metadata=CollectionMetadata.from_json(row.collection_metadata), publisher_metadata=None if row.publisher_metadata is None else json.loads(row.publisher_metadata), datasets=[DatasetVersionId(str(id)) for id in row.datasets], published_at=row.published_at, @@ -114,7 +110,7 @@ def _row_to_collection_version_with_datasets( version_id=CollectionVersionId(str(row.version_id)), owner=row.owner, curator_name=row.curator_name, - metadata=CollectionMetadata.from_json(row.metadata), + metadata=CollectionMetadata.from_json(row.collection_metadata), publisher_metadata=None if row.publisher_metadata is None else json.loads(row.publisher_metadata), datasets=datasets, published_at=row.published_at, @@ -134,10 +130,10 @@ def _row_to_dataset_version(self, row: Any, canonical_dataset: CanonicalDataset, status = row.status else: status = DatasetStatus.from_json(row.status) - if type(row.metadata) is DatasetMetadata or row.metadata is None: - metadata = row.metadata + if type(row.dataset_metadata) is DatasetMetadata or row.dataset_metadata is None: + metadata = row.dataset_metadata else: - metadata = DatasetMetadata.from_json(row.metadata) + metadata = DatasetMetadata.from_json(row.dataset_metadata) return DatasetVersion( DatasetId(str(row.dataset_id)), DatasetVersionId(str(row.version_id)), @@ -166,12 +162,12 @@ def get_canonical_collection(self, collection_id: CollectionId) -> CanonicalColl CollectionId(str(collection.id)), None if collection.version_id is None else CollectionVersionId(str(collection.version_id)), collection.originally_published_at, - collection.tombstoned, + collection.tombstone, ) def get_canonical_dataset(self, dataset_id: DatasetId) -> CanonicalDataset: with self._manage_session() as session: - dataset = session.query(DatasetTable).filter_by(dataset_id=dataset_id.id).one_or_none() + dataset = session.query(DatasetTable).filter_by(id=dataset_id.id).one_or_none() if dataset is None: return None return CanonicalDataset(dataset_id, DatasetVersionId(str(dataset.dataset_version_id)), dataset.published_at) @@ -183,13 +179,13 @@ def create_canonical_collection( Creates a new canonical collection, generating a canonical collection_id and a new version_id. 
Returns the newly created CollectionVersion """ - collection_id = CollectionId((self._generate_id())) - version_id = CollectionVersionId((self._generate_id())) + collection_id = CollectionId() + version_id = CollectionVersionId() now = datetime.utcnow() canonical_collection = CollectionTable( id=collection_id.id, version_id=None, - tombstoned=False, + tombstone=False, originally_published_at=None, ) @@ -198,7 +194,7 @@ def create_canonical_collection( version_id=version_id.id, owner=owner, curator_name=curator_name, - metadata=collection_metadata.to_json(), + collection_metadata=collection_metadata.to_json(), publisher_metadata=None, published_at=None, created_at=now, @@ -218,7 +214,7 @@ def get_collection_version(self, version_id: CollectionVersionId) -> CollectionV Retrieves a specific collection version by id """ with self._manage_session() as session: - collection_version = session.query(CollectionVersionTable).filter_by(version_id=version_id.id).one_or_none() + collection_version = session.query(CollectionVersionTable).filter_by(id=version_id.id).one_or_none() if collection_version is None: return None collection_id = CollectionId(str(collection_version.collection_id)) @@ -234,7 +230,7 @@ def get_collection_version_with_datasets(self, version_id: CollectionVersionId) Retrieves a specific collection version by id, with datasets """ with self._manage_session() as session: - collection_version = session.query(CollectionVersionTable).filter_by(version_id=version_id.id).one_or_none() + collection_version = session.query(CollectionVersionTable).filter_by(id=version_id.id).one_or_none() if collection_version is None: return None collection_id = CollectionId(str(collection_version.collection_id)) @@ -252,7 +248,7 @@ def get_collection_mapped_version(self, collection_id: CollectionId) -> Optional if version_id is None or version_id[0] is None: return None version_id = version_id[0] - collection_version = session.query(CollectionVersionTable).filter_by(version_id=version_id).one() + collection_version = session.query(CollectionVersionTable).filter_by(id=version_id).one() canonical_collection = self.get_canonical_collection(collection_id) datasets = self._get_datasets([DatasetVersionId(str(id)) for id in collection_version.datasets]) return self._row_to_collection_version_with_datasets(collection_version, canonical_collection, datasets) @@ -287,7 +283,7 @@ def get_all_collections_versions(self) -> Iterable[CollectionVersion]: CollectionId(str(collection_row.id)), None if collection_row.version_id is None else CollectionVersionId(str(collection_row.version_id)), collection_row.originally_published_at, - collection_row.tombstoned, + collection_row.tombstone, ) result = [] @@ -309,7 +305,7 @@ def get_all_mapped_collection_versions(self, get_tombstoned: bool = False) -> It canonical_collections = ( session.query(CollectionTable) .filter(CollectionTable.version_id.isnot(None)) - .filter_by(tombstoned=False) + .filter_by(tombstone=False) .all() ) @@ -327,7 +323,7 @@ def get_all_mapped_collection_versions(self, get_tombstoned: bool = False) -> It CollectionId(str(canonical_row.id)), CollectionVersionId(str(canonical_row.version_id)), canonical_row.originally_published_at, - canonical_row.tombstoned, + canonical_row.tombstone, ) yield self._row_to_collection_version(version, canonical) @@ -338,7 +334,7 @@ def delete_canonical_collection(self, collection_id: CollectionId) -> None: with self._manage_session() as session: canonical_collection = 
session.query(CollectionTable).filter_by(id=collection_id.id).one_or_none() if canonical_collection: - canonical_collection.tombstoned = True + canonical_collection.tombstone = True def save_collection_metadata( self, version_id: CollectionVersionId, collection_metadata: CollectionMetadata @@ -347,8 +343,8 @@ def save_collection_metadata( Saves collection metadata for a collection version """ with self._manage_session() as session: - version = session.query(CollectionVersionTable).filter_by(version_id=version_id.id).one() - version.metadata = collection_metadata.to_json() + version = session.query(CollectionVersionTable).filter_by(id=version_id.id).one() + version.collection_metadata = collection_metadata.to_json() def save_collection_publisher_metadata( self, version_id: CollectionVersionId, publisher_metadata: Optional[dict] @@ -357,7 +353,7 @@ def save_collection_publisher_metadata( Saves publisher metadata for a collection version. Specify None to remove it """ with self._manage_session() as session: - version = session.query(CollectionVersionTable).filter_by(version_id=version_id.id).one() + version = session.query(CollectionVersionTable).filter_by(id=version_id.id).one() version.publisher_metadata = json.dumps(publisher_metadata) def add_collection_version(self, collection_id: CollectionId) -> CollectionVersionId: @@ -368,12 +364,12 @@ def add_collection_version(self, collection_id: CollectionId) -> CollectionVersi """ with self._manage_session() as session: current_version_id = session.query(CollectionTable.version_id).filter_by(id=collection_id.id).one()[0] - current_version = session.query(CollectionVersionTable).filter_by(version_id=current_version_id).one() - new_version_id = self._generate_id() + current_version = session.query(CollectionVersionTable).filter_by(id=current_version_id).one() + new_version_id = CollectionVersionId() new_version = CollectionVersionTable( version_id=new_version_id, collection_id=collection_id.id, - metadata=current_version.metadata, + collection_metadata=current_version.collection_metadata, owner=current_version.owner, curator_name=current_version.curator_name, publisher_metadata=current_version.publisher_metadata, @@ -389,7 +385,7 @@ def delete_collection_version(self, version_id: CollectionVersionId) -> None: Deletes a collection version, if it is unpublished. """ with self._manage_session() as session: - version = session.query(CollectionVersionTable).filter_by(version_id=version_id.id).one_or_none() + version = session.query(CollectionVersionTable).filter_by(id=version_id.id).one_or_none() if version: if version.published_at: raise CollectionIsPublishedException(f"Published Collection Version {version_id} cannot be deleted") @@ -402,50 +398,30 @@ def finalize_collection_version( published_at: Optional[datetime] = None, ) -> None: """ - Finalizes a collection version. This is equivalent to calling: - 1. update_collection_version_mapping - 2. set_collection_version_published_at - 3. 
finalize_dataset_versions + Finalizes a collection version """ published_at = published_at if published_at else datetime.utcnow() - self.update_collection_version_mapping(collection_id, version_id, published_at) - self.set_collection_version_published_at(version_id, published_at) - self.finalize_dataset_versions(version_id, published_at) - - def update_collection_version_mapping( - self, collection_id: CollectionId, version_id: CollectionVersionId, published_at: datetime - ) -> None: - """ - Updates the mapping between the canonical collection `collection_id` and its `version_id` - """ with self._manage_session() as session: + # update canonical collection -> collection version mapping collection = session.query(CollectionTable).filter_by(id=collection_id.id).one() collection.version_id = version_id.id + + # update canonical collection if this is its first publish if collection.originally_published_at is None: collection.originally_published_at = published_at - def set_collection_version_published_at(self, version_id: CollectionVersionId, published_at: datetime) -> None: - """ - Sets the `published_at` datetime for a collection version - """ - with self._manage_session() as session: - collection_version = session.query(CollectionVersionTable).filter_by(version_id=version_id.id).one() + # update collection version + collection_version = session.query(CollectionVersionTable).filter_by(id=version_id.id).one() collection_version.published_at = published_at - def finalize_dataset_versions(self, version_id: CollectionVersionId, published_at: datetime) -> None: - """ - 1. Updates the mapping between the canonical dataset and the latest published dataset version for each dataset - in a CollectionVersion - 2. Sets the each canonical dataset's 'published_at' if not previously set - """ - with self._manage_session() as session: + # finalize collection version's dataset versions dataset_version_ids = ( - session.query(CollectionVersionTable.datasets).filter_by(version_id=version_id.id).one()[0] + session.query(CollectionVersionTable.datasets).filter_by(id=version_id.id).one()[0] ) for dataset_version, dataset in ( session.query(DatasetVersionTable, DatasetTable) - .filter(DatasetVersionTable.dataset_id == DatasetTable.dataset_id) - .filter(DatasetVersionTable.version_id.in_(dataset_version_ids)) + .filter(DatasetVersionTable.dataset_id == DatasetTable.id) + .filter(DatasetVersionTable.id.in_(dataset_version_ids)) .all() ): dataset.version_id = dataset_version.version_id @@ -458,7 +434,7 @@ def get_dataset_version(self, dataset_version_id: DatasetVersionId) -> DatasetVe """ with self._manage_session() as session: dataset_version = ( - session.query(DatasetVersionTable).filter_by(version_id=dataset_version_id.id).one_or_none() + session.query(DatasetVersionTable).filter_by(id=dataset_version_id.id).one_or_none() ) if dataset_version is None: return None @@ -521,11 +497,11 @@ def create_canonical_dataset(self, collection_version_id: CollectionVersionId) - with self._manage_session() as session: collection_id = ( session.query(CollectionVersionTable.collection_id) - .filter_by(version_id=collection_version_id.id) + .filter_by(id=collection_version_id.id) .one()[0] ) - dataset_id = DatasetId(self._generate_id()) - dataset_version_id = DatasetVersionId(self._generate_id()) + dataset_id = DatasetId() + dataset_version_id = DatasetVersionId() canonical_dataset = DatasetTable( dataset_id=dataset_id.id, dataset_version_id=dataset_version_id.id, published_at=None ) @@ -533,7 +509,7 @@ def 
create_canonical_dataset(self, collection_version_id: CollectionVersionId) - version_id=dataset_version_id.id, dataset_id=dataset_id.id, collection_id=collection_id, - metadata=None, + dataset_metadata=None, artifacts=list(), status=DatasetStatus.empty().to_json(), created_at=datetime.utcnow(), @@ -552,11 +528,11 @@ def add_dataset_artifact( """ Adds a dataset artifact to an existing dataset version. """ - artifact_id = DatasetArtifactId(self._generate_id()) + artifact_id = DatasetArtifactId() artifact = DatasetArtifactTable(id=artifact_id.id, type=artifact_type, uri=artifact_uri) with self._manage_session() as session: session.add(artifact) - dataset_version = session.query(DatasetVersionTable).filter_by(version_id=version_id.id).one() + dataset_version = session.query(DatasetVersionTable).filter_by(id=version_id.id).one() artifacts = list(dataset_version.artifacts) artifacts.append(uuid.UUID(artifact_id.id)) dataset_version.artifacts = artifacts @@ -567,7 +543,7 @@ def update_dataset_processing_status(self, version_id: DatasetVersionId, status: Updates the processing status for a dataset version. """ with self._manage_session() as session: - dataset_version = session.query(DatasetVersionTable).filter_by(version_id=version_id.id).one() + dataset_version = session.query(DatasetVersionTable).filter_by(id=version_id.id).one() dataset_version_status = json.loads(dataset_version.status) dataset_version_status["processing_status"] = status.value dataset_version.status = json.dumps(dataset_version_status) @@ -577,7 +553,7 @@ def update_dataset_validation_status(self, version_id: DatasetVersionId, status: Updates the validation status for a dataset version. """ with self._manage_session() as session: - dataset_version = session.query(DatasetVersionTable).filter_by(version_id=version_id.id).one() + dataset_version = session.query(DatasetVersionTable).filter_by(id=version_id.id).one() dataset_version_status = json.loads(dataset_version.status) dataset_version_status["validation_status"] = status.value dataset_version.status = json.dumps(dataset_version_status) @@ -587,7 +563,7 @@ def update_dataset_upload_status(self, version_id: DatasetVersionId, status: Dat Updates the upload status for a dataset version. 
""" with self._manage_session() as session: - dataset_version = session.query(DatasetVersionTable).filter_by(version_id=version_id.id).one() + dataset_version = session.query(DatasetVersionTable).filter_by(id=version_id.id).one() dataset_version_status = json.loads(dataset_version.status) dataset_version_status["upload_status"] = status.value dataset_version.status = json.dumps(dataset_version_status) @@ -599,14 +575,14 @@ def update_dataset_conversion_status( Updates the conversion status for a dataset version and for `status_type` """ with self._manage_session() as session: - dataset_version = session.query(DatasetVersionTable).filter_by(version_id=version_id.id).one() + dataset_version = session.query(DatasetVersionTable).filter_by(id=version_id.id).one() dataset_version_status = json.loads(dataset_version.status) dataset_version_status[status_type] = status.value dataset_version.status = json.dumps(dataset_version_status) def update_dataset_validation_message(self, version_id: DatasetVersionId, validation_message: str) -> None: with self._manage_session() as session: - dataset_version = session.query(DatasetVersionTable).filter_by(version_id=version_id.id).one() + dataset_version = session.query(DatasetVersionTable).filter_by(id=version_id.id).one() dataset_version_status = json.loads(dataset_version.status) dataset_version_status["validation_message"] = validation_message dataset_version.status = json.dumps(dataset_version_status) @@ -616,7 +592,7 @@ def get_dataset_version_status(self, version_id: DatasetVersionId) -> DatasetSta Returns the status for a dataset version """ with self._manage_session() as session: - status = session.query(DatasetVersionTable.status).filter_by(version_id=version_id.id).one() + status = session.query(DatasetVersionTable.status).filter_by(id=version_id.id).one() return DatasetStatus.from_json(status[0]) def set_dataset_metadata(self, version_id: DatasetVersionId, metadata: DatasetMetadata) -> None: @@ -624,8 +600,8 @@ def set_dataset_metadata(self, version_id: DatasetVersionId, metadata: DatasetMe Sets the metadata for a dataset version """ with self._manage_session() as session: - dataset_version = session.query(DatasetVersionTable).filter_by(version_id=version_id.id).one() - dataset_version.metadata = metadata.to_json() + dataset_version = session.query(DatasetVersionTable).filter_by(id=version_id.id).one() + dataset_version.dataset_metadata = metadata.to_json() def add_dataset_to_collection_version_mapping( self, collection_version_id: CollectionVersionId, dataset_version_id: DatasetVersionId @@ -635,7 +611,7 @@ def add_dataset_to_collection_version_mapping( """ with self._manage_session() as session: collection_version = ( - session.query(CollectionVersionTable).filter_by(version_id=collection_version_id.id).one() + session.query(CollectionVersionTable).filter_by(id=collection_version_id.id).one() ) # TODO: alternatively use postgres `array_append` # TODO: make sure that the UUID conversion works @@ -651,7 +627,7 @@ def delete_dataset_from_collection_version( """ with self._manage_session() as session: collection_version = ( - session.query(CollectionVersionTable).filter_by(version_id=collection_version_id.id).one() + session.query(CollectionVersionTable).filter_by(id=collection_version_id.id).one() ) # TODO: alternatively use postgres `array_remove` updated_datasets = list(collection_version.datasets) @@ -672,14 +648,14 @@ def replace_dataset_in_collection_version( .one()[0] ) # noqa dataset_id = ( - 
session.query(DatasetVersionTable.dataset_id).filter_by(version_id=old_dataset_version_id.id).one()[0] + session.query(DatasetVersionTable.dataset_id).filter_by(id=old_dataset_version_id.id).one()[0] ) - new_dataset_version_id = DatasetVersionId(self._generate_id()) + new_dataset_version_id = DatasetVersionId() new_dataset_version = DatasetVersionTable( version_id=new_dataset_version_id.id, dataset_id=dataset_id, collection_id=collection_id, - metadata=None, + dataset_metadata=None, artifacts=list(), status=DatasetStatus.empty().to_json(), created_at=datetime.utcnow(), @@ -687,7 +663,7 @@ def replace_dataset_in_collection_version( session.add(new_dataset_version) collection_version = ( - session.query(CollectionVersionTable).filter_by(version_id=collection_version_id.id).one() + session.query(CollectionVersionTable).filter_by(id=collection_version_id.id).one() ) # noqa # This replaces the dataset while preserving the order of datasets datasets = list(collection_version.datasets) @@ -702,13 +678,13 @@ def get_dataset_mapped_version(self, dataset_id: DatasetId) -> Optional[DatasetV Returns the dataset version mapped to a canonical dataset_id, or None if not existing """ with self._manage_session() as session: - canonical_dataset = session.query(DatasetTable).filter_by(dataset_id=dataset_id.id).one_or_none() + canonical_dataset = session.query(DatasetTable).filter_by(id=dataset_id.id).one_or_none() if canonical_dataset is None: return None if canonical_dataset.dataset_version_id is None: return None dataset_version = ( - session.query(DatasetVersionTable).filter_by(version_id=canonical_dataset.dataset_version_id).one() + session.query(DatasetVersionTable).filter_by(id=canonical_dataset.dataset_version_id).one() ) dataset_version.canonical_dataset = canonical_dataset return self._hydrate_dataset_version(dataset_version) diff --git a/backend/layers/persistence/persistence_mock.py b/backend/layers/persistence/persistence_mock.py index a4993d09f1c36..ab7a843491ac2 100644 --- a/backend/layers/persistence/persistence_mock.py +++ b/backend/layers/persistence/persistence_mock.py @@ -1,5 +1,4 @@ import copy -import uuid from datetime import datetime from typing import Dict, Iterable, List, Optional, Union @@ -58,16 +57,12 @@ def __init__(self) -> None: self.datasets = {} # rename to: active_datasets self.datasets_versions = {} - @staticmethod - def _generate_id(): - return str(uuid.uuid4()) - # TODO: add publisher_metadata here? def create_canonical_collection( self, owner: str, curator_name: str, collection_metadata: CollectionMetadata ) -> CollectionVersion: - collection_id = CollectionId(self._generate_id()) - version_id = CollectionVersionId(self._generate_id()) + collection_id = CollectionId() + version_id = CollectionVersionId() canonical = CanonicalCollection(collection_id, None, None, False) version = CollectionVersion( collection_id=collection_id, @@ -146,10 +141,9 @@ def add_collection_version(self, collection_id: CollectionId) -> CollectionVersi cc = self.collections[collection_id.id] current_version_id = cc.version_id current_version = self.collections_versions[current_version_id.id] - new_version_id = CollectionVersionId(self._generate_id()) + new_version_id = CollectionVersionId() # Note: since datasets are immutable, there is no need to clone datasets here, # but the list that contains datasets needs to be copied, since it's a pointer. 
-        self.datasets_versions
         new_dataset_list = copy.deepcopy(current_version.datasets)

         collection_version = CollectionVersion(
@@ -262,8 +256,8 @@ def get_dataset_artifacts_by_version_id(self, dataset_version_id: DatasetId) ->

     def create_canonical_dataset(self, collection_version_id: CollectionVersionId) -> DatasetVersion:
         # Creates a dataset and initializes it with one version
-        dataset_id = DatasetId(self._generate_id())
-        version_id = DatasetVersionId(self._generate_id())
+        dataset_id = DatasetId()
+        version_id = DatasetVersionId()
         collection_version = self.collections_versions[collection_version_id.id]
         version = DatasetVersion(
             dataset_id=dataset_id,
@@ -290,7 +284,7 @@ def add_dataset_artifact(
         self, version_id: DatasetVersionId, artifact_type: str, artifact_uri: str
     ) -> DatasetArtifactId:
         version = self.datasets_versions[version_id.id]
-        artifact_id = DatasetArtifactId(self._generate_id())
+        artifact_id = DatasetArtifactId()
         version.artifacts.append(DatasetArtifact(artifact_id, artifact_type, artifact_uri))
         return artifact_id

@@ -335,7 +329,7 @@ def delete_dataset_from_collection_version(
     def replace_dataset_in_collection_version(
         self, collection_version_id: CollectionVersionId, old_dataset_version_id: DatasetVersionId
     ) -> DatasetVersion:
-        new_version_id = DatasetVersionId(self._generate_id())
+        new_version_id = DatasetVersionId()
        old_version = self.get_dataset_version(old_dataset_version_id)
         collection_version = self.collections_versions[collection_version_id.id]
         new_version = DatasetVersion(
diff --git a/scripts/cxg_admin_scripts/migrate.py b/scripts/cxg_admin_scripts/migrate.py
index 55050eb3ecd6c..7f583ff012101 100644
--- a/scripts/cxg_admin_scripts/migrate.py
+++ b/scripts/cxg_admin_scripts/migrate.py
@@ -410,11 +410,11 @@ def migrate_redesign_write(ctx):
     from sqlalchemy.orm import Session

     from backend.layers.persistence.orm import (
-        Collection as CollectionRow,
-        CollectionVersion as CollectionVersionRow,
-        Dataset as DatasetRow,
-        DatasetVersion as DatasetVersionRow,
-        DatasetArtifact as DatasetArtifactRow,
+        CollectionTable as CollectionRow,
+        CollectionVersionTable as CollectionVersionRow,
+        DatasetTable as DatasetRow,
+        DatasetVersionTable as DatasetVersionRow,
+        DatasetArtifactTable as DatasetArtifactRow,
     )

     database_pass = os.getenv("PGPASSWORD")
@@ -444,7 +444,7 @@ def migrate_redesign_write(ctx):
                 id=collection["id"],
                 version_id=collection["version_id"],
                 originally_published_at=collection.get("originally_published_at"),
-                tombstoned=collection["tombstoned"],
+                tombstone=collection["tombstoned"],
             )

             session.add(canonical_collection_row)
@@ -458,10 +458,10 @@ def migrate_redesign_write(ctx):
                     del link["url"]

             collection_version_row = CollectionVersionRow(
+                id=version["version_id"],
                 collection_id=version["collection_id"],
-                version_id=version["version_id"],
                 owner=version["owner"],
-                metadata=json.dumps(coll_metadata),
+                collection_metadata=json.dumps(coll_metadata),
                 publisher_metadata=json.dumps(version["publisher_metadata"]),
                 published_at=version["published_at"],
                 datasets=version["datasets"],
@@ -473,8 +473,8 @@ def migrate_redesign_write(ctx):

         for dataset in datasets:
             dataset_row = DatasetRow(
-                dataset_id=dataset["dataset_id"],
-                dataset_version_id=dataset["dataset_version_id"],
+                id=dataset["dataset_id"],
+                version_id=dataset["dataset_version_id"],
                 published_at=dataset.get("published_at"),
             )

@@ -486,13 +486,11 @@ def migrate_redesign_write(ctx):
             if not dataset_version.get("status"):
                 continue

-            metadata = dataset_version["metadata"]
-
             dataset_version_row = 
DatasetVersionRow( - version_id=dataset_version["version_id"], + id=dataset_version["version_id"], dataset_id=dataset_version["dataset_id"], collection_id=dataset_version["collection_id"], - metadata=json.dumps(dataset_version["metadata"]), + dataset_metadata=json.dumps(dataset_version["metadata"]), artifacts=dataset_version["artifacts"], status=json.dumps(dataset_version["status"]), created_at=dataset_version.get("created_at"), diff --git a/tests/unit/backend/layers/api/test_portal_api.py b/tests/unit/backend/layers/api/test_portal_api.py index 7f1aa51a5d528..8ba7f307a8cc7 100644 --- a/tests/unit/backend/layers/api/test_portal_api.py +++ b/tests/unit/backend/layers/api/test_portal_api.py @@ -263,7 +263,7 @@ def test__get_collection__ok(self): # ✅ def test__get_collection_id__403_not_found(self): """Verify the test collection exists and the expected fields exist.""" - fake_id = self._generate_id() + fake_id = CollectionId() test_url = furl(path=f"/dp/v1/collections/{fake_id}", query_params=dict(visibility="PUBLIC")) response = self.app.get(test_url.url, headers=dict(host="localhost")) self.assertEqual(403, response.status_code) @@ -827,7 +827,7 @@ def test_delete_collection__not_owner(self): self.assertEqual(response.status_code, 403) def test_delete_collection__does_not_exist(self): - fake_id = self._generate_id() + fake_id = CollectionId() test_url = furl(path=f"/dp/v1/collections/{fake_id}", query_params=dict(visibility="PRIVATE")) headers = {"host": "localhost", "Content-Type": "application/json", "Cookie": self.get_cxguser_token()} response = self.app.delete(test_url.url, headers=headers) @@ -1241,7 +1241,7 @@ def test__post_dataset_asset__file_SERVER_ERROR(self): # ✅ def test__post_dataset_asset__dataset_NOT_FOUND(self): - fake_id = self._generate_id() + fake_id = DatasetVersionId() test_url = furl(path=f"/dp/v1/datasets/{fake_id}/asset/{fake_id}") response = self.app.post(test_url.url, headers=dict(host="localhost")) self.assertEqual(404, response.status_code) @@ -1470,7 +1470,7 @@ def test__cancel_dataset_download__ok(self): # ✅ def test__cancel_dataset_download__dataset_does_not_exist(self): - fake_id = self._generate_id() + fake_id = DatasetVersionId() test_url = f"/dp/v1/datasets/{fake_id}" headers = {"host": "localhost", "Content-Type": "application/json", "Cookie": self.get_cxguser_token()} response = self.app.delete(test_url, headers=headers) @@ -1613,7 +1613,7 @@ def test__dataset_meta__ok(self): # ✅ def test__dataset_meta__404(self): - fake_id = self._generate_id() + fake_id = DatasetVersionId() headers = {"host": "localhost", "Content-Type": "application/json"} fake_url = f"http://base.url/{fake_id}.cxg/" test_url_404 = f"/dp/v1/datasets/meta?url={fake_url}" @@ -1745,7 +1745,7 @@ def test__revision_nonexistent__403(self): """ Start a revision on a non-existing collection. 
""" - fake_collection_id = self._generate_id() + fake_collection_id = CollectionId() headers = {"host": "localhost", "Content-Type": "application/json", "Cookie": self.get_cxguser_token()} response = self.app.post(f"/dp/v1/collections/{fake_collection_id}", headers=headers) self.assertEqual(403, response.status_code) @@ -1947,7 +1947,7 @@ def test__publish_revision_bad_id__403(self): """ Publish a collection with a bad uuid (non existant) returns 403 """ - collection_id = self._generate_id() + collection_id = CollectionId() body = {"data_submission_policy_version": "1.0"} path = f"{self.base_path}/{collection_id}/publish" response = self.app.post(path, headers=self.headers, data=json.dumps(body)) @@ -2218,7 +2218,7 @@ def test__oversized__413(self): # ✅ def test__link_fake_collection__403(self): - fake_id = self._generate_id() + fake_id = CollectionId() path = f"/dp/v1/collections/{fake_id}/upload-links" headers = {"host": "localhost", "Content-Type": "application/json", "Cookie": self.get_cxguser_token()} body = {"url": self.good_link} diff --git a/tests/unit/backend/layers/business/test_business.py b/tests/unit/backend/layers/business/test_business.py index 74bec3d9cb452..2172c51b42756 100644 --- a/tests/unit/backend/layers/business/test_business.py +++ b/tests/unit/backend/layers/business/test_business.py @@ -920,7 +920,7 @@ def test_create_collection_version_fails_if_collection_not_exists(self): """ A collection version can only be created on an existing collection """ - non_existing_collection_id = self.database_provider._generate_id() + non_existing_collection_id = CollectionId() with self.assertRaises(CollectionVersionException): self.business_logic.create_collection_version(CollectionId(non_existing_collection_id)) diff --git a/tests/unit/backend/layers/common/base_test.py b/tests/unit/backend/layers/common/base_test.py index 591c59120ac9c..f045e78d93316 100644 --- a/tests/unit/backend/layers/common/base_test.py +++ b/tests/unit/backend/layers/common/base_test.py @@ -235,9 +235,6 @@ def generate_dataset( [a.id for a in artifact_ids], ) - def _generate_id(self): - return DatabaseProviderMock._generate_id() - def remove_timestamps(self, body: dict, remove: typing.List[str] = None) -> dict: # TODO: implement as needed return body From e21c59f4138d25471716fa40da25b7fa0e2560c3 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Fri, 23 Dec 2022 13:26:15 -0500 Subject: [PATCH 02/22] lint Signed-off-by: nayib-jose-gloria --- backend/layers/persistence/persistence.py | 24 ++++++----------------- 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/backend/layers/persistence/persistence.py b/backend/layers/persistence/persistence.py index cb40d18e7cbaf..9377ebc8f8698 100644 --- a/backend/layers/persistence/persistence.py +++ b/backend/layers/persistence/persistence.py @@ -415,9 +415,7 @@ def finalize_collection_version( collection_version.published_at = published_at # finalize collection version's dataset versions - dataset_version_ids = ( - session.query(CollectionVersionTable.datasets).filter_by(id=version_id.id).one()[0] - ) + dataset_version_ids = session.query(CollectionVersionTable.datasets).filter_by(id=version_id.id).one()[0] for dataset_version, dataset in ( session.query(DatasetVersionTable, DatasetTable) .filter(DatasetVersionTable.dataset_id == DatasetTable.id) @@ -433,9 +431,7 @@ def get_dataset_version(self, dataset_version_id: DatasetVersionId) -> DatasetVe Returns a dataset version by id. 
""" with self._manage_session() as session: - dataset_version = ( - session.query(DatasetVersionTable).filter_by(id=dataset_version_id.id).one_or_none() - ) + dataset_version = session.query(DatasetVersionTable).filter_by(id=dataset_version_id.id).one_or_none() if dataset_version is None: return None return self._hydrate_dataset_version(dataset_version) @@ -496,9 +492,7 @@ def create_canonical_dataset(self, collection_version_id: CollectionVersionId) - """ with self._manage_session() as session: collection_id = ( - session.query(CollectionVersionTable.collection_id) - .filter_by(id=collection_version_id.id) - .one()[0] + session.query(CollectionVersionTable.collection_id).filter_by(id=collection_version_id.id).one()[0] ) dataset_id = DatasetId() dataset_version_id = DatasetVersionId() @@ -610,9 +604,7 @@ def add_dataset_to_collection_version_mapping( Adds a mapping between an existing collection version and a dataset version """ with self._manage_session() as session: - collection_version = ( - session.query(CollectionVersionTable).filter_by(id=collection_version_id.id).one() - ) + collection_version = session.query(CollectionVersionTable).filter_by(id=collection_version_id.id).one() # TODO: alternatively use postgres `array_append` # TODO: make sure that the UUID conversion works updated_datasets = list(collection_version.datasets) @@ -626,9 +618,7 @@ def delete_dataset_from_collection_version( Removes a mapping between a collection version and a dataset version """ with self._manage_session() as session: - collection_version = ( - session.query(CollectionVersionTable).filter_by(id=collection_version_id.id).one() - ) + collection_version = session.query(CollectionVersionTable).filter_by(id=collection_version_id.id).one() # TODO: alternatively use postgres `array_remove` updated_datasets = list(collection_version.datasets) updated_datasets.remove(uuid.UUID(dataset_version_id.id)) @@ -647,9 +637,7 @@ def replace_dataset_in_collection_version( .filter_by(version_id=collection_version_id.id) .one()[0] ) # noqa - dataset_id = ( - session.query(DatasetVersionTable.dataset_id).filter_by(id=old_dataset_version_id.id).one()[0] - ) + dataset_id = session.query(DatasetVersionTable.dataset_id).filter_by(id=old_dataset_version_id.id).one()[0] new_dataset_version_id = DatasetVersionId() new_dataset_version = DatasetVersionTable( version_id=new_dataset_version_id.id, From 04668e624563461746788e02284de2b5646df463 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Fri, 23 Dec 2022 13:33:57 -0500 Subject: [PATCH 03/22] update missing id changes Signed-off-by: nayib-jose-gloria --- backend/layers/persistence/persistence.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/backend/layers/persistence/persistence.py b/backend/layers/persistence/persistence.py index 9377ebc8f8698..5b45b851741ae 100644 --- a/backend/layers/persistence/persistence.py +++ b/backend/layers/persistence/persistence.py @@ -190,8 +190,8 @@ def create_canonical_collection( ) collection_version_row = CollectionVersionTable( + id=version_id.id, collection_id=collection_id.id, - version_id=version_id.id, owner=owner, curator_name=curator_name, collection_metadata=collection_metadata.to_json(), @@ -367,7 +367,7 @@ def add_collection_version(self, collection_id: CollectionId) -> CollectionVersi current_version = session.query(CollectionVersionTable).filter_by(id=current_version_id).one() new_version_id = CollectionVersionId() new_version = CollectionVersionTable( - version_id=new_version_id, + 
id=new_version_id, collection_id=collection_id.id, collection_metadata=current_version.collection_metadata, owner=current_version.owner, @@ -497,10 +497,10 @@ def create_canonical_dataset(self, collection_version_id: CollectionVersionId) - dataset_id = DatasetId() dataset_version_id = DatasetVersionId() canonical_dataset = DatasetTable( - dataset_id=dataset_id.id, dataset_version_id=dataset_version_id.id, published_at=None + id=dataset_id.id, version_id=dataset_version_id.id, published_at=None ) dataset_version = DatasetVersionTable( - version_id=dataset_version_id.id, + id=dataset_version_id.id, dataset_id=dataset_id.id, collection_id=collection_id, dataset_metadata=None, @@ -640,7 +640,7 @@ def replace_dataset_in_collection_version( dataset_id = session.query(DatasetVersionTable.dataset_id).filter_by(id=old_dataset_version_id.id).one()[0] new_dataset_version_id = DatasetVersionId() new_dataset_version = DatasetVersionTable( - version_id=new_dataset_version_id.id, + id=new_dataset_version_id.id, dataset_id=dataset_id, collection_id=collection_id, dataset_metadata=None, From 3b2f18d404119da9e369a0c40c6a5c792edf3e66 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Fri, 23 Dec 2022 13:38:36 -0500 Subject: [PATCH 04/22] lint fix Signed-off-by: nayib-jose-gloria --- backend/layers/persistence/persistence.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/backend/layers/persistence/persistence.py b/backend/layers/persistence/persistence.py index 5b45b851741ae..2f56f0cc551c0 100644 --- a/backend/layers/persistence/persistence.py +++ b/backend/layers/persistence/persistence.py @@ -496,9 +496,7 @@ def create_canonical_dataset(self, collection_version_id: CollectionVersionId) - ) dataset_id = DatasetId() dataset_version_id = DatasetVersionId() - canonical_dataset = DatasetTable( - id=dataset_id.id, version_id=dataset_version_id.id, published_at=None - ) + canonical_dataset = DatasetTable(id=dataset_id.id, version_id=dataset_version_id.id, published_at=None) dataset_version = DatasetVersionTable( id=dataset_version_id.id, dataset_id=dataset_id.id, From 04c4dc17bcc3dafbc727acac7172108f22800536 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Fri, 23 Dec 2022 13:41:03 -0500 Subject: [PATCH 05/22] more missing id's Signed-off-by: nayib-jose-gloria --- backend/layers/persistence/persistence.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/layers/persistence/persistence.py b/backend/layers/persistence/persistence.py index 2f56f0cc551c0..e253a5fc88ab6 100644 --- a/backend/layers/persistence/persistence.py +++ b/backend/layers/persistence/persistence.py @@ -91,7 +91,7 @@ def _manage_session(self, **kwargs): def _row_to_collection_version(self, row: Any, canonical_collection: CanonicalCollection) -> CollectionVersion: return CollectionVersion( collection_id=CollectionId(str(row.collection_id)), - version_id=CollectionVersionId(str(row.version_id)), + version_id=CollectionVersionId(str(row.id)), owner=row.owner, curator_name=row.curator_name, metadata=CollectionMetadata.from_json(row.collection_metadata), @@ -107,7 +107,7 @@ def _row_to_collection_version_with_datasets( ) -> CollectionVersionWithDatasets: return CollectionVersionWithDatasets( collection_id=CollectionId(str(row.collection_id)), - version_id=CollectionVersionId(str(row.version_id)), + version_id=CollectionVersionId(str(row.id)), owner=row.owner, curator_name=row.curator_name, metadata=CollectionMetadata.from_json(row.collection_metadata), @@ -136,7 +136,7 @@ def 
_row_to_dataset_version(self, row: Any, canonical_dataset: CanonicalDataset, metadata = DatasetMetadata.from_json(row.dataset_metadata) return DatasetVersion( DatasetId(str(row.dataset_id)), - DatasetVersionId(str(row.version_id)), + DatasetVersionId(str(row.id)), CollectionId(str(row.collection_id)), status, metadata, From 6ba491e90511eae68817befe0547e89ea334a9ad Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Fri, 23 Dec 2022 13:44:58 -0500 Subject: [PATCH 06/22] remove unused import Signed-off-by: nayib-jose-gloria --- backend/layers/persistence/orm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/layers/persistence/orm.py b/backend/layers/persistence/orm.py index c6867a97e7ae6..4163707828c2a 100644 --- a/backend/layers/persistence/orm.py +++ b/backend/layers/persistence/orm.py @@ -1,4 +1,4 @@ -from sqlalchemy import Column, DateTime, Enum, ForeignKey, String, Table +from sqlalchemy import Column, DateTime, Enum, ForeignKey, String from sqlalchemy.dialects.postgresql import ARRAY, BOOLEAN, JSON, UUID from sqlalchemy.orm import registry from sqlalchemy.schema import MetaData From 4332f63f13b9670d17de1e4eb3b5a60375d88c24 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Fri, 23 Dec 2022 13:48:10 -0500 Subject: [PATCH 07/22] correct id name Signed-off-by: nayib-jose-gloria --- backend/layers/persistence/persistence.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/layers/persistence/persistence.py b/backend/layers/persistence/persistence.py index e253a5fc88ab6..93a1480378a0d 100644 --- a/backend/layers/persistence/persistence.py +++ b/backend/layers/persistence/persistence.py @@ -170,7 +170,7 @@ def get_canonical_dataset(self, dataset_id: DatasetId) -> CanonicalDataset: dataset = session.query(DatasetTable).filter_by(id=dataset_id.id).one_or_none() if dataset is None: return None - return CanonicalDataset(dataset_id, DatasetVersionId(str(dataset.dataset_version_id)), dataset.published_at) + return CanonicalDataset(dataset_id, DatasetVersionId(str(dataset.version_id)), dataset.published_at) def create_canonical_collection( self, owner: str, curator_name: str, collection_metadata: CollectionMetadata @@ -667,10 +667,10 @@ def get_dataset_mapped_version(self, dataset_id: DatasetId) -> Optional[DatasetV canonical_dataset = session.query(DatasetTable).filter_by(id=dataset_id.id).one_or_none() if canonical_dataset is None: return None - if canonical_dataset.dataset_version_id is None: + if canonical_dataset.version_id is None: return None dataset_version = ( - session.query(DatasetVersionTable).filter_by(id=canonical_dataset.dataset_version_id).one() + session.query(DatasetVersionTable).filter_by(id=canonical_dataset.version_id).one() ) dataset_version.canonical_dataset = canonical_dataset return self._hydrate_dataset_version(dataset_version) From b6184b9c709fd66a294b115d33fc015631df752f Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Fri, 23 Dec 2022 13:52:06 -0500 Subject: [PATCH 08/22] lint fix Signed-off-by: nayib-jose-gloria --- backend/layers/persistence/persistence.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/backend/layers/persistence/persistence.py b/backend/layers/persistence/persistence.py index 93a1480378a0d..c88dcf47c5db8 100644 --- a/backend/layers/persistence/persistence.py +++ b/backend/layers/persistence/persistence.py @@ -669,8 +669,6 @@ def get_dataset_mapped_version(self, dataset_id: DatasetId) -> Optional[DatasetV return None if canonical_dataset.version_id is None: 
return None - dataset_version = ( - session.query(DatasetVersionTable).filter_by(id=canonical_dataset.version_id).one() - ) + dataset_version = session.query(DatasetVersionTable).filter_by(id=canonical_dataset.version_id).one() dataset_version.canonical_dataset = canonical_dataset return self._hydrate_dataset_version(dataset_version) From 006d4ff185e3b63d5515984a4b632ced15efcebe Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Fri, 23 Dec 2022 13:54:26 -0500 Subject: [PATCH 09/22] id fix Signed-off-by: nayib-jose-gloria --- backend/layers/persistence/persistence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/layers/persistence/persistence.py b/backend/layers/persistence/persistence.py index c88dcf47c5db8..571c171822cea 100644 --- a/backend/layers/persistence/persistence.py +++ b/backend/layers/persistence/persistence.py @@ -422,7 +422,7 @@ def finalize_collection_version( .filter(DatasetVersionTable.id.in_(dataset_version_ids)) .all() ): - dataset.version_id = dataset_version.version_id + dataset.version_id = dataset_version.id if dataset.published_at is None: dataset.published_at = published_at From d8e4f174bf11f9dcc5176a0888ca8eac19b689af Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Fri, 23 Dec 2022 14:02:18 -0500 Subject: [PATCH 10/22] id fix Signed-off-by: nayib-jose-gloria --- backend/layers/persistence/persistence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/layers/persistence/persistence.py b/backend/layers/persistence/persistence.py index 571c171822cea..62fc280515342 100644 --- a/backend/layers/persistence/persistence.py +++ b/backend/layers/persistence/persistence.py @@ -367,7 +367,7 @@ def add_collection_version(self, collection_id: CollectionId) -> CollectionVersi current_version = session.query(CollectionVersionTable).filter_by(id=current_version_id).one() new_version_id = CollectionVersionId() new_version = CollectionVersionTable( - id=new_version_id, + id=new_version_id.id, collection_id=collection_id.id, collection_metadata=current_version.collection_metadata, owner=current_version.owner, From 719705bae7c5f236428e6fb8dd1fc4b16766fbf2 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Fri, 23 Dec 2022 14:08:59 -0500 Subject: [PATCH 11/22] more ids Signed-off-by: nayib-jose-gloria --- backend/layers/persistence/persistence.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/layers/persistence/persistence.py b/backend/layers/persistence/persistence.py index 62fc280515342..21a7c9cb5c1e8 100644 --- a/backend/layers/persistence/persistence.py +++ b/backend/layers/persistence/persistence.py @@ -312,7 +312,7 @@ def get_all_mapped_collection_versions(self, get_tombstoned: bool = False) -> It mapped_version_ids = [i.version_id for i in canonical_collections] versions = ( session.query(CollectionVersionTable) - .filter(CollectionVersionTable.version_id.in_(mapped_version_ids)) + .filter(CollectionVersionTable.id.in_(mapped_version_ids)) .all() ) # noqa @@ -632,7 +632,7 @@ def replace_dataset_in_collection_version( with self._manage_session() as session: collection_id = ( session.query(CollectionVersionTable.collection_id) - .filter_by(version_id=collection_version_id.id) + .filter_by(id=collection_version_id.id) .one()[0] ) # noqa dataset_id = session.query(DatasetVersionTable.dataset_id).filter_by(id=old_dataset_version_id.id).one()[0] From b938f1faf9a3aa51f269fb3e11b86b6c9640bf07 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Fri, 23 Dec 2022 14:14:14 -0500 Subject: 
[PATCH 12/22] id fix Signed-off-by: nayib-jose-gloria --- backend/layers/persistence/persistence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/layers/persistence/persistence.py b/backend/layers/persistence/persistence.py index 21a7c9cb5c1e8..e87bf9bd2aa78 100644 --- a/backend/layers/persistence/persistence.py +++ b/backend/layers/persistence/persistence.py @@ -318,7 +318,7 @@ def get_all_mapped_collection_versions(self, get_tombstoned: bool = False) -> It for version in versions: # TODO: should be optimized using a map - canonical_row = next(cc for cc in canonical_collections if cc.version_id == version.version_id) + canonical_row = next(cc for cc in canonical_collections if cc.version_id == version.id) canonical = CanonicalCollection( CollectionId(str(canonical_row.id)), CollectionVersionId(str(canonical_row.version_id)), From 60c2c07a6d54098b10c28b57ebec8a770e138b6a Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Fri, 23 Dec 2022 14:18:53 -0500 Subject: [PATCH 13/22] version id fix Signed-off-by: nayib-jose-gloria --- backend/layers/persistence/persistence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/layers/persistence/persistence.py b/backend/layers/persistence/persistence.py index e87bf9bd2aa78..6f5895b1040b0 100644 --- a/backend/layers/persistence/persistence.py +++ b/backend/layers/persistence/persistence.py @@ -459,7 +459,7 @@ def get_all_datasets(self) -> Iterable[DatasetVersion]: acc = [] with self._manage_session() as session: for version in session.query(DatasetVersionTable).all(): # noqa - if str(version.version_id) in active_datasets: + if str(version.id) in active_datasets: acc.append(self._hydrate_dataset_version(version)) return acc From e7f2e94ac07572230e988591cadfae8ea90aaa13 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Fri, 23 Dec 2022 14:19:16 -0500 Subject: [PATCH 14/22] lint fix Signed-off-by: nayib-jose-gloria --- backend/layers/persistence/persistence.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/backend/layers/persistence/persistence.py b/backend/layers/persistence/persistence.py index 6f5895b1040b0..cd93050d44d81 100644 --- a/backend/layers/persistence/persistence.py +++ b/backend/layers/persistence/persistence.py @@ -311,9 +311,7 @@ def get_all_mapped_collection_versions(self, get_tombstoned: bool = False) -> It mapped_version_ids = [i.version_id for i in canonical_collections] versions = ( - session.query(CollectionVersionTable) - .filter(CollectionVersionTable.id.in_(mapped_version_ids)) - .all() + session.query(CollectionVersionTable).filter(CollectionVersionTable.id.in_(mapped_version_ids)).all() ) # noqa for version in versions: @@ -631,9 +629,7 @@ def replace_dataset_in_collection_version( # TODO: this method should probably be split into multiple - it contains too much logic with self._manage_session() as session: collection_id = ( - session.query(CollectionVersionTable.collection_id) - .filter_by(id=collection_version_id.id) - .one()[0] + session.query(CollectionVersionTable.collection_id).filter_by(id=collection_version_id.id).one()[0] ) # noqa dataset_id = session.query(DatasetVersionTable.dataset_id).filter_by(id=old_dataset_version_id.id).one()[0] new_dataset_version_id = DatasetVersionId() From 26cf5475354812c671f30a56b356ef3da350350f Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Mon, 9 Jan 2023 16:53:27 -0500 Subject: [PATCH 15/22] autogenerate migration script for db field changes + update README links Signed-off-by: 
nayib-jose-gloria --- backend/database/README.md | 4 +- .../34_2be441104b48_standardize_db_fields.py | 60 +++++++++++++++++++ 2 files changed, 62 insertions(+), 2 deletions(-) create mode 100644 backend/database/versions/34_2be441104b48_standardize_db_fields.py diff --git a/backend/database/README.md b/backend/database/README.md index 78ec150a0c23f..204fb0b16941d 100644 --- a/backend/database/README.md +++ b/backend/database/README.md @@ -16,7 +16,7 @@ 5. In the generated file, update the `Revision ID` and the `revision` (used by Alembic) to include the migration count. For example `Revision ID: a8cd0dc08805` becomes `Revision ID: 18_a8cd0dc08805` and `revision = "a8cd0dc08805"` becomes `revision = "18_a8cd0dc08805"` 6. [Test your migration](#test-a-migration) -7. Check that [corpora_orm.py](../common/corpora_orm.py) matches up with your changes. +7. Check that [orm.py](../layers/persistence/orm.py) matches up with your changes. 8. Once you've completed the changes, create a PR to get the functions reviewed. 9. Once the PR is merged, migrations will be run as part of the deployment process to each env. 10. [Connect to Remote RDS](#connect-to-remote-rds) to single-cell-dev @@ -29,7 +29,7 @@ DEPLOYMENT_STAGE=test make db/migrate ## How to autogenerate migration script -1. Make changes to the ORM class(es) in [corpora_orm.py](../common/corpora_orm.py) +1. Make changes to the ORM class(es) in [orm.py](../layers/persistence/orm.py) 2. [Connect to Remote RDS](#connect-to-remote-rds). Note, generally, you would be connecting to prod (`AWS_PROFILE=single-cell-prod DEPLOYMENT_STAGE=prod`) since we want to generate a migration from the database schema currently deployed in prod. However, if there are migrations haven't been diff --git a/backend/database/versions/34_2be441104b48_standardize_db_fields.py b/backend/database/versions/34_2be441104b48_standardize_db_fields.py new file mode 100644 index 0000000000000..977e5ba8a6457 --- /dev/null +++ b/backend/database/versions/34_2be441104b48_standardize_db_fields.py @@ -0,0 +1,60 @@ +"""standardize db fields + +Revision ID: 34_2be441104b48 +Revises: 33_c5aaf6e2ca9e +Create Date: 2023-01-09 16:40:13.184800 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = '34_2be441104b48' +down_revision = '33_c5aaf6e2ca9e' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.add_column('Collection', sa.Column('tombstone', sa.BOOLEAN(), nullable=True), schema='persistence_schema') + op.drop_column('Collection', 'tombstoned', schema='persistence_schema') + op.add_column('CollectionVersion', sa.Column('id', postgresql.UUID(as_uuid=True), nullable=False), schema='persistence_schema') + op.drop_column('CollectionVersion', 'version_id', schema='persistence_schema') + op.add_column('CollectionVersion', sa.Column('collection_metadata', postgresql.JSON(astext_type=sa.Text()), nullable=True), schema='persistence_schema') + op.drop_column('CollectionVersion', 'metadata', schema='persistence_schema') + op.add_column('Dataset', sa.Column('id', postgresql.UUID(as_uuid=True), nullable=False), schema='persistence_schema') + op.drop_column('Dataset', 'dataset_id', schema='persistence_schema') + op.add_column('Dataset', sa.Column('version_id', postgresql.UUID(as_uuid=True), nullable=True), schema='persistence_schema') + op.drop_column('Dataset', 'dataset_version_id', schema='persistence_schema') + op.add_column('DatasetVersion', sa.Column('id', postgresql.UUID(as_uuid=True), nullable=False), schema='persistence_schema') + op.drop_column('DatasetVersion', 'version_id', schema='persistence_schema') + op.add_column('DatasetVersion', sa.Column('dataset_metadata', postgresql.JSON(astext_type=sa.Text()), nullable=True), schema='persistence_schema') + op.drop_column('DatasetVersion', 'metadata', schema='persistence_schema') + op.drop_constraint('DatasetVersion_dataset_id_fkey', 'DatasetVersion', schema='persistence_schema', + type_='foreignkey') + op.create_foreign_key(None, 'DatasetVersion', 'Dataset', ['dataset_id'], ['id'], source_schema='persistence_schema', + referent_schema='persistence_schema') + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.add_column('DatasetVersion', sa.Column('version_id', postgresql.UUID(), autoincrement=False, nullable=False), schema='persistence_schema') + op.add_column('DatasetVersion', sa.Column('metadata', postgresql.JSON(astext_type=sa.Text()), autoincrement=False, nullable=True), schema='persistence_schema') + op.drop_constraint(None, 'DatasetVersion', schema='persistence_schema', type_='foreignkey') + op.create_foreign_key('DatasetVersion_dataset_id_fkey', 'DatasetVersion', 'Dataset', ['dataset_id'], ['dataset_id'], source_schema='persistence_schema', referent_schema='persistence_schema') + op.drop_column('DatasetVersion', 'dataset_metadata', schema='persistence_schema') + op.drop_column('DatasetVersion', 'id', schema='persistence_schema') + op.add_column('Dataset', sa.Column('dataset_id', postgresql.UUID(), autoincrement=False, nullable=False), schema='persistence_schema') + op.add_column('Dataset', sa.Column('dataset_version_id', postgresql.UUID(), autoincrement=False, nullable=True), schema='persistence_schema') + op.drop_column('Dataset', 'version_id', schema='persistence_schema') + op.drop_column('Dataset', 'id', schema='persistence_schema') + op.add_column('CollectionVersion', sa.Column('version_id', postgresql.UUID(), autoincrement=False, nullable=False), schema='persistence_schema') + op.add_column('CollectionVersion', sa.Column('metadata', postgresql.JSON(astext_type=sa.Text()), autoincrement=False, nullable=True), schema='persistence_schema') + op.drop_column('CollectionVersion', 'collection_metadata', schema='persistence_schema') + op.drop_column('CollectionVersion', 'id', schema='persistence_schema') + op.add_column('Collection', sa.Column('tombstoned', sa.BOOLEAN(), autoincrement=False, nullable=True), schema='persistence_schema') + op.drop_column('Collection', 'tombstone', schema='persistence_schema') + # ### end Alembic commands ### From 3f198ab8ea00ecab7455282a1366d31edce6ba17 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Mon, 9 Jan 2023 17:00:40 -0500 Subject: [PATCH 16/22] update makefile test command documentation Signed-off-by: nayib-jose-gloria --- DEV_ENV.md | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/DEV_ENV.md b/DEV_ENV.md index 4a41422023e72..8eb5d66166497 100644 --- a/DEV_ENV.md +++ b/DEV_ENV.md @@ -51,12 +51,14 @@ The dev environment is initialized with AWS Secrets/S3 data in the [scripts/setu ### Make targets for running tests in dev -| Command | Description | Notes | -|------------------------------|----------------------------------------------------------------------------------------------| ----- | -| `make local-unit-test` | Run backend tests in the local dockerized environment, against mock of persistence layer | | -| `make local-integration-test` | Run backend tests in the local dockerized environment, against dockerized database instance. | | -| `make local-functional-test` | Run functional tests in local dockerized environment | | -| `make local-smoke-test` | Run frontend/e2e tests in the local dockerized environment | | +| Command | Description | Notes | +|-------------------------------------|----------------------------------------------------------------------------------------------| ----- | +| `make local-unit-test` | Run backend tests in the local dockerized environment, against mock of persistence layer | | +| `make local-integration-test-backend` | Run backend tests in the local dockerized environment, against dockerized database instance. 
| | +| `make local-unit-test-processing` | Run dataset processing upload job unit tests in the 'processing' docker environment. | | +| `make local-unit-test-wmg-backend` | Run wmg backend unit tests in the local dockerized environment. | | +| `make local-functional-test` | Run functional tests in local dockerized environment | | +| `make local-smoke-test` | Run frontend/e2e tests in the local dockerized environment | | ### External dependencies From f9a8f9572a4f093d74bd693f66176c5efa9b8862 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Mon, 9 Jan 2023 17:03:21 -0500 Subject: [PATCH 17/22] pr feedback on docs Signed-off-by: nayib-jose-gloria --- DEV_ENV.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/DEV_ENV.md b/DEV_ENV.md index 8eb5d66166497..e0d7c2cfbc47a 100644 --- a/DEV_ENV.md +++ b/DEV_ENV.md @@ -51,14 +51,14 @@ The dev environment is initialized with AWS Secrets/S3 data in the [scripts/setu ### Make targets for running tests in dev -| Command | Description | Notes | -|-------------------------------------|----------------------------------------------------------------------------------------------| ----- | -| `make local-unit-test` | Run backend tests in the local dockerized environment, against mock of persistence layer | | +| Command | Description | Notes | +|-------------------------------------|-------------------------------------------------------------------------------------------| ----- | +| `make local-unit-test` | Run backend tests in the local dockerized environment, against mock of persistence layer | | | `make local-integration-test-backend` | Run backend tests in the local dockerized environment, against dockerized database instance. | | -| `make local-unit-test-processing` | Run dataset processing upload job unit tests in the 'processing' docker environment. | | -| `make local-unit-test-wmg-backend` | Run wmg backend unit tests in the local dockerized environment. | | -| `make local-functional-test` | Run functional tests in local dockerized environment | | -| `make local-smoke-test` | Run frontend/e2e tests in the local dockerized environment | | +| `make local-unit-test-processing` | Run dataset processing upload job unit tests in the 'processing' docker environment. | | +| `make local-unit-test-wmg-backend` | Run wmg backend unit tests in the local dockerized environment. 
| | +| `make local-functional-test` | Run backend functional tests in local dockerized environment | | +| `make local-smoke-test` | Run frontend/e2e tests in the local dockerized environment | | ### External dependencies From f601d6825cf7ea229c4b40d409d6a6a18c1587c0 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Mon, 9 Jan 2023 18:30:38 -0500 Subject: [PATCH 18/22] update migration script + db/local/load-schema + migration docs Signed-off-by: nayib-jose-gloria --- backend/Makefile | 8 +-- backend/database/README.md | 13 +++-- .../34_2be441104b48_standardize_db_fields.py | 49 +++++++------------ 3 files changed, 28 insertions(+), 42 deletions(-) diff --git a/backend/Makefile b/backend/Makefile index 89b3f75beba96..53877ae41187d 100644 --- a/backend/Makefile +++ b/backend/Makefile @@ -75,11 +75,11 @@ db/local/load-data: db/local/load-schema: # Imports the corpora_dev.sqlc schema (schema ONLY) into the corpora_test database - # Usage: make db/local/load-schema INFILE= + # Usage: make db/local/load-schema INFILE= $(eval DB_PW = $(shell aws secretsmanager get-secret-value --secret-id corpora/backend/test/database --region us-west-2 | jq -r '.SecretString | match(":([^:]*)@").captures[0].string')) - PGPASSWORD=${DB_PW} pg_restore --schema-only --clean --no-owner --host 0.0.0.0 --dbname corpora $(INFILE) - # Also import alembic schema version - PGPASSWORD=${DB_PW} pg_restore --data-only --table=alembic_version --no-owner --host 0.0.0.0 --dbname corpora $(INFILE) + PGPASSWORD=${DB_PW} pg_restore --schema-only --schema=persistence-schema --clean --no-owner --host 0.0.0.0 --username corpora --dbname corpora $(INFILE) + # Also import alembic schema version + PGPASSWORD=${DB_PW} pg_restore --data-only --table=alembic_version --no-owner --host 0.0.0.0 --username corpora --dbname corpora $(INFILE) db/dump_schema: ifeq ($(DEPLOYMENT_STAGE),test) diff --git a/backend/database/README.md b/backend/database/README.md index 204fb0b16941d..ff1fc551a59bb 100644 --- a/backend/database/README.md +++ b/backend/database/README.md @@ -50,8 +50,7 @@ AWS_PROFILE=single-cell-{dev,prod} DEPLOYMENT_STAGE={dev,staging,prod} CORPORA_L The following steps will test that a migration script works on a local database using data downloaded from a deployed database. -1. [Connect to Remote RDS](#connect-to-remote-rds) -2. Open a new terminal and using the same values for `AWS_PROFILE` and `DEPLOYMENT_STAGE`, download the remote dev database schema: +1. Open a new terminal and using the same values for `AWS_PROFILE` and `DEPLOYMENT_STAGE`, download the remote dev database schema: ```shell cd $REPO_ROOT/backend @@ -60,15 +59,15 @@ AWS_PROFILE=single-cell-{dev,prod} DEPLOYMENT_STAGE={dev,staging,prod} make db/d This will download the database to `$REPO_ROOT/backend/corpora_dev.sqlc`. -3. The tunnel to dev should close automatically (but worth verifying `ps ax | grep ssh`) -4. Start the local database environment: +2. The tunnel to dev should close automatically (but worth verifying `ps ax | grep ssh`) +3. Start the local database environment: ```shell cd $REPO_ROOT make local-start ``` -5. Import the remote database schema into your local database: +4. Import the remote database schema into your local database: ```shell cd $REPO_ROOT/backend @@ -85,7 +84,7 @@ You may need to run this a few times, until there are no significant errors. - Note: `pg_restore: error: could not execute query: ERROR: role "rdsadmin" does not exist` is not a significant error -6. Run the migration test: +5. 
Run the migration test: ```shell AWS_PROFILE=single-cell-{dev,prod} DEPLOYMENT_STAGE=test make db/test_migration @@ -99,7 +98,7 @@ This test will: 1. Dump the schema (after) 1. Compare the before vs after schemas. These should be identical if the database migration's `upgrade()` and `downgrade()` functions were implemented correctly. -If there are no differences then the test passed. If the test didn't pass, make adjustments to your migration script and restart from step 5. Repeat until there are no errors. +If there are no differences then the test passed. If the test didn't pass, make adjustments to your migration script and restart from step 4. Repeat until there are no errors. ## Connect to Remote RDS diff --git a/backend/database/versions/34_2be441104b48_standardize_db_fields.py b/backend/database/versions/34_2be441104b48_standardize_db_fields.py index 977e5ba8a6457..504be58618a90 100644 --- a/backend/database/versions/34_2be441104b48_standardize_db_fields.py +++ b/backend/database/versions/34_2be441104b48_standardize_db_fields.py @@ -18,43 +18,30 @@ def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.add_column('Collection', sa.Column('tombstone', sa.BOOLEAN(), nullable=True), schema='persistence_schema') - op.drop_column('Collection', 'tombstoned', schema='persistence_schema') - op.add_column('CollectionVersion', sa.Column('id', postgresql.UUID(as_uuid=True), nullable=False), schema='persistence_schema') - op.drop_column('CollectionVersion', 'version_id', schema='persistence_schema') - op.add_column('CollectionVersion', sa.Column('collection_metadata', postgresql.JSON(astext_type=sa.Text()), nullable=True), schema='persistence_schema') - op.drop_column('CollectionVersion', 'metadata', schema='persistence_schema') - op.add_column('Dataset', sa.Column('id', postgresql.UUID(as_uuid=True), nullable=False), schema='persistence_schema') - op.drop_column('Dataset', 'dataset_id', schema='persistence_schema') - op.add_column('Dataset', sa.Column('version_id', postgresql.UUID(as_uuid=True), nullable=True), schema='persistence_schema') - op.drop_column('Dataset', 'dataset_version_id', schema='persistence_schema') - op.add_column('DatasetVersion', sa.Column('id', postgresql.UUID(as_uuid=True), nullable=False), schema='persistence_schema') - op.drop_column('DatasetVersion', 'version_id', schema='persistence_schema') - op.add_column('DatasetVersion', sa.Column('dataset_metadata', postgresql.JSON(astext_type=sa.Text()), nullable=True), schema='persistence_schema') - op.drop_column('DatasetVersion', 'metadata', schema='persistence_schema') + op.alter_column('Collection', "tombstoned", new_column_name="tombstone", schema='persistence_schema') + op.alter_column('CollectionVersion', 'version_id', new_column_name="id", schema='persistence_schema') + op.alter_column('CollectionVersion', 'metadata', new_column_name="collection_metadata", schema='persistence_schema') + op.alter_column('Dataset', 'dataset_id', new_column_name="id", schema='persistence_schema') + op.alter_column('Dataset', 'dataset_version_id', new_column_name="version_id", schema='persistence_schema') + op.alter_column('DatasetVersion', 'version_id', new_column_name="id", schema='persistence_schema') + op.alter_column('DatasetVersion', 'metadata', new_column_name='dataset_metadata', schema='persistence_schema') op.drop_constraint('DatasetVersion_dataset_id_fkey', 'DatasetVersion', schema='persistence_schema', type_='foreignkey') - op.create_foreign_key(None, 'DatasetVersion', 'Dataset', ['dataset_id'], ['id'], 
source_schema='persistence_schema', + op.create_foreign_key('DatasetVersion_id_fkey', 'DatasetVersion', 'Dataset', ['dataset_id'], ['id'], source_schema='persistence_schema', referent_schema='persistence_schema') # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.add_column('DatasetVersion', sa.Column('version_id', postgresql.UUID(), autoincrement=False, nullable=False), schema='persistence_schema') - op.add_column('DatasetVersion', sa.Column('metadata', postgresql.JSON(astext_type=sa.Text()), autoincrement=False, nullable=True), schema='persistence_schema') - op.drop_constraint(None, 'DatasetVersion', schema='persistence_schema', type_='foreignkey') - op.create_foreign_key('DatasetVersion_dataset_id_fkey', 'DatasetVersion', 'Dataset', ['dataset_id'], ['dataset_id'], source_schema='persistence_schema', referent_schema='persistence_schema') - op.drop_column('DatasetVersion', 'dataset_metadata', schema='persistence_schema') - op.drop_column('DatasetVersion', 'id', schema='persistence_schema') - op.add_column('Dataset', sa.Column('dataset_id', postgresql.UUID(), autoincrement=False, nullable=False), schema='persistence_schema') - op.add_column('Dataset', sa.Column('dataset_version_id', postgresql.UUID(), autoincrement=False, nullable=True), schema='persistence_schema') - op.drop_column('Dataset', 'version_id', schema='persistence_schema') - op.drop_column('Dataset', 'id', schema='persistence_schema') - op.add_column('CollectionVersion', sa.Column('version_id', postgresql.UUID(), autoincrement=False, nullable=False), schema='persistence_schema') - op.add_column('CollectionVersion', sa.Column('metadata', postgresql.JSON(astext_type=sa.Text()), autoincrement=False, nullable=True), schema='persistence_schema') - op.drop_column('CollectionVersion', 'collection_metadata', schema='persistence_schema') - op.drop_column('CollectionVersion', 'id', schema='persistence_schema') - op.add_column('Collection', sa.Column('tombstoned', sa.BOOLEAN(), autoincrement=False, nullable=True), schema='persistence_schema') - op.drop_column('Collection', 'tombstone', schema='persistence_schema') + op.alter_column('Collection', "tombstone", new_column_name="tombstoned", schema='persistence_schema') + op.alter_column('CollectionVersion', 'id', new_column_name="version_id", schema='persistence_schema') + op.alter_column('CollectionVersion', 'collection_metadata', new_column_name="metadata", schema='persistence_schema') + op.alter_column('Dataset', 'id', new_column_name="dataset_id", schema='persistence_schema') + op.alter_column('Dataset', 'version_id', new_column_name="dataset_version_id", schema='persistence_schema') + op.alter_column('DatasetVersion', 'id', new_column_name="version_id", schema='persistence_schema') + op.alter_column('DatasetVersion', 'dataset_metadata', new_column_name='metadata', schema='persistence_schema') + op.drop_constraint('DatasetVersion_id_fkey', 'DatasetVersion', schema='persistence_schema', type_='foreignkey') + op.create_foreign_key('DatasetVersion_dataset_id_fkey', 'DatasetVersion', 'Dataset', ['dataset_id'], ['dataset_id'], source_schema='persistence_schema', + referent_schema='persistence_schema') # ### end Alembic commands ### From 5ca9ef61615261171ff673bcb5bb33e9e8fcd5e8 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Tue, 10 Jan 2023 08:59:29 -0500 Subject: [PATCH 19/22] lint fixes Signed-off-by: nayib-jose-gloria --- .../34_2be441104b48_standardize_db_fields.py | 61 ++++++++++++------- 1 file changed, 38 
insertions(+), 23 deletions(-) diff --git a/backend/database/versions/34_2be441104b48_standardize_db_fields.py b/backend/database/versions/34_2be441104b48_standardize_db_fields.py index 504be58618a90..e45dc5064d6b9 100644 --- a/backend/database/versions/34_2be441104b48_standardize_db_fields.py +++ b/backend/database/versions/34_2be441104b48_standardize_db_fields.py @@ -10,38 +10,53 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = '34_2be441104b48' -down_revision = '33_c5aaf6e2ca9e' +revision = "34_2be441104b48" +down_revision = "33_c5aaf6e2ca9e" branch_labels = None depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.alter_column('Collection', "tombstoned", new_column_name="tombstone", schema='persistence_schema') - op.alter_column('CollectionVersion', 'version_id', new_column_name="id", schema='persistence_schema') - op.alter_column('CollectionVersion', 'metadata', new_column_name="collection_metadata", schema='persistence_schema') - op.alter_column('Dataset', 'dataset_id', new_column_name="id", schema='persistence_schema') - op.alter_column('Dataset', 'dataset_version_id', new_column_name="version_id", schema='persistence_schema') - op.alter_column('DatasetVersion', 'version_id', new_column_name="id", schema='persistence_schema') - op.alter_column('DatasetVersion', 'metadata', new_column_name='dataset_metadata', schema='persistence_schema') - op.drop_constraint('DatasetVersion_dataset_id_fkey', 'DatasetVersion', schema='persistence_schema', - type_='foreignkey') - op.create_foreign_key('DatasetVersion_id_fkey', 'DatasetVersion', 'Dataset', ['dataset_id'], ['id'], source_schema='persistence_schema', - referent_schema='persistence_schema') + op.alter_column("Collection", "tombstoned", new_column_name="tombstone", schema="persistence_schema") + op.alter_column("CollectionVersion", "version_id", new_column_name="id", schema="persistence_schema") + op.alter_column("CollectionVersion", "metadata", new_column_name="collection_metadata", schema="persistence_schema") + op.alter_column("Dataset", "dataset_id", new_column_name="id", schema="persistence_schema") + op.alter_column("Dataset", "dataset_version_id", new_column_name="version_id", schema="persistence_schema") + op.alter_column("DatasetVersion", "version_id", new_column_name="id", schema="persistence_schema") + op.alter_column("DatasetVersion", "metadata", new_column_name="dataset_metadata", schema="persistence_schema") + op.drop_constraint( + "DatasetVersion_dataset_id_fkey", "DatasetVersion", schema="persistence_schema", type_="foreignkey" + ) + op.create_foreign_key( + "DatasetVersion_id_fkey", + "DatasetVersion", + "Dataset", + ["dataset_id"], + ["id"], + source_schema="persistence_schema", + referent_schema="persistence_schema", + ) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! 
### - op.alter_column('Collection', "tombstone", new_column_name="tombstoned", schema='persistence_schema') - op.alter_column('CollectionVersion', 'id', new_column_name="version_id", schema='persistence_schema') - op.alter_column('CollectionVersion', 'collection_metadata', new_column_name="metadata", schema='persistence_schema') - op.alter_column('Dataset', 'id', new_column_name="dataset_id", schema='persistence_schema') - op.alter_column('Dataset', 'version_id', new_column_name="dataset_version_id", schema='persistence_schema') - op.alter_column('DatasetVersion', 'id', new_column_name="version_id", schema='persistence_schema') - op.alter_column('DatasetVersion', 'dataset_metadata', new_column_name='metadata', schema='persistence_schema') - op.drop_constraint('DatasetVersion_id_fkey', 'DatasetVersion', schema='persistence_schema', type_='foreignkey') - op.create_foreign_key('DatasetVersion_dataset_id_fkey', 'DatasetVersion', 'Dataset', ['dataset_id'], ['dataset_id'], source_schema='persistence_schema', - referent_schema='persistence_schema') + op.alter_column("Collection", "tombstone", new_column_name="tombstoned", schema="persistence_schema") + op.alter_column("CollectionVersion", "id", new_column_name="version_id", schema="persistence_schema") + op.alter_column("CollectionVersion", "collection_metadata", new_column_name="metadata", schema="persistence_schema") + op.alter_column("Dataset", "id", new_column_name="dataset_id", schema="persistence_schema") + op.alter_column("Dataset", "version_id", new_column_name="dataset_version_id", schema="persistence_schema") + op.alter_column("DatasetVersion", "id", new_column_name="version_id", schema="persistence_schema") + op.alter_column("DatasetVersion", "dataset_metadata", new_column_name="metadata", schema="persistence_schema") + op.drop_constraint("DatasetVersion_id_fkey", "DatasetVersion", schema="persistence_schema", type_="foreignkey") + op.create_foreign_key( + "DatasetVersion_dataset_id_fkey", + "DatasetVersion", + "Dataset", + ["dataset_id"], + ["dataset_id"], + source_schema="persistence_schema", + referent_schema="persistence_schema", + ) # ### end Alembic commands ### From eef29f05e3316b2278013becd7391eb2a3e98e29 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Tue, 10 Jan 2023 09:12:22 -0500 Subject: [PATCH 20/22] remove unused imports Signed-off-by: nayib-jose-gloria --- .../database/versions/34_2be441104b48_standardize_db_fields.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/backend/database/versions/34_2be441104b48_standardize_db_fields.py b/backend/database/versions/34_2be441104b48_standardize_db_fields.py index e45dc5064d6b9..8a4255f204827 100644 --- a/backend/database/versions/34_2be441104b48_standardize_db_fields.py +++ b/backend/database/versions/34_2be441104b48_standardize_db_fields.py @@ -6,8 +6,6 @@ """ from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. 
revision = "34_2be441104b48" From 4872b0a3438d0d45723680fbee80f6593cf7af2f Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Tue, 10 Jan 2023 12:41:11 -0500 Subject: [PATCH 21/22] tabs Signed-off-by: nayib-jose-gloria --- backend/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/Makefile b/backend/Makefile index 53877ae41187d..533fddeb37d6c 100644 --- a/backend/Makefile +++ b/backend/Makefile @@ -75,7 +75,7 @@ db/local/load-data: db/local/load-schema: # Imports the corpora_dev.sqlc schema (schema ONLY) into the corpora_test database - # Usage: make db/local/load-schema INFILE= + # Usage: make db/local/load-schema INFILE= $(eval DB_PW = $(shell aws secretsmanager get-secret-value --secret-id corpora/backend/test/database --region us-west-2 | jq -r '.SecretString | match(":([^:]*)@").captures[0].string')) PGPASSWORD=${DB_PW} pg_restore --schema-only --schema=persistence-schema --clean --no-owner --host 0.0.0.0 --username corpora --dbname corpora $(INFILE) # Also import alembic schema version From 31218bc0f5400752b58416ae09b55f8222a052fd Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Tue, 10 Jan 2023 12:46:56 -0500 Subject: [PATCH 22/22] tabs Signed-off-by: nayib-jose-gloria --- backend/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/Makefile b/backend/Makefile index 533fddeb37d6c..cc77fecf368f3 100644 --- a/backend/Makefile +++ b/backend/Makefile @@ -78,7 +78,7 @@ db/local/load-schema: # Usage: make db/local/load-schema INFILE= $(eval DB_PW = $(shell aws secretsmanager get-secret-value --secret-id corpora/backend/test/database --region us-west-2 | jq -r '.SecretString | match(":([^:]*)@").captures[0].string')) PGPASSWORD=${DB_PW} pg_restore --schema-only --schema=persistence-schema --clean --no-owner --host 0.0.0.0 --username corpora --dbname corpora $(INFILE) - # Also import alembic schema version + # Also import alembic schema version PGPASSWORD=${DB_PW} pg_restore --data-only --table=alembic_version --no-owner --host 0.0.0.0 --username corpora --dbname corpora $(INFILE) db/dump_schema: