Skip to content

Commit

Permalink
feat: use aind data schema v1.0 (#85)
Browse files Browse the repository at this point in the history
* feat: pin aind-data-schema v1.0.0
* feat: update external_links format
* fix: adds upper bound to pydantic

---------

Co-authored-by: jtyoung84 <[email protected]>
  • Loading branch information
helen-m-lin and jtyoung84 authored Sep 7, 2024
1 parent 27fbd81 commit 6d86ca9
Show file tree
Hide file tree
Showing 9 changed files with 36 additions and 31 deletions.
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ dependencies = [
"boto3",
"boto3-stubs[s3]",
"pydantic-settings>=2.0",
"pydantic>=2.0,<2.7",
"pydantic>=2.7,<2.9",
"pymongo==4.3.3",
"dask==2023.5.0",
"aind-data-schema==0.33.3",
"aind-data-schema==1.0.0",
"aind-codeocean-api==0.5.0",
]

Expand Down
2 changes: 1 addition & 1 deletion src/aind_data_asset_indexer/codeocean_bucket_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ def _process_codeocean_record(
"""
location = codeocean_record["location"]
created = codeocean_record["created"]
external_links = [codeocean_record["external_links"]]
external_links = codeocean_record["external_links"]
name = codeocean_record["name"]
url_parts = get_s3_bucket_and_prefix(location)
bucket = url_parts["bucket"]
Expand Down
12 changes: 7 additions & 5 deletions src/aind_data_asset_indexer/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from aind_codeocean_api.codeocean import CodeOceanClient
from aind_data_schema.core.data_description import DataLevel, DataRegex
from aind_data_schema.core.metadata import Metadata
from aind_data_schema.core.metadata import ExternalPlatforms, Metadata
from aind_data_schema.utils.json_writer import SchemaWriter
from botocore.exceptions import ClientError
from mypy_boto3_s3 import S3Client
Expand Down Expand Up @@ -567,7 +567,7 @@ def build_metadata_record_from_prefix(
s3_client: S3Client,
optional_name: Optional[str] = None,
optional_created: Optional[datetime] = None,
optional_external_links: Optional[List[dict]] = None,
optional_external_links: Optional[Dict[str, List[str]]] = None,
) -> Optional[str]:
"""
For a given bucket and prefix, this method will return a JSON string
Expand All @@ -585,7 +585,7 @@ def build_metadata_record_from_prefix(
s3_prefix. Default is None.
optional_created: Optional[datetime]
User can override created datetime. Default is None.
optional_external_links: Optional[List[dict]]
optional_external_links: Optional[Dict[str, List[str]]]
User can provide external_links. Default is None.
Returns
Expand Down Expand Up @@ -1048,7 +1048,7 @@ def get_all_processed_codeocean_asset_records(
{"name": data_asset_name,
"location": data_asset_location,
"created": data_asset_created,
"external_links": {"Code Ocean": data_asset_id}
"external_links": {"Code Ocean": [data_asset_id]}
}
}
Expand Down Expand Up @@ -1091,7 +1091,9 @@ def get_all_processed_codeocean_asset_records(
"name": data_asset_name,
"location": location,
"created": created_datetime,
"external_links": {"Code Ocean": data_asset_id},
"external_links": {
ExternalPlatforms.CODEOCEAN.value: [data_asset_id]
},
}
# Occasionally, there are duplicate items returned. This is one
# way to remove the duplicates.
Expand Down
3 changes: 2 additions & 1 deletion tests/resources/utils/example_metadata.nd.json
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,9 @@
"pipeline_version": null,
"schema_version": "0.1.0"
},
"quality_control": null,
"rig": null,
"schema_version": "0.2.7",
"schema_version": "1.0.0",
"session": null,
"subject": {
"background_strain": null,
Expand Down
3 changes: 2 additions & 1 deletion tests/resources/utils/example_metadata1.nd.json
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,9 @@
"pipeline_version": null,
"schema_version": "0.2.5"
},
"quality_control": null,
"rig": null,
"schema_version": "0.2.7",
"schema_version": "1.0.0",
"session": null,
"subject": {
"background_strain": null,
Expand Down
3 changes: 2 additions & 1 deletion tests/resources/utils/example_metadata2.nd.json
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,9 @@
},
"schema_version": "0.3.1"
},
"quality_control": null,
"rig": null,
"schema_version": "0.2.7",
"schema_version": "1.0.0",
"session": null,
"subject": {
"background_strain": null,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_aind_bucket_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def test_write_root_file_with_record_info_same_hash(
"last_modified": datetime(
2024, 5, 15, 17, 41, 28, tzinfo=timezone.utc
),
"e_tag": '"e6dd2b7ab819f7a0fc21dba512a4071b"',
"e_tag": '"275d922d2a1e547f2e0f35b5cc54f493"',
"version_id": "version_id",
},
prefix="ecephys_642478_2023-01-17_13-56-29",
Expand Down
6 changes: 3 additions & 3 deletions tests/test_codeocean_bucket_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def setUpClass(cls) -> None:
2024, 6, 12, 21, 21, 28, tzinfo=timezone.utc
),
"external_links": {
"Code Ocean": "11ee1e1e-11e1-1111-1111-e11eeeee1e11"
"Code Ocean": ["11ee1e1e-11e1-1111-1111-e11eeeee1e11"]
},
},
{
Expand All @@ -69,7 +69,7 @@ def setUpClass(cls) -> None:
2024, 6, 12, 19, 45, 59, tzinfo=timezone.utc
),
"external_links": {
"Code Ocean": "666666cc-66cc-6c66-666c-6c66c6666666"
"Code Ocean": ["666666cc-66cc-6c66-666c-6c66c6666666"]
},
},
]
Expand All @@ -79,10 +79,10 @@ def setUpClass(cls) -> None:
"ecephys_642478_2023-01-17_13-56-29/instrument.json": None,
"ecephys_642478_2023-01-17_13-56-29/procedures.json": None,
"ecephys_642478_2023-01-17_13-56-29/processing.json": None,
"ecephys_642478_2023-01-17_13-56-29/quality_control.json": None,
"ecephys_642478_2023-01-17_13-56-29/rig.json": None,
"ecephys_642478_2023-01-17_13-56-29/session.json": None,
"ecephys_642478_2023-01-17_13-56-29/subject.json": None,
"ecephys_642478_2023-01-17_13-56-29/mri_session.json": None,
}
cls.example_docdb_records = [
{
Expand Down
32 changes: 16 additions & 16 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,10 @@ def load_json_file(filename: str) -> dict:
"instrument",
"procedures",
"processing",
"quality_control",
"rig",
"session",
"subject",
"mri_session",
]
cls.example_core_files = example_core_files
example_pages = load_json_file("example_pages_response.json")
Expand Down Expand Up @@ -187,7 +187,7 @@ def test__log_message_false(self, mock_log: MagicMock):
def test_compute_md5_hash(self):
"""Tests compute_md5_hash method"""
md5_hash = compute_md5_hash(json.dumps(self.example_metadata_nd))
self.assertEqual("e6dd2b7ab819f7a0fc21dba512a4071b", md5_hash)
self.assertEqual("275d922d2a1e547f2e0f35b5cc54f493", md5_hash)

def test_is_dict_corrupt(self):
"""Tests is_dict_corrupt method"""
Expand Down Expand Up @@ -746,6 +746,7 @@ def test_build_metadata_record_from_prefix(
"e_tag": '"f4827f025e79bafeb6947e14c4e3b51a"',
"version_id": "jWWT0Xrb8_nE9t5C.nTlLElpYJoURbv_",
},
"ecephys_642478_2023-01-17_13-56-29/quality_control.json": None,
"ecephys_642478_2023-01-17_13-56-29/rig.json": None,
"ecephys_642478_2023-01-17_13-56-29/session.json": None,
"ecephys_642478_2023-01-17_13-56-29/subject.json": {
Expand All @@ -755,7 +756,6 @@ def test_build_metadata_record_from_prefix(
"e_tag": '"92734946c64fc87408ef79e5e92937bc"',
"version_id": "XS0p7m6wWNTHG_F3P76D7AUXtE23BakR",
},
"ecephys_642478_2023-01-17_13-56-29/mri_session.json": None,
}
mock_download_json_file.side_effect = [
self.example_processing,
Expand Down Expand Up @@ -811,10 +811,10 @@ def test_build_metadata_record_from_prefix_with_optional_fields(
"ecephys_642478_2023-01-17_13-56-29/instrument.json": None,
"ecephys_642478_2023-01-17_13-56-29/procedures.json": None,
"ecephys_642478_2023-01-17_13-56-29/processing.json": None,
"ecephys_642478_2023-01-17_13-56-29/quality_control.json": None,
"ecephys_642478_2023-01-17_13-56-29/rig.json": None,
"ecephys_642478_2023-01-17_13-56-29/session.json": None,
"ecephys_642478_2023-01-17_13-56-29/subject.json": None,
"ecephys_642478_2023-01-17_13-56-29/mri_session.json": None,
}
# noinspection PyTypeChecker
md = json.loads(
Expand All @@ -824,15 +824,15 @@ def test_build_metadata_record_from_prefix_with_optional_fields(
s3_client=mock_s3_client,
optional_name="ecephys_642478_2023-01-17_13-56-29",
optional_created=datetime(2020, 1, 2, 3, 4, 5),
optional_external_links=[{"Code Ocean": "123-456"}],
optional_external_links={"Code Ocean": ["123-456"]},
)
)
mock_get_dict_of_file_info.assert_called_once()
mock_download_json_file.assert_not_called()
self.assertEqual("s3://code-ocean-bucket/abc-123", md["location"])
self.assertEqual("ecephys_642478_2023-01-17_13-56-29", md["name"])
self.assertEqual("2020-01-02T03:04:05", md["created"])
self.assertEqual([{"Code Ocean": "123-456"}], md["external_links"])
self.assertEqual({"Code Ocean": ["123-456"]}, md["external_links"])

@patch("aind_data_asset_indexer.utils.Metadata.model_construct")
@patch("boto3.client")
Expand All @@ -859,6 +859,7 @@ def test_build_metadata_record_from_prefix_error(
"e_tag": '"f4827f025e79bafeb6947e14c4e3b51a"',
"version_id": "jWWT0Xrb8_nE9t5C.nTlLElpYJoURbv_",
},
"ecephys_642478_2023-01-17_13-56-29/quality_control.json": None,
"ecephys_642478_2023-01-17_13-56-29/rig.json": None,
"ecephys_642478_2023-01-17_13-56-29/session.json": None,
"ecephys_642478_2023-01-17_13-56-29/subject.json": {
Expand All @@ -868,7 +869,6 @@ def test_build_metadata_record_from_prefix_error(
"e_tag": '"92734946c64fc87408ef79e5e92937bc"',
"version_id": "XS0p7m6wWNTHG_F3P76D7AUXtE23BakR",
},
"ecephys_642478_2023-01-17_13-56-29/mri_session.json": None,
}
mock_download_json_file.side_effect = [
self.example_processing,
Expand Down Expand Up @@ -949,6 +949,7 @@ def test_sync_core_json_files(
"e_tag": f'"{md5_hash_processing_unchanged}"',
"version_id": "jWWT0Xrb8_nE9t5C.nTlLElpYJoURbv_",
},
f"{pfx}/quality_control.json": None,
f"{pfx}/rig.json": {
"last_modified": datetime(
2023, 11, 4, 1, 13, 41, tzinfo=timezone.utc
Expand All @@ -964,7 +965,6 @@ def test_sync_core_json_files(
"e_tag": f'"{md5_hash_subject_unchanged}"',
"version_id": "XS0p7m6wWNTHG_F3P76D7AUXtE23BakR",
},
f"{pfx}/mri_session.json": None,
}
mock_upload_core_record.return_value = "mock_upload_response"
mock_s3_client.delete_object.return_value = "mock_delete_response"
Expand Down Expand Up @@ -1020,6 +1020,10 @@ def test_sync_core_json_files(
f"processing is up-to-date in {s3_loc}/processing.json. "
"Skipping."
),
(
f"quality_control not found in metadata.nd.json for {pfx} nor "
f"in {s3_loc}/quality_control.json! Skipping."
),
(
f"rig not found in metadata.nd.json for {pfx} but {s3_loc}/"
"rig.json exists! Deleting."
Expand All @@ -1030,10 +1034,6 @@ def test_sync_core_json_files(
f"{s3_loc}/session.json! Skipping."
),
f"subject is up-to-date in {s3_loc}/subject.json. Skipping.",
(
f"mri_session not found in metadata.nd.json for {pfx} nor in "
f"{s3_loc}/mri_session.json! Skipping."
),
]
actual_log_messages = [
c[1]["message"] for c in mock_log_message.call_args_list
Expand Down Expand Up @@ -1072,6 +1072,7 @@ def test_cond_copy_then_sync_core_json_files(
"e_tag": '"7ebb961de9e9b00accfd1358e4561ec1"',
"version_id": "jWWT0Xrb8_nE9t5C.nTlLElpYJoURbv_",
},
f"{pfx}/quality_control.json": None,
f"{pfx}/rig.json": None,
f"{pfx}/session.json": None,
f"{pfx}/subject.json": {
Expand All @@ -1081,7 +1082,6 @@ def test_cond_copy_then_sync_core_json_files(
"e_tag": '"8b8cd50a6cf1f3f667be98a69db2ad89"',
"version_id": "XS0p7m6wWNTHG_F3P76D7AUXtE23BakR",
},
f"{pfx}/mri_session.json": None,
}
cond_copy_then_sync_core_json_files(
metadata_json=json.dumps(self.example_metadata_nd),
Expand Down Expand Up @@ -1164,6 +1164,7 @@ def test_cond_copy_then_sync_core_json_files_mismatch(
"e_tag": '"7ebb961de9e9b00accfd1358e4561ec1"',
"version_id": "jWWT0Xrb8_nE9t5C.nTlLElpYJoURbv_",
},
f"{pfx}/quality_control.json": None,
f"{pfx}/rig.json": {
"last_modified": datetime(
2022, 5, 5, 1, 13, 41, tzinfo=timezone.utc
Expand All @@ -1179,7 +1180,6 @@ def test_cond_copy_then_sync_core_json_files_mismatch(
"e_tag": '"8b8cd50a6cf1f3f667be98a69db2ad89"',
"version_id": "XS0p7m6wWNTHG_F3P76D7AUXtE23BakR",
},
f"{pfx}/mri_session.json": None,
}

cond_copy_then_sync_core_json_files(
Expand Down Expand Up @@ -1436,7 +1436,7 @@ def test_get_all_processed_codeocean_asset_records(
2024, 6, 12, 21, 21, 28, tzinfo=timezone.utc
),
"external_links": {
"Code Ocean": "11ee1e1e-11e1-1111-1111-e11eeeee1e11"
"Code Ocean": ["11ee1e1e-11e1-1111-1111-e11eeeee1e11"]
},
},
"s3://some_co_bucket/666666cc-66cc-6c66-666c-6c66c6666666": {
Expand All @@ -1451,7 +1451,7 @@ def test_get_all_processed_codeocean_asset_records(
2024, 6, 12, 19, 45, 59, tzinfo=timezone.utc
),
"external_links": {
"Code Ocean": "666666cc-66cc-6c66-666c-6c66c6666666"
"Code Ocean": ["666666cc-66cc-6c66-666c-6c66c6666666"]
},
},
}
Expand Down

0 comments on commit 6d86ca9

Please sign in to comment.