Skip to content

Commit

Permalink
Merge branch 'master' into DAR-4038
Browse files Browse the repository at this point in the history
  • Loading branch information
JBWilkie committed Oct 23, 2024
2 parents a465329 + 27c2fde commit c549ccc
Show file tree
Hide file tree
Showing 8 changed files with 162 additions and 13 deletions.
15 changes: 11 additions & 4 deletions darwin/dataset/upload_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,18 +452,20 @@ def skip_existing_full_remote_filepaths(self) -> None:
local_files_to_remove.extend(multi_file_item.files)
multi_file_items_to_remove.append(multi_file_item)
console.print(
f"The remote filepath {multi_file_item.full_path} is already occupied by a dataset item in the {self.dataset.slug} dataset. Skipping upload of item.",
f"The remote filepath {multi_file_item.full_path} is already occupied by a dataset item in the `{self.dataset.slug}` dataset. Skipping upload of item.",
style="warning",
)
if self.local_files:
for local_file in self.local_files:
if Path(local_file.full_path) in full_remote_filepaths:
if (
Path(local_file.full_path) in full_remote_filepaths
and local_file not in local_files_to_remove
):
local_files_to_remove.append(local_file)
console.print(
f"The remote filepath {local_file.full_path} already exists in the {self.dataset.slug} dataset. Skipping upload of item.",
f"The remote filepath {local_file.full_path} already exists in the `{self.dataset.slug}` dataset. Skipping upload of item.",
style="warning",
)

self.local_files = [
local_file
for local_file in self.local_files
Expand All @@ -476,6 +478,11 @@ def skip_existing_full_remote_filepaths(self) -> None:
if multi_file_item not in multi_file_items_to_remove
]

if not self.local_files and not self.multi_file_items:
raise ValueError(
"All items to be uploaded have paths that already exist in the remote dataset. No items to upload."
)

def prepare_upload(
self,
) -> Optional[Iterator[Callable[[Optional[ByteReadCallback]], None]]]:
Expand Down
7 changes: 4 additions & 3 deletions darwin/importer/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2115,12 +2115,13 @@ def _display_slot_warnings_and_errors(
Raises
------
TypeError
If there are any warnings generated and the annotation format is not Darwin JSON 2.0
If any slot warnings or errors were generated and the annotation format is neither Darwin JSON 2.0 nor NIfTI
"""

# Warnings can only be generated by referring to slots, which is only supported by Darwin JSON
# Warnings can only be generated by referring to slots, which is only supported by the Darwin JSON & NIfTI formats
# Therefore, stop imports of all other formats if there are any warnings
if (slot_errors or slot_warnings) and annotation_format != "darwin":
supported_formats = ["darwin", "nifti"]
if (slot_errors or slot_warnings) and annotation_format not in supported_formats:
raise TypeError(
"You are attempting to import annotations to multi-slotted or multi-channeled items using an annotation format that doesn't support them. To import annotations to multi-slotted or multi-channeled items, please use the Darwin JSON 2.0 format: https://docs.v7labs.com/reference/darwin-json"
)
Expand Down
2 changes: 1 addition & 1 deletion darwin/item.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def parse(cls, raw: Dict[str, Any], dataset_slug: str = "n/a") -> "DatasetItem":
"current_workflow_id": raw.get("workflow_data", {}).get("workflow_id"),
"current_workflow": raw.get("workflow_data"),
"slots": raw["slots"],
"layout": raw["layout"],
"layout": raw.get("layout"),
}
else:
data = {
Expand Down
2 changes: 1 addition & 1 deletion darwin/version/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.0.9"
__version__ = "1.0.10"
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ license = "MIT"
name = "darwin-py"
readme = "README.md"
repository = "https://github.com/v7labs/darwin-py"
version = "1.0.9"
version = "1.0.10"
[[tool.poetry.packages]]
include = "darwin"

Expand Down
73 changes: 73 additions & 0 deletions tests/darwin/dataset/remote_dataset_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,47 @@ def files_content() -> Dict[str, Any]:
}


@pytest.fixture()
def unprocessed_file_content() -> Dict[str, Any]:
    """Return a one-item page payload for a video that is still processing.

    The item carries an empty ``slots`` list and no ``layout`` key, and both
    its own status and its upload's status are ``"processing"``.
    """
    item_id = "018c6826-766c-d596-44b3-46159c7c23bc"
    timestamp = "2023-12-14T11:46:40Z"

    # The only upload attached to the item; it has not finished processing.
    pending_upload = {
        "type": "video",
        "file_name": "segment_1.mp4",
        "processing_status": "processing",
        "slot_name": "0",
        "fps": 25,
        "upload_id": "5404aafe-6434-44cb-8bc2-4b3758466bbf",
        "as_frames": False,
    }

    pending_item = {
        "id": item_id,
        "name": "segment_1.mp4",
        "priority": 0,
        "status": "processing",
        "path": "/",
        "tags": [],
        "cursor": item_id,
        "uploads": [pending_upload],
        "slots": [],
        "inserted_at": timestamp,
        "updated_at": timestamp,
        "dataset_id": 611387,
        "archived": False,
        "processing_status": "processing",
        "workflow_status": "new",
        "slot_types": ["video"],
    }

    # ``next`` is None, so this payload describes the last page.
    pagination = {
        "count": 2,
        "next": None,
        "previous": item_id,
    }

    return {"items": [pending_item], "page": pagination}


# This test was never actually running
# TODO: Fix this test
# class TestDatasetCreation:
Expand Down Expand Up @@ -599,6 +640,38 @@ def test_fetches_files_with_commas(
== "example,with, comma.mp4"
)

@responses.activate
def test_returns_unprocessed_files(
    self,
    darwin_client: Client,
    dataset_name: str,
    dataset_slug: str,
    team_slug_darwin_json_v2: str,
    unprocessed_file_content: dict,
):
    """Fetching remote files yields items that are still processing.

    The mocked API response contains a single item with an empty ``slots``
    list and no ``layout`` key. The test checks that the item is parsed and
    returned rather than dropped or raising, and that the ``item_names``
    filter is forwarded unchanged in the request.
    """
    remote_dataset = RemoteDatasetV2(
        client=darwin_client,
        team=team_slug_darwin_json_v2,
        name=dataset_name,
        slug=dataset_slug,
        dataset_id=1,
    )
    url = "http://localhost/api/v2/teams/v7-darwin-json-v2/items?item_names%5B%5D=example%2Cwith%2C+comma.mp4&page%5Bsize%5D=500&include_workflow_data=true&dataset_ids%5B%5D=1"
    responses.add(
        responses.GET,
        url,
        json=unprocessed_file_content,
        status=200,
    )
    filters = {"item_names": ["example,with, comma.mp4"]}

    remote_files = list(remote_dataset.fetch_remote_files(filters))

    # Check the parsed result, not only the outgoing request, so the test
    # fails if unprocessed items are silently skipped.
    assert len(remote_files) == len(unprocessed_file_content["items"])
    assert (
        responses.calls[0].request.params["item_names[]"]
        == "example,with, comma.mp4"
    )


@pytest.mark.usefixtures("file_read_write_test")
class TestFetchRemoteClasses:
Expand Down
31 changes: 28 additions & 3 deletions tests/darwin/dataset/upload_manager_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@ def test_request_upload_is_not_called_on_init(
dataset: RemoteDataset, request_upload_endpoint: str
):
with patch.object(dataset, "fetch_remote_files", return_value=[]):
upload_handler = UploadHandler.build(dataset, [])
with patch.object(
UploadHandler, "skip_existing_full_remote_filepaths", return_value=[]
):
upload_handler = UploadHandler.build(dataset, [])

assert upload_handler.pending_count == 0
assert upload_handler.blocked_count == 0
Expand Down Expand Up @@ -446,7 +449,7 @@ def test_skip_existing_full_remote_filepaths_with_local_files():
assert local_file_2 in upload_handler.local_files

mock_print.assert_any_call(
"The remote filepath /existing_file_1.jpg already exists in the test-dataset dataset. Skipping upload of item.",
"The remote filepath /existing_file_1.jpg already exists in the `test-dataset` dataset. Skipping upload of item.",
style="warning",
)

Expand Down Expand Up @@ -475,6 +478,28 @@ def test_skip_existing_full_remote_filepaths_with_multi_file_items():

# Verify that the correct warning was printed
mock_print.assert_any_call(
"The remote filepath /existing_multi_file_item.jpg is already occupied by a dataset item in the test-dataset dataset. Skipping upload of item.",
"The remote filepath /existing_multi_file_item.jpg is already occupied by a dataset item in the `test-dataset` dataset. Skipping upload of item.",
style="warning",
)


def test_skip_existing_full_remote_filepaths_raises_if_no_files_left():
    """Building an upload handler raises when every item path already exists remotely.

    Both multi-file items share their ``full_path`` with an item returned by
    the mocked ``fetch_remote_files``, and no local files are supplied, so
    nothing is left to upload.

    Raises
    ------
    ValueError
        Expected from ``UploadHandlerV2`` construction; the test fails otherwise.
    """
    import re

    mock_dataset = MagicMock()
    mock_dataset.fetch_remote_files.return_value = [
        MagicMock(full_path="/existing_multi_file_item_1.jpg"),
        MagicMock(full_path="/existing_multi_file_item_2.jpg"),
    ]
    mock_dataset.slug = "test-dataset"

    multi_file_item_1 = MagicMock(
        full_path="/existing_multi_file_item_1.jpg", files=[MagicMock()]
    )
    multi_file_item_2 = MagicMock(
        full_path="/existing_multi_file_item_2.jpg", files=[MagicMock()]
    )

    expected_message = "All items to be uploaded have paths that already exist in the remote dataset. No items to upload."

    # ``match`` is applied as a regular expression, so escape the literal
    # message; otherwise each "." would match any character.
    with pytest.raises(ValueError, match=re.escape(expected_message)):
        UploadHandlerV2(mock_dataset, [], [multi_file_item_1, multi_file_item_2])
43 changes: 43 additions & 0 deletions tests/darwin/importer/importer_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2591,6 +2591,49 @@ def test_does_not_raise_error_for_darwin_format_with_warnings():
assert not slot_errors


def test_does_not_raise_error_for_nifti_format_with_warnings():
    """Displaying slot diagnostics for the ``nifti`` format must not raise.

    Two bounding-box annotations with no slot names are aligned against a
    remote item that has two slots; the resulting errors and warnings are
    then passed to the display helper with ``"nifti"`` as the format.
    """
    box_class = dt.AnnotationClass(name="class1", annotation_type="bounding_box")

    box_geometries = [
        {"x": 5, "y": 10, "w": 5, "h": 10},
        {"x": 15, "y": 20, "w": 15, "h": 20},
    ]
    # Neither annotation names a slot.
    slotless_annotations = [
        dt.Annotation(
            annotation_class=box_class,
            data=geometry,
            slot_names=[],
        )
        for geometry in box_geometries
    ]

    annotation_file = dt.AnnotationFile(
        path=Path("file1"),
        remote_path="/",
        filename="file1",
        annotation_classes={box_class},
        annotations=slotless_annotations,
    )

    two_slot_layout = {"type": "grid", "version": 1, "slots": ["0", "1"]}
    remote_files = {
        "/file1": {
            "item_id": "123",
            "slot_names": ["0", "1"],
            "layout": two_slot_layout,
        },
    }

    _, slot_errors, slot_warnings = _verify_slot_annotation_alignment(
        [annotation_file],
        remote_files,
    )

    mock_console = MagicMock()
    _display_slot_warnings_and_errors(
        slot_errors, slot_warnings, "nifti", mock_console
    )

    assert not slot_errors


@patch("darwin.importer.importer._get_team_properties_annotation_lookup")
@pytest.mark.parametrize("setup_data", ["section"], indirect=True)
def test_import_existing_section_level_property_values_without_manifest(
Expand Down

0 comments on commit c549ccc

Please sign in to comment.