Skip to content

Commit

Permalink
Unit test coverage
Browse files Browse the repository at this point in the history
  • Loading branch information
JBWilkie committed Aug 29, 2024
1 parent 91dc524 commit cd806a9
Show file tree
Hide file tree
Showing 5 changed files with 250 additions and 29 deletions.
3 changes: 2 additions & 1 deletion darwin/cli_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,8 @@ def upload_data(
item_merge_mode : Optional[str]
If set, each file path passed to `files_to_upload` behaves as follows:
- Every path that points directly to a file is ignored
- Each folder of files passed to `files_to_upload` will be uploaded according to the following modes:
- Each folder of files passed to `files_to_upload` will be uploaded according to the following mode rules.
Note that folders will not be recursively searched, so only files in the first level of the folder will be uploaded:
- "slots": Each file in the folder will be uploaded to a different slot of the same item.
- "series": All `.dcm` files in the folder will be concatenated into a single slot. All other files are ignored.
- "channels": Each file in the folder will be uploaded to a different channel of the same item.
Expand Down
82 changes: 68 additions & 14 deletions darwin/dataset/remote_dataset_v2.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
from pathlib import Path
from typing import (
TYPE_CHECKING,
Any,
Expand Down Expand Up @@ -204,7 +205,8 @@ def push(
item_merge_mode: Optional[str], default: None
If set, each file path passed to `files_to_upload` behaves as follows:
- Every path that points directly to a file is ignored
- Each folder of files passed to `files_to_upload` will be uploaded according to the following modes:
- Each folder of files passed to `files_to_upload` will be uploaded according to the following mode rules.
Note that folders will not be recursively searched, so only files in the first level of the folder will be uploaded:
- "slots": Each file in the folder will be uploaded to a different slot of the same item.
- "series": All `.dcm` files in the folder will be concatenated into a single slot. All other files are ignored.
- "channels": Each file in the folder will be uploaded to a different channel of the same item.
Expand All @@ -229,7 +231,7 @@ def push(

if item_merge_mode:
try:
item_merge_mode = ItemMergeMode(item_merge_mode)
ItemMergeMode(item_merge_mode)
except ValueError:
raise ValueError(
f"Invalid item merge mode: {item_merge_mode}. Valid options are: 'slots', 'series', 'channels"
Expand All @@ -249,11 +251,11 @@ def push(
]

if item_merge_mode:
uploading_files = find_files_to_upload_merging(
uploading_files = _find_files_to_upload_merging(
search_files, files_to_exclude, item_merge_mode
)
else:
uploading_files = find_files_to_upload_no_merging(
uploading_files = _find_files_to_upload_no_merging(
search_files,
files_to_exclude,
path,
Expand All @@ -264,11 +266,6 @@ def push(
uploading_files,
)

if not uploading_files:
raise ValueError(
"No files to upload, check your path, exclusion filters and resume flag"
)

handler = UploadHandlerV2(self, uploading_files)
if blocking:
handler.upload(
Expand Down Expand Up @@ -857,11 +854,33 @@ def register_multi_slotted(
return results


def find_files_to_upload_merging(
def _find_files_to_upload_merging(
search_files: List[PathLike],
files_to_exclude: Optional[List[PathLike]],
files_to_exclude: List[PathLike],
item_merge_mode: str,
) -> List[MultiFileItem]:
"""
Finds files to upload as either:
- Multi-slotted items
- Multi-channel items
- Single-slotted items containing multiple `.dcm` files
Does not search each directory recursively, only considers files in the first level of each directory.
Parameters
----------
search_files : List[PathLike]
List of directories to search for files.
files_to_exclude : List[PathLike]
List of files to exclude from the file scan.
item_merge_mode : str
Mode to merge the files in the folders. Valid options are: 'slots', 'series', 'channels'.
Returns
-------
List[MultiFileItem]
List of files to upload.
"""
multi_file_items = []
for directory in search_files:
files_in_directory = list(
Expand All @@ -873,7 +892,9 @@ def find_files_to_upload_merging(
)
continue
multi_file_items.append(
MultiFileItem(directory, files_in_directory, item_merge_mode)
MultiFileItem(
Path(directory), files_in_directory, ItemMergeMode(item_merge_mode)
)
)
if not multi_file_items:
raise ValueError(
Expand All @@ -882,16 +903,43 @@ def find_files_to_upload_merging(
return multi_file_items


def find_files_to_upload_no_merging(
def _find_files_to_upload_no_merging(
search_files: List[PathLike],
files_to_exclude: Optional[List[PathLike]],
files_to_exclude: List[PathLike],
path: Optional[str],
fps: int,
as_frames: bool,
extract_views: bool,
preserve_folders: bool,
uploading_files: List[LocalFile],
) -> List[LocalFile]:
"""
Finds files to upload as single-slotted dataset items. Recursively searches the passed directories for files.
Parameters
----------
search_files : List[PathLike]
List of directories to search for files.
files_to_exclude : Optional[List[PathLike]]
List of files to exclude from the file scan.
path : Optional[str]
Path to store the files in.
fps: int
When uploading video files, specify the framerate.
as_frames: bool
When uploading video files, specify whether to upload as a list of frames.
extract_views: bool
When uploading volume files, specify whether to split into orthogonal views.
preserve_folders: bool
Specify whether or not to preserve folder paths when uploading.
uploading_files : List[LocalFile]
List of files to upload.
Returns
-------
List[LocalFile]
List of files to upload.
"""
generic_parameters_specified = (
path is not None or fps != 0 or as_frames is not False
)
Expand Down Expand Up @@ -919,4 +967,10 @@ def find_files_to_upload_no_merging(
path=local_path,
)
)

if not uploading_files:
raise ValueError(
"No files to upload, check your path, exclusion filters and resume flag"
)

return uploading_files
48 changes: 36 additions & 12 deletions darwin/dataset/upload_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
Optional,
Set,
Tuple,
Union,
)

import requests
Expand Down Expand Up @@ -194,27 +195,46 @@ def full_path(self) -> str:


class MultiFileItem:
def __init__(
self, directory: PathLike, files: List[PathLike], merge_mode: ItemMergeMode
):
def __init__(self, directory: Path, files: List[Path], merge_mode: ItemMergeMode):
self.directory = directory
self.name = directory.name
self.files = files
self.merge_mode = merge_mode
self.layout = self._create_layout()
self.temp = {"version": 2, "slots": ["1", "2", "3"], "type": "grid"}

def _create_layout(self):
# TODO
if (
self.merge_mode == ItemMergeMode.slots
or self.merge_mode == ItemMergeMode.series
):
"""
Creates the layout to be used when uploading the files:
- For multi-slotted items: LayoutV2
- For series items: LayoutV2, but only with `.dcm` files
- For multi-channel items: LayoutV3
Raises
------
ValueError
- If no DICOM files are found in the directory for ItemMergeMode.SERIES items
- If the number of files is greater than 16 for ItemMergeMode.CHANNELS items
"""
if self.merge_mode == ItemMergeMode.SLOTS:
return {
"version": 2,
"slots": [str(i) for i in range(len(self.files))],
"type": "grid",
}
elif self.merge_mode == ItemMergeMode.SERIES:
self.files = [file for file in self.files if file.suffix.lower() == ".dcm"]
if not self.files:
raise ValueError("No DICOM files found in 1st level of directory")
return {
"version": 2,
"slots": [str(i) for i in range(len(self.files))],
"type": "grid", # Worth experimenting with - Is this the best option? Should we change this dynamically?
"type": "grid",
}
else:
elif self.merge_mode == ItemMergeMode.CHANNELS:
if len(self.files) > 16:
raise ValueError(
f"No multi-channel item can have more than 16 channels. The following directory has {len(self.files)} files: {self.directory}"
)
return {"version": 3, "slots_grid": [[[file.name for file in self.files]]]}


Expand Down Expand Up @@ -434,7 +454,11 @@ def _upload_file(


class UploadHandlerV2(UploadHandler):
def __init__(self, dataset: "RemoteDataset", local_files: List[LocalFile]):
def __init__(
self,
dataset: "RemoteDataset",
local_files: Union[List[LocalFile], List[MultiFileItem]],
):
super().__init__(dataset=dataset, local_files=local_files)

def _request_upload(self) -> Tuple[List[ItemPayload], List[ItemPayload]]:
Expand Down
Binary file added tests/darwin/data/push_test_dir.zip
Binary file not shown.
Loading

0 comments on commit cd806a9

Please sign in to comment.