From 91dc524dae4f7b6f6dc28bf2ea01d068bb400860 Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Tue, 27 Aug 2024 22:54:39 +0100 Subject: [PATCH 1/5] Added to CLI & SDK + created MultiFileItem parsing & layout building --- darwin/cli.py | 1 + darwin/cli_functions.py | 9 ++ darwin/dataset/remote_dataset.py | 1 + darwin/dataset/remote_dataset_v2.py | 134 ++++++++++++++++---- darwin/dataset/upload_manager.py | 32 +++++ darwin/options.py | 6 + tests/darwin/dataset/remote_dataset_test.py | 14 ++ 7 files changed, 169 insertions(+), 28 deletions(-) diff --git a/darwin/cli.py b/darwin/cli.py index bf493341a..c48f55e38 100644 --- a/darwin/cli.py +++ b/darwin/cli.py @@ -126,6 +126,7 @@ def _run(args: Namespace, parser: ArgumentParser) -> None: args.extract_views, args.preserve_folders, args.verbose, + args.item_merge_mode, ) # Remove a project (remotely) elif args.action == "remove": diff --git a/darwin/cli_functions.py b/darwin/cli_functions.py index 5d01d7a9d..f2b11999d 100644 --- a/darwin/cli_functions.py +++ b/darwin/cli_functions.py @@ -656,6 +656,7 @@ def upload_data( extract_views: bool = False, preserve_folders: bool = False, verbose: bool = False, + item_merge_mode: Optional[str] = None, ) -> None: """ Uploads the provided files to the remote dataset. @@ -684,6 +685,13 @@ def upload_data( Specify whether or not to preserve folder paths when uploading. verbose : bool Specify whether to have full traces print when uploading files or not. + item_merge_mode : Optional[str] + If set, each file path passed to `files_to_upload` behaves as follows: + - Every path that points directly to a file is ignored + - Each folder of files passed to `files_to_upload` will be uploaded according to the following modes: + - "slots": Each file in the folder will be uploaded to a different slot of the same item. + - "series": All `.dcm` files in the folder will be concatenated into a single slot. All other files are ignored. + - "channels": Each file in the folder will be uploaded to a different channel of the same item. """ client: Client = _load_client() try: @@ -773,6 +781,7 @@ def file_upload_callback( preserve_folders=preserve_folders, progress_callback=progress_callback, file_upload_callback=file_upload_callback, + item_merge_mode=item_merge_mode, ) console = Console(theme=_console_theme()) diff --git a/darwin/dataset/remote_dataset.py b/darwin/dataset/remote_dataset.py index 154c26c85..34f642912 100644 --- a/darwin/dataset/remote_dataset.py +++ b/darwin/dataset/remote_dataset.py @@ -138,6 +138,7 @@ def push( preserve_folders: bool = False, progress_callback: Optional[ProgressCallback] = None, file_upload_callback: Optional[FileUploadCallback] = None, + item_merge_mode: Optional[str] = None, ) -> UploadHandler: pass diff --git a/darwin/dataset/remote_dataset_v2.py b/darwin/dataset/remote_dataset_v2.py index 59f87b320..72241785f 100644 --- a/darwin/dataset/remote_dataset_v2.py +++ b/darwin/dataset/remote_dataset_v2.py @@ -18,7 +18,9 @@ from darwin.dataset.release import Release from darwin.dataset.upload_manager import ( FileUploadCallback, + ItemMergeMode, LocalFile, + MultiFileItem, ProgressCallback, UploadHandler, UploadHandlerV2, @@ -166,6 +168,7 @@ def push( preserve_folders: bool = False, progress_callback: Optional[ProgressCallback] = None, file_upload_callback: Optional[FileUploadCallback] = None, + item_merge_mode: Optional[str] = None, ) -> UploadHandler: """ Uploads a local dataset (images ONLY) in the datasets directory. 
@@ -173,7 +176,8 @@ def push( Parameters ---------- files_to_upload : Optional[List[Union[PathLike, LocalFile]]] - List of files to upload. Those can be folders. + List of files to upload. These can be folders. + If `item_merge_mode` is set, these must be folders. blocking : bool, default: True If False, the dataset is not uploaded and a generator function is returned instead. multi_threaded : bool, default: True @@ -188,7 +192,7 @@ def push( extract_views: bool, default: False When the uploading file is a volume, specify whether it's going to be split into orthogonal views. files_to_exclude : Optional[PathLike]], default: None - Optional list of files to exclude from the file scan. Those can be folders. + Optional list of files to exclude from the file scan. These can be folders. path: Optional[str], default: None Optional path to store the files in. preserve_folders : bool, default: False @@ -197,6 +201,13 @@ def push( Optional callback, called every time the progress of an uploading files is reported. file_upload_callback: Optional[FileUploadCallback], default: None Optional callback, called every time a file chunk is uploaded. + item_merge_mode: Optional[str], default: None + If set, each file path passed to `files_to_upload` behaves as follows: + - Every path that points directly to a file is ignored + - Each folder of files passed to `files_to_upload` will be uploaded according to the following modes: + - "slots": Each file in the folder will be uploaded to a different slot of the same item. + - "series": All `.dcm` files in the folder will be concatenated into a single slot. All other files are ignored. + - "channels": Each file in the folder will be uploaded to a different channel of the same item. Returns ------- @@ -216,39 +227,41 @@ def push( if files_to_upload is None: raise ValueError("No files or directory specified.") + if item_merge_mode: + try: + item_merge_mode = ItemMergeMode(item_merge_mode) + except ValueError: + raise ValueError( + f"Invalid item merge mode: {item_merge_mode}. 
Valid options are: 'slots', 'series', 'channels'"
+                )
+
+        if item_merge_mode and preserve_folders:
+            raise TypeError("Cannot use `preserve_folders` with `item_merge_mode`")
+
+        # Direct file paths
         uploading_files = [
             item for item in files_to_upload if isinstance(item, LocalFile)
         ]
+
+        # Folder paths
         search_files = [
             item for item in files_to_upload if not isinstance(item, LocalFile)
         ]
 
-        generic_parameters_specified = (
-            path is not None or fps != 0 or as_frames is not False
-        )
-        if uploading_files and generic_parameters_specified:
-            raise ValueError("Cannot specify a path when uploading a LocalFile object.")
-
-        for found_file in find_files(search_files, files_to_exclude=files_to_exclude):
-            local_path = path
-            if preserve_folders:
-                source_files = [
-                    source_file
-                    for source_file in search_files
-                    if is_relative_to(found_file, source_file)
-                ]
-                if source_files:
-                    local_path = str(
-                        found_file.relative_to(source_files[0]).parent.as_posix()
-                    )
-            uploading_files.append(
-                LocalFile(
-                    found_file,
-                    fps=fps,
-                    as_frames=as_frames,
-                    extract_views=extract_views,
-                    path=local_path,
-                )
+        if item_merge_mode:
+            uploading_files = find_files_to_upload_merging(
+                search_files, files_to_exclude, item_merge_mode
+            )
+        else:
+            uploading_files = find_files_to_upload_no_merging(
+                search_files,
+                files_to_exclude,
+                path,
+                fps,
+                as_frames,
+                extract_views,
+                preserve_folders,
+                uploading_files,
             )
 
         if not uploading_files:
             raise ValueError(
                 "No files to upload, check your path, exclusion filters and resume flag"
             )
@@ -842,3 +855,68 @@ def register_multi_slotted(
                 print(f"  - {item}")
         print(f"Registration complete. Check your items in the dataset: {self.slug}")
         return results
+
+
+def find_files_to_upload_merging(
+    search_files: List[PathLike],
+    files_to_exclude: Optional[List[PathLike]],
+    item_merge_mode: str,
+) -> List[MultiFileItem]:
+    multi_file_items = []
+    for directory in search_files:
+        files_in_directory = list(
+            find_files([directory], files_to_exclude=files_to_exclude, recursive=False)
+        )
+        if not files_in_directory:
+            print(
+                f"Warning: There are no uploading files in the first level of {directory}, skipping"
+            )
+            continue
+        multi_file_items.append(
+            MultiFileItem(directory, files_in_directory, item_merge_mode)
+        )
+    if not multi_file_items:
+        raise ValueError(
+            "No valid folders to upload after searching the passed directories for files"
+        )
+    return multi_file_items
+
+
+def find_files_to_upload_no_merging(
+    search_files: List[PathLike],
+    files_to_exclude: Optional[List[PathLike]],
+    path: Optional[str],
+    fps: int,
+    as_frames: bool,
+    extract_views: bool,
+    preserve_folders: bool,
+    uploading_files: List[LocalFile],
+) -> List[LocalFile]:
+    generic_parameters_specified = (
+        path is not None or fps != 0 or as_frames is not False
+    )
+    if uploading_files and generic_parameters_specified:
+        raise ValueError("Cannot specify a path when uploading a LocalFile object.")
+
+    for found_file in find_files(search_files, files_to_exclude=files_to_exclude):
+        local_path = path
+        if preserve_folders:
+            source_files = [
+                source_file
+                for source_file in search_files
+                if is_relative_to(found_file, source_file)
+            ]
+            if source_files:
+                local_path = str(
+                    found_file.relative_to(source_files[0]).parent.as_posix()
+                )
+        uploading_files.append(
+            LocalFile(
+                found_file,
+                fps=fps,
+                as_frames=as_frames,
+                extract_views=extract_views,
+                path=local_path,
+            )
+        )
+    return uploading_files
diff --git a/darwin/dataset/upload_manager.py b/darwin/dataset/upload_manager.py
index bcd28f8f7..53fad692c 100644
--- a/darwin/dataset/upload_manager.py
+++ b/darwin/dataset/upload_manager.py
@@ -2,6 
+2,7 @@ import os import time from dataclasses import dataclass +from enum import Enum from pathlib import Path from typing import ( TYPE_CHECKING, @@ -31,6 +32,12 @@ from typing import Dict +class ItemMergeMode(Enum): + SLOTS = "slots" + SERIES = "series" + CHANNELS = "channels" + + class ItemPayload: """ Represents an item's payload. @@ -186,6 +193,31 @@ def full_path(self) -> str: return construct_full_path(self.data["path"], self.data["filename"]) +class MultiFileItem: + def __init__( + self, directory: PathLike, files: List[PathLike], merge_mode: ItemMergeMode + ): + self.directory = directory + self.files = files + self.merge_mode = merge_mode + self.layout = self._create_layout() + self.temp = {"version": 2, "slots": ["1", "2", "3"], "type": "grid"} + + def _create_layout(self): + # TODO + if ( + self.merge_mode == ItemMergeMode.slots + or self.merge_mode == ItemMergeMode.series + ): + return { + "version": 2, + "slots": [str(i) for i in range(len(self.files))], + "type": "grid", # Worth experimenting with - Is this the best option? Should we change this dynamically? + } + else: + return {"version": 3, "slots_grid": [[[file.name for file in self.files]]]} + + class FileMonitor(object): """ Monitors the progress of a :class:``BufferedReader``. diff --git a/darwin/options.py b/darwin/options.py index b6a292394..996c27b3a 100644 --- a/darwin/options.py +++ b/darwin/options.py @@ -183,6 +183,12 @@ def __init__(self) -> None: action="store_true", help="Preserve the local folder structure in the dataset.", ) + parser_push.add_argument( + "--item-merge-mode", + type=str, + choices=["slots", "series", "channels"], + help="Specify the item merge mode: slots, series, or channels", + ) # Remove parser_remove = dataset_action.add_parser( diff --git a/tests/darwin/dataset/remote_dataset_test.py b/tests/darwin/dataset/remote_dataset_test.py index c974ace0e..b0cf10276 100644 --- a/tests/darwin/dataset/remote_dataset_test.py +++ b/tests/darwin/dataset/remote_dataset_test.py @@ -659,6 +659,20 @@ def test_raises_with_unsupported_files(self, remote_dataset: RemoteDataset): with pytest.raises(UnsupportedFileType): remote_dataset.push(["test.txt"]) + def test_raises_if_invalid_item_merge_mode(self, remote_dataset: RemoteDataset): + with pytest.raises(ValueError): + remote_dataset.push(["path/to/dir"], item_merge_mode="invalid") + + def test_raises_if_preserve_folders_with_item_merge_mode( + self, remote_dataset: RemoteDataset + ): + with pytest.raises(TypeError): + remote_dataset.push( + ["path/to/dir"], + item_merge_mode="slots", + preserve_folders=True, + ) + @pytest.mark.usefixtures("file_read_write_test") class TestPull: From cd806a9bde32159bcfb97b008c757f251d7758be Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Thu, 29 Aug 2024 18:00:39 +0100 Subject: [PATCH 2/5] Unit test coverage --- darwin/cli_functions.py | 3 +- darwin/dataset/remote_dataset_v2.py | 82 +++++++++-- darwin/dataset/upload_manager.py | 48 +++++-- tests/darwin/data/push_test_dir.zip | Bin 0 -> 24374 bytes tests/darwin/dataset/remote_dataset_test.py | 146 +++++++++++++++++++- 5 files changed, 250 insertions(+), 29 deletions(-) create mode 100644 tests/darwin/data/push_test_dir.zip diff --git a/darwin/cli_functions.py b/darwin/cli_functions.py index f2b11999d..ce00f6a54 100644 --- a/darwin/cli_functions.py +++ b/darwin/cli_functions.py @@ -688,7 +688,8 @@ def upload_data( item_merge_mode : Optional[str] If set, each file path passed to `files_to_upload` behaves as follows: - Every path that points directly to a file is ignored - - 
Each folder of files passed to `files_to_upload` will be uploaded according to the following mode rules.
+          Note that folders will not be recursively searched, so only files in the first level of the folder will be uploaded:
         - "slots": Each file in the folder will be uploaded to a different slot of the same item.
         - "series": All `.dcm` files in the folder will be concatenated into a single slot. All other files are ignored.
         - "channels": Each file in the folder will be uploaded to a different channel of the same item.
diff --git a/darwin/dataset/remote_dataset_v2.py b/darwin/dataset/remote_dataset_v2.py
index 72241785f..eecc725d9 100644
--- a/darwin/dataset/remote_dataset_v2.py
+++ b/darwin/dataset/remote_dataset_v2.py
@@ -1,4 +1,5 @@
 import json
+from pathlib import Path
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -204,7 +205,8 @@ def push(
         item_merge_mode: Optional[str], default: None
             If set, each file path passed to `files_to_upload` behaves as follows:
             - Every path that points directly to a file is ignored
-            - Each folder of files passed to `files_to_upload` will be uploaded according to the following modes:
+            - Each folder of files passed to `files_to_upload` will be uploaded according to the following mode rules.
+              Note that folders will not be recursively searched, so only files in the first level of the folder will be uploaded:
             - "slots": Each file in the folder will be uploaded to a different slot of the same item.
             - "series": All `.dcm` files in the folder will be concatenated into a single slot. All other files are ignored.
             - "channels": Each file in the folder will be uploaded to a different channel of the same item.
@@ -229,7 +231,7 @@ def push(
 
         if item_merge_mode:
             try:
-                item_merge_mode = ItemMergeMode(item_merge_mode)
+                ItemMergeMode(item_merge_mode)
             except ValueError:
                 raise ValueError(
                     f"Invalid item merge mode: {item_merge_mode}. Valid options are: 'slots', 'series', 'channels'"
                 )
@@ -249,11 +251,11 @@ def push(
         ]
 
         if item_merge_mode:
-            uploading_files = find_files_to_upload_merging(
+            uploading_files = _find_files_to_upload_merging(
                 search_files, files_to_exclude, item_merge_mode
             )
         else:
-            uploading_files = find_files_to_upload_no_merging(
+            uploading_files = _find_files_to_upload_no_merging(
                 search_files,
                 files_to_exclude,
                 path,
@@ -264,11 +266,6 @@ def push(
                 uploading_files,
             )
 
-        if not uploading_files:
-            raise ValueError(
-                "No files to upload, check your path, exclusion filters and resume flag"
-            )
-
         handler = UploadHandlerV2(self, uploading_files)
         if blocking:
             handler.upload(
@@ -857,11 +854,33 @@ def register_multi_slotted(
     return results
 
 
-def find_files_to_upload_merging(
+def _find_files_to_upload_merging(
     search_files: List[PathLike],
-    files_to_exclude: Optional[List[PathLike]],
+    files_to_exclude: List[PathLike],
     item_merge_mode: str,
 ) -> List[MultiFileItem]:
+    """
+    Finds files to upload as either:
+    - Multi-slotted items
+    - Multi-channel items
+    - Single-slotted items containing multiple `.dcm` files
+
+    Does not search each directory recursively, only considers files in the first level of each directory.
+
+    Parameters
+    ----------
+    search_files : List[PathLike]
+        List of directories to search for files.
+    files_to_exclude : List[PathLike]
+        List of files to exclude from the file scan.
+    item_merge_mode : str
+        Mode to merge the files in the folders. Valid options are: 'slots', 'series', 'channels'. 
+ + Returns + ------- + List[MultiFileItem] + List of files to upload. + """ multi_file_items = [] for directory in search_files: files_in_directory = list( @@ -873,7 +892,9 @@ def find_files_to_upload_merging( ) continue multi_file_items.append( - MultiFileItem(directory, files_in_directory, item_merge_mode) + MultiFileItem( + Path(directory), files_in_directory, ItemMergeMode(item_merge_mode) + ) ) if not multi_file_items: raise ValueError( @@ -882,9 +903,9 @@ def find_files_to_upload_merging( return multi_file_items -def find_files_to_upload_no_merging( +def _find_files_to_upload_no_merging( search_files: List[PathLike], - files_to_exclude: Optional[List[PathLike]], + files_to_exclude: List[PathLike], path: Optional[str], fps: int, as_frames: bool, @@ -892,6 +913,33 @@ def find_files_to_upload_no_merging( preserve_folders: bool, uploading_files: List[LocalFile], ) -> List[LocalFile]: + """ + Finds files to upload as single-slotted dataset items. Recursively searches the passed directories for files. + + Parameters + ---------- + search_files : List[PathLike] + List of directories to search for files. + files_to_exclude : Optional[List[PathLike]] + List of files to exclude from the file scan. + path : Optional[str] + Path to store the files in. + fps: int + When uploading video files, specify the framerate. + as_frames: bool + When uploading video files, specify whether to upload as a list of frames. + extract_views: bool + When uploading volume files, specify whether to split into orthogonal views. + preserve_folders: bool + Specify whether or not to preserve folder paths when uploading. + uploading_files : List[LocalFile] + List of files to upload. + + Returns + ------- + List[LocalFile] + List of files to upload. + """ generic_parameters_specified = ( path is not None or fps != 0 or as_frames is not False ) @@ -919,4 +967,10 @@ def find_files_to_upload_no_merging( path=local_path, ) ) + + if not uploading_files: + raise ValueError( + "No files to upload, check your path, exclusion filters and resume flag" + ) + return uploading_files diff --git a/darwin/dataset/upload_manager.py b/darwin/dataset/upload_manager.py index 53fad692c..855d9e415 100644 --- a/darwin/dataset/upload_manager.py +++ b/darwin/dataset/upload_manager.py @@ -14,6 +14,7 @@ Optional, Set, Tuple, + Union, ) import requests @@ -194,27 +195,46 @@ def full_path(self) -> str: class MultiFileItem: - def __init__( - self, directory: PathLike, files: List[PathLike], merge_mode: ItemMergeMode - ): + def __init__(self, directory: Path, files: List[Path], merge_mode: ItemMergeMode): self.directory = directory + self.name = directory.name self.files = files self.merge_mode = merge_mode self.layout = self._create_layout() - self.temp = {"version": 2, "slots": ["1", "2", "3"], "type": "grid"} def _create_layout(self): - # TODO - if ( - self.merge_mode == ItemMergeMode.slots - or self.merge_mode == ItemMergeMode.series - ): + """ + Creates the layout to be used when uploading the files: + - For multi-slotted items: LayoutV2 + - For series items: LayoutV2, but only with `.dcm` files + - For multi-channel items: LayoutV3 + + Raises + ------ + ValueError + - If no DICOM files are found in the directory for ItemMergeMode.SEIRES items + - If the number of files is greater than 16 for ItemMergeMode.CHANNELS items + """ + if self.merge_mode == ItemMergeMode.SLOTS: + return { + "version": 2, + "slots": [str(i) for i in range(len(self.files))], + "type": "grid", + } + elif self.merge_mode == ItemMergeMode.SERIES: + self.files = [file for 
file in self.files if file.suffix.lower() == ".dcm"]
+            if not self.files:
+                raise ValueError("No DICOM files found in 1st level of directory")
             return {
                 "version": 2,
                 "slots": [str(i) for i in range(len(self.files))],
-                "type": "grid",  # Worth experimenting with - Is this the best option? Should we change this dynamically?
+                "type": "grid",
             }
-        else:
+        elif self.merge_mode == ItemMergeMode.CHANNELS:
+            if len(self.files) > 16:
+                raise ValueError(
+                    f"No multi-channel item can have more than 16 channels. The following directory has {len(self.files)} files: {self.directory}"
+                )
             return {"version": 3, "slots_grid": [[[file.name for file in self.files]]]}
 
@@ -434,7 +454,11 @@ def _upload_file(
 
 
 class UploadHandlerV2(UploadHandler):
-    def __init__(self, dataset: "RemoteDataset", local_files: List[LocalFile]):
+    def __init__(
+        self,
+        dataset: "RemoteDataset",
+        local_files: Union[List[LocalFile], List[MultiFileItem]],
+    ):
         super().__init__(dataset=dataset, local_files=local_files)
 
     def _request_upload(self) -> Tuple[List[ItemPayload], List[ItemPayload]]:
diff --git a/tests/darwin/data/push_test_dir.zip b/tests/darwin/data/push_test_dir.zip
new file mode 100644
index 0000000000000000000000000000000000000000..2a09ce77336d274582539635428692163674766f
GIT binary patch
literal 24374
[24374 bytes of base85-encoded zip data omitted]

literal 0
HcmV?d00001

diff --git a/tests/darwin/dataset/remote_dataset_test.py b/tests/darwin/dataset/remote_dataset_test.py
index b0cf10276..5af8d04ab 100644
--- a/tests/darwin/dataset/remote_dataset_test.py
+++ b/tests/darwin/dataset/remote_dataset_test.py
@@ -20,8 +20,17 @@
     download_all_images_from_annotations,
 )
 from darwin.dataset.release import Release, ReleaseStatus
-from darwin.dataset.remote_dataset_v2 import RemoteDatasetV2
-from darwin.dataset.upload_manager import LocalFile, UploadHandlerV2
+from darwin.dataset.remote_dataset_v2 import (
+    RemoteDatasetV2,
+    _find_files_to_upload_merging,
+    _find_files_to_upload_no_merging,
+)
+from darwin.dataset.upload_manager import (
+    ItemMergeMode,
+    LocalFile,
+    MultiFileItem,
+    UploadHandlerV2,
+)
 from darwin.datatypes import ManifestItem, ObjectStore, SegmentManifest
 from darwin.exceptions import UnsupportedExportFormat, UnsupportedFileType
 from darwin.item import DatasetItem
@@ -601,6 +610,14 @@ def remote_dataset(
 
 @pytest.mark.usefixtures("file_read_write_test")
 class TestPush:
+    @pytest.fixture(scope="class")
+    def setup_zip(self):
+        zip_path = Path("tests/darwin/data/push_test_dir.zip")
+        with tempfile.TemporaryDirectory() as tmpdir:
+            with zipfile.ZipFile(zip_path, "r") as zip_ref:
+                zip_ref.extractall(tmpdir)
+            yield Path(tmpdir)
+
     def test_raises_if_files_are_not_provided(self, remote_dataset: RemoteDataset):
         with pytest.raises(ValueError):
             remote_dataset.push(None)
@@ -673,6 +690,131 @@ def test_raises_if_preserve_folders_with_item_merge_mode(
                 preserve_folders=True,
             )
 
+    def test_find_files_to_upload_merging_slots(self, setup_zip):
+        base_path = setup_zip / "push_test_dir" / "dir1"
+        search_files = [base_path / "item1", base_path / "item2"]
+        multi_file_items = _find_files_to_upload_merging(search_files, [], "slots")
+        assert len(multi_file_items) == 2
+        
assert all(isinstance(item, MultiFileItem) for item in multi_file_items) + + def test_find_files_to_upload_merging_series(self, setup_zip): + base_path = setup_zip / "push_test_dir" / "dir1" + search_files = [base_path / "dicoms"] + multi_file_items = _find_files_to_upload_merging(search_files, [], "series") + assert len(multi_file_items) == 1 + assert all(isinstance(item, MultiFileItem) for item in multi_file_items) + + def test_find_files_to_upload_merging_channels(self, setup_zip): + base_path = setup_zip / "push_test_dir" / "dir1" + search_files = [base_path / "item1", base_path / "item2"] + multi_file_items = _find_files_to_upload_merging(search_files, [], "slots") + assert len(multi_file_items) == 2 + + def test_find_files_to_upload_merging_does_not_search_recursively(self, setup_zip): + base_path = setup_zip / "push_test_dir" / "dir2" + search_files = [base_path / "recursive_search"] + multi_file_items = _find_files_to_upload_merging(search_files, [], "slots") + assert len(multi_file_items) == 1 + assert len(multi_file_items[0].files) == 2 + + def test_find_files_to_upload_no_merging_searches_recursively(self, setup_zip): + base_path = setup_zip / "push_test_dir" / "dir2" + search_files = [base_path / "recursive_search"] + local_files = _find_files_to_upload_no_merging( + search_files, + [], + None, + 0, + False, + False, + False, + [], + ) + assert len(local_files) == 11 + assert all(isinstance(file, LocalFile) for file in local_files) + + def test_find_files_to_upload_no_merging_no_files(self, setup_zip): + base_path = setup_zip / "push_test_dir" / "dir2" + search_files = [base_path / "no_files1", base_path / "no_files2"] + with pytest.raises( + ValueError, + match="No files to upload, check your path, exclusion filters and resume flag", + ): + _find_files_to_upload_no_merging( + search_files, + [], + None, + 0, + False, + False, + False, + [], + ) + + +class TestMultiFileItem: + @pytest.fixture(scope="class") + def setup_zip(self): + zip_path = Path("tests/darwin/data/push_test_dir.zip") + with tempfile.TemporaryDirectory() as tmpdir: + with zipfile.ZipFile(zip_path, "r") as zip_ref: + zip_ref.extractall(tmpdir) + yield Path(tmpdir) + + def test_create_multi_file_item_slots(self, setup_zip): + base_path = setup_zip / "push_test_dir" / "dir1" / "item1" + files = list(base_path.glob("*")) + item = MultiFileItem(base_path, files, merge_mode=ItemMergeMode.SLOTS) + assert len(item.files) == 7 + assert item.name == "item1" + assert item.layout == { + "version": 2, + "slots": ["0", "1", "2", "3", "4", "5", "6"], + "type": "grid", + } + + def test_create_multi_file_item_series(self, setup_zip): + base_path = setup_zip / "push_test_dir" / "dir1" / "dicoms" + files = list(base_path.glob("*")) + item = MultiFileItem(base_path, files, merge_mode=ItemMergeMode.SERIES) + assert len(item.files) == 6 + assert item.name == "dicoms" + assert item.layout == { + "version": 2, + "slots": ["0", "1", "2", "3", "4", "5"], + "type": "grid", + } + + def test_create_multi_file_item_channels(self, setup_zip): + base_path = setup_zip / "push_test_dir" / "dir1" / "item1" + files = list(base_path.glob("*")) + item = MultiFileItem(base_path, files, merge_mode=ItemMergeMode.CHANNELS) + assert len(item.files) == 7 + assert item.name == "item1" + assert item.layout == { + "version": 3, + "slots_grid": [ + [["4.jpg", "5.JPG", "7.JPG", "6.jpg", "3.JPG", "1.JPG", "2"]] + ], + } + + def test_create_series_no_valid_files(self, setup_zip): + base_path = setup_zip / "push_test_dir" / "dir1" / "item1" + files = 
list(base_path.glob("*"))
+        with pytest.raises(
+            ValueError, match="No DICOM files found in 1st level of directory"
+        ):
+            MultiFileItem(base_path, files, merge_mode=ItemMergeMode.SERIES)
+
+    def test_create_channels_too_many_files(self, setup_zip):
+        base_path = setup_zip / "push_test_dir" / "dir2" / "too_many_channels"
+        files = list(base_path.glob("*"))
+        with pytest.raises(
+            ValueError,
+            match=f"No multi-channel item can have more than 16 channels. The following directory has 17 files: {base_path}",
+        ):
+            MultiFileItem(base_path, files, merge_mode=ItemMergeMode.CHANNELS)
+
 
 @pytest.mark.usefixtures("file_read_write_test")
 class TestPull:

From bd325172817ab76de91d12b35f38d44a9544ccdd Mon Sep 17 00:00:00 2001
From: John Wilkie
Date: Fri, 30 Aug 2024 14:13:01 +0100
Subject: [PATCH 3/5] Cleaned up tests

---
 tests/darwin/data/push_test_dir.zip         | Bin 24374 -> 14762 bytes
 tests/darwin/dataset/remote_dataset_test.py |  28 +++++++++----------
 2 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/tests/darwin/data/push_test_dir.zip b/tests/darwin/data/push_test_dir.zip
index 2a09ce77336d274582539635428692163674766f..6d76d817411da1608ab0374474f13321e222aaed 100644
GIT binary patch
literal 14762
[14762 bytes of base85-encoded zip data omitted]

literal 24374
[24374 bytes of base85-encoded zip data omitted]

diff --git a/tests/darwin/dataset/remote_dataset_test.py b/tests/darwin/dataset/remote_dataset_test.py
index 5af8d04ab..897364580 100644
--- a/tests/darwin/dataset/remote_dataset_test.py
+++ b/tests/darwin/dataset/remote_dataset_test.py
@@ -692,7 +692,7 @@ def test_raises_if_preserve_folders_with_item_merge_mode(
 
     def test_find_files_to_upload_merging_slots(self, setup_zip):
         base_path = setup_zip / "push_test_dir" / "dir1"
-        search_files = [base_path / "item1", base_path / "item2"]
+        search_files = [base_path / "jpegs", base_path / "dicoms"]
         multi_file_items = _find_files_to_upload_merging(search_files, [], "slots")
         assert len(multi_file_items) == 2
         assert all(isinstance(item, MultiFileItem) for item in multi_file_items)
@@ -706,8 +706,8 @@ def test_find_files_to_upload_merging_series(self, setup_zip):
 
     def test_find_files_to_upload_merging_channels(self, setup_zip):
         base_path = setup_zip / "push_test_dir" / "dir1"
-        search_files = [base_path / "item1", base_path / "item2"]
-        multi_file_items = _find_files_to_upload_merging(search_files, [], "slots")
+        search_files = [base_path / "jpegs", base_path / "dicoms"]
+        multi_file_items = _find_files_to_upload_merging(search_files, [], "channels")
         assert len(multi_file_items) == 2
 
     def test_find_files_to_upload_merging_does_not_search_recursively(self, setup_zip):
@@ -735,7 +735,7 @@ def test_find_files_to_upload_no_merging_searches_recursively(self, setup_zip):
 
     def test_find_files_to_upload_no_merging_no_files(self, setup_zip):
         base_path = setup_zip / "push_test_dir" / "dir2"
-        search_files = [base_path / "no_files1", base_path / "no_files2"]
+        search_files = [base_path / "no_files_1", base_path / "no_files_2"]
         with pytest.raises(
             ValueError,
             match="No files to upload, check your path, exclusion filters and resume flag",
@@ -762,14 +762,14 @@ def setup_zip(self):
         yield Path(tmpdir)
 
     def test_create_multi_file_item_slots(self, setup_zip):
-        base_path = setup_zip / "push_test_dir" / "dir1" / "item1"
+        base_path = setup_zip / "push_test_dir" / "dir1" / "jpegs"
         files = list(base_path.glob("*"))
         item = MultiFileItem(base_path, files, merge_mode=ItemMergeMode.SLOTS)
-        assert len(item.files) == 7
-        assert item.name == "item1"
+        assert len(item.files) == 6
+        assert item.name == "jpegs"
         assert item.layout == {
             "version": 2,
-            "slots": ["0", "1", "2", "3", "4", "5", "6"],
+            "slots": ["0", "1", "2", "3", "4", "5"],
             "type": "grid",
         }
 
@@ -786,20 +786,18 @@ def test_create_multi_file_item_series(self, setup_zip):
 
     def test_create_multi_file_item_channels(self, setup_zip):
-        base_path = setup_zip / "push_test_dir" / "dir1" / "item1"
+        base_path = setup_zip / "push_test_dir" / "dir1" / "jpegs"
         files = list(base_path.glob("*"))
         item = MultiFileItem(base_path, files, merge_mode=ItemMergeMode.CHANNELS)
-        assert len(item.files) == 7
-        assert item.name == "item1"
+        assert len(item.files) == 6
+        assert item.name == "jpegs"
         assert item.layout == {
             "version": 3,
-            "slots_grid": [
-                [["4.jpg", "5.JPG", "7.JPG", "6.jpg", "3.JPG", "1.JPG", "2"]]
-            ],
+            "slots_grid": [[["4.jpg", "5.JPG", "7.JPG", "6.jpg", "3.JPG", "1.JPG"]]],
         }
 
     def test_create_series_no_valid_files(self, setup_zip):
-        base_path = setup_zip / "push_test_dir" / "dir1" / "item1"
+        base_path = setup_zip / 
"push_test_dir" / "dir1" / "jpegs" files = list(base_path.glob("*")) with pytest.raises( ValueError, match="No DICOM files found in 1st level of directory" From a123c88f724b6e12dd0541e4c1ab4f10779f96c6 Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Fri, 30 Aug 2024 14:33:11 +0100 Subject: [PATCH 4/5] Added natural sorting to the order of files in multi-file items --- darwin/dataset/remote_dataset_v2.py | 7 ++++++- darwin/utils/utils.py | 13 ++++++++++--- poetry.lock | 17 ++++++++++++++++- pyproject.toml | 1 + tests/darwin/dataset/remote_dataset_test.py | 13 +++++++------ 5 files changed, 40 insertions(+), 11 deletions(-) diff --git a/darwin/dataset/remote_dataset_v2.py b/darwin/dataset/remote_dataset_v2.py index eecc725d9..063b402d6 100644 --- a/darwin/dataset/remote_dataset_v2.py +++ b/darwin/dataset/remote_dataset_v2.py @@ -884,7 +884,12 @@ def _find_files_to_upload_merging( multi_file_items = [] for directory in search_files: files_in_directory = list( - find_files([directory], files_to_exclude=files_to_exclude, recursive=False) + find_files( + [directory], + files_to_exclude=files_to_exclude, + recursive=False, + sort=True, + ) ) if not files_in_directory: print( diff --git a/darwin/utils/utils.py b/darwin/utils/utils.py index 5f57449ad..f2cbd8d85 100644 --- a/darwin/utils/utils.py +++ b/darwin/utils/utils.py @@ -25,6 +25,7 @@ import requests from json_stream.base import PersistentStreamingJSONList, PersistentStreamingJSONObject from jsonschema import validators +from natsort import natsorted from requests import Response from rich.progress import ProgressType, track from upolygon import draw_polygon @@ -216,6 +217,7 @@ def find_files( *, files_to_exclude: List[dt.PathLike] = [], recursive: bool = True, + sort: bool = False, ) -> List[Path]: """ Retrieve a list of all files belonging to supported extensions. The exploration can be made @@ -229,7 +231,8 @@ def find_files( List of files to exclude from the search. recursive : bool Flag for recursive search. - + sort : bool + Flag for sorting the files naturally, i.e. file2.txt will come before file10.txt. Returns ------- List[Path] @@ -255,8 +258,12 @@ def find_files( raise UnsupportedFileType(path) files_to_exclude_full_paths = [str(Path(f)) for f in files_to_exclude] - - return [f for f in found_files if str(f) not in files_to_exclude_full_paths] + filtered_files = [ + f for f in found_files if str(f) not in files_to_exclude_full_paths + ] + if sort: + return natsorted(filtered_files) + return filtered_files def secure_continue_request() -> bool: diff --git a/poetry.lock b/poetry.lock index 9161c0ff2..a850e8c4c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -913,6 +913,21 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +[[package]] +name = "natsort" +version = "8.4.0" +description = "Simple yet flexible natural sorting in Python." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "natsort-8.4.0-py3-none-any.whl", hash = "sha256:4732914fb471f56b5cce04d7bae6f164a592c7712e1c85f9ef585e197299521c"}, + {file = "natsort-8.4.0.tar.gz", hash = "sha256:45312c4a0e5507593da193dedd04abb1469253b601ecaf63445ad80f0a1ea581"}, +] + +[package.extras] +fast = ["fastnumbers (>=2.0.0)"] +icu = ["PyICU (>=1.0.0)"] + [[package]] name = "networkx" version = "3.1" @@ -2238,4 +2253,4 @@ test = ["pytest", "responses"] [metadata] lock-version = "2.0" python-versions = ">=3.8.0,<3.12" -content-hash = "6e6c0628c98652df5dd76a8d82a0f67af9ec2037388350412152d21d84fa9d57" +content-hash = "3ea848bf4d0e5e0f22170f20321ce5d426eb79c6bc0a536b36519fd6f7c6782e" diff --git a/pyproject.toml b/pyproject.toml index 10c69fc3e..ef4146782 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,6 +83,7 @@ types-requests = "^2.28.11.8" upolygon = "0.1.11" tenacity = "8.5.0" +natsort = "^8.4.0" [tool.poetry.extras] dev = ["black", "isort", "flake8", "mypy", "debugpy", "responses", "pytest", "flake8-pyproject", "pytest-rerunfailures", "ruff", "validate-pyproject"] medical = ["nibabel", "connected-components-3d", "scipy"] diff --git a/tests/darwin/dataset/remote_dataset_test.py b/tests/darwin/dataset/remote_dataset_test.py index 897364580..0c69b054c 100644 --- a/tests/darwin/dataset/remote_dataset_test.py +++ b/tests/darwin/dataset/remote_dataset_test.py @@ -10,6 +10,7 @@ import orjson as json import pytest import responses +from natsort import natsorted from pydantic import ValidationError from darwin.client import Client @@ -763,7 +764,7 @@ def setup_zip(self): def test_create_multi_file_item_slots(self, setup_zip): base_path = setup_zip / "push_test_dir" / "dir1" / "jpegs" - files = list(base_path.glob("*")) + files = natsorted(list(base_path.glob("*"))) item = MultiFileItem(base_path, files, merge_mode=ItemMergeMode.SLOTS) assert len(item.files) == 6 assert item.name == "jpegs" @@ -775,7 +776,7 @@ def test_create_multi_file_item_slots(self, setup_zip): def test_create_multi_file_item_series(self, setup_zip): base_path = setup_zip / "push_test_dir" / "dir1" / "dicoms" - files = list(base_path.glob("*")) + files = natsorted(list(base_path.glob("*"))) item = MultiFileItem(base_path, files, merge_mode=ItemMergeMode.SERIES) assert len(item.files) == 6 assert item.name == "dicoms" @@ -787,18 +788,18 @@ def test_create_multi_file_item_series(self, setup_zip): def test_create_multi_file_item_channels(self, setup_zip): base_path = setup_zip / "push_test_dir" / "dir1" / "jpegs" - files = list(base_path.glob("*")) + files = natsorted(list(base_path.glob("*"))) item = MultiFileItem(base_path, files, merge_mode=ItemMergeMode.CHANNELS) assert len(item.files) == 6 assert item.name == "jpegs" assert item.layout == { "version": 3, - "slots_grid": [[["4.jpg", "5.JPG", "7.JPG", "6.jpg", "3.JPG", "1.JPG"]]], + "slots_grid": [[["1.JPG", "3.JPG", "4.jpg", "5.JPG", "6.jpg", "7.JPG"]]], } def test_create_series_no_valid_files(self, setup_zip): base_path = setup_zip / "push_test_dir" / "dir1" / "jpegs" - files = list(base_path.glob("*")) + files = natsorted(list(base_path.glob("*"))) with pytest.raises( ValueError, match="No DICOM files found in 1st level of directory" ): @@ -806,7 +807,7 @@ def test_create_series_no_valid_files(self, setup_zip): def test_create_channels_too_many_files(self, setup_zip): base_path = setup_zip / "push_test_dir" / "dir2" / "too_many_channels" - files = list(base_path.glob("*")) + files = natsorted(list(base_path.glob("*"))) with pytest.raises( 
ValueError, match=f"No multi-channel item can have more than 16 channels. The following directory has 17 files: {base_path}", From 3c678b650f4608f2a58f628a407d9468606c698c Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Fri, 30 Aug 2024 16:14:50 +0100 Subject: [PATCH 5/5] Fix for failing test on Windows --- darwin/cli_functions.py | 4 ++-- darwin/dataset/remote_dataset_v2.py | 15 ++++++++------- darwin/dataset/upload_manager.py | 10 +++++----- darwin/options.py | 2 +- tests/darwin/dataset/remote_dataset_test.py | 4 ++-- 5 files changed, 18 insertions(+), 17 deletions(-) diff --git a/darwin/cli_functions.py b/darwin/cli_functions.py index ce00f6a54..9f67fc1b7 100644 --- a/darwin/cli_functions.py +++ b/darwin/cli_functions.py @@ -687,8 +687,8 @@ def upload_data( Specify whether to have full traces print when uploading files or not. item_merge_mode : Optional[str] If set, each file path passed to `files_to_upload` behaves as follows: - - Every path that points directly to a file is ignored - - Each folder of files passed to `files_to_upload` will be uploaded according to the following mode rules. + - Paths pointing directly to individual files are ignored + - Paths pointing to folders of files will be uploaded according to the following mode rules. Note that folders will not be recursively searched, so only files in the first level of the folder will be uploaded: - "slots": Each file in the folder will be uploaded to a different slot of the same item. - "series": All `.dcm` files in the folder will be concatenated into a single slot. All other files are ignored. diff --git a/darwin/dataset/remote_dataset_v2.py b/darwin/dataset/remote_dataset_v2.py index 063b402d6..fba60c8ff 100644 --- a/darwin/dataset/remote_dataset_v2.py +++ b/darwin/dataset/remote_dataset_v2.py @@ -202,19 +202,18 @@ def push( Optional callback, called every time the progress of an uploading files is reported. file_upload_callback: Optional[FileUploadCallback], default: None Optional callback, called every time a file chunk is uploaded. - item_merge_mode: Optional[str], default: None + item_merge_mode : Optional[str] If set, each file path passed to `files_to_upload` behaves as follows: - - Every path that points directly to a file is ignored - - Each folder of files passed to `files_to_upload` will be uploaded according to the following mode rules. - Note that folders will not be recursively searched, so only files in the first level of the folder will be uploaded: + - Paths pointing directly to individual files are ignored + - Paths pointing to folders of files will be uploaded according to the below mode rules. + Note that folders will not be recursively searched, so only files in the first level of the folder will be uploaded: - "slots": Each file in the folder will be uploaded to a different slot of the same item. - "series": All `.dcm` files in the folder will be concatenated into a single slot. All other files are ignored. - "channels": Each file in the folder will be uploaded to a different channel of the same item. - Returns ------- handler : UploadHandler - Class for handling uploads, progress and error messages. + Class for handling uploads, progress and error messages. 
Raises ------ @@ -238,7 +237,9 @@ def push( ) if item_merge_mode and preserve_folders: - raise TypeError("Cannot use `preserve_folders` with `item_merge_mode`") + raise TypeError( + "`item_merge_mode` does not support preserving local file structures with `preserve_folders` or `--folders`" + ) # Direct file paths uploading_files = [ diff --git a/darwin/dataset/upload_manager.py b/darwin/dataset/upload_manager.py index 855d9e415..7ba3fcd86 100644 --- a/darwin/dataset/upload_manager.py +++ b/darwin/dataset/upload_manager.py @@ -204,7 +204,7 @@ def __init__(self, directory: Path, files: List[Path], merge_mode: ItemMergeMode def _create_layout(self): """ - Creates the layout to be used when uploading the files: + Creates the layout to be used when uploading the files as a dataset item: - For multi-slotted items: LayoutV2 - For series items: LayoutV2, but only with `.dcm` files - For multi-channel items: LayoutV3 @@ -212,8 +212,8 @@ def _create_layout(self): Raises ------ ValueError - - If no DICOM files are found in the directory for ItemMergeMode.SEIRES items - - If the number of files is greater than 16 for ItemMergeMode.CHANNELS items + - If no DICOM files are found in the directory for `ItemMergeMode.SERIES` items + - If the number of files is greater than 16 for `ItemMergeMode.CHANNELS` items """ if self.merge_mode == ItemMergeMode.SLOTS: return { @@ -224,7 +224,7 @@ def _create_layout(self): elif self.merge_mode == ItemMergeMode.SERIES: self.files = [file for file in self.files if file.suffix.lower() == ".dcm"] if not self.files: - raise ValueError("No DICOM files found in 1st level of directory") + raise ValueError("No `.dcm` files found in 1st level of directory") return { "version": 2, "slots": [str(i) for i in range(len(self.files))], @@ -233,7 +233,7 @@ def _create_layout(self): elif self.merge_mode == ItemMergeMode.CHANNELS: if len(self.files) > 16: raise ValueError( - f"No multi-channel item can have more than 16 channels. The following directory has {len(self.files)} files: {self.directory}" + f"No multi-channel item can have more than 16 files. The following directory has {len(self.files)} files: {self.directory}" ) return {"version": 3, "slots_grid": [[[file.name for file in self.files]]]} diff --git a/darwin/options.py b/darwin/options.py index 996c27b3a..6ac6c6717 100644 --- a/darwin/options.py +++ b/darwin/options.py @@ -187,7 +187,7 @@ def __init__(self) -> None: "--item-merge-mode", type=str, choices=["slots", "series", "channels"], - help="Specify the item merge mode: slots, series, or channels", + help="Specify the item merge mode: `slots`, `series`, or `channels`", ) # Remove diff --git a/tests/darwin/dataset/remote_dataset_test.py b/tests/darwin/dataset/remote_dataset_test.py index 0c69b054c..72dea3d65 100644 --- a/tests/darwin/dataset/remote_dataset_test.py +++ b/tests/darwin/dataset/remote_dataset_test.py @@ -801,7 +801,7 @@ def test_create_series_no_valid_files(self, setup_zip): base_path = setup_zip / "push_test_dir" / "dir1" / "jpegs" files = natsorted(list(base_path.glob("*"))) with pytest.raises( - ValueError, match="No DICOM files found in 1st level of directory" + ValueError, match="No `.dcm` files found in 1st level of directory" ): MultiFileItem(base_path, files, merge_mode=ItemMergeMode.SERIES) @@ -810,7 +810,7 @@ def test_create_channels_too_many_files(self, setup_zip): files = natsorted(list(base_path.glob("*"))) with pytest.raises( ValueError, - match=f"No multi-channel item can have more than 16 channels. 
The following directory has 17 files: {base_path}", + match=r"No multi-channel item can have more than 16 files. The following directory has 17 files: .*", ): MultiFileItem(base_path, files, merge_mode=ItemMergeMode.CHANNELS)
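
For reviewers trying the branch out, here is a minimal usage sketch of the feature this series adds. It is not part of the patches: the team and dataset slug and the folder paths are hypothetical, and it assumes the usual darwin-py entry points (`Client.local()` and `Client.get_remote_dataset()`) alongside the new `item_merge_mode` parameter introduced above.

    from darwin.client import Client

    # Authenticate using the local darwin-py configuration.
    client = Client.local()
    dataset = client.get_remote_dataset("my-team/my-dataset")  # hypothetical slug

    # "slots": every file in the first level of the folder becomes a
    # separate slot of a single dataset item named after the folder.
    dataset.push(["scans/patient_1"], item_merge_mode="slots")

    # "series": only the folder's .dcm files are kept and concatenated
    # into a single slot; "channels" puts each file (16 at most) on its
    # own channel of one item.
    dataset.push(["scans/patient_2"], item_merge_mode="series")

The CLI path added in `darwin/options.py` would be exercised as `darwin dataset push my-team/my-dataset scans/patient_1 --item-merge-mode slots`. Note that the series deliberately rejects `preserve_folders`/`--folders` in combination with `item_merge_mode`, since local folder structure cannot be preserved while merging folders into items.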