diff --git a/.circleci/config.yml b/.circleci/config.yml index fce6131ce..57eb7e03c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -33,11 +33,13 @@ jobs: - /home/circleci/project/.poetry/virtualenvs - poetry.lock - run: poetry run python3 -c 'import kolena' + # TODO: fix underlying mypy issues with Python>3.9 rather than skipping - when: condition: not: - # TODO: upgrade mypy version to address # https://github.com/python/mypy/issues/13627 - equal: [ "3.10", << parameters.python-version >> ] + or: + - equal: [ "3.10", << parameters.python-version >> ] + - equal: [ "3.11", << parameters.python-version >> ] steps: - run: poetry run pre-commit run -a - run: @@ -108,7 +110,7 @@ workflows: name: unit-test-<< matrix.python-version >> matrix: parameters: - python-version: [ "3.7", "3.8", "3.9", "3.10" ] + python-version: [ "3.7", "3.8", "3.9", "3.10", "3.11" ] - integration-test: matrix: parameters: diff --git a/kolena/_api/v1/batched_load.py b/kolena/_api/v1/batched_load.py index e3bc94414..6a462bb66 100644 --- a/kolena/_api/v1/batched_load.py +++ b/kolena/_api/v1/batched_load.py @@ -25,11 +25,11 @@ class Path(str, Enum): @classmethod def upload_signed_url(cls, load_uuid: str) -> str: - return f"{cls.UPLOAD_SIGNED_URL_STUB}/{load_uuid}" + return f"{cls.UPLOAD_SIGNED_URL_STUB.value}/{load_uuid}" @classmethod def download_by_path(cls, path: str) -> str: - return f"{cls.DOWNLOAD_BY_PATH_STUB}/{path}" + return f"{cls.DOWNLOAD_BY_PATH_STUB.value}/{path}" @dataclass(frozen=True) class WithLoadUUID: diff --git a/kolena/_api/v1/samples.py b/kolena/_api/v1/samples.py deleted file mode 100644 index c3297df3c..000000000 --- a/kolena/_api/v1/samples.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright 2021-2023 Kolena Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from enum import Enum - - -class SampleType(str, Enum): - LOCATOR = "LOCATOR" - LOCATOR_TEXT = "LOCATOR_TEXT" diff --git a/kolena/_utils/asset_path_mapper.py b/kolena/_utils/asset_path_mapper.py index 1d231797f..c98623517 100644 --- a/kolena/_utils/asset_path_mapper.py +++ b/kolena/_utils/asset_path_mapper.py @@ -30,7 +30,8 @@ def absolute_locator(self, test_run_id: int, load_uuid: str, image_id: int, key: def relative_locator(self, path_stub: str) -> str: return f"{self.prefix}/{path_stub}" - def path_stub(self, test_run_id: int, load_uuid: str, image_id: int, key: str) -> str: + @staticmethod + def path_stub(test_run_id: int, load_uuid: str, image_id: int, key: str) -> str: return f"{test_run_id}/{image_id}/{key}-{load_uuid}.png" def _absolute_locator(self, relative_locator: str) -> str: diff --git a/kolena/_utils/batched_load.py b/kolena/_utils/batched_load.py index 85e10d239..a55d8ab25 100644 --- a/kolena/_utils/batched_load.py +++ b/kolena/_utils/batched_load.py @@ -26,26 +26,20 @@ import numpy as np import pandas as pd import requests -from PIL import Image -from requests_toolbelt import MultipartEncoder from retrying import retry from kolena._api.v1.batched_load import BatchedLoad as API -from kolena._api.v1.fr import Asset as AssetAPI from kolena._utils import krequests from kolena._utils import log -from kolena._utils.asset_path_mapper import AssetPathMapper from kolena._utils.datatypes import LoadableDataFrame from kolena._utils.serde import from_dict -from kolena.fr._consts import _BatchSize -from kolena.fr.datatypes import _ImageChipsDataFrame VALIDATION_COUNT_LIMIT = 100 STAGE_STATUS__LOADED = "LOADED" def init_upload() -> API.InitiateUploadResponse: - init_res = krequests.put(endpoint_path=API.Path.INIT_UPLOAD) + init_res = krequests.put(endpoint_path=API.Path.INIT_UPLOAD.value) krequests.raise_for_status(init_res) init_response = from_dict(data_class=API.InitiateUploadResponse, data=init_res.json()) return init_response @@ -78,46 +72,6 @@ def upload_data_frame_chunk(df_chunk: pd.DataFrame, load_uuid: str) -> None: krequests.raise_for_status(upload_response) -def upload_image_chips( - df: _ImageChipsDataFrame, - path_mapper: AssetPathMapper, - batch_size: int = _BatchSize.UPLOAD_CHIPS, -) -> None: - def upload_batch(df_batch: _ImageChipsDataFrame) -> None: - df_batch = df_batch.reset_index(drop=True) # reset indices so we match the signed_url indices - - def as_buffer(image_raw: np.ndarray) -> io.BytesIO: - pil_image = Image.fromarray(image_raw).convert("RGB") - buf = io.BytesIO() - pil_image.save(buf, "png") - buf.seek(0) - return buf - - data = MultipartEncoder( - fields=[ - ( - "files", - ( - path_mapper.path_stub(row["test_run_id"], row["uuid"], row["image_id"], row["key"]), - as_buffer(row["image"]), - ), - ) - for _, row in df_batch.iterrows() - ], - ) - upload_response = krequests.put( - endpoint_path=AssetAPI.Path.BULK_UPLOAD, - data=data, - headers={"Content-Type": data.content_type}, - ) - krequests.raise_for_status(upload_response) - - num_chunks = math.ceil(len(df) / batch_size) - chunk_iter = np.array_split(df, num_chunks) if len(df) > 0 else [] - for df_chunk in chunk_iter: - upload_batch(df_chunk) - - DFType = TypeVar("DFType", bound=LoadableDataFrame) @@ -157,7 +111,7 @@ def complete_load(uuid: Optional[str]) -> None: return complete_request = API.CompleteDownloadRequest(uuid=uuid) complete_res = krequests.put( - endpoint_path=API.Path.COMPLETE_DOWNLOAD, + endpoint_path=API.Path.COMPLETE_DOWNLOAD.value, data=json.dumps(dataclasses.asdict(complete_request)), ) 
krequests.raise_for_status(complete_res) diff --git a/kolena/_utils/_consts.py b/kolena/_utils/consts.py similarity index 93% rename from kolena/_utils/_consts.py rename to kolena/_utils/consts.py index fbed44f20..220a691d4 100644 --- a/kolena/_utils/_consts.py +++ b/kolena/_utils/consts.py @@ -14,8 +14,8 @@ from enum import Enum -class _BatchSize(int, Enum): - UPLOAD_CHIPS = 5_000 +class BatchSize(int, Enum): + UPLOAD_CHIPS = 1_000 UPLOAD_RECORDS = 10_000_000 UPLOAD_RESULTS = 1_000_000 diff --git a/kolena/_utils/instrumentation.py b/kolena/_utils/instrumentation.py index a0b70ac75..6dd593ab4 100644 --- a/kolena/_utils/instrumentation.py +++ b/kolena/_utils/instrumentation.py @@ -54,7 +54,7 @@ def upload_log(message: str, status: str) -> None: message=message, status=status, ) - krequests.post(endpoint_path=API.Path.UPLOAD, json=dataclasses.asdict(request)) + krequests.post(endpoint_path=API.Path.UPLOAD.value, json=dataclasses.asdict(request)) def log_telemetry(e: BaseException) -> None: @@ -62,7 +62,7 @@ def log_telemetry(e: BaseException) -> None: stack = tb.format_stack() exc_format = tb.format_exception(None, e, e.__traceback__) combined = stack + exc_format - upload_log("".join(combined), DatadogLogLevels.ERROR) + upload_log("".join(combined), DatadogLogLevels.ERROR.value) except BaseException: """ Attempting to upload the telemetry is best-effort. We don't want to have exceptions in that diff --git a/kolena/_utils/repository.py b/kolena/_utils/repository.py index 8ce23a021..17564c2fc 100644 --- a/kolena/_utils/repository.py +++ b/kolena/_utils/repository.py @@ -21,7 +21,7 @@ def create(repository: str) -> None: response = krequests.post( - endpoint_path=Path.CREATE, + endpoint_path=Path.CREATE.value, data=json.dumps(dataclasses.asdict(CreateRepositoryRequest(repository=repository))), ) krequests.raise_for_status(response) diff --git a/kolena/detection/_internal/model.py b/kolena/detection/_internal/model.py index 870017026..b5e16b282 100644 --- a/kolena/detection/_internal/model.py +++ b/kolena/detection/_internal/model.py @@ -33,9 +33,9 @@ from kolena._api.v1.workflow import WorkflowType from kolena._utils import krequests from kolena._utils import log -from kolena._utils._consts import _BatchSize from kolena._utils.batched_load import _BatchedLoader from kolena._utils.batched_load import DFType +from kolena._utils.consts import BatchSize from kolena._utils.frozen import Frozen from kolena._utils.instrumentation import WithTelemetry from kolena._utils.serde import from_dict @@ -93,7 +93,7 @@ def __init__(self, name: str, workflow: WorkflowType, metadata: Optional[Dict[st def _create(cls, workflow: WorkflowType, name: str, metadata: Dict[str, Any]) -> CoreAPI.EntityData: log.info(f"creating new model '{name}'") request = CoreAPI.CreateRequest(name=name, metadata=metadata, workflow=workflow.value) - res = krequests.post(endpoint_path=API.Path.CREATE, data=json.dumps(dataclasses.asdict(request))) + res = krequests.post(endpoint_path=API.Path.CREATE.value, data=json.dumps(dataclasses.asdict(request))) krequests.raise_for_status(res) log.success(f"created new model '{name}'") return from_dict(data_class=CoreAPI.EntityData, data=res.json()) @@ -102,7 +102,7 @@ def _create(cls, workflow: WorkflowType, name: str, metadata: Dict[str, Any]) -> @validate_arguments(config=ValidatorConfig) def _load_by_name(cls, name: str) -> CoreAPI.EntityData: request = CoreAPI.LoadByNameRequest(name=name) - res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME, data=json.dumps(dataclasses.asdict(request))) 
+ res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME.value, data=json.dumps(dataclasses.asdict(request))) krequests.raise_for_status(res) return from_dict(data_class=CoreAPI.EntityData, data=res.json()) @@ -131,7 +131,7 @@ def iter_inferences( def _iter_inference_batch_for_reference( self, test_object: Union[_TestCaseClass, _TestSuiteClass], - batch_size: int = _BatchSize.LOAD_SAMPLES, + batch_size: int = BatchSize.LOAD_SAMPLES.value, ) -> Iterator[_LoadInferencesDataFrameClass]: if batch_size <= 0: raise InputValidationError(f"invalid batch_size '{batch_size}': expected positive integer") @@ -143,7 +143,7 @@ def _iter_inference_batch_for_reference( init_request = API.InitLoadInferencesRequest(**params) yield from _BatchedLoader.iter_data( init_request=init_request, - endpoint_path=API.Path.INIT_LOAD_INFERENCES, + endpoint_path=API.Path.INIT_LOAD_INFERENCES.value, df_class=self._LoadInferencesDataFrameClass, ) log.success(f"loaded inferences from model '{self.name}' on {test_object_display_name}") @@ -166,7 +166,7 @@ def load_inferences_by_test_case( def _iter_inference_batch_for_test_suite( self, test_suite: _TestSuiteClass, - batch_size: int = _BatchSize.LOAD_SAMPLES, + batch_size: int = BatchSize.LOAD_SAMPLES.value, ) -> Iterator[_LoadInferencesDataFrameClass]: if batch_size <= 0: raise InputValidationError(f"invalid batch_size '{batch_size}': expected positive integer") @@ -175,7 +175,7 @@ def _iter_inference_batch_for_test_suite( init_request = API.InitLoadInferencesByTestCaseRequest(**params) yield from _BatchedLoader.iter_data( init_request=init_request, - endpoint_path=API.Path.INIT_LOAD_INFERENCES_BY_TEST_CASE, + endpoint_path=API.Path.INIT_LOAD_INFERENCES_BY_TEST_CASE.value, df_class=self._LoadInferencesDataFrameClass, ) log.success(f"loaded inferences from model '{self.name}' on test suite '{test_suite.name}'") diff --git a/kolena/detection/_internal/test_case.py b/kolena/detection/_internal/test_case.py index 3def6cf96..a554c51e1 100644 --- a/kolena/detection/_internal/test_case.py +++ b/kolena/detection/_internal/test_case.py @@ -30,11 +30,11 @@ from kolena._api.v1.workflow import WorkflowType from kolena._utils import krequests from kolena._utils import log -from kolena._utils._consts import _BatchSize from kolena._utils.batched_load import _BatchedLoader from kolena._utils.batched_load import DFType from kolena._utils.batched_load import init_upload from kolena._utils.batched_load import upload_data_frame +from kolena._utils.consts import BatchSize from kolena._utils.dataframes.validators import validate_df_schema from kolena._utils.frozen import Frozen from kolena._utils.instrumentation import WithTelemetry @@ -128,7 +128,7 @@ def _create( """Create a new test case with the provided name.""" log.info(f"creating new test case '{name}'") request = CoreAPI.CreateRequest(name=name, description=description or "", workflow=workflow.value) - res = krequests.post(endpoint_path=API.Path.CREATE, data=json.dumps(dataclasses.asdict(request))) + res = krequests.post(endpoint_path=API.Path.CREATE.value, data=json.dumps(dataclasses.asdict(request))) krequests.raise_for_status(res) data = from_dict(data_class=CoreAPI.EntityData, data=res.json()) obj = cls._create_from_data(data) @@ -142,7 +142,7 @@ def _create( def _load_by_name(cls, name: str, version: Optional[int] = None) -> CoreAPI.EntityData: """Load an existing test case with the provided name.""" request = CoreAPI.LoadByNameRequest(name=name, version=version) - res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME, 
data=json.dumps(dataclasses.asdict(request))) + res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME.value, data=json.dumps(dataclasses.asdict(request))) krequests.raise_for_status(res) return from_dict(data_class=CoreAPI.EntityData, data=res.json()) @@ -173,10 +173,10 @@ def load_images(self) -> List[_TestImageClass]: def iter_images(self) -> Iterator[_TestImageClass]: """Iterate through all images with their associated ground truths in this test case.""" log.info(f"loading test images for test case '{self.name}'") - init_request = CoreAPI.InitLoadContentsRequest(batch_size=_BatchSize.LOAD_SAMPLES, test_case_id=self._id) + init_request = CoreAPI.InitLoadContentsRequest(batch_size=BatchSize.LOAD_SAMPLES.value, test_case_id=self._id) for df in _BatchedLoader.iter_data( init_request=init_request, - endpoint_path=API.Path.INIT_LOAD_IMAGES, + endpoint_path=API.Path.INIT_LOAD_IMAGES.value, df_class=self._TestImageDataFrameClass, ): for record in df.itertuples(): @@ -312,7 +312,7 @@ def edit(self, reset: bool = False) -> Iterator[Editor]: init_response = init_upload() df = self._to_data_frame(list(editor._images.values())) df_serialized = df.as_serializable() - upload_data_frame(df=df_serialized, batch_size=_BatchSize.UPLOAD_RECORDS, load_uuid=init_response.uuid) + upload_data_frame(df=df_serialized, batch_size=BatchSize.UPLOAD_RECORDS.value, load_uuid=init_response.uuid) request = CoreAPI.CompleteEditRequest( test_case_id=self._id, @@ -322,7 +322,7 @@ def edit(self, reset: bool = False) -> Iterator[Editor]: uuid=init_response.uuid, ) complete_res = krequests.put( - endpoint_path=API.Path.COMPLETE_EDIT, + endpoint_path=API.Path.COMPLETE_EDIT.value, data=json.dumps(dataclasses.asdict(request)), ) krequests.raise_for_status(complete_res) diff --git a/kolena/detection/_internal/test_run.py b/kolena/detection/_internal/test_run.py index dc361872e..bf13ff9d9 100644 --- a/kolena/detection/_internal/test_run.py +++ b/kolena/detection/_internal/test_run.py @@ -35,11 +35,11 @@ from kolena._api.v1.detection import TestRun as API from kolena._utils import krequests from kolena._utils import log -from kolena._utils._consts import _BatchSize from kolena._utils.batched_load import _BatchedLoader from kolena._utils.batched_load import DFType from kolena._utils.batched_load import init_upload from kolena._utils.batched_load import upload_data_frame_chunk +from kolena._utils.consts import BatchSize from kolena._utils.datatypes import LoadableDataFrame from kolena._utils.frozen import Frozen from kolena._utils.instrumentation import report_crash @@ -97,7 +97,10 @@ def __init__( test_suite_ids=[test_suite._id], config=config, ) - res = krequests.post(endpoint_path=API.Path.CREATE_OR_RETRIEVE, data=json.dumps(dataclasses.asdict(request))) + res = krequests.post( + endpoint_path=API.Path.CREATE_OR_RETRIEVE.value, + data=json.dumps(dataclasses.asdict(request)), + ) krequests.raise_for_status(res) response = from_dict(data_class=API.CreateOrRetrieveResponse, data=res.json()) self._id = response.test_run_id @@ -128,7 +131,7 @@ def __exit__( self._submit_custom_metrics() self._active = False if exc_type is not None: - report_crash(self._id, API.Path.MARK_CRASHED) + report_crash(self._id, API.Path.MARK_CRASHED.value) @validate_arguments(config=ValidatorConfig) def add_inferences(self, image: _TestImageClass, inferences: Optional[List[_InferenceClass]]) -> None: @@ -160,7 +163,7 @@ def add_inferences(self, image: _TestImageClass, inferences: Optional[List[_Infe self._inferences[image_id] = context_image_inferences - if 
self._n_inferences >= _BatchSize.UPLOAD_RESULTS: + if self._n_inferences >= BatchSize.UPLOAD_RESULTS.value: log.info(f"uploading batch of '{self._n_inferences}' inference results") self._upload_chunk() log.success(f"uploaded batch of '{self._n_inferences}' inference results") @@ -176,7 +179,7 @@ def iter_images(self) -> Iterator[_TestImageClass]: yield self._image_from_load_image_record(record) @validate_arguments(config=ValidatorConfig) - def load_images(self, batch_size: int = _BatchSize.LOAD_SAMPLES) -> List[_TestImageClass]: + def load_images(self, batch_size: int = BatchSize.LOAD_SAMPLES.value) -> List[_TestImageClass]: """ Returns a list of images that still need inferences evaluated, bounded in count by batch_size. Note that image ground truths will be excluded from the returned @@ -195,7 +198,10 @@ def load_images(self, batch_size: int = _BatchSize.LOAD_SAMPLES) -> List[_TestIm return [self._image_from_load_image_record(record) for record in df_image_batch.itertuples()] @validate_arguments(config=ValidatorConfig) - def _iter_image_batch(self, batch_size: int = _BatchSize.LOAD_SAMPLES) -> Iterator[_LoadTestImagesDataFrameClass]: + def _iter_image_batch( + self, + batch_size: int = BatchSize.LOAD_SAMPLES.value, + ) -> Iterator[_LoadTestImagesDataFrameClass]: if batch_size <= 0: raise InputValidationError(f"invalid batch_size '{batch_size}': expected positive integer") init_request = API.InitLoadRemainingImagesRequest( @@ -205,7 +211,7 @@ def _iter_image_batch(self, batch_size: int = _BatchSize.LOAD_SAMPLES) -> Iterat ) yield from _BatchedLoader.iter_data( init_request=init_request, - endpoint_path=API.Path.INIT_LOAD_REMAINING_IMAGES, + endpoint_path=API.Path.INIT_LOAD_REMAINING_IMAGES.value, df_class=self._LoadTestImagesDataFrameClass, ) @@ -239,7 +245,7 @@ def _finalize_upload(self) -> None: log.info("finalizing inference upload for test run") request = API.UploadImageResultsRequest(uuid=self._upload_uuid, test_run_id=self._id, reset=self._reset) finalize_res = krequests.put( - endpoint_path=API.Path.UPLOAD_IMAGE_RESULTS, + endpoint_path=API.Path.UPLOAD_IMAGE_RESULTS.value, data=json.dumps(dataclasses.asdict(request)), ) krequests.raise_for_status(finalize_res) @@ -289,6 +295,9 @@ def _submit_custom_metrics(self) -> None: log.info("submitting custom metrics for test run") custom_metrics = self._compute_custom_metrics() request = API.UpdateCustomMetricsRequest(model_id=self._model._id, metrics=custom_metrics) - res = krequests.put(endpoint_path=API.Path.UPLOAD_CUSTOM_METRICS, data=json.dumps(dataclasses.asdict(request))) + res = krequests.put( + endpoint_path=API.Path.UPLOAD_CUSTOM_METRICS.value, + data=json.dumps(dataclasses.asdict(request)), + ) krequests.raise_for_status(res) log.success("submitted custom metrics for test run") diff --git a/kolena/detection/_internal/test_suite.py b/kolena/detection/_internal/test_suite.py index 798423dc9..6448debff 100644 --- a/kolena/detection/_internal/test_suite.py +++ b/kolena/detection/_internal/test_suite.py @@ -112,7 +112,7 @@ def _create( """Create a new test suite with the provided name.""" log.info(f"creating new test suite '{name}'") request = CoreAPI.TestSuite.CreateRequest(name=name, description=description or "", workflow=workflow.value) - res = krequests.post(endpoint_path=API.Path.CREATE, data=json.dumps(dataclasses.asdict(request))) + res = krequests.post(endpoint_path=API.Path.CREATE.value, data=json.dumps(dataclasses.asdict(request))) krequests.raise_for_status(res) data = from_dict(data_class=CoreAPI.TestSuite.EntityData, 
data=res.json()) obj = cls._create_from_data(data) @@ -127,7 +127,7 @@ def _load_by_name(cls, name: str, version: Optional[int] = None) -> CoreAPI.Test """Retrieve the existing test suite with the provided name.""" request = CoreAPI.TestSuite.LoadByNameRequest(name=name, version=version) data = json.dumps(dataclasses.asdict(request)) - res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME, data=data) + res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME.value, data=data) krequests.raise_for_status(res) return from_dict(data_class=CoreAPI.TestSuite.EntityData, data=res.json()) @@ -298,7 +298,7 @@ def edit(self, reset: bool = False) -> Iterator[Editor]: test_case_ids=list(editor._test_cases.values()), ) data = json.dumps(dataclasses.asdict(request)) - res = krequests.post(endpoint_path=API.Path.EDIT, data=data) + res = krequests.post(endpoint_path=API.Path.EDIT.value, data=data) krequests.raise_for_status(res) log.success(f"updated test suite '{self.name}'") test_suite_data = from_dict(data_class=CoreAPI.TestSuite.EntityData, data=res.json()) diff --git a/kolena/detection/test_image.py b/kolena/detection/test_image.py index 70ddc1436..0ebb900dc 100644 --- a/kolena/detection/test_image.py +++ b/kolena/detection/test_image.py @@ -23,8 +23,8 @@ from pydantic import validate_arguments from kolena._api.v1.detection import TestImage as API -from kolena._utils._consts import _BatchSize from kolena._utils.batched_load import _BatchedLoader +from kolena._utils.consts import BatchSize from kolena._utils.validators import ValidatorConfig from kolena.detection._datatypes import TestImageDataFrame from kolena.detection._internal import BaseTestImage @@ -103,10 +103,10 @@ def iter_images(dataset: Optional[str] = None) -> Iterator[TestImage]: :param dataset: optionally specify the single dataset to be retrieved. By default, images from all datasets are returned """ - init_request = API.InitLoadImagesRequest(dataset=dataset, batch_size=_BatchSize.LOAD_RECORDS) + init_request = API.InitLoadImagesRequest(dataset=dataset, batch_size=BatchSize.LOAD_RECORDS.value) for df in _BatchedLoader.iter_data( init_request=init_request, - endpoint_path=API.Path.INIT_LOAD_IMAGES, + endpoint_path=API.Path.INIT_LOAD_IMAGES.value, df_class=TestImageDataFrame, ): for record in df.itertuples(): diff --git a/kolena/fr/_consts.py b/kolena/fr/_consts.py deleted file mode 100644 index 3355bbafb..000000000 --- a/kolena/fr/_consts.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright 2021-2023 Kolena Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from enum import Enum - - -class _BatchSize(int, Enum): - UPLOAD_CHIPS = 5_000 - UPLOAD_RECORDS = 10_000_000 diff --git a/kolena/fr/_utils.py b/kolena/fr/_utils.py new file mode 100644 index 000000000..a1cbb145c --- /dev/null +++ b/kolena/fr/_utils.py @@ -0,0 +1,52 @@ +# Copyright 2021-2023 Kolena Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import io +import math +from typing import Tuple + +import numpy as np +import pandas as pd +from PIL import Image +from requests_toolbelt import MultipartEncoder + +from kolena._api.v1.fr import Asset as AssetAPI +from kolena._utils import krequests +from kolena._utils.asset_path_mapper import AssetPathMapper +from kolena._utils.consts import BatchSize +from kolena.fr.datatypes import _ImageChipsDataFrame + + +def upload_image_chips(df: _ImageChipsDataFrame, batch_size: int = BatchSize.UPLOAD_CHIPS.value) -> None: + def upload_batch(df_batch: _ImageChipsDataFrame) -> None: + df_batch = df_batch.reset_index(drop=True) # reset indices so we match the signed_url indices + + def as_path_stub_and_buffer(row: pd.Series) -> Tuple[str, io.BytesIO]: + pil_image = Image.fromarray(row["image"]).convert("RGB") + image_buf = io.BytesIO() + pil_image.save(image_buf, "png") + image_buf.seek(0) + return AssetPathMapper.path_stub(row["test_run_id"], row["uuid"], row["image_id"], row["key"]), image_buf + + data = MultipartEncoder(fields=[("files", as_path_stub_and_buffer(row)) for _, row in df_batch.iterrows()]) + upload_response = krequests.put( + endpoint_path=AssetAPI.Path.BULK_UPLOAD.value, + data=data, + headers={"Content-Type": data.content_type}, + ) + krequests.raise_for_status(upload_response) + + num_chunks = math.ceil(len(df) / batch_size) + chunk_iter = np.array_split(df, num_chunks) if len(df) > 0 else [] + for df_chunk in chunk_iter: + upload_batch(df_chunk) diff --git a/kolena/fr/model.py b/kolena/fr/model.py index 94c22468c..f8f813b90 100644 --- a/kolena/fr/model.py +++ b/kolena/fr/model.py @@ -58,7 +58,7 @@ def create(cls, name: str, metadata: Dict[str, Any]) -> "Model": """ log.info(f"creating model '{name}'") request = API.CreateRequest(name=name, metadata=metadata) - res = krequests.post(endpoint_path=API.Path.CREATE, data=json.dumps(dataclasses.asdict(request))) + res = krequests.post(endpoint_path=API.Path.CREATE.value, data=json.dumps(dataclasses.asdict(request))) krequests.raise_for_status(res) log.success(f"created model '{name}'") return Model.__factory__(from_dict(data_class=Model.Data, data=res.json())) @@ -74,7 +74,7 @@ def load_by_name(cls, name: str) -> "Model": """ log.info(f"loading model '{name}'") request = API.LoadByNameRequest(name=name) - res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME, data=json.dumps(dataclasses.asdict(request))) + res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME.value, data=json.dumps(dataclasses.asdict(request))) krequests.raise_for_status(res) log.success(f"loaded model '{name}'") return Model.__factory__(from_dict(data_class=Model.Data, data=res.json())) @@ -146,7 +146,7 @@ def iter_pair_results( init_request = API.InitLoadPairResultsRequest(batch_size=batch_size, **base_load_request) yield from _BatchedLoader.iter_data( init_request=init_request, - endpoint_path=API.Path.INIT_LOAD_PAIR_RESULTS, + endpoint_path=API.Path.INIT_LOAD_PAIR_RESULTS.value, df_class=LoadedPairResultDataFrame, ) log.success(f"loaded pair results from model '{self.data.name}' on {test_object_display_name}") diff --git a/kolena/fr/test_case.py 
b/kolena/fr/test_case.py index 93e21f45b..b386db8c7 100644 --- a/kolena/fr/test_case.py +++ b/kolena/fr/test_case.py @@ -31,13 +31,13 @@ from kolena._utils.batched_load import _BatchedLoader from kolena._utils.batched_load import init_upload from kolena._utils.batched_load import upload_data_frame +from kolena._utils.consts import BatchSize from kolena._utils.dataframes.validators import validate_df_schema from kolena._utils.frozen import Frozen from kolena._utils.instrumentation import WithTelemetry from kolena._utils.serde import from_dict from kolena._utils.validators import ValidatorConfig from kolena.errors import NotFoundError -from kolena.fr._consts import _BatchSize from kolena.fr.datatypes import TEST_CASE_COLUMNS from kolena.fr.datatypes import TestCaseDataFrame from kolena.fr.datatypes import TestCaseDataFrameSchema @@ -126,7 +126,7 @@ def create( """ log.info(f"creating new test case '{name}'") request = API.CreateRequest(name=name, description=description or "") - res = krequests.post(endpoint_path=API.Path.CREATE, data=json.dumps(dataclasses.asdict(request))) + res = krequests.post(endpoint_path=API.Path.CREATE.value, data=json.dumps(dataclasses.asdict(request))) krequests.raise_for_status(res) data = from_dict(data_class=API.EntityData, data=res.json()) obj = cls._create_from_data(data) @@ -164,7 +164,7 @@ def load_by_name(cls, name: str, version: Optional[int] = None) -> "TestCase": def _load_by_name(cls, name: str, version: Optional[int] = None) -> "TestCase": log.info(f"loading test case '{name}'") request = API.LoadByNameRequest(name=name, version=version) - res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME, data=json.dumps(dataclasses.asdict(request))) + res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME.value, data=json.dumps(dataclasses.asdict(request))) krequests.raise_for_status(res) data = from_dict(data_class=API.EntityData, data=res.json()) log.success(f"loaded test case '{name}'") @@ -211,7 +211,7 @@ def _populate_from_other(self, other: "TestCase") -> None: self.image_count = other.image_count self.pair_count_genuine = other.pair_count_genuine self.pair_count_imposter = other.pair_count_imposter - self.data = other.data + self.data = other._data class Editor: _samples: Dict[str, TestCaseRecord] @@ -319,7 +319,7 @@ def edit(self, reset: bool = False) -> Iterator[Editor]: df = pd.DataFrame(editor._samples.values(), columns=TEST_CASE_COLUMNS) df_validated = validate_df_schema(df, TestCaseDataFrameSchema) - upload_data_frame(df=df_validated, batch_size=_BatchSize.UPLOAD_RECORDS, load_uuid=init_response.uuid) + upload_data_frame(df=df_validated, batch_size=BatchSize.UPLOAD_RECORDS.value, load_uuid=init_response.uuid) request = API.CompleteEditRequest( test_case_id=self._id, current_version=self.version, @@ -328,7 +328,7 @@ def edit(self, reset: bool = False) -> Iterator[Editor]: uuid=init_response.uuid, ) complete_res = krequests.post( - endpoint_path=API.Path.COMPLETE_EDIT, + endpoint_path=API.Path.COMPLETE_EDIT.value, data=json.dumps(dataclasses.asdict(request)), ) krequests.raise_for_status(complete_res) @@ -348,7 +348,7 @@ def iter_data(self, batch_size: int = 10_000_000) -> Iterator[TestCaseDataFrame] init_request = API.InitLoadDataRequest(batch_size=batch_size, test_case_id=self._id) yield from _BatchedLoader.iter_data( init_request=init_request, - endpoint_path=API.Path.INIT_LOAD_DATA, + endpoint_path=API.Path.INIT_LOAD_DATA.value, df_class=TestCaseDataFrame, ) log.success(f"loaded image pairs in test case '{self.name}'") diff --git 
a/kolena/fr/test_images.py b/kolena/fr/test_images.py index 4d3af3aef..630cc91be 100644 --- a/kolena/fr/test_images.py +++ b/kolena/fr/test_images.py @@ -34,12 +34,12 @@ from kolena._utils.batched_load import _BatchedLoader from kolena._utils.batched_load import init_upload from kolena._utils.batched_load import upload_data_frame +from kolena._utils.consts import BatchSize from kolena._utils.dataframes.validators import validate_df_schema from kolena._utils.uninstantiable import Uninstantiable from kolena._utils.validators import ValidatorConfig from kolena.fr import TestCase from kolena.fr import TestSuite -from kolena.fr._consts import _BatchSize from kolena.fr.datatypes import TEST_IMAGE_COLUMNS from kolena.fr.datatypes import TestImageDataFrame from kolena.fr.datatypes import TestImageDataFrameSchema @@ -184,10 +184,10 @@ def register(cls) -> Iterator[Registrar]: df_validated = TestImageDataFrame(validate_df_schema(df, TestImageDataFrameSchema)) df_serializable = df_validated.as_serializable() - upload_data_frame(df=df_serializable, batch_size=_BatchSize.UPLOAD_RECORDS, load_uuid=init_response.uuid) + upload_data_frame(df=df_serializable, batch_size=BatchSize.UPLOAD_RECORDS.value, load_uuid=init_response.uuid) request = LoadAPI.WithLoadUUID(uuid=init_response.uuid) finalize_res = krequests.put( - endpoint_path=API.Path.COMPLETE_REGISTER, + endpoint_path=API.Path.COMPLETE_REGISTER.value, data=json.dumps(dataclasses.asdict(request)), ) krequests.raise_for_status(finalize_res) @@ -228,7 +228,7 @@ def iter( ) yield from _BatchedLoader.iter_data( init_request=init_request, - endpoint_path=API.Path.INIT_LOAD_REQUEST, + endpoint_path=API.Path.INIT_LOAD_REQUEST.value, df_class=TestImageDataFrame, ) log.success(f"loaded test images{from_extra}") diff --git a/kolena/fr/test_run.py b/kolena/fr/test_run.py index b349bf80d..a7ea3b60a 100644 --- a/kolena/fr/test_run.py +++ b/kolena/fr/test_run.py @@ -31,7 +31,7 @@ from kolena._utils.batched_load import _BatchedLoader from kolena._utils.batched_load import init_upload from kolena._utils.batched_load import upload_data_frame -from kolena._utils.batched_load import upload_image_chips +from kolena._utils.consts import BatchSize from kolena._utils.dataframes.validators import validate_df_record_count from kolena._utils.dataframes.validators import validate_df_schema from kolena._utils.frozen import Frozen @@ -43,7 +43,7 @@ from kolena.fr import InferenceModel from kolena.fr import Model from kolena.fr import TestSuite -from kolena.fr._consts import _BatchSize +from kolena.fr._utils import upload_image_chips from kolena.fr.datatypes import _ImageChipsDataFrame from kolena.fr.datatypes import _ResultStageFrame from kolena.fr.datatypes import EmbeddingDataFrame @@ -82,7 +82,10 @@ def __init__(self, model: Model, test_suite: TestSuite, reset: bool = False): log.info("reset flag is disabled. 
update existing inferences by enabling the reset flag") request = API.CreateOrRetrieveRequest(model_id=model.data.id, test_suite_ids=[test_suite._id], reset=reset) - res = krequests.post(endpoint_path=API.Path.CREATE_OR_RETRIEVE, data=json.dumps(dataclasses.asdict(request))) + res = krequests.post( + endpoint_path=API.Path.CREATE_OR_RETRIEVE.value, + data=json.dumps(dataclasses.asdict(request)), + ) krequests.raise_for_status(res) response = from_dict(data_class=TestRun.Data, data=res.json()) @@ -127,7 +130,7 @@ def load_remaining_images(self, batch_size: int = 10_000_000) -> ImageDataFrame: load_all=self._reset, ) with krequests.put( - endpoint_path=API.Path.INIT_LOAD_REMAINING_IMAGES, + endpoint_path=API.Path.INIT_LOAD_REMAINING_IMAGES.value, data=json.dumps(dataclasses.asdict(init_request)), stream=True, ) as init_res: @@ -165,7 +168,7 @@ def upload_image_results(self, df_image_result: ImageResultDataFrame) -> int: log.info("uploading inference results for test run") init_response = init_upload() - asset_config_res = krequests.get(endpoint_path=AssetAPI.Path.CONFIG) + asset_config_res = krequests.get(endpoint_path=AssetAPI.Path.CONFIG.value) krequests.raise_for_status(asset_config_res) asset_config = from_dict(data_class=AssetAPI.Config, data=asset_config_res.json()) asset_path_mapper = AssetPathMapper(asset_config) @@ -177,18 +180,18 @@ def upload_image_results(self, df_image_result: ImageResultDataFrame) -> int: load_uuid=init_response.uuid, df=df_validated, ) - upload_image_chips(df_image_chips, asset_path_mapper) + upload_image_chips(df_image_chips) df_result_stage = _ResultStageFrame.from_image_result_data_frame( test_run_id=self.data.id, load_uuid=init_response.uuid, df=df_validated, path_mapper=asset_path_mapper, ) - upload_data_frame(df_result_stage, _BatchSize.UPLOAD_RECORDS, init_response.uuid) + upload_data_frame(df_result_stage, BatchSize.UPLOAD_RECORDS.value, init_response.uuid) request = API.UploadImageResultsRequest(uuid=init_response.uuid, test_run_id=self.data.id, reset=self._reset) finalize_res = krequests.put( - endpoint_path=API.Path.COMPLETE_UPLOAD_IMAGE_RESULTS, + endpoint_path=API.Path.COMPLETE_UPLOAD_IMAGE_RESULTS.value, data=json.dumps(dataclasses.asdict(request)), ) krequests.raise_for_status(finalize_res) @@ -223,7 +226,7 @@ def load_remaining_pairs(self, batch_size: int = 10_000_000) -> Tuple[EmbeddingD load_all=self._reset, ) with krequests.put( - endpoint_path=API.Path.INIT_LOAD_REMAINING_PAIRS, + endpoint_path=API.Path.INIT_LOAD_REMAINING_PAIRS.value, data=json.dumps(dataclasses.asdict(init_request)), stream=True, ) as init_res: @@ -280,11 +283,11 @@ def upload_pair_results(self, df_pair_result: PairResultDataFrame) -> int: df_validated = validate_df_schema(df_pair_result, PairResultDataFrameSchema) validate_df_record_count(df_validated) - upload_data_frame(df_validated, _BatchSize.UPLOAD_RECORDS, init_response.uuid) + upload_data_frame(df_validated, BatchSize.UPLOAD_RECORDS.value, init_response.uuid) request = API.UploadPairResultsRequest(uuid=init_response.uuid, test_run_id=self.data.id, reset=self._reset) finalize_res = krequests.put( - endpoint_path=API.Path.COMPLETE_UPLOAD_PAIR_RESULTS, + endpoint_path=API.Path.COMPLETE_UPLOAD_PAIR_RESULTS.value, data=json.dumps(dataclasses.asdict(request)), ) krequests.raise_for_status(finalize_res) @@ -339,5 +342,5 @@ def test(model: InferenceModel, test_suite: TestSuite, reset: bool = False) -> N log.success("completed test run") except Exception as e: - report_crash(test_run.data.id, API.Path.MARK_CRASHED) + 
report_crash(test_run.data.id, API.Path.MARK_CRASHED.value) raise e diff --git a/kolena/fr/test_suite.py b/kolena/fr/test_suite.py index 6b57d1078..5f032490b 100644 --- a/kolena/fr/test_suite.py +++ b/kolena/fr/test_suite.py @@ -124,7 +124,7 @@ def create( """ log.info(f"creating test suite '{name}'") request = API.CreateRequest(name=name, description=description or "") - res = krequests.post(endpoint_path=API.Path.CREATE, data=json.dumps(dataclasses.asdict(request))) + res = krequests.post(endpoint_path=API.Path.CREATE.value, data=json.dumps(dataclasses.asdict(request))) krequests.raise_for_status(res) data = from_dict(data_class=API.EntityData, data=res.json()) obj = cls._create_from_data(data) @@ -162,7 +162,7 @@ def load_by_name(cls, name: str, version: Optional[int] = None) -> "TestSuite": def _load_by_name(cls, name: str, version: Optional[int] = None) -> "TestSuite": log.info(f"loading test suite '{name}'") request = API.LoadByNameRequest(name=name, version=version) - res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME, data=json.dumps(dataclasses.asdict(request))) + res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME.value, data=json.dumps(dataclasses.asdict(request))) krequests.raise_for_status(res) log.success(f"loaded test suite '{name}'") return cls._create_from_data(from_dict(data_class=API.EntityData, data=res.json())) @@ -178,7 +178,7 @@ def _populate_from_other(self, other: "TestSuite") -> None: self.baseline_image_count = other.baseline_image_count self.baseline_pair_count_genuine = other.baseline_pair_count_genuine self.baseline_pair_count_imposter = other.baseline_pair_count_imposter - self.data = other.data + self.data = other._data @classmethod def _create_from_data(cls, data: API.EntityData) -> "TestSuite": @@ -336,7 +336,7 @@ def edit(self, reset: bool = False) -> Iterator[Editor]: baseline_test_case_ids=list(editor._baseline_test_cases.values()), non_baseline_test_case_ids=list(editor._non_baseline_test_cases.values()), ) - res = krequests.post(endpoint_path=API.Path.EDIT, data=json.dumps(dataclasses.asdict(request))) + res = krequests.post(endpoint_path=API.Path.EDIT.value, data=json.dumps(dataclasses.asdict(request))) krequests.raise_for_status(res) test_suite_data = from_dict(data_class=API.EntityData, data=res.json()) self._populate_from_other(self._create_from_data(test_suite_data)) diff --git a/kolena/workflow/evaluator_function.py b/kolena/workflow/evaluator_function.py index f30d0e945..3074d3222 100644 --- a/kolena/workflow/evaluator_function.py +++ b/kolena/workflow/evaluator_function.py @@ -181,7 +181,7 @@ def _update_progress(self, test_case: TestCase) -> None: message=message, ) res = krequests.put( - endpoint_path=API.Path.UPDATE_METRICS_STATUS, + endpoint_path=API.Path.UPDATE_METRICS_STATUS.value, data=json.dumps(dataclasses.asdict(request)), ) krequests.raise_for_status(res) diff --git a/kolena/workflow/model.py b/kolena/workflow/model.py index fe2273504..32f4794a9 100644 --- a/kolena/workflow/model.py +++ b/kolena/workflow/model.py @@ -29,8 +29,8 @@ from kolena._api.v1.generic import Model as API from kolena._utils import krequests from kolena._utils import log -from kolena._utils._consts import _BatchSize from kolena._utils.batched_load import _BatchedLoader +from kolena._utils.consts import BatchSize from kolena._utils.frozen import Frozen from kolena._utils.instrumentation import telemetry from kolena._utils.instrumentation import WithTelemetry @@ -114,7 +114,7 @@ def create( log.info(f"creating model '{name}'") metadata = metadata or {} 
request = CoreAPI.CreateRequest(name=name, metadata=metadata, workflow=cls.workflow.name) - res = krequests.post(endpoint_path=API.Path.CREATE, data=json.dumps(dataclasses.asdict(request))) + res = krequests.post(endpoint_path=API.Path.CREATE.value, data=json.dumps(dataclasses.asdict(request))) krequests.raise_for_status(res) log.success(f"created model '{name}'") return cls._from_data_with_infer(from_dict(data_class=CoreAPI.EntityData, data=res.json()), infer) @@ -128,7 +128,7 @@ def load(cls, name: str, infer: Optional[Callable[[TestSample], Inference]] = No :param infer: optional inference function for this model. """ request = CoreAPI.LoadByNameRequest(name=name) - res = krequests.put(endpoint_path=API.Path.LOAD, data=json.dumps(dataclasses.asdict(request))) + res = krequests.put(endpoint_path=API.Path.LOAD.value, data=json.dumps(dataclasses.asdict(request))) krequests.raise_for_status(res) return cls._from_data_with_infer(from_dict(data_class=CoreAPI.EntityData, data=res.json()), infer) @@ -156,9 +156,9 @@ def iter_inferences(self, test_case: TestCase) -> Iterator[Tuple[TestSample, Gro init_request=API.LoadInferencesRequest( model_id=self._id, test_case_id=test_case._id, - batch_size=_BatchSize.LOAD_SAMPLES, + batch_size=BatchSize.LOAD_SAMPLES.value, ), - endpoint_path=API.Path.LOAD_INFERENCES, + endpoint_path=API.Path.LOAD_INFERENCES.value, df_class=TestSampleDataFrame, ): for record in df_batch.itertuples(): diff --git a/kolena/workflow/test_case.py b/kolena/workflow/test_case.py index 1bb06d6e4..0f145a240 100644 --- a/kolena/workflow/test_case.py +++ b/kolena/workflow/test_case.py @@ -28,10 +28,10 @@ from kolena._api.v1.generic import TestCase as API from kolena._utils import krequests from kolena._utils import log -from kolena._utils._consts import _BatchSize from kolena._utils.batched_load import _BatchedLoader from kolena._utils.batched_load import init_upload from kolena._utils.batched_load import upload_data_frame +from kolena._utils.consts import BatchSize from kolena._utils.dataframes.validators import validate_df_schema from kolena._utils.frozen import Frozen from kolena._utils.instrumentation import telemetry @@ -158,7 +158,7 @@ def create( log.info(f"creating new test case '{name}'") cls._validate_test_samples(test_samples) request = CoreAPI.CreateRequest(name=name, description=description or "", workflow=cls.workflow.name) - res = krequests.post(endpoint_path=API.Path.CREATE, data=json.dumps(dataclasses.asdict(request))) + res = krequests.post(endpoint_path=API.Path.CREATE.value, data=json.dumps(dataclasses.asdict(request))) krequests.raise_for_status(res) data = from_dict(data_class=CoreAPI.EntityData, data=res.json()) obj = cls._create_from_data(data) @@ -178,7 +178,7 @@ def load(cls, name: str, version: Optional[int] = None) -> "TestCase": :return: the loaded test case. 
""" request = CoreAPI.LoadByNameRequest(name=name, version=version) - res = krequests.put(endpoint_path=API.Path.LOAD, data=json.dumps(dataclasses.asdict(request))) + res = krequests.put(endpoint_path=API.Path.LOAD.value, data=json.dumps(dataclasses.asdict(request))) krequests.raise_for_status(res) data = from_dict(data_class=CoreAPI.EntityData, data=res.json()) return cls._create_from_data(data) @@ -192,10 +192,10 @@ def iter_test_samples(self) -> Iterator[Tuple[TestSample, GroundTruth]]: log.info(f"loading test samples in test case '{self.name}'") test_sample_type = self.workflow.test_sample_type ground_truth_type = self.workflow.ground_truth_type - init_request = CoreAPI.InitLoadContentsRequest(batch_size=_BatchSize.LOAD_SAMPLES, test_case_id=self._id) + init_request = CoreAPI.InitLoadContentsRequest(batch_size=BatchSize.LOAD_SAMPLES.value, test_case_id=self._id) for df in _BatchedLoader.iter_data( init_request=init_request, - endpoint_path=API.Path.INIT_LOAD_TEST_SAMPLES, + endpoint_path=API.Path.INIT_LOAD_TEST_SAMPLES.value, df_class=TestSampleDataFrame, ): has_metadata = "test_sample_metadata" in df.columns @@ -294,7 +294,7 @@ def edit(self, reset: bool = False) -> Iterator[Editor]: log.info(f"updating test case '{self.name}'") init_response = init_upload() df_serialized = editor._to_data_frame().as_serializable() - upload_data_frame(df=df_serialized, batch_size=_BatchSize.UPLOAD_RECORDS, load_uuid=init_response.uuid) + upload_data_frame(df=df_serialized, batch_size=BatchSize.UPLOAD_RECORDS.value, load_uuid=init_response.uuid) request = CoreAPI.CompleteEditRequest( test_case_id=self._id, @@ -304,7 +304,7 @@ def edit(self, reset: bool = False) -> Iterator[Editor]: uuid=init_response.uuid, ) complete_res = krequests.put( - endpoint_path=API.Path.COMPLETE_EDIT, + endpoint_path=API.Path.COMPLETE_EDIT.value, data=json.dumps(dataclasses.asdict(request)), ) krequests.raise_for_status(complete_res) diff --git a/kolena/workflow/test_run.py b/kolena/workflow/test_run.py index 327a8ff7b..664764b41 100644 --- a/kolena/workflow/test_run.py +++ b/kolena/workflow/test_run.py @@ -32,10 +32,10 @@ from kolena._api.v1.generic import TestRun as API from kolena._utils import krequests from kolena._utils import log -from kolena._utils._consts import _BatchSize from kolena._utils.batched_load import _BatchedLoader from kolena._utils.batched_load import init_upload from kolena._utils.batched_load import upload_data_frame_chunk +from kolena._utils.consts import BatchSize from kolena._utils.dataframes.validators import validate_df_schema from kolena._utils.endpoints import get_results_url from kolena._utils.frozen import Frozen @@ -150,7 +150,10 @@ def __init__( evaluator=evaluator_display_name, configurations=api_configurations, ) - res = krequests.put(endpoint_path=API.Path.CREATE_OR_RETRIEVE, data=json.dumps(dataclasses.asdict(request))) + res = krequests.put( + endpoint_path=API.Path.CREATE_OR_RETRIEVE.value, + data=json.dumps(dataclasses.asdict(request)), + ) krequests.raise_for_status(res) response = from_dict(data_class=API.CreateOrRetrieveResponse, data=res.json()) self._id = response.test_run_id @@ -175,7 +178,7 @@ def run(self) -> None: self.evaluate() except Exception as e: - report_crash(self._id, API.Path.MARK_CRASHED) + report_crash(self._id, API.Path.MARK_CRASHED.value) raise e def load_test_samples(self) -> List[TestSample]: @@ -207,9 +210,9 @@ def _iter_all_inferences(self) -> Iterator[Tuple[TestSample, GroundTruth, Infere for df_batch in _BatchedLoader.iter_data( 
init_request=API.LoadTestSampleInferencesRequest( test_run_id=self._id, - batch_size=_BatchSize.LOAD_SAMPLES, + batch_size=BatchSize.LOAD_SAMPLES.value, ), - endpoint_path=API.Path.LOAD_INFERENCES, + endpoint_path=API.Path.LOAD_INFERENCES.value, df_class=TestSampleDataFrame, ): for record in df_batch.itertuples(): @@ -238,7 +241,10 @@ def upload_inferences(self, inferences: List[Tuple[TestSample, Inference]]) -> N upload_data_frame_chunk(df_serializable, init_response.uuid) request = API.UploadInferencesRequest(uuid=init_response.uuid, test_run_id=self._id, reset=self.reset) - res = krequests.put(endpoint_path=API.Path.UPLOAD_INFERENCES, data=json.dumps(dataclasses.asdict(request))) + res = krequests.put( + endpoint_path=API.Path.UPLOAD_INFERENCES.value, + data=json.dumps(dataclasses.asdict(request)), + ) krequests.raise_for_status(res) def evaluate(self) -> None: @@ -374,7 +380,10 @@ def process_results(results: Optional[EvaluationResults], config: Optional[Evalu log.info("uploading test suite metrics") self._upload_test_suite_metrics(test_suite_metrics) - def _iter_test_samples_batch(self, batch_size: int = _BatchSize.LOAD_SAMPLES) -> Iterator[TestSampleDataFrame]: + def _iter_test_samples_batch( + self, + batch_size: int = BatchSize.LOAD_SAMPLES.value, + ) -> Iterator[TestSampleDataFrame]: if batch_size <= 0: raise InputValidationError(f"invalid batch_size '{batch_size}': expected positive integer") init_request = API.LoadRemainingTestSamplesRequest( @@ -384,7 +393,7 @@ def _iter_test_samples_batch(self, batch_size: int = _BatchSize.LOAD_SAMPLES) -> ) yield from _BatchedLoader.iter_data( init_request=init_request, - endpoint_path=API.Path.LOAD_TEST_SAMPLES, + endpoint_path=API.Path.LOAD_TEST_SAMPLES.value, df_class=TestSampleDataFrame, ) @@ -410,7 +419,7 @@ def _upload_test_sample_metrics( configuration=_maybe_evaluator_configuration_to_api(configuration), ) res = krequests.put( - endpoint_path=API.Path.UPLOAD_TEST_SAMPLE_METRICS, + endpoint_path=API.Path.UPLOAD_TEST_SAMPLE_METRICS.value, data=json.dumps(dataclasses.asdict(request)), ) krequests.raise_for_status(res) @@ -425,7 +434,7 @@ def _upload_test_case_metrics( for config, tc_metrics in tc_metrics_by_config.items() ] df = pd.DataFrame(records, columns=["test_case_id", "configuration_display_name", "metrics"]) - return self._upload_aggregate_metrics(API.Path.UPLOAD_TEST_CASE_METRICS, df) + return self._upload_aggregate_metrics(API.Path.UPLOAD_TEST_CASE_METRICS.value, df) def _upload_test_case_plots( self, @@ -438,7 +447,7 @@ def _upload_test_case_plots( for tc_plot in tc_plots or [] ] df = pd.DataFrame(records, columns=["test_case_id", "configuration_display_name", "metrics"]) - return self._upload_aggregate_metrics(API.Path.UPLOAD_TEST_CASE_PLOTS, df) + return self._upload_aggregate_metrics(API.Path.UPLOAD_TEST_CASE_PLOTS.value, df) def _upload_test_suite_metrics( self, @@ -450,7 +459,7 @@ def _upload_test_suite_metrics( if ts_metrics is not None ] df = pd.DataFrame(records, columns=["configuration_display_name", "metrics"]) - return self._upload_aggregate_metrics(API.Path.UPLOAD_TEST_SUITE_METRICS, df) + return self._upload_aggregate_metrics(API.Path.UPLOAD_TEST_SUITE_METRICS.value, df) def _upload_aggregate_metrics(self, endpoint_path: str, df: pd.DataFrame) -> None: df_validated = MetricsDataFrame(validate_df_schema(df, MetricsDataFrameSchema, trusted=True)) @@ -469,7 +478,7 @@ def _upload_aggregate_metrics(self, endpoint_path: str, df: pd.DataFrame) -> Non def _start_server_side_evaluation(self) -> None: request = 
API.EvaluateRequest(test_run_id=self._id) - res = krequests.put(endpoint_path=API.Path.EVALUATE, data=json.dumps(dataclasses.asdict(request))) + res = krequests.put(endpoint_path=API.Path.EVALUATE.value, data=json.dumps(dataclasses.asdict(request))) krequests.raise_for_status(res) diff --git a/kolena/workflow/test_suite.py b/kolena/workflow/test_suite.py index 55fb5100c..b8674fdb8 100644 --- a/kolena/workflow/test_suite.py +++ b/kolena/workflow/test_suite.py @@ -29,8 +29,8 @@ from kolena._api.v1.generic import TestSuite as API from kolena._utils import krequests from kolena._utils import log -from kolena._utils._consts import _BatchSize from kolena._utils.batched_load import _BatchedLoader +from kolena._utils.consts import BatchSize from kolena._utils.frozen import Frozen from kolena._utils.instrumentation import telemetry from kolena._utils.instrumentation import WithTelemetry @@ -154,7 +154,7 @@ def create( log.info(f"creating test suite '{name}'") cls._validate_test_cases(test_cases) request = CoreAPI.CreateRequest(name=name, description=description or "", workflow=cls.workflow.name) - res = krequests.post(endpoint_path=API.Path.CREATE, data=json.dumps(dataclasses.asdict(request))) + res = krequests.post(endpoint_path=API.Path.CREATE.value, data=json.dumps(dataclasses.asdict(request))) krequests.raise_for_status(res) data = from_dict(data_class=CoreAPI.EntityData, data=res.json()) obj = cls._create_from_data(data) @@ -174,7 +174,7 @@ def load(cls, name: str, version: Optional[int] = None) -> "TestSuite": :return: the loaded test suite. """ request = CoreAPI.LoadByNameRequest(name=name, version=version) - res = krequests.put(endpoint_path=API.Path.LOAD, data=json.dumps(dataclasses.asdict(request))) + res = krequests.put(endpoint_path=API.Path.LOAD.value, data=json.dumps(dataclasses.asdict(request))) krequests.raise_for_status(res) data = from_dict(data_class=CoreAPI.EntityData, data=res.json()) return cls._create_from_data(data) @@ -253,7 +253,7 @@ def edit(self, reset: bool = False) -> Iterator[Editor]: description=editor._description, test_case_ids=[tc._id for tc in editor._test_cases], ) - res = krequests.post(endpoint_path=API.Path.EDIT, data=json.dumps(dataclasses.asdict(request))) + res = krequests.post(endpoint_path=API.Path.EDIT.value, data=json.dumps(dataclasses.asdict(request))) krequests.raise_for_status(res) test_suite_data = from_dict(data_class=CoreAPI.EntityData, data=res.json()) self._populate_from_other(self._create_from_data(test_suite_data)) @@ -264,9 +264,9 @@ def load_test_samples(self) -> List[Tuple[TestCase, List[TestSample]]]: for df_batch in _BatchedLoader.iter_data( init_request=API.LoadTestSamplesRequest( test_suite_id=self._id, - batch_size=_BatchSize.LOAD_SAMPLES, + batch_size=BatchSize.LOAD_SAMPLES.value, ), - endpoint_path=API.Path.INIT_LOAD_TEST_SAMPLES, + endpoint_path=API.Path.INIT_LOAD_TEST_SAMPLES.value, df_class=TestSuiteTestSamplesDataFrame, ): for record in df_batch.itertuples(): diff --git a/kolena/workflow/workflow.py b/kolena/workflow/workflow.py index 831225d14..bcaa07f6c 100644 --- a/kolena/workflow/workflow.py +++ b/kolena/workflow/workflow.py @@ -202,7 +202,7 @@ def register_evaluator( """ response = krequests.post( - API.Path.EVALUATOR, + API.Path.EVALUATOR.value, json=dict(workflow=workflow, image=image, name=evaluator_name, secret=secret, aws_assume_role=aws_assume_role), ) krequests.raise_for_status(response) @@ -222,7 +222,7 @@ def list_evaluators(workflow: str) -> List[RemoteEvaluator]: :return: list of registered evaluators """ - 
response = krequests.get(f"{API.Path.EVALUATOR}/{quote(workflow)}") + response = krequests.get(f"{API.Path.EVALUATOR.value}/{quote(workflow)}") krequests.raise_for_status(response) return [ @@ -243,8 +243,8 @@ def get_evaluator(workflow: str, evaluator_name: str, include_secret: bool = Fal """ response = krequests.get( - f"{API.Path.EVALUATOR}/{quote(workflow)}/{quote(evaluator_name)}", - params={"include_secret": include_secret}, + endpoint_path=f"{API.Path.EVALUATOR.value}/{quote(workflow)}/{quote(evaluator_name)}", + params=dict(include_secret=include_secret), ) krequests.raise_for_status(response) diff --git a/pyproject.toml b/pyproject.toml index 874ab1e0b..8fc9b5a26 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,9 +25,15 @@ packages = [ ] [tool.poetry.dependencies] -python = ">=3.7.1,<3.11" -numpy = ">=1.19" -pandas = ">=1.1,<1.6" +python = ">=3.7.1,<3.12" +numpy = [ + { version = ">=1.19", python = ">=3.7,<3.11" }, + { version = ">=1.23", python = ">=3.11" }, +] +pandas = [ + { version = ">=1.1,<1.6", python = ">=3.7,<3.11" }, + { version = ">=1.5,<1.6", python = ">=3.11" }, +] pandera = ">=0.9.0" pydantic = ">=1.8" dacite = ">=1.6" diff --git a/tests/integration/fr/conftest.py b/tests/integration/fr/conftest.py index ba4d828b8..1b524b68a 100644 --- a/tests/integration/fr/conftest.py +++ b/tests/integration/fr/conftest.py @@ -191,13 +191,13 @@ def fr_test_suites(fr_test_cases: List[TestCase]) -> List[TestSuite]: test_suite_name_a = with_test_prefix("A") test_suite_a = TestSuite( test_suite_name_a, - description="filler", + description="this description will be overwritten", baseline_test_cases=[fr_test_cases[0]], non_baseline_test_cases=[fr_test_cases[2]], ) test_suite_a_updated = TestSuite( test_suite_name_a, - description="description", + description="description overwrites", baseline_test_cases=[fr_test_cases[1]], non_baseline_test_cases=[fr_test_cases[2]], reset=True, diff --git a/tests/integration/fr/test_model.py b/tests/integration/fr/test_model.py index d0c9392ad..a5654ff60 100644 --- a/tests/integration/fr/test_model.py +++ b/tests/integration/fr/test_model.py @@ -33,7 +33,7 @@ from tests.integration.helper import with_test_prefix -def test_create() -> None: +def test__create() -> None: model_name = with_test_prefix(f"{__file__} test_create") model_metadata = { "detector": "det", @@ -47,21 +47,21 @@ def test_create() -> None: assert created_model.data.metadata == model_metadata -def test_load_by_name() -> None: +def test__load_by_name() -> None: model_name = with_test_prefix(f"{__file__} test_load_by_name") created_model = Model.create(name=model_name, metadata={}) loaded_model = Model.load_by_name(model_name) assert created_model == loaded_model -def test_load_by_name_nonexistent() -> None: +def test__load_by_name__nonexistent() -> None: model_name = with_test_prefix(f"{__file__} test_load_by_name_nonexistent") # TODO: Should not be HTTPError with pytest.raises(HTTPError): Model.load_by_name(model_name) -def test_create_bad_metadata() -> None: +def test__create__bad_metadata() -> None: model_name = with_test_prefix(f"{__file__} test_create_bad_metadata") with pytest.raises(ValidationError): Model.create(name=model_name, metadata=cast(Dict[str, Any], None)) @@ -69,7 +69,7 @@ def test_create_bad_metadata() -> None: Model.create(name=model_name, metadata=cast(Dict[str, Any], "bogus")) -def test_create_existing() -> None: +def test__create__existing() -> None: model_name = with_test_prefix(f"{__file__} test_create_existing") Model.create(name=model_name, metadata={}) 
     # TODO: Should not be HTTPError
@@ -77,7 +77,7 @@ def test_create_existing() -> None:
     Model.create(name=model_name, metadata={})


-def test_load_by_name_seeded(fr_models: List[Model]) -> None:
+def test__load_by_name__seeded(fr_models: List[Model]) -> None:
     for i, model in enumerate(fr_models):
         db_model = Model.load_by_name(model.data.name)
         assert db_model.data == model.data
@@ -91,7 +91,7 @@ def test_load_by_name_seeded(fr_models: List[Model]) -> None:
     assert inference_model.compare(np.ones(1), np.ones(2)) == 3


-def test_load_pair_results_empty(fr_test_cases: List[TestCase]) -> None:
+def test__load_pair_results__empty(fr_test_cases: List[TestCase]) -> None:
     name = with_test_prefix(f"{__file__} test_load_pair_results_empty")
     model = Model.create(name, dict(test="metadata"))

@@ -107,7 +107,7 @@ def test_load_pair_results_empty(fr_test_cases: List[TestCase]) -> None:
     assert len(df) == 0


-def test_iter_pair_results_empty(fr_test_cases: List[TestCase]) -> None:
+def test__iter_pair_results__empty(fr_test_cases: List[TestCase]) -> None:
     name = with_test_prefix(f"{__file__} test_iter_pair_results_empty")
     model = Model.create(name, dict(test="metadata"))

@@ -149,7 +149,7 @@ def _seed_results(model_name: str, test_suite: TestSuite) -> Tuple[Model, pd.Dat
     return model, df_expected, fte_locators


-def test_load_pair_results(fr_test_suites: List[TestSuite]) -> None:
+def test__load_pair_results(fr_test_suites: List[TestSuite]) -> None:
     test_suite = fr_test_suites[0]
     model_name = with_test_prefix(f"{__file__} test_load_pair_results")
     model, df_expected, fte_locators = _seed_results(model_name, test_suite)
@@ -167,7 +167,7 @@ def test_load_pair_results(fr_test_suites: List[TestSuite]) -> None:
     )


-def test_iter_pair_results(fr_test_suites: List[TestSuite]) -> None:
+def test__iter_pair_results(fr_test_suites: List[TestSuite]) -> None:
     test_suite = fr_test_suites[0]
     model_name = with_test_prefix(f"{__file__} test_iter_pair_results")
     model, df_expected, _ = _seed_results(model_name, test_suite)
diff --git a/tests/integration/fr/test_test_run.py b/tests/integration/fr/test_test_run.py
index 13cb6ba7d..a8760d20d 100644
--- a/tests/integration/fr/test_test_run.py
+++ b/tests/integration/fr/test_test_run.py
@@ -378,7 +378,7 @@ def extract(locator: str) -> np.ndarray:
         with pytest.raises(RuntimeError):
             test(model, test_suites[0])

-        patched.assert_called_once_with(test_run._id, TestRunAPI.Path.MARK_CRASHED)
+        patched.assert_called_once_with(test_run._id, TestRunAPI.Path.MARK_CRASHED.value)


 def test__multi_face(fr_multi_face_test_run: Tuple[Model, TestSuite]) -> None:
diff --git a/tests/integration/fr/test_test_suite.py b/tests/integration/fr/test_test_suite.py
index 7e53c0fb8..5ae6f74c1 100644
--- a/tests/integration/fr/test_test_suite.py
+++ b/tests/integration/fr/test_test_suite.py
@@ -65,7 +65,7 @@ def test__init() -> None:
     assert test_suite == test_suite3


-def test__init__with_test_cases(single_test_case) -> None:
+def test__init__with_test_cases(single_test_case: TestCase) -> None:
     name = with_test_prefix(f"{__file__}::test__init__with_test_cases test suite")
     description = "A\n\tlong\ndescription including special characters! 🎉"
🎉" test_suite = TestSuite(name, description=description, baseline_test_cases=[single_test_case]) @@ -79,7 +79,7 @@ def test__init__with_test_cases(single_test_case) -> None: assert test_suite.baseline_pair_count_imposter == 6 -def test__init__no_baseline_error(single_test_case) -> None: +def test__init__no_baseline_error(single_test_case: TestCase) -> None: name = with_test_prefix(f"{__file__}::test__init__no_baseline_error test suite") description = "A\n\tlong\ndescription including special characters! 🎉" expected_error_msg = "to a state without any baseline test cases" @@ -95,7 +95,7 @@ def test__init__no_baseline_error(single_test_case) -> None: assert expected_error_msg in exc_info_value -def test__init__with_version(single_test_case) -> None: +def test__init__with_version(single_test_case: TestCase) -> None: name = with_test_prefix(f"{__file__}::test__init__with_version test suite") description = "test suite description" test_suite = TestSuite(name, description=description, baseline_test_cases=[single_test_case]) @@ -103,23 +103,25 @@ def test__init__with_version(single_test_case) -> None: test_suite0 = TestSuite(name, version=test_suite.version) assert test_suite == test_suite0 - with pytest.raises(NameConflictError): + with pytest.raises(NameConflictError): # TODO: should raise NotFoundError when version is specified TestSuite(name, version=123) - with test_suite.edit() as editor: - new_description = "new description" + new_description = "new description" + with test_suite.edit() as editor: # description-only edit does not bump version editor.description(new_description) assert test_suite.description == new_description + assert test_suite.version == test_suite0.version assert test_suite == TestSuite(name, version=test_suite.version) assert test_suite == TestSuite(name) + test_suite0_reloaded = TestSuite(name, version=test_suite0.version) assert test_suite0.baseline_test_cases == test_suite0_reloaded.baseline_test_cases assert test_suite0.non_baseline_test_cases == test_suite0_reloaded.non_baseline_test_cases assert test_suite0_reloaded.description == new_description -def test__init__reset(single_test_case, multi_version_test_case) -> None: +def test__init__reset(single_test_case: TestCase, multi_version_test_case: List[TestCase]) -> None: name = with_test_prefix(f"{__file__}::test__init__reset test suite") description = f"{name} (description)" TestSuite( @@ -144,21 +146,21 @@ def test__init__reset(single_test_case, multi_version_test_case) -> None: assert test_suite.non_baseline_test_cases == [] -def test_load(fr_test_suites: List[TestSuite], fr_test_cases: List[TestCase]) -> None: +def test__load(fr_test_suites: List[TestSuite], fr_test_cases: List[TestCase]) -> None: test_suite = fr_test_suites[0] test_suite_updated = fr_test_suites[1] loaded_test_suite = TestSuite.load(test_suite.name) - assert loaded_test_suite.data.id == test_suite_updated._id - assert loaded_test_suite.data.name == test_suite.name - assert loaded_test_suite.data.description == test_suite_updated.description - assert loaded_test_suite.data.version == test_suite_updated.version + assert loaded_test_suite._id == test_suite_updated._id + assert loaded_test_suite.name == test_suite.name + assert loaded_test_suite.description == test_suite_updated.description + assert loaded_test_suite.version == test_suite_updated.version assert loaded_test_suite.data.baseline_test_cases == [fr_test_cases[1].data] assert loaded_test_suite.data.non_baseline_test_cases == [fr_test_cases[2].data] -def 
+def test__load__with_version(fr_test_suites: List[TestSuite], fr_test_cases: List[TestCase]) -> None:
     # the test suite is an older version
     test_suite_prev = fr_test_suites[0]
     test_suite_updated = fr_test_suites[1]
@@ -173,7 +175,7 @@ def test_load_with_version(fr_test_suites: List[TestSuite], fr_test_cases: List[
     assert loaded_test_suite.non_baseline_test_cases == [fr_test_cases[2]]


-def test_load_absent(fr_test_suites: List[TestSuite]) -> None:
+def test__load__absent(fr_test_suites: List[TestSuite]) -> None:
     with pytest.raises(NotFoundError):
         TestSuite.load("name of a test suite that does not exist")

@@ -182,20 +184,20 @@ def test_load_absent(fr_test_suites: List[TestSuite]) -> None:
         TestSuite.load(fr_test_suites[0].name.lower())


-def test_create() -> None:
-    name = with_test_prefix(f"{__file__}::test_create test suite")
+def test__create() -> None:
+    name = with_test_prefix(f"{__file__}::test__create test suite")
     description = "\tSome test suite description\nspanning\nmultiple lines."
     test_suite = TestSuite.create(name, description=description)
-    assert test_suite.data.name == name
-    assert test_suite.data.version == 0
-    assert test_suite.data.description == description
+    assert test_suite.name == name
+    assert test_suite.version == 0
+    assert test_suite.description == description

     test_suite = TestSuite.create(with_test_prefix(f"{__file__}::test_create test suite 2"))
-    assert test_suite.data.description == ""
+    assert test_suite.description == ""


-def test_edit(fr_test_cases: List[TestCase]) -> None:
-    name = with_test_prefix(f"{__file__}::test_edit test suite")
+def test__edit(fr_test_cases: List[TestCase]) -> None:
+    name = with_test_prefix(f"{__file__}::test__edit test suite")
     test_suite = TestSuite.create(name)
     test_cases = fr_test_cases

@@ -206,9 +208,9 @@ def test_edit(fr_test_cases: List[TestCase]) -> None:
         editor.add(TestCase.load(test_cases[2].name))
         editor.remove(TestCase.load(test_cases[2].name))

-    assert test_suite.data.version == 1
-    assert test_suite.data.description == new_description
-    all_test_cases = test_suite.data.baseline_test_cases + test_suite.data.non_baseline_test_cases
+    assert test_suite.version == 1
+    assert test_suite.description == new_description
+    all_test_cases = test_suite.baseline_test_cases + test_suite.non_baseline_test_cases
     actual_names = sorted(tc.name for tc in all_test_cases)
     expected_names = sorted([test_cases[0].name])
     assert actual_names == expected_names
@@ -222,8 +224,8 @@


 # Note: editor.merge is deprecated
-def test_edit_merge(fr_test_cases: List[TestCase]) -> None:
-    name = with_test_prefix(f"{__file__}::test_edit_merge test suite")
+def test__edit__merge(fr_test_cases: List[TestCase]) -> None:
+    name = with_test_prefix(f"{__file__}::test__edit__merge test suite")
     test_suite = TestSuite.create(name)
     test_cases = fr_test_cases

@@ -235,27 +237,27 @@ def test_edit_merge(fr_test_cases: List[TestCase]) -> None:
         editor.merge(TestCase.load(test_cases[1].name))
         editor.add(TestCase.load(test_cases[2].name))

-    assert test_suite.data.version == 2
-    all_test_cases = test_suite.data.baseline_test_cases + test_suite.data.non_baseline_test_cases
+    assert test_suite.version == 2
+    all_test_cases = test_suite.baseline_test_cases + test_suite.non_baseline_test_cases
     actual_names = sorted(tc.name for tc in all_test_cases)
     expected_names = sorted([tc.name for tc in test_cases[1:3]])
     assert actual_names == expected_names
     # expect updated test case to still be considered the baseline
-    assert test_suite.data.baseline_test_cases[0].version == test_cases[1].version
+    assert test_suite.baseline_test_cases[0].version == test_cases[1].version


-def test_edit_no_op() -> None:
-    name = with_test_prefix(f"{__file__}::test_edit_no_op test suite")
+def test__edit__no_op() -> None:
+    name = with_test_prefix(f"{__file__}::test__edit__no_op test suite")
     test_suite = TestSuite.create(name)
-    version = test_suite.data.version
+    version = test_suite.version

     # no-op editor contexts do not bump version
     with test_suite.edit():
         ...
-    assert test_suite.data.version == version
+    assert test_suite.version == version


-def test_edit_same_name(fr_test_cases: List[TestCase]) -> None:
-    name = with_test_prefix(f"{__file__}::test_edit_same_name test suite")
+def test__edit__same_name(fr_test_cases: List[TestCase]) -> None:
+    name = with_test_prefix(f"{__file__}::test__edit__same_name test suite")
     test_suite = TestSuite.create(name)
     # test_case_1 is updated version of test_case_0
     test_case_0 = fr_test_cases[0]
@@ -276,8 +278,8 @@ def test_edit_same_name(fr_test_cases: List[TestCase]) -> None:
     assert test_suite.baseline_test_cases == [test_case_1]


-def test_edit_empty(fr_test_cases: List[TestCase]) -> None:
-    name = with_test_prefix(f"{__file__}::test_edit_empty test suite")
+def test__edit__empty(fr_test_cases: List[TestCase]) -> None:
+    name = with_test_prefix(f"{__file__}::test__edit__empty test suite")
     test_suite = TestSuite.create(name)
     test_case = fr_test_cases[0]

@@ -290,21 +292,21 @@ def test_edit_empty(fr_test_cases: List[TestCase]) -> None:
         editor.remove(test_case)


-def test_edit_baseline_counts(fr_test_cases: List[TestCase]) -> None:
-    name = with_test_prefix(f"{__file__}::test_edit_baseline_counts test suite")
+def test__edit__baseline_counts(fr_test_cases: List[TestCase]) -> None:
+    name = with_test_prefix(f"{__file__}::test__edit__baseline_counts test suite")
     test_suite = TestSuite.create(name)
-    assert test_suite.data.baseline_image_count == 0
-    assert test_suite.data.baseline_pair_count_genuine == 0
-    assert test_suite.data.baseline_pair_count_imposter == 0
+    assert test_suite.baseline_image_count == 0
+    assert test_suite.baseline_pair_count_genuine == 0
+    assert test_suite.baseline_pair_count_imposter == 0

     test_case_record = fr_test_cases[0]
     test_case = TestCase.load(test_case_record.name, version=test_case_record.version)
     with test_suite.edit() as editor:
         editor.add(test_case, is_baseline=True)

-    assert test_suite.data.baseline_image_count == 3
-    assert test_suite.data.baseline_pair_count_genuine == 3
-    assert test_suite.data.baseline_pair_count_imposter == 1
+    assert test_suite.baseline_image_count == 3
+    assert test_suite.baseline_pair_count_genuine == 3
+    assert test_suite.baseline_pair_count_imposter == 1

     # this test case has overlapping images and pairs with the previously added test case
     test_case_record = fr_test_cases[2]
@@ -313,12 +315,12 @@ def test_edit_baseline_counts(fr_test_cases: List[TestCase]) -> None:
         editor.add(test_case, is_baseline=True)

     # assert images and pairs are properly deduped
-    assert test_suite.data.baseline_image_count == 4
-    assert test_suite.data.baseline_pair_count_genuine == 3
-    assert test_suite.data.baseline_pair_count_imposter == 2
+    assert test_suite.baseline_image_count == 4
+    assert test_suite.baseline_pair_count_genuine == 3
+    assert test_suite.baseline_pair_count_imposter == 2


-def test__edit__no_baseline_error(single_test_case) -> None:
+def test__edit__no_baseline_error(single_test_case: TestCase) -> None:
     name = with_test_prefix(f"{__file__}::test__edit__no_baseline_error test suite")
     test_suite = TestSuite(name)
     expected_error_msg = "to a state without any baseline test cases"
@@ -360,26 +362,26 @@ def test__edit__no_baseline_error(single_test_case) -> None:
     assert expected_error_msg in exc_info_value


-def test__edit__reset(single_test_case, multi_version_test_case) -> None:
+def test__edit__reset(single_test_case: TestCase, multi_version_test_case: List[TestCase]) -> None:
     name = with_test_prefix(f"{__file__}::test__edit__reset test suite")
     test_suite = TestSuite(
         name,
         baseline_test_cases=[single_test_case],
         non_baseline_test_cases=[multi_version_test_case[2]],
     )

-    new_description = "new description"
     with test_suite.edit(reset=True) as editor:
-        editor.description(new_description)
         editor.add(multi_version_test_case[1], True)

     assert test_suite.version == 2
-    assert test_suite.description == new_description
+    assert test_suite.description == ""
     assert test_suite.baseline_test_cases == [multi_version_test_case[1]]

-    with test_suite.edit(reset=True) as editor:
+    new_description = "new description"
+    with test_suite.edit(reset=True) as editor:  # no change to test suite contents does not bump version
+        editor.description(new_description)
         editor.add(multi_version_test_case[1], True)

     assert test_suite.version == 2
-    assert test_suite.description == new_description  # not updated or cleared
+    assert test_suite.description == new_description  # updated without version bump
     assert test_suite.baseline_test_cases == [multi_version_test_case[1]]

     with test_suite.edit(reset=True) as editor:
diff --git a/tests/integration/generic/test_test_run.py b/tests/integration/generic/test_test_run.py
index ca3b2b7ae..8363d11c8 100644
--- a/tests/integration/generic/test_test_run.py
+++ b/tests/integration/generic/test_test_run.py
@@ -207,7 +207,7 @@ class MarkCrashedDummyEvaluator(DummyEvaluator):
         with pytest.raises(RuntimeError):
             test_run.run()

-        patched.assert_called_once_with(test_run._id, TestRunAPI.Path.MARK_CRASHED)
+        patched.assert_called_once_with(test_run._id, TestRunAPI.Path.MARK_CRASHED.value)


 def test__evaluator__unconfigured(
diff --git a/tests/integration/generic/test_test_suite.py b/tests/integration/generic/test_test_suite.py
index 32c795ddf..188392a33 100644
--- a/tests/integration/generic/test_test_suite.py
+++ b/tests/integration/generic/test_test_suite.py
@@ -86,13 +86,12 @@ def test__init__reset(test_case: TestCase, test_case_versions: List[TestCase]) -

 def test__init__with_version(test_case_versions: List[TestCase]) -> None:
     name = with_test_prefix(f"{__file__}::test__init__with_version test suite")
-    description = "test suite description"
-    test_suite = TestSuite(name, description=description)
+    test_suite = TestSuite(name, description="initial description")

     test_suite0 = TestSuite(name, version=test_suite.version)
     assert test_suite == test_suite0

-    with pytest.raises(NameConflictError):
+    with pytest.raises(NameConflictError):  # TODO: this should be a NotFoundError when version is specified
         TestSuite(name, version=123)

     with test_suite.edit() as editor:
diff --git a/tests/unit/utils/test_endpoints.py b/tests/unit/utils/test_endpoints.py
index b6c9f6a35..8e9f6889f 100644
--- a/tests/unit/utils/test_endpoints.py
+++ b/tests/unit/utils/test_endpoints.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Union
 from urllib.parse import urlparse

 import pytest
@@ -20,7 +19,6 @@
 from kolena._utils.endpoints import _get_platform_url
 from kolena._utils.endpoints import _get_results_url
 from kolena._utils.state import _ClientState
-from kolena.workflow import Workflow


 def assert_url_equals(a: str, b: str) -> None:
@@ -91,5 +89,5 @@ def test__get_platform_url(client_state: _ClientState, expected: str) -> None:
         ),
     ],
 )
-def test__get_results_url(client_state: _ClientState, workflow: Union[Workflow, WorkflowType], expected: str) -> None:
+def test__get_results_url(client_state: _ClientState, workflow: str, expected: str) -> None:
     assert_url_equals(_get_results_url(client_state, workflow, 1, 2), expected)