Support Python 3.11 (#7)
* Pin transient urllib3 dependency to unbreak requests==2.30

* Pin to requests-toolbelt>=1 instead of pinning transient urllib3

* Add back importlib-metadata dependency

* Allow Python 3.11 and add version to CI matrix

* Pin urllib3 per requests issue recommendation, leave TODO

* Tediously update all enum usages to use Enum.KEY.value instead of Enum.KEY, which works on Python<3.11 but broke in 3.11 (see: python/cpython#100458, and the sketch following this list)

* Move FR-specific upload_image_chips to kolena.fr._utils; remove kolena.fr._consts; rename some already-hidden classes to remove leading underscore (already in underscored files)

* Revert {_=>}BatchedLoader diff, this PR is not the place for that

* Start updating integration tests with new description edit semantics

* Add missing pytest-depends dev dependency

* Fix FR test__edit__reset

* Assert description update without version update

* Unbreak upload_image_chips after move

* Unbreak FR test suite test__load by comparing data to data

* Remove bad comparison
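
For context on the enum bullet above: through Python 3.10, formatting a str-mixin enum member fell through to str.__format__ and yielded the member's value, while 3.11 routes Enum.__format__ through str() and yields the qualified member name. A minimal sketch of the difference (the path value here is hypothetical, not kolena's actual constant):

    from enum import Enum

    class Path(str, Enum):
        INIT_UPLOAD = "/api/v1/init-upload"  # hypothetical value for illustration

    print(f"{Path.INIT_UPLOAD}")        # 3.10: /api/v1/init-upload -- 3.11: Path.INIT_UPLOAD
    print(f"{Path.INIT_UPLOAD.value}")  # /api/v1/init-upload on every version

Appending .value everywhere, as the diffs below do, makes the rendered output version-independent.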
gordonhart authored May 11, 2023
1 parent 295f79f commit a3ce16a
Showing 34 changed files with 268 additions and 271 deletions.
8 changes: 5 additions & 3 deletions .circleci/config.yml
@@ -33,11 +33,13 @@ jobs:
             - /home/circleci/project/.poetry/virtualenvs
             - poetry.lock
       - run: poetry run python3 -c 'import kolena'
+      # TODO: fix underlying mypy issues with Python>3.9 rather than skipping
       - when:
           condition:
             not:
-              # TODO: upgrade mypy version to address https://github.com/python/mypy/issues/13627
-              equal: [ "3.10", << parameters.python-version >> ]
+              or:
+                - equal: [ "3.10", << parameters.python-version >> ]
+                - equal: [ "3.11", << parameters.python-version >> ]
           steps:
             - run: poetry run pre-commit run -a
             - run:
@@ -108,7 +110,7 @@ workflows:
           name: unit-test-<< matrix.python-version >>
           matrix:
             parameters:
-              python-version: [ "3.7", "3.8", "3.9", "3.10" ]
+              python-version: [ "3.7", "3.8", "3.9", "3.10", "3.11" ]
       - integration-test:
           matrix:
             parameters:
4 changes: 2 additions & 2 deletions kolena/_api/v1/batched_load.py
@@ -25,11 +25,11 @@ class Path(str, Enum):

     @classmethod
     def upload_signed_url(cls, load_uuid: str) -> str:
-        return f"{cls.UPLOAD_SIGNED_URL_STUB}/{load_uuid}"
+        return f"{cls.UPLOAD_SIGNED_URL_STUB.value}/{load_uuid}"

     @classmethod
     def download_by_path(cls, path: str) -> str:
-        return f"{cls.DOWNLOAD_BY_PATH_STUB}/{path}"
+        return f"{cls.DOWNLOAD_BY_PATH_STUB.value}/{path}"

 @dataclass(frozen=True)
 class WithLoadUUID:
19 changes: 0 additions & 19 deletions kolena/_api/v1/samples.py

This file was deleted.

3 changes: 2 additions & 1 deletion kolena/_utils/asset_path_mapper.py
@@ -30,7 +30,8 @@ def absolute_locator(self, test_run_id: int, load_uuid: str, image_id: int, key:
     def relative_locator(self, path_stub: str) -> str:
         return f"{self.prefix}/{path_stub}"

-    def path_stub(self, test_run_id: int, load_uuid: str, image_id: int, key: str) -> str:
+    @staticmethod
+    def path_stub(test_run_id: int, load_uuid: str, image_id: int, key: str) -> str:
         return f"{test_run_id}/{image_id}/{key}-{load_uuid}.png"

     def _absolute_locator(self, relative_locator: str) -> str:
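
With path_stub now a @staticmethod, callers such as the relocated upload_image_chips can build asset paths without holding an AssetPathMapper instance. A quick illustration using the format string from this diff (argument values invented):

    # test_run_id=42, load_uuid="a1b2", image_id=7, key="chip"
    AssetPathMapper.path_stub(42, "a1b2", 7, "chip")  # -> "42/7/chip-a1b2.png"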
50 changes: 2 additions & 48 deletions kolena/_utils/batched_load.py
@@ -26,26 +26,20 @@
 import numpy as np
 import pandas as pd
 import requests
-from PIL import Image
-from requests_toolbelt import MultipartEncoder
 from retrying import retry

 from kolena._api.v1.batched_load import BatchedLoad as API
-from kolena._api.v1.fr import Asset as AssetAPI
 from kolena._utils import krequests
 from kolena._utils import log
-from kolena._utils.asset_path_mapper import AssetPathMapper
 from kolena._utils.datatypes import LoadableDataFrame
 from kolena._utils.serde import from_dict
-from kolena.fr._consts import _BatchSize
-from kolena.fr.datatypes import _ImageChipsDataFrame

 VALIDATION_COUNT_LIMIT = 100
 STAGE_STATUS__LOADED = "LOADED"


 def init_upload() -> API.InitiateUploadResponse:
-    init_res = krequests.put(endpoint_path=API.Path.INIT_UPLOAD)
+    init_res = krequests.put(endpoint_path=API.Path.INIT_UPLOAD.value)
     krequests.raise_for_status(init_res)
     init_response = from_dict(data_class=API.InitiateUploadResponse, data=init_res.json())
     return init_response
@@ -78,46 +72,6 @@ def upload_data_frame_chunk(df_chunk: pd.DataFrame, load_uuid: str) -> None:
     krequests.raise_for_status(upload_response)


-def upload_image_chips(
-    df: _ImageChipsDataFrame,
-    path_mapper: AssetPathMapper,
-    batch_size: int = _BatchSize.UPLOAD_CHIPS,
-) -> None:
-    def upload_batch(df_batch: _ImageChipsDataFrame) -> None:
-        df_batch = df_batch.reset_index(drop=True)  # reset indices so we match the signed_url indices
-
-        def as_buffer(image_raw: np.ndarray) -> io.BytesIO:
-            pil_image = Image.fromarray(image_raw).convert("RGB")
-            buf = io.BytesIO()
-            pil_image.save(buf, "png")
-            buf.seek(0)
-            return buf
-
-        data = MultipartEncoder(
-            fields=[
-                (
-                    "files",
-                    (
-                        path_mapper.path_stub(row["test_run_id"], row["uuid"], row["image_id"], row["key"]),
-                        as_buffer(row["image"]),
-                    ),
-                )
-                for _, row in df_batch.iterrows()
-            ],
-        )
-        upload_response = krequests.put(
-            endpoint_path=AssetAPI.Path.BULK_UPLOAD,
-            data=data,
-            headers={"Content-Type": data.content_type},
-        )
-        krequests.raise_for_status(upload_response)
-
-    num_chunks = math.ceil(len(df) / batch_size)
-    chunk_iter = np.array_split(df, num_chunks) if len(df) > 0 else []
-    for df_chunk in chunk_iter:
-        upload_batch(df_chunk)
-
-
 DFType = TypeVar("DFType", bound=LoadableDataFrame)

@@ -157,7 +111,7 @@ def complete_load(uuid: Optional[str]) -> None:
         return
     complete_request = API.CompleteDownloadRequest(uuid=uuid)
     complete_res = krequests.put(
-        endpoint_path=API.Path.COMPLETE_DOWNLOAD,
+        endpoint_path=API.Path.COMPLETE_DOWNLOAD.value,
         data=json.dumps(dataclasses.asdict(complete_request)),
     )
     krequests.raise_for_status(complete_res)
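
The removed upload_image_chips (relocated to kolena.fr._utils per the commit message) is built around a simple chunking pattern; a self-contained sketch of just that pattern, for reference while reading its new home:

    import math
    from typing import Iterator

    import numpy as np
    import pandas as pd

    def iter_chunks(df: pd.DataFrame, batch_size: int) -> Iterator[pd.DataFrame]:
        # np.array_split tolerates a ragged final chunk; the empty-frame guard is
        # needed because array_split rejects zero sections
        num_chunks = math.ceil(len(df) / batch_size)
        yield from (np.array_split(df, num_chunks) if len(df) > 0 else [])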
4 changes: 2 additions & 2 deletions kolena/_utils/_consts.py → kolena/_utils/consts.py
@@ -14,8 +14,8 @@
 from enum import Enum


-class _BatchSize(int, Enum):
-    UPLOAD_CHIPS = 5_000
+class BatchSize(int, Enum):
+    UPLOAD_CHIPS = 1_000
     UPLOAD_RECORDS = 10_000_000
     UPLOAD_RESULTS = 1_000_000
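
The renamed BatchSize enum keeps its int mix-in, so members still behave as plain integers in arithmetic and comparisons; only their rendered form drifted in 3.11, which is why the call sites below now pass .value explicitly:

    from enum import Enum

    class BatchSize(int, Enum):
        UPLOAD_CHIPS = 1_000

    assert BatchSize.UPLOAD_CHIPS == 1_000    # int semantics hold on all versions
    print(f"{BatchSize.UPLOAD_CHIPS}")        # 3.10: 1000 -- 3.11: BatchSize.UPLOAD_CHIPS
    print(f"{BatchSize.UPLOAD_CHIPS.value}")  # 1000 everywhere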
4 changes: 2 additions & 2 deletions kolena/_utils/instrumentation.py
@@ -54,15 +54,15 @@ def upload_log(message: str, status: str) -> None:
         message=message,
         status=status,
     )
-    krequests.post(endpoint_path=API.Path.UPLOAD, json=dataclasses.asdict(request))
+    krequests.post(endpoint_path=API.Path.UPLOAD.value, json=dataclasses.asdict(request))


 def log_telemetry(e: BaseException) -> None:
     try:
         stack = tb.format_stack()
         exc_format = tb.format_exception(None, e, e.__traceback__)
         combined = stack + exc_format
-        upload_log("".join(combined), DatadogLogLevels.ERROR)
+        upload_log("".join(combined), DatadogLogLevels.ERROR.value)
     except BaseException:
         """
         Attempting to upload the telemetry is best-effort. We don't want to have exceptions in that
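
log_telemetry stitches the current call stack onto the formatted exception so the uploaded log reads as a single trace; a runnable sketch of that stitching in isolation:

    import traceback as tb

    def render_telemetry(e: BaseException) -> str:
        stack = tb.format_stack()                                   # frames leading to the handler
        exc_format = tb.format_exception(None, e, e.__traceback__)  # the exception itself
        return "".join(stack + exc_format)

    try:
        1 / 0
    except ZeroDivisionError as exc:
        print(render_telemetry(exc))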
2 changes: 1 addition & 1 deletion kolena/_utils/repository.py
@@ -21,7 +21,7 @@

 def create(repository: str) -> None:
     response = krequests.post(
-        endpoint_path=Path.CREATE,
+        endpoint_path=Path.CREATE.value,
         data=json.dumps(dataclasses.asdict(CreateRepositoryRequest(repository=repository))),
     )
     krequests.raise_for_status(response)
14 changes: 7 additions & 7 deletions kolena/detection/_internal/model.py
@@ -33,9 +33,9 @@
 from kolena._api.v1.workflow import WorkflowType
 from kolena._utils import krequests
 from kolena._utils import log
-from kolena._utils._consts import _BatchSize
 from kolena._utils.batched_load import _BatchedLoader
 from kolena._utils.batched_load import DFType
+from kolena._utils.consts import BatchSize
 from kolena._utils.frozen import Frozen
 from kolena._utils.instrumentation import WithTelemetry
 from kolena._utils.serde import from_dict
@@ -93,7 +93,7 @@ def __init__(self, name: str, workflow: WorkflowType, metadata: Optional[Dict[st
     def _create(cls, workflow: WorkflowType, name: str, metadata: Dict[str, Any]) -> CoreAPI.EntityData:
         log.info(f"creating new model '{name}'")
         request = CoreAPI.CreateRequest(name=name, metadata=metadata, workflow=workflow.value)
-        res = krequests.post(endpoint_path=API.Path.CREATE, data=json.dumps(dataclasses.asdict(request)))
+        res = krequests.post(endpoint_path=API.Path.CREATE.value, data=json.dumps(dataclasses.asdict(request)))
         krequests.raise_for_status(res)
         log.success(f"created new model '{name}'")
         return from_dict(data_class=CoreAPI.EntityData, data=res.json())
@@ -102,7 +102,7 @@ def _create(cls, workflow: WorkflowType, name: str, metadata: Dict[str, Any]) ->
     @classmethod
     @validate_arguments(config=ValidatorConfig)
     def _load_by_name(cls, name: str) -> CoreAPI.EntityData:
         request = CoreAPI.LoadByNameRequest(name=name)
-        res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME, data=json.dumps(dataclasses.asdict(request)))
+        res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME.value, data=json.dumps(dataclasses.asdict(request)))
         krequests.raise_for_status(res)
         return from_dict(data_class=CoreAPI.EntityData, data=res.json())
@@ -131,7 +131,7 @@ def iter_inferences(
     def _iter_inference_batch_for_reference(
         self,
         test_object: Union[_TestCaseClass, _TestSuiteClass],
-        batch_size: int = _BatchSize.LOAD_SAMPLES,
+        batch_size: int = BatchSize.LOAD_SAMPLES.value,
     ) -> Iterator[_LoadInferencesDataFrameClass]:
         if batch_size <= 0:
             raise InputValidationError(f"invalid batch_size '{batch_size}': expected positive integer")
@@ -143,7 +143,7 @@ def _iter_inference_batch_for_reference(
         init_request = API.InitLoadInferencesRequest(**params)
         yield from _BatchedLoader.iter_data(
             init_request=init_request,
-            endpoint_path=API.Path.INIT_LOAD_INFERENCES,
+            endpoint_path=API.Path.INIT_LOAD_INFERENCES.value,
             df_class=self._LoadInferencesDataFrameClass,
         )
         log.success(f"loaded inferences from model '{self.name}' on {test_object_display_name}")
@@ -166,7 +166,7 @@ def load_inferences_by_test_case(
     def _iter_inference_batch_for_test_suite(
         self,
         test_suite: _TestSuiteClass,
-        batch_size: int = _BatchSize.LOAD_SAMPLES,
+        batch_size: int = BatchSize.LOAD_SAMPLES.value,
     ) -> Iterator[_LoadInferencesDataFrameClass]:
         if batch_size <= 0:
             raise InputValidationError(f"invalid batch_size '{batch_size}': expected positive integer")
@@ -175,7 +175,7 @@ def _iter_inference_batch_for_test_suite(
         init_request = API.InitLoadInferencesByTestCaseRequest(**params)
         yield from _BatchedLoader.iter_data(
             init_request=init_request,
-            endpoint_path=API.Path.INIT_LOAD_INFERENCES_BY_TEST_CASE,
+            endpoint_path=API.Path.INIT_LOAD_INFERENCES_BY_TEST_CASE.value,
             df_class=self._LoadInferencesDataFrameClass,
         )
         log.success(f"loaded inferences from model '{self.name}' on test suite '{test_suite.name}'")
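
Each loader above delegates paging to _BatchedLoader.iter_data. The general shape is a cursor loop; a generic sketch with stand-in names (not kolena's actual implementation):

    from typing import Callable, Iterator, List, Optional, Tuple

    # fetch(cursor) returns one batch of records plus a continuation token, or None when done
    Fetch = Callable[[Optional[str]], Tuple[List[dict], Optional[str]]]

    def iter_data(fetch: Fetch) -> Iterator[List[dict]]:
        cursor: Optional[str] = None
        while True:
            batch, cursor = fetch(cursor)
            yield batch
            if cursor is None:
                return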
14 changes: 7 additions & 7 deletions kolena/detection/_internal/test_case.py
@@ -30,11 +30,11 @@
 from kolena._api.v1.workflow import WorkflowType
 from kolena._utils import krequests
 from kolena._utils import log
-from kolena._utils._consts import _BatchSize
 from kolena._utils.batched_load import _BatchedLoader
 from kolena._utils.batched_load import DFType
 from kolena._utils.batched_load import init_upload
 from kolena._utils.batched_load import upload_data_frame
+from kolena._utils.consts import BatchSize
 from kolena._utils.dataframes.validators import validate_df_schema
 from kolena._utils.frozen import Frozen
 from kolena._utils.instrumentation import WithTelemetry
@@ -128,7 +128,7 @@ def _create(
         """Create a new test case with the provided name."""
         log.info(f"creating new test case '{name}'")
         request = CoreAPI.CreateRequest(name=name, description=description or "", workflow=workflow.value)
-        res = krequests.post(endpoint_path=API.Path.CREATE, data=json.dumps(dataclasses.asdict(request)))
+        res = krequests.post(endpoint_path=API.Path.CREATE.value, data=json.dumps(dataclasses.asdict(request)))
         krequests.raise_for_status(res)
         data = from_dict(data_class=CoreAPI.EntityData, data=res.json())
         obj = cls._create_from_data(data)
@@ -142,7 +142,7 @@ def _create(
     def _load_by_name(cls, name: str, version: Optional[int] = None) -> CoreAPI.EntityData:
         """Load an existing test case with the provided name."""
         request = CoreAPI.LoadByNameRequest(name=name, version=version)
-        res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME, data=json.dumps(dataclasses.asdict(request)))
+        res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME.value, data=json.dumps(dataclasses.asdict(request)))
         krequests.raise_for_status(res)
         return from_dict(data_class=CoreAPI.EntityData, data=res.json())
@@ -173,10 +173,10 @@ def load_images(self) -> List[_TestImageClass]:
     def iter_images(self) -> Iterator[_TestImageClass]:
         """Iterate through all images with their associated ground truths in this test case."""
         log.info(f"loading test images for test case '{self.name}'")
-        init_request = CoreAPI.InitLoadContentsRequest(batch_size=_BatchSize.LOAD_SAMPLES, test_case_id=self._id)
+        init_request = CoreAPI.InitLoadContentsRequest(batch_size=BatchSize.LOAD_SAMPLES.value, test_case_id=self._id)
         for df in _BatchedLoader.iter_data(
             init_request=init_request,
-            endpoint_path=API.Path.INIT_LOAD_IMAGES,
+            endpoint_path=API.Path.INIT_LOAD_IMAGES.value,
             df_class=self._TestImageDataFrameClass,
         ):
             for record in df.itertuples():
@@ -312,7 +312,7 @@ def edit(self, reset: bool = False) -> Iterator[Editor]:
         init_response = init_upload()
         df = self._to_data_frame(list(editor._images.values()))
         df_serialized = df.as_serializable()
-        upload_data_frame(df=df_serialized, batch_size=_BatchSize.UPLOAD_RECORDS, load_uuid=init_response.uuid)
+        upload_data_frame(df=df_serialized, batch_size=BatchSize.UPLOAD_RECORDS.value, load_uuid=init_response.uuid)

         request = CoreAPI.CompleteEditRequest(
             test_case_id=self._id,
@@ -322,7 +322,7 @@ def edit(self, reset: bool = False) -> Iterator[Editor]:
             uuid=init_response.uuid,
         )
         complete_res = krequests.put(
-            endpoint_path=API.Path.COMPLETE_EDIT,
+            endpoint_path=API.Path.COMPLETE_EDIT.value,
             data=json.dumps(dataclasses.asdict(request)),
         )
         krequests.raise_for_status(complete_res)
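
Taken together, edit() follows a three-step batched-upload protocol. A condensed sketch using names visible in this diff (df_serialized is assumed to be prepared as shown above):

    from kolena._utils.batched_load import init_upload, upload_data_frame
    from kolena._utils.consts import BatchSize

    init_response = init_upload()  # 1. reserve a server-side load UUID
    upload_data_frame(             # 2. stream the serialized records in batches
        df=df_serialized,
        batch_size=BatchSize.UPLOAD_RECORDS.value,
        load_uuid=init_response.uuid,
    )
    # 3. commit against the same UUID with a PUT to API.Path.COMPLETE_EDIT.value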