Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support Python 3.11 #7

Merged
merged 23 commits into from
May 11, 2023
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
c69e9b5
Pin transient urllib3 dependency to unbreak requests==2.30
gordonhart May 9, 2023
01d67c6
Pin to requests-toolbelt>=1 instead of pinning transient urllib3
gordonhart May 9, 2023
0094a1f
Add back importlib-metadata dependency
gordonhart May 9, 2023
900513d
Allow Python 3.11 and add version to CI matrix
gordonhart May 9, 2023
cac7341
Pin urllib3 per requests issue recommendation, leave TODO
gordonhart May 9, 2023
9e7aced
Pin transient urllib3 dependency to unbreak requests==2.30
gordonhart May 9, 2023
5c94727
Pin to requests-toolbelt>=1 instead of pinning transient urllib3
gordonhart May 9, 2023
920ee06
Add back importlib-metadata dependency
gordonhart May 9, 2023
4b821d7
Merge branch 'trunk' into gh/pin-urllib3
gordonhart May 9, 2023
6093dce
Merge branch 'gh/pin-urllib3' into gh/3.11
gordonhart May 9, 2023
3e9baba
Tediously update all enum usages to use Enum.KEY.value instead of Enu…
gordonhart May 10, 2023
3605ad4
Move FR-specific upload_image_chips to kolena.fr._utils; remove kolen…
gordonhart May 10, 2023
2555b09
Revert {_=>}BatchedLoader diff, this PR is not the place for that
gordonhart May 10, 2023
ddda9f8
Merge branch 'trunk' into gh/3.11
gordonhart May 10, 2023
9aef8e8
Start updating integration tests with new description edit semantics
gordonhart May 10, 2023
80faca6
Add missing pytest-depends dev dependency
gordonhart May 10, 2023
53c6a20
Fix FR test__edit__reset
gordonhart May 10, 2023
d682515
Assert description update without version update
gordonhart May 10, 2023
643dc5a
Merge branch 'trunk' into gh/3.11
gordonhart May 11, 2023
868464c
Unbreak upload_image_chips after move
gordonhart May 11, 2023
d00ef3b
Unbreak FR test suite test__load by comparing data to data
gordonhart May 11, 2023
23ffad5
Remove bad comparison
gordonhart May 11, 2023
c04c4ed
Merge branch 'trunk' into gh/3.11
gordonhart May 11, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,34 +21,44 @@ jobs:
POETRY_CACHE_DIR: /home/circleci/project/.poetry
steps:
- checkout

- restore_cache:
key: &kolena-ci-cache kolena-ci-cache-<< parameters.python-version >>-{{ checksum "pyproject.toml" }}

- run: |
poetry config experimental.new-installer false
poetry config installer.max-workers 10
poetry install --no-ansi

- save_cache:
key: *kolena-ci-cache
paths:
- /home/circleci/project/.poetry/virtualenvs
- poetry.lock

- run: poetry run python3 -c 'import kolena'

# TODO: fix underlying mypy issues with Python>3.9 rather than skipping
- when:
condition:
not:
# TODO: upgrade mypy version to address https://github.com/python/mypy/issues/13627
equal: [ "3.10", << parameters.python-version >> ]
or:
- equal: [ "3.10", << parameters.python-version >> ]
- equal: [ "3.11", << parameters.python-version >> ]
steps:
- run: poetry run pre-commit run -a

- run:
name: Run unit tests
command: |
poetry run pytest -vv --cov=kolena --cov-branch tests/unit

- run:
name: Run integration tests
command: |
export KOLENA_TOKEN=${KOLENA_TOKEN_PROD}
poetry run pytest -vv --cov-append --durations=0 --cov=kolena --cov-branch tests/integration

- when:
# Generate coverage from only one Python version
condition:
Expand All @@ -67,4 +77,4 @@ workflows:
- kolena-ci:
matrix:
parameters:
python-version: [ "3.7", "3.8", "3.9", "3.10" ]
python-version: [ "3.7", "3.8", "3.9", "3.10", "3.11" ]
4 changes: 2 additions & 2 deletions kolena/_api/v1/batched_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ class Path(str, Enum):

@classmethod
def upload_signed_url(cls, load_uuid: str) -> str:
return f"{cls.UPLOAD_SIGNED_URL_STUB}/{load_uuid}"
return f"{cls.UPLOAD_SIGNED_URL_STUB.value}/{load_uuid}"

@classmethod
def download_by_path(cls, path: str) -> str:
return f"{cls.DOWNLOAD_BY_PATH_STUB}/{path}"
return f"{cls.DOWNLOAD_BY_PATH_STUB.value}/{path}"

@dataclass(frozen=True)
class WithLoadUUID:
Expand Down
19 changes: 0 additions & 19 deletions kolena/_api/v1/samples.py

This file was deleted.

3 changes: 2 additions & 1 deletion kolena/_utils/asset_path_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def absolute_locator(self, test_run_id: int, load_uuid: str, image_id: int, key:
def relative_locator(self, path_stub: str) -> str:
return f"{self.prefix}/{path_stub}"

def path_stub(self, test_run_id: int, load_uuid: str, image_id: int, key: str) -> str:
@staticmethod
def path_stub(test_run_id: int, load_uuid: str, image_id: int, key: str) -> str:
return f"{test_run_id}/{image_id}/{key}-{load_uuid}.png"

def _absolute_locator(self, relative_locator: str) -> str:
Expand Down
56 changes: 5 additions & 51 deletions kolena/_utils/batched_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,26 +26,20 @@
import numpy as np
import pandas as pd
import requests
from PIL import Image
from requests_toolbelt import MultipartEncoder
from retrying import retry

from kolena._api.v1.batched_load import BatchedLoad as API
from kolena._api.v1.fr import Asset as AssetAPI
from kolena._utils import krequests
from kolena._utils import log
from kolena._utils.asset_path_mapper import AssetPathMapper
from kolena._utils.datatypes import LoadableDataFrame
from kolena._utils.serde import from_dict
from kolena.fr._consts import _BatchSize
from kolena.fr.datatypes import _ImageChipsDataFrame

VALIDATION_COUNT_LIMIT = 100
STAGE_STATUS__LOADED = "LOADED"


def init_upload() -> API.InitiateUploadResponse:
init_res = krequests.put(endpoint_path=API.Path.INIT_UPLOAD)
init_res = krequests.put(endpoint_path=API.Path.INIT_UPLOAD.value)
krequests.raise_for_status(init_res)
init_response = from_dict(data_class=API.InitiateUploadResponse, data=init_res.json())
return init_response
Expand Down Expand Up @@ -78,50 +72,10 @@ def upload_data_frame_chunk(df_chunk: pd.DataFrame, load_uuid: str) -> None:
krequests.raise_for_status(upload_response)


def upload_image_chips(
df: _ImageChipsDataFrame,
path_mapper: AssetPathMapper,
batch_size: int = _BatchSize.UPLOAD_CHIPS,
) -> None:
def upload_batch(df_batch: _ImageChipsDataFrame) -> None:
df_batch = df_batch.reset_index(drop=True) # reset indices so we match the signed_url indices

def as_buffer(image_raw: np.ndarray) -> io.BytesIO:
pil_image = Image.fromarray(image_raw).convert("RGB")
buf = io.BytesIO()
pil_image.save(buf, "png")
buf.seek(0)
return buf

data = MultipartEncoder(
fields=[
(
"files",
(
path_mapper.path_stub(row["test_run_id"], row["uuid"], row["image_id"], row["key"]),
as_buffer(row["image"]),
),
)
for _, row in df_batch.iterrows()
],
)
upload_response = krequests.put(
endpoint_path=AssetAPI.Path.BULK_UPLOAD,
data=data,
headers={"Content-Type": data.content_type},
)
krequests.raise_for_status(upload_response)

num_chunks = math.ceil(len(df) / batch_size)
chunk_iter = np.array_split(df, num_chunks) if len(df) > 0 else []
for df_chunk in chunk_iter:
upload_batch(df_chunk)


DFType = TypeVar("DFType", bound=LoadableDataFrame)


class _BatchedLoader(Generic[DFType]):
class BatchedLoader(Generic[DFType]):
@staticmethod
def load_path(path: str, df_class: Type[DFType]) -> DFType:
with tempfile.TemporaryFile() as tmp:
Expand Down Expand Up @@ -157,7 +111,7 @@ def complete_load(uuid: Optional[str]) -> None:
return
complete_request = API.CompleteDownloadRequest(uuid=uuid)
complete_res = krequests.put(
endpoint_path=API.Path.COMPLETE_DOWNLOAD,
endpoint_path=API.Path.COMPLETE_DOWNLOAD.value,
data=json.dumps(dataclasses.asdict(complete_request)),
)
krequests.raise_for_status(complete_res)
Expand All @@ -182,6 +136,6 @@ def iter_data(
data=json.loads(line),
)
load_uuid = partial_response.uuid
yield _BatchedLoader.load_path(partial_response.path, df_class)
yield BatchedLoader.load_path(partial_response.path, df_class)
finally:
_BatchedLoader.complete_load(load_uuid)
BatchedLoader.complete_load(load_uuid)
4 changes: 2 additions & 2 deletions kolena/_utils/_consts.py → kolena/_utils/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
from enum import Enum


class _BatchSize(int, Enum):
UPLOAD_CHIPS = 5_000
class BatchSize(int, Enum):
UPLOAD_CHIPS = 1_000
UPLOAD_RECORDS = 10_000_000
UPLOAD_RESULTS = 1_000_000

Expand Down
4 changes: 2 additions & 2 deletions kolena/_utils/instrumentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,15 @@ def upload_log(message: str, status: str) -> None:
message=message,
status=status,
)
krequests.post(endpoint_path=API.Path.UPLOAD, json=dataclasses.asdict(request))
krequests.post(endpoint_path=API.Path.UPLOAD.value, json=dataclasses.asdict(request))


def log_telemetry(e: BaseException) -> None:
try:
stack = tb.format_stack()
exc_format = tb.format_exception(None, e, e.__traceback__)
combined = stack + exc_format
upload_log("".join(combined), DatadogLogLevels.ERROR)
upload_log("".join(combined), DatadogLogLevels.ERROR.value)
except BaseException:
"""
Attempting to upload the telemetry is best-effort. We don't want to have exceptions in that
Expand Down
2 changes: 1 addition & 1 deletion kolena/_utils/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

def create(repository: str) -> None:
response = krequests.post(
endpoint_path=Path.CREATE,
endpoint_path=Path.CREATE.value,
data=json.dumps(dataclasses.asdict(CreateRepositoryRequest(repository=repository))),
)
krequests.raise_for_status(response)
20 changes: 10 additions & 10 deletions kolena/detection/_internal/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@
from kolena._api.v1.workflow import WorkflowType
from kolena._utils import krequests
from kolena._utils import log
from kolena._utils._consts import _BatchSize
from kolena._utils.batched_load import _BatchedLoader
from kolena._utils.batched_load import BatchedLoader
from kolena._utils.batched_load import DFType
from kolena._utils.consts import BatchSize
from kolena._utils.frozen import Frozen
from kolena._utils.instrumentation import WithTelemetry
from kolena._utils.serde import from_dict
Expand Down Expand Up @@ -93,7 +93,7 @@ def __init__(self, name: str, workflow: WorkflowType, metadata: Optional[Dict[st
def _create(cls, workflow: WorkflowType, name: str, metadata: Dict[str, Any]) -> CoreAPI.EntityData:
log.info(f"creating new model '{name}'")
request = CoreAPI.CreateRequest(name=name, metadata=metadata, workflow=workflow.value)
res = krequests.post(endpoint_path=API.Path.CREATE, data=json.dumps(dataclasses.asdict(request)))
res = krequests.post(endpoint_path=API.Path.CREATE.value, data=json.dumps(dataclasses.asdict(request)))
krequests.raise_for_status(res)
log.success(f"created new model '{name}'")
return from_dict(data_class=CoreAPI.EntityData, data=res.json())
Expand All @@ -102,7 +102,7 @@ def _create(cls, workflow: WorkflowType, name: str, metadata: Dict[str, Any]) ->
@validate_arguments(config=ValidatorConfig)
def _load_by_name(cls, name: str) -> CoreAPI.EntityData:
request = CoreAPI.LoadByNameRequest(name=name)
res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME, data=json.dumps(dataclasses.asdict(request)))
res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME.value, data=json.dumps(dataclasses.asdict(request)))
krequests.raise_for_status(res)
return from_dict(data_class=CoreAPI.EntityData, data=res.json())

Expand Down Expand Up @@ -131,7 +131,7 @@ def iter_inferences(
def _iter_inference_batch_for_reference(
self,
test_object: Union[_TestCaseClass, _TestSuiteClass],
batch_size: int = _BatchSize.LOAD_SAMPLES,
batch_size: int = BatchSize.LOAD_SAMPLES.value,
) -> Iterator[_LoadInferencesDataFrameClass]:
if batch_size <= 0:
raise InputValidationError(f"invalid batch_size '{batch_size}': expected positive integer")
Expand All @@ -141,9 +141,9 @@ def _iter_inference_batch_for_reference(
test_id_key = "test_case_id" if isinstance(test_object, self._TestCaseClass) else "test_suite_id"
params = dict(model_id=self._id, batch_size=batch_size, **{test_id_key: test_object._id})
init_request = API.InitLoadInferencesRequest(**params)
yield from _BatchedLoader.iter_data(
yield from BatchedLoader.iter_data(
init_request=init_request,
endpoint_path=API.Path.INIT_LOAD_INFERENCES,
endpoint_path=API.Path.INIT_LOAD_INFERENCES.value,
df_class=self._LoadInferencesDataFrameClass,
)
log.success(f"loaded inferences from model '{self.name}' on {test_object_display_name}")
Expand All @@ -166,16 +166,16 @@ def load_inferences_by_test_case(
def _iter_inference_batch_for_test_suite(
self,
test_suite: _TestSuiteClass,
batch_size: int = _BatchSize.LOAD_SAMPLES,
batch_size: int = BatchSize.LOAD_SAMPLES.value,
) -> Iterator[_LoadInferencesDataFrameClass]:
if batch_size <= 0:
raise InputValidationError(f"invalid batch_size '{batch_size}': expected positive integer")
log.info(f"loading inferences from model '{self.name}' on test suite '{test_suite.name}'")
params = dict(model_id=self._id, batch_size=batch_size, test_suite_id=test_suite._id)
init_request = API.InitLoadInferencesByTestCaseRequest(**params)
yield from _BatchedLoader.iter_data(
yield from BatchedLoader.iter_data(
init_request=init_request,
endpoint_path=API.Path.INIT_LOAD_INFERENCES_BY_TEST_CASE,
endpoint_path=API.Path.INIT_LOAD_INFERENCES_BY_TEST_CASE.value,
df_class=self._LoadInferencesDataFrameClass,
)
log.success(f"loaded inferences from model '{self.name}' on test suite '{test_suite.name}'")
Expand Down
18 changes: 9 additions & 9 deletions kolena/detection/_internal/test_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,11 @@
from kolena._api.v1.workflow import WorkflowType
from kolena._utils import krequests
from kolena._utils import log
from kolena._utils._consts import _BatchSize
from kolena._utils.batched_load import _BatchedLoader
from kolena._utils.batched_load import BatchedLoader
from kolena._utils.batched_load import DFType
from kolena._utils.batched_load import init_upload
from kolena._utils.batched_load import upload_data_frame
from kolena._utils.consts import BatchSize
from kolena._utils.dataframes.validators import validate_df_schema
from kolena._utils.frozen import Frozen
from kolena._utils.instrumentation import WithTelemetry
Expand Down Expand Up @@ -128,7 +128,7 @@ def _create(
"""Create a new test case with the provided name."""
log.info(f"creating new test case '{name}'")
request = CoreAPI.CreateRequest(name=name, description=description or "", workflow=workflow.value)
res = krequests.post(endpoint_path=API.Path.CREATE, data=json.dumps(dataclasses.asdict(request)))
res = krequests.post(endpoint_path=API.Path.CREATE.value, data=json.dumps(dataclasses.asdict(request)))
krequests.raise_for_status(res)
data = from_dict(data_class=CoreAPI.EntityData, data=res.json())
obj = cls._create_from_data(data)
Expand All @@ -142,7 +142,7 @@ def _create(
def _load_by_name(cls, name: str, version: Optional[int] = None) -> CoreAPI.EntityData:
"""Load an existing test case with the provided name."""
request = CoreAPI.LoadByNameRequest(name=name, version=version)
res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME, data=json.dumps(dataclasses.asdict(request)))
res = krequests.put(endpoint_path=API.Path.LOAD_BY_NAME.value, data=json.dumps(dataclasses.asdict(request)))
krequests.raise_for_status(res)
return from_dict(data_class=CoreAPI.EntityData, data=res.json())

Expand Down Expand Up @@ -173,10 +173,10 @@ def load_images(self) -> List[_TestImageClass]:
def iter_images(self) -> Iterator[_TestImageClass]:
"""Iterate through all images with their associated ground truths in this test case."""
log.info(f"loading test images for test case '{self.name}'")
init_request = CoreAPI.InitLoadContentsRequest(batch_size=_BatchSize.LOAD_SAMPLES, test_case_id=self._id)
for df in _BatchedLoader.iter_data(
init_request = CoreAPI.InitLoadContentsRequest(batch_size=BatchSize.LOAD_SAMPLES.value, test_case_id=self._id)
for df in BatchedLoader.iter_data(
init_request=init_request,
endpoint_path=API.Path.INIT_LOAD_IMAGES,
endpoint_path=API.Path.INIT_LOAD_IMAGES.value,
df_class=self._TestImageDataFrameClass,
):
for record in df.itertuples():
Expand Down Expand Up @@ -312,7 +312,7 @@ def edit(self, reset: bool = False) -> Iterator[Editor]:
init_response = init_upload()
df = self._to_data_frame(list(editor._images.values()))
df_serialized = df.as_serializable()
upload_data_frame(df=df_serialized, batch_size=_BatchSize.UPLOAD_RECORDS, load_uuid=init_response.uuid)
upload_data_frame(df=df_serialized, batch_size=BatchSize.UPLOAD_RECORDS.value, load_uuid=init_response.uuid)

request = CoreAPI.CompleteEditRequest(
test_case_id=self._id,
Expand All @@ -322,7 +322,7 @@ def edit(self, reset: bool = False) -> Iterator[Editor]:
uuid=init_response.uuid,
)
complete_res = krequests.put(
endpoint_path=API.Path.COMPLETE_EDIT,
endpoint_path=API.Path.COMPLETE_EDIT.value,
data=json.dumps(dataclasses.asdict(request)),
)
krequests.raise_for_status(complete_res)
Expand Down
Loading