From 75453044afdf1292369264fc48bbc1068e1af3df Mon Sep 17 00:00:00 2001 From: Nan Wei Date: Tue, 17 Dec 2024 14:36:59 -0500 Subject: [PATCH] Revert "feat: add special datatype for timestamp (#730)" This reverts commit d4504d2f022e0f2b4c170527dac872c41d75db72. --- docs/reference/experimental/index.md | 3 - .../multiclass/upload_results.py | 2 +- kolena/_experimental/special_data_type.py | 85 ------------------- kolena/_utils/datatypes.py | 3 - kolena/annotation.py | 1 - tests/integration/dataset/test_dataset.py | 11 +-- .../_experimental/test_special_data_type.py | 83 ------------------ 7 files changed, 4 insertions(+), 184 deletions(-) delete mode 100644 kolena/_experimental/special_data_type.py delete mode 100644 tests/unit/_experimental/test_special_data_type.py diff --git a/docs/reference/experimental/index.md b/docs/reference/experimental/index.md index 9a5c16933..cd9ea994c 100644 --- a/docs/reference/experimental/index.md +++ b/docs/reference/experimental/index.md @@ -24,6 +24,3 @@ options: members: ["download_results_by_tag"] show_root_heading: true -::: kolena._experimental.special_data_type - options: - show_root_heading: true diff --git a/examples/dataset/classification/classification/multiclass/upload_results.py b/examples/dataset/classification/classification/multiclass/upload_results.py index f7f08ff26..459ccc61b 100644 --- a/examples/dataset/classification/classification/multiclass/upload_results.py +++ b/examples/dataset/classification/classification/multiclass/upload_results.py @@ -22,9 +22,9 @@ from classification.multiclass.constants import DATASET from classification.multiclass.constants import ID_FIELDS -from kolena.annotation import ScoredClassificationLabel from kolena.dataset import download_dataset from kolena.dataset import upload_results +from kolena.workflow.annotation import ScoredClassificationLabel MODELS = ["resnet50v2", "inceptionv3"] diff --git a/kolena/_experimental/special_data_type.py b/kolena/_experimental/special_data_type.py deleted file mode 100644 index e36896c8e..000000000 --- a/kolena/_experimental/special_data_type.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright 2021-2024 Kolena Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Special data types supported on the Kolena platform. - -""" # noqa: E501 -from abc import ABCMeta -from datetime import datetime -from typing import Optional - -from kolena._utils.datatypes import DataCategory -from kolena._utils.datatypes import DataType -from kolena._utils.datatypes import TypedDataObject -from kolena._utils.pydantic_v1.dataclasses import dataclass -from kolena._utils.validators import ValidatorConfig - - -class _SpecialDataType(DataType): - TIMESTAMP = "TIMESTAMP" - - @staticmethod - def _data_category() -> DataCategory: - return DataCategory.SPECIAL - - -@dataclass(frozen=True, config=ValidatorConfig) -class SpecialDataType(TypedDataObject[_SpecialDataType], metaclass=ABCMeta): - """The base class for all special data types.""" - - -@dataclass(frozen=True, config=ValidatorConfig) -class Timestamp(SpecialDataType): - """ - !!! note "Experimental" - This class is considered **experimental** - - Timestamp data type. - """ - - epoch_time: Optional[float] = None - """The epoch time of the timestamp. If `value` and `format` are specified, the `epoch_time` will be calculated.""" - - value: Optional[str] = None - """ - The timestamp in a string representation. If present, the corresponding `format` must be specified too. - Note that GMT timezone is assumed unless the offset is specified in the string. - """ - - format: Optional[str] = None - """ - The format of the `value` string following the - [python format codes](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes). - """ - - @staticmethod - def _data_type() -> _SpecialDataType: - return _SpecialDataType.TIMESTAMP - - def __post_init__(self) -> None: - if self.value: - if not self.format: - raise ValueError("format needs to be specified for string timestamp") - if "%z" in self.format: - time_value = self.value - time_format = self.format - else: - time_value = self.value + " +0000" - time_format = self.format + " %z" - - object.__setattr__( - self, - "epoch_time", - datetime.strptime(time_value, time_format).timestamp(), - ) diff --git a/kolena/_utils/datatypes.py b/kolena/_utils/datatypes.py index 2850d9041..1eec79b06 100644 --- a/kolena/_utils/datatypes.py +++ b/kolena/_utils/datatypes.py @@ -85,7 +85,6 @@ class DataCategory(str, Enum): METRICS = "METRICS" ASSET = "ASSET" ANNOTATION = "ANNOTATION" - SPECIAL = "SPECIAL" def data_category_to_module_name(self) -> str: if self == DataCategory.TEST_SAMPLE: @@ -98,8 +97,6 @@ def data_category_to_module_name(self) -> str: return "kolena.asset" if self == DataCategory.ANNOTATION: return "kolena.annotation" - if self == DataCategory.SPECIAL: - return "kolena._experimental.data_type.special" raise ValueError(f"Must specify module name for data category: {self}") diff --git a/kolena/annotation.py b/kolena/annotation.py index 424d3fdf0..02f8fceed 100644 --- a/kolena/annotation.py +++ b/kolena/annotation.py @@ -62,7 +62,6 @@ class _AnnotationType(DataType): TIME_SEGMENT = "TIME_SEGMENT" TEXT_SEGMENT = "TEXT_SEGMENT" CUSTOM = "CUSTOM" - TIMESTAMP = "TIMESTAMP" @staticmethod def _data_category() -> DataCategory: diff --git a/tests/integration/dataset/test_dataset.py b/tests/integration/dataset/test_dataset.py index dfc0b2a12..d053b1dc2 100644 --- a/tests/integration/dataset/test_dataset.py +++ b/tests/integration/dataset/test_dataset.py @@ -21,9 +21,6 @@ import pytest from kolena._api.v2.dataset import CommitData -from kolena._experimental.special_data_type import Timestamp -from kolena.annotation import BoundingBox -from kolena.annotation import LabeledBoundingBox from kolena.dataset import download_dataset from kolena.dataset import list_datasets from kolena.dataset import upload_dataset @@ -31,6 +28,8 @@ from kolena.dataset.dataset import _load_dataset_metadata from kolena.errors import InputValidationError from kolena.errors import NotFoundError +from kolena.workflow.annotation import BoundingBox +from kolena.workflow.annotation import LabeledBoundingBox from tests.integration.helper import assert_frame_equal from tests.integration.helper import fake_locator from tests.integration.helper import upload_extracted_properties @@ -84,8 +83,6 @@ def test__upload_dataset() -> None: LabeledBoundingBox(label="cat", top_left=[i, i], bottom_right=[i + 10, i + 10]), LabeledBoundingBox(label="dog", top_left=[i + 5, i + 5], bottom_right=[i + 20, i + 20]), ], - time_str=Timestamp(value=f"12/31/2024, 00:00:{'{:02d}'.format(i)}", format="%m/%d/%Y, %H:%M:%S"), - time_num=Timestamp(epoch_time=1735689600 + i), ) for i in range(20) ] @@ -99,12 +96,10 @@ def test__upload_dataset() -> None: BoundingBox(label=bbox.label, top_left=bbox.top_left, bottom_right=bbox.bottom_right) for bbox in dp["bboxes"] ], - time_str=dp["time_str"], - time_num=dp["time_num"], ) for dp in datapoints ] - columns = ["locator", "width", "height", "city", "bboxes", "time_str", "time_num"] + columns = ["locator", "width", "height", "city", "bboxes"] upload_dataset(name, pd.DataFrame(datapoints[:10], columns=columns), id_fields=["locator"]) diff --git a/tests/unit/_experimental/test_special_data_type.py b/tests/unit/_experimental/test_special_data_type.py deleted file mode 100644 index 8ba781c21..000000000 --- a/tests/unit/_experimental/test_special_data_type.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright 2021-2024 Kolena Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from typing import Any -from typing import Dict -from typing import Optional - -import pytest - -from kolena._experimental.special_data_type import _SpecialDataType -from kolena._experimental.special_data_type import Timestamp -from kolena._utils.datatypes import DATA_TYPE_FIELD - - -@pytest.mark.parametrize( - "object, json_data", - [ - ( - Timestamp(epoch_time=1700000000), - { - "epoch_time": 1700000000, - "value": None, - "format": None, - }, - ), - ( - Timestamp(value="12/31/2024, 00:00:00", format="%m/%d/%Y, %H:%M:%S"), - { - "epoch_time": 1735603200, - "value": "12/31/2024, 00:00:00", - "format": "%m/%d/%Y, %H:%M:%S", - }, - ), - ], -) -def test__serde__timestamp(object: Timestamp, json_data: Dict[str, Any]) -> None: - object_dict = object._to_dict() - assert object_dict == { - **json_data, - DATA_TYPE_FIELD: f"{_SpecialDataType._data_category().value}/{_SpecialDataType.TIMESTAMP.value}", - } - assert Timestamp._from_dict(object_dict) == object - - -@pytest.mark.parametrize( - "value, format, epoch_time", - [ - ("12/31/2024, 00:00:00", "%m/%d/%Y, %H:%M:%S", 1735603200), - ("25/05/99 02:35:5.523", "%d/%m/%y %H:%M:%S.%f", 927599705.523), - ("2021/05/25", "%Y/%m/%d", 1621900800), - ("2021-05-25 02:35:15", "%Y-%m-%d %H:%M:%S", 1621910115), - ("Tuesday, December 31, 2024 5:00:00 AM", "%A, %B %d, %Y %H:%M:%S %p", 1735621200), - ("Tuesday, December 31, 2024 00:00:00 AM GMT-05:00", "%A, %B %d, %Y %H:%M:%S %p %Z%z", 1735621200), - ("Tuesday, December 31, 2024 00:00:00 AM UTC-05:00", "%A, %B %d, %Y %H:%M:%S %p %Z%z", 1735621200), - ], -) -def test__timestamp_epoch_conversion(value: str, format: str, epoch_time: float) -> None: - timestamp_object = Timestamp(value=value, format=format) - assert epoch_time == timestamp_object.epoch_time - - -@pytest.mark.parametrize( - "value, format", - [ - # value without format - ("12/31/2024, 00:00:00", None), - # format inconsistent with value - ("12/31/2024, 00:00:00", "%m/%d/%Y, %s"), - ], -) -def test__timestamp_validation(value: str, format: Optional[str]) -> None: - with pytest.raises(ValueError): - Timestamp(value=value, format=format)