Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: add back special datatype for timestamp #733

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/reference/experimental/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,6 @@
options:
members: ["download_results_by_tag"]
show_root_heading: true
::: kolena._experimental.special_data_type
options:
show_root_heading: true
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
from classification.multiclass.constants import DATASET
from classification.multiclass.constants import ID_FIELDS

from kolena.annotation import ScoredClassificationLabel
from kolena.dataset import download_dataset
from kolena.dataset import upload_results
from kolena.workflow.annotation import ScoredClassificationLabel


MODELS = ["resnet50v2", "inceptionv3"]
Expand Down
83 changes: 83 additions & 0 deletions kolena/_experimental/special_data_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Copyright 2021-2024 Kolena Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Special data types supported on the Kolena platform.

""" # noqa: E501
from abc import ABCMeta
from datetime import datetime
from typing import Optional

import pytz

from kolena._utils.datatypes import DataCategory
from kolena._utils.datatypes import DataType
from kolena._utils.datatypes import TypedDataObject
from kolena._utils.pydantic_v1.dataclasses import dataclass
from kolena._utils.validators import ValidatorConfig


class _SpecialDataType(DataType):
    """Type tags for the special data types declared in this module."""

    TIMESTAMP = "TIMESTAMP"

    @staticmethod
    def _data_category() -> DataCategory:
        """Return the data category that every special data type belongs to."""
        category = DataCategory.SPECIAL
        return category


@dataclass(config=ValidatorConfig, frozen=True)
class SpecialDataType(TypedDataObject[_SpecialDataType], metaclass=ABCMeta):
    """
    The base class for all special data types.

    It declares no fields of its own; concrete special data types subclass it.
    """


@dataclass(frozen=True, config=ValidatorConfig)
class Timestamp(SpecialDataType):
    """
    !!! note "Experimental"
        This class is considered **experimental**

    Timestamp data type.

    A timestamp is given either directly through `epoch_time`, or as a string `value` (optionally accompanied by a
    `format`), in which case `epoch_time` is derived from the string when the object is created.
    """

    epoch_time: Optional[float] = None
    """
    The epoch time of the timestamp. If `value` is specified, the `epoch_time` will be calculated from it (using
    `format` when provided, ISO format otherwise) and replaces any `epoch_time` passed explicitly.
    """

    value: Optional[str] = None
    """
    The timestamp in a string representation. Note that GMT timezone is assumed unless the offset is specified in the
    string.
    """

    format: Optional[str] = None
    """
    The format of the `value` string following the
    [python format codes](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes). If not
    provided, the `value` will be parsed using
    [python's `fromisoformat()`](https://docs.python.org/3/library/datetime.html#datetime.datetime.fromisoformat).
    """

    @staticmethod
    def _data_type() -> _SpecialDataType:
        """Return the type tag identifying this class as a timestamp."""
        return _SpecialDataType.TIMESTAMP

    def __post_init__(self) -> None:
        """
        Derive `epoch_time` from `value` when a non-empty `value` is given.

        Raises:
            ValueError: If `value` does not match `format`, or is not a valid ISO format string when no `format`
                is given.
        """
        # nothing to derive: keep whatever `epoch_time` was passed in (possibly None)
        if not self.value:
            return

        if self.format:
            time_obj = datetime.strptime(self.value, self.format)
        else:
            time_obj = datetime.fromisoformat(self.value)

        # assume GMT if timezone is not provided
        if time_obj.tzinfo is None:
            time_obj = pytz.utc.localize(time_obj)

        # the dataclass is frozen, so regular attribute assignment is not available here
        object.__setattr__(self, "epoch_time", time_obj.timestamp())
3 changes: 3 additions & 0 deletions kolena/_utils/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ class DataCategory(str, Enum):
METRICS = "METRICS"
ASSET = "ASSET"
ANNOTATION = "ANNOTATION"
SPECIAL = "SPECIAL"

def data_category_to_module_name(self) -> str:
if self == DataCategory.TEST_SAMPLE:
Expand All @@ -97,6 +98,8 @@ def data_category_to_module_name(self) -> str:
return "kolena.asset"
if self == DataCategory.ANNOTATION:
return "kolena.annotation"
if self == DataCategory.SPECIAL:
return "kolena._experimental.data_type.special"
raise ValueError(f"Must specify module name for data category: {self}")


Expand Down
1 change: 1 addition & 0 deletions kolena/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class _AnnotationType(DataType):
TIME_SEGMENT = "TIME_SEGMENT"
TEXT_SEGMENT = "TEXT_SEGMENT"
CUSTOM = "CUSTOM"
TIMESTAMP = "TIMESTAMP"

@staticmethod
def _data_category() -> DataCategory:
Expand Down
11 changes: 8 additions & 3 deletions tests/integration/dataset/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,16 @@
import pytest

from kolena._api.v2.dataset import CommitData
from kolena._experimental.special_data_type import Timestamp
from kolena.annotation import BoundingBox
from kolena.annotation import LabeledBoundingBox
from kolena.dataset import download_dataset
from kolena.dataset import list_datasets
from kolena.dataset import upload_dataset
from kolena.dataset.dataset import _fetch_dataset_history
from kolena.dataset.dataset import _load_dataset_metadata
from kolena.errors import InputValidationError
from kolena.errors import NotFoundError
from kolena.workflow.annotation import BoundingBox
from kolena.workflow.annotation import LabeledBoundingBox
from tests.integration.helper import assert_frame_equal
from tests.integration.helper import fake_locator
from tests.integration.helper import upload_extracted_properties
Expand Down Expand Up @@ -83,6 +84,8 @@ def test__upload_dataset() -> None:
LabeledBoundingBox(label="cat", top_left=[i, i], bottom_right=[i + 10, i + 10]),
LabeledBoundingBox(label="dog", top_left=[i + 5, i + 5], bottom_right=[i + 20, i + 20]),
],
time_str=Timestamp(value=f"12/31/2024, 00:00:{'{:02d}'.format(i)}", format="%m/%d/%Y, %H:%M:%S"),
time_num=Timestamp(epoch_time=1735689600 + i),
)
for i in range(20)
]
Expand All @@ -96,10 +99,12 @@ def test__upload_dataset() -> None:
BoundingBox(label=bbox.label, top_left=bbox.top_left, bottom_right=bbox.bottom_right)
for bbox in dp["bboxes"]
],
time_str=dp["time_str"],
time_num=dp["time_num"],
)
for dp in datapoints
]
columns = ["locator", "width", "height", "city", "bboxes"]
columns = ["locator", "width", "height", "city", "bboxes", "time_str", "time_num"]

upload_dataset(name, pd.DataFrame(datapoints[:10], columns=columns), id_fields=["locator"])

Expand Down
99 changes: 99 additions & 0 deletions tests/unit/_experimental/test_special_data_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# Copyright 2021-2024 Kolena Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any
from typing import Dict
from typing import Optional

import pytest

from kolena._experimental.special_data_type import _SpecialDataType
from kolena._experimental.special_data_type import Timestamp
from kolena._utils.datatypes import DATA_TYPE_FIELD


@pytest.mark.parametrize(
    "object, json_data",
    [
        (
            Timestamp(epoch_time=1700000000),
            {"epoch_time": 1700000000, "value": None, "format": None},
        ),
        (
            Timestamp(value="12/31/2024, 00:00:00", format="%m/%d/%Y, %H:%M:%S"),
            {"epoch_time": 1735603200, "value": "12/31/2024, 00:00:00", "format": "%m/%d/%Y, %H:%M:%S"},
        ),
    ],
)
def test__serde__timestamp(object: Timestamp, json_data: Dict[str, Any]) -> None:
    """Check that a `Timestamp` serializes to the expected dict and deserializes back to an equal object."""
    type_tag = f"{_SpecialDataType._data_category().value}/{_SpecialDataType.TIMESTAMP.value}"
    expected_dict = dict(json_data)
    expected_dict[DATA_TYPE_FIELD] = type_tag

    serialized = object._to_dict()
    assert serialized == expected_dict

    restored = Timestamp._from_dict(serialized)
    assert restored == object


@pytest.mark.parametrize(
    "value, format, epoch_time",
    [
        ("12/31/2024, 00:00:00", "%m/%d/%Y, %H:%M:%S", 1735603200),
        ("25/05/99 02:35:5.523", "%d/%m/%y %H:%M:%S.%f", 927599705.523),
        ("2021/05/25", "%Y/%m/%d", 1621900800),
        ("2021-05-25 02:35:15", "%Y-%m-%d %H:%M:%S", 1621910115),
        ("Tuesday, December 31, 2024 5:00:00 AM", "%A, %B %d, %Y %H:%M:%S %p", 1735621200),
        ("Tuesday, December 31, 2024 00:00:00 AM GMT-05:00", "%A, %B %d, %Y %H:%M:%S %p %Z%z", 1735621200),
        ("Tuesday, December 31, 2024 00:00:00 AM UTC-05:00", "%A, %B %d, %Y %H:%M:%S %p %Z%z", 1735621200),
    ],
)
def test__timestamp_epoch_conversion_with_format(value: str, format: str, epoch_time: float) -> None:
    """Check the epoch time computed from a string `value` parsed with an explicit `format`."""
    parsed = Timestamp(value=value, format=format)
    actual_epoch_time = parsed.epoch_time
    assert actual_epoch_time == epoch_time


@pytest.mark.parametrize(
    "value, epoch_time",
    [
        ("2024-12-31", 1735603200),
        ("2024-12-31 00:00:00", 1735603200),
        ("2024-12-31 12:00:00+00:00", 1735646400),
        ("2024-12-31 12:00:00-00:00", 1735646400),
        ("2024-12-31 12:00:00+05:00", 1735628400),
        ("2024-12-31 12:00:00-05:00", 1735664400),
    ],
)
def test__timestamp_epoch_conversion_iso(value: str, epoch_time: float) -> None:
    """Check the epoch time computed from an ISO-format `value` when no `format` is given."""
    parsed = Timestamp(value=value)
    actual_epoch_time = parsed.epoch_time
    assert actual_epoch_time == epoch_time


@pytest.mark.parametrize(
    "value, format",
    [
        # no format given, and the value is not an ISO 8601 string
        ("12/31/2024, 00:00:00", None),
        # the format does not match the value
        ("12/31/2024, 00:00:00", "%m/%d/%Y, %s"),
    ],
)
def test__timestamp_validation(value: str, format: Optional[str]) -> None:
    """Check that an unparseable `value` / `format` combination is rejected with a `ValueError`."""
    constructor_kwargs = {"value": value, "format": format}
    with pytest.raises(ValueError):
        Timestamp(**constructor_kwargs)