Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allow different UID types #529

Merged
merged 16 commits into from
Jan 23, 2024
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ dependencies = [
]

[project.optional-dependencies]
ulid = ["python-ulid"]
docs = [
"autodoc_pydantic==2.0.1",
"furo==2023.9.10",
Expand All @@ -62,6 +63,7 @@ strict = [
"pydash==7.0.6",
"pydot==2.0.0",
"typing-extensions==4.9.0",
"python-ulid==2.2.0",
]

[project.urls]
Expand Down
4 changes: 2 additions & 2 deletions src/jobflow/core/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import jobflow
from jobflow.core.reference import find_and_get_references
from jobflow.utils import ValueEnum, contains_flow_or_job, suuid
from jobflow.utils import ValueEnum, contains_flow_or_job, suid

if TYPE_CHECKING:
from collections.abc import Iterator, Sequence
Expand Down Expand Up @@ -135,7 +135,7 @@ def __init__(
jobs = [jobs]

if uuid is None:
uuid = suuid()
uuid = suid()

self.name = name
self.order = order
Expand Down
4 changes: 2 additions & 2 deletions src/jobflow/core/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from monty.json import MSONable, jsanitize

from jobflow.core.reference import OnMissing, OutputReference
from jobflow.utils.uuid import suuid
from jobflow.utils.uid import suid

if typing.TYPE_CHECKING:
from collections.abc import Hashable, Sequence
Expand Down Expand Up @@ -321,7 +321,7 @@ def __init__(

function_args = () if function_args is None else function_args
function_kwargs = {} if function_kwargs is None else function_kwargs
uuid = suuid() if uuid is None else uuid
uuid = suid() if uuid is None else uuid
metadata = {} if metadata is None else metadata
config = JobConfig() if config is None else config

Expand Down
4 changes: 2 additions & 2 deletions src/jobflow/core/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -786,7 +786,7 @@ def _group_blobs(infos, locs):


def _get_blob_info(obj: Any, store_name: str) -> dict[str, str]:
from jobflow.utils.uuid import suuid
from jobflow.utils.uid import suid

class_name = ""
module_name = ""
Expand All @@ -797,6 +797,6 @@ def _get_blob_info(obj: Any, store_name: str) -> dict[str, str]:
return {
"@class": class_name,
"@module": module_name,
"blob_uuid": suuid(),
"blob_uuid": suid(),
"store": store_name,
}
5 changes: 5 additions & 0 deletions src/jobflow/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,11 @@ class JobflowSettings(BaseSettings):
"%Y-%m-%d-%H-%M-%S-%f",
description="Date stamp format used to create directories",
)

UID_TYPE: str = Field(
"uuid4", description="Type of unique identifier to use to track jobs. "
)

model_config = SettingsConfigDict(env_prefix="jobflow_")

@model_validator(mode="before")
Expand Down
1 change: 1 addition & 0 deletions src/jobflow/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@
update_in_dictionary,
)
from jobflow.utils.log import initialize_logger
from jobflow.utils.uid import suid
from jobflow.utils.uuid import suuid
114 changes: 114 additions & 0 deletions src/jobflow/utils/uid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
"""Tools for generating UUIDs."""
from __future__ import annotations

from uuid import UUID

try:  # pragma: no cover
    from ulid import ULID
except ImportError:  # pragma: no cover
    # The ulid package is optional. Instead of failing when this module is
    # imported, define a stand-in that raises only when a ULID is requested.
    err_msg = (
        "The ulid package is not installed. "
        "Install it with `pip install jobflow[ulid]` or `pip install python-ulid`."
    )

    class ULID:  # type: ignore
        """Stand-in for ``ulid.ULID`` that raises ImportError whenever it is used."""

        def __init__(self, *args, **kwargs):
            raise ImportError(err_msg)

        @classmethod
        def from_str(cls, *args, **kwargs):
            """Raise import error.

            Declared as a classmethod because it is invoked on the class
            (``ULID.from_str(uid)``) rather than on an instance.
            """
            raise ImportError(err_msg)


def suid(id_type: str | None = None) -> str:
    """Generate a string UID (unique identifier).

    Parameters
    ----------
    id_type:
        The type of identifier to generate. One of ``"uuid1"``, ``"uuid4"``
        or ``"ulid"``. If ``None``, the ``UID_TYPE`` jobflow setting is used.
        ``uuid4`` identifiers are random and carry no time stamp; use
        ``uuid1`` or ``ulid`` when the generation time must be recoverable.
        ``ulid`` requires the optional ``python-ulid`` package.

    Returns
    -------
    str
        The generated identifier.

    Raises
    ------
    ValueError
        If ``id_type`` is not one of the supported types.
    """
    import uuid

    if id_type is None:
        # Only consult the settings when the caller did not pick a type.
        # NOTE(review): the import is kept local, presumably to avoid a
        # circular import with the top-level jobflow package -- confirm.
        from jobflow import SETTINGS

        id_type = SETTINGS.UID_TYPE

    generators = {
        "uuid1": uuid.uuid1,
        "uuid4": uuid.uuid4,
        "ulid": ULID,
    }
    if id_type not in generators:
        raise ValueError(f"UUID type {id_type} not supported.")
    return str(generators[id_type]())


def get_timestamp_from_uid(uid: str) -> float:
    """
    Extract the generation time encoded in a UID.

    Parameters
    ----------
    uid
        A UID string. Its type is detected with ``_get_id_type``.

    Returns
    -------
    float
        The time stamp stored in the UID. For ``uuid1`` this is the UUID
        time field converted to seconds since the Unix epoch; for ``ulid``
        it is the ``timestamp`` attribute of the parsed ULID.

    Raises
    ------
    ValueError
        If the UID is a ``uuid4``, which contains no time stamp, or if its
        type cannot be recognized.
    """
    kind = _get_id_type(uid)

    if kind == "uuid4":
        raise ValueError(
            "UUID4 is randomly generated and not associated with a time stamp."
        )

    if kind == "uuid1":
        # UUID.time counts 100 ns intervals since the UUID epoch (1582-10-15).
        # Subtract the offset to the Unix epoch, then scale to seconds.
        ticks_since_unix_epoch = UUID(uid).time - 0x01B21DD213814000
        return ticks_since_unix_epoch / 1e7

    if kind == "ulid":
        parsed = ULID.from_str(uid)
        return parsed.timestamp

    # Any other type has no extractor; mirror a failed table lookup.
    raise KeyError(kind)


def _get_id_type(uid: str) -> str:
"""
Get the type of a UUID.

Parameters
----------
uuid
A UUID.

Returns
-------
str
The type of the UUID.
"""
try:
version = UUID(uid).version
return {
1: "uuid1",
4: "uuid4",
}[version]
except ValueError:
pass

try:
ULID.from_str(uid)
return "ulid"
except ValueError:
pass

raise ValueError(f"ID type for {uid} not recognized.")
4 changes: 4 additions & 0 deletions src/jobflow/utils/uuid.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
"""Tools for generating UUIDs."""
from monty.dev import deprecated


@deprecated(
message="The UUID system will be replace with UID that contains both UUID and ULID."
)
def suuid() -> str:
"""
Generate a string UUID (universally unique identifier).
Expand Down
38 changes: 38 additions & 0 deletions tests/utils/test_uid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pytest


def test_uid():
    """Check UID generation and time stamp extraction for each supported type."""
    from uuid import UUID

    from ulid import ULID

    from jobflow.utils.uid import get_timestamp_from_uid, suid

    # uuid1: correct version and a recoverable float time stamp.
    uid_v1 = suid("uuid1")
    assert UUID(uid_v1).version == 1
    assert isinstance(get_timestamp_from_uid(uid_v1), float)

    # uuid4: correct version, but no time stamp can be extracted.
    uid_v4 = suid("uuid4")
    assert UUID(uid_v4).version == 4
    with pytest.raises(
        ValueError,
        match="UUID4 is randomly generated and not associated with a time stamp.",
    ):
        get_timestamp_from_uid(uid_v4)

    # ulid: parseable by the ulid package and carries a float time stamp.
    ulid_str = suid("ulid")
    assert ULID.from_str(ulid_str)
    assert isinstance(get_timestamp_from_uid(ulid_str), float)

    # Unknown generator names and unparseable IDs are both rejected.
    with pytest.raises(ValueError, match="UUID type uuid2 not supported."):
        suid("uuid2")
    with pytest.raises(ValueError, match="ID type for FAKEUUID not recognized."):
        get_timestamp_from_uid("FAKEUUID")

    # With no argument the configured default type (currently uuid4) is used.
    default_uid = suid()
    assert UUID(default_uid).version == 4
    # assert len(ULID.from_str(default_uid).hex) == 32  # uncomment when ulid is default
Loading