Skip to content

Commit

Permalink
feat(pm4py): OCEL clustering
Browse files Browse the repository at this point in the history
  • Loading branch information
fit-alessandro-berti committed Mar 24, 2023
1 parent 8921c40 commit 3e5164b
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 3 deletions.
2 changes: 2 additions & 0 deletions docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,7 @@ In PM4Py, we offer object-centric process mining features:
* :meth:`pm4py.ocel.ocel_merge_duplicates`; merge events in the OCEL which are happening with the same activity at the same timestamp.
* :meth:`pm4py.ocel.ocel_o2o_enrichment`; enriches the O2O table of the OCEL with the graph-based relationships.
* :meth:`pm4py.ocel.ocel_e2o_lifecycle_enrichment`; enriches the relations table of the OCEL with lifecycle-based information.
* :meth:`pm4py.ocel.cluster_equivalent_ocel`; perform a clustering of the objects of an OCEL based on lifecycle/interactions similarity.


Some object-centric process discovery algorithms are also offered:
Expand Down Expand Up @@ -555,6 +556,7 @@ Overall List of Methods
pm4py.ocel.ocel_merge_duplicates
pm4py.ocel.ocel_o2o_enrichment
pm4py.ocel.ocel_e2o_lifecycle_enrichment
pm4py.ocel.cluster_equivalent_ocel
pm4py.openai
pm4py.openai.describe_process
pm4py.openai.describe_path
Expand Down
2 changes: 1 addition & 1 deletion pm4py/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
conformance_log_skeleton
from pm4py.ocel import ocel_objects_interactions_summary, ocel_temporal_summary, ocel_objects_summary, ocel_get_object_types, ocel_get_attribute_names, ocel_flattening, ocel_object_type_activities, ocel_objects_ot_count, \
discover_ocdfg, discover_oc_petri_net, discover_objects_graph, sample_ocel_objects, ocel_drop_duplicates, ocel_merge_duplicates, ocel_sort_by_additional_column, \
ocel_add_index_based_timedelta, sample_ocel_connected_components, ocel_o2o_enrichment, ocel_e2o_lifecycle_enrichment
ocel_add_index_based_timedelta, sample_ocel_connected_components, ocel_o2o_enrichment, ocel_e2o_lifecycle_enrichment, cluster_equivalent_ocel
from pm4py.vis import view_petri_net, save_vis_petri_net, view_dfg, save_vis_dfg, view_process_tree, \
save_vis_process_tree, \
view_ocdfg, save_vis_ocdfg, view_heuristics_net, save_vis_heuristics_net, view_bpmn, save_vis_bpmn, view_sna, save_vis_sna,\
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
from pm4py.util import exec_utils
from pm4py.objects.ocel.obj import OCEL
from typing import Optional, Dict, Any, Collection

import sys

class Parameters(Enum):
    # Keys that apply() looks up in its ``parameters`` dictionary.

    # Object type driving the split; apply() raises an exception when it is
    # not provided (there is no usable default).
    OBJECT_TYPE = "object_type"
    # Maximum number of objects apply() iterates over: its loop stops as soon
    # as the running index reaches this value. Defaults to sys.maxsize.
    MAX_OBJS = "max_objs"


def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None) -> Collection[OCEL]:
Expand Down Expand Up @@ -36,6 +37,7 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None) -> Collection
object_type = exec_utils.get_param_value(Parameters.OBJECT_TYPE, parameters, None)
if object_type is None:
raise Exception("the object type should be provided as parameter")
max_objs = exec_utils.get_param_value(Parameters.MAX_OBJS, parameters, sys.maxsize)

import pm4py
interaction_graph = pm4py.discover_objects_graph(ocel, "object_interaction")
Expand All @@ -58,7 +60,10 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None) -> Collection

lst = []

for obj in objects:
for index, obj in enumerate(objects):
if index >= max_objs:
break

ancestors = nx.ancestors(G, obj)
descendants = nx.descendants(G, obj)
overall_set = ancestors.union(descendants).union({obj})
Expand Down
33 changes: 33 additions & 0 deletions pm4py/ocel.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

from pm4py.objects.ocel.obj import OCEL
from pm4py.util import constants
import sys


def ocel_get_object_types(ocel: OCEL) -> List[str]:
"""
Expand Down Expand Up @@ -493,3 +495,34 @@ def ocel_add_index_based_timedelta(ocel: OCEL) -> OCEL:
del ocel.events["@@timedelta"]
del ocel.relations["@@timedelta"]
return ocel


def cluster_equivalent_ocel(ocel: OCEL, object_type: str, max_objs: int = sys.maxsize) -> Dict[str, Collection[OCEL]]:
    """
    Perform a clustering of the object-centric event log, based on the 'executions' of
    a single object type. Equivalent 'executions' are grouped in the output dictionary.

    The log is first split into sub-logs using the ANCESTORS_DESCENDANTS variant of the
    OCEL splitting algorithm. For every sub-log, the objects are renamed and a description
    (computed without timestamps) is generated; sub-logs sharing the same description end
    up in the same cluster.

    :param ocel: object-centric event log
    :param object_type: reference object type
    :param max_objs: maximum number of objects (of the given object type)
    :return: dictionary associating each description to the list of sub-logs having that description
    :rtype: ``Dict[str, Collection[OCEL]]``

    .. code-block:: python3

        import pm4py

        ocel = pm4py.read_ocel('trial.ocel')
        clusters = pm4py.cluster_equivalent_ocel(ocel, "order")
    """
    from pm4py.algo.transformation.ocel.split_ocel import algorithm as split_ocel_algorithm
    from pm4py.objects.ocel.util import rename_objs_ot_tim_lex
    from pm4py.algo.transformation.ocel.description import algorithm as ocel_description

    lst_ocels = split_ocel_algorithm.apply(
        ocel,
        variant=split_ocel_algorithm.Variants.ANCESTORS_DESCENDANTS,
        parameters={"object_type": object_type, "max_objs": max_objs},
    )

    ret = {}
    for oc in lst_ocels:
        # The renamed copy is only used to compute the grouping key
        # (presumably so that object identifiers do not affect the
        # description - confirm against rename_objs_ot_tim_lex).
        oc_ren = rename_objs_ot_tim_lex.apply(oc)
        descr = ocel_description.apply(oc_ren, parameters={"include_timestamps": False})
        # The original (non-renamed) sub-log is what gets stored in the cluster.
        ret.setdefault(descr, []).append(oc)
    return ret

0 comments on commit 3e5164b

Please sign in to comment.