From 3e5164b72835aaa29051f4fd6ce4329253a17f95 Mon Sep 17 00:00:00 2001 From: Alessandro Berti Date: Fri, 24 Mar 2023 08:26:56 +0100 Subject: [PATCH] feat(pm4py): OCEL clustering --- docs/source/api.rst | 2 ++ pm4py/__init__.py | 2 +- .../variants/ancestors_descendants.py | 9 +++-- pm4py/ocel.py | 33 +++++++++++++++++++ 4 files changed, 43 insertions(+), 3 deletions(-) diff --git a/docs/source/api.rst b/docs/source/api.rst index a209247d6..0299e72d7 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -280,6 +280,7 @@ In PM4Py, we offer object-centric process mining features: * :meth:`pm4py.ocel.ocel_merge_duplicates`; merge events in the OCEL which are happening with the same activity at the same timestamp. * :meth:`pm4py.ocel.ocel_o2o_enrichment`; enriches the O2O table of the OCEL with the grah-based relationships. * :meth:`pm4py.ocel.ocel_e2o_lifecycle_enrichment`; enriches the relations table of the OCEL with lifecycle-based information. + * :meth:`pm4py.ocel.cluster_equivalent_ocel`; perform a clustering of the objects of an OCEL based on lifecycle/interactions similarity. 
Some object-centric process discovery algorithms are also offered: @@ -555,6 +556,7 @@ Overall List of Methods pm4py.ocel.ocel_merge_duplicates pm4py.ocel.ocel_o2o_enrichment pm4py.ocel.ocel_e2o_lifecycle_enrichment + pm4py.ocel.cluster_equivalent_ocel pm4py.openai pm4py.openai.describe_process pm4py.openai.describe_path diff --git a/pm4py/__init__.py b/pm4py/__init__.py index a12382579..b4d28b7fa 100644 --- a/pm4py/__init__.py +++ b/pm4py/__init__.py @@ -28,7 +28,7 @@ conformance_log_skeleton from pm4py.ocel import ocel_objects_interactions_summary, ocel_temporal_summary, ocel_objects_summary, ocel_get_object_types, ocel_get_attribute_names, ocel_flattening, ocel_object_type_activities, ocel_objects_ot_count, \ discover_ocdfg, discover_oc_petri_net, discover_objects_graph, sample_ocel_objects, ocel_drop_duplicates, ocel_merge_duplicates, ocel_sort_by_additional_column, \ - ocel_add_index_based_timedelta, sample_ocel_connected_components, ocel_o2o_enrichment, ocel_e2o_lifecycle_enrichment + ocel_add_index_based_timedelta, sample_ocel_connected_components, ocel_o2o_enrichment, ocel_e2o_lifecycle_enrichment, cluster_equivalent_ocel from pm4py.vis import view_petri_net, save_vis_petri_net, view_dfg, save_vis_dfg, view_process_tree, \ save_vis_process_tree, \ view_ocdfg, save_vis_ocdfg, view_heuristics_net, save_vis_heuristics_net, view_bpmn, save_vis_bpmn, view_sna, save_vis_sna,\ diff --git a/pm4py/algo/transformation/ocel/split_ocel/variants/ancestors_descendants.py b/pm4py/algo/transformation/ocel/split_ocel/variants/ancestors_descendants.py index 28b305db1..9ee0d627f 100644 --- a/pm4py/algo/transformation/ocel/split_ocel/variants/ancestors_descendants.py +++ b/pm4py/algo/transformation/ocel/split_ocel/variants/ancestors_descendants.py @@ -5,10 +5,11 @@ from pm4py.util import exec_utils from pm4py.objects.ocel.obj import OCEL from typing import Optional, Dict, Any, Collection - +import sys class Parameters(Enum): OBJECT_TYPE = "object_type" + MAX_OBJS = "max_objs" 
def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None) -> Collection[OCEL]: @@ -36,6 +37,7 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None) -> Collection object_type = exec_utils.get_param_value(Parameters.OBJECT_TYPE, parameters, None) if object_type is None: raise Exception("the object type should be provided as parameter") + max_objs = exec_utils.get_param_value(Parameters.MAX_OBJS, parameters, sys.maxsize) import pm4py interaction_graph = pm4py.discover_objects_graph(ocel, "object_interaction") @@ -58,7 +60,10 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None) -> Collection lst = [] - for obj in objects: + for index, obj in enumerate(objects): + if index >= max_objs: + break + ancestors = nx.ancestors(G, obj) descendants = nx.descendants(G, obj) overall_set = ancestors.union(descendants).union({obj}) diff --git a/pm4py/ocel.py b/pm4py/ocel.py index 477afcaf8..a48600040 100644 --- a/pm4py/ocel.py +++ b/pm4py/ocel.py @@ -8,6 +8,8 @@ from pm4py.objects.ocel.obj import OCEL from pm4py.util import constants +import sys + def ocel_get_object_types(ocel: OCEL) -> List[str]: """
@@ -493,3 +495,38 @@ def ocel_add_index_based_timedelta(ocel: OCEL) -> OCEL:
     del ocel.events["@@timedelta"]
     del ocel.relations["@@timedelta"]
     return ocel
+
+
+def cluster_equivalent_ocel(ocel: OCEL, object_type: str, max_objs: int = sys.maxsize) -> Dict[str, Collection[OCEL]]:
+    """
+    Perform a clustering of the object-centric event log, based on the 'executions' of
+    a single object type. Equivalent 'executions' are grouped in the output dictionary.
+
+    The log is split with the ancestors/descendants variant of the split algorithm.
+    Every resulting sub-log is renamed through ``rename_objs_ot_tim_lex`` and then described
+    with ``include_timestamps`` set to False; sub-logs sharing the same description form one cluster.
+
+    :param ocel: object-centric event log
+    :param object_type: reference object type
+    :param max_objs: maximum number of objects (of the given object type); defaults to ``sys.maxsize``
+    :return: dictionary associating each description to the list of sub-logs having that description
+    :rtype: ``Dict[str, Collection[OCEL]]``
+
+    .. code-block:: python3
+
+        import pm4py
+
+        ocel = pm4py.read_ocel('trial.ocel')
+        clusters = pm4py.cluster_equivalent_ocel(ocel, "order")
+    """
+    from pm4py.algo.transformation.ocel.split_ocel import algorithm as split_ocel_algorithm
+    from pm4py.objects.ocel.util import rename_objs_ot_tim_lex
+    from pm4py.algo.transformation.ocel.description import algorithm as ocel_description
+    lst_ocels = split_ocel_algorithm.apply(ocel, variant=split_ocel_algorithm.Variants.ANCESTORS_DESCENDANTS, parameters={"object_type": object_type, "max_objs": max_objs})
+    ret = {}
+    for oc in lst_ocels:
+        # the renamed copy only serves to compute the description; the original sub-log is stored
+        oc_ren = rename_objs_ot_tim_lex.apply(oc)
+        descr = ocel_description.apply(oc_ren, parameters={"include_timestamps": False})
+        ret.setdefault(descr, []).append(oc)
+    return ret