From d865660c5370640b4c5c013239f6940f145d9e93 Mon Sep 17 00:00:00 2001 From: Alessandro Berti Date: Fri, 1 Mar 2024 10:13:01 +0100 Subject: [PATCH] refactor(pm4py): refactoring log_to_ocel --- pm4py/convert.py | 6 ++-- pm4py/objects/ocel/util/log_ocel.py | 56 +++++++++++++++++++++++++---- 2 files changed, 53 insertions(+), 9 deletions(-) diff --git a/pm4py/convert.py b/pm4py/convert.py index 8232a7ca9..edf004a38 100644 --- a/pm4py/convert.py +++ b/pm4py/convert.py @@ -2,7 +2,7 @@ The ``pm4py.convert`` module contains the cross-conversions implemented in ``pm4py`` """ -from typing import Union, Tuple, Optional, Collection, List, Any +from typing import Union, Tuple, Optional, Collection, List, Any, Dict import pandas as pd from copy import deepcopy @@ -259,7 +259,7 @@ def convert_to_reachability_graph(*args: Union[Tuple[PetriNet, Marking, Marking] return reachability_graph.construct_reachability_graph(net, im) -def convert_log_to_ocel(log: Union[EventLog, EventStream, pd.DataFrame], activity_column: str = "concept:name", timestamp_column: str = "time:timestamp", object_types: Optional[Collection[str]] = None, obj_separator: str = " AND ", additional_event_attributes: Optional[Collection[str]] = None) -> OCEL: +def convert_log_to_ocel(log: Union[EventLog, EventStream, pd.DataFrame], activity_column: str = "concept:name", timestamp_column: str = "time:timestamp", object_types: Optional[Collection[str]] = None, obj_separator: str = " AND ", additional_event_attributes: Optional[Collection[str]] = None, additional_object_attributes: Optional[Dict[str, Collection[str]]] = None) -> OCEL: """ Converts an event log to an object-centric event log with one or more than one object types. 
def __postprocess_stream(list_events):
    """
    Postprocess the list of events of the stream in order to make sure
    that there are no missing values (None, NaN, NaT, pd.NA).

    The event dictionaries are modified in place: every key whose value
    is a missing value is removed from its event.

    Parameters
    -------------
    list_events
        List of events (each event is a dictionary mapping attribute
        names to values)

    Returns
    -------------
    list_events
        Postprocessed stream (the same list object received as input)
    """
    # Resolve the NaT type through the public singleton instead of the
    # private pd._libs.tslibs.nattype path.
    nat_type = type(pd.NaT)
    # pd.NA does not exist in old pandas releases; falling back to None
    # makes the identity check below collapse into the plain None check.
    pd_na = getattr(pd, "NA", None)

    for event in list_events:
        # Iterate over a snapshot, since keys are deleted during the scan.
        for key, value in list(event.items()):
            if value is None or value is pd_na or isinstance(value, nat_type):
                del event[key]
            # Only floats can be NaN. Integers are deliberately not passed
            # to math.isnan: they are never NaN and very large ones raise
            # OverflowError. isinstance also covers float subclasses
            # (e.g. numpy.float64).
            elif isinstance(value, float) and math.isnan(value):
                del event[key]
    return list_events
def from_interleavings(df1: pd.DataFrame, df2: pd.DataFrame, interleavings: pd.D return OCEL(events=events, objects=objects, relations=relations) -def log_to_ocel_multiple_obj_types(log_obj: Union[EventLog, EventStream, pd.DataFrame], activity_column: str, timestamp_column: str, obj_types: Collection[str], obj_separator: str = " AND ", additional_event_attributes: Optional[Collection[str]] = None) -> OCEL: +def log_to_ocel_multiple_obj_types(log_obj: Union[EventLog, EventStream, pd.DataFrame], activity_column: str, timestamp_column: str, obj_types: Collection[str], obj_separator: str = " AND ", additional_event_attributes: Optional[Collection[str]] = None, additional_object_attributes: Optional[Dict[str, Collection[str]]] = None) -> OCEL: """ Converts an event log to an object-centric event log with one or more than one object types. @@ -288,6 +320,9 @@ def log_to_ocel_multiple_obj_types(log_obj: Union[EventLog, EventStream, pd.Data if additional_event_attributes is None: additional_event_attributes = {} + if additional_object_attributes is None: + additional_object_attributes = {} + events = [] objects = [] relations = [] @@ -295,27 +330,36 @@ def log_to_ocel_multiple_obj_types(log_obj: Union[EventLog, EventStream, pd.Data obj_ids = set() stream = log_obj.to_dict("records") + stream = __postprocess_stream(stream) for index, eve in enumerate(stream): ocel_eve = {ocel_constants.DEFAULT_EVENT_ID: str(index), ocel_constants.DEFAULT_EVENT_ACTIVITY: eve[activity_column], ocel_constants.DEFAULT_EVENT_TIMESTAMP: eve[timestamp_column]} for attr in additional_event_attributes: - ocel_eve[attr] = eve[attr] + if attr in eve: + ocel_eve[attr] = eve[attr] events.append(ocel_eve) - for col in obj_types: + for ot in obj_types: try: - objs = eve[col].split(obj_separator) + objs = eve[ot].split(obj_separator) for obj in objs: if len(obj.strip()) > 0: if obj not in obj_ids: obj_ids.add(obj) + obj_instance = {ocel_constants.DEFAULT_OBJECT_ID: obj, ocel_constants.DEFAULT_OBJECT_TYPE: 
ot} + + if ot in additional_object_attributes: + for objattname in additional_object_attributes[ot]: + if objattname in eve: + objattvalue = eve[objattname] + obj_instance[objattname] = objattvalue - objects.append({ocel_constants.DEFAULT_OBJECT_ID: obj, ocel_constants.DEFAULT_OBJECT_TYPE: col}) + objects.append(obj_instance) rel = copy(ocel_eve) rel[ocel_constants.DEFAULT_OBJECT_ID] = obj - rel[ocel_constants.DEFAULT_OBJECT_TYPE] = col + rel[ocel_constants.DEFAULT_OBJECT_TYPE] = ot relations.append(rel) except: