Skip to content

Commit

Permalink
Merge branch 'ft-1279-artificial-start-end-all' into 'integration'
Browse files Browse the repository at this point in the history
FT 1279 Artificial start/end activities (Pandas + log + simplified interface)

See merge request pm4py/pm4py-core!502
  • Loading branch information
fit-sebastiaan-van-zelst committed Oct 15, 2021
2 parents d5d0b49 + a9f16f0 commit b43d425
Show file tree
Hide file tree
Showing 5 changed files with 137 additions and 5 deletions.
2 changes: 1 addition & 1 deletion pm4py/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from pm4py import util, objects, statistics, algo, visualization
from pm4py.analysis import check_soundness, solve_marking_equation, solve_extended_marking_equation, \
construct_synchronous_product_net
construct_synchronous_product_net, insert_artificial_start_end
from pm4py.conformance import conformance_diagnostics_token_based_replay, conformance_diagnostics_alignments, \
fitness_token_based_replay, \
fitness_alignments, precision_token_based_replay, \
Expand Down
33 changes: 31 additions & 2 deletions pm4py/analysis.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
from typing import List, Optional, Tuple, Dict
from typing import List, Optional, Tuple, Dict, Union

from pm4py.objects.log.obj import Trace
from pm4py.objects.log.obj import Trace, EventLog
from pm4py.objects.petri_net.obj import PetriNet, Marking
from pm4py.utils import get_properties
from pm4py.util.pandas_utils import check_is_pandas_dataframe, check_pandas_dataframe_columns

import pandas as pd


def construct_synchronous_product_net(trace: Trace, petri_net: PetriNet, initial_marking: Marking,
Expand Down Expand Up @@ -139,3 +143,28 @@ def check_soundness(petri_net: PetriNet, initial_marking: Marking,
"""
from pm4py.algo.analysis.woflan import algorithm as woflan
return woflan.apply(petri_net, initial_marking, final_marking)


def insert_artificial_start_end(log: Union[EventLog, pd.DataFrame]) -> Union[EventLog, pd.DataFrame]:
    """
    Adds an artificial start activity and an artificial end activity to each
    case of an event log / Pandas dataframe.

    The properties obtained from ``get_properties(log)`` are forwarded as the
    ``parameters`` of the underlying implementation, which is chosen based on
    whether the input is a Pandas dataframe.

    Parameters
    ------------------
    log
        Event log / Pandas dataframe

    Returns
    ------------------
    log
        Event log / Pandas dataframe with artificial start / end activities
    """
    parameters = get_properties(log)

    # Event log objects: delegate to the log-based implementation.
    if not check_is_pandas_dataframe(log):
        from pm4py.objects.log.util import artificial as log_impl
        return log_impl.insert_artificial_start_end(log, parameters=parameters)

    # Pandas dataframes: check the columns first, then delegate to the
    # dataframe-based implementation.
    check_pandas_dataframe_columns(log)
    from pm4py.objects.log.util import dataframe_utils as df_impl
    return df_impl.insert_artificial_start_end(log, parameters=parameters)

2 changes: 1 addition & 1 deletion pm4py/objects/log/util/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from pm4py.objects.log.util import insert_classifier, log, sampling, \
sorting, index_attribute, get_class_representation, get_prefixes, \
get_log_encoded, interval_lifecycle, log_regex, basic_filter, \
filtering_utils, split_train_test, xes
filtering_utils, split_train_test, xes, artificial
import pkgutil

if pkgutil.find_loader("pandas"):
Expand Down
47 changes: 47 additions & 0 deletions pm4py/objects/log/util/artificial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from enum import Enum
from typing import Optional, Dict, Any

from pm4py.objects.log.obj import Event
from pm4py.objects.log.obj import EventLog
from pm4py.util import constants
from pm4py.util import exec_utils
from pm4py.util import xes_constants


class Parameters(Enum):
    """
    Keys recognised in the ``parameters`` dictionary accepted by
    ``insert_artificial_start_end`` in this module.
    """
    # Event attribute under which the activity label is stored; the artificial
    # events are created with this single attribute.
    ACTIVITY_KEY = constants.PARAMETER_CONSTANT_ACTIVITY_KEY
    # Label assigned to the artificial event inserted at the beginning of each trace.
    PARAM_ARTIFICIAL_START_ACTIVITY = constants.PARAM_ARTIFICIAL_START_ACTIVITY
    # Label assigned to the artificial event appended at the end of each trace.
    PARAM_ARTIFICIAL_END_ACTIVITY = constants.PARAM_ARTIFICIAL_END_ACTIVITY


def insert_artificial_start_end(log: EventLog, parameters: Optional[Dict[Any, Any]] = None) -> EventLog:
    """
    Surrounds every trace of an event log with an artificial start event and
    an artificial end event.

    The traces of the provided log are modified in place, and the very same
    log object is returned. The artificial events carry only the activity
    attribute.

    Parameters
    -------------------
    log
        Event log
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY: the attribute holding the activity
        - Parameters.PARAM_ARTIFICIAL_START_ACTIVITY: label of the artificial start activity
        - Parameters.PARAM_ARTIFICIAL_END_ACTIVITY: label of the artificial end activity

    Returns
    ------------------
    log
        Enriched log
    """
    params = {} if parameters is None else parameters

    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, params, xes_constants.DEFAULT_NAME_KEY)
    start_label = exec_utils.get_param_value(
        Parameters.PARAM_ARTIFICIAL_START_ACTIVITY, params, constants.DEFAULT_ARTIFICIAL_START_ACTIVITY)
    end_label = exec_utils.get_param_value(
        Parameters.PARAM_ARTIFICIAL_END_ACTIVITY, params, constants.DEFAULT_ARTIFICIAL_END_ACTIVITY)

    for trace in log:
        # the artificial start goes before the first event of the trace ...
        start_event = Event({activity_key: start_label})
        trace.insert(0, start_event)
        # ... and the artificial end after the last one
        end_event = Event({activity_key: end_label})
        trace.append(end_event)

    return log
58 changes: 57 additions & 1 deletion pm4py/objects/log/util/dataframe_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from pm4py.util import constants
from pm4py.util import exec_utils
from pm4py.util import points_subset
from pm4py.util import xes_constants
from pm4py.util import xes_constants, pandas_utils

LEGACY_PARQUET_TP_REPLACER = "AAA"
LEGACY_PARQUET_CASECONCEPTNAME = "caseAAAconceptAAAname"
Expand All @@ -23,6 +23,9 @@ class Parameters(Enum):
MAX_DIFFERENT_OCC_STR_ATTR = 50
TIMESTAMP_KEY = constants.PARAMETER_CONSTANT_TIMESTAMP_KEY
ACTIVITY_KEY = constants.PARAMETER_CONSTANT_ACTIVITY_KEY
PARAM_ARTIFICIAL_START_ACTIVITY = constants.PARAM_ARTIFICIAL_START_ACTIVITY
PARAM_ARTIFICIAL_END_ACTIVITY = constants.PARAM_ARTIFICIAL_END_ACTIVITY
INDEX_KEY = "index_key"


def insert_partitioning(df, num_partitions, parameters=None):
Expand Down Expand Up @@ -371,3 +374,56 @@ def automatic_feature_extraction_df(df: pd.DataFrame, parameters: Optional[Dict[
columns.remove(timestamp_key)

return get_features_df(fea_sel_df, list(columns), parameters=parameters)


def insert_artificial_start_end(df0: pd.DataFrame, parameters: Optional[Dict[Any, Any]] = None) -> pd.DataFrame:
    """
    Adds, for every case of a Pandas dataframe, one row for the artificial
    start activity and one row for the artificial end activity.

    The input dataframe is copied before being processed. The returned
    dataframe is sorted by case identifier, timestamp and index column, and
    receives the ``attrs`` of the input dataframe.

    Parameters
    ------------------
    df0
        Dataframe
    parameters
        Parameters of the algorithm, including:
        - Parameters.CASE_ID_KEY: the case identifier
        - Parameters.TIMESTAMP_KEY: the timestamp
        - Parameters.ACTIVITY_KEY: the activity
        - Parameters.PARAM_ARTIFICIAL_START_ACTIVITY: label of the artificial start activity
        - Parameters.PARAM_ARTIFICIAL_END_ACTIVITY: label of the artificial end activity
        - Parameters.INDEX_KEY: the column used to store the index of the rows

    Returns
    -----------------
    enriched_df
        Dataframe with artificial start/end activities
    """
    if parameters is None:
        parameters = {}

    case_id_key = exec_utils.get_param_value(
        Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters, xes_constants.DEFAULT_TIMESTAMP_KEY)
    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)

    artificial_start_activity = exec_utils.get_param_value(
        Parameters.PARAM_ARTIFICIAL_START_ACTIVITY, parameters, constants.DEFAULT_ARTIFICIAL_START_ACTIVITY)
    artificial_end_activity = exec_utils.get_param_value(
        Parameters.PARAM_ARTIFICIAL_END_ACTIVITY, parameters, constants.DEFAULT_ARTIFICIAL_END_ACTIVITY)

    index_key = exec_utils.get_param_value(Parameters.INDEX_KEY, parameters, constants.DEFAULT_INDEX_KEY)

    sort_columns = [case_id_key, timestamp_key, index_key]
    one_ms = pd.Timedelta("1 ms")

    # work on a copy, ordered by case / timestamp / original position
    ordered = pandas_utils.insert_index(df0.copy(), index_key)
    ordered = ordered.sort_values(sort_columns)

    # first and last timestamp of each case
    per_case = ordered[[case_id_key, timestamp_key]].groupby(case_id_key)

    # stability trick: the artificial start is placed 1ms before the first
    # event of the case, the artificial end 1ms after the last one
    start_df = per_case.first().reset_index()
    start_df[timestamp_key] = start_df[timestamp_key] - one_ms
    start_df[activity_key] = artificial_start_activity

    end_df = per_case.last().reset_index()
    end_df[timestamp_key] = end_df[timestamp_key] + one_ms
    end_df[activity_key] = artificial_end_activity

    # merge the artificial rows with the original ones and restore the ordering
    enriched = pd.concat([start_df, ordered, end_df])
    enriched = pandas_utils.insert_index(enriched, index_key)
    enriched = enriched.sort_values(sort_columns)

    enriched.attrs = df0.attrs

    return enriched

0 comments on commit b43d425

Please sign in to comment.