Skip to content

Commit

Permalink
Merge branch 'ft-1288-possibility-specify-variant-separator' into 'integration'
Browse files Browse the repository at this point in the history

FT 1288 Possibility to specify the default variant separator (, or custom)

See merge request pm4py/pm4py-core!504
  • Loading branch information
fit-sebastiaan-van-zelst committed Oct 15, 2021
2 parents b43d425 + 5468e2a commit fcc4eeb
Show file tree
Hide file tree
Showing 16 changed files with 34 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def occu_var_suc(var_list, parameters=None):

binarize = exec_utils.get_param_value(Parameters.BINARIZE, parameters, True)

comb_list = [var_list[i] + ',' + var_list[i + 1] for i in range(len(var_list) - 1)]
comb_list = [var_list[i] + constants.DEFAULT_VARIANT_SEP + var_list[i + 1] for i in range(len(var_list) - 1)]
result = Counter(comb_list) # count number of occurrence of each element
df = pd.DataFrame.from_dict(dict(result), orient='index', columns=['freq'])
df = df.reset_index().rename(columns={'index': 'direct_suc'})
Expand Down
6 changes: 3 additions & 3 deletions pm4py/algo/enhancement/roles/common/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def aggregate_roles_iteration(roles, parameters=None):
for j in range(i + 1, len(roles)):
sim.append((i, j, roles[i][0], roles[j][0], -find_role_similarity(roles, i, j, parameters=parameters)))

sim = sorted(sim, key=lambda x: (x[-1], ",".join(x[-3]), ",".join(x[-2])))
sim = sorted(sim, key=lambda x: (x[-1], constants.DEFAULT_VARIANT_SEP.join(x[-3]), constants.DEFAULT_VARIANT_SEP.join(x[-2])))

found_feasible = False

Expand All @@ -200,7 +200,7 @@ def aggregate_roles_iteration(roles, parameters=None):

roles.append([total_set_act, total_set_res])

roles = sorted(roles, key=lambda x: ",".join(x[0]))
roles = sorted(roles, key=lambda x: constants.DEFAULT_VARIANT_SEP.join(x[0]))

found_feasible = True

Expand Down Expand Up @@ -265,7 +265,7 @@ def get_initial_roles(res_act_couples, parameters=None):
for act in roles0:
roles.append([[act], roles0[act]])

roles = sorted(roles, key=lambda x: ",".join(x[0]))
roles = sorted(roles, key=lambda x: constants.DEFAULT_VARIANT_SEP.join(x[0]))

roles = aggregate_roles_algorithm(roles, parameters=parameters)

Expand Down
6 changes: 4 additions & 2 deletions pm4py/algo/evaluation/precision/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from pm4py.objects.petri_net.utils.petri_utils import decorate_places_preset_trans, decorate_transitions_prepostset
from pm4py.objects.petri_net.utils import align_utils as utils
from pm4py.objects.petri_net.utils.incidence_matrix import construct
from pm4py.util import constants


def __search(sync_net, ini, fin, stop, cost_function, skip):
decorate_transitions_prepostset(sync_net)
Expand Down Expand Up @@ -100,7 +102,7 @@ def get_log_prefixes(log, activity_key=xes_util.DEFAULT_NAME_KEY):
for trace in log:
for i in range(1, len(trace)):
red_trace = trace[0:i]
prefix = ",".join([x[activity_key] for x in red_trace])
prefix = constants.DEFAULT_VARIANT_SEP.join([x[activity_key] for x in red_trace])
next_activity = trace[i][activity_key]
if prefix not in prefixes:
prefixes[prefix] = set()
Expand All @@ -123,7 +125,7 @@ def form_fake_log(prefixes_keys, activity_key=xes_util.DEFAULT_NAME_KEY):
fake_log = EventLog()
for prefix in prefixes_keys:
trace = Trace()
prefix_activities = prefix.split(",")
prefix_activities = prefix.split(constants.DEFAULT_VARIANT_SEP)
for activity in prefix_activities:
event = Event()
event[activity_key] = activity
Expand Down
8 changes: 4 additions & 4 deletions pm4py/algo/filtering/log/paths/paths_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from pm4py.objects.log.obj import EventLog, Trace
from pm4py.util import exec_utils
from pm4py.util import xes_constants as xes
from pm4py.util.constants import PARAMETER_CONSTANT_ATTRIBUTE_KEY, PARAMETER_CONSTANT_TIMESTAMP_KEY
from pm4py.util.constants import PARAMETER_CONSTANT_ATTRIBUTE_KEY, PARAMETER_CONSTANT_TIMESTAMP_KEY, DEFAULT_VARIANT_SEP
import deprecation
import sys

Expand Down Expand Up @@ -127,7 +127,7 @@ def get_paths_from_log(log, attribute_key="concept:name"):
for trace in log:
for i in range(0, len(trace) - 1):
if attribute_key in trace[i] and attribute_key in trace[i + 1]:
path = trace[i][attribute_key] + "," + trace[i + 1][attribute_key]
path = trace[i][attribute_key] + DEFAULT_VARIANT_SEP + trace[i + 1][attribute_key]
if path not in paths:
paths[path] = 0
paths[path] = paths[path] + 1
Expand Down Expand Up @@ -209,7 +209,7 @@ def filter_log_by_paths(log, paths, variants, vc, threshold, attribute_key="conc
fvft = variants[vc[0][0]][0]
fvp = set()
for i in range(0, len(fvft) - 1):
path = fvft[i][attribute_key] + "," + fvft[i + 1][attribute_key]
path = fvft[i][attribute_key] + DEFAULT_VARIANT_SEP + fvft[i + 1][attribute_key]
fvp.add(path)
for trace in log:
new_trace = Trace()
Expand All @@ -221,7 +221,7 @@ def filter_log_by_paths(log, paths, variants, vc, threshold, attribute_key="conc
if j >= len(trace):
break
if attribute_key in trace[j] and attribute_key in trace[j + 1]:
path = trace[j][attribute_key] + "," + trace[j + 1][attribute_key]
path = trace[j][attribute_key] + DEFAULT_VARIANT_SEP + trace[j + 1][attribute_key]
if path in paths:
if path in fvp or paths[path] >= threshold:
new_trace.append(trace[j])
Expand Down
9 changes: 5 additions & 4 deletions pm4py/algo/filtering/pandas/paths/paths_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pm4py.util.constants import PARAMETER_CONSTANT_ATTRIBUTE_KEY
from pm4py.util.constants import PARAMETER_CONSTANT_CASEID_KEY
from pm4py.util.constants import PARAMETER_CONSTANT_TIMESTAMP_KEY
from pm4py.util.constants import DEFAULT_VARIANT_SEP
from enum import Enum
from pm4py.util import exec_utils
from copy import copy
Expand Down Expand Up @@ -52,14 +53,14 @@ def apply(df: pd.DataFrame, paths: List[Tuple[str, str]], parameters: Optional[D
attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, DEFAULT_NAME_KEY)
timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)
paths = [path[0] + "," + path[1] for path in paths]
paths = [path[0] + DEFAULT_VARIANT_SEP + path[1] for path in paths]
df = df.sort_values([case_id_glue, timestamp_key])
filt_df = df[[case_id_glue, attribute_key]]
filt_dif_shifted = filt_df.shift(-1)
filt_dif_shifted.columns = [str(col) + '_2' for col in filt_dif_shifted.columns]
stacked_df = pd.concat([filt_df, filt_dif_shifted], axis=1)
stacked_df = stacked_df[stacked_df[case_id_glue] == stacked_df[case_id_glue + '_2']]
stacked_df["@@path"] = stacked_df[attribute_key] + "," + stacked_df[attribute_key + "_2"]
stacked_df["@@path"] = stacked_df[attribute_key] + DEFAULT_VARIANT_SEP + stacked_df[attribute_key + "_2"]
stacked_df = stacked_df[stacked_df["@@path"].isin(paths)]
i1 = df.set_index(case_id_glue).index
i2 = stacked_df.set_index(case_id_glue).index
Expand Down Expand Up @@ -104,15 +105,15 @@ def apply_performance(df: pd.DataFrame, provided_path: Tuple[str, str], paramete
attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, DEFAULT_NAME_KEY)
timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)
provided_path = provided_path[0] + "," + provided_path[1]
provided_path = provided_path[0] + DEFAULT_VARIANT_SEP + provided_path[1]
min_performance = exec_utils.get_param_value(Parameters.MIN_PERFORMANCE, parameters, 0)
max_performance = exec_utils.get_param_value(Parameters.MAX_PERFORMANCE, parameters, sys.maxsize)
df = df.sort_values([case_id_glue, timestamp_key])
filt_df = df[[case_id_glue, attribute_key, timestamp_key]]
filt_dif_shifted = filt_df.shift(-1)
filt_dif_shifted.columns = [str(col) + '_2' for col in filt_dif_shifted.columns]
stacked_df = pd.concat([filt_df, filt_dif_shifted], axis=1)
stacked_df["@@path"] = stacked_df[attribute_key] + "," + stacked_df[attribute_key + "_2"]
stacked_df["@@path"] = stacked_df[attribute_key] + DEFAULT_VARIANT_SEP + stacked_df[attribute_key + "_2"]
stacked_df = stacked_df[stacked_df["@@path"] == provided_path]
stacked_df["@@timedelta"] = (stacked_df[timestamp_key + "_2"] - stacked_df[timestamp_key]).astype('timedelta64[s]')
stacked_df = stacked_df[stacked_df["@@timedelta"] >= min_performance]
Expand Down
6 changes: 3 additions & 3 deletions pm4py/algo/organizational_mining/roles/common/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def aggregate_roles_iteration(roles, parameters=None):
for j in range(i + 1, len(roles)):
sim.append((i, j, roles[i][0], roles[j][0], -find_role_similarity(roles, i, j, parameters=parameters)))

sim = sorted(sim, key=lambda x: (x[-1], ",".join(x[-3]), ",".join(x[-2])))
sim = sorted(sim, key=lambda x: (x[-1], constants.DEFAULT_VARIANT_SEP.join(x[-3]), constants.DEFAULT_VARIANT_SEP.join(x[-2])))

found_feasible = False

Expand All @@ -200,7 +200,7 @@ def aggregate_roles_iteration(roles, parameters=None):

roles.append([total_set_act, total_set_res])

roles = sorted(roles, key=lambda x: ",".join(x[0]))
roles = sorted(roles, key=lambda x: constants.DEFAULT_VARIANT_SEP.join(x[0]))

found_feasible = True

Expand Down Expand Up @@ -265,7 +265,7 @@ def get_initial_roles(res_act_couples, parameters=None):
for act in roles0:
roles.append([[act], roles0[act]])

roles = sorted(roles, key=lambda x: ",".join(x[0]))
roles = sorted(roles, key=lambda x: constants.DEFAULT_VARIANT_SEP.join(x[0]))

roles = aggregate_roles_algorithm(roles, parameters=parameters)

Expand Down
2 changes: 1 addition & 1 deletion pm4py/algo/simulation/playout/dfg/variants/classic.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ def apply(dfg: Dict[Tuple[str, str], int], start_activities: Dict[str, int], end
# returns the variants instead of the log
variants = []
for p, tr in final_traces:
variants.append({"variant": ",".join(tr), "count": math.ceil(-p * max_no_variants)})
variants.append({"variant": constants.DEFAULT_VARIANT_SEP.join(tr), "count": math.ceil(-p * max_no_variants)})
return variants
else:
event_log = EventLog()
Expand Down
4 changes: 2 additions & 2 deletions pm4py/algo/transformation/log_to_trie/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from pm4py.objects.trie.obj import Trie
from pm4py.statistics.variants.log import get as get_variants
from typing import Optional, Dict, Any, Union, Tuple
from pm4py.util import variants_util
from pm4py.util import variants_util, constants


class Parameters(Enum):
Expand All @@ -17,7 +17,7 @@ def apply(log: EventLog, parameters: Optional[Dict[Union[str, Parameters], Any]]
root = Trie()
variants = get_variants.get_variants(log, parameters=parameters)
if variants_util.VARIANT_SPECIFICATION == variants_util.VariantsSpecifications.STRING:
variants = list(map(lambda v: v.split(','), variants))
variants = list(map(lambda v: v.split(constants.DEFAULT_VARIANT_SEP), variants))
else:
variants = list(variants)

Expand Down
6 changes: 3 additions & 3 deletions pm4py/evaluation/precision/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from collections import Counter
from pm4py.objects.log.obj import EventLog, Event, Trace
from pm4py.util import xes_constants as xes_util
from pm4py.util import xes_constants as xes_util, constants
import heapq
from pm4py.objects.petri_net.utils.petri_utils import decorate_places_preset_trans, decorate_transitions_prepostset
from pm4py.objects.petri_net.utils import align_utils as utils
Expand Down Expand Up @@ -100,7 +100,7 @@ def get_log_prefixes(log, activity_key=xes_util.DEFAULT_NAME_KEY):
for trace in log:
for i in range(1, len(trace)):
red_trace = trace[0:i]
prefix = ",".join([x[activity_key] for x in red_trace])
prefix = constants.DEFAULT_VARIANT_SEP.join([x[activity_key] for x in red_trace])
next_activity = trace[i][activity_key]
if prefix not in prefixes:
prefixes[prefix] = set()
Expand All @@ -123,7 +123,7 @@ def form_fake_log(prefixes_keys, activity_key=xes_util.DEFAULT_NAME_KEY):
fake_log = EventLog()
for prefix in prefixes_keys:
trace = Trace()
prefix_activities = prefix.split(",")
prefix_activities = prefix.split(constants.DEFAULT_VARIANT_SEP)
for activity in prefix_activities:
event = Event()
event[activity_key] = activity
Expand Down
2 changes: 1 addition & 1 deletion pm4py/filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def filter_variants(log: Union[EventLog, pd.DataFrame], variants: Union[Set[str
from pm4py.util import variants_util
parameters = get_properties(log)
if variants_util.VARIANT_SPECIFICATION == variants_util.VariantsSpecifications.STRING:
variants = [",".join(v) for v in variants]
variants = [constants.DEFAULT_VARIANT_SEP.join(v) for v in variants]
if check_is_pandas_dataframe(log):
check_pandas_dataframe_columns(log)
from pm4py.algo.filtering.pandas.variants import variants_filter
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def apply(df, parameters=None):
pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY] if pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY
log = EventLog()
for vd in variant_stats:
variant = vd['variant'].split(",")
variant = vd['variant'].split(pm4_constants.DEFAULT_VARIANT_SEP)
trace = Trace()
for activity in variant:
event = Event()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def apply(df, parameters=None):
log = EventLog()
all_variants_log = {}
for vd in variant_stats:
variant = vd['variant'].split(",")
variant = vd['variant'].split(pm4_constants.DEFAULT_VARIANT_SEP)
variant_count = vd[case_glue]
trace = Trace()
for activity in variant:
Expand Down
2 changes: 1 addition & 1 deletion pm4py/objects/log/util/pandas_numpy_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def apply(dataframe: pd.DataFrame, parameters=None):
variants[acts] += 1

if variants_util.VARIANT_SPECIFICATION == variants_util.VariantsSpecifications.STRING:
variants = {",".join(x): y for x, y in variants.items()}
variants = {constants.DEFAULT_VARIANT_SEP.join(x): y for x, y in variants.items()}
else:
variants = {x: y for x, y in variants.items()}

Expand Down
2 changes: 1 addition & 1 deletion pm4py/objects/log/util/prefix_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def get_prefix_matrix_from_var_str(var_str, activities, parameters=None):
skip_last = parameters[SKIP_LAST] if SKIP_LAST in parameters else False
prefix_mat = []
this_prefix_repr = [0] * len(activities)
variant = var_str.split(",")
variant = var_str.split(constants.DEFAULT_VARIANT_SEP)
for index, act in enumerate(variant):
if skip_last and index == len(variant) - 1:
break
Expand Down
4 changes: 2 additions & 2 deletions pm4py/statistics/traces/generic/pandas/case_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def get_variants_df(df, parameters=None):
activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)

if variants_util.VARIANT_SPECIFICATION == variants_util.VariantsSpecifications.STRING:
new_df = df.groupby(case_id_glue)[activity_key].agg(lambda col: ",".join(pd.Series.to_list(col))).to_frame()
new_df = df.groupby(case_id_glue)[activity_key].agg(lambda col: constants.DEFAULT_VARIANT_SEP.join(pd.Series.to_list(col))).to_frame()
elif variants_util.VARIANT_SPECIFICATION == variants_util.VariantsSpecifications.LIST:
new_df = df.groupby(case_id_glue)[activity_key].agg(lambda col: tuple(pd.Series.to_list(col))).to_frame()

Expand Down Expand Up @@ -219,7 +219,7 @@ def get_variants_df_with_case_duration(df, parameters=None):

df1 = None
if variants_util.VARIANT_SPECIFICATION == variants_util.VariantsSpecifications.STRING:
df1 = grouped_df[activity_key].agg(lambda col: ",".join(pd.Series.to_list(col))).to_frame()
df1 = grouped_df[activity_key].agg(lambda col: constants.DEFAULT_VARIANT_SEP.join(pd.Series.to_list(col))).to_frame()
elif variants_util.VARIANT_SPECIFICATION == variants_util.VariantsSpecifications.LIST:
df1 = grouped_df[activity_key].agg(lambda col: tuple(pd.Series.to_list(col))).to_frame()
new_cols = list(df1.columns)
Expand Down
2 changes: 1 addition & 1 deletion pm4py/util/variants_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,6 @@ def get_variant_from_trace(trace, parameters=None):
activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)

if VARIANT_SPECIFICATION == VariantsSpecifications.STRING:
return ",".join([x[activity_key] for x in trace])
return constants.DEFAULT_VARIANT_SEP.join([x[activity_key] for x in trace])
elif VARIANT_SPECIFICATION == VariantsSpecifications.LIST:
return tuple([x[activity_key] for x in trace])

0 comments on commit fcc4eeb

Please sign in to comment.