From 7bc3730b8b669a24b837d2a9723c9d77ad9612dd Mon Sep 17 00:00:00 2001 From: Alessandro Berti Date: Tue, 5 Jan 2021 07:26:00 +0100 Subject: [PATCH 01/21] fix(pm4py): added examples and missing imports added examples and missing imports --- examples/execute_everything.py | 48 +++++++++++++++++++++++++++++ examples/streaming_discovery_dfg.py | 8 +++-- pm4py/objects/dfg/utils/__init__.py | 2 +- 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/examples/execute_everything.py b/examples/execute_everything.py index 5ea982fe0..942b0947f 100644 --- a/examples/execute_everything.py +++ b/examples/execute_everything.py @@ -256,6 +256,54 @@ def visualization_align_table(): visualization_align_table.execute_script() +def streaming_conformance_footprints(): + from examples import streaming_conformance_footprints + print("\n\nstreaming_conformance_footprints") + streaming_conformance_footprints.execute_script() + + +def streaming_conformance_tbr(): + from examples import streaming_conformance_tbr + print("\n\nstreaming_conformacne_tbr") + streaming_conformance_tbr.execute_script() + + +def streaming_csv_reader_event_stream(): + from examples import streaming_csv_reader_event_stream + print("\n\nstreaming_csv_reader_event_stream") + streaming_csv_reader_event_stream.execute_script() + + +def streaming_discovery_dfg(): + from examples import streaming_discovery_dfg + print("\n\nstreaming_discovery_dfg") + streaming_discovery_dfg.execute_script() + + +def streaming_xes_reader_event_stream(): + from examples import streaming_xes_reader_event_stream + print("\n\nstreaming_xes_reader_event_stream") + streaming_xes_reader_event_stream.execute_script() + + +def streaming_xes_reader_trace_stream(): + from examples import streaming_xes_reader_trace_stream + print("\n\nstreaming_xes_reader_trace_stream") + streaming_xes_reader_trace_stream.execute_script() + + +def dfg_align_log(): + from examples import dfg_align_log + print("\n\ndfg_align_log") + 
dfg_align_log.execute_script() + + +def dfg_playout(): + from examples import dfg_playout + print("\n\ndfg_playout") + dfg_playout.execute_script() + + def execute_script(f): try: f() diff --git a/examples/streaming_discovery_dfg.py b/examples/streaming_discovery_dfg.py index 8783e0af7..fc0058ab4 100644 --- a/examples/streaming_discovery_dfg.py +++ b/examples/streaming_discovery_dfg.py @@ -1,8 +1,9 @@ +import os + import pm4py -from pm4py.streaming.stream.live_event_stream import LiveEventStream from pm4py.streaming.algo.discovery.dfg import algorithm as dfg_discovery +from pm4py.streaming.stream.live_event_stream import LiveEventStream from pm4py.visualization.dfg import visualizer as dfg_visualizer -import os, time def execute_script(): @@ -28,7 +29,8 @@ def execute_script(): dfg, activities, start_activities, end_activities = stream_dfg_disc.get() # visualize the DFG gviz = dfg_visualizer.apply(dfg, variant=dfg_visualizer.Variants.FREQUENCY, activities_count=activities, - parameters={"start_activities": start_activities, "end_activities": end_activities}) + parameters={"format": "svg", "start_activities": start_activities, + "end_activities": end_activities}) dfg_visualizer.view(gviz) diff --git a/pm4py/objects/dfg/utils/__init__.py b/pm4py/objects/dfg/utils/__init__.py index b4b7a0887..0993a73ab 100644 --- a/pm4py/objects/dfg/utils/__init__.py +++ b/pm4py/objects/dfg/utils/__init__.py @@ -1 +1 @@ -from pm4py.objects.dfg.utils import dfg_utils +from pm4py.objects.dfg.utils import dfg_utils, dfg_alignment, dfg_playout From a09b85ca6eb09128b9dfda4ccbfdfccd5849cae6 Mon Sep 17 00:00:00 2001 From: Alessandro Berti Date: Tue, 5 Jan 2021 07:34:43 +0100 Subject: [PATCH 02/21] fix(pm4py): fixed examples paths fixed examples paths --- examples/dfg_playout.py | 3 ++- examples/powerbi/dfg_visualization_freq.py | 5 ++++- examples/powerbi/dfg_visualization_perf.py | 5 ++++- examples/powerbi/graph_case_duration.py | 5 ++++- examples/powerbi/graph_events_over_time.py | 5 ++++- 
examples/powerbi/heuristics_net.py | 5 ++++- examples/powerbi/inductive_miner.py | 5 ++++- examples/powerbi/process_tree.py | 5 ++++- 8 files changed, 30 insertions(+), 8 deletions(-) diff --git a/examples/dfg_playout.py b/examples/dfg_playout.py index 4a1276455..5974539de 100644 --- a/examples/dfg_playout.py +++ b/examples/dfg_playout.py @@ -1,10 +1,11 @@ import pm4py from pm4py.objects.dfg.filtering import dfg_filtering from pm4py.objects.dfg.utils import dfg_playout +import os def execute_script(): - log = pm4py.read_xes("C:/receipt.xes") + log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "receipt.xes")) activities = pm4py.get_attribute_values(log, "concept:name") dfg, sa, ea = pm4py.discover_dfg(log) # filters the DFG to make a simpler one diff --git a/examples/powerbi/dfg_visualization_freq.py b/examples/powerbi/dfg_visualization_freq.py index 730476568..5ff68a45c 100644 --- a/examples/powerbi/dfg_visualization_freq.py +++ b/examples/powerbi/dfg_visualization_freq.py @@ -1,9 +1,12 @@ if True: # ignore this part in true PowerBI executions + import os import pandas as pd from pm4py.objects.log.util import dataframe_utils - dataset = pd.read_csv("C:/running-example.csv") + log_path = os.path.join("..", "..", "tests", "input_data", "running-example.csv") + + dataset = pd.read_csv(log_path) dataset = dataframe_utils.convert_timestamp_columns_in_df(dataset) import pandas as pd diff --git a/examples/powerbi/dfg_visualization_perf.py b/examples/powerbi/dfg_visualization_perf.py index 0e87150e6..db4659fb1 100644 --- a/examples/powerbi/dfg_visualization_perf.py +++ b/examples/powerbi/dfg_visualization_perf.py @@ -1,9 +1,12 @@ if True: # ignore this part in true PowerBI executions + import os import pandas as pd from pm4py.objects.log.util import dataframe_utils - dataset = pd.read_csv("C:/running-example.csv") + log_path = os.path.join("..", "..", "tests", "input_data", "running-example.csv") + + dataset = pd.read_csv(log_path) dataset = 
dataframe_utils.convert_timestamp_columns_in_df(dataset) import pandas as pd diff --git a/examples/powerbi/graph_case_duration.py b/examples/powerbi/graph_case_duration.py index bb00760b0..2b76401f2 100644 --- a/examples/powerbi/graph_case_duration.py +++ b/examples/powerbi/graph_case_duration.py @@ -1,9 +1,12 @@ if True: # ignore this part in true PowerBI executions + import os import pandas as pd from pm4py.objects.log.util import dataframe_utils - dataset = pd.read_csv("C:/running-example.csv") + log_path = os.path.join("..", "..", "tests", "input_data", "running-example.csv") + + dataset = pd.read_csv(log_path) dataset = dataframe_utils.convert_timestamp_columns_in_df(dataset) import pandas as pd diff --git a/examples/powerbi/graph_events_over_time.py b/examples/powerbi/graph_events_over_time.py index 3eefac7af..ff0d69ae9 100644 --- a/examples/powerbi/graph_events_over_time.py +++ b/examples/powerbi/graph_events_over_time.py @@ -1,9 +1,12 @@ if True: # ignore this part in true PowerBI executions + import os import pandas as pd from pm4py.objects.log.util import dataframe_utils - dataset = pd.read_csv("C:/running-example.csv") + log_path = os.path.join("..", "..", "tests", "input_data", "running-example.csv") + + dataset = pd.read_csv(log_path) dataset = dataframe_utils.convert_timestamp_columns_in_df(dataset) import pandas as pd diff --git a/examples/powerbi/heuristics_net.py b/examples/powerbi/heuristics_net.py index de3692f07..1d8ebb98f 100644 --- a/examples/powerbi/heuristics_net.py +++ b/examples/powerbi/heuristics_net.py @@ -1,9 +1,12 @@ if True: # ignore this part in true PowerBI executions + import os import pandas as pd from pm4py.objects.log.util import dataframe_utils - dataset = pd.read_csv("C:/running-example.csv") + log_path = os.path.join("..", "..", "tests", "input_data", "running-example.csv") + + dataset = pd.read_csv(log_path) dataset = dataframe_utils.convert_timestamp_columns_in_df(dataset) import pandas as pd diff --git 
a/examples/powerbi/inductive_miner.py b/examples/powerbi/inductive_miner.py index 8a95ce2c5..03636a40c 100644 --- a/examples/powerbi/inductive_miner.py +++ b/examples/powerbi/inductive_miner.py @@ -1,9 +1,12 @@ if True: # ignore this part in true PowerBI executions + import os import pandas as pd from pm4py.objects.log.util import dataframe_utils - dataset = pd.read_csv("C:/running-example.csv") + log_path = os.path.join("..", "..", "tests", "input_data", "running-example.csv") + + dataset = pd.read_csv(log_path) dataset = dataframe_utils.convert_timestamp_columns_in_df(dataset) import pandas as pd diff --git a/examples/powerbi/process_tree.py b/examples/powerbi/process_tree.py index 2c294c742..50d09616f 100644 --- a/examples/powerbi/process_tree.py +++ b/examples/powerbi/process_tree.py @@ -1,9 +1,12 @@ if True: # ignore this part in true PowerBI executions + import os import pandas as pd from pm4py.objects.log.util import dataframe_utils - dataset = pd.read_csv("C:/running-example.csv") + log_path = os.path.join("..", "..", "tests", "input_data", "running-example.csv") + + dataset = pd.read_csv(log_path) dataset = dataframe_utils.convert_timestamp_columns_in_df(dataset) import pandas as pd From 98c2a566c2477b481bb8b2a478b42c86d44dea31 Mon Sep 17 00:00:00 2001 From: Alessandro Berti Date: Tue, 5 Jan 2021 07:37:32 +0100 Subject: [PATCH 03/21] fix(pm4py): added missing example added missing example --- examples/execute_everything.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/examples/execute_everything.py b/examples/execute_everything.py index 942b0947f..d70b2f1e2 100644 --- a/examples/execute_everything.py +++ b/examples/execute_everything.py @@ -262,6 +262,12 @@ def streaming_conformance_footprints(): streaming_conformance_footprints.execute_script() +def align_approx_pt(): + from examples import align_approx_pt + print("\n\nalign_approx_pt") + align_approx_pt.execute_script() + + def streaming_conformance_tbr(): from examples import 
streaming_conformance_tbr print("\n\nstreaming_conformacne_tbr") From 8c982eca54e168e62802c37964eff0a6117ed8fb Mon Sep 17 00:00:00 2001 From: Alessandro Berti Date: Tue, 5 Jan 2021 07:43:58 +0100 Subject: [PATCH 04/21] fix(pm4py): revised test revised test --- tests/other_tests.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/other_tests.py b/tests/other_tests.py index cb36ffa2e..6d25ebb71 100644 --- a/tests/other_tests.py +++ b/tests/other_tests.py @@ -198,6 +198,24 @@ def test_efg_pandas(self): from pm4py.statistics.eventually_follows.pandas import get efg = get.apply(dataframe, parameters={get.Parameters.START_TIMESTAMP_KEY: "start_timestamp"}) + def test_dfg_playout(self): + import pm4py + from pm4py.objects.dfg.utils import dfg_playout + log = pm4py.read_xes(os.path.join("input_data", "running-example.xes")) + dfg, sa, ea = pm4py.discover_dfg(log) + dfg_playout.apply(dfg, sa, ea) + + def test_dfg_align(self): + import pm4py + from pm4py.objects.dfg.filtering import dfg_filtering + from pm4py.objects.dfg.utils import dfg_alignment + log = pm4py.read_xes(os.path.join("input_data", "running-example.xes")) + dfg, sa, ea = pm4py.discover_dfg(log) + act_count = pm4py.get_attribute_values(log, "concept:name") + dfg, sa, ea, act_count = dfg_filtering.filter_dfg_on_activities_percentage(dfg, sa, ea, act_count, 0.5) + dfg, sa, ea, act_count = dfg_filtering.filter_dfg_on_paths_percentage(dfg, sa, ea, act_count, 0.5) + aligned_traces = dfg_alignment.apply(log, dfg, sa, ea) + if __name__ == "__main__": unittest.main() From 33f6b33e9f97f741276b7fd2e7ef47cfb5d86465 Mon Sep 17 00:00:00 2001 From: Alessandro Berti Date: Tue, 5 Jan 2021 08:42:52 +0100 Subject: [PATCH 05/21] fix(pm4py): added missing examples added missing examples --- examples/execute_everything.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/examples/execute_everything.py b/examples/execute_everything.py index d70b2f1e2..8c5a6f85b 100644 
--- a/examples/execute_everything.py +++ b/examples/execute_everything.py @@ -256,18 +256,18 @@ def visualization_align_table(): visualization_align_table.execute_script() -def streaming_conformance_footprints(): - from examples import streaming_conformance_footprints - print("\n\nstreaming_conformance_footprints") - streaming_conformance_footprints.execute_script() - - def align_approx_pt(): from examples import align_approx_pt print("\n\nalign_approx_pt") align_approx_pt.execute_script() +def streaming_conformance_footprints(): + from examples import streaming_conformance_footprints + print("\n\nstreaming_conformance_footprints") + streaming_conformance_footprints.execute_script() + + def streaming_conformance_tbr(): from examples import streaming_conformance_tbr print("\n\nstreaming_conformacne_tbr") @@ -361,3 +361,10 @@ def execute_script(f): execute_script(visualization_align_table) execute_script(pn_to_pt) execute_script(simplified_interface) + execute_script(align_approx_pt) + execute_script(streaming_conformance_footprints) + execute_script(streaming_conformance_tbr) + execute_script(streaming_csv_reader_event_stream) + execute_script(streaming_discovery_dfg) + execute_script(streaming_xes_reader_event_stream) + execute_script(streaming_xes_reader_trace_stream) From 2754e46e53cf4bd7c87739052caaf39ee4992856 Mon Sep 17 00:00:00 2001 From: Alessandro Berti Date: Tue, 5 Jan 2021 09:48:20 +0100 Subject: [PATCH 06/21] fix(pm4py): bug fix of return bug fix of return --- pm4py/objects/dfg/filtering/dfg_filtering.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pm4py/objects/dfg/filtering/dfg_filtering.py b/pm4py/objects/dfg/filtering/dfg_filtering.py index 7617d8d3d..3777c52f4 100644 --- a/pm4py/objects/dfg/filtering/dfg_filtering.py +++ b/pm4py/objects/dfg/filtering/dfg_filtering.py @@ -249,7 +249,7 @@ def filter_dfg_on_paths_percentage(dfg0, start_activities0, end_activities0, act # make sure that the DFG contains only edges between these 
activities dfg = {x: y for x, y in dfg.items() if x[0] in activities_count and x[1] in activities_count} - return dfg, start_activities, end_activities, activities_count + return dfg, start_activities, end_activities, activities_count def clean_dfg_based_on_noise_thresh(dfg, activities, noise_threshold, parameters=None): From 96d21b04bde4918a194d5235c8714b5eab305376 Mon Sep 17 00:00:00 2001 From: Alessandro Berti Date: Thu, 7 Jan 2021 14:44:16 +0100 Subject: [PATCH 07/21] fix(pm4py): fixed start and end activities filters fixed start and end activities filters --- .../end_activities/end_activities_filter.py | 8 ++++++- .../start_activities_filter.py | 21 ++++++++++++------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/pm4py/algo/filtering/log/end_activities/end_activities_filter.py b/pm4py/algo/filtering/log/end_activities/end_activities_filter.py index 445b4c698..7c2807f2e 100644 --- a/pm4py/algo/filtering/log/end_activities/end_activities_filter.py +++ b/pm4py/algo/filtering/log/end_activities/end_activities_filter.py @@ -12,6 +12,7 @@ class Parameters(Enum): ACTIVITY_KEY = PARAMETER_CONSTANT_ACTIVITY_KEY DECREASING_FACTOR = "decreasingFactor" + POSITIVE = "positive" def apply(log, admitted_end_activities, parameters=None): @@ -35,8 +36,13 @@ def apply(log, admitted_end_activities, parameters=None): if parameters is None: parameters = {} attribute_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, DEFAULT_NAME_KEY) + positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True) - filtered_log = [trace for trace in log if trace and trace[-1][attribute_key] in admitted_end_activities] + if positive: + filtered_log = [trace for trace in log if trace and trace[-1][attribute_key] in admitted_end_activities] + else: + filtered_log = [trace for trace in log if trace and trace[-1][attribute_key] not in admitted_end_activities] + return EventLog(filtered_log) diff --git 
a/pm4py/algo/filtering/log/start_activities/start_activities_filter.py b/pm4py/algo/filtering/log/start_activities/start_activities_filter.py index 3dc7075a7..5894cdb66 100644 --- a/pm4py/algo/filtering/log/start_activities/start_activities_filter.py +++ b/pm4py/algo/filtering/log/start_activities/start_activities_filter.py @@ -1,18 +1,20 @@ +from enum import Enum + from pm4py.algo.filtering.common.filtering_constants import DECREASING_FACTOR -from pm4py.statistics.start_activities.common import get as start_activities_common -from pm4py.statistics.start_activities.log.get import get_start_activities from pm4py.algo.filtering.log.variants import variants_filter from pm4py.objects.log.log import EventLog -from pm4py.util.xes_constants import DEFAULT_NAME_KEY +from pm4py.statistics.start_activities.common import get as start_activities_common +from pm4py.statistics.start_activities.log.get import get_start_activities from pm4py.util import constants -from pm4py.util.constants import PARAMETER_CONSTANT_ACTIVITY_KEY -from enum import Enum from pm4py.util import exec_utils +from pm4py.util.constants import PARAMETER_CONSTANT_ACTIVITY_KEY +from pm4py.util.xes_constants import DEFAULT_NAME_KEY class Parameters(Enum): ACTIVITY_KEY = PARAMETER_CONSTANT_ACTIVITY_KEY DECREASING_FACTOR = "decreasingFactor" + POSITIVE = "positive" def apply(log, admitted_start_activities, parameters=None): @@ -36,8 +38,13 @@ def apply(log, admitted_start_activities, parameters=None): if parameters is None: parameters = {} attribute_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, DEFAULT_NAME_KEY) - - filtered_log = EventLog([trace for trace in log if trace and trace[0][attribute_key] in admitted_start_activities]) + positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True) + if positive: + filtered_log = EventLog( + [trace for trace in log if trace and trace[0][attribute_key] in admitted_start_activities]) + else: + filtered_log = EventLog( + [trace for 
trace in log if trace and trace[0][attribute_key] not in admitted_start_activities]) return filtered_log From 9d9469e00883a322a680002e021c482a7a45ef65 Mon Sep 17 00:00:00 2001 From: Alessandro Berti Date: Thu, 7 Jan 2021 15:06:09 +0100 Subject: [PATCH 08/21] changed variants filtering in the example --- examples/simplified_interface.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/examples/simplified_interface.py b/examples/simplified_interface.py index 502db10bb..d8e136d53 100644 --- a/examples/simplified_interface.py +++ b/examples/simplified_interface.py @@ -1,6 +1,7 @@ -import pm4py import os +import pm4py + def execute_script(): ENABLE_VISUALIZATION = False @@ -98,9 +99,11 @@ def execute_script(): print("variants log = ", pm4py.get_variants(log2)) print("variants df = ", pm4py.get_variants(df2)) print("variants filter log = ", - len(pm4py.filter_variants(log2, ["register request,examine thoroughly,check ticket,decide,reject request"]))) + len(pm4py.filter_variants(log2, [ + ["register request", "examine thoroughly", "check ticket", "decide", "reject request"]]))) print("variants filter df = ", - len(pm4py.filter_variants(df2, ["register request,examine thoroughly,check ticket,decide,reject request"]))) + len(pm4py.filter_variants(df2, [ + ["register request", "examine thoroughly", "check ticket", "decide", "reject request"]]))) print("variants filter percentage = ", len(pm4py.filter_variants_percentage(log2, percentage=0.8))) print("paths filter log len = ", len(pm4py.filter_paths(log2, [("register request", "examine casually")]))) From 9acb6eba37f0aff8c971d53c24103f04b3fbe515 Mon Sep 17 00:00:00 2001 From: "Sebastiaan J. 
van Zelst" Date: Thu, 7 Jan 2021 15:30:55 +0100 Subject: [PATCH 09/21] merge conflict resolved --- examples/simplified_interface.py | 26 +++--- pm4py/__init__.py | 2 +- pm4py/filtering.py | 136 +++++++++++++++++++------------ 3 files changed, 98 insertions(+), 66 deletions(-) diff --git a/examples/simplified_interface.py b/examples/simplified_interface.py index d8e136d53..747e71347 100644 --- a/examples/simplified_interface.py +++ b/examples/simplified_interface.py @@ -86,15 +86,15 @@ def execute_script(): print("end_activities len(filt_log) = ", len(pm4py.filter_end_activities(log2, ["pay compensation"]))) print("end_activities len(filt_df) = ", len(pm4py.filter_end_activities(df2, ["pay compensation"]))) print("attributes org:resource len(filt_log) (cases) cases = ", - len(pm4py.filter_attribute_values(log2, "org:resource", ["Ellen"], how="cases"))) + len(pm4py.filter_attribute_values(log2, "org:resource", ["Ellen"], level="cases"))) print("attributes org:resource len(filt_log) (cases) events = ", - len(pm4py.filter_attribute_values(log2, "org:resource", ["Ellen"], how="events"))) + len(pm4py.filter_attribute_values(log2, "org:resource", ["Ellen"], level="events"))) print("attributes org:resource len(filt_df) (events) cases = ", - len(pm4py.filter_attribute_values(df2, "org:resource", ["Ellen"], how="cases"))) + len(pm4py.filter_attribute_values(df2, "org:resource", ["Ellen"], level="cases"))) print("attributes org:resource len(filt_df) (events) events = ", - len(pm4py.filter_attribute_values(df2, "org:resource", ["Ellen"], how="events"))) + len(pm4py.filter_attribute_values(df2, "org:resource", ["Ellen"], level="events"))) print("attributes org:resource len(filt_df) (events) events notpositive = ", - len(pm4py.filter_attribute_values(df2, "org:resource", ["Ellen"], how="events", positive=False))) + len(pm4py.filter_attribute_values(df2, "org:resource", ["Ellen"], level="events", retain=False))) print("variants log = ", pm4py.get_variants(log2)) print("variants 
df = ", pm4py.get_variants(df2)) @@ -106,21 +106,21 @@ def execute_script(): ["register request", "examine thoroughly", "check ticket", "decide", "reject request"]]))) print("variants filter percentage = ", len(pm4py.filter_variants_percentage(log2, percentage=0.8))) - print("paths filter log len = ", len(pm4py.filter_paths(log2, [("register request", "examine casually")]))) - print("paths filter dataframe len = ", len(pm4py.filter_paths(df2, [("register request", "examine casually")]))) + print("paths filter log len = ", len(pm4py.filter_directly_follows_relation(log2, [("register request", "examine casually")]))) + print("paths filter dataframe len = ", len(pm4py.filter_directly_follows_relation(df2, [("register request", "examine casually")]))) print("timeframe filter log events len = ", - len(pm4py.filter_timestamp(log2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", how="events"))) + len(pm4py.filter_timestamp(log2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="events"))) print("timeframe filter log traces_contained len = ", - len(pm4py.filter_timestamp(log2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", how="traces_contained"))) + len(pm4py.filter_timestamp(log2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="traces_contained"))) print("timeframe filter log traces_intersecting len = ", - len(pm4py.filter_timestamp(log2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", how="traces_intersecting"))) + len(pm4py.filter_timestamp(log2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="traces_intersecting"))) print("timeframe filter df events len = ", - len(pm4py.filter_timestamp(df2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", how="events"))) + len(pm4py.filter_timestamp(df2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="events"))) print("timeframe filter df traces_contained len = ", - len(pm4py.filter_timestamp(df2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", how="traces_contained"))) + len(pm4py.filter_timestamp(df2, "2011-01-01 00:00:00", 
"2011-02-01 00:00:00", mode="traces_contained"))) print("timeframe filter df traces_intersecting len = ", - len(pm4py.filter_timestamp(df2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", how="traces_intersecting"))) + len(pm4py.filter_timestamp(df2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="traces_intersecting"))) # remove the temporary files os.remove("ru1.xes") diff --git a/pm4py/__init__.py b/pm4py/__init__.py index 138cffae9..9a725f7ae 100644 --- a/pm4py/__init__.py +++ b/pm4py/__init__.py @@ -73,7 +73,7 @@ save_vis_process_tree, \ view_heuristics_net, save_vis_heuristics_net, view_bpmn, save_vis_bpmn from pm4py.filtering import filter_start_activities, filter_end_activities, filter_attribute_values, filter_variants, \ - filter_variants_percentage, filter_paths, filter_timestamp, filter_trace_attribute + filter_variants_percentage, filter_directly_follows_relation, filter_timestamp, filter_trace_attribute from pm4py.stats import get_start_activities, get_end_activities, get_attributes, get_attribute_values, get_variants, \ get_trace_attributes from pm4py.convert import convert_to_event_log, convert_to_event_stream, convert_to_dataframe, convert_to_bpmn, \ diff --git a/pm4py/filtering.py b/pm4py/filtering.py index bb7648a54..ec6493681 100644 --- a/pm4py/filtering.py +++ b/pm4py/filtering.py @@ -1,8 +1,11 @@ +import deprecation + +from pm4py import VERSION as PM4PY_CURRENT_VERSION from pm4py.util import constants from pm4py.util.pandas_utils import check_is_dataframe, check_dataframe_columns -def filter_start_activities(log, admitted_start_activities): +def filter_start_activities(log, activities, retain=True): """ Filter cases having a start activity in the provided list @@ -10,8 +13,11 @@ def filter_start_activities(log, admitted_start_activities): -------------- log Log object - admitted_start_activities - List of admitted start activities + activities + List start activities + retain + if True, we retain the traces containing the given activities, if 
false, we drop the traces + Returns -------------- @@ -21,13 +27,15 @@ def filter_start_activities(log, admitted_start_activities): if check_is_dataframe(log): check_dataframe_columns(log) from pm4py.algo.filtering.pandas.start_activities import start_activities_filter - return start_activities_filter.apply(log, admitted_start_activities) + return start_activities_filter.apply(log, activities, + parameters={start_activities_filter.Parameters.POSITIVE: retain}) else: from pm4py.algo.filtering.log.start_activities import start_activities_filter - return start_activities_filter.apply(log, admitted_start_activities) + return start_activities_filter.apply(log, activities, + parameters={start_activities_filter.Parameters.POSITIVE: retain}) -def filter_end_activities(log, admitted_end_activities): +def filter_end_activities(log, activities, retain=True): """ Filter cases having an end activity in the provided list @@ -35,8 +43,11 @@ def filter_end_activities(log, admitted_end_activities): --------------- log Log object - admitted_end_activities + activities List of admitted end activities + retain + if True, we retain the traces containing the given activities, if false, we drop the traces + Returns --------------- @@ -46,13 +57,15 @@ def filter_end_activities(log, admitted_end_activities): if check_is_dataframe(log): check_dataframe_columns(log) from pm4py.algo.filtering.pandas.end_activities import end_activities_filter - return end_activities_filter.apply(log, admitted_end_activities) + return end_activities_filter.apply(log, activities, + parameters={end_activities_filter.Parameters.POSITIVE: retain}) else: from pm4py.algo.filtering.log.end_activities import end_activities_filter - return end_activities_filter.apply(log, admitted_end_activities) + return end_activities_filter.apply(log, activities, + parameters={end_activities_filter.Parameters.POSITIVE: retain}) -def filter_attribute_values(log, attribute, values, how="cases", positive=True): +def 
filter_attribute_values(log, attribute, values, level="case", retain=True): """ Filter a log object on the values of some attribute @@ -64,10 +77,10 @@ def filter_attribute_values(log, attribute, values, how="cases", positive=True): Attribute to filter values Admitted (or forbidden) values - how - Specifies how the filter should be applied (cases filters the cases where at least one occurrence happens, - events filter the events eventually trimming the cases) - positive + level + Specifies how the filter should be applied ('case' filters the cases where at least one occurrence happens, + 'event' filters the events, possibly trimming the cases) + retain Specified if the values should be kept or removed Returns @@ -78,25 +91,25 @@ def filter_attribute_values(log, attribute, values, how="cases", positive=True): if check_is_dataframe(log): check_dataframe_columns(log) from pm4py.algo.filtering.pandas.attributes import attributes_filter - if how == "events": + if level == "event": return attributes_filter.apply_events(log, values, parameters={constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute, - attributes_filter.Parameters.POSITIVE: positive}) - elif how == "cases": + attributes_filter.Parameters.POSITIVE: retain}) + elif level == "case": return attributes_filter.apply(log, values, parameters={ - constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute, attributes_filter.Parameters.POSITIVE: positive}) + constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute, attributes_filter.Parameters.POSITIVE: retain}) else: from pm4py.algo.filtering.log.attributes import attributes_filter - if how == "events": + if level == "event": return attributes_filter.apply_events(log, values, parameters={constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute, - attributes_filter.Parameters.POSITIVE: positive}) + attributes_filter.Parameters.POSITIVE: retain}) else: return attributes_filter.apply(log, values, parameters={ - constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute, 
attributes_filter.Parameters.POSITIVE: positive}) + constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute, attributes_filter.Parameters.POSITIVE: retain}) -def filter_trace_attribute(log, attribute, values, positive=True): +def filter_trace_attribute(log, attribute_key, values, retain=True): """ Filter a log on the values of a trace attribute @@ -104,12 +117,12 @@ def filter_trace_attribute(log, attribute, values, positive=True): -------------- log Event log - attribute + attribute_key Attribute to filter values Values to filter (list of) - positive - Boolean value (keep/discard cases) + retain + Boolean value (keep/discard matching traces) Returns -------------- @@ -119,15 +132,16 @@ def filter_trace_attribute(log, attribute, values, positive=True): if check_is_dataframe(log): check_dataframe_columns(log) from pm4py.algo.filtering.pandas.attributes import attributes_filter - return attributes_filter.apply(log, values, parameters={attributes_filter.Parameters.ATTRIBUTE_KEY: attribute, - attributes_filter.Parameters.POSITIVE: positive}) + return attributes_filter.apply(log, values, + parameters={attributes_filter.Parameters.ATTRIBUTE_KEY: attribute_key, + attributes_filter.Parameters.POSITIVE: retain}) else: from pm4py.algo.filtering.log.attributes import attributes_filter return attributes_filter.apply_trace_attribute(log, values, parameters={ - attributes_filter.Parameters.ATTRIBUTE_KEY: attribute, attributes_filter.Parameters.POSITIVE: positive}) + attributes_filter.Parameters.ATTRIBUTE_KEY: attribute_key, attributes_filter.Parameters.POSITIVE: retain}) -def filter_variants(log, admitted_variants): +def filter_variants(log, variants, retain=True): """ Filter a log on a specified set of variants @@ -135,8 +149,10 @@ def filter_variants(log, admitted_variants): --------------- log Event log - admitted_variants - List of variants to filter + variants + collection of variants to filter; A variant should be specified as a list of activity names, e.g., ['a','b','c'] + 
retain + boolean; if True all traces conforming to the specified variants are retained; if False, all those traces are removed Returns -------------- @@ -146,13 +162,18 @@ def filter_variants(log, admitted_variants): if check_is_dataframe(log): check_dataframe_columns(log) from pm4py.algo.filtering.pandas.variants import variants_filter - return variants_filter.apply(log, admitted_variants) + return variants_filter.apply(log, [",".join(v) for v in variants], + parameters={variants_filter.Parameters.POSITIVE: retain}) else: from pm4py.algo.filtering.log.variants import variants_filter - return variants_filter.apply(log, admitted_variants) + return variants_filter.apply(log, [",".join(v) for v in variants], + parameters={variants_filter.Parameters.POSITIVE: retain}) -def filter_variants_percentage(log, percentage=0.8): +@deprecation.deprecated(deprecated_in='2.1.3.1', removed_in='2.3.0', current_version=PM4PY_CURRENT_VERSION, + details='Filtering method will be removed due to fuzzy interpretation of the threshold.\ + Will be replaced with two new functions filter_variants_top_k and filter_variants_relative_frequency') +def filter_variants_percentage(log, threshold=0.8): """ Filter a log on the percentage of variants @@ -160,8 +181,8 @@ def filter_variants_percentage(log, percentage=0.8): --------------- log Event log - percentage - Percentage of admitted variants + threshold + Percentage (scale 0-1) of admitted variants Returns -------------- @@ -173,20 +194,28 @@ def filter_variants_percentage(log, percentage=0.8): "filtering variants percentage on Pandas dataframe is currently not available! 
please convert the dataframe to event log with the method: log = pm4py.convert_to_event_log(df)") else: from pm4py.algo.filtering.log.variants import variants_filter - return variants_filter.filter_log_variants_percentage(log, percentage=percentage) + return variants_filter.filter_log_variants_percentage(log, percentage=threshold) + + +@deprecation.deprecated(deprecated_in='2.1.3.1', removed_in='2.3.0', current_version=PM4PY_CURRENT_VERSION, + details='Use filter_directly_follows_relation') +def filter_paths(log, allowed_paths, retain=True): + return filter_directly_follows_relation(log, allowed_paths, retain) -def filter_paths(log, allowed_paths, positive=True): +def filter_directly_follows_relation(log, relations, retain=True): """ - Filter a log on a specified list of paths + Retain traces that contain any of the specified 'directly follows' relations. + For example, if relations == [('a','b'),('a','c')] and log [,,] + the resulting log will contain traces describing [,]. Parameters --------------- log Log object - allowed_paths - Allowed/forbidden paths - positive + relations + List of activity name pairs, which are allowed/forbidden paths + retain Parameter that says whether the paths should be kept/removed @@ -197,13 +226,13 @@ def filter_paths(log, allowed_paths, positive=True): """ if check_is_dataframe(log): from pm4py.algo.filtering.pandas.paths import paths_filter - return paths_filter.apply(log, allowed_paths, parameters={paths_filter.Parameters.POSITIVE: positive}) + return paths_filter.apply(log, relations, parameters={paths_filter.Parameters.POSITIVE: retain}) else: from pm4py.algo.filtering.log.paths import paths_filter - return paths_filter.apply(log, allowed_paths, parameters={paths_filter.Parameters.POSITIVE: positive}) + return paths_filter.apply(log, relations, parameters={paths_filter.Parameters.POSITIVE: retain}) -def filter_timestamp(log, dt1, dt2, how="events"): +def filter_timestamp(log, dt1, dt2, mode="events"): """ Filter a log on a time 
interval @@ -215,8 +244,11 @@ def filter_timestamp(log, dt1, dt2, how="events"): Left extreme of the interval dt2 Right extreme of the interval - how + mode Modality of filtering (events, traces_contained, traces_intersecting) + events: any event that fits the time frame is retained + traces_contained: any trace completely contained in the timeframe is retained + traces_intersecting: any trace intersecting with the time-frame is retained. Returns ---------------- @@ -225,17 +257,17 @@ def filter_timestamp(log, dt1, dt2, how="events"): """ if check_is_dataframe(log): from pm4py.algo.filtering.pandas.timestamp import timestamp_filter - if how == "events": + if mode == "events": return timestamp_filter.apply_events(log, dt1, dt2) - elif how == "traces_contained": + elif mode == "traces_contained": return timestamp_filter.filter_traces_contained(log, dt1, dt2) - elif how == "traces_intersecting": + elif mode == "traces_intersecting": return timestamp_filter.filter_traces_intersecting(log, dt1, dt2) else: from pm4py.algo.filtering.log.timestamp import timestamp_filter - if how == "events": + if mode == "events": return timestamp_filter.apply_events(log, dt1, dt2) - elif how == "traces_contained": + elif mode == "traces_contained": return timestamp_filter.filter_traces_contained(log, dt1, dt2) - elif how == "traces_intersecting": + elif mode == "traces_intersecting": return timestamp_filter.filter_traces_intersecting(log, dt1, dt2) From 656603f41699d018cc896a015cb35ab6abcb4e09 Mon Sep 17 00:00:00 2001 From: "Sebastiaan J. 
van Zelst" Date: Thu, 7 Jan 2021 15:36:58 +0100 Subject: [PATCH 10/21] backward compatibility on filter paths --- scripts/script.py | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 scripts/script.py diff --git a/scripts/script.py b/scripts/script.py new file mode 100644 index 000000000..7be14c328 --- /dev/null +++ b/scripts/script.py @@ -0,0 +1,7 @@ +import pm4py + + +if __name__ == "__main__": + log = pm4py.read_xes('../tests/input_data/running-example.xes') + allowed = {'a', 'b', 'c'} + filtered = pm4py.filter_log(lambda t: t[0] in allowed, log) From 78b650b75ede8b6ce14b55b4d506a63f4fbb50e0 Mon Sep 17 00:00:00 2001 From: Alessandro Berti Date: Thu, 7 Jan 2021 15:45:22 +0100 Subject: [PATCH 11/21] fix(pm4py): added eventually-follows filter in simplified interface --- pm4py/__init__.py | 3 ++- pm4py/filtering.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/pm4py/__init__.py b/pm4py/__init__.py index 9a725f7ae..878dcff37 100644 --- a/pm4py/__init__.py +++ b/pm4py/__init__.py @@ -73,7 +73,8 @@ save_vis_process_tree, \ view_heuristics_net, save_vis_heuristics_net, view_bpmn, save_vis_bpmn from pm4py.filtering import filter_start_activities, filter_end_activities, filter_attribute_values, filter_variants, \ - filter_variants_percentage, filter_directly_follows_relation, filter_timestamp, filter_trace_attribute + filter_variants_percentage, filter_directly_follows_relation, filter_timestamp, filter_trace_attribute, \ + filter_eventually_follows from pm4py.stats import get_start_activities, get_end_activities, get_attributes, get_attribute_values, get_variants, \ get_trace_attributes from pm4py.convert import convert_to_event_log, convert_to_event_stream, convert_to_dataframe, convert_to_bpmn, \ diff --git a/pm4py/filtering.py b/pm4py/filtering.py index ec6493681..1e4b97a5e 100644 --- a/pm4py/filtering.py +++ b/pm4py/filtering.py @@ -232,6 +232,52 @@ def 
filter_directly_follows_relation(log, relations, retain=True): return paths_filter.apply(log, relations, parameters={paths_filter.Parameters.POSITIVE: retain}) +def filter_eventually_follows(log, relations, retain=True): + """ + Retain traces that contain any of the specified 'eventually follows' relations. + For example, if relations == [('a','b'),('a','c')] and log [,,] + the resulting log will contain traces describing [,,]. + + Parameters + --------------- + log + Log object + relations + List of activity name pairs, which are allowed/forbidden paths + retain + Parameter that says whether the paths + should be kept/removed + + Returns + ---------------- + filtered_log + Filtered log object + """ + if check_is_dataframe(log): + from pm4py.algo.filtering.pandas.ltl import ltl_checker + cases = set() + for path in relations: + filt_log = ltl_checker.A_eventually_B(log, path[0], path[1], + parameters={ltl_checker.Parameters.POSITIVE: retain}) + cases = cases.union(set(filt_log[constants.CASE_CONCEPT_NAME])) + return log[log[constants.CASE_CONCEPT_NAME].isin(cases)] + else: + from pm4py.objects.log.log import EventLog + from pm4py.algo.filtering.log.ltl import ltl_checker + traces_to_keep = set() + for path in relations: + filt_log = ltl_checker.A_eventually_B(log, path[0], path[1], + parameters={ltl_checker.Parameters.POSITIVE: retain}) + for trace in filt_log: + traces_to_keep.add(id(trace)) + filtered_log = EventLog(attributes=log.attributes, extensions=log.extensions, omni_present=log.omni_present, + classifiers=log.classifiers) + for trace in log: + if id(trace) in traces_to_keep: + filtered_log.append(trace) + return filtered_log + + def filter_timestamp(log, dt1, dt2, mode="events"): """ Filter a log on a time interval From 2562078e5cf0bc4fd64d9a9ad5d1bdba438e5c3b Mon Sep 17 00:00:00 2001 From: Alessandro Berti Date: Thu, 7 Jan 2021 15:53:37 +0100 Subject: [PATCH 12/21] fix(pm4py): bug fix --- pm4py/filtering.py | 25 +++++++++++++++++++------ 1 file changed, 
19 insertions(+), 6 deletions(-) diff --git a/pm4py/filtering.py b/pm4py/filtering.py index 1e4b97a5e..d2ec05422 100644 --- a/pm4py/filtering.py +++ b/pm4py/filtering.py @@ -255,25 +255,38 @@ def filter_eventually_follows(log, relations, retain=True): """ if check_is_dataframe(log): from pm4py.algo.filtering.pandas.ltl import ltl_checker - cases = set() + if retain: + cases = set() + else: + cases = set(log[constants.CASE_CONCEPT_NAME]) for path in relations: filt_log = ltl_checker.A_eventually_B(log, path[0], path[1], parameters={ltl_checker.Parameters.POSITIVE: retain}) - cases = cases.union(set(filt_log[constants.CASE_CONCEPT_NAME])) + this_traces = set(filt_log[constants.CASE_CONCEPT_NAME]) + if retain: + cases = cases.union(this_traces) + else: + cases = cases.intersection(this_traces) return log[log[constants.CASE_CONCEPT_NAME].isin(cases)] else: from pm4py.objects.log.log import EventLog from pm4py.algo.filtering.log.ltl import ltl_checker - traces_to_keep = set() + if retain: + cases = set() + else: + cases = set(id(trace) for trace in log) for path in relations: filt_log = ltl_checker.A_eventually_B(log, path[0], path[1], parameters={ltl_checker.Parameters.POSITIVE: retain}) - for trace in filt_log: - traces_to_keep.add(id(trace)) + this_traces = set(id(trace) for trace in filt_log) + if retain: + cases = cases.union(this_traces) + else: + cases = cases.intersection(this_traces) filtered_log = EventLog(attributes=log.attributes, extensions=log.extensions, omni_present=log.omni_present, classifiers=log.classifiers) for trace in log: - if id(trace) in traces_to_keep: + if id(trace) in cases: filtered_log.append(trace) return filtered_log From cf38a0b589ddf47cbd9fa5a6f829ff0319aed0f5 Mon Sep 17 00:00:00 2001 From: "Sebastiaan J. 
van Zelst" Date: Thu, 7 Jan 2021 15:54:20 +0100 Subject: [PATCH 13/21] minor consistency in the naming of parameters --- pm4py/filtering.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pm4py/filtering.py b/pm4py/filtering.py index d2ec05422..0d6b689a5 100644 --- a/pm4py/filtering.py +++ b/pm4py/filtering.py @@ -65,7 +65,7 @@ def filter_end_activities(log, activities, retain=True): parameters={end_activities_filter.Parameters.POSITIVE: retain}) -def filter_attribute_values(log, attribute, values, level="case", retain=True): +def filter_attribute_values(log, attribute_key, values, level="case", retain=True): """ Filter a log object on the values of some attribute @@ -73,7 +73,7 @@ def filter_attribute_values(log, attribute, values, level="case", retain=True): -------------- log Log object - attribute + attribute_key Attribute to filter values Admitted (or forbidden) values @@ -93,20 +93,20 @@ def filter_attribute_values(log, attribute, values, level="case", retain=True): from pm4py.algo.filtering.pandas.attributes import attributes_filter if level == "event": return attributes_filter.apply_events(log, values, - parameters={constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute, + parameters={constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute_key, attributes_filter.Parameters.POSITIVE: retain}) elif level == "case": return attributes_filter.apply(log, values, parameters={ - constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute, attributes_filter.Parameters.POSITIVE: retain}) + constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute_key, attributes_filter.Parameters.POSITIVE: retain}) else: from pm4py.algo.filtering.log.attributes import attributes_filter if level == "event": return attributes_filter.apply_events(log, values, - parameters={constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute, + parameters={constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute_key, attributes_filter.Parameters.POSITIVE: retain}) else: return 
attributes_filter.apply(log, values, parameters={ - constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute, attributes_filter.Parameters.POSITIVE: retain}) + constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute_key, attributes_filter.Parameters.POSITIVE: retain}) def filter_trace_attribute(log, attribute_key, values, retain=True): From b5cb7f0d85587610800624c20f5799b1adc6b45a Mon Sep 17 00:00:00 2001 From: "Sebastiaan J. van Zelst" Date: Thu, 7 Jan 2021 16:18:49 +0100 Subject: [PATCH 14/21] fix naming consistency in filtering simplified interface --- pm4py/__init__.py | 2 +- pm4py/filtering.py | 22 +++++++++++++++++++--- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/pm4py/__init__.py b/pm4py/__init__.py index 366f3eed2..f9572f875 100644 --- a/pm4py/__init__.py +++ b/pm4py/__init__.py @@ -74,7 +74,7 @@ view_heuristics_net, save_vis_heuristics_net, view_bpmn, save_vis_bpmn from pm4py.filtering import filter_start_activities, filter_end_activities, filter_attribute_values, filter_variants, \ filter_variants_percentage, filter_directly_follows_relation, filter_timestamp, filter_trace_attribute, \ - filter_eventually_follows + filter_eventually_follows, filter_event_attribute_values, filter_trace_attribute_values from pm4py.stats import get_start_activities, get_end_activities, get_attributes, get_attribute_values, get_variants, \ get_trace_attributes from pm4py.convert import convert_to_event_log, convert_to_event_stream, convert_to_dataframe, convert_to_bpmn, \ diff --git a/pm4py/filtering.py b/pm4py/filtering.py index 0d6b689a5..931ad10bc 100644 --- a/pm4py/filtering.py +++ b/pm4py/filtering.py @@ -65,9 +65,16 @@ def filter_end_activities(log, activities, retain=True): parameters={end_activities_filter.Parameters.POSITIVE: retain}) +@deprecation.deprecated(deprecated_in='2.1.4', removed_in='2.3.0', current_version=PM4PY_CURRENT_VERSION, + details='Filtering method will be removed due to fuzzy naming.\ + Use: filter_event_attribute_values') def 
filter_attribute_values(log, attribute_key, values, level="case", retain=True): + return filter_event_attribute_values(log, attribute_key, values, level=level, retain=retain) + + +def filter_event_attribute_values(log, attribute_key, values, level="case", retain=True): """ - Filter a log object on the values of some attribute + Filter a log object on the values of some event attribute Parameters -------------- @@ -97,7 +104,8 @@ def filter_attribute_values(log, attribute_key, values, level="case", retain=Tru attributes_filter.Parameters.POSITIVE: retain}) elif level == "case": return attributes_filter.apply(log, values, parameters={ - constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute_key, attributes_filter.Parameters.POSITIVE: retain}) + constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute_key, + attributes_filter.Parameters.POSITIVE: retain}) else: from pm4py.algo.filtering.log.attributes import attributes_filter if level == "event": @@ -106,10 +114,18 @@ def filter_attribute_values(log, attribute_key, values, level="case", retain=Tru attributes_filter.Parameters.POSITIVE: retain}) else: return attributes_filter.apply(log, values, parameters={ - constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute_key, attributes_filter.Parameters.POSITIVE: retain}) + constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute_key, + attributes_filter.Parameters.POSITIVE: retain}) +@deprecation.deprecated(deprecated_in='2.1.4', removed_in='2.3.0', current_version=PM4PY_CURRENT_VERSION, + details='Filtering method will be removed due to fuzzy naming.\ + Use: filter_event_attribute_values') def filter_trace_attribute(log, attribute_key, values, retain=True): + return filter_trace_attribute_values(log, attribute_key, values, retain=retain) + + +def filter_trace_attribute_values(log, attribute_key, values, retain=True): """ Filter a log on the values of a trace attribute From 430ef0eff86274e576e50c60be88eba5da250cef Mon Sep 17 00:00:00 2001 From: "Sebastiaan J. 
van Zelst" Date: Thu, 7 Jan 2021 16:49:17 +0100 Subject: [PATCH 15/21] update release note for consistency fix in simplified filtering interface --- RELEASE_NOTES | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/RELEASE_NOTES b/RELEASE_NOTES index ed66412de..1e818bbeb 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -1,3 +1,8 @@ +===== PM4Py 2.1.4 ===== +This is a *minor* release, consisting of the following changes: +1. commit b5cb7f0d + fix naming consistency in the filtering interface of pm4py. + ===== PM4Py 2.1.3.1 ===== This is a *hotfix* release, consisting of the following changes: 1. commit f9f894ed From f1c0f0378e8e94b5928c59f58d8ce90ae81de138 Mon Sep 17 00:00:00 2001 From: "Sebastiaan J. van Zelst" Date: Thu, 7 Jan 2021 16:56:11 +0100 Subject: [PATCH 16/21] create more meaningful names in filtering functionality --- examples/simplified_interface.py | 12 ++++++------ pm4py/__init__.py | 4 ++-- pm4py/filtering.py | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/examples/simplified_interface.py b/examples/simplified_interface.py index 747e71347..ea3ceba07 100644 --- a/examples/simplified_interface.py +++ b/examples/simplified_interface.py @@ -110,17 +110,17 @@ def execute_script(): print("paths filter dataframe len = ", len(pm4py.filter_directly_follows_relation(df2, [("register request", "examine casually")]))) print("timeframe filter log events len = ", - len(pm4py.filter_timestamp(log2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="events"))) + len(pm4py.filter_timestamp_range(log2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="events"))) print("timeframe filter log traces_contained len = ", - len(pm4py.filter_timestamp(log2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="traces_contained"))) + len(pm4py.filter_timestamp_range(log2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="traces_contained"))) print("timeframe filter log traces_intersecting len = ", - len(pm4py.filter_timestamp(log2, "2011-01-01 
00:00:00", "2011-02-01 00:00:00", mode="traces_intersecting"))) + len(pm4py.filter_timestamp_range(log2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="traces_intersecting"))) print("timeframe filter df events len = ", - len(pm4py.filter_timestamp(df2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="events"))) + len(pm4py.filter_timestamp_range(df2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="events"))) print("timeframe filter df traces_contained len = ", - len(pm4py.filter_timestamp(df2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="traces_contained"))) + len(pm4py.filter_timestamp_range(df2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="traces_contained"))) print("timeframe filter df traces_intersecting len = ", - len(pm4py.filter_timestamp(df2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="traces_intersecting"))) + len(pm4py.filter_timestamp_range(df2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="traces_intersecting"))) # remove the temporary files os.remove("ru1.xes") diff --git a/pm4py/__init__.py b/pm4py/__init__.py index f9572f875..48528f69c 100644 --- a/pm4py/__init__.py +++ b/pm4py/__init__.py @@ -73,8 +73,8 @@ save_vis_process_tree, \ view_heuristics_net, save_vis_heuristics_net, view_bpmn, save_vis_bpmn from pm4py.filtering import filter_start_activities, filter_end_activities, filter_attribute_values, filter_variants, \ - filter_variants_percentage, filter_directly_follows_relation, filter_timestamp, filter_trace_attribute, \ - filter_eventually_follows, filter_event_attribute_values, filter_trace_attribute_values + filter_variants_percentage, filter_directly_follows_relation, filter_timestamp_range, filter_trace_attribute, \ + filter_eventually_follows_relation, filter_event_attribute_values, filter_trace_attribute_values from pm4py.stats import get_start_activities, get_end_activities, get_attributes, get_attribute_values, get_variants, \ get_trace_attributes from pm4py.convert import convert_to_event_log, 
convert_to_event_stream, convert_to_dataframe, convert_to_bpmn, \ diff --git a/pm4py/filtering.py b/pm4py/filtering.py index 931ad10bc..bd2b1b295 100644 --- a/pm4py/filtering.py +++ b/pm4py/filtering.py @@ -248,7 +248,7 @@ def filter_directly_follows_relation(log, relations, retain=True): return paths_filter.apply(log, relations, parameters={paths_filter.Parameters.POSITIVE: retain}) -def filter_eventually_follows(log, relations, retain=True): +def filter_eventually_follows_relation(log, relations, retain=True): """ Retain traces that contain any of the specified 'eventually follows' relations. For example, if relations == [('a','b'),('a','c')] and log [,,] @@ -307,7 +307,7 @@ def filter_eventually_follows(log, relations, retain=True): return filtered_log -def filter_timestamp(log, dt1, dt2, mode="events"): +def filter_timestamp_range(log, dt1, dt2, mode="events"): """ Filter a log on a time interval From 960d40e9b150450422a6194a21d4a31070ab5857 Mon Sep 17 00:00:00 2001 From: "Sebastiaan J. 
van Zelst" Date: Thu, 7 Jan 2021 17:54:52 +0100 Subject: [PATCH 17/21] create more meaningful names in filtering functionality --- examples/simplified_interface.py | 12 ++++++------ pm4py/__init__.py | 2 +- pm4py/filtering.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/simplified_interface.py b/examples/simplified_interface.py index ea3ceba07..06120cedf 100644 --- a/examples/simplified_interface.py +++ b/examples/simplified_interface.py @@ -110,17 +110,17 @@ def execute_script(): print("paths filter dataframe len = ", len(pm4py.filter_directly_follows_relation(df2, [("register request", "examine casually")]))) print("timeframe filter log events len = ", - len(pm4py.filter_timestamp_range(log2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="events"))) + len(pm4py.filter_time_range(log2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="events"))) print("timeframe filter log traces_contained len = ", - len(pm4py.filter_timestamp_range(log2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="traces_contained"))) + len(pm4py.filter_time_range(log2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="traces_contained"))) print("timeframe filter log traces_intersecting len = ", - len(pm4py.filter_timestamp_range(log2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="traces_intersecting"))) + len(pm4py.filter_time_range(log2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="traces_intersecting"))) print("timeframe filter df events len = ", - len(pm4py.filter_timestamp_range(df2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="events"))) + len(pm4py.filter_time_range(df2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="events"))) print("timeframe filter df traces_contained len = ", - len(pm4py.filter_timestamp_range(df2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="traces_contained"))) + len(pm4py.filter_time_range(df2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="traces_contained"))) print("timeframe 
filter df traces_intersecting len = ", - len(pm4py.filter_timestamp_range(df2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="traces_intersecting"))) + len(pm4py.filter_time_range(df2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="traces_intersecting"))) # remove the temporary files os.remove("ru1.xes") diff --git a/pm4py/__init__.py b/pm4py/__init__.py index 48528f69c..691176f83 100644 --- a/pm4py/__init__.py +++ b/pm4py/__init__.py @@ -73,7 +73,7 @@ save_vis_process_tree, \ view_heuristics_net, save_vis_heuristics_net, view_bpmn, save_vis_bpmn from pm4py.filtering import filter_start_activities, filter_end_activities, filter_attribute_values, filter_variants, \ - filter_variants_percentage, filter_directly_follows_relation, filter_timestamp_range, filter_trace_attribute, \ + filter_variants_percentage, filter_directly_follows_relation, filter_time_range, filter_trace_attribute, \ filter_eventually_follows_relation, filter_event_attribute_values, filter_trace_attribute_values from pm4py.stats import get_start_activities, get_end_activities, get_attributes, get_attribute_values, get_variants, \ get_trace_attributes diff --git a/pm4py/filtering.py b/pm4py/filtering.py index bd2b1b295..6fd24f332 100644 --- a/pm4py/filtering.py +++ b/pm4py/filtering.py @@ -307,7 +307,7 @@ def filter_eventually_follows_relation(log, relations, retain=True): return filtered_log -def filter_timestamp_range(log, dt1, dt2, mode="events"): +def filter_time_range(log, dt1, dt2, mode="events"): """ Filter a log on a time interval From 2cb6fc652d870223761b0d67e8c1347fb0e2bade Mon Sep 17 00:00:00 2001 From: "Sebastiaan J. 
van Zelst" Date: Fri, 8 Jan 2021 09:05:18 +0100 Subject: [PATCH 18/21] remove script.py from scripts folder --- scripts/script.py | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 scripts/script.py diff --git a/scripts/script.py b/scripts/script.py deleted file mode 100644 index 7be14c328..000000000 --- a/scripts/script.py +++ /dev/null @@ -1,7 +0,0 @@ -import pm4py - - -if __name__ == "__main__": - log = pm4py.read_xes('../tests/input_data/running-example.xes') - allowed = {'a', 'b', 'c'} - filtered = pm4py.filter_log(lambda t: t[0] in allowed, log) From 948e6af8440edf7f272b6c2230219a87a593defc Mon Sep 17 00:00:00 2001 From: Alessandro Berti Date: Fri, 8 Jan 2021 09:09:37 +0100 Subject: [PATCH 19/21] fix(pm4py): fixed example and made if-statement coherent fixed example and made if-statement correct --- examples/simplified_interface.py | 18 ++++++++++-------- pm4py/filtering.py | 2 +- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/examples/simplified_interface.py b/examples/simplified_interface.py index 06120cedf..865d0d4cb 100644 --- a/examples/simplified_interface.py +++ b/examples/simplified_interface.py @@ -86,15 +86,15 @@ def execute_script(): print("end_activities len(filt_log) = ", len(pm4py.filter_end_activities(log2, ["pay compensation"]))) print("end_activities len(filt_df) = ", len(pm4py.filter_end_activities(df2, ["pay compensation"]))) print("attributes org:resource len(filt_log) (cases) cases = ", - len(pm4py.filter_attribute_values(log2, "org:resource", ["Ellen"], level="cases"))) + len(pm4py.filter_attribute_values(log2, "org:resource", ["Ellen"], level="case"))) print("attributes org:resource len(filt_log) (cases) events = ", - len(pm4py.filter_attribute_values(log2, "org:resource", ["Ellen"], level="events"))) + len(pm4py.filter_attribute_values(log2, "org:resource", ["Ellen"], level="event"))) print("attributes org:resource len(filt_df) (events) cases = ", - len(pm4py.filter_attribute_values(df2, "org:resource", 
["Ellen"], level="cases"))) + len(pm4py.filter_attribute_values(df2, "org:resource", ["Ellen"], level="case"))) print("attributes org:resource len(filt_df) (events) events = ", - len(pm4py.filter_attribute_values(df2, "org:resource", ["Ellen"], level="events"))) + len(pm4py.filter_attribute_values(df2, "org:resource", ["Ellen"], level="event"))) print("attributes org:resource len(filt_df) (events) events notpositive = ", - len(pm4py.filter_attribute_values(df2, "org:resource", ["Ellen"], level="events", retain=False))) + len(pm4py.filter_attribute_values(df2, "org:resource", ["Ellen"], level="event", retain=False))) print("variants log = ", pm4py.get_variants(log2)) print("variants df = ", pm4py.get_variants(df2)) @@ -104,10 +104,12 @@ def execute_script(): print("variants filter df = ", len(pm4py.filter_variants(df2, [ ["register request", "examine thoroughly", "check ticket", "decide", "reject request"]]))) - print("variants filter percentage = ", len(pm4py.filter_variants_percentage(log2, percentage=0.8))) + print("variants filter percentage = ", len(pm4py.filter_variants_percentage(log2, threshold=0.8))) - print("paths filter log len = ", len(pm4py.filter_directly_follows_relation(log2, [("register request", "examine casually")]))) - print("paths filter dataframe len = ", len(pm4py.filter_directly_follows_relation(df2, [("register request", "examine casually")]))) + print("paths filter log len = ", + len(pm4py.filter_directly_follows_relation(log2, [("register request", "examine casually")]))) + print("paths filter dataframe len = ", + len(pm4py.filter_directly_follows_relation(df2, [("register request", "examine casually")]))) print("timeframe filter log events len = ", len(pm4py.filter_time_range(log2, "2011-01-01 00:00:00", "2011-02-01 00:00:00", mode="events"))) diff --git a/pm4py/filtering.py b/pm4py/filtering.py index 6fd24f332..b60afdba5 100644 --- a/pm4py/filtering.py +++ b/pm4py/filtering.py @@ -112,7 +112,7 @@ def filter_event_attribute_values(log, 
attribute_key, values, level="case", reta return attributes_filter.apply_events(log, values, parameters={constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute_key, attributes_filter.Parameters.POSITIVE: retain}) - else: + elif level == "case": return attributes_filter.apply(log, values, parameters={ constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute_key, attributes_filter.Parameters.POSITIVE: retain}) From c3049ff468d5dba8cdde08d770236a8bb0549065 Mon Sep 17 00:00:00 2001 From: Alessandro Berti Date: Fri, 8 Jan 2021 09:11:54 +0100 Subject: [PATCH 20/21] fix(pm4py): updated requirements updated requirements --- Dockerfile | 2 +- README.THIRD_PARTY.md | 4 ++-- requirements_stable.txt | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 221b535f7..120cf9e86 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,7 +13,7 @@ RUN apt-get -y install libtool flex bison pkg-config g++ libssl-dev automake RUN apt-get -y install libjemalloc-dev libboost-dev libboost-filesystem-dev libboost-system-dev libboost-regex-dev python3-dev autoconf flex bison cmake RUN apt-get -y install libxml2-dev libxslt-dev libfreetype6-dev libsuitesparse-dev RUN pip install -U wheel six pytest -RUN pip install MarkupSafe==1.1.1 backcall==0.2.0 certifi==2020.12.5 colorama==0.4.3 decorator==4.4.2 ipython-genutils==0.2.0 joblib==1.0.0 more-itertools==8.6.0 mpmath==1.1.0 numpy==1.19.3 parso==0.7.1 pickleshare==0.7.5 Pillow==8.1.0 Pygments==2.7.3 pyparsing==2.4.7 pytz==2020.5 setuptools==51.1.1 six==1.15.0 sortedcontainers==2.3.0 threadpoolctl==2.1.0 wcwidth==0.2.5 cycler==0.10.0 jedi==0.18.0 jinja2==2.11.2 kiwisolver==1.3.1 networkx==2.5 packaging==20.8 prompt-toolkit==3.0.7 python-dateutil==2.8.1 scipy==1.6.0 traitlets==5.0.5 zipp==3.4.0 importlib-metadata==3.3.0 ipython==7.19.0 jsonpickle==1.4.2 deprecation==2.1.0 graphviz==0.16 intervaltree==3.1.0 lxml==4.6.1 matplotlib==3.3.3 pandas==1.2.0 pulp==2.1 pydotplus==2.0.2 pyvis==0.1.8.2 scikit-learn==0.24.0 
StringDist==1.0.9 sympy==1.7.1 cython==0.29.21 tqdm==4.55.1 +RUN pip install MarkupSafe==1.1.1 backcall==0.2.0 certifi==2020.12.5 colorama==0.4.3 decorator==4.4.2 ipython-genutils==0.2.0 joblib==1.0.0 more-itertools==8.6.0 mpmath==1.1.0 numpy==1.19.3 parso==0.8.1 pickleshare==0.7.5 Pillow==8.1.0 Pygments==2.7.3 pyparsing==2.4.7 pytz==2020.5 setuptools==51.1.1 six==1.15.0 sortedcontainers==2.3.0 threadpoolctl==2.1.0 wcwidth==0.2.5 cycler==0.10.0 jedi==0.18.0 jinja2==2.11.2 kiwisolver==1.3.1 networkx==2.5 packaging==20.8 prompt-toolkit==3.0.9 python-dateutil==2.8.1 scipy==1.6.0 traitlets==5.0.5 zipp==3.4.0 importlib-metadata==3.3.0 ipython==7.19.0 jsonpickle==1.4.2 deprecation==2.1.0 graphviz==0.16 intervaltree==3.1.0 lxml==4.6.1 matplotlib==3.3.3 pandas==1.2.0 pulp==2.1 pydotplus==2.0.2 pyvis==0.1.8.2 scikit-learn==0.24.0 StringDist==1.0.9 sympy==1.7.1 cython==0.29.21 tqdm==4.55.1 COPY . /app RUN cd /app && cp tests/test_dockers/setups/setup_master.py setup.py && python setup.py install diff --git a/README.THIRD_PARTY.md b/README.THIRD_PARTY.md index e66c72b61..28192edb8 100644 --- a/README.THIRD_PARTY.md +++ b/README.THIRD_PARTY.md @@ -32,9 +32,9 @@ to change as libraries are added or removed. 
| numpy | https://pypi.org/project/numpy | BSD | 1.19.3 | | X | | packaging | https://pypi.org/project/packaging | Apache 2.0 | 20.8 | | | | pandas | https://pypi.org/project/pandas | BSD | 1.2.0 | X | X | -| parso | https://pypi.org/project/parso | MIT | 0.7.1 | | | +| parso | https://pypi.org/project/parso | MIT | 0.8.1 | | | | pickleshare | https://pypi.org/project/pickleshare | MIT | 0.7.5 | X | | -| prompt-toolkit | https://pypi.org/project/prompt-toolkit | BSD | 3.0.7 | | | +| prompt-toolkit | https://pypi.org/project/prompt-toolkit | BSD | 3.0.9 | | | | pulp | https://pypi.org/project/pulp | BSD | 2.1 | X | | | pydotplus | https://pypi.org/project/pydotplus | MIT | 2.0.2 | X | | | pygments | https://pypi.org/project/pygments | BSD | 2.7.3 | | | diff --git a/requirements_stable.txt b/requirements_stable.txt index 02547a9b0..20c47815e 100644 --- a/requirements_stable.txt +++ b/requirements_stable.txt @@ -9,7 +9,7 @@ joblib==1.0.0 more-itertools==8.6.0 mpmath==1.1.0 numpy==1.19.3 -parso==0.7.1 +parso==0.8.1 pickleshare==0.7.5 Pillow==8.1.0 Pygments==2.7.3 @@ -26,7 +26,7 @@ jinja2==2.11.2 kiwisolver==1.3.1 networkx==2.5 packaging==20.8 -prompt-toolkit==3.0.8 +prompt-toolkit==3.0.9 python-dateutil==2.8.1 scipy==1.6.0 traitlets==5.0.5 From 7f43d24eb1e918020984fcb577fc3cb7731d9f36 Mon Sep 17 00:00:00 2001 From: "Sebastiaan J. van Zelst" Date: Fri, 8 Jan 2021 09:17:45 +0100 Subject: [PATCH 21/21] prepare for release 2.1.3.2 --- RELEASE_NOTES | 6 +++--- pm4py/__init__.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 1e818bbeb..cf2108757 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -1,6 +1,6 @@ -===== PM4Py 2.1.4 ===== -This is a *minor* release, consisting of the following changes: -1. commit b5cb7f0d +===== PM4Py 2.1.3.2 ===== +This is a *hotfix* release, consisting of the following changes: +1. commit b5cb7f0d, f1c0f037, 960d40e9 fix naming consistency in the filtering interface of pm4py. 
===== PM4Py 2.1.3.1 ===== diff --git a/pm4py/__init__.py b/pm4py/__init__.py index 691176f83..4b9236d00 100644 --- a/pm4py/__init__.py +++ b/pm4py/__init__.py @@ -1,4 +1,4 @@ -VERSION = '2.1.3.1' +VERSION = '2.1.3.2' import logging import pkgutil