Skip to content

Commit

Permalink
Merge branch '179-support-for-the-computation-of-sojourn-times-and-th…
Browse files Browse the repository at this point in the history
…eir-visualization-in-the-performance-dfg' into 'integration'

[Priority 2] Support for the computation of sojourn times and their visualization in the performance DFG

Closes #179

See merge request process-mining/pm4py/pm4py-core!1163
  • Loading branch information
fit-alessandro-berti committed Nov 20, 2023
2 parents baa6a51 + b2b4d31 commit 19c53e1
Show file tree
Hide file tree
Showing 23 changed files with 116 additions and 74 deletions.
2 changes: 2 additions & 0 deletions docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ Different statistics that could be computed on top of event logs are proposed, i
* :meth:`pm4py.stats.get_all_case_durations`; gets the list of *case durations* for the cases of the event log.
* :meth:`pm4py.stats.get_case_duration`; gets the *case duration* of a specific case in the log.
* :meth:`pm4py.stats.get_stochastic_language`; gets the *stochastic language* of an event log or a process model.
* :meth:`pm4py.stats.get_service_time`; gets the *service time* per activity, aggregated with a chosen measure (mean by default).


Filtering (:mod:`pm4py.filtering`)
Expand Down Expand Up @@ -538,6 +539,7 @@ Overall List of Methods
pm4py.stats.get_all_case_durations
pm4py.stats.get_case_duration
pm4py.stats.get_frequent_trace_segments
pm4py.stats.get_service_time
pm4py.stats.get_activity_position_summary
pm4py.stats.get_stochastic_language
pm4py.filtering
Expand Down
6 changes: 3 additions & 3 deletions examples/corr_mining.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from pm4py.algo.discovery.correlation_mining import algorithm as correlation_miner
from pm4py.objects.log.util import dataframe_utils
from pm4py.statistics.sojourn_time.pandas import get as soj_time_get
from pm4py.statistics.service_time.pandas import get as soj_time_get
from pm4py.statistics.start_activities.pandas import get as sa_get
from pm4py.statistics.end_activities.pandas import get as ea_get
from examples import examples_conf
Expand All @@ -26,10 +26,10 @@ def execute_script():
soj_time = soj_time_get.apply(df, parameters=parameters)
dfg, performance_dfg = correlation_miner.apply(df, variant=correlation_miner.Variants.CLASSIC,
parameters=parameters)
gviz_freq = dfg_vis.apply(dfg, activities_count=act_count, soj_time=soj_time, variant=dfg_vis.Variants.FREQUENCY,
gviz_freq = dfg_vis.apply(dfg, activities_count=act_count, serv_time=soj_time, variant=dfg_vis.Variants.FREQUENCY,
parameters=parameters)
dfg_vis.view(gviz_freq)
gviz_perf = dfg_vis.apply(performance_dfg, activities_count=act_count, soj_time=soj_time,
gviz_perf = dfg_vis.apply(performance_dfg, activities_count=act_count, serv_time=soj_time,
variant=dfg_vis.Variants.PERFORMANCE,
parameters=parameters)
dfg_vis.view(gviz_perf)
Expand Down
2 changes: 1 addition & 1 deletion examples/dfg_min_ex_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.statistics.end_activities.log import get as ea_get
from pm4py.statistics.start_activities.log import get as sa_get
from pm4py.statistics.sojourn_time.log import get as soj_time_get
from pm4py.statistics.service_time.log import get as soj_time_get
from pm4py.statistics.concurrent_activities.log import get as conc_act_get
from pm4py.statistics.eventually_follows.log import get as efg_get
from pm4py.util import constants
Expand Down
6 changes: 3 additions & 3 deletions examples/dfg_min_ex_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from pm4py.objects.conversion.dfg import converter as dfg_conv
from pm4py.statistics.attributes.pandas import get as att_get
from pm4py.statistics.end_activities.pandas import get as ea_get
from pm4py.statistics.sojourn_time.pandas import get as soj_time_get
from pm4py.statistics.service_time.pandas import get as soj_time_get
from pm4py.statistics.concurrent_activities.pandas import get as conc_act_get
from pm4py.statistics.eventually_follows.pandas import get as efg_get
from pm4py.statistics.start_activities.pandas import get as sa_get
Expand Down Expand Up @@ -45,10 +45,10 @@ def execute_script():
print(efg)
dfg_freq, dfg_perf = df_statistics.get_dfg_graph(dataframe, measure="both", start_timestamp_key="start_timestamp")
dfg_gv_freq = dfg_vis_fact.apply(dfg_freq, activities_count=att_count, variant=dfg_vis_fact.Variants.FREQUENCY,
soj_time=soj_time, parameters=parameters)
serv_time=soj_time, parameters=parameters)
dfg_vis_fact.view(dfg_gv_freq)
dfg_gv_perf = dfg_vis_fact.apply(dfg_perf, activities_count=att_count, variant=dfg_vis_fact.Variants.PERFORMANCE,
soj_time=soj_time, parameters=parameters)
serv_time=soj_time, parameters=parameters)
dfg_vis_fact.view(dfg_gv_perf)
net, im, fm = dfg_conv.apply(dfg_freq)
gviz = pn_vis.apply(net, im, fm, parameters=parameters)
Expand Down
2 changes: 1 addition & 1 deletion pm4py/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
get_minimum_self_distances, get_minimum_self_distance_witnesses, get_frequent_trace_segments, \
get_case_arrival_average, get_rework_cases_per_activity, get_case_overlap, get_cycle_time, \
get_all_case_durations, get_case_duration, get_activity_position_summary, get_stochastic_language, \
split_by_process_variant, get_variants_paths_duration
split_by_process_variant, get_variants_paths_duration, get_service_time
from pm4py.sim import play_out, generate_process_tree
from pm4py.ml import split_train_test, get_prefixes_from_log, extract_ocel_features, extract_features_dataframe, extract_temporal_features_dataframe, extract_outcome_enriched_dataframe, extract_target_vector
from pm4py.org import discover_handover_of_work_network, discover_activity_based_resource_similarity, discover_subcontracting_network, discover_working_together_network, discover_organizational_roles, discover_network_analysis
Expand Down
4 changes: 2 additions & 2 deletions pm4py/algo/discovery/heuristics/variants/plusplus.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
from pm4py.statistics.end_activities.pandas import get as pd_ea
from pm4py.statistics.eventually_follows.log import get as efg_get
from pm4py.statistics.eventually_follows.pandas import get as pd_efg
from pm4py.statistics.sojourn_time.log import get as soj_get
from pm4py.statistics.sojourn_time.pandas import get as pd_soj_time
from pm4py.statistics.service_time.log import get as soj_get
from pm4py.statistics.service_time.pandas import get as pd_soj_time
from pm4py.statistics.start_activities.log import get as log_sa
from pm4py.statistics.start_activities.pandas import get as pd_sa
from pm4py.util import exec_utils, constants, xes_constants as xes
Expand Down
2 changes: 1 addition & 1 deletion pm4py/statistics/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from pm4py.statistics import traces, attributes, variants, start_activities, end_activities, \
sojourn_time, concurrent_activities, eventually_follows, rework
service_time, concurrent_activities, eventually_follows, rework
1 change: 1 addition & 0 deletions pm4py/statistics/service_time/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from pm4py.statistics.service_time import log, pandas
1 change: 1 addition & 0 deletions pm4py/statistics/service_time/log/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from pm4py.statistics.service_time.log import get
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class Parameters(Enum):

def apply(log: EventLog, parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> Dict[str, float]:
"""
Gets the sojourn time per activity on an event log object
Gets the service time per activity on an event log object
Parameters
--------------
Expand All @@ -50,7 +50,7 @@ def apply(log: EventLog, parameters: Optional[Dict[Union[str, Parameters], Any]]
Returns
--------------
soj_time_dict
Sojourn time dictionary
Service time dictionary
"""
if parameters is None:
parameters = {}
Expand Down
1 change: 1 addition & 0 deletions pm4py/statistics/service_time/pandas/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from pm4py.statistics.service_time.pandas import get
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class Parameters(Enum):

def apply(dataframe: pd.DataFrame, parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> Dict[str, float]:
"""
Gets the sojourn time per activity on a Pandas dataframe
Gets the service time per activity on a Pandas dataframe
Parameters
--------------
Expand All @@ -48,7 +48,7 @@ def apply(dataframe: pd.DataFrame, parameters: Optional[Dict[Union[str, Paramete
Returns
--------------
soj_time_dict
Sojourn time dictionary
Service time dictionary
"""
if parameters is None:
parameters = {}
Expand Down
2 changes: 1 addition & 1 deletion pm4py/statistics/sojourn_time/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from pm4py.statistics.sojourn_time import log, pandas
from pm4py.statistics.service_time import *
1 change: 0 additions & 1 deletion pm4py/statistics/sojourn_time/log/__init__.py

This file was deleted.

1 change: 0 additions & 1 deletion pm4py/statistics/sojourn_time/pandas/__init__.py

This file was deleted.

37 changes: 37 additions & 0 deletions pm4py/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,43 @@ def get_cycle_time(log: Union[EventLog, pd.DataFrame], activity_key: str = "conc
return cycle_time.apply(log, parameters=properties)


def get_service_time(log: Union[EventLog, pd.DataFrame], aggregation_measure: str = "mean", activity_key: str = "concept:name", timestamp_key: str = "time:timestamp", start_timestamp_key: str = "time:timestamp", case_id_key: str = "case:concept:name") -> Dict[str, float]:
    """
    Returns the service time of the activities of the provided event log,
    aggregated according to the requested measure (mean, median, ...).

    The computation is delegated to the Pandas implementation when the log is
    a dataframe, and to the event log implementation otherwise.

    :param log: event log
    :param aggregation_measure: the aggregation to be used (mean, median, min, max, sum)
    :param activity_key: attribute to be used for the activity
    :param timestamp_key: attribute to be used for the timestamp
    :param start_timestamp_key: attribute to be used for the start timestamp
    :param case_id_key: attribute to be used as case identifier
    :raises Exception: if the object is not a dataframe, an event log or an event stream
    :rtype: ``Dict[str, float]``

    .. code-block:: python3

        import pm4py

        log = pm4py.read_xes('tests/input_data/interval_event_log.xes')
        mean_serv_time = pm4py.get_service_time(log, start_timestamp_key='start_timestamp', aggregation_measure='mean')
        print(mean_serv_time)
        median_serv_time = pm4py.get_service_time(log, start_timestamp_key='start_timestamp', aggregation_measure='median')
        print(median_serv_time)
    """
    # exact-type check (subclasses are rejected), as in the rest of the module
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception("the method can be applied only to a traditional event log!")
    __event_log_deprecation_warning(log)

    # the same set of attribute keys feeds both the properties and the column check
    key_arguments = dict(
        activity_key=activity_key,
        timestamp_key=timestamp_key,
        case_id_key=case_id_key,
        start_timestamp_key=start_timestamp_key,
    )

    parameters = get_properties(log, **key_arguments)
    parameters["aggregationMeasure"] = aggregation_measure

    if not check_is_pandas_dataframe(log):
        # event log / event stream implementation
        from pm4py.statistics.service_time.log import get as log_serv_time_get
        return log_serv_time_get.apply(log, parameters=parameters)

    # dataframe implementation: validate the required columns first
    check_pandas_dataframe_columns(log, **key_arguments)
    from pm4py.statistics.service_time.pandas import get as pandas_serv_time_get
    return pandas_serv_time_get.apply(log, parameters=parameters)


def get_all_case_durations(log: Union[EventLog, pd.DataFrame], business_hours: bool = False, business_hour_slots=constants.DEFAULT_BUSINESS_HOUR_SLOTS, activity_key: str = "concept:name", timestamp_key: str = "time:timestamp", case_id_key: str = "case:concept:name") -> List[float]:
"""
Gets the durations of the cases in the event log
Expand Down
10 changes: 6 additions & 4 deletions pm4py/vis.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def save_vis_petri_net(petri_net: PetriNet, initial_marking: Marking, final_mark


def view_performance_dfg(dfg: dict, start_activities: dict, end_activities: dict, format: str = constants.DEFAULT_FORMAT_GVIZ_VIEW,
aggregation_measure="mean", bgcolor: str = "white", rankdir: str = constants.DEFAULT_RANKDIR_GVIZ):
aggregation_measure="mean", bgcolor: str = "white", rankdir: str = constants.DEFAULT_RANKDIR_GVIZ, serv_time: Optional[Dict[str, float]] = None):
"""
Views a performance DFG
Expand All @@ -94,6 +94,7 @@ def view_performance_dfg(dfg: dict, start_activities: dict, end_activities: dict
:param aggregation_measure: Aggregation measure (default: mean): mean, median, min, max, sum, stdev
:param bgcolor: Background color of the visualization (default: white)
:param rankdir: sets the direction of the graph ("LR" for left-to-right; "TB" for top-to-bottom)
:param serv_time: (optional) provides the activities' service times, used to decorate the graph
.. code-block:: python3
Expand All @@ -113,12 +114,12 @@ def view_performance_dfg(dfg: dict, start_activities: dict, end_activities: dict
parameters[dfg_parameters.AGGREGATION_MEASURE] = aggregation_measure
parameters["bgcolor"] = bgcolor
parameters["rankdir"] = rankdir
gviz = dfg_perf_visualizer.apply(dfg, parameters=parameters)
gviz = dfg_perf_visualizer.apply(dfg, serv_time=serv_time, parameters=parameters)
dfg_visualizer.view(gviz)


def save_vis_performance_dfg(dfg: dict, start_activities: dict, end_activities: dict, file_path: str,
aggregation_measure="mean", bgcolor: str = "white", rankdir: str = constants.DEFAULT_RANKDIR_GVIZ):
aggregation_measure="mean", bgcolor: str = "white", rankdir: str = constants.DEFAULT_RANKDIR_GVIZ, serv_time: Optional[Dict[str, float]] = None):
"""
Saves the visualization of a performance DFG
Expand All @@ -129,6 +130,7 @@ def save_vis_performance_dfg(dfg: dict, start_activities: dict, end_activities:
:param aggregation_measure: Aggregation measure (default: mean): mean, median, min, max, sum, stdev
:param bgcolor: Background color of the visualization (default: white)
:param rankdir: sets the direction of the graph ("LR" for left-to-right; "TB" for top-to-bottom)
:param serv_time: (optional) provides the activities' service times, used to decorate the graph
.. code-block:: python3
Expand All @@ -149,7 +151,7 @@ def save_vis_performance_dfg(dfg: dict, start_activities: dict, end_activities:
parameters[dfg_parameters.AGGREGATION_MEASURE] = aggregation_measure
parameters["bgcolor"] = bgcolor
parameters["rankdir"] = rankdir
gviz = dfg_perf_visualizer.apply(dfg, parameters=parameters)
gviz = dfg_perf_visualizer.apply(dfg, serv_time=serv_time, parameters=parameters)
dfg_visualizer.save(gviz, file_path)


Expand Down
28 changes: 14 additions & 14 deletions pm4py/visualization/dfg/util/dfg_gviz.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,26 +33,26 @@ def get_activities_color(activities_count):
return activities_color


def get_activities_color_soj_time(soj_time):
def get_activities_color_serv_time(serv_time):
"""
Gets the color for the activities based on the sojourn time
Gets the color for the activities based on the service time
Parameters
----------------
soj_time
Sojourn time
serv_time
Service time
Returns
----------------
act_color
Dictionary associating each activity to a color based on the sojourn time
Dictionary associating each activity to a color based on the service time
"""
activities_color = {}

min_soj_time, max_soj_time = get_min_max_value(soj_time)
min_soj_time, max_soj_time = get_min_max_value(serv_time)

for ac in soj_time:
act_soj_time = soj_time[ac]
for ac in serv_time:
act_soj_time = serv_time[ac]

trans_base_color = int(
255 - 100 * (act_soj_time - min_soj_time) / (max_soj_time - min_soj_time + 0.00001))
Expand Down Expand Up @@ -117,7 +117,7 @@ def assign_penwidth_edges(dfg):


def graphviz_visualization(activities_count, dfg, image_format="png", measure="frequency",
max_no_of_edges_in_diagram=100000, start_activities=None, end_activities=None, soj_time=None,
max_no_of_edges_in_diagram=100000, start_activities=None, end_activities=None, serv_time=None,
font_size="12", bgcolor=constants.DEFAULT_BGCOLOR, rankdir=constants.DEFAULT_RANKDIR_GVIZ):
"""
Do GraphViz visualization of a DFG graph
Expand All @@ -138,8 +138,8 @@ def graphviz_visualization(activities_count, dfg, image_format="png", measure="f
Start activities of the log
end_activities
End activities of the log
soj_time
For each activity, the sojourn time in the log
serv_time
For each activity, the service time in the log
font_size
Size of the text on the activities/edges
bgcolor
Expand Down Expand Up @@ -187,7 +187,7 @@ def graphviz_visualization(activities_count, dfg, image_format="png", measure="f
if measure == "frequency":
activities_color = get_activities_color(activities_count_int)
else:
activities_color = get_activities_color_soj_time(soj_time)
activities_color = get_activities_color_serv_time(serv_time)

# represent nodes
viz.attr('node', shape='box')
Expand All @@ -205,8 +205,8 @@ def graphviz_visualization(activities_count, dfg, image_format="png", measure="f
viz.node(str(hash(act)), act + " (" + str(activities_count_int[act]) + ")", style='filled',
fillcolor=activities_color[act], fontsize=font_size)
activities_map[act] = str(hash(act))
elif "performance" in measure and act in soj_time and soj_time[act] >= 0:
viz.node(str(hash(act)), act + " (" + human_readable_stat(soj_time[act]) + ")", fontsize=font_size,
elif "performance" in measure and act in serv_time and serv_time[act] >= 0:
viz.node(str(hash(act)), act + " (" + human_readable_stat(serv_time[act]) + ")", fontsize=font_size,
style='filled', fillcolor=activities_color[act])
activities_map[act] = str(hash(act))
else:
Expand Down
20 changes: 10 additions & 10 deletions pm4py/visualization/dfg/variants/cost.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pm4py.objects.dfg.utils import dfg_utils
from pm4py.util import xes_constants as xes
from pm4py.util import exec_utils
from pm4py.statistics.sojourn_time.log import get as soj_time_get
from pm4py.statistics.service_time.log import get as serv_time_get
from enum import Enum
from pm4py.util import constants

Expand All @@ -30,7 +30,7 @@ class Parameters(Enum):


def apply(dfg: Dict[Tuple[str, str], int], log: EventLog = None, parameters: Optional[Dict[Any, Any]] = None,
activities_count: Dict[str, int] = None, soj_time: Dict[str, float] = None) -> graphviz.Digraph:
activities_count: Dict[str, int] = None, serv_time: Dict[str, float] = None) -> graphviz.Digraph:
"""
Visualize a cost-based directly-follows graph
Expand All @@ -42,8 +42,8 @@ def apply(dfg: Dict[Tuple[str, str], int], log: EventLog = None, parameters: Opt
(if provided) Event log for the calculation of statistics
activities_count
(if provided) Dictionary associating to each activity the number of occurrences in the log.
soj_time
(if provided) Dictionary associating to each activity the average sojourn time
serv_time
(if provided) Dictionary associating to each activity the average service time
parameters
Variant-specific parameters
Expand Down Expand Up @@ -95,13 +95,13 @@ def apply(dfg: Dict[Tuple[str, str], int], log: EventLog = None, parameters: Opt
for act in start_activities:
activities_count[act] += start_activities[act]

if soj_time is None:
if serv_time is None:
if log is not None:
soj_time = soj_time_get.apply(log, parameters=parameters)
serv_time = serv_time_get.apply(log, parameters=parameters)
else:
soj_time = {key: -1 for key in activities}
serv_time = {key: -1 for key in activities}

return dfg_gviz.graphviz_visualization(activities_count, dfg, image_format=image_format, measure="cost",
max_no_of_edges_in_diagram=max_no_of_edges_in_diagram,
start_activities=start_activities, end_activities=end_activities, soj_time=soj_time,
font_size=font_size, bgcolor=bgcolor, rankdir=rankdir)
max_no_of_edges_in_diagram=max_no_of_edges_in_diagram,
start_activities=start_activities, end_activities=end_activities, serv_time=serv_time,
font_size=font_size, bgcolor=bgcolor, rankdir=rankdir)
Loading

0 comments on commit 19c53e1

Please sign in to comment.