diff --git a/examples/powl_discovery.py b/examples/powl_discovery.py index 26aa5db2d..fc77fe189 100644 --- a/examples/powl_discovery.py +++ b/examples/powl_discovery.py @@ -1,18 +1,20 @@ import pm4py from examples import examples_conf +from pm4py.algo.discovery.powl.inductive.variants.powl_discovery_varaints import POWLDiscoveryVariant +from pm4py.visualization.powl.visualizer import POWLVisualizationVariants def execute_script(): log = pm4py.read_xes("../tests/input_data/helpdesk.xes.gz", return_legacy_log_object=True) # discovers the POWL model - powl_model = pm4py.discover_powl(log) + powl_model = pm4py.discover_powl(log, variant=POWLDiscoveryVariant.DYNAMIC_CLUSTERING, order_graph_filtering_threshold=0.6) # prints the repr of the POWL model print(powl_model) # views the POWL model on the screen - pm4py.view_powl(powl_model, format=examples_conf.TARGET_IMG_FORMAT) + pm4py.view_powl(powl_model, format=examples_conf.TARGET_IMG_FORMAT, variant=POWLVisualizationVariants.NET) # converts the POWL model to a Petri net (which can be used for conformance checking) net, im, fm = pm4py.convert_to_petri_net(powl_model) diff --git a/pm4py/algo/conformance/alignments/petri_net/variants/generator_dijkstra_no_heuristics.py b/pm4py/algo/conformance/alignments/petri_net/variants/generator_dijkstra_no_heuristics.py index 38d3aab22..c169a00c3 100644 --- a/pm4py/algo/conformance/alignments/petri_net/variants/generator_dijkstra_no_heuristics.py +++ b/pm4py/algo/conformance/alignments/petri_net/variants/generator_dijkstra_no_heuristics.py @@ -13,7 +13,7 @@ from enum import Enum import sys from pm4py.util.constants import PARAMETER_CONSTANT_ACTIVITY_KEY -from typing import Optional, Dict, Any, Union +from typing import Optional, Dict, Any, Union, List as TList from pm4py.objects.log.obj import Trace from pm4py.objects.petri_net.obj import PetriNet, Marking from pm4py.util import typing @@ -320,7 +320,7 @@ def repr_searchtuple ( st: utils.DijkstraSearchTuple ) -> str : -def rec_hub (hub, curr_m) -> list[list[utils.DijkstraSearchTuple]]: +def rec_hub (hub, curr_m) -> TList[TList[utils.DijkstraSearchTuple]]: out = [] for curr_st in hub[curr_m] : diff --git a/pm4py/algo/discovery/powl/algorithm.py b/pm4py/algo/discovery/powl/algorithm.py index 8113e0592..b0a85748b 100644 --- a/pm4py/algo/discovery/powl/algorithm.py +++ b/pm4py/algo/discovery/powl/algorithm.py @@ -1,10 +1,12 @@ from pm4py.algo.discovery.inductive.dtypes.im_ds import IMDataStructureUVCL -from pm4py.algo.discovery.powl.inductive.variants.im_base import IMBasePOWL -from pm4py.algo.discovery.powl.inductive.variants.im_brute_force import BruteForcePOWL -from pm4py.algo.discovery.powl.inductive.variants.im_cluster import ClusterPOWL -from pm4py.algo.discovery.powl.inductive.variants.powl_discovery_variants import POWLDiscoveryVariant - -from pm4py import util as pmutil +from pm4py.algo.discovery.powl.inductive.variants.im_dynamic_clustering_frequencies import \ + POWLInductiveMinerDynamicClusteringFrequency +from pm4py.algo.discovery.powl.inductive.variants.im_tree import IMBasePOWL +from pm4py.algo.discovery.powl.inductive.variants.im_brute_force import POWLInductiveMinerBruteForce +from pm4py.algo.discovery.powl.inductive.variants.im_maximal import POWLInductiveMinerMaximalOrder +from pm4py.algo.discovery.powl.inductive.variants.powl_discovery_varaints import POWLDiscoveryVariant + +from pm4py import util from pm4py.algo.discovery.inductive.algorithm import Parameters from pm4py.objects.powl.obj import POWL @@ -19,23 +21,25 @@ def get_variant(variant: POWLDiscoveryVariant) -> Type[IMBasePOWL]: - if variant == POWLDiscoveryVariant.IM_BASE: + if variant == POWLDiscoveryVariant.TREE: return IMBasePOWL elif variant == POWLDiscoveryVariant.BRUTE_FORCE: - return BruteForcePOWL - elif variant == POWLDiscoveryVariant.CLUSTER: - return ClusterPOWL + return POWLInductiveMinerBruteForce + elif variant == POWLDiscoveryVariant.MAXIMAL: + return POWLInductiveMinerMaximalOrder + elif variant == POWLDiscoveryVariant.DYNAMIC_CLUSTERING: + return POWLInductiveMinerDynamicClusteringFrequency else: raise Exception('Invalid Variant!') def apply(obj: Union[EventLog, pd.DataFrame, UVCL], parameters: Optional[Dict[Any, Any]] = None, - variant=POWLDiscoveryVariant.CLUSTER, simplify_using_frequent_transitions=False) -> POWL: + variant=POWLDiscoveryVariant.MAXIMAL) -> POWL: if parameters is None: parameters = {} ack = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_util.DEFAULT_NAME_KEY) tk = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, xes_util.DEFAULT_TIMESTAMP_KEY) - cidk = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, pmutil.constants.CASE_CONCEPT_NAME) + cidk = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, util.constants.CASE_CONCEPT_NAME) if type(obj) in [EventLog, pd.DataFrame]: uvcl = comut.get_variants(comut.project_univariate(obj, key=ack, df_glue=cidk, df_sorting_criterion_key=tk)) else: @@ -44,9 +48,6 @@ def apply(obj: Union[EventLog, pd.DataFrame, UVCL], parameters: Optional[Dict[An algorithm = get_variant(variant) im = algorithm(parameters) res = im.apply(IMDataStructureUVCL(uvcl), parameters) - res = res.simplify() - if simplify_using_frequent_transitions: - return res.simplify_using_frequent_transitions() return res diff --git a/pm4py/algo/discovery/powl/inductive/fall_through/factory.py b/pm4py/algo/discovery/powl/inductive/fall_through/factory.py index a9fc1eb28..85fbf502d 100644 --- a/pm4py/algo/discovery/powl/inductive/fall_through/factory.py +++ b/pm4py/algo/discovery/powl/inductive/fall_through/factory.py @@ -1,5 +1,5 @@ from multiprocessing import Pool, Manager -from typing import List, TypeVar, Tuple, Optional, Dict, Any, Type, Union +from typing import List, TypeVar, Tuple, Optional, Dict, Any, Type from pm4py.algo.discovery.inductive.dtypes.im_ds import IMDataStructure, IMDataStructureUVCL from pm4py.algo.discovery.inductive.fall_through.abc import FallThrough @@ -25,7 +25,7 @@ def get_fall_throughs(cls, obj: T, parameters: Optional[Dict[str, Any]] = None) return list() @classmethod - def fall_through(cls, obj: T, pool: Pool, manager: Manager, parameters: Optional[Dict[str, Any]] = None) -> Union[Tuple[POWL, List[T]], None]: + def fall_through(cls, obj: T, pool: Pool, manager: Manager, parameters: Optional[Dict[str, Any]] = None) -> Tuple[POWL, List[T]] | None: for f in FallThroughFactory.get_fall_throughs(obj): r = f.apply(obj, pool, manager, parameters) if r is not None: diff --git a/pm4py/algo/discovery/powl/inductive/fall_through/strict_tau_loop.py b/pm4py/algo/discovery/powl/inductive/fall_through/strict_tau_loop.py index 4c032a6fc..1730c55f6 100644 --- a/pm4py/algo/discovery/powl/inductive/fall_through/strict_tau_loop.py +++ b/pm4py/algo/discovery/powl/inductive/fall_through/strict_tau_loop.py @@ -16,3 +16,4 @@ def apply(cls, obj: IMDataStructureUVCL, pool: Pool = None, manager: Manager = N proj = cls._get_projected_log(log) if sum(proj.values()) > sum(log.values()): return OperatorPOWL(Operator.LOOP, []), [IMDataStructureUVCL(proj), IMDataStructureUVCL(Counter())] + diff --git a/pm4py/algo/discovery/powl/inductive/utils/__init__.py b/pm4py/algo/discovery/powl/inductive/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pm4py/algo/discovery/powl/inductive/utils/filtering.py b/pm4py/algo/discovery/powl/inductive/utils/filtering.py new file mode 100644 index 000000000..ce765720c --- /dev/null +++ b/pm4py/algo/discovery/powl/inductive/utils/filtering.py @@ -0,0 +1,43 @@ +from enum import Enum, auto +from collections import Counter +from pm4py.algo.discovery.inductive.dtypes.im_ds import IMDataStructureUVCL + + +class FilteringType(Enum): + DYNAMIC = auto() + DECREASING_FACTOR = auto() + + +DEFAULT_FILTERING_TYPE = FilteringType.DECREASING_FACTOR +FILTERING_THRESHOLD = "filtering_threshold" +FILTERING_TYPE = "filtering_type" + + +def filter_most_frequent_variants(log): + to_remove_freq = min([freq for var, freq in log.items()]) + new_log = Counter() + for var, freq in log.items(): + if freq == to_remove_freq: + continue + new_log[var] = freq + + return IMDataStructureUVCL(new_log) + + +def filter_most_frequent_variants_with_decreasing_factor(log, decreasing_factor): + sorted_variants = sorted(log, key=log.get, reverse=True) + new_log = Counter() + + already_added_sum = 0 + prev_var_count = -1 + + for variant in sorted_variants: + frequency = log[variant] + if already_added_sum == 0 or frequency > decreasing_factor * prev_var_count: + new_log[variant] = frequency + already_added_sum = already_added_sum + frequency + prev_var_count = frequency + else: + break + + return IMDataStructureUVCL(new_log) diff --git a/pm4py/algo/discovery/powl/inductive/variants/brute_force/bf_partial_order_cut.py b/pm4py/algo/discovery/powl/inductive/variants/brute_force/bf_partial_order_cut.py index 00c0d8d31..73a665d2e 100644 --- a/pm4py/algo/discovery/powl/inductive/variants/brute_force/bf_partial_order_cut.py +++ b/pm4py/algo/discovery/powl/inductive/variants/brute_force/bf_partial_order_cut.py @@ -11,7 +11,6 @@ from pm4py.objects.dfg import util as dfu from pm4py.statistics.eventually_follows.uvcl.get import apply as to_efg -# MAX_NUM_PARTITIONS = 1000 def remove(blocks, g): res = [] @@ -47,14 +46,14 @@ def get_partitions_of_size_k(nodes, k=None): elif k > n: return - def set_partitions_helper(L, k): - n = len(L) + def set_partitions_helper(l, k): + length = len(l) if k == 1: - yield [tuple(L)] - elif n == k: - yield [tuple([s]) for s in L] + yield [tuple(l)] + elif length == k: + yield [tuple([s]) for s in l] else: - e, *M = L + e, *M = l for p in set_partitions_helper(M, k - 1): yield [tuple([e]), *p] for p in set_partitions_helper(M, k): @@ -69,13 +68,9 @@ def set_partitions_helper(L, k): def partition(collection): - # count = 1 i = len(collection) while i > 1: for part in get_partitions_of_size_k(collection, i): - # if count > MAX_NUM_PARTITIONS: - # return - # count = count + 1 yield part i = i - 1 return @@ -167,14 +162,14 @@ def holds(cls, obj: T, parameters: Optional[Dict[str, Any]] = None) -> Optional[ efg = to_efg(obj) alphabet = sorted(dfu.get_vertices(dfg_graph), key=lambda g: g.__str__()) for part in partition(alphabet): - # print(part) po = generate_order(part, efg) if is_valid_order(po, dfg_graph, efg): return po return None @classmethod - def apply(cls, obj: T, parameters: Optional[Dict[str, Any]] = None) -> Optional[Tuple[StrictPartialOrder, List[POWL]]]: + def apply(cls, obj: T, parameters: Optional[Dict[str, Any]] = None) -> Optional[ + Tuple[StrictPartialOrder, List[POWL]]]: g = cls.holds(obj, parameters) if g is None: return g diff --git a/pm4py/algo/discovery/powl/inductive/variants/brute_force/factory.py b/pm4py/algo/discovery/powl/inductive/variants/brute_force/factory.py index aac3081d3..0e91e84e8 100644 --- a/pm4py/algo/discovery/powl/inductive/variants/brute_force/factory.py +++ b/pm4py/algo/discovery/powl/inductive/variants/brute_force/factory.py @@ -5,24 +5,24 @@ from pm4py.algo.discovery.powl.inductive.cuts.loop import POWLLoopCutUVCL from pm4py.algo.discovery.powl.inductive.cuts.sequence import POWLStrictSequenceCutUVCL from pm4py.algo.discovery.powl.inductive.cuts.xor import POWLExclusiveChoiceCutUVCL -from pm4py.algo.discovery.inductive.dtypes.im_ds import IMDataStructureUVCL, IMDataStructure +from pm4py.algo.discovery.inductive.dtypes.im_ds import IMDataStructureUVCL from pm4py.algo.discovery.powl.inductive.variants.brute_force.bf_partial_order_cut import BruteForcePartialOrderCutUVCL -from pm4py.algo.discovery.powl.inductive.variants.powl_discovery_variants import POWLDiscoveryVariant from pm4py.objects.powl.obj import POWL -class CutFactoryPOBF(CutFactory): +class CutFactoryPOWLBruteForce(CutFactory): @classmethod - def get_cuts(cls, obj: T, inst: POWLDiscoveryVariant, parameters: Optional[Dict[str, Any]] = None) -> List[Type[S]]: + def get_cuts(cls, obj: T, parameters: Optional[Dict[str, Any]] = None) -> List[Type[S]]: if type(obj) is IMDataStructureUVCL: - return [POWLExclusiveChoiceCutUVCL, POWLStrictSequenceCutUVCL, POWLConcurrencyCutUVCL, POWLLoopCutUVCL, BruteForcePartialOrderCutUVCL] + return [POWLExclusiveChoiceCutUVCL, POWLStrictSequenceCutUVCL, POWLConcurrencyCutUVCL, POWLLoopCutUVCL, + BruteForcePartialOrderCutUVCL] return list() @classmethod - def find_cut(cls, obj: IMDataStructure, inst: POWLDiscoveryVariant, parameters: Optional[Dict[str, Any]] = None) -> Optional[ - Tuple[POWL, List[T]]]: - for c in CutFactoryPOBF.get_cuts(obj, inst): + def find_cut(cls, obj: IMDataStructureUVCL, parameters: Optional[Dict[str, Any]] = None) -> Optional[ + Tuple[POWL, List[T]]]: + for c in CutFactoryPOWLBruteForce.get_cuts(obj): r = c.apply(obj, parameters) if r is not None: return r diff --git a/pm4py/algo/discovery/powl/inductive/variants/clustering/__init__.py b/pm4py/algo/discovery/powl/inductive/variants/clustering/__init__.py deleted file mode 100644 index 9bb038112..000000000 --- a/pm4py/algo/discovery/powl/inductive/variants/clustering/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from pm4py.algo.discovery.powl.inductive.variants.clustering import * \ No newline at end of file diff --git a/pm4py/algo/discovery/powl/inductive/variants/dynamic_clustering/__init__.py b/pm4py/algo/discovery/powl/inductive/variants/dynamic_clustering/__init__.py new file mode 100644 index 000000000..4e0a37547 --- /dev/null +++ b/pm4py/algo/discovery/powl/inductive/variants/dynamic_clustering/__init__.py @@ -0,0 +1 @@ +from pm4py.algo.discovery.powl.inductive.variants.dynamic_clustering import * diff --git a/pm4py/algo/discovery/powl/inductive/variants/dynamic_clustering/dynamic_clustering_partial_order_cut.py b/pm4py/algo/discovery/powl/inductive/variants/dynamic_clustering/dynamic_clustering_partial_order_cut.py new file mode 100644 index 000000000..995a98d2c --- /dev/null +++ b/pm4py/algo/discovery/powl/inductive/variants/dynamic_clustering/dynamic_clustering_partial_order_cut.py @@ -0,0 +1,150 @@ +from itertools import product +from abc import ABC +from itertools import combinations +from typing import Any, Optional, Dict, List, Generic, Tuple, Collection + +from pm4py.algo.discovery.inductive.cuts.abc import Cut, T +from pm4py.algo.discovery.inductive.dtypes.im_ds import IMDataStructureUVCL +from pm4py.objects.powl.BinaryRelation import BinaryRelation +from pm4py.objects.powl.obj import StrictPartialOrder, POWL +from pm4py.algo.discovery.inductive.cuts import utils as cut_util +from pm4py.algo.discovery.powl.inductive.variants.maximal.maximal_partial_order_cut import project_on_groups_with_unique_activities +from pm4py.objects.dfg import util as dfu +from pm4py.statistics.eventually_follows.uvcl.get import apply as to_efg + + +def generate_order(clusters, efg): + # Step 0: if we have one single group containing all activities ---> invoke fall-through. + if len(clusters) < 2: + return None + + # Step 1: Generate Order based on the EFG + po = BinaryRelation([tuple(c) for c in sorted(clusters)]) + at_least_one_efg = [[False for _ in range(len(po.nodes))] for _ in range(len(po.nodes))] + all_efg = [[True for _ in range(len(po.nodes))] for _ in range(len(po.nodes))] + + changed = False + for i in range(len(po.nodes)): + cluster_1 = po.nodes[i] + for j in range(i + 1, len(po.nodes)): + cluster_2 = po.nodes[j] + for a in cluster_1: + for b in cluster_2: + if (a, b) in efg: + at_least_one_efg[i][j] = True + else: + all_efg[i][j] = False + if (b, a) in efg: + at_least_one_efg[j][i] = True + else: + all_efg[j][i] = False + + if at_least_one_efg[i][j] and not at_least_one_efg[j][i]: + po.add_edge(cluster_1, cluster_2) + elif at_least_one_efg[j][i] and not at_least_one_efg[i][j]: + po.add_edge(cluster_2, cluster_1) + + # Step 2: Ensure Transitivity and Irreflexivity + if not po.is_transitive(): + n = len(po.nodes) + continue_loop = True + while continue_loop: + continue_loop = False + for i, j, k in product(range(n), range(n), range(n)): + if i != j and j != k and po.edges[i][j] and po.edges[j][k] and not po.is_edge_id(i, k): + if not at_least_one_efg[k][i]: + po.edges[i][k] = True + continue_loop = True + else: + clusters = cut_util.merge_lists_based_on_activities(po.nodes[i][0], po.nodes[k][0], clusters) + return generate_order(clusters, efg) + + if not po.is_irreflexive(): + for i in range(len(po.nodes)): + cluster_1 = po.nodes[i] + for j in range(i + 1, len(po.nodes)): + cluster_2 = po.nodes[j] + if po.is_edge(cluster_1, cluster_2) and po.is_edge(cluster_2, cluster_1): + clusters = cut_util.merge_lists_based_on_activities(cluster_1[0], cluster_2[0], clusters) + changed = True + + if changed: + return generate_order(clusters, efg) + + # # Step 3: Detect Choice + for i in range(len(po.nodes)): + cluster_1 = po.nodes[i] + for j in range(i + 1, len(po.nodes)): + cluster_2 = po.nodes[j] + if not po.is_edge(cluster_1, cluster_2) and not po.is_edge(cluster_2, cluster_1) and not \ + at_least_one_efg[i][j] and not at_least_one_efg[j][i]: + clusters = cut_util.merge_lists_based_on_activities(cluster_1[0], cluster_2[0], clusters) + changed = True + + if changed: + return generate_order(clusters, efg) + + # Step 4: Cluster nodes sharing the same pre- and post-sets. + pre = {node: [] for node in po.nodes} + post = {node: [] for node in po.nodes} + for i in range(len(po.nodes)): + cluster_1 = po.nodes[i] + for j in range(i + 1, len(po.nodes)): + cluster_2 = po.nodes[j] + if po.is_edge(cluster_1, cluster_2): + pre[cluster_2].append(cluster_1) + post[cluster_1].append(cluster_2) + elif po.is_edge(cluster_2, cluster_1): + pre[cluster_1].append(cluster_2) + post[cluster_2].append(cluster_1) + + for i in range(len(po.nodes)): + cluster_1 = po.nodes[i] + for j in range(i + 1, len(po.nodes)): + cluster_2 = po.nodes[j] + if pre[cluster_1] == pre[cluster_2] and post[cluster_1] == post[cluster_2]: + clusters = cut_util.merge_lists_based_on_activities(cluster_1[0], cluster_2[0], clusters) + changed = True + + if changed and len(clusters) > 1: + return generate_order(clusters, efg) + else: + return po + + +class DynamicClusteringPartialOrderCut(Cut[T], ABC, Generic[T]): + + @classmethod + def operator(cls, parameters: Optional[Dict[str, Any]] = None) -> StrictPartialOrder: + return StrictPartialOrder([]) + + @classmethod + def holds(cls, obj: T, parameters: Optional[Dict[str, Any]] = None) -> Optional[BinaryRelation]: + alphabet = sorted(dfu.get_vertices(obj.dfg), key=lambda g: g.__str__()) + efg = to_efg(obj) + clusters = [[a] for a in alphabet] + po = generate_order(clusters, efg) + return po + + @classmethod + def apply(cls, obj: T, parameters: Optional[Dict[str, Any]] = None) -> Optional[Tuple[StrictPartialOrder, + List[POWL]]]: + g = cls.holds(obj, parameters) + if g is None: + return g + children = cls.project(obj, g.nodes, parameters) + po = StrictPartialOrder(children) + for i, j in combinations(range(len(g.nodes)), 2): + if g.is_edge_id(i, j): + po.order.add_edge(children[i], children[j]) + elif g.is_edge_id(j, i): + po.order.add_edge(children[j], children[i]) + return po, po.children + + +class DynamicClusteringPartialOrderCutUVCL(DynamicClusteringPartialOrderCut[IMDataStructureUVCL]): + + @classmethod + def project(cls, obj: IMDataStructureUVCL, groups: List[Collection[Any]], + parameters: Optional[Dict[str, Any]] = None) -> List[IMDataStructureUVCL]: + return project_on_groups_with_unique_activities(obj.data_structure, groups) diff --git a/pm4py/algo/discovery/powl/inductive/variants/dynamic_clustering/factory.py b/pm4py/algo/discovery/powl/inductive/variants/dynamic_clustering/factory.py new file mode 100644 index 000000000..8e6ecf44c --- /dev/null +++ b/pm4py/algo/discovery/powl/inductive/variants/dynamic_clustering/factory.py @@ -0,0 +1,29 @@ +from typing import List, Optional, Dict, Any, Tuple + +from pm4py.algo.discovery.powl.inductive.cuts.factory import T, CutFactory +from pm4py.algo.discovery.powl.inductive.cuts.loop import POWLLoopCutUVCL +from pm4py.algo.discovery.powl.inductive.cuts.xor import POWLExclusiveChoiceCutUVCL +from pm4py.algo.discovery.powl.inductive.variants.dynamic_clustering.dynamic_clustering_partial_order_cut import \ + DynamicClusteringPartialOrderCutUVCL +from pm4py.algo.discovery.inductive.dtypes.im_ds import IMDataStructure +from pm4py.objects.powl.obj import POWL +from pm4py.objects.dfg import util as dfu + + +class CutFactoryPOWLDynamicClustering(CutFactory): + + @classmethod + def get_cuts(cls, obj, parameters=None): + return [POWLExclusiveChoiceCutUVCL, POWLLoopCutUVCL, DynamicClusteringPartialOrderCutUVCL] + + @classmethod + def find_cut(cls, obj: IMDataStructure, parameters: Optional[Dict[str, Any]] = None) -> Optional[ + Tuple[POWL, List[T]]]: + alphabet = sorted(dfu.get_vertices(obj.dfg), key=lambda g: g.__str__()) + if len(alphabet) < 2: + return None + for c in CutFactoryPOWLDynamicClustering.get_cuts(obj): + r = c.apply(obj, parameters) + if r is not None: + return r + return None diff --git a/pm4py/algo/discovery/powl/inductive/variants/dynamic_clustering_frequency/__init__.py b/pm4py/algo/discovery/powl/inductive/variants/dynamic_clustering_frequency/__init__.py new file mode 100644 index 000000000..31d978ece --- /dev/null +++ b/pm4py/algo/discovery/powl/inductive/variants/dynamic_clustering_frequency/__init__.py @@ -0,0 +1 @@ +from pm4py.algo.discovery.powl.inductive.variants.dynamic_clustering_frequency import * diff --git a/pm4py/algo/discovery/powl/inductive/variants/dynamic_clustering_frequency/dynamic_clustering_frequency_partial_order_cut.py b/pm4py/algo/discovery/powl/inductive/variants/dynamic_clustering_frequency/dynamic_clustering_frequency_partial_order_cut.py new file mode 100644 index 000000000..0e49594af --- /dev/null +++ b/pm4py/algo/discovery/powl/inductive/variants/dynamic_clustering_frequency/dynamic_clustering_frequency_partial_order_cut.py @@ -0,0 +1,172 @@ +from itertools import product +from abc import ABC +from itertools import combinations +from typing import Any, Optional, Dict, List, Generic, Tuple, Collection + +from pm4py.algo.discovery.inductive.cuts.abc import Cut, T +from pm4py.algo.discovery.inductive.dtypes.im_ds import IMDataStructureUVCL +from pm4py.objects.powl.BinaryRelation import BinaryRelation +from pm4py.objects.powl.obj import StrictPartialOrder, POWL +from pm4py.algo.discovery.inductive.cuts import utils as cut_util +from pm4py.algo.discovery.powl.inductive.variants.maximal.maximal_partial_order_cut import \ + project_on_groups_with_unique_activities +from pm4py.objects.dfg import util as dfu + +ORDER_FREQUENCY_RATIO = "order frequency ratio" + + +def generate_order(obj: T, clusters, order_frequency_ratio): + # Step 0: if we have one single group containing all activities ---> invoke fall-through. + if len(clusters) < 2: + return None + + # Step 1: Generate Order based on the EFG + po = BinaryRelation([tuple(c) for c in sorted(clusters)]) + efg_freq = compute_efg_frequencies(obj, groups=po.nodes) + + changed = False + for i in range(len(po.nodes)): + cluster_1 = po.nodes[i] + for j in range(i + 1, len(po.nodes)): + cluster_2 = po.nodes[j] + + sum_freq = efg_freq[(cluster_1, cluster_2)] + efg_freq[(cluster_2, cluster_1)] + if sum_freq > 0: + if (float(efg_freq[(cluster_1, cluster_2)]) / sum_freq) >= order_frequency_ratio: + po.add_edge(cluster_1, cluster_2) + if (float(efg_freq[cluster_2, cluster_1]) / sum_freq) >= order_frequency_ratio: + po.add_edge(cluster_2, cluster_1) + + # Step 2: Ensure Transitivity and Irreflexivity + if not po.is_transitive(): + n = len(po.nodes) + continue_loop = True + while continue_loop: + continue_loop = False + for i, j, k in product(range(n), range(n), range(n)): + if i != j and j != k and po.edges[i][j] and po.edges[j][k] and not po.is_edge_id(i, k): + if efg_freq[(po.nodes[k], po.nodes[i])] + efg_freq[(po.nodes[i], po.nodes[k])] == 0: + po.edges[i][k] = True + continue_loop = True + else: + clusters = cut_util.merge_lists_based_on_activities(po.nodes[i][0], po.nodes[k][0], clusters) + return generate_order(obj, clusters, order_frequency_ratio) + + if not po.is_irreflexive(): + for i in range(len(po.nodes)): + cluster_1 = po.nodes[i] + for j in range(i + 1, len(po.nodes)): + cluster_2 = po.nodes[j] + if po.is_edge(cluster_1, cluster_2) and po.is_edge(cluster_2, cluster_1): + clusters = cut_util.merge_lists_based_on_activities(cluster_1[0], cluster_2[0], clusters) + changed = True + + if changed: + return generate_order(obj, clusters, order_frequency_ratio) + + # # Step 3: Detect Choice + for i in range(len(po.nodes)): + cluster_1 = po.nodes[i] + for j in range(i + 1, len(po.nodes)): + cluster_2 = po.nodes[j] + if not po.is_edge(cluster_1, cluster_2) and not po.is_edge(cluster_2, cluster_1) \ + and efg_freq[(cluster_1, cluster_2)] == 0 \ + and efg_freq[(cluster_2, cluster_1)] == 0: + clusters = cut_util.merge_lists_based_on_activities(cluster_1[0], cluster_2[0], clusters) + changed = True + + if changed: + return generate_order(obj, clusters, order_frequency_ratio) + + # Step 4: Cluster nodes sharing the same pre- and post-sets. + pre = {node: [] for node in po.nodes} + post = {node: [] for node in po.nodes} + for i in range(len(po.nodes)): + cluster_1 = po.nodes[i] + for j in range(i + 1, len(po.nodes)): + cluster_2 = po.nodes[j] + if po.is_edge(cluster_1, cluster_2): + pre[cluster_2].append(cluster_1) + post[cluster_1].append(cluster_2) + elif po.is_edge(cluster_2, cluster_1): + pre[cluster_1].append(cluster_2) + post[cluster_2].append(cluster_1) + + for i in range(len(po.nodes)): + cluster_1 = po.nodes[i] + for j in range(i + 1, len(po.nodes)): + cluster_2 = po.nodes[j] + if pre[cluster_1] == pre[cluster_2] and post[cluster_1] == post[cluster_2]: + clusters = cut_util.merge_lists_based_on_activities(cluster_1[0], cluster_2[0], clusters) + changed = True + + if changed and len(clusters) > 1: + return generate_order(obj, clusters, order_frequency_ratio) + else: + return po + + +def compute_efg_frequencies(interval_log: IMDataStructureUVCL, groups) -> Dict[Tuple[str, str], int]: + res = {(g1, g2): 0 for g1 in groups for g2 in groups} + + activity_to_cluster = {} + for cluster in groups: + for activity in cluster: + activity_to_cluster[activity] = cluster + + for trace, freq in interval_log.data_structure.items(): + seen_pairs = set() + for i in range(len(trace)): + cluster_1 = activity_to_cluster.get(trace[i]) + for j in range(i + 1, len(trace)): + cluster_2 = activity_to_cluster.get(trace[j]) + pair = (cluster_1, cluster_2) + if pair not in seen_pairs: + res[pair] += freq + seen_pairs.add(pair) + return res + + +class DynamicClusteringFrequencyPartialOrderCut(Cut[T], ABC, Generic[T]): + + @classmethod + def operator(cls, parameters: Optional[Dict[str, Any]] = None) -> StrictPartialOrder: + return StrictPartialOrder([]) + + @classmethod + def holds(cls, obj: T, parameters: Optional[Dict[str, Any]] = None) -> Optional[BinaryRelation]: + alphabet = sorted(dfu.get_vertices(obj.dfg), key=lambda g: g.__str__()) + clusters = [[a] for a in alphabet] + + if ORDER_FREQUENCY_RATIO in parameters.keys(): + order_frequency_ratio = parameters[ORDER_FREQUENCY_RATIO] + if not (0.5 < order_frequency_ratio <= 1.0): + raise ValueError("Parameter value of " + ORDER_FREQUENCY_RATIO + "must be in range: 0.5 < value <= 1.0") + else: + order_frequency_ratio = 1.0 + + po = generate_order(obj, clusters, order_frequency_ratio) + return po + + @classmethod + def apply(cls, obj: T, parameters: Optional[Dict[str, Any]] = None) -> Optional[Tuple[StrictPartialOrder, + List[POWL]]]: + g = cls.holds(obj, parameters) + if g is None: + return g + children = cls.project(obj, g.nodes, parameters) + po = StrictPartialOrder(children) + for i, j in combinations(range(len(g.nodes)), 2): + if g.is_edge_id(i, j): + po.order.add_edge(children[i], children[j]) + elif g.is_edge_id(j, i): + po.order.add_edge(children[j], children[i]) + return po, po.children + + +class DynamicClusteringFrequencyPartialOrderCutUVCL(DynamicClusteringFrequencyPartialOrderCut[IMDataStructureUVCL]): + + @classmethod + def project(cls, obj: IMDataStructureUVCL, groups: List[Collection[Any]], + parameters: Optional[Dict[str, Any]] = None) -> List[IMDataStructureUVCL]: + return project_on_groups_with_unique_activities(obj.data_structure, groups) diff --git a/pm4py/algo/discovery/powl/inductive/variants/dynamic_clustering_frequency/factory.py b/pm4py/algo/discovery/powl/inductive/variants/dynamic_clustering_frequency/factory.py new file mode 100644 index 000000000..f5db59365 --- /dev/null +++ b/pm4py/algo/discovery/powl/inductive/variants/dynamic_clustering_frequency/factory.py @@ -0,0 +1,29 @@ +from typing import List, Optional, Dict, Any, Tuple + +from pm4py.algo.discovery.powl.inductive.cuts.factory import T, CutFactory +from pm4py.algo.discovery.powl.inductive.cuts.loop import POWLLoopCutUVCL +from pm4py.algo.discovery.powl.inductive.cuts.xor import POWLExclusiveChoiceCutUVCL +from pm4py.algo.discovery.inductive.dtypes.im_ds import IMDataStructure +from pm4py.algo.discovery.powl.inductive.variants.dynamic_clustering_frequency.dynamic_clustering_frequency_partial_order_cut import \ + DynamicClusteringFrequencyPartialOrderCutUVCL +from pm4py.objects.powl.obj import POWL +from pm4py.objects.dfg import util as dfu + + +class CutFactoryPOWLDynamicClusteringFrequency(CutFactory): + + @classmethod + def get_cuts(cls, obj, parameters=None): + return [POWLExclusiveChoiceCutUVCL, POWLLoopCutUVCL, DynamicClusteringFrequencyPartialOrderCutUVCL] + + @classmethod + def find_cut(cls, obj: IMDataStructure, parameters: Optional[Dict[str, Any]] = None) -> Optional[ + Tuple[POWL, List[T]]]: + alphabet = sorted(dfu.get_vertices(obj.dfg), key=lambda g: g.__str__()) + if len(alphabet) < 2: + return None + for c in CutFactoryPOWLDynamicClusteringFrequency.get_cuts(obj): + r = c.apply(obj, parameters) + if r is not None: + return r + return None diff --git a/pm4py/algo/discovery/powl/inductive/variants/im_brute_force.py b/pm4py/algo/discovery/powl/inductive/variants/im_brute_force.py index b5f8f4fe1..2bd492ee6 100644 --- a/pm4py/algo/discovery/powl/inductive/variants/im_brute_force.py +++ b/pm4py/algo/discovery/powl/inductive/variants/im_brute_force.py @@ -1,18 +1,16 @@ from typing import Optional, Tuple, List, Dict, Any -from pm4py.algo.discovery.powl.inductive.variants.brute_force.factory import CutFactoryPOBF -from pm4py.algo.discovery.powl.inductive.variants.im_base import IMBasePOWL, T -from pm4py.algo.discovery.powl.inductive.variants.powl_discovery_variants import POWLDiscoveryVariant +from pm4py.algo.discovery.powl.inductive.variants.brute_force.factory import CutFactoryPOWLBruteForce +from pm4py.algo.discovery.powl.inductive.variants.im_tree import IMBasePOWL, T +from pm4py.algo.discovery.powl.inductive.variants.powl_discovery_varaints import POWLDiscoveryVariant from pm4py.objects.powl.obj import POWL -class BruteForcePOWL(IMBasePOWL): +class POWLInductiveMinerBruteForce(IMBasePOWL): def instance(self) -> POWLDiscoveryVariant: return POWLDiscoveryVariant.BRUTE_FORCE def find_cut(self, obj: T, parameters: Optional[Dict[str, Any]] = None) -> Optional[Tuple[POWL, List[T]]]: - res = CutFactoryPOBF.find_cut(obj, self.instance(), parameters=parameters) + res = CutFactoryPOWLBruteForce.find_cut(obj, parameters=parameters) return res - - diff --git a/pm4py/algo/discovery/powl/inductive/variants/im_cluster.py b/pm4py/algo/discovery/powl/inductive/variants/im_cluster.py deleted file mode 100644 index 70d210f12..000000000 --- a/pm4py/algo/discovery/powl/inductive/variants/im_cluster.py +++ /dev/null @@ -1,15 +0,0 @@ -from typing import Optional, Tuple, List, Dict, Any - -from pm4py.algo.discovery.powl.inductive.variants.clustering.factory import CutFactoryPOCluster -from pm4py.algo.discovery.powl.inductive.variants.im_base import IMBasePOWL, T -from pm4py.algo.discovery.powl.inductive.variants.powl_discovery_variants import POWLDiscoveryVariant -from pm4py.objects.powl.obj import POWL - -class ClusterPOWL(IMBasePOWL): - - def instance(self) -> POWLDiscoveryVariant: - return POWLDiscoveryVariant.CLUSTER - - def find_cut(self, obj: T, parameters: Optional[Dict[str, Any]] = None) -> Optional[Tuple[POWL, List[T]]]: - res = CutFactoryPOCluster.find_cut(obj, parameters=parameters) - return res diff --git a/pm4py/algo/discovery/powl/inductive/variants/im_dynamic_clustering.py b/pm4py/algo/discovery/powl/inductive/variants/im_dynamic_clustering.py new file mode 100644 index 000000000..b327660ed --- /dev/null +++ b/pm4py/algo/discovery/powl/inductive/variants/im_dynamic_clustering.py @@ -0,0 +1,19 @@ +from typing import Optional, Tuple, List, TypeVar, Dict, Any + +from pm4py.algo.discovery.inductive.dtypes.im_ds import IMDataStructureLog +from pm4py.algo.discovery.powl.inductive.variants.dynamic_clustering.factory import CutFactoryPOWLDynamicClustering +from pm4py.algo.discovery.powl.inductive.variants.im_tree import IMBasePOWL +from pm4py.algo.discovery.powl.inductive.variants.powl_discovery_varaints import POWLDiscoveryVariant +from pm4py.objects.powl.obj import POWL + +T = TypeVar('T', bound=IMDataStructureLog) + + +class POWLInductiveMinerDynamicClustering(IMBasePOWL): + + def instance(self) -> POWLDiscoveryVariant: + return POWLDiscoveryVariant.DYNAMIC_CLUSTERING + + def find_cut(self, obj: T, parameters: Optional[Dict[str, Any]] = None) -> Optional[Tuple[POWL, List[T]]]: + res = CutFactoryPOWLDynamicClustering.find_cut(obj, parameters=parameters) + return res diff --git a/pm4py/algo/discovery/powl/inductive/variants/im_dynamic_clustering_frequencies.py b/pm4py/algo/discovery/powl/inductive/variants/im_dynamic_clustering_frequencies.py new file mode 100644 index 000000000..23934ec04 --- /dev/null +++ b/pm4py/algo/discovery/powl/inductive/variants/im_dynamic_clustering_frequencies.py @@ -0,0 +1,20 @@ +from typing import Optional, Tuple, List, TypeVar, Dict, Any + +from pm4py.algo.discovery.inductive.dtypes.im_ds import IMDataStructureLog +from pm4py.algo.discovery.powl.inductive.variants.dynamic_clustering_frequency.factory import \ + CutFactoryPOWLDynamicClusteringFrequency +from pm4py.algo.discovery.powl.inductive.variants.im_tree import IMBasePOWL +from pm4py.algo.discovery.powl.inductive.variants.powl_discovery_varaints import POWLDiscoveryVariant +from pm4py.objects.powl.obj import POWL + +T = TypeVar('T', bound=IMDataStructureLog) + + +class POWLInductiveMinerDynamicClusteringFrequency(IMBasePOWL): + + def instance(self) -> POWLDiscoveryVariant: + return POWLDiscoveryVariant.DYNAMIC_CLUSTERING + + def find_cut(self, obj: T, parameters: Optional[Dict[str, Any]] = None) -> Optional[Tuple[POWL, List[T]]]: + res = CutFactoryPOWLDynamicClusteringFrequency.find_cut(obj, parameters=parameters) + return res diff --git a/pm4py/algo/discovery/powl/inductive/variants/im_maximal.py b/pm4py/algo/discovery/powl/inductive/variants/im_maximal.py new file mode 100644 index 000000000..c1c12235b --- /dev/null +++ b/pm4py/algo/discovery/powl/inductive/variants/im_maximal.py @@ -0,0 +1,16 @@ +from typing import Optional, Tuple, List, Dict, Any + +from pm4py.algo.discovery.powl.inductive.variants.maximal.factory import CutFactoryPOWLMaximal +from pm4py.algo.discovery.powl.inductive.variants.im_tree import IMBasePOWL, T +from pm4py.algo.discovery.powl.inductive.variants.powl_discovery_varaints import POWLDiscoveryVariant +from pm4py.objects.powl.obj import POWL + + +class POWLInductiveMinerMaximalOrder(IMBasePOWL): + + def instance(self) -> POWLDiscoveryVariant: + return POWLDiscoveryVariant.MAXIMAL + + def find_cut(self, obj: T, parameters: Optional[Dict[str, Any]] = None) -> Optional[Tuple[POWL, List[T]]]: + res = CutFactoryPOWLMaximal.find_cut(obj, parameters=parameters) + return res diff --git a/pm4py/algo/discovery/powl/inductive/variants/im_base.py b/pm4py/algo/discovery/powl/inductive/variants/im_tree.py similarity index 55% rename from pm4py/algo/discovery/powl/inductive/variants/im_base.py rename to pm4py/algo/discovery/powl/inductive/variants/im_tree.py index 06a9d27f9..c365a59f1 100644 --- a/pm4py/algo/discovery/powl/inductive/variants/im_base.py +++ b/pm4py/algo/discovery/powl/inductive/variants/im_tree.py @@ -8,7 +8,10 @@ from pm4py.algo.discovery.powl.inductive.base_case.factory import BaseCaseFactory from pm4py.algo.discovery.powl.inductive.cuts.factory import CutFactory from pm4py.algo.discovery.powl.inductive.fall_through.factory import FallThroughFactory -from pm4py.algo.discovery.powl.inductive.variants.powl_discovery_variants import POWLDiscoveryVariant +from pm4py.algo.discovery.powl.inductive.utils.filtering import FILTERING_TYPE, FilteringType, \ + filter_most_frequent_variants, FILTERING_THRESHOLD, filter_most_frequent_variants_with_decreasing_factor, \ + DEFAULT_FILTERING_TYPE +from pm4py.algo.discovery.powl.inductive.variants.powl_discovery_varaints import POWLDiscoveryVariant from pm4py.objects.powl.obj import POWL, StrictPartialOrder, Sequence @@ -18,20 +21,52 @@ class IMBasePOWL(Generic[T], InductiveMinerFramework[T]): def instance(self) -> POWLDiscoveryVariant: - return POWLDiscoveryVariant.IM_BASE + return POWLDiscoveryVariant.TREE + def apply(self, obj: IMDataStructureUVCL, parameters: Optional[Dict[str, Any]] = None) -> POWL: + if FILTERING_TYPE not in parameters.keys(): + filtering_type = DEFAULT_FILTERING_TYPE + else: + filtering_type = parameters[FILTERING_TYPE] + if filtering_type not in FilteringType: + raise KeyError("Invalid FILTERING_TYPE: " + str(filtering_type)) + empty_traces = POWLEmptyTracesUVCL.apply(obj, parameters) if empty_traces is not None: return self._recurse(empty_traces[0], empty_traces[1], parameters) + powl = self.apply_base_cases(obj, parameters) - if powl is None: - cut = self.find_cut(obj, parameters) - if cut is not None: - powl = self._recurse(cut[0], cut[1], parameters=parameters) - if powl is None: + if powl is not None: + return powl + + cut = self.find_cut(obj, parameters) + if cut is not None: + powl = self._recurse(cut[0], cut[1], parameters=parameters) + + if powl is not None: + return powl + else: + if filtering_type is FilteringType.DYNAMIC: + filtered_log = filter_most_frequent_variants(obj.data_structure) + if len(filtered_log.data_structure) > 0: + return self.apply(filtered_log, parameters=parameters) + + elif filtering_type is FilteringType.DECREASING_FACTOR: + if FILTERING_THRESHOLD in parameters.keys(): + t = parameters[FILTERING_THRESHOLD] + if isinstance(t, float) and 0 <= t < 1: + if t > 0: + filtered_log = filter_most_frequent_variants_with_decreasing_factor(obj.data_structure, + decreasing_factor=t) + if 0 < len(filtered_log.data_structure) < len(obj.data_structure): + return self.apply(filtered_log, parameters=parameters) + else: + raise KeyError("Invalid filtering threshold!") + else: + raise KeyError("Invalid filtering type!") + ft = self.fall_through(obj, parameters) - powl = self._recurse(ft[0], ft[1], parameters=parameters) - return powl + return self._recurse(ft[0], ft[1], parameters=parameters) def apply_base_cases(self, obj: T, parameters: Optional[Dict[str, Any]] = None) -> Optional[POWL]: return BaseCaseFactory.apply_base_cases(obj, parameters=parameters) @@ -57,4 +92,3 @@ def _recurse(self, powl: POWL, objs: List[T], parameters: Optional[Dict[str, Any else: powl.children.extend(children) return powl - diff --git a/pm4py/algo/discovery/powl/inductive/variants/maximal/__init__.py b/pm4py/algo/discovery/powl/inductive/variants/maximal/__init__.py new file mode 100644 index 000000000..9017f458d --- /dev/null +++ b/pm4py/algo/discovery/powl/inductive/variants/maximal/__init__.py @@ -0,0 +1 @@ +from pm4py.algo.discovery.powl.inductive.variants.maximal import * \ No newline at end of file diff --git a/pm4py/algo/discovery/powl/inductive/variants/clustering/factory.py b/pm4py/algo/discovery/powl/inductive/variants/maximal/factory.py similarity index 63% rename from pm4py/algo/discovery/powl/inductive/variants/clustering/factory.py rename to pm4py/algo/discovery/powl/inductive/variants/maximal/factory.py index d04d72c6c..0e179d347 100644 --- a/pm4py/algo/discovery/powl/inductive/variants/clustering/factory.py +++ b/pm4py/algo/discovery/powl/inductive/variants/maximal/factory.py @@ -6,33 +6,29 @@ from pm4py.algo.discovery.powl.inductive.cuts.sequence import POWLStrictSequenceCutUVCL from pm4py.algo.discovery.powl.inductive.cuts.xor import POWLExclusiveChoiceCutUVCL from pm4py.algo.discovery.inductive.dtypes.im_ds import IMDataStructureUVCL -from pm4py.algo.discovery.powl.inductive.variants.clustering.cluster_partial_order_cut_EFG import ClusterPartialOrderEFGCutUVCL +from pm4py.algo.discovery.powl.inductive.variants.maximal.maximal_partial_order_cut import \ + MaximalPartialOrderCutUVCL from pm4py.objects.powl.obj import POWL -from pm4py.statistics.eventually_follows.uvcl.get import apply as to_efg from pm4py.objects.dfg import util as dfu -class CutFactoryPOCluster(CutFactory): +class CutFactoryPOWLMaximal(CutFactory): @classmethod def get_cuts(cls, obj: T, parameters: Optional[Dict[str, Any]] = None) -> List[Type[S]]: if type(obj) is IMDataStructureUVCL: - return [POWLExclusiveChoiceCutUVCL, POWLStrictSequenceCutUVCL, POWLConcurrencyCutUVCL, POWLLoopCutUVCL, ClusterPartialOrderEFGCutUVCL] + return [POWLExclusiveChoiceCutUVCL, POWLStrictSequenceCutUVCL, POWLConcurrencyCutUVCL, POWLLoopCutUVCL, + MaximalPartialOrderCutUVCL] return list() @classmethod def find_cut(cls, obj: IMDataStructureUVCL, parameters: Optional[Dict[str, Any]] = None) -> Optional[ - Tuple[POWL, List[T]]]: - init_efg = to_efg(obj) + Tuple[POWL, List[T]]]: + alphabet = sorted(dfu.get_vertices(obj.dfg), key=lambda g: g.__str__()) if len(alphabet) < 2: return None - parameters["EFG"] = init_efg - parameters["DFG"] = obj.dfg.graph - parameters["alphabet"] = alphabet - parameters["start_activities"] = sorted(list(obj.dfg.start_activities.keys())) - parameters["end_activities"] = sorted(list(obj.dfg.end_activities.keys())) - for c in CutFactoryPOCluster.get_cuts(obj): + for c in CutFactoryPOWLMaximal.get_cuts(obj): r = c.apply(obj, parameters) if r is not None: return r diff --git a/pm4py/algo/discovery/powl/inductive/variants/clustering/cluster_partial_order_cut_EFG.py b/pm4py/algo/discovery/powl/inductive/variants/maximal/maximal_partial_order_cut.py similarity index 73% rename from pm4py/algo/discovery/powl/inductive/variants/clustering/cluster_partial_order_cut_EFG.py rename to pm4py/algo/discovery/powl/inductive/variants/maximal/maximal_partial_order_cut.py index ee54d16e9..11430ab07 100644 --- a/pm4py/algo/discovery/powl/inductive/variants/clustering/cluster_partial_order_cut_EFG.py +++ b/pm4py/algo/discovery/powl/inductive/variants/maximal/maximal_partial_order_cut.py @@ -7,6 +7,8 @@ from pm4py.algo.discovery.inductive.dtypes.im_ds import IMDataStructureUVCL from pm4py.objects.powl.BinaryRelation import BinaryRelation from pm4py.objects.powl.obj import StrictPartialOrder, POWL +from pm4py.objects.dfg import util as dfu +from pm4py.statistics.eventually_follows.uvcl.get import apply as to_efg def generate_initial_order(nodes, efg): @@ -20,6 +22,7 @@ def generate_initial_order(nodes, efg): po.add_edge(b, a) return po + def remove(blocks, g): res = [] for g2 in blocks: @@ -35,7 +38,10 @@ def contains(blocks, g): return False -def is_valid_order(po, parameters): +def is_valid_order(po, efg, start_activities, end_activities): + if po is None: + return False + if len(po.nodes) < 2: return False @@ -44,7 +50,6 @@ def is_valid_order(po, parameters): start_blocks = po.nodes end_blocks = po.nodes - efg = parameters["EFG"] for group_1, group_2 in combinations(po.nodes, 2): @@ -76,13 +81,12 @@ def is_valid_order(po, parameters): for i in range(n): group = po.nodes[i] c1 = contains(start_blocks, group) - c2 = len(set(group).intersection(set(parameters["start_activities"]))) > 0 + c2 = len(set(group).intersection(start_activities)) > 0 c3 = contains(end_blocks, group) - c4 = len(set(group).intersection(set(parameters["end_activities"]))) > 0 + c4 = len(set(group).intersection(end_activities)) > 0 if (c1 and not c2) or (c3 and not c4): return False - return True @@ -118,7 +122,7 @@ def cluster_order(binary_relation): return new_relation -class ClusterPartialOrderEFGCut(Cut[T], ABC, Generic[T]): +class MaximalPartialOrderCut(Cut[T], ABC, Generic[T]): @classmethod def operator(cls, parameters: Optional[Dict[str, Any]] = None) -> StrictPartialOrder: @@ -127,18 +131,21 @@ def operator(cls, parameters: Optional[Dict[str, Any]] = None) -> StrictPartialO @classmethod def holds(cls, obj: T, parameters: Optional[Dict[str, Any]] = None) -> Optional[BinaryRelation]: - efg = parameters["EFG"] - alphabet = parameters["alphabet"] + efg = to_efg(obj) + alphabet = sorted(dfu.get_vertices(obj.dfg), key=lambda g: g.__str__()) po = generate_initial_order(alphabet, efg) clustered_po = cluster_order(po) - if is_valid_order(clustered_po, parameters): + start_activities = set(list(obj.dfg.start_activities.keys())) + end_activities = set(list(obj.dfg.end_activities.keys())) + if is_valid_order(clustered_po, efg, start_activities, end_activities): return clustered_po else: return None @classmethod - def apply(cls, obj: T, parameters: Optional[Dict[str, Any]] = None) -> Optional[Tuple[StrictPartialOrder, List[POWL]]]: + def apply(cls, obj: T, parameters: Optional[Dict[str, Any]] = None) -> Optional[Tuple[StrictPartialOrder, + List[POWL]]]: g = cls.holds(obj, parameters) if g is None: return g @@ -152,15 +159,27 @@ def apply(cls, obj: T, parameters: Optional[Dict[str, Any]] = None) -> Optional[ return po, po.children -class ClusterPartialOrderEFGCutUVCL(ClusterPartialOrderEFGCut[IMDataStructureUVCL]): +def project_on_groups_with_unique_activities(log: Counter, groups: List[Collection[Any]]): + r = list() + for g in groups: + new_log = Counter() + for var, freq in log.items(): + new_var = [] + for activity in var: + if activity in g: + new_var.append(activity) + new_var_tuple = tuple(new_var) + if new_var_tuple in new_log.keys(): + new_log[new_var_tuple] = new_log[new_var_tuple] + freq + else: + new_log[new_var_tuple] = freq + r.append(new_log) + return list(map(lambda l: IMDataStructureUVCL(l), r)) + + +class MaximalPartialOrderCutUVCL(MaximalPartialOrderCut[IMDataStructureUVCL]): @classmethod def project(cls, obj: IMDataStructureUVCL, groups: List[Collection[Any]], parameters: Optional[Dict[str, Any]] = None) -> List[IMDataStructureUVCL]: - r = list() - for g in groups: - c = Counter() - for t in obj.data_structure: - c[tuple(filter(lambda e: e in g, t))] = obj.data_structure[t] - r.append(c) - return list(map(lambda l: IMDataStructureUVCL(l), r)) + return project_on_groups_with_unique_activities(obj.data_structure, groups) diff --git a/pm4py/algo/discovery/powl/inductive/variants/powl_discovery_varaints.py b/pm4py/algo/discovery/powl/inductive/variants/powl_discovery_varaints.py new file mode 100644 index 000000000..9517a8424 --- /dev/null +++ b/pm4py/algo/discovery/powl/inductive/variants/powl_discovery_varaints.py @@ -0,0 +1,8 @@ +from enum import Enum, auto + + +class POWLDiscoveryVariant(Enum): + TREE = auto() # base IM with no partial orders + BRUTE_FORCE = auto() + MAXIMAL = auto() + DYNAMIC_CLUSTERING = auto() diff --git a/pm4py/algo/discovery/powl/inductive/variants/powl_discovery_variants.py b/pm4py/algo/discovery/powl/inductive/variants/powl_discovery_variants.py deleted file mode 100644 index b7e4ffc42..000000000 --- a/pm4py/algo/discovery/powl/inductive/variants/powl_discovery_variants.py +++ /dev/null @@ -1,6 +0,0 @@ -from enum import Enum, auto - -class POWLDiscoveryVariant(Enum): - IM_BASE = auto() # this is base IM with no partial orders - BRUTE_FORCE = auto() # BPM paper - CLUSTER = auto() # ICPM paper diff --git a/pm4py/discovery.py b/pm4py/discovery.py index 2a04c1c56..756cdceee 100644 --- a/pm4py/discovery.py +++ b/pm4py/discovery.py @@ -7,6 +7,10 @@ import pandas as pd from pandas import DataFrame +from pm4py.algo.discovery.powl.inductive.utils.filtering import FILTERING_THRESHOLD +from pm4py.algo.discovery.powl.inductive.variants.dynamic_clustering_frequency.dynamic_clustering_frequency_partial_order_cut import \ + ORDER_FREQUENCY_RATIO +from pm4py.algo.discovery.powl.inductive.variants.powl_discovery_varaints import POWLDiscoveryVariant from pm4py.objects.bpmn.obj import BPMN from pm4py.objects.dfg.obj import DFG from pm4py.objects.powl.obj import POWL @@ -740,7 +744,10 @@ def discover_declare(log: Union[EventLog, pd.DataFrame], allowed_templates: Opti return declare_discovery.apply(log, parameters=properties) -def discover_powl(log: Union[EventLog, pd.DataFrame], activity_key: str = "concept:name", timestamp_key: str = "time:timestamp", case_id_key: str = "case:concept:name") -> POWL: +def discover_powl(log: Union[EventLog, pd.DataFrame], variant=POWLDiscoveryVariant.MAXIMAL, + filtering_weight_factor: float = 0.0, order_graph_filtering_threshold: float = None, + activity_key: str = "concept:name", timestamp_key: str = "time:timestamp", + case_id_key: str = "case:concept:name") -> POWL: """ Discovers a POWL model from an event log. @@ -748,6 +755,9 @@ def discover_powl(log: Union[EventLog, pd.DataFrame], activity_key: str = "conce Kourani, Humam, and Sebastiaan J. van Zelst. "POWL: partially ordered workflow language." International Conference on Business Process Management. Cham: Springer Nature Switzerland, 2023. :param log: event log / Pandas dataframe + :param variant: variant of the algorithm + :param filtering_weight_factor: accepts values 0 <= x < 1 + :param order_graph_filtering_threshold: accepts values 0.5 < x <= 1 :param activity_key: attribute to be used for the activity :param timestamp_key: attribute to be used for the timestamp :param case_id_key: attribute to be used as case identifier @@ -771,8 +781,16 @@ def discover_powl(log: Union[EventLog, pd.DataFrame], activity_key: str = "conce log = pm4py.convert_to_event_log(log, case_id_key=case_id_key) properties = get_properties(log, activity_key=activity_key, timestamp_key=timestamp_key) + if order_graph_filtering_threshold is not None: + if variant is POWLDiscoveryVariant.DYNAMIC_CLUSTERING: + properties[ORDER_FREQUENCY_RATIO] = order_graph_filtering_threshold + else: + raise Exception("the order graph filtering threshold can only be used for the variant DYNAMIC_CLUSTERING") + + properties[FILTERING_THRESHOLD] = filtering_weight_factor + from pm4py.algo.discovery.powl import algorithm as powl_discovery - return powl_discovery.apply(log, parameters=properties) + return powl_discovery.apply(log, variant=variant, parameters=properties) def discover_batches(log: Union[EventLog, pd.DataFrame], merge_distance: int = 15 * 60, min_batch_size: int = 2, activity_key: str = "concept:name", timestamp_key: str = "time:timestamp", case_id_key: str = "case:concept:name", resource_key: str = "org:resource") -> List[ diff --git a/pm4py/objects/conversion/powl/converter.py b/pm4py/objects/conversion/powl/converter.py index 4f2a40ce0..94e47fd37 100644 --- a/pm4py/objects/conversion/powl/converter.py +++ b/pm4py/objects/conversion/powl/converter.py @@ -6,6 +6,7 @@ class Variants(Enum): TO_PETRI_NET = to_petri_net + def apply(powl, parameters=None, variant=Variants.TO_PETRI_NET): """ Method for converting from POWL to Petri net diff --git a/pm4py/objects/conversion/powl/variants/to_petri_net.py b/pm4py/objects/conversion/powl/variants/to_petri_net.py index 90cbcdf16..ab97c18b2 100644 --- a/pm4py/objects/conversion/powl/variants/to_petri_net.py +++ b/pm4py/objects/conversion/powl/variants/to_petri_net.py @@ -1,11 +1,12 @@ import time +import uuid from itertools import product import pm4py.objects.conversion.process_tree.variants.to_petri_net as pt_to_pn from pm4py.objects.petri_net.obj import Marking from pm4py.objects.petri_net.obj import PetriNet from pm4py.objects.petri_net.utils.petri_utils import add_arc_from_to, remove_place -from pm4py.objects.powl.obj import Transition, SilentTransition, StrictPartialOrder, OperatorPOWL +from pm4py.objects.powl.obj import Transition, SilentTransition, StrictPartialOrder, OperatorPOWL, FrequentTransition from pm4py.objects.petri_net.utils import reduction from pm4py.objects.process_tree.obj import Operator @@ -42,7 +43,7 @@ def recursively_add_tree(powl, net, initial_entity_subtree, final_entity_subtree Last place added in this recursion """ if type(initial_entity_subtree) is PetriNet.Transition: - initial_place = pt_to_pn.get_new_place(counts) + initial_place = get_new_place(counts) net.places.add(initial_place) add_arc_from_to(initial_entity_subtree, initial_place, net) else: @@ -50,21 +51,23 @@ def recursively_add_tree(powl, net, initial_entity_subtree, final_entity_subtree if final_entity_subtree is not None and type(final_entity_subtree) is PetriNet.Place: final_place = final_entity_subtree else: - final_place = pt_to_pn.get_new_place(counts) + final_place = get_new_place(counts) net.places.add(final_place) if final_entity_subtree is not None and type(final_entity_subtree) is PetriNet.Transition: add_arc_from_to(final_place, final_entity_subtree, net) if force_add_skip: - invisible = pt_to_pn.get_new_hidden_trans(counts, type_trans="skip") + invisible = get_new_hidden_trans(counts, type_trans="skip") add_arc_from_to(initial_place, invisible, net) add_arc_from_to(invisible, final_place, net) if isinstance(powl, Transition): if isinstance(powl, SilentTransition): - petri_trans = pt_to_pn.get_new_hidden_trans(counts, type_trans="skip") + petri_trans = get_new_hidden_trans(counts, type_trans="skip") + elif isinstance(powl, FrequentTransition): + petri_trans = get_transition(counts, powl.label, powl.activity, powl.skippable, powl.selfloop) else: - petri_trans = pt_to_pn.get_transition(counts, powl.label) + petri_trans = get_transition(counts, powl.label, powl.label) net.transitions.add(petri_trans) add_arc_from_to(initial_place, petri_trans, net) add_arc_from_to(petri_trans, final_place, net) @@ -78,20 +81,19 @@ def recursively_add_tree(powl, net, initial_entity_subtree, final_entity_subtree counts, rec_depth + 1) elif powl.operator == Operator.LOOP: - new_initial_place = pt_to_pn.get_new_place(counts) + new_initial_place = get_new_place(counts) net.places.add(new_initial_place) - init_loop_trans = pt_to_pn.get_new_hidden_trans(counts, type_trans="init_loop") + init_loop_trans = get_new_hidden_trans(counts, type_trans="init_loop") net.transitions.add(init_loop_trans) add_arc_from_to(initial_place, init_loop_trans, net) add_arc_from_to(init_loop_trans, new_initial_place, net) initial_place = new_initial_place - loop_trans = pt_to_pn.get_new_hidden_trans(counts, type_trans="loop") + loop_trans = get_new_hidden_trans(counts, type_trans="loop") net.transitions.add(loop_trans) - dummy = SilentTransition() + exit_node = SilentTransition() do = tree_children[0] redo = tree_children[1] - exit = dummy net, counts, int1 = recursively_add_tree(do, net, initial_place, None, counts, @@ -99,7 +101,7 @@ def recursively_add_tree(powl, net, initial_entity_subtree, final_entity_subtree net, counts, int2 = recursively_add_tree(redo, net, int1, None, counts, rec_depth + 1) - net, counts, int3 = recursively_add_tree(exit, net, int1, + net, counts, int3 = recursively_add_tree(exit_node, net, int1, final_place, counts, rec_depth + 1) @@ -111,10 +113,10 @@ def recursively_add_tree(powl, net, initial_entity_subtree, final_entity_subtree elif isinstance(powl, StrictPartialOrder): transitive_reduction = powl.order.get_transitive_reduction() tree_children = list(powl.children) - tau_split = pt_to_pn.get_new_hidden_trans(counts, type_trans="tauSplit") + tau_split = get_new_hidden_trans(counts, type_trans="tauSplit") net.transitions.add(tau_split) add_arc_from_to(initial_place, tau_split, net) - tau_join = pt_to_pn.get_new_hidden_trans(counts, type_trans="tauJoin") + tau_join = get_new_hidden_trans(counts, type_trans="tauJoin") net.transitions.add(tau_join) add_arc_from_to(tau_join, final_place, net) @@ -123,19 +125,19 @@ def recursively_add_tree(powl, net, initial_entity_subtree, final_entity_subtree start_nodes = transitive_reduction.get_start_nodes() end_nodes = transitive_reduction.get_end_nodes() for subtree in tree_children: - i_trans = pt_to_pn.get_new_hidden_trans(counts, type_trans="init_par") + i_trans = get_new_hidden_trans(counts, type_trans="init_par") net.transitions.add(i_trans) if subtree in start_nodes: - i_place = pt_to_pn.get_new_place(counts) + i_place = get_new_place(counts) net.places.add(i_place) add_arc_from_to(tau_split, i_place, net) add_arc_from_to(i_place, i_trans, net) - f_trans = pt_to_pn.get_new_hidden_trans(counts, type_trans="final_par") + f_trans = get_new_hidden_trans(counts, type_trans="final_par") net.transitions.add(f_trans) if subtree in end_nodes: - f_place = pt_to_pn.get_new_place(counts) + f_place = get_new_place(counts) net.places.add(f_place) add_arc_from_to(f_trans, f_place, net) add_arc_from_to(f_place, tau_join, net) @@ -152,7 +154,7 @@ def recursively_add_tree(powl, net, initial_entity_subtree, final_entity_subtree n = range(len(tree_children)) for i, j in product(n, n): if transitive_reduction.is_edge_id(i, j): - new_place = pt_to_pn.get_new_place(counts) + new_place = get_new_place(counts) net.places.add(new_place) add_arc_from_to(final_trans[i], new_place, net) add_arc_from_to(new_place, init_trans[j], net) @@ -180,37 +182,34 @@ def apply(powl, parameters=None): final_marking Final marking """ - if parameters is None: - parameters = {} - del parameters counts = pt_to_pn.Counts() net = PetriNet('imdf_net_' + str(time.time())) initial_marking = Marking() final_marking = Marking() - source = pt_to_pn.get_new_place(counts) + source = get_new_place(counts) source.name = "source" - sink = pt_to_pn.get_new_place(counts) + sink = get_new_place(counts) sink.name = "sink" net.places.add(source) net.places.add(sink) initial_marking[source] = 1 final_marking[sink] = 1 - initial_mandatory = True - final_mandatory = True + initial_mandatory = True # check_tau_mandatory_at_initial_marking(powl) + final_mandatory = True # check_tau_mandatory_at_final_marking(powl) if initial_mandatory: - initial_place = pt_to_pn.get_new_place(counts) + initial_place = get_new_place(counts) net.places.add(initial_place) - tau_initial = pt_to_pn.get_new_hidden_trans(counts, type_trans="tau") + tau_initial = get_new_hidden_trans(counts, type_trans="tau") net.transitions.add(tau_initial) add_arc_from_to(source, tau_initial, net) add_arc_from_to(tau_initial, initial_place, net) else: initial_place = source if final_mandatory: - final_place = pt_to_pn.get_new_place(counts) + final_place = get_new_place(counts) net.places.add(final_place) - tau_final = pt_to_pn.get_new_hidden_trans(counts, type_trans="tau") + tau_final = get_new_hidden_trans(counts, type_trans="tau") net.transitions.add(tau_final) add_arc_from_to(final_place, tau_final, net) add_arc_from_to(tau_final, sink, net) @@ -229,3 +228,29 @@ def apply(powl, parameters=None): remove_place(net, place) return net, initial_marking, final_marking + + +def get_new_place(counts): + """ + Create a new place in the Petri net + """ + counts.inc_places() + return PetriNet.Place('p_' + str(counts.num_places)) + + +def get_new_hidden_trans(counts, type_trans="unknown"): + """ + Create a new hidden transition in the Petri net + """ + counts.inc_no_hidden() + return PetriNet.Transition(type_trans + '_' + str(counts.num_hidden), None) + + +def get_transition(counts, label, activity, skippable=False, selfloop=False): + """ + Create a transitions with the specified label in the Petri net + """ + counts.inc_no_visible() + return PetriNet.Transition(str(uuid.uuid4()), label, properties={"activity": activity, + "skippable": skippable, + "selfloop": selfloop}) diff --git a/pm4py/objects/petri_net/__init__.py b/pm4py/objects/petri_net/__init__.py index 875a498f1..d2d8d49a5 100644 --- a/pm4py/objects/petri_net/__init__.py +++ b/pm4py/objects/petri_net/__init__.py @@ -1 +1 @@ -from pm4py.objects.petri_net import obj, properties, semantics, utils, saw_net, stochastic +from pm4py.objects.petri_net import obj, properties, semantics, utils, saw_net, stochastic, exporter diff --git a/pm4py/objects/powl/BinaryRelation.py b/pm4py/objects/powl/BinaryRelation.py index 31b47c977..90ac3b605 100644 --- a/pm4py/objects/powl/BinaryRelation.py +++ b/pm4py/objects/powl/BinaryRelation.py @@ -3,6 +3,8 @@ from typing import Hashable, TypeVar, List as TList, Set as TSet T = TypeVar('T', bound=Hashable) + + class BinaryRelation: def __init__(self, nodes: TList[T]): @@ -61,7 +63,6 @@ def remove_edge_without_violating_transitivity(self, source: T, target: T) -> No self._edges[j][k] = False changed = True - def add_node(self, node: T) -> None: if node not in self._nodes: self._nodes.append(node) @@ -159,3 +160,7 @@ def __repr__(self) -> str: return res + " })" nodes = property(get_nodes, _set_nodes) + + @property + def edges(self): + return self._edges diff --git a/pm4py/objects/powl/constants.py b/pm4py/objects/powl/constants.py index 471ab5a18..0ed0181ba 100644 --- a/pm4py/objects/powl/constants.py +++ b/pm4py/objects/powl/constants.py @@ -1,4 +1,2 @@ SILENT_TRANSITION_LABEL = "tau" STRICT_PARTIAL_ORDER_LABEL = "PO" -SEQUENCE_LABEL = "->" - diff --git a/pm4py/objects/powl/obj.py b/pm4py/objects/powl/obj.py index 3bbbde473..356d762e3 100644 --- a/pm4py/objects/powl/obj.py +++ b/pm4py/objects/powl/obj.py @@ -1,7 +1,7 @@ from pm4py.objects.powl.BinaryRelation import BinaryRelation from pm4py.objects.powl.constants import STRICT_PARTIAL_ORDER_LABEL from pm4py.objects.process_tree.obj import ProcessTree, Operator -from typing import Optional, Union, List as TList +from typing import List as TList class POWL(ProcessTree): @@ -17,16 +17,11 @@ def simplify_using_frequent_transitions(self) -> "POWL": def simplify(self) -> "POWL": return self - def apply_all_reductions(self) -> "POWL": - res = self.simplify() - res = res.simplify_using_frequent_transitions() - return res - class Transition(POWL): transition_id: int = 0 - def __init__(self, label: Optional[str] = None) -> None: + def __init__(self, label: str | None) -> None: super().__init__() self._label = label self._identifier = Transition.transition_id @@ -63,18 +58,19 @@ def __init__(self) -> None: class FrequentTransition(Transition): - def __init__(self, label, min_freq: Union[str, int], max_freq: Union[str, int]) -> None: - self.min_freq = min_freq - self.max_freq = max_freq - if min_freq == 0 and max_freq == "-": - suffix = "*" - elif min_freq == 1 and max_freq == "-": - suffix = "+" - elif min_freq == 0 and max_freq == 1: - suffix = "?" - else: - suffix = str(min_freq) + ", " + str(max_freq) - super().__init__(label=label + "\n" + "[" + suffix + "]") + def __init__(self, label, min_freq: str | int, max_freq: str | int) -> None: + self.skippable = False + self.selfloop = False + if min_freq == 0: + self.skippable = True + if max_freq == "-": + self.selfloop = True + min_freq = "1" + self.activity = label + if self.skippable or self.selfloop: + label = str(label) + "\n" + "[" + str(min_freq) + "," + str(max_freq) + "]" + + super().__init__(label=label) class StrictPartialOrder(POWL): @@ -112,7 +108,6 @@ def __lt__(self, other: object) -> bool: partial_order = property(get_order, _set_order) children = property(get_children, _set_children) - # def __eq__(self, other): # if not isinstance(other, StrictPartialOrder): # return False @@ -135,7 +130,6 @@ def __lt__(self, other: object) -> bool: # return False # return True - def equal_content(self, other: object) -> bool: if not isinstance(other, StrictPartialOrder): return False @@ -171,17 +165,32 @@ def simplify_using_frequent_transitions(self) -> "StrictPartialOrder": def simplify(self) -> "StrictPartialOrder": simplified_nodes = {} sub_nodes = {} + start_nodes = {} + end_nodes = {} + + def connected(node): + for node2 in self.children: + if self.partial_order.is_edge(node, node2) or self.partial_order.is_edge(node2, node): + return True + return False for node_1 in self.children: - connected = False - for node_2 in self.children: - if self.partial_order.is_edge(node_1, node_2) or self.partial_order.is_edge(node_2, node_1): - connected = True - # break - if not connected and isinstance(node_1, StrictPartialOrder): - sub_nodes[node_1] = node_1.simplify() + simplified_node = node_1.simplify() + if isinstance(simplified_node, StrictPartialOrder): + + if not connected(node_1): + sub_nodes[node_1] = simplified_node + else: + s_nodes = simplified_node.order.get_start_nodes() + e_nodes = simplified_node.order.get_end_nodes() + if len(s_nodes) == 1 and len(e_nodes) == 1: + sub_nodes[node_1] = simplified_node + start_nodes[node_1] = list(s_nodes)[0] + end_nodes[node_1] = list(e_nodes)[0] + else: + simplified_nodes[node_1] = simplified_node else: - simplified_nodes[node_1] = node_1.simplify() + simplified_nodes[node_1] = simplified_node new_nodes = list(simplified_nodes.values()) for po, simplified_po in sub_nodes.items(): @@ -190,7 +199,14 @@ def simplify(self) -> "StrictPartialOrder": for node_1 in self.children: for node_2 in self.children: if self.partial_order.is_edge(node_1, node_2): - res.partial_order.add_edge(simplified_nodes[node_1], simplified_nodes[node_2]) + if node_1 in simplified_nodes.keys() and node_2 in simplified_nodes.keys(): + res.partial_order.add_edge(simplified_nodes[node_1], simplified_nodes[node_2]) + elif node_1 in simplified_nodes.keys(): + res.partial_order.add_edge(simplified_nodes[node_1], start_nodes[node_2]) + elif node_2 in simplified_nodes.keys(): + res.partial_order.add_edge(end_nodes[node_1], simplified_nodes[node_2]) + else: + res.partial_order.add_edge(end_nodes[node_1], start_nodes[node_2]) for po, simplified_po in sub_nodes.items(): for node_1 in simplified_po.children: for node_2 in simplified_po.children: @@ -204,40 +220,8 @@ class Sequence(StrictPartialOrder): def __init__(self, nodes: TList[POWL]) -> None: super().__init__(nodes) for i in range(len(nodes)): - for j in range(i+1, len(nodes)): + for j in range(i + 1, len(nodes)): self.partial_order.add_edge(nodes[i], nodes[j]) - self.operator = Operator.SEQUENCE - - def _set_sequence(self, nodes: TList[POWL]) -> None: - self._sequence: list[POWL] = nodes - - def get_sequence(self) -> TList[POWL]: - return self._sequence - - def simplify(self) -> "Sequence": - new_nodes = [] - for child in self.children: - if isinstance(child, Sequence): - for node in child.children: - new_nodes.append(node) - else: - new_nodes.append(child) - return Sequence([child.simplify() for child in new_nodes]) - - # def simplify_using_frequent_transitions(self): - # sequences = [] - # last_activity = None - # counter = 0 - # for child in self.children: - # if isinstance(child, Transition): - # - # if last_activity is not None: - # if child.label == last_activity: - # counter = counter + 1 - # else: - # if counter == 1: - # sequences.append(Transition(label = last_activity)) - # counter = 1 class OperatorPOWL(POWL): @@ -246,7 +230,7 @@ def __init__(self, operator: Operator, children: TList[POWL]) -> None: self.operator = operator self.children = children - def __lt__(self, other: object)-> bool: + def __lt__(self, other: object) -> bool: if isinstance(other, OperatorPOWL): return self.__repr__() < other.__repr__() elif isinstance(other, Transition): @@ -255,7 +239,7 @@ def __lt__(self, other: object)-> bool: return True return NotImplemented - def equal_content(self, other: object)-> bool: + def equal_content(self, other: object) -> bool: if not isinstance(other, OperatorPOWL): return False @@ -298,8 +282,8 @@ def simplify(self) -> "OperatorPOWL": child_1 = self.children[1] def merge_with_children(child0, child1): - if isinstance(child0, SilentTransition) and isinstance(child1, - OperatorPOWL) and child1.operator is Operator.LOOP: + if isinstance(child0, SilentTransition) and isinstance(child1, OperatorPOWL) \ + and child1.operator is Operator.LOOP: if isinstance(child1.children[0], SilentTransition): return OperatorPOWL(Operator.LOOP, [n.simplify() for n in child1.children]) elif isinstance(child1.children[1], SilentTransition): @@ -318,11 +302,12 @@ def merge_with_children(child0, child1): if self.operator is Operator.XOR: new_children = [] for child in self.children: - if isinstance(child, OperatorPOWL) and child.operator is Operator.XOR: - for node in child.children: + s_child = child.simplify() + if isinstance(s_child, OperatorPOWL) and s_child.operator is Operator.XOR: + for node in s_child.children: new_children.append(node) else: - new_children.append(child) - return OperatorPOWL(Operator.XOR, [child.simplify() for child in new_children]) + new_children.append(s_child) + return OperatorPOWL(Operator.XOR, [child for child in new_children]) else: - return OperatorPOWL(self.operator, [child.simplify() for child in self.children]) + return OperatorPOWL(self.operator, [child for child in self.children]) diff --git a/pm4py/vis.py b/pm4py/vis.py index 662752358..c6c8ee0bb 100644 --- a/pm4py/vis.py +++ b/pm4py/vis.py @@ -22,6 +22,7 @@ from pm4py.objects.ocel.obj import OCEL from pm4py.objects.org.sna.obj import SNA from pm4py.util import constants +from pm4py.visualization.powl.visualizer import POWLVisualizationVariants def view_petri_net(petri_net: PetriNet, initial_marking: Optional[Marking] = None, @@ -1233,7 +1234,7 @@ def save_vis_footprints(footprints: Union[Tuple[Dict[str, Any], Dict[str, Any]], return fps_visualizer.save(gviz, file_path) -def view_powl(powl: POWL, format: str = constants.DEFAULT_FORMAT_GVIZ_VIEW, bgcolor: str = "white", rankdir: str = "TB"): +def view_powl(powl: POWL, variant=POWLVisualizationVariants.BASIC, format: str = constants.DEFAULT_FORMAT_GVIZ_VIEW, bgcolor: str = "white"): """ Perform a visualization of a POWL model. @@ -1241,6 +1242,7 @@ def view_powl(powl: POWL, format: str = constants.DEFAULT_FORMAT_GVIZ_VIEW, bgco Kourani, Humam, and Sebastiaan J. van Zelst. "POWL: partially ordered workflow language." International Conference on Business Process Management. Cham: Springer Nature Switzerland, 2023. :param powl: POWL model + :param variant: variant of the visualization (BASIC or NET) :param format: format of the visualization (default: png) :param bgcolor: background color of the visualization (default: white) :param rankdir: sets the direction of the graph ("LR" for left-to-right; "TB" for top-to-bottom) @@ -1256,12 +1258,12 @@ def view_powl(powl: POWL, format: str = constants.DEFAULT_FORMAT_GVIZ_VIEW, bgco format = str(format).lower() from pm4py.visualization.powl import visualizer as powl_visualizer - gviz = powl_visualizer.apply(powl, parameters={"format": format, "bgcolor": bgcolor, "rankdir": rankdir}) + gviz = powl_visualizer.apply(powl, variant=variant, parameters={"format": format, "bgcolor": bgcolor}) powl_visualizer.view(gviz) -def save_vis_powl(powl: POWL, file_path: str, bgcolor: str = "white", rankdir: str = "TB", **kwargs): +def save_vis_powl(powl: POWL, file_path: str, variant=POWLVisualizationVariants.BASIC, bgcolor: str = "white", rankdir: str = "TB", **kwargs): """ Saves the visualization of a POWL model. @@ -1269,6 +1271,7 @@ def save_vis_powl(powl: POWL, file_path: str, bgcolor: str = "white", rankdir: s Kourani, Humam, and Sebastiaan J. van Zelst. "POWL: partially ordered workflow language." International Conference on Business Process Management. Cham: Springer Nature Switzerland, 2023. :param powl: POWL model + :param variant: variant of the visualization (BASIC or NET) :param file_path: target path of the visualization :param bgcolor: background color of the visualization (default: white) :param rankdir: sets the direction of the graph ("LR" for left-to-right; "TB" for top-to-bottom) @@ -1285,7 +1288,7 @@ def save_vis_powl(powl: POWL, file_path: str, bgcolor: str = "white", rankdir: s format = os.path.splitext(file_path)[1][1:].lower() from pm4py.visualization.powl import visualizer as powl_visualizer - gviz = powl_visualizer.apply(powl, parameters={"format": format, "bgcolor": bgcolor, "rankdir": rankdir}) + gviz = powl_visualizer.apply(powl, variant=variant, parameters={"format": format, "bgcolor": bgcolor, "rankdir": rankdir}) return powl_visualizer.save(gviz, file_path) diff --git a/pm4py/visualization/powl/variants/basic.py b/pm4py/visualization/powl/variants/basic.py index d609a3572..b536764b8 100644 --- a/pm4py/visualization/powl/variants/basic.py +++ b/pm4py/visualization/powl/variants/basic.py @@ -1,16 +1,19 @@ +import os import tempfile from enum import Enum from graphviz import Digraph from pm4py.objects.powl.constants import SILENT_TRANSITION_LABEL from pm4py.objects.process_tree.obj import Operator -from pm4py.util import exec_utils, constants +from pm4py.util import exec_utils from typing import Optional, Dict, Any, Union -from pm4py.objects.powl.obj import POWL, Transition, SilentTransition, StrictPartialOrder, OperatorPOWL +from pm4py.objects.powl.obj import POWL, Transition, SilentTransition, StrictPartialOrder, OperatorPOWL, \ + FrequentTransition -COLOR_ORDER = "red" -COLOR_OPERATOR = "black" -COLOR_LEAF = "blue" -NODE_IN_PO = False +COLOR_XOR = "white" +COLOR_LOOP = "white" +COLOR_ACTIVITY = "white" +COLOR_PO = "white" +OPERATOR_BOXES = True class Parameters(Enum): @@ -19,7 +22,6 @@ class Parameters(Enum): ENABLE_DEEPCOPY = "enable_deepcopy" FONT_SIZE = "font_size" BGCOLOR = "bgcolor" - RANKDIR = "rankdir" def apply(powl: POWL, parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> Digraph: @@ -41,17 +43,9 @@ def apply(powl: POWL, parameters: Optional[Dict[Union[str, Parameters], Any]] = if parameters is None: parameters = {} - image_format = exec_utils.get_param_value(Parameters.FORMAT, parameters, "png") - color_map = exec_utils.get_param_value(Parameters.COLOR_MAP, parameters, {}) - bgcolor = exec_utils.get_param_value(Parameters.BGCOLOR, parameters, constants.DEFAULT_BGCOLOR) - rankdir = exec_utils.get_param_value(Parameters.RANKDIR, parameters, "TB") - filename = tempfile.NamedTemporaryFile(suffix='.gv') - filename.close() - - viz = Digraph("powl", filename=filename.name, engine='dot', graph_attr={'bgcolor': bgcolor}) - viz.graph_attr['rankdir'] = rankdir + viz = Digraph("powl", filename=filename.name, engine='dot') viz.attr('node', shape='ellipse', fixedsize='false') viz.attr(nodesep='1') viz.attr(ranksep='1') @@ -59,8 +53,11 @@ def apply(powl: POWL, parameters: Optional[Dict[Union[str, Parameters], Any]] = viz.attr(overlap='scale') viz.attr(splines='true') + image_format = exec_utils.get_param_value(Parameters.FORMAT, parameters, "png") + color_map = exec_utils.get_param_value(Parameters.COLOR_MAP, parameters, {}) + repr_powl(powl, viz, color_map, parameters) - viz.format = image_format.replace("html", "plain-ext") + viz.format = image_format return viz @@ -95,7 +92,10 @@ def get_id(powl): if isinstance(powl, Transition): return str(id(powl)) if isinstance(powl, OperatorPOWL): - return "clusterINVIS_" + str(id(powl)) + if OPERATOR_BOXES: + return "cluster_" + str(id(powl)) + else: + return "clusterINVIS_" + str(id(powl)) if isinstance(powl, StrictPartialOrder): return "cluster_" + str(id(powl)) @@ -103,25 +103,25 @@ def get_id(powl): def add_operator_edge(vis, current_node_id, child, directory='none', style=""): child_id = get_id(child) if child_id.startswith("cluster_"): - vis.edge(current_node_id, get_id_base(child), dir=directory, lhead=child_id, style=style) + vis.edge(current_node_id, get_id_base(child), dir=directory, lhead=child_id, style=style, minlen='2') else: vis.edge(current_node_id, get_id_base(child), dir=directory, style=style) -def add_order_edge(block, child_1, child_2, directory='forward', color=COLOR_ORDER, style=""): +def add_order_edge(block, child_1, child_2, directory='forward', color="black", style=""): child_id_1 = get_id(child_1) child_id_2 = get_id(child_2) if child_id_1.startswith("cluster_"): if child_id_2.startswith("cluster_"): block.edge(get_id_base(child_1), get_id_base(child_2), dir=directory, color=color, style=style, - ltail=child_id_1, lhead=child_id_2) + ltail=child_id_1, lhead=child_id_2, minlen='2') else: block.edge(get_id_base(child_1), get_id_base(child_2), dir=directory, color=color, style=style, - ltail=child_id_1) + ltail=child_id_1, minlen='2') else: if child_id_2.startswith("cluster_"): block.edge(get_id_base(child_1), get_id_base(child_2), dir=directory, color=color, style=style, - lhead=child_id_2) + lhead=child_id_2, minlen='2') else: block.edge(get_id_base(child_1), get_id_base(child_2), dir=directory, color=color, style=style) @@ -131,17 +131,31 @@ def repr_powl(powl, viz, color_map, parameters): font_size = str(font_size) this_node_id = str(id(powl)) - if isinstance(powl, Transition): + script_dir = os.path.dirname(os.path.realpath(__file__)) + + if isinstance(powl, FrequentTransition): + label = powl.activity + if powl.skippable: + label = label + "\n?" + if powl.selfloop: + label = label + "*" + elif powl.selfloop: + label = label + "\n*" + viz.node(this_node_id, label, shape="box", fontsize=font_size, + style='filled', fillcolor=COLOR_ACTIVITY) + elif isinstance(powl, Transition): if isinstance(powl, SilentTransition): - viz.node(this_node_id, SILENT_TRANSITION_LABEL, style='filled', fillcolor='black', shape='point', - width="0.2", fontsize=font_size) + viz.node(this_node_id, SILENT_TRANSITION_LABEL, style='filled', fillcolor='black', shape='square', + width='0.4', height='0.4', fixedsize="true") else: - viz.node(this_node_id, str(powl.label), color=COLOR_LEAF, fontcolor=COLOR_LEAF, fontsize=font_size) + viz.node(this_node_id, str(powl.label), shape='box', fontsize=font_size, style='filled', + fillcolor=COLOR_ACTIVITY) elif isinstance(powl, StrictPartialOrder): transitive_reduction = powl.order.get_transitive_reduction() with viz.subgraph(name=get_id(powl)) as block: - block.attr(style="") + block.attr(style="filled") + block.attr(fillcolor="white") for child in powl.children: repr_powl(child, block, color_map, parameters) for child in powl.children: @@ -151,18 +165,22 @@ def repr_powl(powl, viz, color_map, parameters): elif isinstance(powl, OperatorPOWL): with viz.subgraph(name=get_id(powl)) as block: - block.attr(style="invis") - node_color = COLOR_OPERATOR - block.node(this_node_id, powl.operator.__repr__(), color=node_color, fontcolor=node_color, - fontsize=font_size) + block.attr(style="filled") + block.attr(fillcolor=COLOR_LOOP) if powl.operator == Operator.LOOP: + block.node(this_node_id, image=os.path.join(script_dir, "loop.png"), label="", fontsize=font_size, + width='0.5', height='0.5', fixedsize="true", style="filled", fillcolor=COLOR_LOOP) do = powl.children[0] redo = powl.children[1] repr_powl(do, block, color_map, parameters) add_operator_edge(block, this_node_id, do) repr_powl(redo, block, color_map, parameters) add_operator_edge(block, this_node_id, redo, style="dashed") - else: + elif powl.operator == Operator.XOR: + block.attr(style="filled") + block.attr(fillcolor=COLOR_XOR) + block.node(this_node_id, image=os.path.join(script_dir, "xor.png"), label="", fontsize=font_size, + width='0.5', height='0.5', fixedsize="true", style="filled", fillcolor=COLOR_XOR) for child in powl.children: repr_powl(child, block, color_map, parameters) add_operator_edge(block, this_node_id, child) diff --git a/pm4py/visualization/powl/variants/net.py b/pm4py/visualization/powl/variants/net.py new file mode 100644 index 000000000..ff4a09ffd --- /dev/null +++ b/pm4py/visualization/powl/variants/net.py @@ -0,0 +1,286 @@ +import os + +from pm4py.objects.powl.obj import POWL +from pm4py.util import exec_utils +from enum import Enum +import tempfile +import graphviz +from graphviz import Digraph +from typing import Optional, Dict, Any +from pm4py.util import constants +from pm4py.objects.bpmn.obj import BPMN +from pm4py.objects.bpmn.util.sorting import get_sorted_nodes_edges +from pm4py.objects.conversion.powl.converter import apply as powl_to_pn +from pm4py.objects.bpmn.util import reduction + + +class Parameters(Enum): + FORMAT = "format" + RANKDIR = "rankdir" + FONT_SIZE = "font_size" + BGCOLOR = "bgcolor" + + +FREQUENCY_TAGS_BORDER = False + + +class SplitExclusiveGateway(BPMN.ExclusiveGateway): + pass + + +class JoinExclusiveGateway(BPMN.ExclusiveGateway): + pass + + +def can_connect_without_xor_violation(input_nodes, output_nodes): + for in_node in input_nodes: + for out_node in output_nodes: + if isinstance(in_node, BPMN.ExclusiveGateway) and len(output_nodes) > 1: + return False + if isinstance(out_node, BPMN.ExclusiveGateway) and len(input_nodes) > 1: + return False + + return True + + +def simplify_and_gateways(nodes, edges): + for n in nodes: + if isinstance(n, BPMN.ParallelGateway): + input_nodes = [edge[0] for edge in edges if edge[1] == n] + output_nodes = [edge[1] for edge in edges if edge[0] == n] + if can_connect_without_xor_violation(input_nodes, output_nodes): + nodes.remove(n) + edges = [edge for edge in edges if n not in edge] + + for in_node in input_nodes: + for out_node in output_nodes: + edges.append((in_node, out_node)) + return simplify_and_gateways(nodes, edges) + + return nodes, edges + + +def add_node(n, viz, parameters): + script_dir = os.path.dirname(os.path.realpath(__file__)) + font_size = exec_utils.get_param_value(Parameters.FONT_SIZE, parameters, "16") + n_id = str(id(n)) + if isinstance(n, FrequencyTask): + if FREQUENCY_TAGS_BORDER: + peripheries = '1' + if n.selfloop: + peripheries = '2' + if n.skippable: + viz.node(n_id, shape="box", label=n.activity, fontsize=font_size, peripheries=peripheries, + style="dashed") + else: + viz.node(n_id, shape="box", label=n.activity, fontsize=font_size, peripheries=peripheries) + else: + label = n.activity + if n.skippable: + label = label + "\n?" + if n.selfloop: + label = label + "*" + elif n.selfloop: + label = label + "\n*" + viz.node(n_id, shape="box", label=label, fontsize=font_size) + elif isinstance(n, BPMN.StartEvent): + viz.node(n_id, image=os.path.join(script_dir, "play.png"), label="", shape="none", width='0.35', + height='0.35', fixedsize="true") + elif isinstance(n, BPMN.EndEvent): + viz.node(n_id, image=os.path.join(script_dir, "end.png"), label="", shape="none", width='0.35', + height='0.35', fixedsize="true") + elif isinstance(n, BPMN.ParallelGateway): + viz.node(n_id, label="", shape="square", fontsize=font_size, style="filled", fillcolor="black", width='0.3', + height='0.3') + elif isinstance(n, SplitExclusiveGateway): + viz.node(n_id, label="", shape="diamond", style="filled", fillcolor="lightgreen", fontsize=font_size, + width='0.4', height='0.4') + elif isinstance(n, JoinExclusiveGateway): + viz.node(n_id, label="", shape="diamond", style="filled", fillcolor="orange", fontsize=font_size, + width='0.4', height='0.4') + elif isinstance(n, BPMN.ExclusiveGateway): + viz.node(n_id, label="", shape="diamond", fontsize=font_size, + width='0.4', height='0.4') + else: + raise Exception("Unexpected instance of class " + str(type(n)) + "!") + + +def apply(powl: POWL, parameters: Optional[Dict[Any, Any]] = None) -> graphviz.Digraph: + pn_2, init_2, final_2 = powl_to_pn(powl) + bpmn_graph = to_bpmn(pn_2, init_2, final_2) + + nodes, edges = get_sorted_nodes_edges(bpmn_graph) + + for node in nodes: + + if isinstance(node, BPMN.ExclusiveGateway): + + incoming_edges = [e[0] for e in edges if e[1] is node] + outgoing_edges = [e[1] for e in edges if e[0] is node] + + if len(incoming_edges) == 1 and len(outgoing_edges) > 1: + + node.__class__ = SplitExclusiveGateway + + elif len(incoming_edges) > 1 and len(outgoing_edges) == 1: + + node.__class__ = JoinExclusiveGateway + + nodes, edges = simplify_and_gateways(nodes, edges) + + image_format = exec_utils.get_param_value(Parameters.FORMAT, parameters, "png") + rankdir = exec_utils.get_param_value(Parameters.RANKDIR, parameters, "LR") + + bgcolor = exec_utils.get_param_value(Parameters.BGCOLOR, parameters, constants.DEFAULT_BGCOLOR) + + filename = tempfile.NamedTemporaryFile(suffix='.gv') + viz = Digraph("", filename=filename.name, engine='dot', graph_attr={'bgcolor': bgcolor}) + viz.graph_attr['rankdir'] = rankdir + + gateway_edges = {} + + for e in edges: + if isinstance(e[0], BPMN.ParallelGateway) or isinstance(e[0], BPMN.ParallelGateway): + if e[0] not in gateway_edges: + gateway_edges[e[0]] = {'in': [], 'out': []} + gateway_edges[e[0]]['out'].append(e[1]) + continue + if isinstance(e[1], BPMN.ParallelGateway) or isinstance(e[1], BPMN.ParallelGateway): + if e[1] not in gateway_edges: + gateway_edges[e[1]] = {'in': [], 'out': []} + gateway_edges[e[1]]['in'].append(e[0]) + continue + + for node in nodes: + add_node(node, viz, parameters) + add_concurrent_subgraphs(viz, find_concurrent_groups(nodes, edges)) + + for e in edges: + n_id_1 = str(id(e[0])) + n_id_2 = str(id(e[1])) + + viz.edge(n_id_1, n_id_2) + + viz.attr(overlap='false') + + viz.format = image_format + + return viz + + +class FrequencyTask(BPMN.Task): + def __init__(self, name, properties, id="", in_arcs=None, out_arcs=None, process=None): + super().__init__(id=id, name=name, in_arcs=in_arcs, out_arcs=out_arcs, process=process) + self.activity = properties["activity"] + self.skippable = properties["skippable"] + self.selfloop = properties["selfloop"] + + +def to_bpmn(net, im, fm): + """ + Converts an accepting Petri net into a BPMN diagram + + Parameters + -------------- + net + Petri net + im + initial marking + fm + final marking + + Returns + -------------- + bpmn_graph + BPMN diagram + """ + + bpmn_graph = BPMN() + + entering_dictio = {} + exiting_dictio = {} + + for place in net.places: + node = BPMN.ExclusiveGateway() + bpmn_graph.add_node(node) + entering_dictio[place] = node + exiting_dictio[place] = node + + for trans in net.transitions: + if trans.label is None: + if len(trans.in_arcs) > 1: + node = BPMN.ParallelGateway(gateway_direction=BPMN.Gateway.Direction.CONVERGING) + elif len(trans.out_arcs) > 1: + node = BPMN.ParallelGateway(gateway_direction=BPMN.Gateway.Direction.DIVERGING) + else: + node = BPMN.ExclusiveGateway(gateway_direction=BPMN.Gateway.Direction.UNSPECIFIED) + bpmn_graph.add_node(node) + entering_dictio[trans] = node + exiting_dictio[trans] = node + else: + if len(trans.in_arcs) > 1: + entering_node = BPMN.ParallelGateway(gateway_direction=BPMN.Gateway.Direction.CONVERGING) + else: + entering_node = BPMN.ExclusiveGateway(gateway_direction=BPMN.Gateway.Direction.UNSPECIFIED) + + if len(trans.out_arcs) > 1: + exiting_node = BPMN.ParallelGateway(gateway_direction=BPMN.Gateway.Direction.DIVERGING) + else: + exiting_node = BPMN.ExclusiveGateway(gateway_direction=BPMN.Gateway.Direction.UNSPECIFIED) + + task = FrequencyTask(name=trans.label, properties=trans.properties) + bpmn_graph.add_node(task) + + bpmn_graph.add_flow(BPMN.Flow(entering_node, task)) + bpmn_graph.add_flow(BPMN.Flow(task, exiting_node)) + + entering_dictio[trans] = entering_node + exiting_dictio[trans] = exiting_node + + for arc in net.arcs: + bpmn_graph.add_flow(BPMN.Flow(exiting_dictio[arc.source], entering_dictio[arc.target])) + + start_node = BPMN.StartEvent(name="start", isInterrupting=True) + end_node = BPMN.NormalEndEvent(name="end") + bpmn_graph.add_node(start_node) + bpmn_graph.add_node(end_node) + for place in im: + bpmn_graph.add_flow(BPMN.Flow(start_node, entering_dictio[place])) + for place in fm: + bpmn_graph.add_flow(BPMN.Flow(exiting_dictio[place], end_node)) + + bpmn_graph = reduction.apply(bpmn_graph) + + for node in bpmn_graph.get_nodes(): + node.set_process(bpmn_graph.get_process_id()) + + for edge in bpmn_graph.get_flows(): + edge.set_process(bpmn_graph.get_process_id()) + + return bpmn_graph + + +def find_concurrent_groups(nodes, edges): + predecessors = {n: [] for n in nodes} + successors = {n: [] for n in nodes} + for src, dst in edges: + successors[src].append(dst) + predecessors[dst].append(src) + + groups = {} + for node, preds in predecessors.items(): + if len(preds) == 1: + pred_id = str(id(preds[0])) + if pred_id not in groups: + groups[pred_id] = [] + groups[pred_id].append(node) + + return [group for group in groups.values() if len(group) > 1] + + +def add_concurrent_subgraphs(graph, concurrent_elements): + for group in concurrent_elements: + with graph.subgraph() as s: + s.attr(rank='same') + for n in group: + s.node(str(id(n))) diff --git a/pm4py/visualization/powl/visualizer.py b/pm4py/visualization/powl/visualizer.py index ea379c1d3..c788b9d83 100644 --- a/pm4py/visualization/powl/visualizer.py +++ b/pm4py/visualization/powl/visualizer.py @@ -1,6 +1,7 @@ from pm4py.visualization.common import gview from pm4py.visualization.common import save as gsave from pm4py.visualization.powl.variants import basic +from pm4py.visualization.powl.variants import net from enum import Enum from pm4py.util import exec_utils from typing import Optional, Dict, Any @@ -8,14 +9,16 @@ import graphviz -class Variants(Enum): +class POWLVisualizationVariants(Enum): BASIC = basic + NET = net -DEFAULT_VARIANT = Variants.BASIC +DEFAULT_VARIANT = POWLVisualizationVariants.BASIC -def apply(powl: POWL, parameters: Optional[Dict[Any, Any]] = None, variant=DEFAULT_VARIANT) -> graphviz.Graph: +def apply(powl: POWL, parameters: Optional[Dict[Any, Any]] = None, variant=DEFAULT_VARIANT, frequency_tags=True)\ + -> graphviz.Graph: """ Method for POWL model representation @@ -28,13 +31,19 @@ def apply(powl: POWL, parameters: Optional[Dict[Any, Any]] = None, variant=DEFAU Parameters.FORMAT -> Format of the image (PDF, PNG, SVG; default PNG) variant Variant of the algorithm to use: - - Variants.BASIC + - POWLVisualizationVariants.BASIC (default) + - POWLVisualizationVariants.NET: BPMN-like visualization with decision gates + frequency_tags + Simplify the visualization using frequency tags Returns ----------- gviz GraphViz object """ + if frequency_tags: + powl = powl.simplify_using_frequent_transitions() + return exec_utils.get_variant(variant).apply(powl, parameters=parameters)