Skip to content

Commit

Permalink
Merge branch 'powl_ISJ' into 'integration'
Browse files Browse the repository at this point in the history
POWL ISJournal updates

See merge request process-mining/pm4py/pm4py-core!1185
  • Loading branch information
fit-alessandro-berti committed Dec 14, 2023
2 parents 320350c + 29d96f2 commit 3769966
Show file tree
Hide file tree
Showing 38 changed files with 1,121 additions and 260 deletions.
6 changes: 4 additions & 2 deletions examples/powl_discovery.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
import pm4py
from examples import examples_conf
from pm4py.algo.discovery.powl.inductive.variants.powl_discovery_varaints import POWLDiscoveryVariant
from pm4py.visualization.powl.visualizer import POWLVisualizationVariants


def execute_script():
log = pm4py.read_xes("../tests/input_data/helpdesk.xes.gz", return_legacy_log_object=True)

# discovers the POWL model
powl_model = pm4py.discover_powl(log)
powl_model = pm4py.discover_powl(log, variant=POWLDiscoveryVariant.DYNAMIC_CLUSTERING, order_graph_filtering_threshold=0.6)

# prints the repr of the POWL model
print(powl_model)

# views the POWL model on the screen
pm4py.view_powl(powl_model, format=examples_conf.TARGET_IMG_FORMAT)
pm4py.view_powl(powl_model, format=examples_conf.TARGET_IMG_FORMAT, variant=POWLVisualizationVariants.NET)

# converts the POWL model to a Petri net (which can be used for conformance checking)
net, im, fm = pm4py.convert_to_petri_net(powl_model)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from enum import Enum
import sys
from pm4py.util.constants import PARAMETER_CONSTANT_ACTIVITY_KEY
from typing import Optional, Dict, Any, Union
from typing import Optional, Dict, Any, Union, List as TList
from pm4py.objects.log.obj import Trace
from pm4py.objects.petri_net.obj import PetriNet, Marking
from pm4py.util import typing
Expand Down Expand Up @@ -320,7 +320,7 @@ def repr_searchtuple ( st: utils.DijkstraSearchTuple ) -> str :



def rec_hub (hub, curr_m) -> list[list[utils.DijkstraSearchTuple]]:
def rec_hub (hub, curr_m) -> TList[TList[utils.DijkstraSearchTuple]]:

out = []
for curr_st in hub[curr_m] :
Expand Down
31 changes: 16 additions & 15 deletions pm4py/algo/discovery/powl/algorithm.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from pm4py.algo.discovery.inductive.dtypes.im_ds import IMDataStructureUVCL
from pm4py.algo.discovery.powl.inductive.variants.im_base import IMBasePOWL
from pm4py.algo.discovery.powl.inductive.variants.im_brute_force import BruteForcePOWL
from pm4py.algo.discovery.powl.inductive.variants.im_cluster import ClusterPOWL
from pm4py.algo.discovery.powl.inductive.variants.powl_discovery_variants import POWLDiscoveryVariant

from pm4py import util as pmutil
from pm4py.algo.discovery.powl.inductive.variants.im_dynamic_clustering_frequencies import \
POWLInductiveMinerDynamicClusteringFrequency
from pm4py.algo.discovery.powl.inductive.variants.im_tree import IMBasePOWL
from pm4py.algo.discovery.powl.inductive.variants.im_brute_force import POWLInductiveMinerBruteForce
from pm4py.algo.discovery.powl.inductive.variants.im_maximal import POWLInductiveMinerMaximalOrder
from pm4py.algo.discovery.powl.inductive.variants.powl_discovery_varaints import POWLDiscoveryVariant

from pm4py import util
from pm4py.algo.discovery.inductive.algorithm import Parameters
from pm4py.objects.powl.obj import POWL

Expand All @@ -19,23 +21,25 @@


def get_variant(variant: POWLDiscoveryVariant) -> Type[IMBasePOWL]:
if variant == POWLDiscoveryVariant.IM_BASE:
if variant == POWLDiscoveryVariant.TREE:
return IMBasePOWL
elif variant == POWLDiscoveryVariant.BRUTE_FORCE:
return BruteForcePOWL
elif variant == POWLDiscoveryVariant.CLUSTER:
return ClusterPOWL
return POWLInductiveMinerBruteForce
elif variant == POWLDiscoveryVariant.MAXIMAL:
return POWLInductiveMinerMaximalOrder
elif variant == POWLDiscoveryVariant.DYNAMIC_CLUSTERING:
return POWLInductiveMinerDynamicClusteringFrequency
else:
raise Exception('Invalid Variant!')


def apply(obj: Union[EventLog, pd.DataFrame, UVCL], parameters: Optional[Dict[Any, Any]] = None,
variant=POWLDiscoveryVariant.CLUSTER, simplify_using_frequent_transitions=False) -> POWL:
variant=POWLDiscoveryVariant.MAXIMAL) -> POWL:
if parameters is None:
parameters = {}
ack = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_util.DEFAULT_NAME_KEY)
tk = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, xes_util.DEFAULT_TIMESTAMP_KEY)
cidk = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, pmutil.constants.CASE_CONCEPT_NAME)
cidk = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, util.constants.CASE_CONCEPT_NAME)
if type(obj) in [EventLog, pd.DataFrame]:
uvcl = comut.get_variants(comut.project_univariate(obj, key=ack, df_glue=cidk, df_sorting_criterion_key=tk))
else:
Expand All @@ -44,9 +48,6 @@ def apply(obj: Union[EventLog, pd.DataFrame, UVCL], parameters: Optional[Dict[An
algorithm = get_variant(variant)
im = algorithm(parameters)
res = im.apply(IMDataStructureUVCL(uvcl), parameters)

res = res.simplify()
if simplify_using_frequent_transitions:
return res.simplify_using_frequent_transitions()

return res
4 changes: 2 additions & 2 deletions pm4py/algo/discovery/powl/inductive/fall_through/factory.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from multiprocessing import Pool, Manager
from typing import List, TypeVar, Tuple, Optional, Dict, Any, Type, Union
from typing import List, TypeVar, Tuple, Optional, Dict, Any, Type

from pm4py.algo.discovery.inductive.dtypes.im_ds import IMDataStructure, IMDataStructureUVCL
from pm4py.algo.discovery.inductive.fall_through.abc import FallThrough
Expand All @@ -25,7 +25,7 @@ def get_fall_throughs(cls, obj: T, parameters: Optional[Dict[str, Any]] = None)
return list()

@classmethod
def fall_through(cls, obj: T, pool: Pool, manager: Manager, parameters: Optional[Dict[str, Any]] = None) -> Union[Tuple[POWL, List[T]], None]:
def fall_through(cls, obj: T, pool: Pool, manager: Manager, parameters: Optional[Dict[str, Any]] = None) -> Tuple[POWL, List[T]] | None:
for f in FallThroughFactory.get_fall_throughs(obj):
r = f.apply(obj, pool, manager, parameters)
if r is not None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ def apply(cls, obj: IMDataStructureUVCL, pool: Pool = None, manager: Manager = N
proj = cls._get_projected_log(log)
if sum(proj.values()) > sum(log.values()):
return OperatorPOWL(Operator.LOOP, []), [IMDataStructureUVCL(proj), IMDataStructureUVCL(Counter())]

Empty file.
43 changes: 43 additions & 0 deletions pm4py/algo/discovery/powl/inductive/utils/filtering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from enum import Enum, auto
from collections import Counter
from pm4py.algo.discovery.inductive.dtypes.im_ds import IMDataStructureUVCL


class FilteringType(Enum):
    """Strategies available for filtering variants of a log.

    The concrete values are assigned by ``auto()``; only member identity
    should be relied upon.
    """

    # NOTE(review): presumably selects filter_most_frequent_variants
    # (drop the lowest-frequency variants) -- confirm against the caller.
    DYNAMIC = auto()

    # NOTE(review): presumably selects
    # filter_most_frequent_variants_with_decreasing_factor -- confirm
    # against the caller.
    DECREASING_FACTOR = auto()


# Filtering strategy used when none is specified explicitly.
DEFAULT_FILTERING_TYPE = FilteringType.DECREASING_FACTOR
# String identifiers for the filtering options.
# NOTE(review): presumably used as keys in the discovery `parameters`
# dictionary -- confirm against the miner that reads them.
FILTERING_THRESHOLD = "filtering_threshold"
FILTERING_TYPE = "filtering_type"


def filter_most_frequent_variants(log):
    """Remove the least frequent variants from a variant-frequency log.

    Despite the function name, the variants that are *dropped* are those
    whose frequency equals the minimum frequency found in ``log``; every
    other variant is kept with its original frequency. If all variants
    share the same frequency, the result is empty.

    An empty ``log`` yields an empty result instead of raising
    ``ValueError`` (which ``min`` does on an empty sequence), matching the
    behaviour of ``filter_most_frequent_variants_with_decreasing_factor``.

    :param log: mapping from variant to its frequency (e.g. a ``Counter``)
    :return: an ``IMDataStructureUVCL`` wrapping a new ``Counter`` with the
        surviving variants; ``log`` itself is not modified
    """
    # default=None keeps the empty-log case from raising; the comprehension
    # below then simply has nothing to iterate over.
    lowest_freq = min(log.values(), default=None)
    kept = Counter({variant: freq
                    for variant, freq in log.items()
                    if freq != lowest_freq})
    return IMDataStructureUVCL(kept)


def filter_most_frequent_variants_with_decreasing_factor(log, decreasing_factor):
    """Keep the leading variants of ``log`` until the frequency drops sharply.

    Variants are visited in descending order of frequency. A variant is
    kept while the running total of kept frequencies is still zero (i.e.
    for the first variant), or when its frequency is strictly greater than
    ``decreasing_factor`` times the frequency of the previously kept
    variant. The first variant failing this test ends the scan; it and all
    remaining variants are discarded.

    :param log: mapping from variant to its frequency (e.g. a ``Counter``)
    :param decreasing_factor: multiplier applied to the previously kept
        frequency to obtain the threshold for the next variant
    :return: an ``IMDataStructureUVCL`` wrapping a new ``Counter`` with the
        kept variants; ``log`` itself is not modified
    """
    kept = Counter()
    kept_total = 0
    last_kept_freq = -1

    for candidate in sorted(log, key=log.get, reverse=True):
        freq = log[candidate]
        # Stop at the first variant that falls at or below the threshold.
        if kept_total != 0 and not freq > decreasing_factor * last_kept_freq:
            break
        kept[candidate] = freq
        kept_total = kept_total + freq
        last_kept_freq = freq

    return IMDataStructureUVCL(kept)
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from pm4py.objects.dfg import util as dfu
from pm4py.statistics.eventually_follows.uvcl.get import apply as to_efg

# MAX_NUM_PARTITIONS = 1000

def remove(blocks, g):
res = []
Expand Down Expand Up @@ -47,14 +46,14 @@ def get_partitions_of_size_k(nodes, k=None):
elif k > n:
return

def set_partitions_helper(L, k):
n = len(L)
def set_partitions_helper(l, k):
length = len(l)
if k == 1:
yield [tuple(L)]
elif n == k:
yield [tuple([s]) for s in L]
yield [tuple(l)]
elif length == k:
yield [tuple([s]) for s in l]
else:
e, *M = L
e, *M = l
for p in set_partitions_helper(M, k - 1):
yield [tuple([e]), *p]
for p in set_partitions_helper(M, k):
Expand All @@ -69,13 +68,9 @@ def set_partitions_helper(L, k):


def partition(collection):
    """Lazily yield partitions of ``collection``, largest block count first.

    For every block count from ``len(collection)`` down to 2 (inclusive),
    all results of ``get_partitions_of_size_k(collection, count)`` are
    yielded in the order that helper produces them. Nothing is yielded
    when ``collection`` has fewer than two elements.

    :param collection: the elements to partition
    :return: a generator over the partitions
    """
    for block_count in range(len(collection), 1, -1):
        yield from get_partitions_of_size_k(collection, block_count)
Expand Down Expand Up @@ -167,14 +162,14 @@ def holds(cls, obj: T, parameters: Optional[Dict[str, Any]] = None) -> Optional[
efg = to_efg(obj)
alphabet = sorted(dfu.get_vertices(dfg_graph), key=lambda g: g.__str__())
for part in partition(alphabet):
# print(part)
po = generate_order(part, efg)
if is_valid_order(po, dfg_graph, efg):
return po
return None

@classmethod
def apply(cls, obj: T, parameters: Optional[Dict[str, Any]] = None) -> Optional[Tuple[StrictPartialOrder, List[POWL]]]:
def apply(cls, obj: T, parameters: Optional[Dict[str, Any]] = None) -> Optional[
Tuple[StrictPartialOrder, List[POWL]]]:
g = cls.holds(obj, parameters)
if g is None:
return g
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,24 @@
from pm4py.algo.discovery.powl.inductive.cuts.loop import POWLLoopCutUVCL
from pm4py.algo.discovery.powl.inductive.cuts.sequence import POWLStrictSequenceCutUVCL
from pm4py.algo.discovery.powl.inductive.cuts.xor import POWLExclusiveChoiceCutUVCL
from pm4py.algo.discovery.inductive.dtypes.im_ds import IMDataStructureUVCL, IMDataStructure
from pm4py.algo.discovery.inductive.dtypes.im_ds import IMDataStructureUVCL
from pm4py.algo.discovery.powl.inductive.variants.brute_force.bf_partial_order_cut import BruteForcePartialOrderCutUVCL
from pm4py.algo.discovery.powl.inductive.variants.powl_discovery_variants import POWLDiscoveryVariant
from pm4py.objects.powl.obj import POWL


class CutFactoryPOBF(CutFactory):
class CutFactoryPOWLBruteForce(CutFactory):

@classmethod
def get_cuts(cls, obj: T, inst: POWLDiscoveryVariant, parameters: Optional[Dict[str, Any]] = None) -> List[Type[S]]:
def get_cuts(cls, obj: T, parameters: Optional[Dict[str, Any]] = None) -> List[Type[S]]:
if type(obj) is IMDataStructureUVCL:
return [POWLExclusiveChoiceCutUVCL, POWLStrictSequenceCutUVCL, POWLConcurrencyCutUVCL, POWLLoopCutUVCL, BruteForcePartialOrderCutUVCL]
return [POWLExclusiveChoiceCutUVCL, POWLStrictSequenceCutUVCL, POWLConcurrencyCutUVCL, POWLLoopCutUVCL,
BruteForcePartialOrderCutUVCL]
return list()

@classmethod
def find_cut(cls, obj: IMDataStructure, inst: POWLDiscoveryVariant, parameters: Optional[Dict[str, Any]] = None) -> Optional[
Tuple[POWL, List[T]]]:
for c in CutFactoryPOBF.get_cuts(obj, inst):
def find_cut(cls, obj: IMDataStructureUVCL, parameters: Optional[Dict[str, Any]] = None) -> Optional[
Tuple[POWL, List[T]]]:
for c in CutFactoryPOWLBruteForce.get_cuts(obj):
r = c.apply(obj, parameters)
if r is not None:
return r
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from pm4py.algo.discovery.powl.inductive.variants.dynamic_clustering import *
Loading

0 comments on commit 3769966

Please sign in to comment.