Merge branch 'PMPY-2002' into 'integration'
PMPY-2002 ILP Miner

Closes PMPY-2002

See merge request process-mining/pm4py/pm4py-core!901
fit-alessandro-berti committed Mar 11, 2023
2 parents 84e9d52 + 4294389 commit 73254a8
Showing 8 changed files with 398 additions and 1 deletion.
12 changes: 12 additions & 0 deletions examples/hybrid_ilp_miner.py
@@ -0,0 +1,12 @@
import pm4py
import os


def execute_script():
log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "running-example.xes"))
net, im, fm = pm4py.discover_petri_net_ilp(log)
pm4py.view_petri_net(net, im, fm, format="svg")


if __name__ == "__main__":
execute_script()
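
As an optional variation on this example (not part of the commit), the discovered model could be written to a file instead of being opened in a viewer, using pm4py.save_vis_petri_net:

# alternative last line of execute_script(): save the visualization to disk
pm4py.save_vis_petri_net(net, im, fm, "ilp_model.svg")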
2 changes: 1 addition & 1 deletion pm4py/__init__.py
@@ -15,7 +15,7 @@
filter_ocel_end_events_per_object_type, filter_ocel_events_timestamp, filter_prefixes, filter_suffixes, \
filter_four_eyes_principle, filter_activity_done_different_resources, filter_ocel_events, filter_ocel_objects, \
filter_ocel_object_types, filter_ocel_cc_object
from pm4py.discovery import discover_petri_net_alpha, discover_petri_net_alpha_plus, discover_petri_net_heuristics, \
from pm4py.discovery import discover_petri_net_alpha, discover_petri_net_alpha_plus, discover_petri_net_ilp, discover_petri_net_heuristics, \
discover_petri_net_inductive, discover_process_tree_inductive, discover_heuristics_net, \
discover_dfg, discover_footprints, discover_eventually_follows_graph, discover_directly_follows_graph, discover_bpmn_inductive, \
discover_performance_dfg, discover_transition_system, discover_prefix_tree, \
1 change: 1 addition & 0 deletions pm4py/algo/discovery/ilp/__init__.py
@@ -0,0 +1 @@
from pm4py.algo.discovery.ilp import algorithm, variants
37 changes: 37 additions & 0 deletions pm4py/algo/discovery/ilp/algorithm.py
@@ -0,0 +1,37 @@
from enum import Enum
from pm4py.util import exec_utils
from pm4py.algo.discovery.ilp.variants import classic
from typing import Union, Optional, Dict, Any, Tuple
from pm4py.objects.petri_net.obj import PetriNet, Marking
from pm4py.objects.log.obj import EventLog, EventStream
import pandas as pd


class Variants(Enum):
CLASSIC = classic


def apply(log: Union[EventLog, EventStream, pd.DataFrame], variant = Variants.CLASSIC, parameters: Optional[Dict[Any, Any]] = None) -> Tuple[PetriNet, Marking, Marking]:
"""
Discovers a Petri net using the ILP miner.
Parameters
---------------
log
Event log / Event stream / Pandas dataframe
variant
Variant of the algorithm to be used, possible values:
- Variants.CLASSIC
parameters
Variant-specific parameters
Returns
---------------
net
Petri net
im
Initial marking
fm
Final marking
"""
return exec_utils.get_variant(variant).apply(log, parameters)
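
A minimal usage sketch for this new module (not part of the commit; the log path is illustrative). The low-level call mirrors the apply signature above, while pm4py.discover_petri_net_ilp is the simplified interface wired up in pm4py/__init__.py:

import pm4py
from pm4py.algo.discovery.ilp import algorithm as ilp_miner

log = pm4py.read_xes("tests/input_data/running-example.xes")
# low-level call: explicitly select the CLASSIC variant
net, im, fm = ilp_miner.apply(log, variant=ilp_miner.Variants.CLASSIC)
# equivalent simplified call
net, im, fm = pm4py.discover_petri_net_ilp(log)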
1 change: 1 addition & 0 deletions pm4py/algo/discovery/ilp/variants/__init__.py
@@ -0,0 +1 @@
from pm4py.algo.discovery.ilp.variants import classic
309 changes: 309 additions & 0 deletions pm4py/algo/discovery/ilp/variants/classic.py
@@ -0,0 +1,309 @@
import numpy as np
import networkx as nx

from pm4py.objects.log.obj import EventLog, EventStream
import pandas as pd
from typing import Union, Optional, Dict, Any, Tuple, List
from pm4py.objects.petri_net.obj import PetriNet, Marking
from enum import Enum
from pm4py.util import exec_utils, xes_constants, constants
from pm4py.objects.conversion.log import converter as log_converter
from pm4py.objects.log.util import filtering_utils
from pm4py.util.lp import solver as lp_solver
from pm4py.objects.petri_net.utils import petri_utils
from pm4py.objects.log.util import artificial
from copy import copy, deepcopy
from pm4py.algo.discovery.causal import algorithm as causal_discovery
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
from pm4py.objects.petri_net.utils import murata
from pm4py.objects.petri_net.utils import reduction
import pkgutil
import warnings


class Parameters(Enum):
ACTIVITY_KEY = constants.PARAMETER_CONSTANT_ACTIVITY_KEY
PARAM_ARTIFICIAL_START_ACTIVITY = constants.PARAM_ARTIFICIAL_START_ACTIVITY
PARAM_ARTIFICIAL_END_ACTIVITY = constants.PARAM_ARTIFICIAL_END_ACTIVITY
CAUSAL_RELATION = "causal_relation"
SHOW_PROGRESS_BAR = "show_progress_bar"
ALPHA = "alpha"


def __transform_log_to_matrix(log: EventLog, activities: List[str], activity_key: str):
"""
Internal method
Transforms the event log into a numeric matrix that is used to construct the linear problem.
"""
matr = []
for trace in log:
vect = []
rep = np.array([0] * len(activities))
for i in range(len(trace)):
repi = rep.copy()
repi[activities.index(trace[i][activity_key])] += 1
vect.append(repi)
rep = repi
matr.append(vect)
return matr


def __manage_solution(sol, added_places, explored_solutions, net, activities, trans_map):
"""
Internal method.
Manages the solution of the linear problem and possibly adds the corresponding place to the Petri net
"""
if sol.success:
sol = lp_solver.get_points_from_sol(sol, variant=lp_solver.SCIPY)
sol = [round(x) for x in sol]

if tuple(sol) not in added_places:
added_places.add(tuple(sol))
sol = np.array(sol)

x0 = np.array(sol[:len(activities)])
y0 = np.array(sol[len(activities):2 * len(activities)])

if max(x0) > 0 and max(y0) > 0:
place = PetriNet.Place(str(len(net.places)))
net.places.add(place)
i = 0
while i < len(activities):
if sol[i] == 1:
petri_utils.add_arc_from_to(trans_map[activities[i]], place, net)
i = i + 1
while i < 2 * len(activities):
if sol[i] == 1:
petri_utils.add_arc_from_to(place, trans_map[activities[i - len(activities)]], net)
i = i + 1
vec = x0.tolist() + (y0-1).tolist() + [sol[-1]]
b = sol[-1] - 1 + x0.sum()

explored_solutions.add((tuple(vec), b))

return vec, b

return None, None


def apply(log0: Union[EventLog, EventStream, pd.DataFrame], parameters: Optional[Dict[Any, Any]] = None) -> Tuple[PetriNet, Marking, Marking]:
"""
Discovers a Petri net using the ILP miner.
The implementation follows what is described in the scientific paper:
van Zelst, Sebastiaan J., et al.
"Discovering workflow nets using integer linear programming." Computing 100.5 (2018): 529-556.
Parameters
---------------
log0
Event log / Event stream / Pandas dataframe
parameters
Parameters of the algorithm, including:
- Parameters.ACTIVITY_KEY => the attribute to be used as activity
- Parameters.SHOW_PROGRESS_BAR => decides if the progress bar should be shown
Returns
---------------
net
Petri net
im
Initial marking
fm
Final marking
"""
if parameters is None:
parameters = {}

activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
artificial_start_activity = exec_utils.get_param_value(Parameters.PARAM_ARTIFICIAL_START_ACTIVITY, parameters,
constants.DEFAULT_ARTIFICIAL_START_ACTIVITY)
artificial_end_activity = exec_utils.get_param_value(Parameters.PARAM_ARTIFICIAL_END_ACTIVITY, parameters,
constants.DEFAULT_ARTIFICIAL_END_ACTIVITY)
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True)

log0 = log_converter.apply(log0, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)
log0 = filtering_utils.keep_one_trace_per_variant(log0, parameters=parameters)
log = artificial.insert_artificial_start_end(deepcopy(log0), parameters=parameters)
# use the Alpha Miner causal relation if none is provided as a parameter
causal = exec_utils.get_param_value(Parameters.CAUSAL_RELATION, parameters, causal_discovery.apply(dfg_discovery.apply(log, parameters=parameters)))
# noise threshold for the sequence encoding graph (alpha=1 applies no filtering; alpha=0 applies the strongest filtering)
alpha = exec_utils.get_param_value(Parameters.ALPHA, parameters, 1.0)

activities = sorted(list(set(x[activity_key] for trace in log for x in trace)))

# check whether the causal relation satisfies the criteria for relaxed sound WF-nets
G = nx.DiGraph()
for ca in causal:
G.add_edge(ca[0], ca[1])

desc_start = set(nx.descendants(G, artificial_start_activity))
anc_end = set(nx.ancestors(G, artificial_end_activity))

if artificial_start_activity in desc_start or artificial_end_activity in anc_end or len(desc_start.union({artificial_start_activity}).difference(activities)) > 0 or len(anc_end.union({artificial_end_activity}).difference(activities)) > 0:
warnings.warn("The conditions needed to ensure a relaxed sound WF-net as output are not satisfied.")

matr = __transform_log_to_matrix(log, activities, activity_key)

net = PetriNet("ilp")
im = Marking()
fm = Marking()
source = PetriNet.Place("source")
sink = PetriNet.Place("sink")
net.places.add(source)
net.places.add(sink)
im[source] = 1
fm[sink] = 1
trans_map = {}

# STEP A) construction of the transitions of the Petri net.
# the source and sink places are connected to the artificial start and end activities, respectively
for act in activities:
label = act if act not in [artificial_start_activity, artificial_end_activity] else None
trans_map[act] = PetriNet.Transition(act, label)
net.transitions.add(trans_map[act])

if act == artificial_start_activity:
petri_utils.add_arc_from_to(source, trans_map[act], net)
elif act == artificial_end_activity:
petri_utils.add_arc_from_to(trans_map[act], sink, net)

# STEP B) construction of the sequence encoding graph
seq_enc_graph = {}
for j in range(len(matr)):
trace = matr[j]
trace_occ = log0[j].attributes["@@num_traces"]
for i in range(len(trace)):
prev = -trace[i-1] if i > 0 else np.zeros(len(activities))
curr = trace[i]
prev = tuple(prev)
curr = tuple(curr)
if prev not in seq_enc_graph:
seq_enc_graph[prev] = {}
if curr not in seq_enc_graph[prev]:
seq_enc_graph[prev][curr] = 0
seq_enc_graph[prev][curr] += trace_occ
max_child_seq_enc_graph = {x: max(y.values()) for x, y in seq_enc_graph.items()}

# STEP C) construction of the base linear problem
# which is 'extended' at each iteration
c = np.zeros(2*len(activities))
Aub = []
bub = []
Aeq = []
beq = []

added_rows_Aub = set()
added_rows_Aeq = set()

for trace in matr:
for i in range(len(trace)):
row1 = -trace[i-1] if i > 0 else np.zeros(len(activities))
row2 = trace[i]
prev = tuple(row1)
curr = tuple(row2)

if seq_enc_graph[prev][curr] >= (1-alpha) * max_child_seq_enc_graph[prev]:
row = row1.tolist() + row2.tolist() + [-1]
if i < len(trace)-1:
if tuple(row) not in added_rows_Aub:
added_rows_Aub.add(tuple(row))
Aub.append(row)
bub.append(0)
else:
if tuple(row) not in added_rows_Aeq:
# deviation 1: impose that the place is empty at the end of every trace of the log
added_rows_Aeq.add(tuple(row))
Aeq.append(row)
beq.append(0)

crow = row2.tolist() + (-row2).tolist()
c += crow
else:
# break not only on the current node but also on all of its children
break

Aub.append([-1] * (2*len(activities)) + [0])
bub.append(-1)

for i in range(2*len(activities)+1):
row1 = [0] * (2*len(activities)+1)
row1[i] = -1
Aub.append(row1)
bub.append(0)
row2 = [0] * (2*len(activities)+1)
row2[i] = 1
Aub.append(row2)
bub.append(1)

# deviation 2: seek only places that initially contain 0 tokens
const = [0] * (2 * len(activities)) + [1]
Aub.append(const)
bub.append(0)

c = c.tolist()
c.append(1)

integrality = [1] * (2*len(activities)+1)

ite = 0
added_places = set()
explored_solutions = set()

progress = None
if pkgutil.find_loader("tqdm") and show_progress_bar and len(causal) > 1:
from tqdm.auto import tqdm
progress = tqdm(total=len(causal), desc="discovering Petri net using ILP miner, completed causal relations :: ")

# STEP D) explore all the causal relations in the log
# to find places
for ca in causal:
Aeq1 = copy(Aeq)
beq1 = copy(beq)

const1 = [0] * (2*len(activities) + 1)
const1[activities.index(ca[0])] = 1
Aeq1.append(const1)
beq1.append(1)

const2 = [0] * (2*len(activities) + 1)
const2[len(activities) + activities.index(ca[1])] = 1
Aeq1.append(const2)
beq1.append(1)

sol = lp_solver.apply(c, Aub, bub, Aeq1, beq1, variant=lp_solver.SCIPY, parameters={"integrality": integrality})
__manage_solution(sol, added_places, explored_solutions, net, activities, trans_map)

ite += 1

if progress is not None:
progress.update()

# gracefully close progress bar
if progress is not None:
progress.close()
del progress


# here, the exhaustive search for places described in the paper (excluding previously found solutions
# from the LP space) could also be applied (possible STEP D); the block below is kept disabled
"""for tup in explored_solutions:
Aub.append(list(tup[0]))
bub.append(tup[1])
while True:
sol = lp_solver.apply(c, Aub, bub, Aeq, beq, variant=lp_solver.SCIPY, parameters={"integrality": integrality})
vec, b = __manage_solution(sol, added_places, explored_solutions, net, activities, trans_map)
if vec is not None:
Aub.append(vec)
bub.append(b)
ite += 1
if ite >= 25:
break"""

# STEP E) apply the reduction of implicit places and of invisible transitions
net, im, fm = murata.apply_reduction(net, im, fm)
net = reduction.apply_simple_reduction(net)

return net, im, fm
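
A short sketch (not part of the commit) of invoking this variant directly with a lower noise threshold and the progress bar disabled; the parameter keys come from the Parameters enum above, and the log path is illustrative:

import pm4py
from pm4py.algo.discovery.ilp.variants import classic

log = pm4py.read_xes("tests/input_data/running-example.xes")
# alpha < 1.0 filters infrequent edges of the sequence encoding graph
net, im, fm = classic.apply(log, parameters={
    classic.Parameters.ALPHA: 0.8,
    classic.Parameters.SHOW_PROGRESS_BAR: False
})
pm4py.view_petri_net(net, im, fm, format="svg")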