Skip to content

Commit

Permalink
feat: PROBE log to process tree (#81)
Browse files Browse the repository at this point in the history
Co-authored-by: Sam Grayson <[email protected]>
  • Loading branch information
Acesif and charmoniumQ authored Jan 22, 2025
1 parent cdbc183 commit b59f043
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 0 deletions.
25 changes: 25 additions & 0 deletions probe_src/python/probe_py/manual/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,3 +533,28 @@ def color_hb_graph(prov_log: parser.ProvLog, process_graph: nx.DiGraph) -> None:
data["label"] += f"\n{TaskType(op.data.task_type).name} {op.data.task_id}"
elif isinstance(op.data, StatOp):
data["label"] += f"\n{op.data.path.path.decode()}"

def provlog_to_process_tree(prov_log: parser.ProvLog) -> nx.DiGraph:
    """Build a parent -> child process graph from the clone ops in a provenance log.

    Every process recorded in ``prov_log.processes`` becomes a node, so a
    process that never clones (for example, a single-process trace) still
    appears in the result instead of producing an empty graph. For each
    successful ``CloneOp`` (``ferrno == 0``) found in any exec epoch and
    thread of a process, an edge is added from that process's pid to the
    op's ``task_id``.

    Args:
        prov_log: Parsed provenance log whose processes are scanned.

    Returns:
        A directed graph whose nodes carry a ``label`` attribute of the form
        ``"Process <id>"`` and whose edges point from parent to child.
    """
    process_tree = nx.DiGraph()

    for pid, process in prov_log.processes.items():
        # Register the process up front: without this, a process with no
        # successful clone ops (and no recorded parent) would be missing.
        process_tree.add_node(pid, label=f"Process {pid}")

        for exec_epoch in process.exec_epochs.values():
            for thread in exec_epoch.threads.values():
                for op in thread.ops:
                    op_data = op.data
                    # Failed clones (non-zero ferrno) created no task; skip them.
                    if not (isinstance(op_data, CloneOp) and op_data.ferrno == 0):
                        continue

                    # NOTE(review): task_id is treated as a process id for every
                    # CloneOp; if clone ops can also describe threads, callers
                    # may want to filter on op_data.task_type -- confirm.
                    child_pid = op_data.task_id
                    # Re-adding an existing node or edge is a no-op in networkx,
                    # so repeated clones of the same child do not duplicate.
                    process_tree.add_node(child_pid, label=f"Process {child_pid}")
                    process_tree.add_edge(pid, child_pid)

    return process_tree
20 changes: 20 additions & 0 deletions probe_src/python/probe_py/manual/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,26 @@ def nextflow(
script = g.generate_workflow(dataflow_graph)
output.write_text(script)

@export_app.command()
def provlog_to_process_tree(
    output: Annotated[
        pathlib.Path,
        typer.Argument(help="file to write the rendered process tree to."),
    ] = pathlib.Path("provlog-process-tree.png"),
    probe_log: Annotated[
        pathlib.Path,
        typer.Argument(help="output file written by `probe record -o $file`."),
    ] = pathlib.Path("probe_log"),
) -> None:
    """
    Write a process tree from probe_log.

    The digraph has an edge from each parent process to every child it
    created through a successful clone op.
    """
    # Parse the recorded log, derive the parent/child graph, then hand it to
    # the shared serializer, which writes it to `output`.
    prov_log = parse_probe_log(probe_log)
    digraph = analysis.provlog_to_process_tree(prov_log)
    graph_utils.serialize_graph(digraph, output)


@export_app.command()
def ops_jsonl(
Expand Down

0 comments on commit b59f043

Please sign in to comment.