From 5eb1b588e3e459b18fb3140ce13b2f57aa2ab15f Mon Sep 17 00:00:00 2001 From: Saleha Muzammil <84681153+saleha-muzammil@users.noreply.github.com> Date: Mon, 22 Jul 2024 02:19:38 +0500 Subject: [PATCH] CLI Entry Points, Performance Tests, Execution Time Recording (#33) * performance_tests * Update performance_test.py * Update cli.py --- flake.nix | 1 + probe_src/benchmark_results.csv | 43 +++++++ probe_src/performance_test.py | 200 ++++++++++++++++++++++++++++++++ probe_src/probe_py/cli.py | 67 ++++++----- 4 files changed, 281 insertions(+), 30 deletions(-) create mode 100644 probe_src/benchmark_results.csv create mode 100644 probe_src/performance_test.py diff --git a/flake.nix b/flake.nix index 60dd88bf..79e5250a 100644 --- a/flake.nix +++ b/flake.nix @@ -36,6 +36,7 @@ buildInputs = [ (pkgs.python312.withPackages (pypkgs: [ + pypkgs.psutil pypkgs.typer pypkgs.pycparser pypkgs.pytest diff --git a/probe_src/benchmark_results.csv b/probe_src/benchmark_results.csv new file mode 100644 index 00000000..6cc2478e --- /dev/null +++ b/probe_src/benchmark_results.csv @@ -0,0 +1,43 @@ +Command,Phase,Return Code,CPU Times,Memory Info,IO Counters,Start Time,End Time,Duration (s) +"echo 'Hello, World!'",No PROBE,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=917504, vms=3416064, shared=786432, text=581632, lib=0, data=339968, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:44:09.607989,2024-07-20 21:44:09.609945,0.001956 +"echo 'Hello, World!'",No PROBE,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=393216, vms=3416064, shared=393216, text=581632, lib=0, data=360448, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:44:13.610088,2024-07-20 21:44:13.615808,0.00572 +"echo 'Hello, World!'",Record,0,"pcputimes(user=0.0, system=0.0, 
children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=393216, vms=3416064, shared=393216, text=581632, lib=0, data=360448, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:44:18.345653,2024-07-20 21:44:19.120744,0.775091 +"echo 'Hello, World!'",Record,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=393216, vms=3416064, shared=393216, text=581632, lib=0, data=360448, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:44:23.120899,2024-07-20 21:44:24.157867,1.036968 +"echo 'Hello, World!'",No Transcribe,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=262144, vms=1384448, shared=262144, text=581632, lib=0, data=270336, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:44:29.163526,2024-07-20 21:44:30.008540,0.845014 +"echo 'Hello, World!'",Transcription,0,,,,2024-07-20 21:44:30.008540,2024-07-20 21:44:39.878468,9.869916 +"echo 'Hello, World!'",No Transcribe,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=262144, vms=3403776, shared=262144, text=581632, lib=0, data=270336, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:44:34.008693,2024-07-20 21:44:35.035150,1.026457 +ls -l,No PROBE,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=393216, vms=3416064, shared=393216, text=581632, lib=0, data=360448, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:44:40.740135,2024-07-20 21:44:40.743915,0.00378 +ls -l,No PROBE,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=393216, 
vms=3416064, shared=393216, text=581632, lib=0, data=360448, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:44:44.744288,2024-07-20 21:44:44.752138,0.00785 +ls -l,Record,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=262144, vms=3403776, shared=262144, text=581632, lib=0, data=270336, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:44:49.562167,2024-07-20 21:44:50.672784,1.110617 +ls -l,Record,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=262144, vms=3403776, shared=262144, text=581632, lib=0, data=294912, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:44:54.673119,2024-07-20 21:44:55.409416,0.736297 +ls -l,No Transcribe,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=262144, vms=3403776, shared=262144, text=581632, lib=0, data=348160, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:45:00.341989,2024-07-20 21:45:01.225244,0.883255 +ls -l,Transcription,0,,,,2024-07-20 21:45:01.225244,2024-07-20 21:45:11.126677,9.90142 +ls -l,No Transcribe,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=393216, vms=3403776, shared=393216, text=581632, lib=0, data=348160, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:45:05.225425,2024-07-20 21:45:06.042519,0.817094 +pwd,No PROBE,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=393216, vms=3416064, shared=393216, text=581632, lib=0, data=360448, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, 
read_chars=4979, write_chars=0)",2024-07-20 21:45:11.974744,2024-07-20 21:45:11.976932,0.002188 +pwd,No PROBE,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=393216, vms=3416064, shared=393216, text=581632, lib=0, data=360448, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:45:15.977299,2024-07-20 21:45:15.983080,0.005781 +pwd,Record,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=393216, vms=3403776, shared=393216, text=581632, lib=0, data=348160, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:45:20.718086,2024-07-20 21:45:21.492533,0.774447 +pwd,Record,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=393216, vms=3416064, shared=393216, text=581632, lib=0, data=360448, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:45:25.493807,2024-07-20 21:45:26.256538,0.762731 +pwd,No Transcribe,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=393216, vms=3416064, shared=393216, text=581632, lib=0, data=360448, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:45:30.995152,2024-07-20 21:45:31.762180,0.767028 +pwd,Transcription,0,,,,2024-07-20 21:45:31.762180,2024-07-20 21:45:41.233242,9.471051 +pwd,No Transcribe,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=393216, vms=3416064, shared=393216, text=581632, lib=0, data=360448, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:45:35.762409,2024-07-20 21:45:36.499283,0.736874 +head ../flake.nix,No PROBE,0,"pcputimes(user=0.0, 
system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=262144, vms=3403776, shared=262144, text=581632, lib=0, data=348160, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:45:42.009583,2024-07-20 21:45:42.012778,0.003195 +head ../flake.nix,No PROBE,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=393216, vms=3416064, shared=393216, text=581632, lib=0, data=360448, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:45:46.012883,2024-07-20 21:45:46.016393,0.00351 +head ../flake.nix,Record,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=393216, vms=3403776, shared=393216, text=581632, lib=0, data=348160, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:45:50.742829,2024-07-20 21:45:51.520314,0.777485 +head ../flake.nix,Record,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=393216, vms=3416064, shared=393216, text=581632, lib=0, data=360448, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:45:55.520691,2024-07-20 21:45:56.247363,0.726672 +head ../flake.nix,No Transcribe,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=393216, vms=3403776, shared=393216, text=581632, lib=0, data=348160, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:46:01.003745,2024-07-20 21:46:01.745426,0.741681 +head ../flake.nix,Transcription,0,,,,2024-07-20 21:46:01.745426,2024-07-20 21:46:11.199127,9.453691 +head ../flake.nix,No Transcribe,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, 
iowait=0.0)","pmem(rss=393216, vms=3416064, shared=393216, text=581632, lib=0, data=360448, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:46:05.745795,2024-07-20 21:46:06.468805,0.72301 +python3 -c 'print(2 + 2)',No PROBE,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=393216, vms=3403776, shared=393216, text=581632, lib=0, data=348160, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:46:12.016689,2024-07-20 21:46:12.047306,0.030617 +python3 -c 'print(2 + 2)',No PROBE,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=917504, vms=3416064, shared=786432, text=581632, lib=0, data=339968, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:46:16.047405,2024-07-20 21:46:16.090870,0.043465 +python3 -c 'print(2 + 2)',Record,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=393216, vms=3416064, shared=393216, text=581632, lib=0, data=360448, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:46:21.036912,2024-07-20 21:46:22.131088,1.094176 +python3 -c 'print(2 + 2)',Record,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=393216, vms=3403776, shared=393216, text=581632, lib=0, data=348160, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:46:26.131528,2024-07-20 21:46:26.940515,0.808987 +python3 -c 'print(2 + 2)',No Transcribe,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=393216, vms=3416064, shared=393216, text=581632, lib=0, data=360448, dirty=0)","pio(read_count=9, write_count=0, 
read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:46:31.729361,2024-07-20 21:46:32.520150,0.790789 +python3 -c 'print(2 + 2)',Transcription,0,,,,2024-07-20 21:46:32.520150,2024-07-20 21:46:42.038137,9.517977 +python3 -c 'print(2 + 2)',No Transcribe,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=393216, vms=3416064, shared=393216, text=581632, lib=0, data=360448, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:46:36.520752,2024-07-20 21:46:37.273333,0.752581 +cat tasks.md,No PROBE,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=393216, vms=3416064, shared=393216, text=581632, lib=0, data=360448, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:46:42.789425,2024-07-20 21:46:42.792475,0.00305 +cat tasks.md,No PROBE,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=393216, vms=3416064, shared=393216, text=581632, lib=0, data=360448, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:46:46.792891,2024-07-20 21:46:46.800191,0.0073 +cat tasks.md,Record,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=393216, vms=3403776, shared=393216, text=581632, lib=0, data=348160, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:46:51.526029,2024-07-20 21:46:52.316843,0.790814 +cat tasks.md,Record,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=393216, vms=3416064, shared=393216, text=581632, lib=0, data=360448, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 
21:46:56.317292,2024-07-20 21:46:57.034834,0.717542
+cat tasks.md,No Transcribe,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=262144, vms=1384448, shared=262144, text=581632, lib=0, data=270336, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:47:02.223380,2024-07-20 21:47:03.136350,0.91297
+cat tasks.md,Transcription,0,,,,2024-07-20 21:47:03.136350,2024-07-20 21:47:12.819107,9.682746
+cat tasks.md,No Transcribe,0,"pcputimes(user=0.0, system=0.0, children_user=0.0, children_system=0.0, iowait=0.0)","pmem(rss=655360, vms=3416064, shared=524288, text=581632, lib=0, data=360448, dirty=0)","pio(read_count=9, write_count=0, read_bytes=0, write_bytes=0, read_chars=4979, write_chars=0)",2024-07-20 21:47:07.136500,2024-07-20 21:47:08.086574,0.950074
diff --git a/probe_src/performance_test.py b/probe_src/performance_test.py
new file mode 100644
index 00000000..2281233b
--- /dev/null
+++ b/probe_src/performance_test.py
@@ -0,0 +1,200 @@
+"""Benchmark the overhead PROBE adds to a handful of shell commands."""
+import csv
+import datetime
+import shlex
+import subprocess
+import time
+from dataclasses import dataclass
+
+import psutil
+
+CSV_FIELDNAMES = ['Command', 'Phase', 'Return Code', 'CPU Times', 'Memory Info', 'IO Counters',
+                  'Start Time', 'End Time', 'Duration (s)']
+# Prefix of the line "PROBE record --no-transcribe" prints to announce its log directory.
+PROBE_DIR_MARKER = "Temporary probe directory: "
+
+
+@dataclass
+class Result:
+    """Outcome of one measured run of a shell command."""
+
+    returncode: int
+    cpu_times: tuple | None  # psutil snapshot; None when the process could not be sampled
+    memory_info: tuple | None  # psutil snapshot; None when the process could not be sampled
+    io_counters: tuple | None  # psutil snapshot; None when the process could not be sampled
+    stdout: str
+    stderr: str
+    start_time: datetime.datetime
+    end_time: datetime.datetime
+
+
+def benchmark_command(command: str, warmup_iterations: int, benchmark_iterations: int,
+                      cooldown_seconds: float = 4) -> list[Result]:
+    """
+    Run COMMAND through the shell and time each run.
+
+    COMMAND is first run WARMUP_ITERATIONS times without being measured, then
+    BENCHMARK_ITERATIONS times with measurement. After every measured run the
+    function sleeps for COOLDOWN_SECONDS before starting the next one.
+    Returns one Result per measured run, in execution order.
+    """
+    for _ in range(warmup_iterations):
+        subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+    results = []
+    for _ in range(benchmark_iterations):
+        start_time = datetime.datetime.now()
+        proc = psutil.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+        # Best-effort snapshot taken right after spawning: the counters describe the
+        # freshly started shell, not the totals of the finished command, and stay
+        # None when the process has already exited or may not be inspected.
+        cpu_times = memory_info = io_counters = None
+        if proc.is_running():
+            try:
+                cpu_times = proc.cpu_times()
+                memory_info = proc.memory_info()
+                io_counters = proc.io_counters()
+            except (psutil.NoSuchProcess, psutil.AccessDenied):
+                pass
+
+        # communicate() drains both pipes and waits for the process to exit.
+        stdout, stderr = proc.communicate()
+        returncode = proc.wait()
+        end_time = datetime.datetime.now()
+
+        results.append(Result(
+            returncode=returncode,
+            cpu_times=cpu_times,
+            memory_info=memory_info,
+            io_counters=io_counters,
+            # Benchmarked commands may emit arbitrary bytes; never crash on decoding.
+            stdout=stdout.decode('utf-8', errors='replace'),
+            stderr=stderr.decode('utf-8', errors='replace'),
+            start_time=start_time,
+            end_time=end_time,
+        ))
+        time.sleep(cooldown_seconds)  # pause between measured runs
+
+    return results
+
+
+def _report_result(writer: csv.DictWriter, command: str, phase: str, idx: int,
+                   result: Result) -> None:
+    """Print RESULT to stdout and append it to the CSV as a row labelled PHASE."""
+    print(f"Result {idx} ({phase}):")
+    print(f"Return Code: {result.returncode}")
+    print(f"CPU Times: {result.cpu_times}")
+    print(f"Memory Info: {result.memory_info}")
+    print(f"I/O Counters: {result.io_counters}")
+    print(f"Start Time: {result.start_time}")
+    print(f"End Time: {result.end_time}")
+    print(f"STDOUT:\n{result.stdout}")
+    print(f"STDERR:\n{result.stderr}")
+    print("-" * 50)
+
+    writer.writerow({
+        'Command': command,
+        'Phase': phase,
+        'Return Code': result.returncode,
+        'CPU Times': result.cpu_times,
+        'Memory Info': result.memory_info,
+        'IO Counters': result.io_counters,
+        'Start Time': result.start_time,
+        'End Time': result.end_time,
+        'Duration (s)': (result.end_time - result.start_time).total_seconds(),
+    })
+
+
+def _find_probe_dir(stdout: str) -> str | None:
+    """Return the probe directory announced in STDOUT, or None if there is none."""
+    # The recorded command's own output precedes the announcement, so scan backwards.
+    for line in reversed(stdout.splitlines()):
+        _, marker, probe_dir = line.partition(PROBE_DIR_MARKER)
+        if marker:
+            return probe_dir.strip()
+    return None
+
+
+def _benchmark_transcription(writer: csv.DictWriter, command: str, probe_log_dir: str) -> None:
+    """Time "./PROBE transcribe-only" on PROBE_LOG_DIR; write a 'Transcription' row on success."""
+    transcribe_command = f"./PROBE transcribe-only {shlex.quote(probe_log_dir)} --output probe_log"
+    print(f"Running transcription for command: {command}")
+
+    # Time only the transcription itself. Measuring from the end of the recorded run
+    # would also count the cool-down sleeps and any runs that happened in between.
+    start_time = datetime.datetime.now()
+    proc = subprocess.run(transcribe_command, shell=True, capture_output=True)
+    end_time = datetime.datetime.now()
+
+    if proc.returncode != 0:
+        print(f"Error in transcription for command: {command}")
+        print(f"Error message:\n{proc.stderr.decode('utf-8', errors='replace')}")
+        return
+
+    writer.writerow({
+        'Command': command,
+        'Phase': 'Transcription',
+        'Return Code': proc.returncode,
+        'CPU Times': '',
+        'Memory Info': '',
+        'IO Counters': '',
+        'Start Time': start_time,
+        'End Time': end_time,
+        'Duration (s)': (end_time - start_time).total_seconds(),
+    })
+    print(f"Transcription completed for command: {command}")
+
+
+def benchmark_with_transcription(commands_to_run, warmup_count, benchmark_count,
+                                 csv_path='benchmark_results.csv'):
+    """
+    Benchmark every command in COMMANDS_TO_RUN and write the results to CSV_PATH.
+
+    Each command is measured in three phases: on its own ("No PROBE"), under
+    "./PROBE record" ("Record"), and under "./PROBE record --no-transcribe"
+    ("No Transcribe"). Every successful "No Transcribe" run is then transcribed
+    with "./PROBE transcribe-only" and timed as a separate "Transcription" row.
+    WARMUP_COUNT and BENCHMARK_COUNT are passed on to benchmark_command().
+    """
+    with open(csv_path, mode='w', newline='', encoding='utf-8') as csv_file:
+        writer = csv.DictWriter(csv_file, fieldnames=CSV_FIELDNAMES)
+        writer.writeheader()
+
+        for command in commands_to_run:
+            phases = [
+                ('No PROBE', command),
+                ('Record', f"./PROBE record {command}"),
+                ('No Transcribe', f"./PROBE record --no-transcribe {command}"),
+            ]
+            for phase, phase_command in phases:
+                print(f"Running benchmark for command ({phase}): {phase_command}")
+                results = benchmark_command(phase_command, warmup_count, benchmark_count)
+
+                for idx, result in enumerate(results, start=1):
+                    _report_result(writer, command, phase, idx, result)
+                    if phase != 'No Transcribe':
+                        continue
+                    # Transcribe only runs that succeeded and announced a probe directory.
+                    succeeded = result.returncode == 0
+                    probe_log_dir = _find_probe_dir(result.stdout) if succeeded else None
+                    if probe_log_dir is None:
+                        print(f"Skipping transcription for command due to previous error: {command}")
+                    else:
+                        _benchmark_transcription(writer, command, probe_log_dir)
+
+
+if __name__ == "__main__":
+    commands_to_run = [
+        "echo 'Hello, World!'",
+        "ls -l",
+        "pwd",
+        "head ../flake.nix",
+        "python3 -c 'print(2 + 2)'",
+        "cat tasks.md"
+    ]
+    warmup_count = 1
+    benchmark_count = 2
+
+    benchmark_with_transcription(commands_to_run, warmup_count, benchmark_count)
+
diff --git a/probe_src/probe_py/cli.py b/probe_src/probe_py/cli.py
index 04b6aec3..295f31a6 100644
--- a/probe_src/probe_py/cli.py
+++ b/probe_src/probe_py/cli.py
@@ -11,15 +11,42 @@
 from . import analysis
 from .
import util
 
-
 project_root = pathlib.Path(__file__).resolve().parent.parent
 
-
 A = typing_extensions.Annotated
 
-
 app = typer.Typer()
 
+def transcribe(probe_dir: pathlib.Path, output: pathlib.Path, debug: bool = False) -> None:
+    """
+    Archive the recorded data in PROBE_DIR into OUTPUT, then delete PROBE_DIR.
+    """
+    # The with-block closes the archive even when adding a member raises.
+    with tarfile.open(name=str(output), mode="x:gz") as probe_log_tar_obj:
+        probe_log_tar_obj.add(probe_dir, arcname="")
+        probe_log_tar_obj.addfile(
+            util.default_tarinfo("README"),
+            fileobj=io.BytesIO(b"This archive was generated by PROBE."),
+        )
+    if debug:
+        print()
+        print("PROBE log files:")
+        for path in probe_dir.glob("**/*"):
+            if not path.is_dir():
+                print(path, path.stat().st_size)
+        print()
+    shutil.rmtree(probe_dir)
+
+@app.command()
+def transcribe_only(
+    input_dir: pathlib.Path,
+    output: pathlib.Path = pathlib.Path("probe_log"),
+    debug: bool = typer.Option(default=False, help="Run in verbose mode"),
+) -> None:
+    """
+    Transcribe the recorded data from INPUT_DIR into OUTPUT.
+    """
+    transcribe(input_dir, output, debug)
 
 @app.command(
     context_settings=dict(
@@ -32,9 +59,10 @@ def record(
     debug: bool = typer.Option(default=False, help="Run verbose & debug build of libprobe"),
     make: bool = typer.Option(default=False, help="Run make prior to executing"),
     output: pathlib.Path = pathlib.Path("probe_log"),
+    no_transcribe: bool = typer.Option(default=False, help="Only execute without transcribing"),
 ) -> None:
     """
-    Execute CMD... and record its provenance into OUTPUT.
+    Execute CMD... and optionally record its provenance into OUTPUT.
     """
     if make:
         proc = subprocess.run(
@@ -63,31 +91,13 @@ def record(
         env={**os.environ, "LD_PRELOAD": ld_preload, "__PROBE_DIR": str(probe_dir)},
     )
 
-    # Before this point is "executing with libprobe"
-    # After this point is "transcription"
-    # TODO: break this up into three CLI entrypoints
-    # 1. PROBE record should do both
-    # 2. PROBE record --no-transcribe should just do the execution
-    # 3.
PROBE transcribe should just do the transcription - - probe_log_tar_obj = tarfile.open(name=str(output), mode="x:gz") - probe_log_tar_obj.add(probe_dir, arcname="") - probe_log_tar_obj.addfile( - util.default_tarinfo("README"), - fileobj=io.BytesIO(b"This archive was generated by PROBE."), - ) - probe_log_tar_obj.close() - if debug: - print() - print("PROBE log files:") - for path in probe_dir.glob("**/*"): - if not path.is_dir(): - print(path, path.stat().st_size) - print() - shutil.rmtree(probe_dir) + if no_transcribe: + typer.secho(f"Temporary probe directory: {probe_dir}", fg=typer.colors.YELLOW) + raise typer.Exit(proc.returncode) + + transcribe(probe_dir, output, debug) raise typer.Exit(proc.returncode) - @app.command() def process_graph( input: pathlib.Path = pathlib.Path("probe_log"), @@ -102,8 +112,6 @@ def process_graph( process_tree_prov_log = parse_probe_log.parse_probe_log_tar(probe_log_tar_obj) probe_log_tar_obj.close() print(analysis.construct_process_graph(process_tree_prov_log)) - - @app.command() def dump( @@ -127,4 +135,3 @@ def dump( if __name__ == "__main__": app() -