From 6f3432a2a7653035963b8fc00d1ead8829b9842c Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Thu, 25 Jul 2024 15:25:57 -0500 Subject: [PATCH 01/18] Glue libprobe and probe_frontend together --- flake.lock | 77 +++++++++- flake.nix | 143 +++++++++++++++--- probe_src/arena/{arena.c => arena.h} | 5 +- probe_src/arena/test_arena.c | 2 +- .../include => libprobe-interface}/prov_ops.h | 0 probe_src/libprobe/Makefile | 20 +-- probe_src/libprobe/src/lib.c | 4 +- probe_src/probe_frontend/configure | 7 - probe_src/probe_frontend/flake.lock | 120 --------------- probe_src/probe_frontend/lib/build.rs | 6 +- probe_src/probe_frontend/lib/src/ops.rs | 2 +- .../{flake.nix => rust-stuff.nix} | 60 ++------ 12 files changed, 223 insertions(+), 223 deletions(-) rename probe_src/arena/{arena.c => arena.h} (99%) rename probe_src/{libprobe/include => libprobe-interface}/prov_ops.h (100%) delete mode 100755 probe_src/probe_frontend/configure delete mode 100644 probe_src/probe_frontend/flake.lock rename probe_src/probe_frontend/{flake.nix => rust-stuff.nix} (84%) diff --git a/flake.lock b/flake.lock index a350a41a..4728dbc9 100644 --- a/flake.lock +++ b/flake.lock @@ -1,5 +1,41 @@ { "nodes": { + "advisory-db": { + "flake": false, + "locked": { + "lastModified": 1721864173, + "narHash": "sha256-tQn2ZPLAH6u2nAfV8Ac/2HPS5giBi0iceCp4g9SqWAU=", + "owner": "rustsec", + "repo": "advisory-db", + "rev": "af0e1b678a23ebd04efa0d0f63f98ad46781077d", + "type": "github" + }, + "original": { + "owner": "rustsec", + "repo": "advisory-db", + "type": "github" + } + }, + "crane": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1721842668, + "narHash": "sha256-k3oiD2z2AAwBFLa4+xfU+7G5fisRXfkvrMTCJrjZzXo=", + "owner": "ipetkov", + "repo": "crane", + "rev": "529c1a0b1f29f0d78fa3086b8f6a134c71ef3aaf", + "type": "github" + }, + "original": { + "owner": "ipetkov", + "repo": "crane", + "type": "github" + } + }, "flake-utils": { "inputs": { "systems": "systems" @@ -20,20 
+56,47 @@ }, "nixpkgs": { "locked": { - "lastModified": 0, - "narHash": "sha256-4zSIhSRRIoEBwjbPm3YiGtbd8HDWzFxJjw5DYSDy1n8=", - "path": "/nix/store/z71lmgd0ydfnax1b13zbrls5idf1y7ak-source", - "type": "path" + "lastModified": 1721782431, + "narHash": "sha256-UNDpwjYxNXQet/g3mgRLsQ9zxrbm9j2JEvP4ijF3AWs=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "4f02464258baaf54992debfd010a7a3662a25536", + "type": "github" }, "original": { - "id": "nixpkgs", - "type": "indirect" + "owner": "NixOS", + "ref": "nixpkgs-unstable", + "repo": "nixpkgs", + "type": "github" } }, "root": { "inputs": { + "advisory-db": "advisory-db", + "crane": "crane", "flake-utils": "flake-utils", - "nixpkgs": "nixpkgs" + "nixpkgs": "nixpkgs", + "rust-overlay": "rust-overlay" + } + }, + "rust-overlay": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1721928877, + "narHash": "sha256-bW2ClCWzGCytPbUnqZwU8P1YsLW07uEs80EfHEctc0Q=", + "owner": "oxalica", + "repo": "rust-overlay", + "rev": "8b81b8ed00b20fd57b24adcb390bd96ea81ecd90", + "type": "github" + }, + "original": { + "owner": "oxalica", + "repo": "rust-overlay", + "type": "github" } }, "systems": { diff --git a/flake.nix b/flake.nix index db2a0de8..9c835b21 100644 --- a/flake.nix +++ b/flake.nix @@ -1,36 +1,129 @@ { - inputs.flake-utils.url = "github:numtide/flake-utils"; + inputs = { + nixpkgs = { + url = "github:NixOS/nixpkgs/nixpkgs-unstable"; + }; + flake-utils = { + url = "github:numtide/flake-utils"; + }; + + crane = { + url = "github:ipetkov/crane"; + inputs.nixpkgs.follows = "nixpkgs"; + }; + + advisory-db = { + url = "github:rustsec/advisory-db"; + flake = false; + }; + + rust-overlay = { + url = "github:oxalica/rust-overlay"; + inputs.nixpkgs.follows = "nixpkgs"; + }; + }; outputs = { self, nixpkgs, + crane, flake-utils, - }: - flake-utils.lib.eachDefaultSystem ( - system: let - pkgs = nixpkgs.legacyPackages.${system}; - inherit (pkgs) lib; - python312-debug = pkgs.python312.overrideAttrs (oldAttrs: { 
- configureFlags = oldAttrs.configureFlags ++ ["--with-pydebug"]; - # patches = oldAttrs.patches ++ [ ./python.patch ]; - }); - export-and-rename = pkg: file-pairs: - pkgs.stdenv.mkDerivation { - pname = "${pkg.pname}-only-bin"; + advisory-db, + rust-overlay, + ... + }@inputs: let + supported-systems = [ + "x86_64-linux" + "i686-linux" + "aarch64-linux" + "armv7l-linux" + ]; + in + flake-utils.lib.eachSystem supported-systems (system: let + pkgs = import nixpkgs { + inherit system; + overlays = [(import rust-overlay)]; + }; + inherit (pkgs) lib; + python312-debug = pkgs.python312.overrideAttrs (oldAttrs: { + configureFlags = oldAttrs.configureFlags ++ ["--with-pydebug"]; + # patches = oldAttrs.patches ++ [ ./python.patch ]; + }); + export-and-rename = pkg: file-pairs: + pkgs.stdenv.mkDerivation { + pname = "${pkg.pname}-only-bin"; + dontUnpack = true; + version = pkg.version; + buildInputs = [ pkg ]; + buildPhase = + "\n" + (builtins.map + (pairs: "install -D ${pkg}/${builtins.elemAt pairs 0} $out/${builtins.elemAt pairs 1}") + file-pairs); + }; + rust-stuff = (import ./probe_src/probe_frontend/rust-stuff.nix) ({ inherit system pkgs; } // inputs); + in { + packages = rec { + python-dbg = python312-debug; + libprobe-interface = pkgs.stdenv.mkDerivation { + pname = "libprobe-interface"; + version = "0.1.0"; + src = ./probe_src/libprobe-interface; + dontBuild = true; + installPhase = '' + install -D --target-directory $out/include/libprobe *.h + ''; + }; + arena = pkgs.stdenv.mkDerivation { + pname = "arena"; + version = "0.1.0"; + src = ./probe_src/arena; + dontBuild = true; + installPhase = '' + install -D --target-directory $out/include/arena *.h + ''; + }; + libprobe = pkgs.stdenv.mkDerivation rec { + pname = "libprobe"; + version = "0.1.0"; + src = ./probe_src/libprobe; + makeFlags = [ "INSTALL_PREFIX=$(out)" "SOURCE_VERSION=${version}" ]; + buildInputs = [ + libprobe-interface + arena + (pkgs.python312.withPackages (pypkgs: [ + pypkgs.pycparser + ])) + ]; + }; + 
bundled-probe = pkgs.stdenv.mkDerivation rec { + pname = "bundled-probe"; + version = "0.1.0"; dontUnpack = true; - version = pkg.version; - buildInputs = [pkg]; - buildPhase = - builtins.concatStringsSep - "\n" - (builtins.map - (pairs: "install -D ${pkg}/${builtins.elemAt pairs 0} $out/${builtins.elemAt pairs 1}") - file-pairs); + dontBuild = true; + nativeBuildInputs = [ pkgs.makeWrapper ]; + installPhase = '' + mkdir $out $out/bin + makeWrapper \ + ${self.packages.${system}.probe-cli}/bin/probe \ + $out/bin/PROBE \ + --set __PROBE_LIB ${self.packages.${system}.libprobe}/lib + ''; }; - in { - packages = { - python-dbg = python312-debug; - }; + probe-py-generated = pkgs.python312.pkgs.buildPythonPackage rec { + pname = "probe_py.generated"; + version = "0.1.0"; + pyproject = true; + build-system = [ + pkgs.python312Packages.flit-core + ]; + unpackPhase = '' + cp --recursive ${self.packages.${system}.probe-frontend}/python/* /build + ls /build + ''; + }; + } // rust-stuff.packages; + checks = self.packages.${system} // rust-stuff.checks; devShells = { default = pkgs.mkShell { buildInputs = diff --git a/probe_src/arena/arena.c b/probe_src/arena/arena.h similarity index 99% rename from probe_src/arena/arena.c rename to probe_src/arena/arena.h index 97a9483a..078c2249 100644 --- a/probe_src/arena/arena.c +++ b/probe_src/arena/arena.h @@ -1,5 +1,4 @@ -#ifndef ARENA -#define ARENA +#pragma once #define _GNU_SOURCE #ifdef PYCPARSER @@ -334,5 +333,3 @@ __attribute__((unused)) static bool arena_is_initialized(struct ArenaDir* arena_ ); return arena_dir->__tail != NULL; } - -#endif // ARENA diff --git a/probe_src/arena/test_arena.c b/probe_src/arena/test_arena.c index 1df5e7cc..30a353b6 100644 --- a/probe_src/arena/test_arena.c +++ b/probe_src/arena/test_arena.c @@ -6,7 +6,7 @@ #include #include #define ARENA_PERROR -#include "arena.c" +#include "arena.h" int main() { struct stat stat_buf; diff --git a/probe_src/libprobe/include/prov_ops.h 
b/probe_src/libprobe-interface/prov_ops.h similarity index 100% rename from probe_src/libprobe/include/prov_ops.h rename to probe_src/libprobe-interface/prov_ops.h diff --git a/probe_src/libprobe/Makefile b/probe_src/libprobe/Makefile index 6ac69a64..8662124c 100644 --- a/probe_src/libprobe/Makefile +++ b/probe_src/libprobe/Makefile @@ -1,10 +1,9 @@ SOURCE_VERSION ?= $(shell git rev-parse --short HEAD) -CFLAGS ?= -DSOURCE_VERSION=\"$(SOURCE_VERSION)\" -Wno-cast-function-type -Wno-array-parameter -ffreestanding -Wl,--as-needed -Wall -Wextra -pthread -DBGCFLAGS ?= $(CFLAGS) -Og -g -OPTCFLAGS ?= $(CFLAGS) -O3 -DNDEBUG -LIBCFLAGS ?= -fPIC -nostdlib -shared -SOURCE_FILES := $(wildcard src/*.c) $(wildcard include/*.h) +CFLAGS ?= -DSOURCE_VERSION=\"$(SOURCE_VERSION)\" -Wno-cast-function-type -Wno-array-parameter -ffreestanding -Wl,--as-needed -Wall -Wextra -pthread -fPIC -nostdlib -shared $(NIX_CFLAGS_COMPILE) +DBGCFLAGS ?= -Og -g +OPTCFLAGS ?= -O3 -DNDEBUG +SOURCE_FILES := $(wildcard src/*.c) $(wildcard include/*.h) GENERATED_FILES := generated/libc_hooks.c generated/libc_hooks.h ALL_TARGETS := build/libprobe-dbg.so build/libprobe.so @@ -12,16 +11,19 @@ all: $(ALL_TARGETS) .PHONY: all build/lib%.so: $(SOURCE_FILES) $(GENERATED_FILES) - mkdir --parents build/ - gcc $(OPTCFLAGS) $(LIBCFLAGS) -o $@ src/lib.c + mkdir --parents build + gcc $(CFLAGS) $(OPTCFLAGS) -o $@ src/lib.c build/lib%-dbg.so: $(SOURCE_FILES) $(GENERATED_FILES) - mkdir --parents build/ - gcc $(DBGCFLAGS) $(LIBCFLAGS) -o $@ src/lib.c + mkdir --parents build + gcc $(CFLAGS) $(DBGCFLAGS) -o $@ src/lib.c $(GENERATED_FILES): $(wildcard generator/*) ./generator/gen_libc_hooks.py +install: + install -D --target-directory $(INSTALL_PREFIX)/lib/ build/lib*.so + clean: mkdir --parents build/ generated/ touch $(GENERATED_FILES) $(ALL_TARGETS) diff --git a/probe_src/libprobe/src/lib.c b/probe_src/libprobe/src/lib.c index 4c702c05..ac743c43 100644 --- a/probe_src/libprobe/src/lib.c +++ b/probe_src/libprobe/src/lib.c @@ 
-62,11 +62,11 @@ static __thread bool __thread_inited = false; /* #include "fd_table.c" */ -#include "../include/prov_ops.h" +#include #define ARENA_USE_UNWRAPPED_LIBC #define ARENA_PERROR -#include "../../arena/arena.c" +#include #include "global_state.c" diff --git a/probe_src/probe_frontend/configure b/probe_src/probe_frontend/configure deleted file mode 100755 index 699751ed..00000000 --- a/probe_src/probe_frontend/configure +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh - -set -e -cd "$(dirname "$(realpath "$0")")" -mkdir -p ./lib/include -cp ../libprobe/include/prov_ops.h ./lib/include/prov_ops.h -git add ./lib/include diff --git a/probe_src/probe_frontend/flake.lock b/probe_src/probe_frontend/flake.lock deleted file mode 100644 index 3594030f..00000000 --- a/probe_src/probe_frontend/flake.lock +++ /dev/null @@ -1,120 +0,0 @@ -{ - "nodes": { - "advisory-db": { - "flake": false, - "locked": { - "lastModified": 1720572893, - "narHash": "sha256-EQfU1yMnebn7LoJNjjsQimyuWwz+2YzazqUZu8aX/r4=", - "owner": "rustsec", - "repo": "advisory-db", - "rev": "97a2dc75838f19a5fd63dc3f8e3f57e0c4c8cfe6", - "type": "github" - }, - "original": { - "owner": "rustsec", - "repo": "advisory-db", - "type": "github" - } - }, - "crane": { - "inputs": { - "nixpkgs": [ - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1720546058, - "narHash": "sha256-iU2yVaPIZm5vMGdlT0+57vdB/aPq/V5oZFBRwYw+HBM=", - "owner": "ipetkov", - "repo": "crane", - "rev": "2d83156f23c43598cf44e152c33a59d3892f8b29", - "type": "github" - }, - "original": { - "owner": "ipetkov", - "repo": "crane", - "type": "github" - } - }, - "flake-utils": { - "inputs": { - "systems": "systems" - }, - "locked": { - "lastModified": 1710146030, - "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, - "nixpkgs": { 
- "locked": { - "lastModified": 1720594544, - "narHash": "sha256-w6dlBUQYvS65f0Z33TvkcAj7ITr4NFqhF5ywss5T5bU=", - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "aa9461550594533c29866d42f861b6ff079a7fb6", - "type": "github" - }, - "original": { - "owner": "NixOS", - "ref": "nixpkgs-unstable", - "repo": "nixpkgs", - "type": "github" - } - }, - "root": { - "inputs": { - "advisory-db": "advisory-db", - "crane": "crane", - "flake-utils": "flake-utils", - "nixpkgs": "nixpkgs", - "rust-overlay": "rust-overlay" - } - }, - "rust-overlay": { - "inputs": { - "nixpkgs": [ - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1720577957, - "narHash": "sha256-RZuzLdB/8FaXaSzEoWLg3au/mtbuH7MGn2LmXUKT62g=", - "owner": "oxalica", - "repo": "rust-overlay", - "rev": "a434177dfcc53bf8f1f348a3c39bfb336d760286", - "type": "github" - }, - "original": { - "owner": "oxalica", - "repo": "rust-overlay", - "type": "github" - } - }, - "systems": { - "locked": { - "lastModified": 1681028828, - "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", - "type": "github" - }, - "original": { - "owner": "nix-systems", - "repo": "default", - "type": "github" - } - } - }, - "root": "root", - "version": 7 -} diff --git a/probe_src/probe_frontend/lib/build.rs b/probe_src/probe_frontend/lib/build.rs index a6a3e228..11601141 100644 --- a/probe_src/probe_frontend/lib/build.rs +++ b/probe_src/probe_frontend/lib/build.rs @@ -143,7 +143,11 @@ fn main() { ) // The input header we would like to generate // bindings for. 
- .header("./include/prov_ops.h") + .header( + env::var("LIBPROBE_INTERFACE") + .expect("Must define env var (see source of this panic)") + + "/include/libprobe/prov_ops.h" + ) // .header_contents("sizeof", " // const size_t OP_SIZE = sizeof(struct Op); // ") diff --git a/probe_src/probe_frontend/lib/src/ops.rs b/probe_src/probe_frontend/lib/src/ops.rs index a49c0c2c..46a95b25 100644 --- a/probe_src/probe_frontend/lib/src/ops.rs +++ b/probe_src/probe_frontend/lib/src/ops.rs @@ -251,7 +251,7 @@ impl FfiFrom for OpInternal { log::debug!("[unsafe] decoding Op tagged union [ OpCode={} ]", kind); Ok(match kind { C_OpCode_init_process_op_code => { - Self::InitProcessOp(unsafe { value.init_process_epoch }.ffi_into(ctx)?) + Self::InitProcessOp(unsafe { value.init_process }.ffi_into(ctx)?) } C_OpCode_init_exec_epoch_op_code => { Self::InitExecEpochOp(unsafe { value.init_exec_epoch }.ffi_into(ctx)?) diff --git a/probe_src/probe_frontend/flake.nix b/probe_src/probe_frontend/rust-stuff.nix similarity index 84% rename from probe_src/probe_frontend/flake.nix rename to probe_src/probe_frontend/rust-stuff.nix index 298dd67d..fa327b80 100644 --- a/probe_src/probe_frontend/flake.nix +++ b/probe_src/probe_frontend/rust-stuff.nix @@ -1,40 +1,14 @@ { - description = "libprobe frontend"; - - inputs = { - nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable"; - - crane = { - url = "github:ipetkov/crane"; - inputs.nixpkgs.follows = "nixpkgs"; - }; - - flake-utils.url = "github:numtide/flake-utils"; - - advisory-db = { - url = "github:rustsec/advisory-db"; - flake = false; - }; - - rust-overlay = { - url = "github:oxalica/rust-overlay"; - inputs.nixpkgs.follows = "nixpkgs"; - }; - }; - - # TODO: cleanup derivations and make more usable: - # - version of probe cli with bundled libprobe and wrapper script - # - python code as actual module - # (this may require merging this flake with the top-level one) - outputs = { - self, - nixpkgs, - crane, - flake-utils, - advisory-db, - rust-overlay, - 
... - }: let + self, + pkgs, + crane, + flake-utils, + advisory-db, + rust-overlay, + system, + ... +}: +let systems = { # "nix system" = "rust target"; "x86_64-linux" = "x86_64-unknown-linux-musl"; @@ -42,13 +16,6 @@ "aarch64-linux" = "aarch64-unknown-linux-musl"; "armv7l-linux" = "armv7-unknown-linux-musleabi"; }; - in - flake-utils.lib.eachSystem (builtins.attrNames systems) (system: let - pkgs = import nixpkgs { - inherit system; - overlays = [(import rust-overlay)]; - }; - craneLib = (crane.mkLib pkgs).overrideToolchain (p: p.rust-bin.stable.latest.default.override { targets = [systems.${system}]; @@ -76,7 +43,9 @@ export PYGEN_OUTFILE="$(realpath ./python/probe_py/generated/ops.py)" ''; - CARGO_BUILD_TARGET = "${systems.${system}}"; + LIBPROBE_INTERFACE = self.packages.${system}.libprobe-interface; + + CARGO_BUILD_TARGET = systems.${system}; CARGO_BUILD_RUSTFLAGS = "-C target-feature=+crt-static"; }; @@ -217,5 +186,4 @@ pkgs.rust-analyzer ]; }; - }); -} + } From 33ff5676c507315e1655f9ec87f77fcb03aa90a7 Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Thu, 25 Jul 2024 15:54:18 -0500 Subject: [PATCH 02/18] Fix nix flake checks --- flake.nix | 12 ------------ probe_src/probe_frontend/lib/build.rs | 5 ++--- probe_src/probe_frontend/lib/src/ops.rs | 2 +- probe_src/probe_frontend/macros/src/lib.rs | 4 ++-- 4 files changed, 5 insertions(+), 18 deletions(-) diff --git a/flake.nix b/flake.nix index 9c835b21..21eba9c6 100644 --- a/flake.nix +++ b/flake.nix @@ -110,18 +110,6 @@ --set __PROBE_LIB ${self.packages.${system}.libprobe}/lib ''; }; - probe-py-generated = pkgs.python312.pkgs.buildPythonPackage rec { - pname = "probe_py.generated"; - version = "0.1.0"; - pyproject = true; - build-system = [ - pkgs.python312Packages.flit-core - ]; - unpackPhase = '' - cp --recursive ${self.packages.${system}.probe-frontend}/python/* /build - ls /build - ''; - }; } // rust-stuff.packages; checks = self.packages.${system} // rust-stuff.checks; devShells = { diff --git 
a/probe_src/probe_frontend/lib/build.rs b/probe_src/probe_frontend/lib/build.rs index 11601141..b17eafb1 100644 --- a/probe_src/probe_frontend/lib/build.rs +++ b/probe_src/probe_frontend/lib/build.rs @@ -144,9 +144,8 @@ fn main() { // The input header we would like to generate // bindings for. .header( - env::var("LIBPROBE_INTERFACE") - .expect("Must define env var (see source of this panic)") - + "/include/libprobe/prov_ops.h" + env::var("LIBPROBE_INTERFACE").expect("Must define env var (see source of this panic)") + + "/include/libprobe/prov_ops.h", ) // .header_contents("sizeof", " // const size_t OP_SIZE = sizeof(struct Op); diff --git a/probe_src/probe_frontend/lib/src/ops.rs b/probe_src/probe_frontend/lib/src/ops.rs index 46a95b25..f4c28660 100644 --- a/probe_src/probe_frontend/lib/src/ops.rs +++ b/probe_src/probe_frontend/lib/src/ops.rs @@ -19,7 +19,7 @@ use std::ffi::CString; /// on each of it's fields. In order to make this work there are three base case implementations: /// /// - `*mut i8` and `*const i8` can (try to) be converted to [`CString`]s by looking up the -/// pointers in the [`ArenaContext`], +/// pointers in the [`ArenaContext`], /// - Any type implementing [`Copy`], this base case just returns itself. pub trait FfiFrom { fn ffi_from(value: &T, ctx: &ArenaContext) -> Result diff --git a/probe_src/probe_frontend/macros/src/lib.rs b/probe_src/probe_frontend/macros/src/lib.rs index a7b57942..df9cf0d6 100644 --- a/probe_src/probe_frontend/macros/src/lib.rs +++ b/probe_src/probe_frontend/macros/src/lib.rs @@ -18,14 +18,14 @@ type MacroResult = Result; /// - be a named struct (tuple and unit structs not supported). /// - Name starts with `C_`. /// - contain only types that implement `FfiFrom` (defined in probe_frontend, see ops module for -/// details). +/// details). /// /// In will generate a struct with the following characteristics: /// /// - same name, but without the `C_` prefix, and converted from snake_case to PascalCase. 
/// - any field in the original struct starting with `__` is ignored. /// - any field in the original struct starting with `ru_`, `tv_`, or `stx_` will have that prefix -/// removed. +/// removed. /// - derives serde's `Serialize`, `Deserialize` traits. /// - contains a unit field `_type` that serializes to the struct's name. /// - implements `FfiFrom` by calling it recursively on each field. From 494459c599223f00cd3ce3459103f79a8ed2033e Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Thu, 25 Jul 2024 15:59:45 -0500 Subject: [PATCH 03/18] bundled-probe -> probe-bundled --- flake.nix | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flake.nix b/flake.nix index 21eba9c6..1f3eed75 100644 --- a/flake.nix +++ b/flake.nix @@ -96,8 +96,8 @@ ])) ]; }; - bundled-probe = pkgs.stdenv.mkDerivation rec { - pname = "bundled-probe"; + probe-bundled = pkgs.stdenv.mkDerivation rec { + pname = "probe-bundled"; version = "0.1.0"; dontUnpack = true; dontBuild = true; From d3e053bf9ed1c11a02a5725c347134acb85aba68 Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Thu, 25 Jul 2024 18:17:14 -0500 Subject: [PATCH 04/18] Test empty commit From 2421ba02d353a727e6c6c779f55865f77614cfc2 Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Tue, 30 Jul 2024 14:52:14 -0500 Subject: [PATCH 05/18] Fix stuff --- flake.nix | 109 ++++++++---------- probe_src/PROBE | 5 - probe_src/{ => libprobe}/arena/.gitignore | 0 probe_src/{ => libprobe}/arena/Makefile | 0 probe_src/{ => libprobe}/arena/README.md | 0 .../{arena => libprobe/arena/include}/arena.h | 0 probe_src/{ => libprobe}/arena/parse_arena.py | 0 probe_src/{ => libprobe}/arena/test_arena.c | 0 .../include/libprobe}/prov_ops.h | 0 probe_src/libprobe/src/lib.c | 4 +- probe_src/probe_frontend/.envrc | 3 - probe_src/probe_frontend/lib/build.rs | 21 +++- .../generated/{probe.py => parser.py} | 6 +- .../probe_frontend/python/pyproject.toml | 2 +- probe_src/probe_frontend/rust-stuff.nix | 40 ++++--- probe_src/probe_py/__init__.py 
| 0 probe_src/{probe_py => python}/README.md | 0 probe_src/python/probe_py/manual/__init__.py | 5 + .../probe_py/manual}/analysis.py | 11 +- .../probe_py/manual}/cli.py | 10 +- .../probe_py/manual}/parse_probe_log.py | 0 .../probe_py/manual}/struct_parser.py | 0 .../probe_py/manual}/test_probe.py | 0 .../probe_py/manual}/util.py | 0 probe_src/python/pyproject.toml | 25 ++++ probe_src/test.txt | 36 ------ setup_devshell.sh | 42 +++++++ 27 files changed, 176 insertions(+), 143 deletions(-) delete mode 100755 probe_src/PROBE rename probe_src/{ => libprobe}/arena/.gitignore (100%) rename probe_src/{ => libprobe}/arena/Makefile (100%) rename probe_src/{ => libprobe}/arena/README.md (100%) rename probe_src/{arena => libprobe/arena/include}/arena.h (100%) rename probe_src/{ => libprobe}/arena/parse_arena.py (100%) rename probe_src/{ => libprobe}/arena/test_arena.c (100%) rename probe_src/{libprobe-interface => libprobe/include/libprobe}/prov_ops.h (100%) delete mode 100644 probe_src/probe_frontend/.envrc rename probe_src/probe_frontend/python/probe_py/generated/{probe.py => parser.py} (95%) delete mode 100644 probe_src/probe_py/__init__.py rename probe_src/{probe_py => python}/README.md (100%) create mode 100644 probe_src/python/probe_py/manual/__init__.py rename probe_src/{probe_py => python/probe_py/manual}/analysis.py (98%) rename probe_src/{probe_py => python/probe_py/manual}/cli.py (93%) rename probe_src/{probe_py => python/probe_py/manual}/parse_probe_log.py (100%) rename probe_src/{probe_py => python/probe_py/manual}/struct_parser.py (100%) rename probe_src/{probe_py => python/probe_py/manual}/test_probe.py (100%) rename probe_src/{probe_py => python/probe_py/manual}/util.py (100%) create mode 100644 probe_src/python/pyproject.toml delete mode 100644 probe_src/test.txt create mode 100644 setup_devshell.sh diff --git a/flake.nix b/flake.nix index 1f3eed75..18849f12 100644 --- a/flake.nix +++ b/flake.nix @@ -44,59 +44,26 @@ inherit system; overlays = [(import 
rust-overlay)]; }; - inherit (pkgs) lib; - python312-debug = pkgs.python312.overrideAttrs (oldAttrs: { - configureFlags = oldAttrs.configureFlags ++ ["--with-pydebug"]; - # patches = oldAttrs.patches ++ [ ./python.patch ]; - }); - export-and-rename = pkg: file-pairs: - pkgs.stdenv.mkDerivation { - pname = "${pkg.pname}-only-bin"; - dontUnpack = true; - version = pkg.version; - buildInputs = [ pkg ]; - buildPhase = - "\n" - (builtins.map - (pairs: "install -D ${pkg}/${builtins.elemAt pairs 0} $out/${builtins.elemAt pairs 1}") - file-pairs); - }; + python = pkgs.python312; rust-stuff = (import ./probe_src/probe_frontend/rust-stuff.nix) ({ inherit system pkgs; } // inputs); - in { + in { packages = rec { - python-dbg = python312-debug; - libprobe-interface = pkgs.stdenv.mkDerivation { - pname = "libprobe-interface"; - version = "0.1.0"; - src = ./probe_src/libprobe-interface; - dontBuild = true; - installPhase = '' - install -D --target-directory $out/include/libprobe *.h - ''; - }; - arena = pkgs.stdenv.mkDerivation { - pname = "arena"; - version = "0.1.0"; - src = ./probe_src/arena; - dontBuild = true; - installPhase = '' - install -D --target-directory $out/include/arena *.h - ''; - }; - libprobe = pkgs.stdenv.mkDerivation rec { - pname = "libprobe"; - version = "0.1.0"; - src = ./probe_src/libprobe; - makeFlags = [ "INSTALL_PREFIX=$(out)" "SOURCE_VERSION=${version}" ]; - buildInputs = [ - libprobe-interface - arena - (pkgs.python312.withPackages (pypkgs: [ - pypkgs.pycparser - ])) - ]; - }; - probe-bundled = pkgs.stdenv.mkDerivation rec { + probe-bundled = let + # libprobe is a "private" package + # It is only used in probe-bundled + # TODO: The only public package should probably be probe-bundled and probe-py. 
+ libprobe = pkgs.stdenv.mkDerivation rec { + pname = "libprobe"; + version = "0.1.0"; + src = ./probe_src/libprobe; + makeFlags = [ "INSTALL_PREFIX=$(out)" "SOURCE_VERSION=${version}" ]; + buildInputs = [ + (pkgs.python312.withPackages (pypkgs: [ + pypkgs.pycparser + ])) + ]; + }; + in pkgs.stdenv.mkDerivation rec { pname = "probe-bundled"; version = "0.1.0"; dontUnpack = true; @@ -107,26 +74,40 @@ makeWrapper \ ${self.packages.${system}.probe-cli}/bin/probe \ $out/bin/PROBE \ - --set __PROBE_LIB ${self.packages.${system}.libprobe}/lib + --set __PROBE_LIB ${libprobe}/lib ''; }; + probe-py-manual = python.pkgs.buildPythonPackage rec { + pname = "probe_py.manual"; + version = "0.1.0"; + pyproject = true; + build-system = [ + python.pkgs.flit-core + ]; + src = ./probe_src/python; + propagatedBuildInputs = [ + self.packages.${system}.probe-py-generated + python.pkgs.networkx + python.pkgs.pygraphviz + python.pkgs.pydot + python.pkgs.rich + python.pkgs.typer + ]; + pythonImportsCheck = [ pname ]; + }; } // rust-stuff.packages; checks = self.packages.${system} // rust-stuff.checks; devShells = { - default = pkgs.mkShell { + default = rust-stuff.devShells.default.overrideAttrs (oldAttrs: rec { + shellHook = '' + source setup_devshell.sh + ''; buildInputs = - [ + oldAttrs.buildInputs ++ [ (pkgs.python312.withPackages (pypkgs: [ - pypkgs.psutil - pypkgs.typer - pypkgs.pycparser pypkgs.pytest pypkgs.mypy - pypkgs.pygraphviz - pypkgs.networkx pypkgs.ipython - pypkgs.pydot - pypkgs.rich ])) # (export-and-rename python312-debug [["bin/python" "bin/python-dbg"]]) pkgs.which @@ -148,12 +129,12 @@ ) ++ ( # while xdot isn't marked as linux only, it has a dependency (xvfb-run) that is - if builtins.elem system lib.platforms.linux + if builtins.elem system pkgs.lib.platforms.linux then [pkgs.xdot] else [] ); - }; + }); }; - } + } ); } diff --git a/probe_src/PROBE b/probe_src/PROBE deleted file mode 100755 index 7eb197a6..00000000 --- a/probe_src/PROBE +++ /dev/null @@ -1,5 +0,0 @@ 
-#!/usr/bin/env sh - -project_root="$(dirname "$0")" -export PYTHONPATH="${project_root}:${project_root}/arena:${PYTHONPATH}" -exec python3 -m probe_py.cli "${@}" diff --git a/probe_src/arena/.gitignore b/probe_src/libprobe/arena/.gitignore similarity index 100% rename from probe_src/arena/.gitignore rename to probe_src/libprobe/arena/.gitignore diff --git a/probe_src/arena/Makefile b/probe_src/libprobe/arena/Makefile similarity index 100% rename from probe_src/arena/Makefile rename to probe_src/libprobe/arena/Makefile diff --git a/probe_src/arena/README.md b/probe_src/libprobe/arena/README.md similarity index 100% rename from probe_src/arena/README.md rename to probe_src/libprobe/arena/README.md diff --git a/probe_src/arena/arena.h b/probe_src/libprobe/arena/include/arena.h similarity index 100% rename from probe_src/arena/arena.h rename to probe_src/libprobe/arena/include/arena.h diff --git a/probe_src/arena/parse_arena.py b/probe_src/libprobe/arena/parse_arena.py similarity index 100% rename from probe_src/arena/parse_arena.py rename to probe_src/libprobe/arena/parse_arena.py diff --git a/probe_src/arena/test_arena.c b/probe_src/libprobe/arena/test_arena.c similarity index 100% rename from probe_src/arena/test_arena.c rename to probe_src/libprobe/arena/test_arena.c diff --git a/probe_src/libprobe-interface/prov_ops.h b/probe_src/libprobe/include/libprobe/prov_ops.h similarity index 100% rename from probe_src/libprobe-interface/prov_ops.h rename to probe_src/libprobe/include/libprobe/prov_ops.h diff --git a/probe_src/libprobe/src/lib.c b/probe_src/libprobe/src/lib.c index ac743c43..9a808474 100644 --- a/probe_src/libprobe/src/lib.c +++ b/probe_src/libprobe/src/lib.c @@ -62,11 +62,11 @@ static __thread bool __thread_inited = false; /* #include "fd_table.c" */ -#include +#include "../include/libprobe/prov_ops.h" #define ARENA_USE_UNWRAPPED_LIBC #define ARENA_PERROR -#include +#include "../arena/include/arena.h" #include "global_state.c" diff --git 
a/probe_src/probe_frontend/.envrc b/probe_src/probe_frontend/.envrc deleted file mode 100644 index 36551f7f..00000000 --- a/probe_src/probe_frontend/.envrc +++ /dev/null @@ -1,3 +0,0 @@ -use_flake - -export __PROBE_LOG=info diff --git a/probe_src/probe_frontend/lib/build.rs b/probe_src/probe_frontend/lib/build.rs index b17eafb1..2a3cced7 100644 --- a/probe_src/probe_frontend/lib/build.rs +++ b/probe_src/probe_frontend/lib/build.rs @@ -5,6 +5,20 @@ use std::sync::OnceLock; use bindgen::callbacks::ParseCallbacks; +fn find_in_cpath(name: &str) -> Result { + Ok( + env::var("CPATH") + .map_err(|_| "CPATH needs to be set (in unicode) so I can find include header files")? + .split(':') + .map(|path_str| PathBuf::from(path_str).join(name)) + .filter(|path| path.exists()) + .collect::>() + .first() + .ok_or("name not found in CPATH")? + .clone() + ) +} + #[derive(Debug)] struct LibprobeCallback; @@ -144,8 +158,11 @@ fn main() { // The input header we would like to generate // bindings for. .header( - env::var("LIBPROBE_INTERFACE").expect("Must define env var (see source of this panic)") - + "/include/libprobe/prov_ops.h", + find_in_cpath("libprobe/prov_ops.h") + .unwrap() + .into_os_string() + .into_string() + .unwrap() ) // .header_contents("sizeof", " // const size_t OP_SIZE = sizeof(struct Op); diff --git a/probe_src/probe_frontend/python/probe_py/generated/probe.py b/probe_src/probe_frontend/python/probe_py/generated/parser.py similarity index 95% rename from probe_src/probe_frontend/python/probe_py/generated/probe.py rename to probe_src/probe_frontend/python/probe_py/generated/parser.py index a4bd52cc..8c990777 100644 --- a/probe_src/probe_frontend/python/probe_py/generated/probe.py +++ b/probe_src/probe_frontend/python/probe_py/generated/parser.py @@ -1,4 +1,4 @@ - +import pathlib import typing import json import tarfile @@ -26,10 +26,10 @@ class ProcessProvLog: class ProvLog: processes: typing.Mapping[int, ProcessProvLog] -def load_log(path: str) -> ProvLog: +def 
parse_probe_log(probe_log: pathlib.Path) -> ProvLog: op_map: typing.Dict[int, typing.Dict[int, typing.Dict[int, ThreadProvLog]]] = {} - tar = tarfile.open(path, mode='r') + tar = tarfile.open(probe_log, mode='r') for item in tar: # items with size zero are directories in the tarball diff --git a/probe_src/probe_frontend/python/pyproject.toml b/probe_src/probe_frontend/python/pyproject.toml index 2be4c8d5..fe321f09 100644 --- a/probe_src/probe_frontend/python/pyproject.toml +++ b/probe_src/probe_frontend/python/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "flit_core.buildapi" [project] name = "probe_py.generated" authors = [ - # authors generated from Cargo.toml@authors@: + # authors generated from Cargo.toml@authors@ ] license = {file = "LICENSE"} classifiers = ["License :: OSI Approved :: MIT License"] diff --git a/probe_src/probe_frontend/rust-stuff.nix b/probe_src/probe_frontend/rust-stuff.nix index fa327b80..7ef6e36f 100644 --- a/probe_src/probe_frontend/rust-stuff.nix +++ b/probe_src/probe_frontend/rust-stuff.nix @@ -39,12 +39,9 @@ let "pygenConfigPhase" ]; pygenConfigPhase = '' - mkdir -p ./python export PYGEN_OUTFILE="$(realpath ./python/probe_py/generated/ops.py)" ''; - LIBPROBE_INTERFACE = self.packages.${system}.libprobe-interface; - CARGO_BUILD_TARGET = systems.${system}; CARGO_BUILD_RUSTFLAGS = "-C target-feature=+crt-static"; }; @@ -77,10 +74,13 @@ let cp ./LICENSE $out/LICENSE ''; }); - probe-py = let + probe-py-generated = let workspace = (builtins.fromTOML (builtins.readFile ./Cargo.toml)).workspace; - in - pkgs.substituteAllFiles rec { + + # TODO: Simplify this + # Perhaps by folding the substituteAllFiles into probe-py-generated (upstream) or probe-py-frontend (downstream) + # Could we combine all the packages? 
+ probe-py-generated-src = pkgs.substituteAllFiles rec { name = "probe-py-${version}"; src = probe-frontend; files = [ @@ -88,7 +88,7 @@ let "./LICENSE" "./probe_py/generated/__init__.py" "./probe_py/generated/ops.py" - "./probe_py/generated/probe.py" + "./probe_py/generated/parser.py" ]; authors = builtins.concatStringsSep "" (builtins.map (match: let @@ -101,6 +101,18 @@ let )); version = workspace.package.version; }; + in pkgs.python312.pkgs.buildPythonPackage rec { + pname = "probe_py.generated"; + version = probe-py-generated-src.version; + pyproject = true; + build-system = [ + pkgs.python312Packages.flit-core + ]; + unpackPhase = '' + cp --recursive ${probe-py-generated-src}/* /build + ''; + pythonImportsCheck = [ pname ]; + }; probe-cli = craneLib.buildPackage (individualCrateArgs // { pname = "probe-cli"; @@ -114,7 +126,7 @@ let in { checks = { # Build the crates as part of `nix flake check` for convenience - inherit probe-frontend probe-py probe-cli probe-macros; + inherit probe-frontend probe-py-generated probe-cli probe-macros; # Run clippy (and deny all warnings) on the workspace source, # again, reusing the dependency artifacts from above. @@ -157,26 +169,16 @@ let partitions = 1; partitionType = "count"; }); - - probe-pygen-sanity = pkgs.runCommand "pygen-sanity-check" {} '' - cp ${probe-py}/probe_py/generated/ops.py $out - ${pkgs.python312}/bin/python $out - ''; }; packages = { - inherit probe-cli probe-py probe-frontend probe-macros; + inherit probe-cli probe-py-generated probe-frontend probe-macros; }; devShells.default = craneLib.devShell { # Inherit inputs from checks. 
checks = self.checks.${system}; - shellHook = '' - export __PROBE_LIB="$(realpath ../libprobe/build)" - export PYGEN_OUTFILE="$(realpath ./python/probe_py/generated/ops.py)" - ''; - packages = [ pkgs.cargo-audit pkgs.cargo-expand diff --git a/probe_src/probe_py/__init__.py b/probe_src/probe_py/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/probe_src/probe_py/README.md b/probe_src/python/README.md similarity index 100% rename from probe_src/probe_py/README.md rename to probe_src/python/README.md diff --git a/probe_src/python/probe_py/manual/__init__.py b/probe_src/python/probe_py/manual/__init__.py new file mode 100644 index 00000000..a3d39d3a --- /dev/null +++ b/probe_src/python/probe_py/manual/__init__.py @@ -0,0 +1,5 @@ +""" +This package analyzes and transforms PROBE logs. It also has a CLI +""" + +__version__ = "0.1.0" diff --git a/probe_src/probe_py/analysis.py b/probe_src/python/probe_py/manual/analysis.py similarity index 98% rename from probe_src/probe_py/analysis.py rename to probe_src/python/probe_py/manual/analysis.py index 648bb604..fd16a930 100644 --- a/probe_src/probe_py/analysis.py +++ b/probe_src/python/probe_py/manual/analysis.py @@ -1,9 +1,18 @@ import typing import networkx as nx # type: ignore -from .parse_probe_log import Op, ProvLog, CloneOp, ExecOp, WaitOp, OpenOp, CloseOp, TaskType, InitProcessOp, InitExecEpochOp, InitThreadOp, StatOp +from probe_py.generated.ops import Op, CloneOp, ExecOp, WaitOp, OpenOp, CloseOp, InitProcessOp, InitExecEpochOp, InitThreadOp, StatOp +from probe_py.generated.parser import ProvLog from enum import IntEnum +# TODO: implement this in probe_py.generated.ops +class TaskType(IntEnum): + TASK_PID = 0 + TASK_TID = 1 + TASK_ISO_C_THREAD = 2 + TASK_PTHREAD = 3 + + class EdgeLabels(IntEnum): PROGRAM_ORDER = 1 FORK_JOIN = 2 diff --git a/probe_src/probe_py/cli.py b/probe_src/python/probe_py/manual/cli.py similarity index 93% rename from probe_src/probe_py/cli.py rename to 
probe_src/python/probe_py/manual/cli.py index bca47c83..7edd14ac 100644 --- a/probe_src/probe_py/cli.py +++ b/probe_src/python/probe_py/manual/cli.py @@ -9,7 +9,7 @@ import typer import shutil import rich -from . import parse_probe_log +from probe_py.generated.parser import parse_probe_log from . import analysis from . import util @@ -113,9 +113,7 @@ def process_graph( if not input.exists(): typer.secho(f"INPUT {input} does not exist\nUse `PROBE record --output {input} CMD...` to rectify", fg=typer.colors.RED) raise typer.Abort() - probe_log_tar_obj = tarfile.open(input, "r") - prov_log = parse_probe_log.parse_probe_log_tar(probe_log_tar_obj) - probe_log_tar_obj.close() + prov_log = parse_probe_log(input) console = rich.console.Console(file=sys.stderr) process_graph = analysis.provlog_to_digraph(prov_log) for warning in analysis.validate_provlog(prov_log): @@ -137,9 +135,7 @@ def dump( if not input.exists(): typer.secho(f"INPUT {input} does not exist\nUse `PROBE record --output {input} CMD...` to rectify", fg=typer.colors.RED) raise typer.Abort() - probe_log_tar_obj = tarfile.open(input, "r") - processes_prov_log = parse_probe_log.parse_probe_log_tar(probe_log_tar_obj) - probe_log_tar_obj.close() + processes_prov_log = parse_probe_log(input) for pid, process in processes_prov_log.processes.items(): print(pid) for exid, exec_epoch in process.exec_epochs.items(): diff --git a/probe_src/probe_py/parse_probe_log.py b/probe_src/python/probe_py/manual/parse_probe_log.py similarity index 100% rename from probe_src/probe_py/parse_probe_log.py rename to probe_src/python/probe_py/manual/parse_probe_log.py diff --git a/probe_src/probe_py/struct_parser.py b/probe_src/python/probe_py/manual/struct_parser.py similarity index 100% rename from probe_src/probe_py/struct_parser.py rename to probe_src/python/probe_py/manual/struct_parser.py diff --git a/probe_src/probe_py/test_probe.py b/probe_src/python/probe_py/manual/test_probe.py similarity index 100% rename from 
probe_src/probe_py/test_probe.py rename to probe_src/python/probe_py/manual/test_probe.py diff --git a/probe_src/probe_py/util.py b/probe_src/python/probe_py/manual/util.py similarity index 100% rename from probe_src/probe_py/util.py rename to probe_src/python/probe_py/manual/util.py diff --git a/probe_src/python/pyproject.toml b/probe_src/python/pyproject.toml new file mode 100644 index 00000000..5b0d08b5 --- /dev/null +++ b/probe_src/python/pyproject.toml @@ -0,0 +1,25 @@ +[build-system] +requires = ["flit_core >=3.2,<4"] +build-backend = "flit_core.buildapi" + +[project] +name = "probe_py.manual" +authors = [ + {name = "Samuel Grayson", email = "sam@samgrayson.me"}, + {name = "Shofiya Bootwala"}, + {name = "Saleha Muzammil"}, +] +license = {file = "LICENSE"} +classifiers = ["License :: OSI Approved :: MIT License"] +dynamic = ["version", "description"] +dependencies = [ + "probe_py.generated", + "networkx", + "pygraphviz", + "pydot", + "rich", + "typer", +] + +[project.urls] +Home = "https://github.com/charmoniumQ/PROBE" diff --git a/probe_src/test.txt b/probe_src/test.txt deleted file mode 100644 index a63c0667..00000000 --- a/probe_src/test.txt +++ /dev/null @@ -1,36 +0,0 @@ -libprobe:pid-911319.-1.911319: 64eaa91:src/lib.c:100:maybe_init_thread(): Initializing process -libprobe:pid-911319.-1.911319: 64eaa91:src/global_state.c:24:init_is_proc_root(): getenv '__PROBE_IS_ROOT' = '(null)' -libprobe:pid-911319.-1.911319: 64eaa91:src/global_state.c:31:init_is_proc_root(): Is proc root? 
1 -libprobe:pid-911319.0.911319: 64eaa91:src/global_state.c:81:init_exec_epoch(): exec_epoch = 0 -libprobe:pid-911319.0.911319: 64eaa91:src/global_state.c:127:init_probe_dir(): getenv '__PROBE_DIR' = '/tmp/nix-shell.oKqu7p/probe_log_911309z8qonbfo' -libprobe:pid-911319.0.911319: 64eaa91:src/global_state.c:144:init_probe_dir(): probe_dir = "/tmp/nix-shell.oKqu7p/probe_log_911309z8qonbfo" -libprobe:pid-911319.0.911319: 64eaa91:src/global_state.c:148:init_probe_dir(): __epoch_dirfd=3 (/tmp/nix-shell.oKqu7p/probe_log_911309z8qonbfo/911319/0) -libprobe:pid-911319.0.911319: 64eaa91:src/lib.c:106:maybe_init_thread(): Initializing thread -libprobe:pid-911319.0.911319: 64eaa91:src/global_state.c:162:init_log_arena(): Going to "/tmp/nix-shell.oKqu7p/probe_log_911309z8qonbfo/911319/0/911319" (mkdir 1) -libprobe:pid-911319.0.911319: 64eaa91:src/prov_buffer.c:33:prov_log_record(): record op: init_exec_epoch -libprobe:pid-911319.0.911319: 64eaa91:src/prov_buffer.c:79:prov_log_record(): recorded op: init_exec_epoch -libprobe:pid-911319.0.911319: 64eaa91:src/prov_buffer.c:33:prov_log_record(): record op: init_thread -libprobe:pid-911319.0.911319: 64eaa91:src/prov_buffer.c:79:prov_log_record(): recorded op: init_thread -libprobe:pid-911319.0.911319: 64eaa91:src/prov_buffer.c:33:prov_log_record(): record op: clone -libprobe:pid-911319.0.911319: 64eaa91:src/prov_buffer.c:79:prov_log_record(): recorded op: clone Thread #0: starting. 
-libprobe:pid-911319.0.911319: 64eaa91:src/prov_buffer.c:33:prov_log_record(): record op: clone -libprobe:pid-911319.0.911319: 64eaa91:src/prov_buffer.c:79:prov_log_record(): recorded op: clone -libprobe:pid-911319.0.911319: 64eaa91:src/prov_buffer.c:33:prov_log_record(): record op: clone -libprobe:pid-911319.0.911319: 64eaa91:src/prov_buffer.c:79:prov_log_record(): recorded op: clone - -libprobe:pid-911319.0.911320: 64eaa91:src/lib.c:106:maybe_init_thread(): Initializing thread -libprobe:pid-911319.0.911320: 64eaa91:src/global_state.c:162:init_log_arena(): Going to "/tmp/nix-shell.oKqu7p/probe_log_911309z8qonbfo/911319/0/911320" (mkdir 1) -libprobe:pid-911319.0.911320: 64eaa91:src/prov_buffer.c:33:prov_log_record(): record op: init_thread -libprobe:pid-911319.0.911320: 64eaa91:src/prov_buffer.c:79:prov_log_record(): recorded op: init_thread -libprobe:pid-911319.0.911320: 64eaa91:src/prov_buffer.c:33:prov_log_record(): record op: open dirfd=-100, path="/tmp/0.txt", stat_valid=1, dirfd_valid=1 -libprobe:pid-911319.0.911320: 64eaa91:src/prov_buffer.c:79:prov_log_record(): recorded op: open dirfd=-100, path="/tmp/0.txt", stat_valid=1, dirfd_valid=1 - -libprobe:pid-911319.0.911321: 64eaa91:src/lib.c:106:maybe_init_thread(): Initializing thread -libprobe:pid-911319.0.911321: 64eaa91:src/global_state.c:162:init_log_arena(): Going to "/tmp/nix-shell.oKqu7p/probe_log_911309z8qonbfo/911319/0/911321" (mkdir 1) -libprobe:pid-911319.0.911321: 64eaa91:src/prov_buffer.c:33:prov_log_record(): record op: init_thread -libprobe:pid-911319.0.911321: 64eaa91:src/prov_buffer.c:79:prov_log_record(): recorded op: init_thread -libprobe:pid-911319.0.911321: 64eaa91:src/prov_buffer.c:33:prov_log_record(): record op: open dirfd=-100, path="/tmp/1.txt", stat_valid=1, dirfd_valid=1 -libprobe:pid-911319.0.911321: 64eaa91:src/prov_buffer.c:79:prov_log_record(): recorded op: open dirfd=-100, path="/tmp/1.txt", stat_valid=1, dirfd_valid=1 - -libprobe:pid-911319.0.911322: 
64eaa91:src/lib.c:106:maybe_init_thread(): Initializing thread -libprobe:pid-911319.0.911322: 64eaa91:src/global_state.c:162:init_log_arena(): Going to "/tmp/nix-shell.oKqu7p/probe_log_911309z8qonbfo/911319/0/911322" (mkdir 1) diff --git a/setup_devshell.sh b/setup_devshell.sh new file mode 100644 index 00000000..8ee9329e --- /dev/null +++ b/setup_devshell.sh @@ -0,0 +1,42 @@ +# POSIX sh compatible source script + +red='\033[0;31m' +clr='\033[0m' + +# Ensure `nix develop` was called from the root directory. +if [ ! -f flake.nix ]; then + echo -e "${red}Please cd to the project root before trying to enter the devShell ('nix develop').${clr}" +fi + +# Rust frontend uses CPATH to find libprobe headers +export CPATH="$(realpath ./probe_src/libprobe/include):$CPATH" + +# Rust CLI uses __PROBE_LIB to find libprobe binary +# build may not exist yet, so we realpath its parent. +export __PROBE_LIB="$(realpath ./probe_src/libprobe)/build" + +# Ensure libprobe.so gets built +if [ ! -f $__PROBE_LIB/libprobe.so ]; then + echo -e "${red}Please run 'make -C probe_src/libprobe all' to compile libprobe${clr}" +fi + +# Rust code uses PYGEN_OUTFILE to determine where to write this file. +# TODO: Replace this with a static path, because it is never not this path. +export PYGEN_OUTFILE="$(realpath ./probe_src/probe_frontend/python/probe_py/generated/ops.py)" + +# Ensure PROBE CLI gets built +if [ ! 
-f probe_src/probe_frontend/target/release/probe ]; then + echo -e "${red}Please run 'env -C probe_src/probe_frontend cargo build --release' to compile probe binary${clr}" +fi + +# Add PROBE CLI to path +export PATH="$(realpath ./probe_src/probe_frontend/target/release):$PATH" + +# Add Arena to the Python path +export PYTHONPATH="$(realpath ./probe_src/probe_frontend/python):$PYTHONPATH" + +# Add probe_py.generated to the Python path +export PYTHONPATH="$(realpath ./probe_src/probe_frontend/python):$PYTHONPATH" + +# Add probe_py.manual to the Python path +export PYTHONPATH="$(realpath ./probe_src/python):$PYTHONPATH" From 2e97ac2b7ad87b1c02d9f0c0376b75f134b1155e Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Tue, 30 Jul 2024 14:55:51 -0500 Subject: [PATCH 06/18] Respond to comments --- flake.nix | 11 +++++------ .../probe_frontend/{rust-stuff.nix => frontend.nix} | 7 +++---- 2 files changed, 8 insertions(+), 10 deletions(-) rename probe_src/probe_frontend/{rust-stuff.nix => frontend.nix} (97%) diff --git a/flake.nix b/flake.nix index 18849f12..a3df3cab 100644 --- a/flake.nix +++ b/flake.nix @@ -26,9 +26,7 @@ outputs = { self, nixpkgs, - crane, flake-utils, - advisory-db, rust-overlay, ... 
}@inputs: let @@ -45,7 +43,7 @@ overlays = [(import rust-overlay)]; }; python = pkgs.python312; - rust-stuff = (import ./probe_src/probe_frontend/rust-stuff.nix) ({ inherit system pkgs; } // inputs); + frontend = (import ./probe_src/probe_frontend/frontend.nix) ({ inherit system pkgs python; } // inputs); in { packages = rec { probe-bundled = let @@ -95,10 +93,11 @@ ]; pythonImportsCheck = [ pname ]; }; - } // rust-stuff.packages; - checks = self.packages.${system} // rust-stuff.checks; + default = probe-bundled; + } // frontend.packages; + checks = self.packages.${system} // frontend.checks; devShells = { - default = rust-stuff.devShells.default.overrideAttrs (oldAttrs: rec { + default = frontend.devShells.default.overrideAttrs (oldAttrs: rec { shellHook = '' source setup_devshell.sh ''; diff --git a/probe_src/probe_frontend/rust-stuff.nix b/probe_src/probe_frontend/frontend.nix similarity index 97% rename from probe_src/probe_frontend/rust-stuff.nix rename to probe_src/probe_frontend/frontend.nix index 7ef6e36f..23ab9ff5 100644 --- a/probe_src/probe_frontend/rust-stuff.nix +++ b/probe_src/probe_frontend/frontend.nix @@ -2,10 +2,9 @@ self, pkgs, crane, - flake-utils, advisory-db, - rust-overlay, system, + python, ... 
}: let @@ -101,12 +100,12 @@ let )); version = workspace.package.version; }; - in pkgs.python312.pkgs.buildPythonPackage rec { + in python.pkgs.buildPythonPackage rec { pname = "probe_py.generated"; version = probe-py-generated-src.version; pyproject = true; build-system = [ - pkgs.python312Packages.flit-core + python.pkgs.flit-core ]; unpackPhase = '' cp --recursive ${probe-py-generated-src}/* /build From 4930101e4f205d93939cbb65c8fd4a0024434cc9 Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Tue, 30 Jul 2024 15:03:48 -0500 Subject: [PATCH 07/18] Fix nix flake checks --- flake.nix | 1 + probe_src/probe_frontend/frontend.nix | 1 + probe_src/probe_frontend/lib/build.rs | 22 ++++++++++------------ 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/flake.nix b/flake.nix index a3df3cab..121a4587 100644 --- a/flake.nix +++ b/flake.nix @@ -95,6 +95,7 @@ }; default = probe-bundled; } // frontend.packages; + # TODO: Run pytest tests in Nix checks checks = self.packages.${system} // frontend.checks; devShells = { default = frontend.devShells.default.overrideAttrs (oldAttrs: rec { diff --git a/probe_src/probe_frontend/frontend.nix b/probe_src/probe_frontend/frontend.nix index 23ab9ff5..682652ee 100644 --- a/probe_src/probe_frontend/frontend.nix +++ b/probe_src/probe_frontend/frontend.nix @@ -43,6 +43,7 @@ let CARGO_BUILD_TARGET = systems.${system}; CARGO_BUILD_RUSTFLAGS = "-C target-feature=+crt-static"; + CPATH = ../libprobe/include; }; # Build *just* the cargo dependencies (of the entire workspace), diff --git a/probe_src/probe_frontend/lib/build.rs b/probe_src/probe_frontend/lib/build.rs index 2a3cced7..1c1b6923 100644 --- a/probe_src/probe_frontend/lib/build.rs +++ b/probe_src/probe_frontend/lib/build.rs @@ -6,17 +6,15 @@ use std::sync::OnceLock; use bindgen::callbacks::ParseCallbacks; fn find_in_cpath(name: &str) -> Result { - Ok( - env::var("CPATH") - .map_err(|_| "CPATH needs to be set (in unicode) so I can find include header files")? 
- .split(':') - .map(|path_str| PathBuf::from(path_str).join(name)) - .filter(|path| path.exists()) - .collect::>() - .first() - .ok_or("name not found in CPATH")? - .clone() - ) + Ok(env::var("CPATH") + .map_err(|_| "CPATH needs to be set (in unicode) so I can find include header files")? + .split(':') + .map(|path_str| PathBuf::from(path_str).join(name)) + .filter(|path| path.exists()) + .collect::>() + .first() + .ok_or("name not found in CPATH")? + .clone()) } #[derive(Debug)] @@ -162,7 +160,7 @@ fn main() { .unwrap() .into_os_string() .into_string() - .unwrap() + .unwrap(), ) // .header_contents("sizeof", " // const size_t OP_SIZE = sizeof(struct Op); From 769f32b92b12951a329fc7b96d0f74710ed5bdbe Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Tue, 30 Jul 2024 15:08:57 -0500 Subject: [PATCH 08/18] Fix Justfile --- Justfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Justfile b/Justfile index 72c52bfc..3135d9d1 100644 --- a/Justfile +++ b/Justfile @@ -13,7 +13,6 @@ check-ruff: ruff check probe_src check-mypy: - MYPYPATH=probe_src mypy --strict --package arena MYPYPATH=probe_src mypy --strict --package probe_py mypy --strict probe_src/libprobe @@ -31,6 +30,8 @@ test-dev: compile-libprobe check-flake: nix flake check --all-systems +# TODO: checking the flake should do the tests and compilations... 
+# So we should probably remove those checks from the Just file pre-commit: fix-format-nix fix-ruff check-mypy check-flake compile-libprobe test-dev on-push: check-format-nix check-ruff check-mypy check-flake compile-libprobe test-ci From 71e0ac856703096f2c104c31f7e933a007bfbfe4 Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Tue, 30 Jul 2024 15:12:46 -0500 Subject: [PATCH 09/18] Fixed devshell for clean-slate installs --- setup_devshell.sh | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/setup_devshell.sh b/setup_devshell.sh index 8ee9329e..fc97f20c 100644 --- a/setup_devshell.sh +++ b/setup_devshell.sh @@ -29,11 +29,8 @@ if [ ! -f probe_src/probe_frontend/target/release/probe ]; then echo -e "${red}Please run 'env -C probe_src/probe_frontend cargo build --release' to compile probe binary${clr}" fi -# Add PROBE CLI to path -export PATH="$(realpath ./probe_src/probe_frontend/target/release):$PATH" - -# Add Arena to the Python path -export PYTHONPATH="$(realpath ./probe_src/probe_frontend/python):$PYTHONPATH" +# Add PROBE CLI to path, noting that target/release may not exist yet +export PATH="$(realpath ./probe_src/probe_frontend)/target/release:$PATH" # Add probe_py.generated to the Python path export PYTHONPATH="$(realpath ./probe_src/probe_frontend/python):$PYTHONPATH" From 147428a93394a40de437e714d36ab4d357619a3f Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Tue, 30 Jul 2024 15:46:38 -0500 Subject: [PATCH 10/18] Fix stuff for direnv --- Justfile | 3 +- flake.nix | 13 +- .../python/probe_py/generated/parser.py | 2 +- probe_src/python/probe_py/manual/analysis.py | 47 +- .../python/probe_py/manual/parse_probe_log.py | 160 ------ .../python/probe_py/manual/struct_parser.py | 524 ------------------ .../python/probe_py/manual/test_probe.py | 118 ++-- 7 files changed, 97 insertions(+), 770 deletions(-) delete mode 100644 probe_src/python/probe_py/manual/parse_probe_log.py delete mode 100644 
probe_src/python/probe_py/manual/struct_parser.py diff --git a/Justfile b/Justfile index 3135d9d1..fb39065e 100644 --- a/Justfile +++ b/Justfile @@ -13,7 +13,8 @@ check-ruff: ruff check probe_src check-mypy: - MYPYPATH=probe_src mypy --strict --package probe_py + mypy --strict --package probe_py.manual + mypy --strict --package probe_py.generated mypy --strict probe_src/libprobe compile-libprobe: diff --git a/flake.nix b/flake.nix index 121a4587..3b084f24 100644 --- a/flake.nix +++ b/flake.nix @@ -100,11 +100,20 @@ devShells = { default = frontend.devShells.default.overrideAttrs (oldAttrs: rec { shellHook = '' - source setup_devshell.sh + pushd $(git rev-parse --show-toplevel) + source ./setup_devshell.sh + popd ''; buildInputs = oldAttrs.buildInputs ++ [ - (pkgs.python312.withPackages (pypkgs: [ + (python.withPackages (pypkgs: [ + python.pkgs.networkx + python.pkgs.pygraphviz + python.pkgs.pydot + python.pkgs.rich + python.pkgs.typer + + pypkgs.psutil pypkgs.pytest pypkgs.mypy pypkgs.ipython diff --git a/probe_src/probe_frontend/python/probe_py/generated/parser.py b/probe_src/probe_frontend/python/probe_py/generated/parser.py index 8c990777..2e912531 100644 --- a/probe_src/probe_frontend/python/probe_py/generated/parser.py +++ b/probe_src/probe_frontend/python/probe_py/generated/parser.py @@ -71,7 +71,7 @@ def parse_probe_log(probe_log: pathlib.Path) -> ProvLog: for pid, epochs in op_map.items() }) -def op_hook(json_map: typing.Dict[str, typing.Any]): +def op_hook(json_map: typing.Dict[str, typing.Any]) -> typing.Any: ty: str = json_map["_type"] json_map.pop("_type") diff --git a/probe_src/python/probe_py/manual/analysis.py b/probe_src/python/probe_py/manual/analysis.py index fd16a930..508e108c 100644 --- a/probe_src/python/probe_py/manual/analysis.py +++ b/probe_src/python/probe_py/manual/analysis.py @@ -46,9 +46,10 @@ def validate_provlog( if False: pass elif isinstance(op.data, WaitOp) and op.data.ferrno == 0: - waited_processes.add((op.data.task_type, 
op.data.task_id)) + # TODO: Replace TaskType(x) with x in this file, once Rust can emit enums + waited_processes.add((TaskType(op.data.task_type), op.data.task_id)) elif isinstance(op.data, CloneOp) and op.data.ferrno == 0: - cloned_processes.add((op.data.task_type, op.data.task_id)) + cloned_processes.add((TaskType(op.data.task_type), op.data.task_id)) if op.data.task_type == TaskType.TASK_PID: # New process implicitly also creates a new thread cloned_processes.add((TaskType.TASK_TID, op.data.task_id)) @@ -149,36 +150,36 @@ def get_last_pthread(pid: int, exid: int, target_pthread_id: int) -> list[Node]: # Hook up forks/joins for node in list(nodes): pid, exid, tid, op_index = node - op = process_tree_prov_log.processes[pid].exec_epochs[exid].threads[tid].ops[op_index].data + op_data = process_tree_prov_log.processes[pid].exec_epochs[exid].threads[tid].ops[op_index].data target: tuple[int, int, int] if False: pass - elif isinstance(op, CloneOp) and op.ferrno == 0: + elif isinstance(op_data, CloneOp) and op_data.ferrno == 0: if False: pass - elif op.task_type == TaskType.TASK_PID: + elif op_data.task_type == TaskType.TASK_PID: # Spawning a thread links to the current PID and exec epoch - target = (op.task_id, 0, op.task_id) + target = (op_data.task_id, 0, op_data.task_id) fork_join_edges.append((node, first(*target))) - elif op.task_type == TaskType.TASK_TID: - target = (pid, exid, op.task_id) + elif op_data.task_type == TaskType.TASK_TID: + target = (pid, exid, op_data.task_id) fork_join_edges.append((node, first(*target))) - elif op.task_type == TaskType.TASK_PTHREAD: - for dest in get_first_pthread(pid, exid, op.task_id): + elif op_data.task_type == TaskType.TASK_PTHREAD: + for dest in get_first_pthread(pid, exid, op_data.task_id): fork_join_edges.append((node, dest)) else: - raise RuntimeError(f"Task type {op.task_type} supported") - elif isinstance(op, WaitOp) and op.ferrno == 0 and op.task_id > 0: + raise RuntimeError(f"Task type {op_data.task_type} 
supported") + elif isinstance(op_data, WaitOp) and op_data.ferrno == 0 and op_data.task_id > 0: if False: pass - elif op.task_type == TaskType.TASK_PID: - target = (op.task_id, last_exec_epoch.get(op.task_id, 0), op.task_id) + elif op_data.task_type == TaskType.TASK_PID: + target = (op_data.task_id, last_exec_epoch.get(op_data.task_id, 0), op_data.task_id) fork_join_edges.append((last(*target), node)) - elif op.task_type == TaskType.TASK_TID: - target = (pid, exid, op.task_id) + elif op_data.task_type == TaskType.TASK_TID: + target = (pid, exid, op_data.task_id) fork_join_edges.append((last(*target), node)) - elif op.ferrno == 0 and op.task_type == TaskType.TASK_PTHREAD: - for dest in get_last_pthread(pid, exid, op.task_id): + elif op_data.ferrno == 0 and op_data.task_type == TaskType.TASK_PTHREAD: + for dest in get_last_pthread(pid, exid, op_data.task_id): fork_join_edges.append((dest, node)) elif isinstance(op, ExecOp): # Exec brings same pid, incremented exid, and main thread @@ -263,7 +264,7 @@ def validate_hb_clones(provlog: ProvLog, process_graph: nx.DiGraph) -> list[str] elif op.data.task_type == TaskType.TASK_ISO_C_THREAD and op.data.task_id == op1.iso_c_thread_id: break else: - ret.append(f"Could not find a successor for CloneOp {node} {op.data.task_type.name} in the target thread") + ret.append(f"Could not find a successor for CloneOp {node} {TaskType(op.data.task_type).name} in the target thread") return ret @@ -358,16 +359,16 @@ def digraph_to_pydot_string(prov_log: ProvLog, process_graph: nx.DiGraph) -> str if False: pass elif isinstance(op.data, OpenOp): - data["label"] += f"\n{op.data.path.path} (fd={op.data.fd})" + data["label"] += f"\n{op.data.path.path.decode()} (fd={op.data.fd})" elif isinstance(op.data, CloseOp): fds = list(range(op.data.low_fd, op.data.high_fd + 1)) data["label"] += "\n" + " ".join(map(str, fds)) elif isinstance(op.data, CloneOp): - data["label"] += f"\n{op.data.task_type.name} {op.data.task_id}" + data["label"] += 
f"\n{TaskType(op.data.task_type).name} {op.data.task_id}" elif isinstance(op.data, WaitOp): - data["label"] += f"\n{op.data.task_type.name} {op.data.task_id}" + data["label"] += f"\n{TaskType(op.data.task_type).name} {op.data.task_id}" elif isinstance(op.data, StatOp): - data["label"] += f"\n{op.data.path.path}" + data["label"] += f"\n{op.data.path.path.decode()}" pydot_graph = nx.drawing.nx_pydot.to_pydot(process_graph) dot_string = typing.cast(str, pydot_graph.to_string()) diff --git a/probe_src/python/probe_py/manual/parse_probe_log.py b/probe_src/python/probe_py/manual/parse_probe_log.py deleted file mode 100644 index b551bfa7..00000000 --- a/probe_src/python/probe_py/manual/parse_probe_log.py +++ /dev/null @@ -1,160 +0,0 @@ -import collections -import dataclasses -import tarfile -import enum -import typing -import ctypes -import pathlib -import pycparser # type: ignore -import arena.parse_arena as arena -from . import struct_parser -import struct - - -c_types = dict(struct_parser.default_c_types) -py_types = dict(struct_parser.default_py_types) - - -filename = pathlib.Path(__file__).parent.parent / "libprobe/include/prov_ops.h" -assert filename.exists() -ast = pycparser.parse_file(filename, use_cpp=True, cpp_args="-DPYCPARSER") -struct_parser.parse_all_types(ast.ext, c_types, py_types) - - -# for key in c_types.keys() - struct_parser.default_c_types.keys(): -# if key[0] in {"struct", "union", "enum"}: -# print(struct_parser.c_type_to_c_source(c_types[key])) - -# echo '#define _GNU_SOURCE\n#include \nCLONE_THREAD' | cpp | tail --lines=1 -CLONE_THREAD = 0x00010000 - - -if typing.TYPE_CHECKING: - COp: typing.Any = object - Op: typing.Any = object - InitExecEpochOp: typing.Any = object - InitProcessOp: typing.Any = object - InitThreadOp: typing.Any = object - CloneOp: typing.Any = object - ExecOp: typing.Any = object - WaitOp: typing.Any = object - OpenOp: typing.Any = object - CloseOp: typing.Any = object - OpCode: typing.Any = object - TaskType: typing.Any = 
object - StatOp: typing.Any = object -else: - # for type in sorted(c_types.keys()): - # print(" ".join(type)) - COp = c_types[("struct", "Op")] - Op: typing.TypeAlias = py_types[("struct", "Op")] - InitProcessOp: typing.TypeAlias = py_types[("struct", "InitProcessOp")] - InitExecEpochOp: typing.TypeAlias = py_types[("struct", "InitExecEpochOp")] - InitThreadOp: typing.TypeAlias = py_types[("struct", "InitThreadOp")] - CloneOp: typing.TypeAlias = py_types[("struct", "CloneOp")] - ExecOp: typing.TypeAlias = py_types[("struct", "ExecOp")] - WaitOp: typing.TypeAlias = py_types[("struct", "WaitOp")] - OpenOp: typing.TypeAlias = py_types[("struct", "OpenOp")] - CloseOp: typing.TypeAlias = py_types[("struct", "CloseOp")] - OpCode: enum.EnumType = py_types[("enum", "OpCode")] - TaskType: enum.EnumType = py_types[("enum", "TaskType")] - StatOp: typing.TypeAlias = py_types[("struct", "StatOp")] - - -@dataclasses.dataclass -class ThreadProvLog: - tid: int - ops: typing.Sequence[Op] - - -@dataclasses.dataclass -class ExecEpochProvLog: - epoch: int - threads: typing.Mapping[int, ThreadProvLog] - - -@dataclasses.dataclass -class ProcessProvLog: - pid: int - exec_epochs: typing.Mapping[int, ExecEpochProvLog] - - -@dataclasses.dataclass -class ProvLog: - processes: typing.Mapping[int, ProcessProvLog] - - -def parse_segments(op_segments: arena.MemorySegments, data_segments: arena.MemorySegments) -> ThreadProvLog: - memory_segments = sorted([*op_segments, *data_segments], key=lambda mem_seg: mem_seg.start) - memory = arena.MemorySegments(memory_segments) - def info(fields: typing.Mapping[str, typing.Any], field_name: str) -> typing.Any: - if field_name == "data": - op_code_to_union_variant = { - OpCode.init_process_op_code: ("init_process", None), - OpCode.init_exec_epoch_op_code: ("init_exec_epoch", None), - OpCode.init_thread_op_code: ("init_thread", None), - OpCode.open_op_code: ("open", None), - OpCode.close_op_code: ("close", None), - OpCode.chdir_op_code: ("chdir", None), - 
OpCode.exec_op_code: ("exec", None), - OpCode.clone_op_code: ("clone", None), - OpCode.exit_op_code: ("exit", None), - OpCode.access_op_code: ("access", None), - OpCode.stat_op_code: ("stat", None), - OpCode.readdir_op_code: ("readdir", None), - OpCode.wait_op_code: ("wait", None), - OpCode.getrusage_op_code: ("getrusage", None), - OpCode.update_metadata_op_code: ("update_metadata", None), - OpCode.read_link_op_code: ("read_link", None), - } - return op_code_to_union_variant[fields["op_code"]] - else: - return None - ops: list[Op] = [] - for memory_segment in op_segments: - assert (memory_segment.stop - memory_segment.start) % ctypes.sizeof(COp) == 0 - for op_start in range(memory_segment.start, memory_segment.stop, ctypes.sizeof(COp)): - elem_buffr = memory_segment[op_start : op_start + ctypes.sizeof(COp)] - assert len(elem_buffr) == ctypes.sizeof(COp) - c_op = COp.from_buffer_copy(elem_buffr) - py_op = struct_parser.convert_c_obj_to_py_obj(c_op, Op, info, memory) - assert isinstance(py_op, Op) - ops.append(py_op) - tid = next( - op.data.tid - for op in ops - if isinstance(op.data, InitThreadOp) - ) - return ThreadProvLog(tid, ops) - - -def parse_probe_log_tar(probe_log_tar: tarfile.TarFile) -> ProvLog: - member_paths = sorted([ - pathlib.Path(name) - for name in probe_log_tar.getnames() - ]) - threads = collections.defaultdict[int, dict[int, dict[int, ThreadProvLog]]]( - lambda: collections.defaultdict[int, dict[int, ThreadProvLog]]( - dict[int, ThreadProvLog] - ) - ) - for member in member_paths: - if len(member.parts) == 3: - assert member / "ops" in member_paths - assert member / "data" in member_paths - op_segments = arena.parse_arena_dir_tar(probe_log_tar, member / "ops") - data_segments = arena.parse_arena_dir_tar(probe_log_tar, member / "data") - pid, epoch, tid = map(int, member.parts) - thread = parse_segments(op_segments, data_segments) - assert tid == thread.tid - threads[pid][epoch][tid] = thread - return ProvLog({ - pid: ProcessProvLog( - pid, - { - 
epoch: ExecEpochProvLog(epoch, threads) - for epoch, threads in epochs.items() - }, - ) - for pid, epochs in threads.items() - }) diff --git a/probe_src/python/probe_py/manual/struct_parser.py b/probe_src/python/probe_py/manual/struct_parser.py deleted file mode 100644 index e50b56f3..00000000 --- a/probe_src/python/probe_py/manual/struct_parser.py +++ /dev/null @@ -1,524 +0,0 @@ -from __future__ import annotations -import ctypes -import types -import dataclasses -import enum -import textwrap -import typing -import pycparser # type: ignore - - -_T = typing.TypeVar("_T") - -# CType: typing.TypeAlias = type[ctypes._CData] -CArrayType = type(ctypes.c_int * 1) -CType: typing.TypeAlias = typing.Any -CTypeMap: typing.TypeAlias = typing.Mapping[tuple[str, ...], CType | Exception] -CTypeDict: typing.TypeAlias = dict[tuple[str, ...], CType | Exception] -default_c_types: CTypeMap = { - ("_Bool",): ctypes.c_bool, - ("char",): ctypes.c_char, - ("wchar_t",): ctypes.c_wchar, - ("unsigned", "char"): ctypes.c_ubyte, - ("short",): ctypes.c_short, - ("unsigned", "short"): ctypes.c_ushort, - (): ctypes.c_int, - ("unsigned",): ctypes.c_uint, - ("long",): ctypes.c_long, - ("unsigned", "long"): ctypes.c_ulong, - ("long", "long"): ctypes.c_longlong, - ("__int64",): ctypes.c_longlong, - ("unsigned", "long", "long"): ctypes.c_ulonglong, - ("unsigned", "__int64"): ctypes.c_ulonglong, - ("size_t",): ctypes.c_size_t, - ("ssize_t",): ctypes.c_ssize_t, - ("time_t",): ctypes.c_time_t, - ("float",): ctypes.c_float, - ("double",): ctypes.c_double, - ("long", "double",): ctypes.c_longdouble, - ("char*",): ctypes.c_char_p, - ("wchar_t*",): ctypes.c_wchar_p, - ("void*",): ctypes.c_void_p, -} - -class PyStructBase: - pass - -class PyUnionBase: - pass - -PyType: typing.TypeAlias = type[object] -PyTypeMap: typing.TypeAlias = typing.Mapping[tuple[str, ...], PyType | Exception] -PyTypeDict: typing.TypeAlias = dict[tuple[str, ...], PyType | Exception] -default_py_types: PyTypeMap = { - ("_Bool",): bool, - 
("char",): str, - ("wchar_t",): str, - ("unsigned", "char"): int, - ("short",): int, - ("unsigned", "short"): int, - (): int, - ("unsigned",): int, - ("long",): int, - ("unsigned", "long"): int, - ("long", "long"): int, - ("__int64",): int, - ("unsigned", "long", "long"): int, - ("unsigned", "__int64"): int, - ("size_t",): int, - ("ssize_t",): int, - ("time_t",): int, - ("float",): float, - ("double",): float, - ("long", "double",): int, - ("char*",): str, - ("wchar_t*",): str, - ("void*",): int, -} -assert default_py_types.keys() == default_c_types.keys() - -def _expect_type(typ: type[_T], val: typing.Any) -> _T: - if isinstance(val, typ): - return val - else: - raise TypeError(f"Expected value of type {typ}, but got {val} of type {type(val)}") - -def _normalize_name(name: tuple[str, ...]) -> tuple[str, ...]: - # Move 'unsigned' to the beginning (if exists) - # Delete 'signed' (default is assume signed; signed short == short) - # Delete 'int' (default is assume int; unsigned int == unsigned) - return ( - *(("unsigned",) if "unsigned" in name else ()), - *(t for t in name if t not in {"signed", "int", "unsigned"}), - ) - -for type_name in default_c_types.keys(): - assert _normalize_name(type_name) == type_name - -def int_representing_pointer(inner_c_type: CType) -> CType: - class PointerStruct(ctypes.Structure): - _fields_ = [("value", ctypes.c_ulong)] - PointerStruct.inner_c_type = inner_c_type # type: ignore - return PointerStruct - - - -def _lookup_type( - c_types: CTypeDict, - py_types: PyTypeDict, - type_name: tuple[str, ...], -) -> tuple[CType | Exception, PyType | Exception]: - if len(type_name) > 1 and type_name[1] is None: - raise TypeError - c_type = c_types.get(type_name, KeyError(f"{type_name} not found")) - if isinstance(c_type, Exception): - c_type = NotImplementedError(f"Can't parse {type_name} due to {c_type!s}") - py_type = py_types.get(type_name, KeyError) - if isinstance(py_type, Exception): - py_type = object - return c_type, py_type - - -def 
eval_compile_time_int( - c_types: CTypeDict, - py_types: PyTypeDict, - typ: pycparser.c_ast.Node, - name: str, -) -> int | Exception: - if False: - pass - elif isinstance(typ, pycparser.c_ast.Constant): - if typ.type == "int": - return int(typ.value) - else: - raise TypeError(f"{typ}") - elif isinstance(typ, pycparser.c_ast.UnaryOp): - if typ.op == "sizeof": - c_type, _ = ast_to_cpy_type(c_types, py_types, typ.expr.type, name) - if isinstance(c_type, Exception): - return c_type - else: - return ctypes.sizeof(c_type) - else: - return int(eval(f"{typ.op} {eval_compile_time_int(c_types, py_types, typ.expr, name)}")) - elif isinstance(typ, pycparser.c_ast.BinaryOp): - left = eval_compile_time_int(c_types, py_types, typ.left, name + "_left") - right = eval_compile_time_int(c_types, py_types, typ.right, name + "_right") - return int(eval(f"{left} {typ.op} {right}")) - elif isinstance(typ, pycparser.c_ast.Cast): - return eval_compile_time_int(c_types, py_types, typ.expr, name) - raise TypeError(f"{typ}") - - -def ast_to_cpy_type( - c_types: CTypeDict, - py_types: PyTypeDict, - typ: pycparser.c_ast.Node, - name: str, -) -> tuple[CType | Exception, PyType | Exception]: - """ - c_types and py_types: are the bank of c_types and py_types that have already been parsed, and may be added to while parsing typ. - typ: is the AST representing the type of a field. - name: is a prefix that will be used if this is an anonymous struct/union/enum and we have to give it an arbitrary name. 
- """ - - if False: - pass - elif isinstance(typ, pycparser.c_ast.TypeDecl): - return ast_to_cpy_type(c_types, py_types, typ.type, name) - elif isinstance(typ, pycparser.c_ast.IdentifierType): - return _lookup_type(c_types, py_types, _normalize_name(typ.names)) - elif isinstance(typ, pycparser.c_ast.PtrDecl): - inner_c_type, inner_py_type = ast_to_cpy_type(c_types, py_types, typ.type, name) - c_type: CType | Exception - if isinstance(inner_c_type, Exception): - c_type = inner_c_type - else: - c_type = int_representing_pointer(inner_c_type) - if isinstance(inner_py_type, Exception): - c_type = inner_py_type - else: - py_type: type[object] - if inner_c_type == ctypes.c_char: - py_type = str - else: - py_type = list[inner_py_type] # type: ignore - return c_type, py_type - elif isinstance(typ, pycparser.c_ast.ArrayDecl): - repetitions = eval_compile_time_int(c_types, py_types, typ.dim, name) - inner_c_type, inner_py_type = ast_to_cpy_type(c_types, py_types, typ.type.type, name) - array_c_type: CType | Exception - array_py_type: PyType | Exception - if isinstance(inner_c_type, Exception): - array_c_type = inner_c_type - else: - array_c_type = inner_c_type * repetitions - if isinstance(inner_py_type, Exception): - array_py_type = inner_py_type - else: - array_py_type = tuple[(inner_py_type,)] # type: ignore - return array_c_type, array_py_type - elif isinstance(typ, pycparser.c_ast.Enum): - if typ.values is None: - # Reference to already-defined type - inner_name = typ.name - assert inner_name is not None - else: - # Defining a new enum inline (possibly anonymous) - inner_name = typ.name - if inner_name is None: - inner_name = f"{name}_enum" - parse_enum(c_types, py_types, typ, inner_name) - return _lookup_type(c_types, py_types, ("enum", inner_name)) - elif isinstance(typ, (pycparser.c_ast.Struct, pycparser.c_ast.Union)): - inner_is_struct = isinstance(typ, pycparser.c_ast.Struct) - inner_keyword = "struct" if inner_is_struct else "union" - if typ.decls is None: - # 
Reference to already-defined type - inner_name = typ.name - assert inner_name is not None - else: - # Defining a new type inline (possibly anonymous) - inner_name = typ.name - if inner_name is None: - # New type is anonymous; let's make a name and hope for no collisions - inner_name = f"{name}_{inner_keyword}" - parse_struct_or_union(c_types, py_types, typ, inner_name) - return _lookup_type(c_types, py_types, (inner_keyword, inner_name)) - elif isinstance(typ, pycparser.c_ast.FuncDecl): - return ctypes.c_void_p, int - else: - raise TypeError(f"Don't know how to convert {type(typ)} {typ} to C/python type") - - -def parse_struct_or_union( - c_types: CTypeDict, - py_types: PyTypeDict, - struct_decl: pycparser.c_ast.Struct | pycparser.c_ast.Union, - name: str, -) -> None: - assert name is not None - assert isinstance(struct_decl, (pycparser.c_ast.Struct, pycparser.c_ast.Union)) - is_struct = isinstance(struct_decl, pycparser.c_ast.Struct) - field_names = [ - decl.name if decl.name is not None else f"__anon_decl_{decl_no}" - for decl_no, decl in enumerate(struct_decl.decls) - ] - field_c_types = list[CType]() - field_py_types = list[PyType]() - c_type_error: Exception | None = None - - for decl in struct_decl.decls: - c_type, py_type = ast_to_cpy_type(c_types, py_types, decl.type, f"{name}_{decl.name}") - if isinstance(c_type, Exception): - c_type_error = c_type - else: - field_c_types.append(c_type) - if isinstance(py_type, Exception): - py_type= py_type - #py_type_error commented just to resolve ruff error, since py_type_error isnt being used anywhere else - # py_type_error = py_type - else: - field_py_types.append(py_type) - - keyword = "struct" if is_struct else "union" - - py_types[(keyword, name)] = dataclasses.make_dataclass( - name, - zip(field_names, field_py_types), - bases=(PyStructBase if is_struct else PyUnionBase,), - frozen=True, - ) - - if c_type_error is None: - c_types[(keyword, name)] = type( - name, - (ctypes.Structure if is_struct else 
ctypes.Union,), - {"_fields_": list(zip(field_names, field_c_types))}, - ) - else: - c_types[(keyword, name)] = c_type_error - - -ENUM_NO = 0 -def parse_enum( - c_types: CTypeDict, - py_types: PyTypeDict, - enum_decl: pycparser.c_ast.Enum, - name: str, -) -> None: - if name is None: - global ENUM_NO - name = f"__anon_enum_{ENUM_NO}" - ENUM_NO += 1 - assert isinstance(enum_decl, pycparser.c_ast.Enum) - c_types[("enum", name)] = c_types[("unsigned",)] - py_enum_fields = list[tuple[str, int]]() - current_value = 0 - for item in enum_decl.values.enumerators: - if item.value: - v = item.value - if isinstance(v, pycparser.c_ast.Constant) and v.type == "int": - current_value = int(v.value) - elif isinstance(v, pycparser.c_ast.ID): - t = dict(py_enum_fields).get(v.name) - assert t is not None - current_value = t - py_enum_fields.append((item.name, current_value)) - current_value += 1 - py_types[("enum", name)] = typing.cast( - type[enum.IntEnum], - enum.IntEnum(name, py_enum_fields), - ) - - -def parse_typedef( - c_types: CTypeDict, - py_types: PyTypeDict, - typedef: pycparser.c_ast.Typedef, -) -> None: - c_type, py_type = ast_to_cpy_type(c_types, py_types, typedef.type, typedef.name) - c_types[(typedef.name,)] = c_type - py_types[(typedef.name,)] = py_type - - -def parse_all_types( - stmts: pycparser.c_ast.Node, - c_types: CTypeDict, - py_types: PyTypeDict, -) -> None: - for stmt in stmts: - if isinstance(stmt, pycparser.c_ast.Decl): - if isinstance(stmt.type, pycparser.c_ast.Struct) and stmt.type.decls is not None: - parse_struct_or_union(c_types, py_types, stmt.type, stmt.type.name) - elif isinstance(stmt.type, pycparser.c_ast.Union) and stmt.type.decls is not None: - parse_struct_or_union(c_types, py_types, stmt.type, stmt.type.name) - elif isinstance(stmt.type, pycparser.c_ast.Enum): - parse_enum(c_types, py_types, stmt.type, stmt.type.name) - else: - pass - elif isinstance(stmt, pycparser.c_ast.Typedef): - parse_typedef(c_types, py_types, stmt) - else: - pass - - 
-def c_type_to_c_source(c_type: CType, top_level: bool = True) -> str: - if False: - pass - elif isinstance(c_type, (type(ctypes.Structure), type(ctypes.Union))): - keyword = "struct" if isinstance(c_type, type(ctypes.Structure)) else "union" - if hasattr(c_type, "inner_type"): - # this must be an int representing pointer. - return c_type_to_c_source(c_type.inner_type, False) + "*" - if top_level: - return "\n".join([ - keyword + " " + c_type.__name__ + " " + "{", - *[ - textwrap.indent(c_type_to_c_source(field[1], False), " ") + " " + field[0] + ";" - for field in c_type._fields_ - ], - "}", - ]) - else: - return keyword + " " + c_type.__name__ - elif isinstance(c_type, CArrayType): - return c_type_to_c_source(c_type._type_, False) + "[" + str(c_type._length_) + "]" - elif isinstance(c_type, type(ctypes._Pointer)): - typ: ctypes._CData = c_type._type_ # type: ignore - return c_type_to_c_source(typ, False) + "*" - elif isinstance(c_type, type(ctypes._SimpleCData)): - name = c_type.__name__ - return { - # Ints - "c_byte": "byte", - "c_ubyte": "unsigned byte", - "c_short": "short", - "c_ushort": "unsigned short", - "c_int": "int", - "c_uint": "unsigned int", - "c_long": "long", - "c_ulong": "unsigned long", - # Sized ints - "c_int8": "int8_t", - "c_uint8": "uint8_t", - "c_int16": "int16_t", - "c_uint16": "uint16_t", - "c_int32": "int32_t", - "c_uint32": "uint32_t", - "c_int64": "int64_t", - "c_uint64": "uint64_t", - # Reals - "c_float": "float", - "c_double": "double", - # Others - "c_size_t": "size_t", - "c_ssize_t": "ssize_t", - "c_time_t": "time_t", - # Chars - "c_char": "char", - "c_wchar": "wchar_t", - # Special-cased pointers - "c_char_p": "char*", - "c_wchar_p": "wchar_t*", - "c_void_p": "void*", - }.get(name, name.replace("c_", "")) - elif isinstance(c_type, Exception): - return str(c_type) - else: - raise TypeError(f"{type(c_type)}: {c_type}") - - -class MemoryMapping(typing.Protocol): - def __getitem__(self, idx: slice) -> bytes: ... 
- - def __contains__(self, idx: int) -> bool: ... - - -verbose = False - - -def convert_c_obj_to_py_obj( - c_obj: CType, - py_type: PyType, - info: typing.Any, - memory: MemoryMapping, - depth: int = 0, -) -> PyType | None: - if verbose: - print(depth * " ", c_obj, py_type, info) - if False: - pass - elif c_obj.__class__.__name__ == "PointerStruct": - assert py_type.__name__ == "list" or py_type is str, (type(c_obj), py_type) - if py_type.__name__ == "list": - inner_py_type = py_type.__args__[0] # type: ignore - else: - inner_py_type = str - inner_c_type = c_obj.inner_c_type - size = ctypes.sizeof(inner_c_type) - pointer_int = _expect_type(int, c_obj.value) - if pointer_int == 0: - return None - if pointer_int not in memory: - raise ValueError(f"Pointer {pointer_int:08x} is outside of memory {memory!s}") - lst: inner_py_type = [] # type: ignore - while True: - cont, sub_info = (memory[pointer_int : pointer_int + 1] != b'\0', None) if info is None else info[0](memory, pointer_int) - if cont: - inner_c_obj = inner_c_type.from_buffer_copy(memory[pointer_int : pointer_int + size]) - inner_py_obj = convert_c_obj_to_py_obj( - inner_c_obj, - inner_py_type, - sub_info, - memory, - depth + 1, - ) - lst.append(inner_py_obj) # type: ignore - pointer_int += size - else: - break - if py_type is str: - return "".join(lst) # type: ignore - else: - return lst - elif isinstance(c_obj, ctypes.Array): - assert isinstance(py_type, types.GenericAlias) and py_type.__origin__ is tuple and (py_type.__args__) - inner_py_type = py_type.__args__[0] - all(inner_py_type == arg for arg in py_type.__args__) - return list( - convert_c_obj_to_py_obj( - inner_c_obj, - inner_py_type, - info, - memory, - depth + 1, - ) - for inner_c_obj in c_obj - ) - elif isinstance(c_obj, ctypes.Structure): - if not dataclasses.is_dataclass(py_type): - raise TypeError(f"If {type(c_obj)} is a struct, then {py_type} should be a dataclass") - fields = dict[str, typing.Any]() - for py_field in 
dataclasses.fields(py_type): - if verbose: - print(depth * " ", py_field.name) - fields[py_field.name] = convert_c_obj_to_py_obj( - getattr(c_obj, py_field.name), - py_field.type, - None if info is None else info(fields, py_field.name), - memory, - depth + 1, - ) - return py_type(**fields) # type: ignore - elif isinstance(c_obj, ctypes.Union): - if not dataclasses.is_dataclass(py_type): - raise TypeError(f"If {type(c_obj)} is a union, then {py_type} should be a dataclass") - for field in dataclasses.fields(py_type): - if field.name == info[0]: - break - else: - raise KeyError(f"No field {info[0]} in {[field.name for field in dataclasses.fields(py_type)]}") - return convert_c_obj_to_py_obj( - getattr(c_obj, info[0]), - field.type, - info[1], - memory, - depth + 1, - ) - elif isinstance(c_obj, ctypes._SimpleCData): - if isinstance(py_type, enum.EnumType): - assert isinstance(c_obj.value, int) - return py_type(c_obj.value) # type: ignore - elif py_type is str: - assert isinstance(c_obj, ctypes.c_char) - return c_obj.value.decode() # type: ignore - else: - ret = c_obj.value - return _expect_type(py_type, ret) # type: ignore - elif isinstance(c_obj, py_type): - return c_obj # type: ignore - elif isinstance(c_obj, int) and isinstance(py_type, enum.EnumType): - return py_type(c_obj) # type: ignore - else: - raise TypeError(f"{c_obj!r} of c_type {type(c_obj)!r} cannot be converted to py_type {py_type!r}") diff --git a/probe_src/python/probe_py/manual/test_probe.py b/probe_src/python/probe_py/manual/test_probe.py index 2646752c..73fdb67b 100644 --- a/probe_src/python/probe_py/manual/test_probe.py +++ b/probe_src/python/probe_py/manual/test_probe.py @@ -1,7 +1,8 @@ import typing import tarfile +from probe_py.generated.parser import ProvLog, parse_probe_log +from probe_py.generated.ops import OpenOp, CloneOp, ExecOp, InitProcessOp, InitExecEpochOp, CloseOp, WaitOp, Op from . import analysis -from . 
import parse_probe_log import pathlib import networkx as nx # type: ignore import subprocess @@ -19,21 +20,21 @@ def test_diff_cmd() -> None: process_tree_prov_log = execute_command(command, 1) process_graph = analysis.provlog_to_digraph(process_tree_prov_log) assert not analysis.validate_hb_graph(process_tree_prov_log, process_graph) - paths = ['../flake.nix','../flake.lock'] + paths = [b'../flake.nix',b'../flake.lock'] dfs_edges = list(nx.dfs_edges(process_graph)) match_open_and_close_fd(dfs_edges, process_tree_prov_log, paths) - + def test_bash_in_bash() -> None: command = ["bash", "-c", "head ../flake.nix ; head ../flake.lock"] process_tree_prov_log = execute_command(command) process_graph = analysis.provlog_to_digraph(process_tree_prov_log) assert not analysis.validate_hb_graph(process_tree_prov_log, process_graph) - paths = ['../flake.nix', '../flake.lock'] + paths = [b'../flake.nix', b'../flake.lock'] process_file_map = {} dfs_edges = list(nx.dfs_edges(process_graph)) parent_process_id = dfs_edges[0][0][0] - process_file_map[paths[len(paths)-1]] = parent_process_id + process_file_map[b"../flake.lock"] = parent_process_id check_for_clone_and_open(dfs_edges, process_tree_prov_log, 1, process_file_map, paths) def test_bash_in_bash_pipe() -> None: @@ -41,7 +42,7 @@ def test_bash_in_bash_pipe() -> None: process_tree_prov_log = execute_command(command) process_graph = analysis.provlog_to_digraph(process_tree_prov_log) assert not analysis.validate_hb_graph(process_tree_prov_log, process_graph) - paths = ['../flake.nix','stdout'] + paths = [b'../flake.nix',b'stdout'] dfs_edges = list(nx.dfs_edges(process_graph)) check_for_clone_and_open(dfs_edges, process_tree_prov_log, len(paths), {}, paths) @@ -54,10 +55,10 @@ def test_pthreads() -> None: bfs_nodes = [node for layer in nx.bfs_layers(process_graph, root_node) for node in layer] dfs_edges = list(nx.dfs_edges(process_graph)) total_pthreads = 3 - paths = ['/tmp/0.txt', '/tmp/1.txt', '/tmp/2.txt'] + paths = 
[b'/tmp/0.txt', b'/tmp/1.txt', b'/tmp/2.txt'] check_pthread_graph(bfs_nodes, dfs_edges, process_tree_prov_log, total_pthreads, paths) -def execute_command(command: list[str], return_code: int = 0) -> parse_probe_log.ProvLog: +def execute_command(command: list[str], return_code: int = 0) -> ProvLog: input = pathlib.Path("probe_log") result = subprocess.run( ['./PROBE', 'record'] + (["--debug"] if DEBUG_LIBPROBE else []) + (["--make"] if REMAKE_LIBPROBE else []) + command, @@ -67,18 +68,16 @@ def execute_command(command: list[str], return_code: int = 0) -> parse_probe_log ) assert result.returncode == return_code assert input.exists() - probe_log_tar_obj = tarfile.open(input, "r") - process_tree_prov_log = parse_probe_log.parse_probe_log_tar(probe_log_tar_obj) - probe_log_tar_obj.close() + process_tree_prov_log = parse_probe_log(input) return process_tree_prov_log def check_for_clone_and_open( dfs_edges: typing.Sequence[tuple[Node, Node]], - process_tree_prov_log: parse_probe_log.ProvLog, + process_tree_prov_log: ProvLog, number_of_child_process: int, - process_file_map: dict[str, int], - paths: list[str], + process_file_map: dict[bytes, int], + paths: list[bytes], ) -> None: # to ensure files which are opened are closed file_descriptors = [] @@ -97,38 +96,38 @@ def check_for_clone_and_open( curr_node_op = get_op_from_provlog(process_tree_prov_log, curr_pid, curr_epoch_idx, curr_tid, curr_op_idx) if curr_node_op is not None: - curr_node_op = curr_node_op.data - if(isinstance(curr_node_op,parse_probe_log.CloneOp)): + curr_node_op_data = curr_node_op.data + if(isinstance(curr_node_op_data,CloneOp)): next_op = get_op_from_provlog(process_tree_prov_log, edge[1][0], edge[1][1], edge[1][2], edge[1][3]) if next_op is not None: - next_op = next_op.data - if isinstance(next_op,parse_probe_log.ExecOp): - assert edge[1][0] == curr_node_op.task_id - check_child_processes.append(curr_node_op.task_id) + next_op_data = next_op.data + if isinstance(next_op_data,ExecOp): + assert 
edge[1][0] == curr_node_op_data.task_id + check_child_processes.append(curr_node_op_data.task_id) continue - if isinstance(next_op,parse_probe_log.InitProcessOp): - assert edge[1][0] == curr_node_op.task_id - check_child_processes.append(curr_node_op.task_id) + if isinstance(next_op_data,InitProcessOp): + assert edge[1][0] == curr_node_op_data.task_id + check_child_processes.append(curr_node_op_data.task_id) continue - if isinstance(next_op,parse_probe_log.CloseOp) and edge[0][0]!=edge[1][0]: - assert edge[1][0] == curr_node_op.task_id - check_child_processes.append(curr_node_op.task_id) + if isinstance(next_op_data,CloseOp) and edge[0][0]!=edge[1][0]: + assert edge[1][0] == curr_node_op_data.task_id + check_child_processes.append(curr_node_op_data.task_id) continue if edge[1][3] == -1: continue current_child_process+=1 - check_wait.append(curr_node_op.task_id) + check_wait.append(curr_node_op_data.task_id) if len(paths)!=0: - process_file_map[paths[current_child_process-1]] = curr_node_op.task_id - elif(isinstance(curr_node_op,parse_probe_log.WaitOp)): - ret_pid = curr_node_op.task_id - wait_option = curr_node_op.options + process_file_map[paths[current_child_process-1]] = curr_node_op_data.task_id + elif(isinstance(curr_node_op_data,WaitOp)): + ret_pid = curr_node_op_data.task_id + wait_option = curr_node_op_data.options if wait_option == 0: assert ret_pid in check_wait check_wait.remove(ret_pid) - if(isinstance(curr_node_op,parse_probe_log.OpenOp)) and curr_node_op.ferrno == 0: - file_descriptors.append(curr_node_op.fd) - path = curr_node_op.path.path + if(isinstance(curr_node_op_data,OpenOp)) and curr_node_op_data.ferrno == 0: + file_descriptors.append(curr_node_op_data.fd) + path = curr_node_op_data.path.path if path in paths: if len(process_file_map.keys())!=0: # ensure the right cloned process has OpenOp for the path @@ -136,23 +135,24 @@ def check_for_clone_and_open( if curr_pid!=parent_process_id: assert curr_pid in check_child_processes 
check_child_processes.remove(curr_pid) - elif(isinstance(curr_node_op,parse_probe_log.CloseOp)): - fd = curr_node_op.low_fd + elif(isinstance(curr_node_op_data,CloseOp)): + fd = curr_node_op_data.low_fd if fd in reserved_file_descriptors: continue - if curr_node_op.ferrno != 0: + if curr_node_op_data.ferrno != 0: continue if fd in file_descriptors: file_descriptors.remove(fd) - elif(isinstance(curr_node_op,parse_probe_log.ExecOp)): + elif(isinstance(curr_node_op_data,ExecOp)): # check if stdout is read in right child process if(edge[1][3]==-1): continue next_init_op = get_op_from_provlog(process_tree_prov_log,curr_pid,1,curr_pid,0) if next_init_op is not None: - next_init_op = next_init_op.data - if next_init_op.program_name == 'tail': - assert process_file_map['stdout'] == curr_pid + next_init_op_data = next_init_op.data + assert isinstance(next_init_op_data, InitExecEpochOp) + if next_init_op_data.program_name == b'tail': + assert process_file_map[b'stdout'] == curr_pid check_child_processes.remove(curr_pid) # check number of cloneOps @@ -166,8 +166,8 @@ def check_for_clone_and_open( def match_open_and_close_fd( dfs_edges: typing.Sequence[tuple[Node, Node]], - process_tree_prov_log: parse_probe_log.ProvLog, - paths: list[str], + process_tree_prov_log: ProvLog, + paths: list[bytes], ) -> None: reserved_file_descriptors = [0, 1, 2] file_descriptors = set[int]() @@ -175,17 +175,17 @@ def match_open_and_close_fd( curr_pid, curr_epoch_idx, curr_tid, curr_op_idx = edge[0] curr_node_op = get_op_from_provlog(process_tree_prov_log, curr_pid, curr_epoch_idx, curr_tid, curr_op_idx) if curr_node_op is not None: - curr_node_op = curr_node_op.data - if(isinstance(curr_node_op,parse_probe_log.OpenOp)): - file_descriptors.add(curr_node_op.fd) - path = curr_node_op.path.path + curr_node_op_data = curr_node_op.data + if(isinstance(curr_node_op_data,OpenOp)): + file_descriptors.add(curr_node_op_data.fd) + path = curr_node_op_data.path.path if path in paths: paths.remove(path) - 
elif(isinstance(curr_node_op,parse_probe_log.CloseOp)): - fd = curr_node_op.low_fd + elif(isinstance(curr_node_op_data,CloseOp)): + fd = curr_node_op_data.low_fd if fd in reserved_file_descriptors: continue - if curr_node_op.ferrno != 0: + if curr_node_op_data.ferrno != 0: continue assert fd in file_descriptors file_descriptors.remove(fd) @@ -196,9 +196,9 @@ def match_open_and_close_fd( def check_pthread_graph( bfs_nodes: typing.Sequence[Node], dfs_edges: typing.Sequence[tuple[Node, Node]], - process_tree_prov_log: parse_probe_log.ProvLog, + process_tree_prov_log: ProvLog, total_pthreads: int, - paths: list[str], + paths: list[bytes], ) -> None: check_wait = [] process_file_map = {} @@ -211,14 +211,14 @@ def check_pthread_graph( for edge in dfs_edges: curr_pid, curr_epoch_idx, curr_tid, curr_op_idx = edge[0] curr_node_op = get_op_from_provlog(process_tree_prov_log, curr_pid, curr_epoch_idx, curr_tid, curr_op_idx) - if(isinstance(curr_node_op.data,parse_probe_log.CloneOp)): + if(isinstance(curr_node_op.data,CloneOp)): if edge[1][2] != curr_tid: continue check_wait.append(curr_node_op.data.task_id) if len(paths)!=0: process_file_map[paths[current_child_process]] = curr_node_op.data.task_id current_child_process+=1 - if isinstance(curr_node_op.data,parse_probe_log.WaitOp): + if isinstance(curr_node_op.data,WaitOp): ret_pid = curr_node_op.data.task_id wait_option = curr_node_op.data.options if wait_option == 0: @@ -229,7 +229,7 @@ def check_pthread_graph( for node in bfs_nodes: curr_pid, curr_epoch_idx, curr_tid, curr_op_idx = node curr_node_op = get_op_from_provlog(process_tree_prov_log, curr_pid, curr_epoch_idx, curr_tid, curr_op_idx) - if curr_node_op is not None and (isinstance(curr_node_op.data,parse_probe_log.OpenOp)): + if curr_node_op is not None and (isinstance(curr_node_op.data,OpenOp)): file_descriptors.add(curr_node_op.data.fd) path = curr_node_op.data.path.path print("open", curr_tid, curr_node_op.pthread_id, curr_node_op.data.fd) @@ -237,7 +237,7 @@ def 
check_pthread_graph( if len(process_file_map.keys())!=0 and parent_pthread_id!=curr_node_op.pthread_id: # ensure the right cloned process has OpenOp for the path assert process_file_map[path] == curr_node_op.pthread_id - elif curr_node_op is not None and (isinstance(curr_node_op.data, parse_probe_log.CloseOp)): + elif curr_node_op is not None and (isinstance(curr_node_op.data, CloseOp)): fd = curr_node_op.data.low_fd print("close", curr_tid, curr_node_op.pthread_id, curr_node_op.data.low_fd) if fd in reserved_file_descriptors: @@ -256,12 +256,12 @@ def check_pthread_graph( assert len(file_descriptors) == 0 def get_op_from_provlog( - process_tree_prov_log: parse_probe_log.ProvLog, + process_tree_prov_log: ProvLog, pid: int, exec_epoch_id: int, tid: int, op_idx: int, -) -> parse_probe_log.Op: +) -> Op: if op_idx == -1 or exec_epoch_id == -1: - return None + raise ValueError() return process_tree_prov_log.processes[pid].exec_epochs[exec_epoch_id].threads[tid].ops[op_idx] From db7a47d070a94fd624508566ce4f5b7b283abd36 Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Tue, 30 Jul 2024 15:51:26 -0500 Subject: [PATCH 11/18] Fix ruff check --- probe_src/python/probe_py/manual/test_probe.py | 1 - 1 file changed, 1 deletion(-) diff --git a/probe_src/python/probe_py/manual/test_probe.py b/probe_src/python/probe_py/manual/test_probe.py index 73fdb67b..a3c5c272 100644 --- a/probe_src/python/probe_py/manual/test_probe.py +++ b/probe_src/python/probe_py/manual/test_probe.py @@ -1,5 +1,4 @@ import typing -import tarfile from probe_py.generated.parser import ProvLog, parse_probe_log from probe_py.generated.ops import OpenOp, CloneOp, ExecOp, InitProcessOp, InitExecEpochOp, CloseOp, WaitOp, Op from . 
import analysis From a16ccf0bc0a4377f320a16079ad0eeb37243afb2 Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Tue, 30 Jul 2024 18:28:03 -0500 Subject: [PATCH 12/18] Fix Mypy checks --- probe_src/python/probe_py/manual/test_probe.py | 2 +- setup_devshell.sh | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/probe_src/python/probe_py/manual/test_probe.py b/probe_src/python/probe_py/manual/test_probe.py index a3c5c272..6ad484f5 100644 --- a/probe_src/python/probe_py/manual/test_probe.py +++ b/probe_src/python/probe_py/manual/test_probe.py @@ -60,7 +60,7 @@ def test_pthreads() -> None: def execute_command(command: list[str], return_code: int = 0) -> ProvLog: input = pathlib.Path("probe_log") result = subprocess.run( - ['./PROBE', 'record'] + (["--debug"] if DEBUG_LIBPROBE else []) + (["--make"] if REMAKE_LIBPROBE else []) + command, + ['probe', 'record'] + (["--debug"] if DEBUG_LIBPROBE else []) + (["--make"] if REMAKE_LIBPROBE else []) + command, # capture_output=True, # text=True, check=False, diff --git a/setup_devshell.sh b/setup_devshell.sh index fc97f20c..348802d6 100644 --- a/setup_devshell.sh +++ b/setup_devshell.sh @@ -9,11 +9,10 @@ if [ ! -f flake.nix ]; then fi # Rust frontend uses CPATH to find libprobe headers -export CPATH="$(realpath ./probe_src/libprobe/include):$CPATH" +export CPATH="$PWD/probe_src/libprobe/include:$CPATH" # Rust CLI uses __PROBE_LIB to find libprobe binary -# build may not exist yet, so we realpath its parent. -export __PROBE_LIB="$(realpath ./probe_src/libprobe)/build" +export __PROBE_LIB="$PWD/probe_src/libprobe/build" # Ensure libprobe.so gets maked if [ ! -f $__PROBE_LIB/libprobe.so ]; then @@ -22,18 +21,20 @@ fi # Rust code uses PYGEN_OUTFILE to determine where to write this file. # TODO: Replace this with a static path, because it is never not this path. 
-export PYGEN_OUTFILE="$(realpath ./probe_src/probe_frontend/python/probe_py/generated/ops.py)" +export PYGEN_OUTFILE="$PWD/probe_src/probe_frontend/python/probe_py/generated/ops.py" # Ensure PROBE CLI gets built if [ ! -f probe_src/probe_frontend/target/release/probe ]; then echo -e "${red}Please run 'env -C probe_src/probe_frontend cargo build --release' to compile probe binary${clr}" fi -# Add PROBE CLI to path, noting that target/release may not exist yet -export PATH="$(realpath ./probe_src/probe_frontend)/target/release:$PATH" +# Add PROBE CLI to path +export PATH="$PWD/probe_src/probe_frontend/target/release:$PATH" # Add probe_py.generated to the Python path -export PYTHONPATH="$(realpath ./probe_src/probe_frontend/python):$PYTHONPATH" +export PYTHONPATH="$PWD/probe_src/probe_frontend/python:$PYTHONPATH" +export MYPYPATH="$PWD/probe_src/probe_frontend/python:$MYPYPATH" # Add probe_py.manual to the Python path -export PYTHONPATH="$(realpath ./probe_src/python):$PYTHONPATH" +export PYTHONPATH="$PWD/probe_src/python:$PYTHONPATH" +export MYPYPATH="$PWD/probe_src/python:$MYPYPATH" From ed0af19f45626e1ccf220d5f8e7835c3bb3dc1ee Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Tue, 30 Jul 2024 18:39:49 -0500 Subject: [PATCH 13/18] Fix tests (paritally) --- probe_src/python/probe_py/manual/test_probe.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/probe_src/python/probe_py/manual/test_probe.py b/probe_src/python/probe_py/manual/test_probe.py index 6ad484f5..ae7a155f 100644 --- a/probe_src/python/probe_py/manual/test_probe.py +++ b/probe_src/python/probe_py/manual/test_probe.py @@ -1,3 +1,4 @@ +import pytest import typing from probe_py.generated.parser import ProvLog, parse_probe_log from probe_py.generated.ops import OpenOp, CloneOp, ExecOp, InitProcessOp, InitExecEpochOp, CloseOp, WaitOp, Op @@ -59,13 +60,17 @@ def test_pthreads() -> None: def execute_command(command: list[str], return_code: int = 0) -> ProvLog: input = 
pathlib.Path("probe_log") + if input.exists(): + input.unlink() result = subprocess.run( ['probe', 'record'] + (["--debug"] if DEBUG_LIBPROBE else []) + (["--make"] if REMAKE_LIBPROBE else []) + command, # capture_output=True, # text=True, check=False, ) - assert result.returncode == return_code + # TODO: Discuss if PROBE should preserve the returncode. + # The Rust CLI currently does not + # assert result.returncode == return_code assert input.exists() process_tree_prov_log = parse_probe_log(input) return process_tree_prov_log From df345f996f46ed2b3de03e956e1044dae5dd4e94 Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Fri, 2 Aug 2024 14:42:37 -0500 Subject: [PATCH 14/18] Fix devshell --- Justfile | 13 ++++++++----- flake.nix | 15 ++++++++++----- setup_devshell.sh | 29 +++++++++++++---------------- 3 files changed, 31 insertions(+), 26 deletions(-) diff --git a/Justfile b/Justfile index fb39065e..c5907b94 100644 --- a/Justfile +++ b/Justfile @@ -17,9 +17,14 @@ check-mypy: mypy --strict --package probe_py.generated mypy --strict probe_src/libprobe -compile-libprobe: +compile-lib: make --directory=probe_src/libprobe all +compile-cli: + env --chdir=probe_src/probe_frontend cargo build --release + +compile: compile-lib compile-cli + test-ci: compile-libprobe make --directory=probe_src/tests/c all cd probe_src && python -m pytest . @@ -31,8 +36,6 @@ test-dev: compile-libprobe check-flake: nix flake check --all-systems -# TODO: checking the flake should do the tests and compilations... 
-# So we should probably remove those checks from the Just file -pre-commit: fix-format-nix fix-ruff check-mypy check-flake compile-libprobe test-dev +pre-commit: fix-format-nix fix-ruff compile-all check-mypy check-flake test-dev -on-push: check-format-nix check-ruff check-mypy check-flake compile-libprobe test-ci +on-push: check-format-nix check-ruff compile-all check-mypy check-flake test-ci diff --git a/flake.nix b/flake.nix index 3b084f24..9b13a368 100644 --- a/flake.nix +++ b/flake.nix @@ -107,16 +107,21 @@ buildInputs = oldAttrs.buildInputs ++ [ (python.withPackages (pypkgs: [ - python.pkgs.networkx - python.pkgs.pygraphviz - python.pkgs.pydot - python.pkgs.rich - python.pkgs.typer + # probe_py.manual runtime requirements + pypkgs.networkx + pypkgs.pygraphviz + pypkgs.pydot + pypkgs.rich + pypkgs.typer + # probe_py.manual "dev time" requirements pypkgs.psutil pypkgs.pytest pypkgs.mypy pypkgs.ipython + + # libprobe build time requirement + pypkgs.pycparser ])) # (export-and-rename python312-debug [["bin/python" "bin/python-dbg"]]) pkgs.which diff --git a/setup_devshell.sh b/setup_devshell.sh index 348802d6..82c07bf4 100644 --- a/setup_devshell.sh +++ b/setup_devshell.sh @@ -3,38 +3,35 @@ red='\033[0;31m' clr='\033[0m' -# Ensure `nix develop` was called from the root directory. -if [ ! -f flake.nix ]; then - echo -e "${red}Please cd to the project root before trying to enter the devShell ('nix develop').${clr}" -fi +project_root="$(dirname "$(realpath "${BASH_SOURCE[0]}")")" # Rust frontend uses CPATH to find libprobe headers -export CPATH="$PWD/probe_src/libprobe/include:$CPATH" +export CPATH="$project_root/probe_src/libprobe/include:$CPATH" # Rust CLI uses __PROBE_LIB to find libprobe binary -export __PROBE_LIB="$PWD/probe_src/libprobe/build" +export __PROBE_LIB="$project_root/probe_src/libprobe/build" # Ensure libprobe.so gets maked -if [ ! 
-f $__PROBE_LIB/libprobe.so ]; then - echo -e "${red}Please run 'make -C probe_src/libprobe all' to compile libprobe${clr}" +if [ ! -f "$__PROBE_LIB/libprobe.so" ]; then + echo -e "${red}Please run 'just compile-lib' to compile libprobe${clr}" fi # Rust code uses PYGEN_OUTFILE to determine where to write this file. # TODO: Replace this with a static path, because it is never not this path. -export PYGEN_OUTFILE="$PWD/probe_src/probe_frontend/python/probe_py/generated/ops.py" +export PYGEN_OUTFILE="$project_root/probe_src/probe_frontend/python/probe_py/generated/ops.py" # Ensure PROBE CLI gets built -if [ ! -f probe_src/probe_frontend/target/release/probe ]; then - echo -e "${red}Please run 'env -C probe_src/probe_frontend cargo build --release' to compile probe binary${clr}" +if [ ! -f $project_root/probe_src/probe_frontend/target/release/probe ]; then + echo -e "${red}Please run 'just compile-cli' to compile probe binary${clr}" fi # Add PROBE CLI to path -export PATH="$PWD/probe_src/probe_frontend/target/release:$PATH" +export PATH="$project_root/probe_src/probe_frontend/target/release:$PATH" # Add probe_py.generated to the Python path -export PYTHONPATH="$PWD/probe_src/probe_frontend/python:$PYTHONPATH" -export MYPYPATH="$PWD/probe_src/probe_frontend/python:$MYPYPATH" +export PYTHONPATH="$project_root/probe_src/probe_frontend/python:$PYTHONPATH" +export MYPYPATH="$project_root/probe_src/probe_frontend/python:$MYPYPATH" # Add probe_py.manual to the Python path -export PYTHONPATH="$PWD/probe_src/python:$PYTHONPATH" -export MYPYPATH="$PWD/probe_src/python:$MYPYPATH" +export PYTHONPATH="$project_root/probe_src/python:$PYTHONPATH" +export MYPYPATH="$project_root/probe_src/python:$MYPYPATH" From 6c2dde0459d4329ed970e645a2d3a9883fbb5ec6 Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Fri, 2 Aug 2024 15:34:45 -0500 Subject: [PATCH 15/18] Updated tests --- .gitignore | 2 ++ Justfile | 24 ++++++++++---- probe_src/python/probe_py/manual/analysis.py | 25 ++++++++------- 
.../python/probe_py/manual/test_probe.py | 32 +++++++++++-------- probe_src/tasks.md | 29 ++++++++++------- 5 files changed, 67 insertions(+), 45 deletions(-) diff --git a/.gitignore b/.gitignore index bffe2260..db5b1270 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,5 @@ **/.directory **/.Trash* **/desktop.ini + +probe_log diff --git a/Justfile b/Justfile index c5907b94..719c1392 100644 --- a/Justfile +++ b/Justfile @@ -12,6 +12,18 @@ check-ruff: #ruff format --check probe_src # TODO: uncomment ruff check probe_src +check-format-rust: + env --chdir probe_src/probe_frontend cargo fmt --check + +fix-format-rust: + env --chdir probe_src/probe_frontend cargo fmt + +check-clippy: + env --chdir probe_src/probe_frontend cargo clippy + +fix-clippy: + env --chdir probe_src/probe_frontend cargo clippy --fix --allow-staged + check-mypy: mypy --strict --package probe_py.manual mypy --strict --package probe_py.generated @@ -25,17 +37,15 @@ compile-cli: compile: compile-lib compile-cli -test-ci: compile-libprobe - make --directory=probe_src/tests/c all - cd probe_src && python -m pytest . +test-ci: compile-lib + python -m pytest . -test-dev: compile-libprobe - make --directory=probe_src/tests/c all +test-dev: compile-lib cd probe_src && python -m pytest . 
--failed-first --maxfail=1 check-flake: nix flake check --all-systems -pre-commit: fix-format-nix fix-ruff compile-all check-mypy check-flake test-dev +pre-commit: fix-format-nix fix-ruff fix-format-rust fix-clippy compile check-mypy test-dev -on-push: check-format-nix check-ruff compile-all check-mypy check-flake test-ci +on-push: check-format-nix check-ruff check-format-rust check-clippy compile check-mypy check-flake test-ci diff --git a/probe_src/python/probe_py/manual/analysis.py b/probe_src/python/probe_py/manual/analysis.py index 508e108c..e3218856 100644 --- a/probe_src/python/probe_py/manual/analysis.py +++ b/probe_src/python/probe_py/manual/analysis.py @@ -181,7 +181,7 @@ def get_last_pthread(pid: int, exid: int, target_pthread_id: int) -> list[Node]: elif op_data.ferrno == 0 and op_data.task_type == TaskType.TASK_PTHREAD: for dest in get_last_pthread(pid, exid, op_data.task_id): fork_join_edges.append((dest, node)) - elif isinstance(op, ExecOp): + elif isinstance(op_data, ExecOp): # Exec brings same pid, incremented exid, and main thread target = pid, exid + 1, pid exec_edges.append((node, first(*target))) @@ -264,7 +264,7 @@ def validate_hb_clones(provlog: ProvLog, process_graph: nx.DiGraph) -> list[str] elif op.data.task_type == TaskType.TASK_ISO_C_THREAD and op.data.task_id == op1.iso_c_thread_id: break else: - ret.append(f"Could not find a successor for CloneOp {node} {TaskType(op.data.task_type).name} in the target thread") + ret.append(f"Could not find a successor for CloneOp {node} {TaskType(op.data.task_type).name} in the target thread/process/whatever") return ret @@ -301,18 +301,19 @@ def validate_hb_acyclic(provlog: ProvLog, process_graph: nx.DiGraph) -> list[str def validate_hb_execs(provlog: ProvLog, process_graph: nx.DiGraph) -> list[str]: ret = list[str]() - for (node0, node1) in process_graph.edges: + for node0 in process_graph.nodes(): pid0, eid0, tid0, op0 = node0 - pid1, eid1, tid1, op1 = node1 op0 = prov_log_get_node(provlog, *node0) 
- op1 = prov_log_get_node(provlog, *node1) - if False: - pass - elif isinstance(op0.data, ExecOp): - if eid0 + 1 != eid1: - ret.append(f"ExecOp {node0} is followed by {node1}, whose exec epoch id should be {eid0 + 1}") - if not isinstance(op1.data, InitExecEpochOp): - ret.append(f"ExecOp {node0} is followed by {node1}, which is not InitExecEpoch") + if isinstance(op0.data, ExecOp): + for node1 in process_graph.successors(node0): + pid1, eid1, tid1, op1 = node1 + op1 = prov_log_get_node(provlog, *node1) + if isinstance(op1.data, InitExecEpochOp): + if eid0 + 1 != eid1: + ret.append(f"ExecOp {node0} is followed by {node1}, whose exec epoch id should be {eid0 + 1}") + break + else: + ret.append(f"ExecOp {node0} is not followed by an InitExecEpochOp.") return ret diff --git a/probe_src/python/probe_py/manual/test_probe.py b/probe_src/python/probe_py/manual/test_probe.py index ae7a155f..94497653 100644 --- a/probe_src/python/probe_py/manual/test_probe.py +++ b/probe_src/python/probe_py/manual/test_probe.py @@ -1,4 +1,3 @@ -import pytest import typing from probe_py.generated.parser import ProvLog, parse_probe_log from probe_py.generated.ops import OpenOp, CloneOp, ExecOp, InitProcessOp, InitExecEpochOp, CloseOp, WaitOp, Op @@ -13,50 +12,54 @@ REMAKE_LIBPROBE = False +project_root = pathlib.Path(__file__).resolve().parent.parent.parent.parent.parent + + def test_diff_cmd() -> None: - command = [ - 'diff', '../flake.nix', '../flake.lock' - ] + paths = [str(project_root / "flake.nix"), str(project_root / "flake.lock")] + command = ['diff', *paths] process_tree_prov_log = execute_command(command, 1) process_graph = analysis.provlog_to_digraph(process_tree_prov_log) assert not analysis.validate_hb_graph(process_tree_prov_log, process_graph) - paths = [b'../flake.nix',b'../flake.lock'] + path_bytes = [path.encode() for path in paths] dfs_edges = list(nx.dfs_edges(process_graph)) - match_open_and_close_fd(dfs_edges, process_tree_prov_log, paths) + 
match_open_and_close_fd(dfs_edges, process_tree_prov_log, path_bytes) def test_bash_in_bash() -> None: - command = ["bash", "-c", "head ../flake.nix ; head ../flake.lock"] + command = ["bash", "-c", f"head {project_root}/flake.nix ; head {project_root}/flake.lock"] process_tree_prov_log = execute_command(command) process_graph = analysis.provlog_to_digraph(process_tree_prov_log) + print(analysis.digraph_to_pydot_string(process_tree_prov_log, process_graph)) assert not analysis.validate_hb_graph(process_tree_prov_log, process_graph) - paths = [b'../flake.nix', b'../flake.lock'] + paths = [f'{project_root}/flake.nix'.encode(), f'{project_root}/flake.lock'.encode()] process_file_map = {} dfs_edges = list(nx.dfs_edges(process_graph)) parent_process_id = dfs_edges[0][0][0] - process_file_map[b"../flake.lock"] = parent_process_id + process_file_map[f"{project_root}/flake.lock".encode()] = parent_process_id + process_file_map[f"{project_root}/flake.nix".encode()] = parent_process_id check_for_clone_and_open(dfs_edges, process_tree_prov_log, 1, process_file_map, paths) def test_bash_in_bash_pipe() -> None: - command = ["bash", "-c", "head ../flake.nix | tail"] + command = ["bash", "-c", f"head {project_root}/flake.nix | tail"] process_tree_prov_log = execute_command(command) process_graph = analysis.provlog_to_digraph(process_tree_prov_log) assert not analysis.validate_hb_graph(process_tree_prov_log, process_graph) - paths = [b'../flake.nix',b'stdout'] + paths = [f'{project_root}/flake.nix'.encode(), b'stdout'] dfs_edges = list(nx.dfs_edges(process_graph)) check_for_clone_and_open(dfs_edges, process_tree_prov_log, len(paths), {}, paths) def test_pthreads() -> None: - process_tree_prov_log = execute_command(["./tests/c/createFile.exe"]) + process_tree_prov_log = execute_command([f"{project_root}/probe_src/tests/c/createFile.exe"]) process_graph = analysis.provlog_to_digraph(process_tree_prov_log) - assert not analysis.validate_hb_graph(process_tree_prov_log, process_graph) 
+ #assert not analysis.validate_hb_graph(process_tree_prov_log, process_graph) root_node = [n for n in process_graph.nodes() if process_graph.out_degree(n) > 0 and process_graph.in_degree(n) == 0][0] bfs_nodes = [node for layer in nx.bfs_layers(process_graph, root_node) for node in layer] dfs_edges = list(nx.dfs_edges(process_graph)) total_pthreads = 3 paths = [b'/tmp/0.txt', b'/tmp/1.txt', b'/tmp/2.txt'] - check_pthread_graph(bfs_nodes, dfs_edges, process_tree_prov_log, total_pthreads, paths) + #check_pthread_graph(bfs_nodes, dfs_edges, process_tree_prov_log, total_pthreads, paths) def execute_command(command: list[str], return_code: int = 0) -> ProvLog: input = pathlib.Path("probe_log") @@ -71,6 +74,7 @@ def execute_command(command: list[str], return_code: int = 0) -> ProvLog: # TODO: Discuss if PROBE should preserve the returncode. # The Rust CLI currently does not # assert result.returncode == return_code + assert result.returncode == 0 assert input.exists() process_tree_prov_log = parse_probe_log(input) return process_tree_prov_log diff --git a/probe_src/tasks.md b/probe_src/tasks.md index 89fff2d4..82087036 100644 --- a/probe_src/tasks.md +++ b/probe_src/tasks.md @@ -1,20 +1,20 @@ -a- [ ] Implement Rust CLI for record. Jenna is working on this. +- [x] Implement Rust CLI for record. Jenna is working on this. - The Rust wrapper should replace the functionality of `record` in the `./probe_py/cli.py`. It should output a language-neutral structure that can be parsed quickly later on. - [x] The Rust wrapper should exec the program in an environment with libprobe in `LD_PRELOAD`. - [x] The Rust wrapper should transcribe the C structs into a language-neutral format. - [x] Split "transcribing" from "running in PROBE". We should be able to do them in two steps. - - [ ] Parse the language-neutral format into a `ProvLogTree` in Python, replacing `./probe_py/parse_probe_log.py`. - - [ ] Make sure analysis code still runs. 
+ - [x] Parse the language-neutral format into a `ProvLogTree` in Python, replacing `./probe_py/parse_probe_log.py`. + - [x] Make sure analysis code still runs. - [ ] Get GDB working. - - [ ] Compile statically. -- [ ] Write end-to-end-tests. End-to-end test should verify properties of the NetworkX graph returned by `provlog_to_digraph`. - - [ ] Check generic properties (Shofiya is working on this) - - [ ] The file descriptor used in CloseOp is one returned by a prior OpenOp (or a special file descriptor). - - [ ] Verify we aren't "missing" an Epoch ID, e.g., 0, 1, 3, 4 is missing 2. - - [ ] Verify that the TID returned by CloneOp is the same as the TID in the InitOp of the new thread. - - [ ] Verify that the TID returned by WaitOp is a TID previously returned by CloneOp. - - [ ] Verify the graph is acyclic and has one root. - - [ ] Put some of these checks in a function, and have that function be called by `PROBE analysis --check`. + - [x] Compile statically. +- [x] Write end-to-end-tests. End-to-end test should verify properties of the NetworkX graph returned by `provlog_to_digraph`. + - [x] Check generic properties (Shofiya is working on this) + - [x] The file descriptor used in CloseOp is one returned by a prior OpenOp (or a special file descriptor). + - [x] Verify we aren't "missing" an Epoch ID, e.g., 0, 1, 3, 4 is missing 2. + - [x] Verify that the TID returned by CloneOp is the same as the TID in the InitOp of the new thread. + - [x] Verify that the TID returned by WaitOp is a TID previously returned by CloneOp. + - [x] Verify the graph is acyclic and has one root. + - [x] Put some of these checks in a function, and have that function be called by `PROBE analysis --check`. - Note that the application may not close every file descriptor it opens; that would be considered a "sloppy" application, but it should still work in PROBE. - [x] Write a pthreads application for testing purposes (Saleha finished this). 
- [ ] Verify some properties of the pthreads application. @@ -29,6 +29,11 @@ a- [ ] Implement Rust CLI for record. Jenna is working on this. - [ ] Verify that this doesn't crash `sh -c "sh -c 'cat a ; cat b' ; sh -c 'cat d ; cat e'"` (in the past it did) - [ ] Continue along these lines one or two more cases. - [ ] Link with libbacktrace on `--debug` runs. +- [ ] Refactor some identifiers in codebase. + - [ ] prov_log_process_tree -> process_tree + - [ ] (pid, ex_id, tid, op_id) -> dataclass + - [ ] digraph, process_graph -> hb_graph +- Libprobe should identify which was the "root" process. - [ ] Write remote script wrappers - [ ] Write an SSH wrapper. Asif and Shofiya are working on this. - [ ] There should be a shell script named `ssh` that calls `./PROBE ssh `. From 372893f48541521afca39457ac000026a5bafe4a Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Fri, 2 Aug 2024 16:09:52 -0500 Subject: [PATCH 16/18] Restore tests --- Justfile | 4 +- .../python/probe_py/manual/test_probe.py | 10 +++-- probe_src/tasks.md | 39 +++++++++++-------- 3 files changed, 31 insertions(+), 22 deletions(-) diff --git a/Justfile b/Justfile index 719c1392..1b7ae107 100644 --- a/Justfile +++ b/Justfile @@ -38,10 +38,10 @@ compile-cli: compile: compile-lib compile-cli test-ci: compile-lib - python -m pytest . + pytest probe_src test-dev: compile-lib - cd probe_src && python -m pytest . 
--failed-first --maxfail=1 + pytest probe_src --failed-first --maxfail=1 check-flake: nix flake check --all-systems diff --git a/probe_src/python/probe_py/manual/test_probe.py b/probe_src/python/probe_py/manual/test_probe.py index 94497653..8279d8d2 100644 --- a/probe_src/python/probe_py/manual/test_probe.py +++ b/probe_src/python/probe_py/manual/test_probe.py @@ -53,13 +53,13 @@ def test_bash_in_bash_pipe() -> None: def test_pthreads() -> None: process_tree_prov_log = execute_command([f"{project_root}/probe_src/tests/c/createFile.exe"]) process_graph = analysis.provlog_to_digraph(process_tree_prov_log) - #assert not analysis.validate_hb_graph(process_tree_prov_log, process_graph) + assert not analysis.validate_hb_graph(process_tree_prov_log, process_graph) root_node = [n for n in process_graph.nodes() if process_graph.out_degree(n) > 0 and process_graph.in_degree(n) == 0][0] bfs_nodes = [node for layer in nx.bfs_layers(process_graph, root_node) for node in layer] dfs_edges = list(nx.dfs_edges(process_graph)) total_pthreads = 3 paths = [b'/tmp/0.txt', b'/tmp/1.txt', b'/tmp/2.txt'] - #check_pthread_graph(bfs_nodes, dfs_edges, process_tree_prov_log, total_pthreads, paths) + check_pthread_graph(bfs_nodes, dfs_edges, process_tree_prov_log, total_pthreads, paths) def execute_command(command: list[str], return_code: int = 0) -> ProvLog: input = pathlib.Path("probe_log") @@ -67,10 +67,12 @@ def execute_command(command: list[str], return_code: int = 0) -> ProvLog: input.unlink() result = subprocess.run( ['probe', 'record'] + (["--debug"] if DEBUG_LIBPROBE else []) + (["--make"] if REMAKE_LIBPROBE else []) + command, - # capture_output=True, - # text=True, + capture_output=True, + text=True, check=False, ) + print(result.stdout) + print(result.stderr) # TODO: Discuss if PROBE should preserve the returncode. 
# The Rust CLI currently does not # assert result.returncode == return_code diff --git a/probe_src/tasks.md b/probe_src/tasks.md index 82087036..c183a211 100644 --- a/probe_src/tasks.md +++ b/probe_src/tasks.md @@ -1,14 +1,14 @@ -- [x] Implement Rust CLI for record. Jenna is working on this. +- [x] Implement Rust CLI for record. Jenna finished this. - The Rust wrapper should replace the functionality of `record` in the `./probe_py/cli.py`. It should output a language-neutral structure that can be parsed quickly later on. - [x] The Rust wrapper should exec the program in an environment with libprobe in `LD_PRELOAD`. - [x] The Rust wrapper should transcribe the C structs into a language-neutral format. - [x] Split "transcribing" from "running in PROBE". We should be able to do them in two steps. - [x] Parse the language-neutral format into a `ProvLogTree` in Python, replacing `./probe_py/parse_probe_log.py`. - [x] Make sure analysis code still runs. - - [ ] Get GDB working. + - [x] Get GDB working. - [x] Compile statically. - [x] Write end-to-end-tests. End-to-end test should verify properties of the NetworkX graph returned by `provlog_to_digraph`. - - [x] Check generic properties (Shofiya is working on this) + - [x] Check generic properties Shofiya and Sam finished this. - [x] The file descriptor used in CloseOp is one returned by a prior OpenOp (or a special file descriptor). - [x] Verify we aren't "missing" an Epoch ID, e.g., 0, 1, 3, 4 is missing 2. - [x] Verify that the TID returned by CloneOp is the same as the TID in the InitOp of the new thread. @@ -33,7 +33,13 @@ - [ ] prov_log_process_tree -> process_tree - [ ] (pid, ex_id, tid, op_id) -> dataclass - [ ] digraph, process_graph -> hb_graph -- Libprobe should identify which was the "root" process. 
+ - [ ] Reformat Nix and Python + - [ ] Reformat repository layout + - [ ] `probe_src` -> `src` or just `/` (moving children up a level + - [ ] `probe_frontend` -> `rust`, and renaming the packages in it to `cli`, `macros`, and `pygen` + - [ ] Move tests to root level? + - [ ] Distinguish between unit-tests and end-to-end tests + - [ ] Ensure Arena tests, struct_parser tests, and c tests are being compiled and exercised. Currently, I don't think the c tests are being compiled. Should pytest runner compile them or Justfile? Clang-tidy should cover them. - [ ] Write remote script wrappers - [ ] Write an SSH wrapper. Asif and Shofiya are working on this. - [ ] There should be a shell script named `ssh` that calls `./PROBE ssh `. @@ -59,16 +65,14 @@ - [x] Check Python code with Mypy. - [x] Run tests on the current machine. - [x] Write a CI script that uses Nix to install dependencies and run the Justfiles. - - [ ] Check (not format) code in Alejandra and Black. - - [ ] Reformat all Python code in Black. - - [ ] Figure out why tests don't work. + - [x] Check (not format) code in Alejandra and Black. + - [x] Figure out why tests don't work. - [ ] Run tests in an Ubuntu Docker container. - [ ] Run tests in a really old Ubuntu Docker container. -- [ ] Write microbenchmarking - - [ ] Run performance test-cases in two steps: one with just libprobe record and one with just transcription. (3 new CLI entrypoints, described in comments in CLI.py) + - [ ] Figure out how to intelligently combine Nix checks, Just checks, and GitHub CI checks, so we aren't duplicating checks. +- [x] Write microbenchmarking + - [x] Run performance test-cases in two steps: one with just libprobe record and one with just transcription. (3 new CLI entrypoints, described in comments in CLI.py) - [ ] Write interesting performance tests, using `benchmark/workloads.py` as inspiration. - - [ ] Run the benchmarks with Hyperfine, in Containerexec, in a Python script, storing the result as a CSV. 
- - Python script that runs `hyperfine ./PROBE record --no-transcribe` and `hyperfine ./PROBE transcribe` for several tests. - [ ] Output conversions - [ ] From the NetworkX digraph, export (Shofiya is working on this): - [ ] A dataflow graph, showing only files, processes, and the flow of information between them. The following rules define when there is an edge: @@ -78,9 +82,11 @@ - [ ] [Process Run Crate](https://www.researchobject.org/workflow-run-crate/profiles/process_run_crate/) (Saleha is working on this) - [ ] [Common Workflow Language](https://www.commonwl.org/) - [ ] Write a test that runs the resulting CWL. + - [ ] Nextflow (Kyrilos is working on this) + - [ ] Write a test that runs the resulting CWL. - [ ] Makefile - [ ] Write a test that runs the resulting Makefile. - - [ ] LLM context prompt + - [ ] LLM context prompt (Kyrilos is working on this) - Build on the work of Nichole Bufford et al. - [ ] Consider how to combine provenance from multiple sources - [ ] Consider language-level sources like rdtlite @@ -92,15 +98,16 @@ - [x] Debug `createFile.c` crash while trying to `mkdir_and_descend`. - [x] Debug `gcc`. - [x] Add thread ID and pthread ID to op. - - [ ] Add Dup ops and debug `bash -c 'head foo > bar'` - - [ ] Compile Libprobe with static Musl instead of glibc. + - [ ] Libprobe should identify which was the "root" process. + - [ ] Add Dup ops and debug `bash -c 'head foo > bar'` (branch add-new-ops). Sam is working on this + - [ ] Libprobe should detect Musl vs Glibc at runtime. - [ ] Put magic bytes in arena - [ ] Unify the Arenas. - [ ] Try to break it. Jenna has some input on this. - [ ] Add interesting cases to tests. - [ ] Add more Ops (see branch add-new-ops) -- [ ] Sort dirents (see branch sort-dirents) -- [ ] Generate a replay package. +- [ ] Sort dirents (see branch sort-dirents). Sam is working on this +- [ ] Generate a replay package (see branch generate-replay-package). 
Sam is working on this - [ ] Should be activated by a flag: `./PROBE record --with-replay` - [ ] Should copy all read files into the probe log. - [ ] Should export the PROBE log to the following formats with a CWL script: From 3b722397e4004b86dcd479cb005a9f3403a95e3c Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Sat, 3 Aug 2024 19:01:28 -0500 Subject: [PATCH 17/18] Fix tests by starting at root node --- probe_src/python/probe_py/manual/test_probe.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/probe_src/python/probe_py/manual/test_probe.py b/probe_src/python/probe_py/manual/test_probe.py index 8279d8d2..9855a57a 100644 --- a/probe_src/python/probe_py/manual/test_probe.py +++ b/probe_src/python/probe_py/manual/test_probe.py @@ -30,11 +30,11 @@ def test_bash_in_bash() -> None: command = ["bash", "-c", f"head {project_root}/flake.nix ; head {project_root}/flake.lock"] process_tree_prov_log = execute_command(command) process_graph = analysis.provlog_to_digraph(process_tree_prov_log) - print(analysis.digraph_to_pydot_string(process_tree_prov_log, process_graph)) assert not analysis.validate_hb_graph(process_tree_prov_log, process_graph) paths = [f'{project_root}/flake.nix'.encode(), f'{project_root}/flake.lock'.encode()] process_file_map = {} - dfs_edges = list(nx.dfs_edges(process_graph)) + start_node = [node for node, degree in process_graph.in_degree() if degree == 0][0] + dfs_edges = list(nx.dfs_edges(process_graph,source=start_node)) parent_process_id = dfs_edges[0][0][0] process_file_map[f"{project_root}/flake.lock".encode()] = parent_process_id process_file_map[f"{project_root}/flake.nix".encode()] = parent_process_id @@ -46,7 +46,8 @@ def test_bash_in_bash_pipe() -> None: process_graph = analysis.provlog_to_digraph(process_tree_prov_log) assert not analysis.validate_hb_graph(process_tree_prov_log, process_graph) paths = [f'{project_root}/flake.nix'.encode(), b'stdout'] - dfs_edges = list(nx.dfs_edges(process_graph)) + 
start_node = [node for node, degree in process_graph.in_degree() if degree == 0][0] + dfs_edges = list(nx.dfs_edges(process_graph,source=start_node)) check_for_clone_and_open(dfs_edges, process_tree_prov_log, len(paths), {}, paths) @@ -56,7 +57,8 @@ def test_pthreads() -> None: assert not analysis.validate_hb_graph(process_tree_prov_log, process_graph) root_node = [n for n in process_graph.nodes() if process_graph.out_degree(n) > 0 and process_graph.in_degree(n) == 0][0] bfs_nodes = [node for layer in nx.bfs_layers(process_graph, root_node) for node in layer] - dfs_edges = list(nx.dfs_edges(process_graph)) + root_node = [n for n in process_graph.nodes() if process_graph.out_degree(n) > 0 and process_graph.in_degree(n) == 0][0] + dfs_edges = list(nx.dfs_edges(process_graph,source=root_node)) total_pthreads = 3 paths = [b'/tmp/0.txt', b'/tmp/1.txt', b'/tmp/2.txt'] check_pthread_graph(bfs_nodes, dfs_edges, process_tree_prov_log, total_pthreads, paths) From 0f5fb02c55d2f5569b60add40c873439af3acf40 Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Sat, 3 Aug 2024 19:12:07 -0500 Subject: [PATCH 18/18] Compile tests before running them --- Justfile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Justfile b/Justfile index 1b7ae107..185d9147 100644 --- a/Justfile +++ b/Justfile @@ -35,7 +35,10 @@ compile-lib: compile-cli: env --chdir=probe_src/probe_frontend cargo build --release -compile: compile-lib compile-cli +compile-tests: + make --directory=probe_src/tests/c all + +compile: compile-lib compile-cli compile-tests test-ci: compile-lib pytest probe_src