feat: Results analysis

holochain · Sep 30, 2024 · 9365e09 · 9365e09
1 parent 84a5576
commit 9365e09
Show file tree

Hide file tree

Showing 14 changed files with 241 additions and 4 deletions.
diff --git a/.gitignore b/.gitignore
@@ -30,3 +30,4 @@ logs/**/*.log
 *.key
 /data
 scenarios/*/logs
+run_summary.jsonl
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -11,6 +11,8 @@ members = [
   "bindings/trycp_client",
   "bindings/trycp_runner",
 
+  "summariser",
+
   "happ_builder",
 
   "scenarios/app_install",
@@ -41,7 +43,7 @@ members = [
   "zomes/countersigning/coordinator",
   "zomes/countersigning/integrity",
   "zomes/validated/coordinator",
-  "zomes/validated/integrity",
+  "zomes/validated/integrity", "framework/summary_model",
 ]
 
 # By default, don't build the scenarios or zomes.
@@ -58,6 +60,8 @@ default-members = [
   "bindings/trycp_client",
   "bindings/trycp_runner",
 
+  "summariser",
+
   "happ_builder",
 ]
 
@@ -73,6 +77,7 @@ opentelemetry_api = { version = "0.20.0", features = ["metrics"] }
 toml = "0.8.10"
 serde = "1"
 serde_yaml = "0.9"
+serde_json = "1"
 walkdir = "2"
 sysinfo = "0.31"
 url = "2.5.0"
@@ -89,6 +94,7 @@ tokio-tungstenite = "0.21"
 rmp-serde = "1"
 rand = "0.8"
 ed25519-dalek = "2.1"
+itertools = "0.13"
 
 # Deps for Holochain
 holochain_client = { version = "=0.6.0-dev.10" }
@@ -111,6 +117,7 @@ wind_tunnel_core = { path = "./framework/core", version = "0.3.0-alpha.1" }
 wind_tunnel_instruments = { path = "./framework/instruments", version = "0.3.0-alpha.1" }
 wind_tunnel_instruments_derive = { path = "./framework/instruments_derive", version = "0.3.0-alpha.1" }
 wind_tunnel_runner = { path = "./framework/runner", version = "0.3.0-alpha.1" }
+wind_tunnel_summary_model = { path = "./framework/summary_model", version = "0.3.0-alpha.1" }
 
 # Bindings
 holochain_client_instrumented = { path = "./bindings/client", version = "0.3.0-alpha.1" }

diff --git a/bindings/trycp_runner/src/definition.rs b/bindings/trycp_runner/src/definition.rs
@@ -21,6 +21,10 @@ impl<RV: UserValuesConstraint, V: UserValuesConstraint> TryCPScenarioDefinitionB
     /// Once the TryCP customisations have been made, use this function to switch back to
     /// configuring default properties for the scenario.
     pub fn into_std(self) -> ScenarioDefinitionBuilder<RV, V> {
+        // These environment variables are common to TryCP tests. Always capture them and just let
+        // scenarios add any that are custom.
         self.inner
+            .add_capture_env("CONDUCTOR_CONFIG")
+            .add_capture_env("MIN_PEERS")
     }
 }
diff --git a/conductor-config-ci.yaml b/conductor-config-ci.yaml
@@ -4,3 +4,7 @@ network:
     - type: webrtc
       signal_url: "ws://localhost:4423"
   bootstrap_service: "http://localhost:4422"
+dpki:
+  dna_path: ~
+  network_seed: wind-tunnel-test
+  no_dpki: true
diff --git a/framework/runner/Cargo.toml b/framework/runner/Cargo.toml
@@ -23,6 +23,7 @@ nanoid = { workspace = true }
 
 wind_tunnel_core = { workspace = true }
 wind_tunnel_instruments = { workspace = true }
+wind_tunnel_summary_model = { workspace = true }
 
 [lints]
 workspace = true
diff --git a/framework/runner/src/definition.rs b/framework/runner/src/definition.rs
@@ -24,6 +24,7 @@ pub struct ScenarioDefinitionBuilder<RV: UserValuesConstraint, V: UserValuesCons
     cli: WindTunnelScenarioCli,
     default_agent_count: Option<usize>,
     default_duration_s: Option<u64>,
+    capture_env: HashSet<String>,
     setup_fn: Option<GlobalHookMut<RV>>,
     setup_agent_fn: Option<AgentHookMut<RV, V>>,
     agent_behaviour: HashMap<String, AgentHookMut<RV, V>>,
@@ -42,6 +43,7 @@ pub struct ScenarioDefinition<RV: UserValuesConstraint, V: UserValuesConstraint>
     pub(crate) assigned_behaviours: Vec<AssignedBehaviour>,
     pub(crate) duration_s: Option<u64>,
     pub(crate) connection_string: String,
+    pub(crate) capture_env: HashSet<String>,
     pub(crate) no_progress: bool,
     pub(crate) reporter: ReporterOpt,
     pub(crate) setup_fn: Option<GlobalHookMut<RV>>,
@@ -92,6 +94,7 @@ impl<RV: UserValuesConstraint, V: UserValuesConstraint> ScenarioDefinitionBuilde
             cli,
             default_agent_count: None,
             default_duration_s: None,
+            capture_env: HashSet::with_capacity(0),
             setup_fn: None,
             setup_agent_fn: None,
             agent_behaviour: HashMap::new(),
@@ -116,6 +119,11 @@ impl<RV: UserValuesConstraint, V: UserValuesConstraint> ScenarioDefinitionBuilde
         self
     }
 
+    pub fn add_capture_env(mut self, key: &str) -> Self {
+        self.capture_env.insert(key.to_string());
+        self
+    }
+
     /// Sets the global setup hook for this scenario. It will be run once, before any agents are started.
     pub fn use_setup(mut self, setup_fn: GlobalHookMut<RV>) -> Self {
         self.setup_fn = Some(setup_fn);
@@ -205,6 +213,7 @@ impl<RV: UserValuesConstraint, V: UserValuesConstraint> ScenarioDefinitionBuilde
             assigned_behaviours: build_assigned_behaviours(&self.cli, resolved_agent_count)?,
             duration_s: resolved_duration,
             connection_string: self.cli.connection_string,
+            capture_env: self.capture_env,
             no_progress: self.cli.no_progress,
             reporter: self.cli.reporter,
             setup_fn: self.setup_fn,

diff --git a/framework/runner/src/run.rs b/framework/runner/src/run.rs
@@ -1,12 +1,12 @@
 use std::path::PathBuf;
 use std::sync::atomic::AtomicUsize;
 use std::sync::Arc;
-use std::time::Duration;
+use std::time::{Duration, SystemTime};
 
 use anyhow::Context;
 use wind_tunnel_core::prelude::{AgentBailError, ShutdownHandle, ShutdownSignalError};
 use wind_tunnel_instruments::ReportConfig;
-
+use wind_tunnel_summary_model::append_run_summary;
 use crate::cli::ReporterOpt;
 use crate::monitor::start_monitor;
 use crate::progress::start_progress;
@@ -25,6 +25,22 @@ pub fn run<RV: UserValuesConstraint, V: UserValuesConstraint>(
 
     let definition = definition.build()?;
 
+    // Create the summary for the run. This is used to link the run with the report and build a
+    // summary from the metrics after the run has completed.
+    let mut summary = wind_tunnel_summary_model::RunSummary::new(
+        run_id.clone(),
+        definition.name.clone(),
+        SystemTime::now(),
+        definition.duration_s.clone(),
+        definition.assigned_behaviours.iter().map(|b| b.agent_count).sum(),
+        definition.assigned_behaviours.iter().map(|b| (b.behaviour_name.clone(), b.agent_count)).collect(),
+    );
+    for capture in &definition.capture_env {
+        if let Ok(value) = std::env::var(capture) {
+            summary.add_env(capture.clone(), value);
+        }
+    }
+
     log::info!("Running scenario: {}", definition.name);
 
     let runtime = tokio::runtime::Runtime::new().context("Failed to create Tokio runtime")?;
@@ -214,6 +230,11 @@ pub fn run<RV: UserValuesConstraint, V: UserValuesConstraint>(
     // Then wait for the reporting to finish
     runner_context_for_teardown.reporter().finalize();
 
+    summary.set_peer_end_count(agents_run_to_completion.load(std::sync::atomic::Ordering::Acquire));
+    if let Err(e) = append_run_summary(summary, PathBuf::from("run_summary.jsonl")) {
+        log::error!("Failed to append run summary: {:?}", e);
+    }
+
     println!("#RunId: [{}]", run_id);
 
     Ok(agents_run_to_completion.load(std::sync::atomic::Ordering::Acquire))

diff --git a/framework/summary_model/Cargo.toml b/framework/summary_model/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "wind_tunnel_summary_model"
+version = "0.3.0-alpha.1"
+description = "The Wind Tunnel summary model"
+license = "MIT"
+authors = ["ThetaSinner"]
+edition = "2021"
+categories = ["development-tools::testing", "development-tools::profiling"]
+homepage = "https://github.com/holochain/wind-tunnel"
+repository = "https://github.com/holochain/wind-tunnel"
+
+[dependencies]
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true }
+anyhow = { workspace = true }
+
+[lints]
+workspace = true
diff --git a/framework/summary_model/src/lib.rs b/framework/summary_model/src/lib.rs
@@ -0,0 +1,119 @@
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+use std::io::{BufRead, Write};
+use std::path::PathBuf;
+use std::time::{SystemTime, UNIX_EPOCH};
+
+/// Summary of a run
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct RunSummary {
+    /// The unique run id
+    ///
+    /// Chosen by the runner. Unique for each run.
+    pub run_id: String,
+    /// The name of the scenario that was run
+    pub scenario_name: String,
+    /// The time the run started
+    ///
+    /// This is a Unix timestamp in seconds.
+    pub started_at: u64,
+    /// The duration that the run was configured with, in seconds
+    ///
+    /// If the run was configured for soak testing, then this will not be set.
+    ///
+    /// It is possible that the run finished sooner than `started_at + run_duration` if all
+    /// behaviours failed. As long as [RunSummary::peer_end_count] is greater than 0 then that
+    /// number of agents will have run for the full duration.
+    pub run_duration: Option<u64>,
+    /// The number of peers configured
+    ///
+    /// This is the number of peers that were either configured or required by the behaviour
+    /// configuration.
+    pub peer_count: usize,
+    /// The number of peers at the end of the test
+    ///
+    /// If some peers exit early, for example due to a fatal error during a behaviour run or an
+    /// unavailable conductor, then this will be less than [RunSummary::peer_count].
+    pub peer_end_count: usize,
+    /// The behaviour configuration
+    ///
+    /// This is the number of agents that were assigned to each behaviour.
+    pub behaviours: HashMap<String, usize>,
+    /// Environment variables set for the run
+    ///
+    /// This won't capture all environment variables. Just the ones that the runner is aware of or
+    /// that are included by the scenario itself.
+    pub env: HashMap<String, String>,
+}
+
+impl RunSummary {
+    /// Create a new run summary
+    pub fn new(
+        run_id: String,
+        scenario_name: String,
+        started_at: SystemTime,
+        run_duration: Option<u64>,
+        peer_count: usize,
+        behaviours: HashMap<String, usize>,
+    ) -> Self {
+        Self {
+            run_id,
+            scenario_name,
+            started_at: started_at
+                .duration_since(UNIX_EPOCH)
+                .expect("Time went backwards")
+                .as_secs(),
+            run_duration,
+            peer_count,
+            peer_end_count: 0,
+            behaviours,
+            env: HashMap::with_capacity(0),
+        }
+    }
+
+    /// Set the peer end count
+    pub fn set_peer_end_count(&mut self, peer_end_count: usize) {
+        self.peer_end_count = peer_end_count;
+    }
+
+    /// Add an environment variable
+    pub fn add_env(&mut self, key: String, value: String) {
+        self.env.insert(key, value);
+    }
+}
+
+/// Append the run summary to a file
+///
+/// The summary will be serialized to JSON and output as a single line followed by a newline. The
+/// recommended file extension is `.jsonl`.
+pub fn append_run_summary(run_summary: RunSummary, path: PathBuf) -> anyhow::Result<()> {
+    let mut file = std::fs::OpenOptions::new()
+        .append(true)
+        .create(true)
+        .open(path)?;
+    store_run_summary(run_summary, &mut file)?;
+    file.write("\n".as_bytes())?;
+    Ok(())
+}
+
+/// Serialize the run summary to a writer
+pub fn store_run_summary<W: Write>(run_summary: RunSummary, writer: &mut W) -> anyhow::Result<()> {
+    serde_json::to_writer(writer, &run_summary)?;
+    Ok(())
+}
+
+/// Load run summaries from a file
+///
+/// The file should contain one JSON object per line. This is the format produced by
+/// [append_run_summary].
+pub fn load_summary_runs(path: PathBuf) -> anyhow::Result<Vec<RunSummary>> {
+    let file = std::fs::File::open(path)?;
+    let reader = std::io::BufReader::new(file);
+    let mut runs = Vec::new();
+    for line in reader.lines() {
+        let line = line?;
+        let run: RunSummary = serde_json::from_str(&line)?;
+        runs.push(run);
+    }
+    Ok(runs)
+}
diff --git a/scenarios/validation_receipts/README.md b/scenarios/validation_receipts/README.md
@@ -21,7 +21,7 @@ You need around at least 10 peers, or the nodes will never get the required numb
 
 ### NO_VALIDATION_COMPLETE
 
-By default this scenario will wait for a complete set of validation receipts before moving on to commit the next record. If you want to publish new records on every round, building up an ever-growing list of action hashes to check for validation complete, run with the `NO_VALIDATION_COMPLETE=1` environment variable.
+By default, this scenario will wait for a complete set of validation receipts before moving on to commit the next record. If you want to publish new records on every round, building up an ever-growing list of action hashes to check for validation complete, run with the `NO_VALIDATION_COMPLETE=1` environment variable.
 
 Example:
 

diff --git a/scenarios/validation_receipts/src/main.rs b/scenarios/validation_receipts/src/main.rs
@@ -186,6 +186,7 @@ fn main() -> WindTunnelResult<()> {
         TryCPAgentContext<ScenarioValues>,
     >::new_with_init(env!("CARGO_PKG_NAME"))?
     .into_std()
+        .add_capture_env("NO_VALIDATION_COMPLETE")
     .use_agent_setup(agent_setup)
     .use_agent_behaviour(agent_behaviour)
     .use_agent_teardown(agent_teardown);

diff --git a/summariser/Cargo.toml b/summariser/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "holochain_summariser"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+influxdb = { workspace = true }
+itertools = { workspace = true }
+
+wind_tunnel_summary_model = { workspace = true }
+
+[lints]
+workspace = true
-Original file line number
+Diff line change
@@ Expand Up / @@ -30,3 +30,4 @@ logs/**/*.log @@
     *.key
     /data
     scenarios/*/logs
+    run_summary.jsonl