From 219627f075108434bfa99fd15b965cb109729db2 Mon Sep 17 00:00:00 2001 From: Pietro Date: Wed, 17 Jul 2024 14:43:16 +0200 Subject: [PATCH 01/10] Add code for wrapping the node metrics canister --- Cargo.Bazel.lock | 16 +++- Cargo.lock | 1 + Cargo.toml | 1 + rs/cli/src/commands/mod.rs | 8 ++ .../node_metrics/from_node_metrics.rs | 94 +++++++++++++++++++ .../node_metrics/from_subnet_management.rs | 79 ++++++++++++++++ rs/cli/src/commands/node_metrics/mod.rs | 42 +++++++++ .../canister_ids.json | 5 + .../src/trustworthy-node-metrics/Cargo.toml | 4 +- .../src/trustworthy-node-metrics/src/lib.rs | 2 +- rs/ic-canisters/Cargo.toml | 1 + rs/ic-canisters/src/lib.rs | 1 + rs/ic-canisters/src/node_metrics.rs | 53 +++++++++++ 13 files changed, 302 insertions(+), 5 deletions(-) create mode 100644 rs/cli/src/commands/node_metrics/from_node_metrics.rs create mode 100644 rs/cli/src/commands/node_metrics/from_subnet_management.rs create mode 100644 rs/cli/src/commands/node_metrics/mod.rs create mode 100644 rs/dre-canisters/trustworthy-node-metrics/canister_ids.json create mode 100644 rs/ic-canisters/src/node_metrics.rs diff --git a/Cargo.Bazel.lock b/Cargo.Bazel.lock index 9eaffcbc8..3576759aa 100644 --- a/Cargo.Bazel.lock +++ b/Cargo.Bazel.lock @@ -1,5 +1,5 @@ { - "checksum": "4a6bc91f724363b615928d9a2dba8822b5d9c444065dfa889058897281c98d8c", + "checksum": "176efb382cc5a1fbc48ef3cabec774254b046c828d01cf8d169d4ba58ddcc544", "crates": { "actix-codec 0.5.2": { "name": "actix-codec", @@ -45731,8 +45731,18 @@ "name": "trustworthy-node-metrics", "version": "0.4.3", "repository": null, - "targets": [], - "library_target_name": null, + "targets": [ + { + "Library": { + "crate_name": "trustworthy_node_metrics", + "crate_root": "src/lib.rs", + "srcs": [ + "**/*.rs" + ] + } + } + ], + "library_target_name": "trustworthy_node_metrics", "common_attrs": { "compile_data_glob": [ "**" diff --git a/Cargo.lock b/Cargo.lock index 35d44a268..9b4ca7c16 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ 
-3438,6 +3438,7 @@ dependencies = [ "sha2 0.10.8", "simple_asn1", "thiserror", + "trustworthy-node-metrics", "url", ] diff --git a/Cargo.toml b/Cargo.toml index fcdf17984..20f21dd05 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -73,6 +73,7 @@ cryptoki = "0.3.1" csv = "1.3.0" custom_error = "1.9.2" decentralization = { path = "rs/decentralization" } +trustworthy-node-metrics = { path = "rs/dre-canisters/trustworthy-node-metrics/src/trustworthy-node-metrics" } derive_builder = "0.20.0" derive_more = "0.99.18" dialoguer = "0.11.0" diff --git a/rs/cli/src/commands/mod.rs b/rs/cli/src/commands/mod.rs index f5b26d192..f57308f1c 100644 --- a/rs/cli/src/commands/mod.rs +++ b/rs/cli/src/commands/mod.rs @@ -10,6 +10,7 @@ use get::Get; use heal::Heal; use hostos::HostOsCmd; use ic_management_types::{MinNakamotoCoefficients, Network, NodeFeature}; +use node_metrics::NodeMetricsCmd; use nodes::Nodes; use proposals::Proposals; use propose::Propose; @@ -30,6 +31,7 @@ mod firewall; mod get; mod heal; pub mod hostos; +mod node_metrics; mod nodes; mod proposals; mod propose; @@ -123,6 +125,9 @@ pub enum Subcommands { /// Manage versions Version(VersionCmd), + /// Fetch node metrics + NodeMetrics(NodeMetricsCmd), + /// Manage hostos versions HostOs(HostOsCmd), @@ -257,6 +262,7 @@ impl ExecutableCommand for Args { Subcommands::Upgrade(c) => c.require_ic_admin(), Subcommands::Proposals(c) => c.require_ic_admin(), Subcommands::Completions(c) => c.require_ic_admin(), + Subcommands::NodeMetrics(c) => c.require_ic_admin(), } } @@ -279,6 +285,7 @@ impl ExecutableCommand for Args { Subcommands::Upgrade(c) => c.execute(ctx).await, Subcommands::Proposals(c) => c.execute(ctx).await, Subcommands::Completions(c) => c.execute(ctx).await, + Subcommands::NodeMetrics(c) => c.execute(ctx).await, } } @@ -301,6 +308,7 @@ impl ExecutableCommand for Args { Subcommands::Upgrade(c) => c.validate(cmd), Subcommands::Proposals(c) => c.validate(cmd), Subcommands::Completions(c) => c.validate(cmd), + 
Subcommands::NodeMetrics(c) => c.validate(cmd), } } } diff --git a/rs/cli/src/commands/node_metrics/from_node_metrics.rs b/rs/cli/src/commands/node_metrics/from_node_metrics.rs new file mode 100644 index 000000000..179e7a3bf --- /dev/null +++ b/rs/cli/src/commands/node_metrics/from_node_metrics.rs @@ -0,0 +1,94 @@ +use std::{ + collections::{btree_map::Entry, BTreeMap}, + sync::Mutex, +}; + +use clap::Args; +use futures::future::try_join_all; +use ic_canisters::{ + management::{NodeMetrics, NodeMetricsHistoryResponse}, + node_metrics::NodeMetricsCanisterWrapper, +}; +use ic_types::PrincipalId; +use itertools::Itertools; +use log::info; + +use super::{ExecutableCommand, IcAdminRequirement}; + +#[derive(Debug, Args)] +pub struct FromNodeMetrics { + /// Start at timestamp in nanoseconds, if empty will dump daily metrics + /// since May 18, 2024 + pub start_at_timestamp: u64, + + /// Vector of subnets to query, if empty will dump metrics for + /// all subnets + pub subnet_ids: Vec, +} + +impl ExecutableCommand for FromNodeMetrics { + fn require_ic_admin(&self) -> IcAdminRequirement { + IcAdminRequirement::Detect + } + + async fn execute(&self, ctx: crate::ctx::DreContext) -> anyhow::Result<()> { + let lock = Mutex::new(()); + let mut metrics_by_subnet = BTreeMap::new(); + + let canister_agent = ctx.create_ic_agent_canister_client(Some(lock))?; + info!("Started action..."); + + let metrics_client = NodeMetricsCanisterWrapper::new(canister_agent.agent); + + let node_metrics_response = match &self.subnet_ids.is_empty() { + true => metrics_client.get_node_metrics(None, Some(self.start_at_timestamp)).await?, + false => { + let subnets = self.subnet_ids.clone(); + let metrics_client_ref = &metrics_client; + + try_join_all( + subnets + .into_iter() + .map(|subnet| async move { metrics_client_ref.get_node_metrics(Some(subnet), Some(self.start_at_timestamp)).await }), + ) + .await? 
+ .into_iter() + .flatten() + .collect_vec() + } + }; + + for metrics in node_metrics_response { + let node_metrics_history = NodeMetricsHistoryResponse { + timestamp_nanos: metrics.ts, + node_metrics: metrics + .node_metrics + .into_iter() + .map(|m| NodeMetrics { + node_id: PrincipalId::from(m.node_id), + num_block_failures_total: m.num_block_failures_total, + num_blocks_proposed_total: m.num_blocks_proposed_total, + }) + .collect_vec(), + }; + + match metrics_by_subnet.entry(metrics.subnet_id) { + Entry::Occupied(mut entry) => { + let v: &mut Vec = entry.get_mut(); + v.push(node_metrics_history) + } + Entry::Vacant(entry) => { + entry.insert(vec![node_metrics_history]); + } + } + } + + metrics_by_subnet.values_mut().for_each(|f| f.sort_by_key(|k| k.timestamp_nanos)); + + println!("{}", serde_json::to_string_pretty(&metrics_by_subnet)?); + + Ok(()) + } + + fn validate(&self, _cmd: &mut clap::Command) {} +} diff --git a/rs/cli/src/commands/node_metrics/from_subnet_management.rs b/rs/cli/src/commands/node_metrics/from_subnet_management.rs new file mode 100644 index 000000000..641fbe54d --- /dev/null +++ b/rs/cli/src/commands/node_metrics/from_subnet_management.rs @@ -0,0 +1,79 @@ +use std::{ + collections::BTreeMap, + str::FromStr, + sync::{Arc, Mutex}, +}; + +use clap::Args; +use ic_canisters::{management::WalletCanisterWrapper, registry::RegistryCanisterWrapper}; +use ic_types::{CanisterId, PrincipalId}; +use log::{info, warn}; + +use super::{ExecutableCommand, IcAdminRequirement}; + +#[derive(Debug, Args)] +pub struct FromSubnetManagement { + /// Wallet that should be used to query node metrics history + /// in form of canister id + pub wallet: String, + + /// Start at timestamp in nanoseconds + pub start_at_timestamp: u64, + + /// Vector of subnets to query, if empty will dump metrics for + /// all subnets + pub subnet_ids: Vec, +} + +impl ExecutableCommand for FromSubnetManagement { + fn require_ic_admin(&self) -> IcAdminRequirement { + 
IcAdminRequirement::Detect + } + + async fn execute(&self, ctx: crate::ctx::DreContext) -> anyhow::Result<()> { + let lock = Mutex::new(()); + let canister_agent = ctx.create_ic_agent_canister_client(Some(lock))?; + info!("Started action..."); + let wallet_client = Arc::new(WalletCanisterWrapper::new(canister_agent.agent.clone())); + + let subnets = match &self.subnet_ids.is_empty() { + false => self.subnet_ids.clone(), + true => { + let registry_client = RegistryCanisterWrapper::new(canister_agent.agent); + registry_client.get_subnets().await? + } + }; + + let mut metrics_by_subnet = BTreeMap::new(); + info!("Running in parallel mode"); + + let wallet: CanisterId = CanisterId::from_str(&self.wallet)?; + + let handles = subnets + .into_iter() + .map(|s| (s, wallet_client.clone(), self.start_at_timestamp)) + .map(|(s, w, start)| { + info!("Spawning thread for subnet: {}", s); + tokio::spawn(async move { (s, w.get_node_metrics_history(wallet, start, s).await) }) + }); + + for handle in handles { + let (subnet, maybe_metrics) = handle.await?; + match maybe_metrics { + Ok(m) => { + info!("Received metrics for subnet: {}", subnet); + metrics_by_subnet.insert(subnet, m); + } + Err(e) => { + warn!("Couldn't fetch trustworthy metrics for subnet {}: {}", subnet, e); + } + }; + } + + println!("{}", serde_json::to_string_pretty(&metrics_by_subnet)?); + + Ok(()) + } + + fn validate(&self, _cmd: &mut clap::Command) {} +} diff --git a/rs/cli/src/commands/node_metrics/mod.rs b/rs/cli/src/commands/node_metrics/mod.rs new file mode 100644 index 000000000..1c948521a --- /dev/null +++ b/rs/cli/src/commands/node_metrics/mod.rs @@ -0,0 +1,42 @@ +use super::{ExecutableCommand, IcAdminRequirement}; +use clap::{Args, Subcommand}; +use from_node_metrics::FromNodeMetrics; +use from_subnet_management::FromSubnetManagement; + +mod from_node_metrics; +mod from_subnet_management; + +#[derive(Args, Debug)] +pub struct NodeMetricsCmd { + #[clap(subcommand)] + pub subcommand: NodeMetricsCommand, 
+} + +#[derive(Subcommand, Debug)] +pub enum NodeMetricsCommand { + FromSubnetManagementCanister(FromSubnetManagement), + FromNodeMetricsCanister(FromNodeMetrics), +} + +impl ExecutableCommand for NodeMetricsCmd { + fn require_ic_admin(&self) -> IcAdminRequirement { + match &self.subcommand { + NodeMetricsCommand::FromNodeMetricsCanister(c) => c.require_ic_admin(), + NodeMetricsCommand::FromSubnetManagementCanister(c) => c.require_ic_admin(), + } + } + + async fn execute(&self, ctx: crate::ctx::DreContext) -> anyhow::Result<()> { + match &self.subcommand { + NodeMetricsCommand::FromNodeMetricsCanister(c) => c.execute(ctx).await, + NodeMetricsCommand::FromSubnetManagementCanister(c) => c.execute(ctx).await, + } + } + + fn validate(&self, cmd: &mut clap::Command) { + match &self.subcommand { + NodeMetricsCommand::FromNodeMetricsCanister(c) => c.validate(cmd), + NodeMetricsCommand::FromSubnetManagementCanister(c) => c.validate(cmd), + } + } +} diff --git a/rs/dre-canisters/trustworthy-node-metrics/canister_ids.json b/rs/dre-canisters/trustworthy-node-metrics/canister_ids.json new file mode 100644 index 000000000..801c55b3c --- /dev/null +++ b/rs/dre-canisters/trustworthy-node-metrics/canister_ids.json @@ -0,0 +1,5 @@ +{ + "trustworthy-node-metrics": { + "ic": "oqi72-gaaaa-aaaam-ac2pq-cai" + } +} \ No newline at end of file diff --git a/rs/dre-canisters/trustworthy-node-metrics/src/trustworthy-node-metrics/Cargo.toml b/rs/dre-canisters/trustworthy-node-metrics/src/trustworthy-node-metrics/Cargo.toml index 81a6d6fd2..dd4c49695 100644 --- a/rs/dre-canisters/trustworthy-node-metrics/src/trustworthy-node-metrics/Cargo.toml +++ b/rs/dre-canisters/trustworthy-node-metrics/src/trustworthy-node-metrics/Cargo.toml @@ -9,7 +9,9 @@ documentation.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [lib] -crate-type = ["cdylib"] +crate-type = ["cdylib", "lib"] +name = "trustworthy_node_metrics" +path = "src/lib.rs" 
[dependencies] candid = { workspace = true } diff --git a/rs/dre-canisters/trustworthy-node-metrics/src/trustworthy-node-metrics/src/lib.rs b/rs/dre-canisters/trustworthy-node-metrics/src/trustworthy-node-metrics/src/lib.rs index 7143d3eac..557c34c05 100644 --- a/rs/dre-canisters/trustworthy-node-metrics/src/trustworthy-node-metrics/src/lib.rs +++ b/rs/dre-canisters/trustworthy-node-metrics/src/trustworthy-node-metrics/src/lib.rs @@ -4,7 +4,7 @@ use std::time::Duration; use types::{SubnetNodeMetricsArgs, SubnetNodeMetricsResponse}; mod metrics_manager; mod stable_memory; -mod types; +pub mod types; // Management canisters updates node metrics every day const TIMER_INTERVAL_SEC: u64 = 60 * 60 * 24; diff --git a/rs/ic-canisters/Cargo.toml b/rs/ic-canisters/Cargo.toml index 05076e834..b0f110637 100644 --- a/rs/ic-canisters/Cargo.toml +++ b/rs/ic-canisters/Cargo.toml @@ -33,3 +33,4 @@ simple_asn1 = { workspace = true } thiserror = { workspace = true } url = { workspace = true } ic-sns-wasm = { workspace = true } +trustworthy-node-metrics = { workspace = true } diff --git a/rs/ic-canisters/src/lib.rs b/rs/ic-canisters/src/lib.rs index 9a751242c..c15f20018 100644 --- a/rs/ic-canisters/src/lib.rs +++ b/rs/ic-canisters/src/lib.rs @@ -20,6 +20,7 @@ use url::Url; pub mod governance; pub mod management; +pub mod node_metrics; pub mod parallel_hardware_identity; pub mod registry; pub mod sns_wasm; diff --git a/rs/ic-canisters/src/node_metrics.rs b/rs/ic-canisters/src/node_metrics.rs new file mode 100644 index 000000000..69b415a1b --- /dev/null +++ b/rs/ic-canisters/src/node_metrics.rs @@ -0,0 +1,53 @@ +use std::str::FromStr; + +use candid::{Decode, Encode}; +use ic_agent::Agent; +use ic_base_types::{CanisterId, PrincipalId}; +use log::error; +use trustworthy_node_metrics::types::{SubnetNodeMetricsArgs, SubnetNodeMetricsResponse}; + +use crate::IcAgentCanisterClient; + +const NODE_METRICS_CANISTER: &str = "oqi72-gaaaa-aaaam-ac2pq-cai"; + +pub struct NodeMetricsCanisterWrapper { 
+ agent: Agent, + node_metrics_canister: CanisterId, +} + +impl From for NodeMetricsCanisterWrapper { + fn from(value: IcAgentCanisterClient) -> Self { + NodeMetricsCanisterWrapper::new(value.agent) + } +} + +impl NodeMetricsCanisterWrapper { + pub fn new(agent: Agent) -> Self { + Self { + agent, + node_metrics_canister: CanisterId::from_str(NODE_METRICS_CANISTER).unwrap(), + } + } + + pub async fn get_node_metrics(&self, subnet_id: Option, from_ts: Option) -> anyhow::Result> { + let request = SubnetNodeMetricsArgs { + ts: from_ts, + subnet_id: subnet_id.map(|s| s.0), + }; + + let response = self + .agent + .query(&self.node_metrics_canister.into(), "subnet_node_metrics") + .with_arg(Encode!(&request)?) + .call() + .await?; + + match Decode!(&response, Result, String>)? { + Ok(result) => Ok(result), + Err(err) => { + error!("Failed to decode Node Metrics"); + Err(anyhow::anyhow!(err)) + } + } + } +} From e270c9eb5ca7eb9c99947cc1c45dad5441742693 Mon Sep 17 00:00:00 2001 From: Pietro Date: Wed, 17 Jul 2024 14:44:50 +0200 Subject: [PATCH 02/10] Fix typo --- rs/cli/src/commands/node_metrics/from_node_metrics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rs/cli/src/commands/node_metrics/from_node_metrics.rs b/rs/cli/src/commands/node_metrics/from_node_metrics.rs index 179e7a3bf..4ac3d3d4f 100644 --- a/rs/cli/src/commands/node_metrics/from_node_metrics.rs +++ b/rs/cli/src/commands/node_metrics/from_node_metrics.rs @@ -17,7 +17,7 @@ use super::{ExecutableCommand, IcAdminRequirement}; #[derive(Debug, Args)] pub struct FromNodeMetrics { - /// Start at timestamp in nanoseconds, if empty will dump daily metrics + /// Start at timestamp in nanoseconds, if 0 it will dump daily metrics /// since May 18, 2024 pub start_at_timestamp: u64, From 639615828cfe986737b795fb74ac4e2cf33d6544 Mon Sep 17 00:00:00 2001 From: Pietro Date: Wed, 17 Jul 2024 15:46:05 +0200 Subject: [PATCH 03/10] Added docs for retrieving untrusted metrics --- Cargo.Bazel.lock | 14 ++++- 
.../trustworthy-metrics.md | 6 +- docs/trustworthy-metrics/untrusted-metrics.md | 62 +++++++++++++++++++ 3 files changed, 76 insertions(+), 6 deletions(-) create mode 100644 docs/trustworthy-metrics/untrusted-metrics.md diff --git a/Cargo.Bazel.lock b/Cargo.Bazel.lock index 96c0b71b1..bbe594131 100644 --- a/Cargo.Bazel.lock +++ b/Cargo.Bazel.lock @@ -1,5 +1,5 @@ { - "checksum": "f3c6195e6654db56b4c772ec401cc5689f9b0fa6466d7c0fc6543302e0774321", + "checksum": "21560a1c6cf60a5df7d6ab8a9c786242587fcc03a6171df0fc1574fc13c8911c", "crates": { "actix-codec 0.5.2": { "name": "actix-codec", @@ -28369,29 +28369,35 @@ ], "crate_features": { "common": [ - "elf", - "errno", "general", "ioctl", "no_std" ], "selects": { "aarch64-unknown-linux-gnu": [ + "elf", + "errno", "prctl", "std", "system" ], "arm-unknown-linux-gnueabi": [ + "elf", + "errno", "prctl", "std", "system" ], "armv7-unknown-linux-gnueabi": [ + "elf", + "errno", "prctl", "std", "system" ], "i686-unknown-linux-gnu": [ + "elf", + "errno", "prctl", "std", "system" @@ -28407,6 +28413,8 @@ "system" ], "x86_64-unknown-linux-gnu": [ + "elf", + "errno", "prctl", "std", "system" diff --git a/docs/trustworthy-metrics/trustworthy-metrics.md b/docs/trustworthy-metrics/trustworthy-metrics.md index 98182a69c..f8c9c0759 100644 --- a/docs/trustworthy-metrics/trustworthy-metrics.md +++ b/docs/trustworthy-metrics/trustworthy-metrics.md @@ -92,7 +92,7 @@ You can obtain the DRE tool by following the instructions from [getting started] To test out the command you can run the following command ```bash -dre trustworthy-metrics [...] +dre node-metrics from-subnet-management-canister [...] ``` ??? 
tip "Explanation of the arguments" @@ -149,12 +149,12 @@ Authentication with a private key is recommended, since it allows for more paral Here are some real-world examples of how metrics can be retrieved: ```bash -dre --private-key-pem identity.pem trustworthy-metrics nanx4-baaaa-aaaap-qb4sq-cai 0 > data.json +dre --private-key-pem identity.pem node-metrics from-subnet-management-canister nanx4-baaaa-aaaap-qb4sq-cai 0 > data.json ``` Or with an HSM: ```bash -dre --hsm-slot 0 --hsm-key-id 0 --hsm-pin "" trustworthy-metrics nanx4-baaaa-aaaap-qb4sq-cai 0 > data.json +dre --hsm-slot 0 --hsm-key-id 0 --hsm-pin "" node-metrics from-subnet-management-canister nanx4-baaaa-aaaap-qb4sq-cai 0 > data.json ``` You can check some examples of the analytics possible with the IC Mainnet data in the following [Jupyter Notebook](./TrustworthyMetricsAnalytics.ipynb) diff --git a/docs/trustworthy-metrics/untrusted-metrics.md b/docs/trustworthy-metrics/untrusted-metrics.md new file mode 100644 index 000000000..14cc2ff7a --- /dev/null +++ b/docs/trustworthy-metrics/untrusted-metrics.md @@ -0,0 +1,62 @@ + +# Get untrusted metrics from Node Metrics canister + +## Introduction + +Untrusted Node Metrics retrieval offers an alternative approach to accessing node performance data, relying on a canister that collects these metrics instead of querying the management canister of each subnet directly. + +This method allows users to fetch node metrics dating back to May 18, 2024, providing a longer historical view than the trustworthy method, which only offers data from the past month. + +The key drawback of querying untrusted metrics is that it introduces an intermediary, the canister responsible for data aggregation, which should NOT be considered trustworthy. + +Despite these concerns, the extended temporal coverage can be valuable for certain analytical purposes. 
Additionally, querying the node metrics canister is cheaper because it allows for a query call instead of an update call and does not require a wallet canister. + +This entire process is shown in the following diagram: + +```mermaid +%%{init: {'theme':'forest'}}%% +graph TD + subgraph "Subnet 1" + S1["Consensus"] -->|Produces Trustworthy Data| M1["Management Canister 1"] --> M4["Node Metrics Canister"] + end + subgraph "Subnet 2" + S2["Consensus"] -->|Produces Trustworthy Data| M2["Management Canister 2"] + end + subgraph "Subnet 3" + S3["Consensus"] -->|Produces Trustworthy Data| M3["Management Canister 3"] + end + M2 --> M4 + M3 --> M4 + M4 --> DRE["DRE tool (open source)"] + DRE --> User + User --> |Analyze & Process Data| F["Node Metrics"] + + + style S1 fill:#f9f,stroke:#333,stroke-width:2px + style S2 fill:#f9f,stroke:#333,stroke-width:2px + style S3 fill:#f9f,stroke:#333,stroke-width:2px + style DRE fill:#ff9,stroke:#333,stroke-width:2px + style F fill:#9ff,stroke:#333,stroke-width:2px +``` + +### Using the cli + +You can obtain the DRE tool by following the instructions from [getting started](../getting-started.md) + +To test out the command you can run the following command + +```bash +dre node-metrics from-node-metrics-canister [...] +``` + +??? tip "Explanation of the arguments" + 3. `start-at-timestamp` - used for filtering the output. To get all metrics, provide 0 + 4. 
`subnet-id` - subnets to query, if empty will provide metrics for all subnets + +# Example use + +Here are some real-world examples of how metrics can be retrieved: + +```bash +dre node-metrics from-node-metrics-canister 0 > data.json +``` From 1a2abff306051401a9cdcd2cec7898875ef5d58c Mon Sep 17 00:00:00 2001 From: Pietro Date: Wed, 17 Jul 2024 16:29:07 +0200 Subject: [PATCH 04/10] Change docs and refactor from --- .../trustworthy-metrics.md | 6 +- docs/trustworthy-metrics/untrusted-metrics.md | 4 +- ...t_management.rs => management_canister.rs} | 4 +- ...om_node_metrics.rs => metrics_canister.rs} | 4 +- rs/cli/src/commands/node_metrics/mod.rs | 56 +++++++++++++++---- 5 files changed, 53 insertions(+), 21 deletions(-) rename rs/cli/src/commands/node_metrics/{from_subnet_management.rs => management_canister.rs} (96%) rename rs/cli/src/commands/node_metrics/{from_node_metrics.rs => metrics_canister.rs} (97%) diff --git a/docs/trustworthy-metrics/trustworthy-metrics.md b/docs/trustworthy-metrics/trustworthy-metrics.md index f8c9c0759..762d01803 100644 --- a/docs/trustworthy-metrics/trustworthy-metrics.md +++ b/docs/trustworthy-metrics/trustworthy-metrics.md @@ -92,7 +92,7 @@ You can obtain the DRE tool by following the instructions from [getting started] To test out the command you can run the following command ```bash -dre node-metrics from-subnet-management-canister [...] +dre node-metrics from management-canister [...] ``` ??? 
tip "Explanation of the arguments" @@ -149,12 +149,12 @@ Authentication with a private key is recommended, since it allows for more paral Here are some real-world examples of how metrics can be retrieved: ```bash -dre --private-key-pem identity.pem node-metrics from-subnet-management-canister nanx4-baaaa-aaaap-qb4sq-cai 0 > data.json +dre --private-key-pem identity.pem node-metrics from management-canister nanx4-baaaa-aaaap-qb4sq-cai 0 > data.json ``` Or with an HSM: ```bash -dre --hsm-slot 0 --hsm-key-id 0 --hsm-pin "" node-metrics from-subnet-management-canister nanx4-baaaa-aaaap-qb4sq-cai 0 > data.json +dre --hsm-slot 0 --hsm-key-id 0 --hsm-pin "" node-metrics from management-canister nanx4-baaaa-aaaap-qb4sq-cai 0 > data.json ``` You can check some examples of the analytics possible with the IC Mainnet data in the following [Jupyter Notebook](./TrustworthyMetricsAnalytics.ipynb) diff --git a/docs/trustworthy-metrics/untrusted-metrics.md b/docs/trustworthy-metrics/untrusted-metrics.md index 14cc2ff7a..431bcf148 100644 --- a/docs/trustworthy-metrics/untrusted-metrics.md +++ b/docs/trustworthy-metrics/untrusted-metrics.md @@ -46,7 +46,7 @@ You can obtain the DRE tool by following the instructions from [getting started] To test out the command you can run the following command ```bash -dre node-metrics from-node-metrics-canister [...] +dre node-metrics from metrics-canister [...] ``` ??? tip "Explanation of the arguments" @@ -58,5 +58,5 @@ dre node-metrics from-node-metrics-canister [... 
Here are some real-world examples of how metrics can be retrieved: ```bash -dre node-metrics from-node-metrics-canister 0 > data.json +dre node-metrics from metrics-canister 0 > data.json ``` diff --git a/rs/cli/src/commands/node_metrics/from_subnet_management.rs b/rs/cli/src/commands/node_metrics/management_canister.rs similarity index 96% rename from rs/cli/src/commands/node_metrics/from_subnet_management.rs rename to rs/cli/src/commands/node_metrics/management_canister.rs index 641fbe54d..7a8920c7f 100644 --- a/rs/cli/src/commands/node_metrics/from_subnet_management.rs +++ b/rs/cli/src/commands/node_metrics/management_canister.rs @@ -12,7 +12,7 @@ use log::{info, warn}; use super::{ExecutableCommand, IcAdminRequirement}; #[derive(Debug, Args)] -pub struct FromSubnetManagement { +pub struct ManagementCanister { /// Wallet that should be used to query node metrics history /// in form of canister id pub wallet: String, @@ -25,7 +25,7 @@ pub struct FromSubnetManagement { pub subnet_ids: Vec, } -impl ExecutableCommand for FromSubnetManagement { +impl ExecutableCommand for ManagementCanister { fn require_ic_admin(&self) -> IcAdminRequirement { IcAdminRequirement::Detect } diff --git a/rs/cli/src/commands/node_metrics/from_node_metrics.rs b/rs/cli/src/commands/node_metrics/metrics_canister.rs similarity index 97% rename from rs/cli/src/commands/node_metrics/from_node_metrics.rs rename to rs/cli/src/commands/node_metrics/metrics_canister.rs index 4ac3d3d4f..548953a91 100644 --- a/rs/cli/src/commands/node_metrics/from_node_metrics.rs +++ b/rs/cli/src/commands/node_metrics/metrics_canister.rs @@ -16,7 +16,7 @@ use log::info; use super::{ExecutableCommand, IcAdminRequirement}; #[derive(Debug, Args)] -pub struct FromNodeMetrics { +pub struct MetricsCanister { /// Start at timestamp in nanoseconds, if 0 it will dump daily metrics /// since May 18, 2024 pub start_at_timestamp: u64, @@ -26,7 +26,7 @@ pub struct FromNodeMetrics { pub subnet_ids: Vec, } -impl ExecutableCommand 
for FromNodeMetrics { +impl ExecutableCommand for MetricsCanister { fn require_ic_admin(&self) -> IcAdminRequirement { IcAdminRequirement::Detect } diff --git a/rs/cli/src/commands/node_metrics/mod.rs b/rs/cli/src/commands/node_metrics/mod.rs index 1c948521a..531ca5568 100644 --- a/rs/cli/src/commands/node_metrics/mod.rs +++ b/rs/cli/src/commands/node_metrics/mod.rs @@ -1,10 +1,10 @@ use super::{ExecutableCommand, IcAdminRequirement}; use clap::{Args, Subcommand}; -use from_node_metrics::FromNodeMetrics; -use from_subnet_management::FromSubnetManagement; +use management_canister::ManagementCanister; +use metrics_canister::MetricsCanister; -mod from_node_metrics; -mod from_subnet_management; +mod management_canister; +mod metrics_canister; #[derive(Args, Debug)] pub struct NodeMetricsCmd { @@ -14,29 +14,61 @@ pub struct NodeMetricsCmd { #[derive(Subcommand, Debug)] pub enum NodeMetricsCommand { - FromSubnetManagementCanister(FromSubnetManagement), - FromNodeMetricsCanister(FromNodeMetrics), + From(FromNodeMetricsCmd), } impl ExecutableCommand for NodeMetricsCmd { fn require_ic_admin(&self) -> IcAdminRequirement { match &self.subcommand { - NodeMetricsCommand::FromNodeMetricsCanister(c) => c.require_ic_admin(), - NodeMetricsCommand::FromSubnetManagementCanister(c) => c.require_ic_admin(), + NodeMetricsCommand::From(c) => c.require_ic_admin(), } } async fn execute(&self, ctx: crate::ctx::DreContext) -> anyhow::Result<()> { match &self.subcommand { - NodeMetricsCommand::FromNodeMetricsCanister(c) => c.execute(ctx).await, - NodeMetricsCommand::FromSubnetManagementCanister(c) => c.execute(ctx).await, + NodeMetricsCommand::From(c) => c.execute(ctx).await, } } fn validate(&self, cmd: &mut clap::Command) { match &self.subcommand { - NodeMetricsCommand::FromNodeMetricsCanister(c) => c.validate(cmd), - NodeMetricsCommand::FromSubnetManagementCanister(c) => c.validate(cmd), + NodeMetricsCommand::From(c) => c.validate(cmd), + } + } +} + + +#[derive(Args, Debug)] +pub struct 
FromNodeMetricsCmd { + #[clap(subcommand)] + pub subcommand: FromNodeMetricsCommand, +} + +#[derive(Subcommand, Debug)] +pub enum FromNodeMetricsCommand { + ManagementCanister(ManagementCanister), + MetricsCanister(MetricsCanister), +} + +impl ExecutableCommand for FromNodeMetricsCmd { + fn require_ic_admin(&self) -> IcAdminRequirement { + match &self.subcommand { + FromNodeMetricsCommand::MetricsCanister(c) => c.require_ic_admin(), + FromNodeMetricsCommand::ManagementCanister(c) => c.require_ic_admin(), + } + } + + async fn execute(&self, ctx: crate::ctx::DreContext) -> anyhow::Result<()> { + match &self.subcommand { + FromNodeMetricsCommand::MetricsCanister(c) => c.execute(ctx).await, + FromNodeMetricsCommand::ManagementCanister(c) => c.execute(ctx).await, + } + } + + fn validate(&self, cmd: &mut clap::Command) { + match &self.subcommand { + FromNodeMetricsCommand::MetricsCanister(c) => c.validate(cmd), + FromNodeMetricsCommand::ManagementCanister(c) => c.validate(cmd), } } } From c718bf2c5c8c92b66ce7e2ec6ff59af6957ea598 Mon Sep 17 00:00:00 2001 From: Pietro Date: Wed, 17 Jul 2024 16:31:34 +0200 Subject: [PATCH 05/10] Run rustfmt --- rs/cli/src/commands/node_metrics/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/rs/cli/src/commands/node_metrics/mod.rs b/rs/cli/src/commands/node_metrics/mod.rs index 531ca5568..424428c41 100644 --- a/rs/cli/src/commands/node_metrics/mod.rs +++ b/rs/cli/src/commands/node_metrics/mod.rs @@ -37,7 +37,6 @@ impl ExecutableCommand for NodeMetricsCmd { } } - #[derive(Args, Debug)] pub struct FromNodeMetricsCmd { #[clap(subcommand)] From 03abbbdb1b67f4812f2918930a286a1bf5102a56 Mon Sep 17 00:00:00 2001 From: Pietro Date: Wed, 17 Jul 2024 16:41:00 +0200 Subject: [PATCH 06/10] Add node metrics canister lib --- .../src/trustworthy-node-metrics/BUILD.bazel | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/rs/dre-canisters/trustworthy-node-metrics/src/trustworthy-node-metrics/BUILD.bazel 
b/rs/dre-canisters/trustworthy-node-metrics/src/trustworthy-node-metrics/BUILD.bazel index e69de29bb..361cc1626 100644 --- a/rs/dre-canisters/trustworthy-node-metrics/src/trustworthy-node-metrics/BUILD.bazel +++ b/rs/dre-canisters/trustworthy-node-metrics/src/trustworthy-node-metrics/BUILD.bazel @@ -0,0 +1,15 @@ +load("@crate_index_dre//:defs.bzl", "aliases", "all_crate_deps") + +package(default_visibility = ["//visibility:public"]) + +rust_library( + name = "trustworthy_node_metrics", + srcs = glob(["src/**/*.rs"]), + aliases = aliases(), + proc_macro_deps = all_crate_deps( + proc_macro = True, + ), + deps = all_crate_deps( + normal = True, + ), +) From 4fa0c75c3c54a6cc7d9c71fe97bc67c9a94229a0 Mon Sep 17 00:00:00 2001 From: Pietro Date: Wed, 17 Jul 2024 16:52:53 +0200 Subject: [PATCH 07/10] Include canister lib --- .../src/trustworthy-node-metrics/BUILD.bazel | 1 + rs/ic-canisters/BUILD.bazel | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/rs/dre-canisters/trustworthy-node-metrics/src/trustworthy-node-metrics/BUILD.bazel b/rs/dre-canisters/trustworthy-node-metrics/src/trustworthy-node-metrics/BUILD.bazel index 361cc1626..54894d6df 100644 --- a/rs/dre-canisters/trustworthy-node-metrics/src/trustworthy-node-metrics/BUILD.bazel +++ b/rs/dre-canisters/trustworthy-node-metrics/src/trustworthy-node-metrics/BUILD.bazel @@ -1,4 +1,5 @@ load("@crate_index_dre//:defs.bzl", "aliases", "all_crate_deps") +load("@rules_rust//rust:defs.bzl", "rust_library") package(default_visibility = ["//visibility:public"]) diff --git a/rs/ic-canisters/BUILD.bazel b/rs/ic-canisters/BUILD.bazel index 2148d4d67..28ec9d55b 100644 --- a/rs/ic-canisters/BUILD.bazel +++ b/rs/ic-canisters/BUILD.bazel @@ -3,7 +3,9 @@ load("@rules_rust//rust:defs.bzl", "rust_library", "rust_test") package(default_visibility = ["//visibility:public"]) -DEPS = [] +DEPS = [ + "//rs/dre-canisters/trustworthy-node-metrics/src/trustworthy-node-metrics:trustworthy_node_metrics" +] rust_library( name = 
"ic-canisters", From d7d90b09d280ac787ae8ad13003ca871afc7cd50 Mon Sep 17 00:00:00 2001 From: Pietro Date: Thu, 18 Jul 2024 12:10:36 +0200 Subject: [PATCH 08/10] Deploy trustworthy metrics --- rs/cli/BUILD.bazel | 1 + rs/cli/src/commands/mod.rs | 6 +- rs/cli/src/commands/node_metrics.rs | 161 ++++++++++++++++++ .../commands/node_metrics/metrics_canister.rs | 94 ---------- .../management_canister.rs | 0 .../node_metrics_old/metrics_canister.rs | 49 ++++++ .../{node_metrics => node_metrics_old}/mod.rs | 0 rs/ic-canisters/BUILD.bazel | 1 - rs/ic-canisters/src/management.rs | 10 ++ 9 files changed, 224 insertions(+), 98 deletions(-) create mode 100644 rs/cli/src/commands/node_metrics.rs delete mode 100644 rs/cli/src/commands/node_metrics/metrics_canister.rs rename rs/cli/src/commands/{node_metrics => node_metrics_old}/management_canister.rs (100%) create mode 100644 rs/cli/src/commands/node_metrics_old/metrics_canister.rs rename rs/cli/src/commands/{node_metrics => node_metrics_old}/mod.rs (100%) diff --git a/rs/cli/BUILD.bazel b/rs/cli/BUILD.bazel index 7a02565b1..1f7494bcd 100644 --- a/rs/cli/BUILD.bazel +++ b/rs/cli/BUILD.bazel @@ -7,6 +7,7 @@ DEPS = [ "//rs/decentralization", "//rs/ic-management-types", "//rs/ic-management-backend:ic-management-backend-lib", + "//rs/dre-canisters/trustworthy-node-metrics/src/trustworthy-node-metrics:trustworthy_node_metrics" ] package(default_visibility = ["//visibility:public"]) diff --git a/rs/cli/src/commands/mod.rs b/rs/cli/src/commands/mod.rs index f57308f1c..7df2b8d7b 100644 --- a/rs/cli/src/commands/mod.rs +++ b/rs/cli/src/commands/mod.rs @@ -10,7 +10,7 @@ use get::Get; use heal::Heal; use hostos::HostOsCmd; use ic_management_types::{MinNakamotoCoefficients, Network, NodeFeature}; -use node_metrics::NodeMetricsCmd; +use node_metrics::NodeMetrics; use nodes::Nodes; use proposals::Proposals; use propose::Propose; @@ -31,7 +31,6 @@ mod firewall; mod get; mod heal; pub mod hostos; -mod node_metrics; mod nodes; mod proposals; mod 
propose; @@ -42,6 +41,7 @@ mod update_unassigned_nodes; pub mod upgrade; mod version; mod vote; +mod node_metrics; #[derive(Parser, Debug)] #[clap(version = env!("CARGO_PKG_VERSION"), about, author)] @@ -126,7 +126,7 @@ pub enum Subcommands { Version(VersionCmd), /// Fetch node metrics - NodeMetrics(NodeMetricsCmd), + NodeMetrics(NodeMetrics), /// Manage hostos versions HostOs(HostOsCmd), diff --git a/rs/cli/src/commands/node_metrics.rs b/rs/cli/src/commands/node_metrics.rs new file mode 100644 index 000000000..2e4b98ece --- /dev/null +++ b/rs/cli/src/commands/node_metrics.rs @@ -0,0 +1,161 @@ +use std::{ + collections::{btree_map::Entry, BTreeMap}, + str::FromStr, + sync::{Arc, Mutex}, +}; + +use anyhow::Ok; +use clap::{error::ErrorKind, Args}; +use ic_canisters::{ + management::{NodeMetricsHistoryResponse, WalletCanisterWrapper}, + node_metrics::NodeMetricsCanisterWrapper, + registry::RegistryCanisterWrapper, +}; +use ic_types::{CanisterId, PrincipalId}; +use itertools::Itertools; +use log::{info, warn}; + +use super::{ExecutableCommand, IcAdminRequirement}; + +type CLINodeMetrics = BTreeMap>; + +#[derive(Args, Debug)] +pub struct NodeMetrics { + + /// If specified trustworthy node metrics history will be fetched from the IC. 
+ /// If not untrusted node metrics will be fetched from node metrics canister + #[clap(long, global = true)] + pub trustworthy: bool, + + /// Wallet that should be used to query trustworthy node metrics history + /// in form of canister id + #[clap(long)] + pub wallet: Option, + + /// Start at timestamp in nanoseconds + pub start_at_timestamp: u64, + + /// Vector of subnets to query, if empty will dump metrics for + /// all subnets + pub subnet_ids: Vec, +} + +impl NodeMetrics { + async fn get_trustworthy_metrics(&self, canister_agent: ic_canisters::IcAgentCanisterClient) -> anyhow::Result { + let mut metrics_by_subnet = BTreeMap::new(); + let wallet: CanisterId = CanisterId::from_str(self.wallet.as_ref().unwrap().as_str())?; + let wallet_client = Arc::new(WalletCanisterWrapper::new(canister_agent.agent.clone())); + + let subnets = match &self.subnet_ids.is_empty() { + false => self.subnet_ids.clone(), + true => { + let registry_client = RegistryCanisterWrapper::new(canister_agent.agent); + registry_client.get_subnets().await? 
+ } + }; + + let handles = subnets + .into_iter() + .map(|s| (s, wallet_client.clone(), self.start_at_timestamp)) + .map(|(s, w, start)| { + info!("Spawning thread for subnet: {}", s); + tokio::spawn(async move { (s, w.get_node_metrics_history(wallet, start, s).await) }) + }); + + info!("Running in parallel mode"); + + for handle in handles { + let (subnet, maybe_metrics) = handle.await?; + match maybe_metrics { + Result::Ok(m) => { + info!("Received metrics for subnet: {}", subnet); + metrics_by_subnet.insert(subnet, m); + } + Err(e) => { + warn!("Couldn't fetch trustworthy metrics for subnet {}: {}", subnet, e); + } + }; + } + + Ok(metrics_by_subnet) + } + + async fn get_untrusted_metrics(&self, canister_agent: ic_canisters::IcAgentCanisterClient) -> anyhow::Result { + let mut metrics_by_subnet = BTreeMap::new(); + let metrics_client = NodeMetricsCanisterWrapper::new(canister_agent.agent); + + let node_metrics_response = match &self.subnet_ids.is_empty() { + true => metrics_client.get_node_metrics(None, Some(self.start_at_timestamp)).await?, + false => { + let subnets = self.subnet_ids.clone(); + let metrics_client_ref = &metrics_client; + + futures::future::try_join_all( + subnets + .into_iter() + .map(|subnet| async move { metrics_client_ref.get_node_metrics(Some(subnet), Some(self.start_at_timestamp)).await }), + ) + .await? 
+ .into_iter() + .flatten() + .collect_vec() + } + }; + + for metrics in node_metrics_response { + let subnet = PrincipalId::from(metrics.subnet_id); + + let management_metrics = metrics.node_metrics.into_iter().map(|m| m.into()).collect_vec(); + + let management_metrics_history = NodeMetricsHistoryResponse { + timestamp_nanos: metrics.ts, + node_metrics: management_metrics, + }; + + match metrics_by_subnet.entry(subnet) { + Entry::Occupied(mut entry) => { + let v: &mut Vec = entry.get_mut(); + v.push(management_metrics_history) + } + Entry::Vacant(entry) => { + entry.insert(vec![management_metrics_history]); + } + } + } + metrics_by_subnet.values_mut().for_each(|f| f.sort_by_key(|k| k.timestamp_nanos)); + + Ok(metrics_by_subnet) + } +} + +impl ExecutableCommand for NodeMetrics { + fn require_ic_admin(&self) -> IcAdminRequirement { + IcAdminRequirement::Detect + } + + async fn execute(&self, ctx: crate::ctx::DreContext) -> anyhow::Result<()> { + let lock = Mutex::new(()); + let canister_agent: ic_canisters::IcAgentCanisterClient = ctx.create_ic_agent_canister_client(Some(lock))?; + info!("Started action..."); + + let metrics_by_subnet = if self.trustworthy { + self.get_trustworthy_metrics(canister_agent).await + } else { + self.get_untrusted_metrics(canister_agent).await + }?; + + println!("{}", serde_json::to_string_pretty(&metrics_by_subnet)?); + + Ok(()) + } + + fn validate(&self, cmd: &mut clap::Command) { + if self.trustworthy && self.wallet.is_none() { + cmd.error( + ErrorKind::MissingRequiredArgument, + "Wallet is required for fetching trustworthy metrics.", + ) + .exit(); + } + } +} diff --git a/rs/cli/src/commands/node_metrics/metrics_canister.rs b/rs/cli/src/commands/node_metrics/metrics_canister.rs deleted file mode 100644 index 548953a91..000000000 --- a/rs/cli/src/commands/node_metrics/metrics_canister.rs +++ /dev/null @@ -1,94 +0,0 @@ -use std::{ - collections::{btree_map::Entry, BTreeMap}, - sync::Mutex, -}; - -use clap::Args; -use 
futures::future::try_join_all; -use ic_canisters::{ - management::{NodeMetrics, NodeMetricsHistoryResponse}, - node_metrics::NodeMetricsCanisterWrapper, -}; -use ic_types::PrincipalId; -use itertools::Itertools; -use log::info; - -use super::{ExecutableCommand, IcAdminRequirement}; - -#[derive(Debug, Args)] -pub struct MetricsCanister { - /// Start at timestamp in nanoseconds, if 0 it will dump daily metrics - /// since May 18, 2024 - pub start_at_timestamp: u64, - - /// Vector of subnets to query, if empty will dump metrics for - /// all subnets - pub subnet_ids: Vec, -} - -impl ExecutableCommand for MetricsCanister { - fn require_ic_admin(&self) -> IcAdminRequirement { - IcAdminRequirement::Detect - } - - async fn execute(&self, ctx: crate::ctx::DreContext) -> anyhow::Result<()> { - let lock = Mutex::new(()); - let mut metrics_by_subnet = BTreeMap::new(); - - let canister_agent = ctx.create_ic_agent_canister_client(Some(lock))?; - info!("Started action..."); - - let metrics_client = NodeMetricsCanisterWrapper::new(canister_agent.agent); - - let node_metrics_response = match &self.subnet_ids.is_empty() { - true => metrics_client.get_node_metrics(None, Some(self.start_at_timestamp)).await?, - false => { - let subnets = self.subnet_ids.clone(); - let metrics_client_ref = &metrics_client; - - try_join_all( - subnets - .into_iter() - .map(|subnet| async move { metrics_client_ref.get_node_metrics(Some(subnet), Some(self.start_at_timestamp)).await }), - ) - .await? 
- .into_iter() - .flatten() - .collect_vec() - } - }; - - for metrics in node_metrics_response { - let node_metrics_history = NodeMetricsHistoryResponse { - timestamp_nanos: metrics.ts, - node_metrics: metrics - .node_metrics - .into_iter() - .map(|m| NodeMetrics { - node_id: PrincipalId::from(m.node_id), - num_block_failures_total: m.num_block_failures_total, - num_blocks_proposed_total: m.num_blocks_proposed_total, - }) - .collect_vec(), - }; - - match metrics_by_subnet.entry(metrics.subnet_id) { - Entry::Occupied(mut entry) => { - let v: &mut Vec = entry.get_mut(); - v.push(node_metrics_history) - } - Entry::Vacant(entry) => { - entry.insert(vec![node_metrics_history]); - } - } - } - - metrics_by_subnet.values_mut().for_each(|f| f.sort_by_key(|k| k.timestamp_nanos)); - - println!("{}", serde_json::to_string_pretty(&metrics_by_subnet)?); - - Ok(()) - } - - fn validate(&self, _cmd: &mut clap::Command) {} -} diff --git a/rs/cli/src/commands/node_metrics/management_canister.rs b/rs/cli/src/commands/node_metrics_old/management_canister.rs similarity index 100% rename from rs/cli/src/commands/node_metrics/management_canister.rs rename to rs/cli/src/commands/node_metrics_old/management_canister.rs diff --git a/rs/cli/src/commands/node_metrics_old/metrics_canister.rs b/rs/cli/src/commands/node_metrics_old/metrics_canister.rs new file mode 100644 index 000000000..20d8ce60e --- /dev/null +++ b/rs/cli/src/commands/node_metrics_old/metrics_canister.rs @@ -0,0 +1,49 @@ +use std::{ + collections::{btree_map::Entry, BTreeMap}, + sync::Mutex, +}; + +use clap::Args; +use futures::future::try_join_all; +use ic_canisters::{ + management::{NodeMetrics, NodeMetricsHistoryResponse}, + node_metrics::NodeMetricsCanisterWrapper, +}; +use ic_types::PrincipalId; +use itertools::Itertools; +use log::info; + +use super::{ExecutableCommand, IcAdminRequirement}; + +#[derive(Debug, Args)] +pub struct MetricsCanister { + /// Start at timestamp in nanoseconds, if 0 it will dump daily metrics + 
/// since May 18, 2024 + pub start_at_timestamp: u64, + + /// Vector of subnets to query, if empty will dump metrics for + /// all subnets + pub subnet_ids: Vec, +} + +impl ExecutableCommand for MetricsCanister { + fn require_ic_admin(&self) -> IcAdminRequirement { + IcAdminRequirement::Detect + } + + async fn execute(&self, ctx: crate::ctx::DreContext) -> anyhow::Result<()> { + let lock = Mutex::new(()); + let mut metrics_by_subnet = BTreeMap::new(); + + let canister_agent = ctx.create_ic_agent_canister_client(Some(lock))?; + info!("Started action..."); + + + + println!("{}", serde_json::to_string_pretty(&metrics_by_subnet)?); + + Ok(()) + } + + fn validate(&self, _cmd: &mut clap::Command) {} +} diff --git a/rs/cli/src/commands/node_metrics/mod.rs b/rs/cli/src/commands/node_metrics_old/mod.rs similarity index 100% rename from rs/cli/src/commands/node_metrics/mod.rs rename to rs/cli/src/commands/node_metrics_old/mod.rs diff --git a/rs/ic-canisters/BUILD.bazel b/rs/ic-canisters/BUILD.bazel index 28ec9d55b..035c33af7 100644 --- a/rs/ic-canisters/BUILD.bazel +++ b/rs/ic-canisters/BUILD.bazel @@ -4,7 +4,6 @@ load("@rules_rust//rust:defs.bzl", "rust_library", "rust_test") package(default_visibility = ["//visibility:public"]) DEPS = [ - "//rs/dre-canisters/trustworthy-node-metrics/src/trustworthy-node-metrics:trustworthy_node_metrics" ] rust_library( diff --git a/rs/ic-canisters/src/management.rs b/rs/ic-canisters/src/management.rs index d62ccd391..5cda35f52 100644 --- a/rs/ic-canisters/src/management.rs +++ b/rs/ic-canisters/src/management.rs @@ -86,6 +86,16 @@ pub struct NodeMetrics { pub num_block_failures_total: u64, } +impl From for NodeMetrics{ + fn from(value: trustworthy_node_metrics::types::NodeMetrics) -> Self { + Self { + node_id: PrincipalId::from(value.node_id), + num_block_failures_total: value.num_block_failures_total, + num_blocks_proposed_total: value.num_blocks_proposed_total + } + } +} + #[derive(Default, CandidType, Deserialize, Clone, Debug, 
Serialize)] pub struct NodeMetricsHistoryResponse { pub timestamp_nanos: u64, From c84e78ba67eebc87a758f48268fd9c62d094eb83 Mon Sep 17 00:00:00 2001 From: Pietro Date: Thu, 18 Jul 2024 15:55:14 +0200 Subject: [PATCH 09/10] Update docs --- .../trustworthy-metrics.md | 6 +- docs/trustworthy-metrics/untrusted-metrics.md | 4 +- rs/cli/src/commands/mod.rs | 10 +-- .../node_metrics_old/management_canister.rs | 79 ------------------- .../node_metrics_old/metrics_canister.rs | 49 ------------ rs/cli/src/commands/node_metrics_old/mod.rs | 73 ----------------- rs/cli/src/commands/trustworthy_metrics.rs | 79 ------------------- 7 files changed, 6 insertions(+), 294 deletions(-) delete mode 100644 rs/cli/src/commands/node_metrics_old/management_canister.rs delete mode 100644 rs/cli/src/commands/node_metrics_old/metrics_canister.rs delete mode 100644 rs/cli/src/commands/node_metrics_old/mod.rs delete mode 100644 rs/cli/src/commands/trustworthy_metrics.rs diff --git a/docs/trustworthy-metrics/trustworthy-metrics.md b/docs/trustworthy-metrics/trustworthy-metrics.md index 762d01803..ca243aaff 100644 --- a/docs/trustworthy-metrics/trustworthy-metrics.md +++ b/docs/trustworthy-metrics/trustworthy-metrics.md @@ -92,7 +92,7 @@ You can obtain the DRE tool by following the instructions from [getting started] To test out the command you can run the following command ```bash -dre node-metrics from management-canister [...] +dre node-metrics --trustworthy --wallet [...] ``` ??? 
tip "Explanation of the arguments" @@ -149,12 +149,12 @@ Authentication with a private key is recommended, since it allows for more paral Here are some real-world examples of how metrics can be retrieved: ```bash -dre --private-key-pem identity.pem node-metrics from management-canister nanx4-baaaa-aaaap-qb4sq-cai 0 > data.json +dre --private-key-pem identity.pem node-metrics --trustworthy --wallet nanx4-baaaa-aaaap-qb4sq-cai 0 > data.json ``` Or with an HSM: ```bash -dre --hsm-slot 0 --hsm-key-id 0 --hsm-pin "" node-metrics from management-canister nanx4-baaaa-aaaap-qb4sq-cai 0 > data.json +dre --hsm-slot 0 --hsm-key-id 0 --hsm-pin "" node-metrics --trustworthy --wallet nanx4-baaaa-aaaap-qb4sq-cai 0 > data.json ``` You can check some examples of the analytics possible with the IC Mainnet data in the following [Jupyter Notebook](./TrustworthyMetricsAnalytics.ipynb) diff --git a/docs/trustworthy-metrics/untrusted-metrics.md b/docs/trustworthy-metrics/untrusted-metrics.md index 431bcf148..ab619014c 100644 --- a/docs/trustworthy-metrics/untrusted-metrics.md +++ b/docs/trustworthy-metrics/untrusted-metrics.md @@ -46,7 +46,7 @@ You can obtain the DRE tool by following the instructions from [getting started] To test out the command you can run the following command ```bash -dre node-metrics from metrics-canister [...] +dre node-metrics [...] ``` ??? tip "Explanation of the arguments" @@ -58,5 +58,5 @@ dre node-metrics from metrics-canister [...] 
Here are some real-world examples of how metrics can be retrieved: ```bash -dre node-metrics from metrics-canister 0 > data.json +dre node-metrics 0 > data.json ``` diff --git a/rs/cli/src/commands/mod.rs b/rs/cli/src/commands/mod.rs index 7df2b8d7b..a515216bd 100644 --- a/rs/cli/src/commands/mod.rs +++ b/rs/cli/src/commands/mod.rs @@ -15,7 +15,6 @@ use nodes::Nodes; use proposals::Proposals; use propose::Propose; use registry::Registry; -use trustworthy_metrics::TrustworthyMetrics; use update_unassigned_nodes::UpdateUnassignedNodes; use upgrade::Upgrade; use url::Url; @@ -36,7 +35,6 @@ mod proposals; mod propose; mod registry; mod subnet; -mod trustworthy_metrics; mod update_unassigned_nodes; pub mod upgrade; mod version; @@ -125,7 +123,7 @@ pub enum Subcommands { /// Manage versions Version(VersionCmd), - /// Fetch node metrics + /// Fetch node metrics stats NodeMetrics(NodeMetrics), /// Manage hostos versions @@ -140,9 +138,6 @@ pub enum Subcommands { /// Vote on our proposals Vote(Vote), - /// Trustworthy Metrics - TrustworthyMetrics(TrustworthyMetrics), - /// Registry inspection (dump) operations Registry(Registry), @@ -256,7 +251,6 @@ impl ExecutableCommand for Args { Subcommands::Nodes(c) => c.require_ic_admin(), Subcommands::ApiBoundaryNodes(c) => c.require_ic_admin(), Subcommands::Vote(c) => c.require_ic_admin(), - Subcommands::TrustworthyMetrics(c) => c.require_ic_admin(), Subcommands::Registry(c) => c.require_ic_admin(), Subcommands::Firewall(c) => c.require_ic_admin(), Subcommands::Upgrade(c) => c.require_ic_admin(), @@ -279,7 +273,6 @@ impl ExecutableCommand for Args { Subcommands::Nodes(c) => c.execute(ctx).await, Subcommands::ApiBoundaryNodes(c) => c.execute(ctx).await, Subcommands::Vote(c) => c.execute(ctx).await, - Subcommands::TrustworthyMetrics(c) => c.execute(ctx).await, Subcommands::Registry(c) => c.execute(ctx).await, Subcommands::Firewall(c) => c.execute(ctx).await, Subcommands::Upgrade(c) => c.execute(ctx).await, @@ -302,7 +295,6 @@ impl 
ExecutableCommand for Args { Subcommands::Nodes(c) => c.validate(cmd), Subcommands::ApiBoundaryNodes(c) => c.validate(cmd), Subcommands::Vote(c) => c.validate(cmd), - Subcommands::TrustworthyMetrics(c) => c.validate(cmd), Subcommands::Registry(c) => c.validate(cmd), Subcommands::Firewall(c) => c.validate(cmd), Subcommands::Upgrade(c) => c.validate(cmd), diff --git a/rs/cli/src/commands/node_metrics_old/management_canister.rs b/rs/cli/src/commands/node_metrics_old/management_canister.rs deleted file mode 100644 index 7a8920c7f..000000000 --- a/rs/cli/src/commands/node_metrics_old/management_canister.rs +++ /dev/null @@ -1,79 +0,0 @@ -use std::{ - collections::BTreeMap, - str::FromStr, - sync::{Arc, Mutex}, -}; - -use clap::Args; -use ic_canisters::{management::WalletCanisterWrapper, registry::RegistryCanisterWrapper}; -use ic_types::{CanisterId, PrincipalId}; -use log::{info, warn}; - -use super::{ExecutableCommand, IcAdminRequirement}; - -#[derive(Debug, Args)] -pub struct ManagementCanister { - /// Wallet that should be used to query node metrics history - /// in form of canister id - pub wallet: String, - - /// Start at timestamp in nanoseconds - pub start_at_timestamp: u64, - - /// Vector of subnets to query, if empty will dump metrics for - /// all subnets - pub subnet_ids: Vec, -} - -impl ExecutableCommand for ManagementCanister { - fn require_ic_admin(&self) -> IcAdminRequirement { - IcAdminRequirement::Detect - } - - async fn execute(&self, ctx: crate::ctx::DreContext) -> anyhow::Result<()> { - let lock = Mutex::new(()); - let canister_agent = ctx.create_ic_agent_canister_client(Some(lock))?; - info!("Started action..."); - let wallet_client = Arc::new(WalletCanisterWrapper::new(canister_agent.agent.clone())); - - let subnets = match &self.subnet_ids.is_empty() { - false => self.subnet_ids.clone(), - true => { - let registry_client = RegistryCanisterWrapper::new(canister_agent.agent); - registry_client.get_subnets().await? 
- } - }; - - let mut metrics_by_subnet = BTreeMap::new(); - info!("Running in parallel mode"); - - let wallet: CanisterId = CanisterId::from_str(&self.wallet)?; - - let handles = subnets - .into_iter() - .map(|s| (s, wallet_client.clone(), self.start_at_timestamp)) - .map(|(s, w, start)| { - info!("Spawning thread for subnet: {}", s); - tokio::spawn(async move { (s, w.get_node_metrics_history(wallet, start, s).await) }) - }); - - for handle in handles { - let (subnet, maybe_metrics) = handle.await?; - match maybe_metrics { - Ok(m) => { - info!("Received metrics for subnet: {}", subnet); - metrics_by_subnet.insert(subnet, m); - } - Err(e) => { - warn!("Couldn't fetch trustworthy metrics for subnet {}: {}", subnet, e); - } - }; - } - - println!("{}", serde_json::to_string_pretty(&metrics_by_subnet)?); - - Ok(()) - } - - fn validate(&self, _cmd: &mut clap::Command) {} -} diff --git a/rs/cli/src/commands/node_metrics_old/metrics_canister.rs b/rs/cli/src/commands/node_metrics_old/metrics_canister.rs deleted file mode 100644 index 20d8ce60e..000000000 --- a/rs/cli/src/commands/node_metrics_old/metrics_canister.rs +++ /dev/null @@ -1,49 +0,0 @@ -use std::{ - collections::{btree_map::Entry, BTreeMap}, - sync::Mutex, -}; - -use clap::Args; -use futures::future::try_join_all; -use ic_canisters::{ - management::{NodeMetrics, NodeMetricsHistoryResponse}, - node_metrics::NodeMetricsCanisterWrapper, -}; -use ic_types::PrincipalId; -use itertools::Itertools; -use log::info; - -use super::{ExecutableCommand, IcAdminRequirement}; - -#[derive(Debug, Args)] -pub struct MetricsCanister { - /// Start at timestamp in nanoseconds, if 0 it will dump daily metrics - /// since May 18, 2024 - pub start_at_timestamp: u64, - - /// Vector of subnets to query, if empty will dump metrics for - /// all subnets - pub subnet_ids: Vec, -} - -impl ExecutableCommand for MetricsCanister { - fn require_ic_admin(&self) -> IcAdminRequirement { - IcAdminRequirement::Detect - } - - async fn execute(&self, 
ctx: crate::ctx::DreContext) -> anyhow::Result<()> { - let lock = Mutex::new(()); - let mut metrics_by_subnet = BTreeMap::new(); - - let canister_agent = ctx.create_ic_agent_canister_client(Some(lock))?; - info!("Started action..."); - - - - println!("{}", serde_json::to_string_pretty(&metrics_by_subnet)?); - - Ok(()) - } - - fn validate(&self, _cmd: &mut clap::Command) {} -} diff --git a/rs/cli/src/commands/node_metrics_old/mod.rs b/rs/cli/src/commands/node_metrics_old/mod.rs deleted file mode 100644 index 424428c41..000000000 --- a/rs/cli/src/commands/node_metrics_old/mod.rs +++ /dev/null @@ -1,73 +0,0 @@ -use super::{ExecutableCommand, IcAdminRequirement}; -use clap::{Args, Subcommand}; -use management_canister::ManagementCanister; -use metrics_canister::MetricsCanister; - -mod management_canister; -mod metrics_canister; - -#[derive(Args, Debug)] -pub struct NodeMetricsCmd { - #[clap(subcommand)] - pub subcommand: NodeMetricsCommand, -} - -#[derive(Subcommand, Debug)] -pub enum NodeMetricsCommand { - From(FromNodeMetricsCmd), -} - -impl ExecutableCommand for NodeMetricsCmd { - fn require_ic_admin(&self) -> IcAdminRequirement { - match &self.subcommand { - NodeMetricsCommand::From(c) => c.require_ic_admin(), - } - } - - async fn execute(&self, ctx: crate::ctx::DreContext) -> anyhow::Result<()> { - match &self.subcommand { - NodeMetricsCommand::From(c) => c.execute(ctx).await, - } - } - - fn validate(&self, cmd: &mut clap::Command) { - match &self.subcommand { - NodeMetricsCommand::From(c) => c.validate(cmd), - } - } -} - -#[derive(Args, Debug)] -pub struct FromNodeMetricsCmd { - #[clap(subcommand)] - pub subcommand: FromNodeMetricsCommand, -} - -#[derive(Subcommand, Debug)] -pub enum FromNodeMetricsCommand { - ManagementCanister(ManagementCanister), - MetricsCanister(MetricsCanister), -} - -impl ExecutableCommand for FromNodeMetricsCmd { - fn require_ic_admin(&self) -> IcAdminRequirement { - match &self.subcommand { - FromNodeMetricsCommand::MetricsCanister(c) => 
c.require_ic_admin(), - FromNodeMetricsCommand::ManagementCanister(c) => c.require_ic_admin(), - } - } - - async fn execute(&self, ctx: crate::ctx::DreContext) -> anyhow::Result<()> { - match &self.subcommand { - FromNodeMetricsCommand::MetricsCanister(c) => c.execute(ctx).await, - FromNodeMetricsCommand::ManagementCanister(c) => c.execute(ctx).await, - } - } - - fn validate(&self, cmd: &mut clap::Command) { - match &self.subcommand { - FromNodeMetricsCommand::MetricsCanister(c) => c.validate(cmd), - FromNodeMetricsCommand::ManagementCanister(c) => c.validate(cmd), - } - } -} diff --git a/rs/cli/src/commands/trustworthy_metrics.rs b/rs/cli/src/commands/trustworthy_metrics.rs deleted file mode 100644 index 7bf6ddf59..000000000 --- a/rs/cli/src/commands/trustworthy_metrics.rs +++ /dev/null @@ -1,79 +0,0 @@ -use std::{ - collections::BTreeMap, - str::FromStr, - sync::{Arc, Mutex}, -}; - -use clap::Args; -use ic_canisters::{management::WalletCanisterWrapper, registry::RegistryCanisterWrapper}; -use ic_types::{CanisterId, PrincipalId}; -use log::{info, warn}; - -use super::{ExecutableCommand, IcAdminRequirement}; - -#[derive(Args, Debug)] -pub struct TrustworthyMetrics { - /// Wallet that should be used to query node metrics history - /// in form of canister id - pub wallet: String, - - /// Start at timestamp in nanoseconds - pub start_at_timestamp: u64, - - /// Vector of subnets to query, if empty will dump metrics for - /// all subnets - pub subnet_ids: Vec, -} - -impl ExecutableCommand for TrustworthyMetrics { - fn require_ic_admin(&self) -> IcAdminRequirement { - IcAdminRequirement::Detect - } - - async fn execute(&self, ctx: crate::ctx::DreContext) -> anyhow::Result<()> { - let lock = Mutex::new(()); - let canister_agent = ctx.create_ic_agent_canister_client(Some(lock))?; - info!("Started action..."); - let wallet_client = Arc::new(WalletCanisterWrapper::new(canister_agent.agent.clone())); - - let subnets = match &self.subnet_ids.is_empty() { - false => 
self.subnet_ids.clone(), - true => { - let registry_client = RegistryCanisterWrapper::new(canister_agent.agent); - registry_client.get_subnets().await? - } - }; - - let mut metrics_by_subnet = BTreeMap::new(); - info!("Running in parallel mode"); - - let wallet: CanisterId = CanisterId::from_str(&self.wallet)?; - - let handles = subnets - .into_iter() - .map(|s| (s, wallet_client.clone(), self.start_at_timestamp)) - .map(|(s, w, start)| { - info!("Spawning thread for subnet: {}", s); - tokio::spawn(async move { (s, w.get_node_metrics_history(wallet, start, s).await) }) - }); - - for handle in handles { - let (subnet, maybe_metrics) = handle.await?; - match maybe_metrics { - Ok(m) => { - info!("Received metrics for subnet: {}", subnet); - metrics_by_subnet.insert(subnet, m); - } - Err(e) => { - warn!("Couldn't fetch trustworthy metrics for subnet {}: {}", subnet, e); - } - }; - } - - println!("{}", serde_json::to_string_pretty(&metrics_by_subnet)?); - - Ok(()) - } - - fn validate(&self, _cmd: &mut clap::Command) {} -} From 5c826e79b6ef7ffbc1f839e6ec6fec60dcc2c5bd Mon Sep 17 00:00:00 2001 From: Pietro Date: Thu, 18 Jul 2024 16:00:01 +0200 Subject: [PATCH 10/10] Run rustfmt --- rs/cli/src/commands/mod.rs | 2 +- rs/cli/src/commands/node_metrics.rs | 8 ++------ rs/ic-canisters/BUILD.bazel | 1 + rs/ic-canisters/src/management.rs | 4 ++-- 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/rs/cli/src/commands/mod.rs b/rs/cli/src/commands/mod.rs index a515216bd..a8b6ee1e9 100644 --- a/rs/cli/src/commands/mod.rs +++ b/rs/cli/src/commands/mod.rs @@ -30,6 +30,7 @@ mod firewall; mod get; mod heal; pub mod hostos; +mod node_metrics; mod nodes; mod proposals; mod propose; @@ -39,7 +40,6 @@ mod update_unassigned_nodes; pub mod upgrade; mod version; mod vote; -mod node_metrics; #[derive(Parser, Debug)] #[clap(version = env!("CARGO_PKG_VERSION"), about, author)] diff --git a/rs/cli/src/commands/node_metrics.rs b/rs/cli/src/commands/node_metrics.rs index 
2e4b98ece..5d80b04df 100644 --- a/rs/cli/src/commands/node_metrics.rs +++ b/rs/cli/src/commands/node_metrics.rs @@ -21,7 +21,6 @@ type CLINodeMetrics = BTreeMap>; #[derive(Args, Debug)] pub struct NodeMetrics { - /// If specified trustworthy node metrics history will be fetched from the IC. /// If not untrusted node metrics will be fetched from node metrics canister #[clap(long, global = true)] @@ -151,11 +150,8 @@ impl ExecutableCommand for NodeMetrics { fn validate(&self, cmd: &mut clap::Command) { if self.trustworthy && self.wallet.is_none() { - cmd.error( - ErrorKind::MissingRequiredArgument, - "Wallet is required for fetching trustworthy metrics.", - ) - .exit(); + cmd.error(ErrorKind::MissingRequiredArgument, "Wallet is required for fetching trustworthy metrics.") + .exit(); } } } diff --git a/rs/ic-canisters/BUILD.bazel b/rs/ic-canisters/BUILD.bazel index 035c33af7..28ec9d55b 100644 --- a/rs/ic-canisters/BUILD.bazel +++ b/rs/ic-canisters/BUILD.bazel @@ -4,6 +4,7 @@ load("@rules_rust//rust:defs.bzl", "rust_library", "rust_test") package(default_visibility = ["//visibility:public"]) DEPS = [ + "//rs/dre-canisters/trustworthy-node-metrics/src/trustworthy-node-metrics:trustworthy_node_metrics" ] rust_library( diff --git a/rs/ic-canisters/src/management.rs b/rs/ic-canisters/src/management.rs index 5cda35f52..9d467746c 100644 --- a/rs/ic-canisters/src/management.rs +++ b/rs/ic-canisters/src/management.rs @@ -86,12 +86,12 @@ pub struct NodeMetrics { pub num_block_failures_total: u64, } -impl From for NodeMetrics{ +impl From for NodeMetrics { fn from(value: trustworthy_node_metrics::types::NodeMetrics) -> Self { Self { node_id: PrincipalId::from(value.node_id), num_block_failures_total: value.num_block_failures_total, - num_blocks_proposed_total: value.num_blocks_proposed_total + num_blocks_proposed_total: value.num_blocks_proposed_total, } } }