diff --git a/Cargo.lock b/Cargo.lock index 14cfb3e90b..573bee5f5c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3063,6 +3063,16 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2145869435ace5ea6ea3d35f59be559317ec9a0d04e1812d5f185a87b6d36f1a" +[[package]] +name = "metric_unify" +version = "0.1.0" +dependencies = [ + "clap", + "num-format", + "serde", + "serde_json", +] + [[package]] name = "metrics" version = "0.23.0" diff --git a/Cargo.toml b/Cargo.toml index 26ee96e07f..4a4d54dce2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -60,6 +60,7 @@ members = [ "extensions/sha256/transpiler", "extensions/sha256/guest", "extensions/sha256/tests", + "ci/scripts/metric_unify", ] exclude = ["crates/sdk/example"] resolver = "2" diff --git a/ci/scripts/metric_unify/Cargo.toml b/ci/scripts/metric_unify/Cargo.toml new file mode 100644 index 0000000000..c57d5b6e8f --- /dev/null +++ b/ci/scripts/metric_unify/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "metric_unify" +version = "0.1.0" +edition = "2021" + +[dependencies] +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +clap = { version = "4.0", features = ["derive"] } +num-format = "0.4" diff --git a/ci/scripts/metric_unify/src/main.rs b/ci/scripts/metric_unify/src/main.rs new file mode 100644 index 0000000000..338c71c883 --- /dev/null +++ b/ci/scripts/metric_unify/src/main.rs @@ -0,0 +1,362 @@ +mod types; + +use std::collections::HashMap; + +use clap::Parser; +use serde::{Deserialize, Serialize}; + +use crate::types::{Labels, Metric, MetricDb, MetricsFile}; + +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +struct Args { + /// Path to the metrics JSON file + #[arg(value_name = "METRICS_JSON")] + metrics_json: String, + + /// Path to the aggregation JSON file + #[arg(long, value_name = "AGGREGATION_JSON")] + aggregation_json: Option, +} + +#[derive(Debug, Serialize, Deserialize)] +struct AggregationFile { + aggregations: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct Aggregation { + name: String, + group_by: Vec, + metrics: Vec, + operation: String, +} + +impl MetricDb { + fn new(metrics_file: &str) -> Result> { + let file = std::fs::File::open(metrics_file)?; + let metrics: MetricsFile = serde_json::from_reader(file)?; + + let mut db = MetricDb::default(); + + // Process counters + for entry in metrics.counter { + if entry.value == 0.0 { + continue; + } + let labels = Labels::from(entry.labels); + db.add_to_flat_dict(labels, entry.metric, entry.value); + } + + // Process gauges + for entry in metrics.gauge { + let labels = Labels::from(entry.labels); + db.add_to_flat_dict(labels, entry.metric, entry.value); + } + + db.separate_by_label_types(); + + Ok(db) + } + + fn add_to_flat_dict(&mut self, labels: Labels, metric: String, value: f64) { + self.flat_dict + .entry(labels) + .or_default() + .push(Metric::new(metric, value)); + } + + // Custom sorting function that ensures 'group' comes first. + // Other keys are sorted alphabetically. + fn custom_sort_label_keys(label_keys: &mut [String]) { + // Prioritize 'group' by giving it the lowest possible sort value + label_keys.sort_by_key(|key| { + if key == "group" { + (0, key.clone()) // Lowest priority for 'group' + } else { + (1, key.clone()) // Normal priority for other keys + } + }); + } + + fn separate_by_label_types(&mut self) { + self.dict_by_label_types.clear(); + + for (labels, metrics) in &self.flat_dict { + // Get sorted label keys + let mut label_keys: Vec = labels.0.iter().map(|(key, _)| key.clone()).collect(); + Self::custom_sort_label_keys(&mut label_keys); + + // Create label_values based on sorted keys + let label_dict: HashMap = labels.0.iter().cloned().collect(); + + let label_values: Vec = label_keys + .iter() + .map(|key| label_dict.get(key).unwrap().clone()) + .collect(); + + // Add to dict_by_label_types + self.dict_by_label_types + .entry(label_keys) + .or_default() + .entry(label_values) + .or_default() + .extend(metrics.clone()); + } + } + + fn generate_markdown_tables(&self) -> String { + let mut markdown_output = String::new(); + // Get sorted keys to iterate in consistent order + let mut sorted_keys: Vec<_> = self.dict_by_label_types.keys().cloned().collect(); + sorted_keys.sort(); + + for label_keys in sorted_keys { + let metrics_dict = &self.dict_by_label_types[&label_keys]; + let mut metric_names: Vec = metrics_dict + .values() + .flat_map(|metrics| metrics.iter().map(|m| m.name.clone())) + .collect::>() + .into_iter() + .collect(); + metric_names.sort_by(|a, b| b.cmp(a)); + + // Create table header + let header = format!( + "| {} | {} |", + label_keys.join(" | "), + metric_names.join(" | ") + ); + + let separator = "| ".to_string() + + &vec!["---"; label_keys.len() + metric_names.len()].join(" | ") + + " |"; + + markdown_output.push_str(&header); + markdown_output.push('\n'); + markdown_output.push_str(&separator); + markdown_output.push('\n'); + + // Fill table rows + for (label_values, metrics) in metrics_dict { + let mut row = String::new(); + row.push_str("| "); + row.push_str(&label_values.join(" | ")); + row.push_str(" | "); + + // Add metric values + for metric_name in &metric_names { + let metric_value = metrics + .iter() + .find(|m| &m.name == metric_name) + .map(|m| Self::format_number(m.value)) + .unwrap_or_default(); + + row.push_str(&format!("{} | ", metric_value)); + } + + markdown_output.push_str(&row); + markdown_output.push('\n'); + } + + markdown_output.push('\n'); + } + + markdown_output + } + + fn generate_aggregation_tables(&self, aggregations: &[Aggregation]) -> String { + let mut markdown_output = String::new(); + let group_tuple = vec!["group".to_string()]; + + // Get metrics grouped by "group" label + if let Some(metrics_dict) = self.dict_by_label_types.get(&group_tuple) { + let mut group_to_metrics: HashMap> = HashMap::new(); + + // Collect metrics for each group + for (group_values, metrics) in metrics_dict { + let group_name = &group_values[0]; + let agg_metrics: Vec = metrics + .iter() + .filter(|metric| aggregations.iter().any(|a| a.name == metric.name)) + .cloned() + .collect(); + + if !agg_metrics.is_empty() { + group_to_metrics + .entry(group_name.clone()) + .or_default() + .extend(agg_metrics); + } + } + + if !group_to_metrics.is_empty() { + // Get all unique metric names + let mut metric_names: Vec = group_to_metrics + .values() + .flat_map(|metrics| metrics.iter().map(|m| m.name.clone())) + .collect::>() + .into_iter() + .collect(); + metric_names.sort(); + + // Create table header + let header = format!("| group | {} |", metric_names.join(" | ")); + let separator = + format!("| --- | {} |", vec!["---"; metric_names.len()].join(" | ")); + markdown_output.push_str(&header); + markdown_output.push('\n'); + markdown_output.push_str(&separator); + markdown_output.push('\n'); + + // Fill table rows + for (group_name, metrics) in group_to_metrics { + let mut row = format!("| {} |", group_name); + + for metric_name in &metric_names { + let metric_str = metrics + .iter() + .find(|m| &m.name == metric_name) + .map(|m| format!(" {} |", Self::format_number(m.value))) + .unwrap_or_default(); + + row.push_str(&metric_str); + } + + markdown_output.push_str(&row); + markdown_output.push('\n'); + } + markdown_output.push('\n'); + } + } + + markdown_output + } + + fn read_aggregations( + aggregation_file: &str, + ) -> Result, Box> { + let file = std::fs::File::open(aggregation_file)?; + let aggregation_data: AggregationFile = serde_json::from_reader(file)?; + Ok(aggregation_data.aggregations) + } + + fn apply_aggregations(&mut self, aggregations: &[Aggregation]) { + for aggregation in aggregations { + let mut group_by_dict: HashMap, f64> = HashMap::new(); + + if aggregation.operation == "sum" || aggregation.operation == "unique" { + for (tuple_keys, metrics_dict) in &self.dict_by_label_types { + // Skip if not all group_by keys are present in tuple_keys + if !aggregation + .group_by + .iter() + .all(|key| tuple_keys.contains(key)) + { + continue; + } + + for (tuple_values, metrics) in metrics_dict { + // Create a mapping from label keys to values + let label_dict: HashMap<_, _> = + tuple_keys.iter().zip(tuple_values.iter()).collect(); + + // Extract values for group_by keys + let group_by_values: Vec = aggregation + .group_by + .iter() + .map(|key| label_dict[key].clone()) + .collect(); + + // Process metrics + for metric in metrics { + if aggregation.metrics.contains(&metric.name) { + match aggregation.operation.as_str() { + "sum" => { + *group_by_dict + .entry(group_by_values.clone()) + .or_default() += metric.value; + } + "unique" => { + let entry = group_by_dict + .entry(group_by_values.clone()) + .or_default(); + if *entry != 0.0 && *entry != metric.value { + println!( + "[WARN] Overwriting {}: previous value = {}, new value = {}", + metric.name, entry, metric.value + ); + } + *entry = metric.value; + } + _ => unreachable!(), + } + } + } + } + } + + // Add aggregated metrics back to the database + for (group_by_values, agg_value) in group_by_dict { + let labels = Labels( + aggregation + .group_by + .iter() + .zip(group_by_values.iter()) + .map(|(k, v)| (k.clone(), v.clone())) + .collect(), + ); + + let metric = Metric::new(aggregation.name.clone(), agg_value); + + // Check if metric already exists + if let Some(metrics) = self.flat_dict.get_mut(&labels) { + if let Some(existing_metric) = + metrics.iter_mut().find(|m| m.name == aggregation.name) + { + if existing_metric.value != agg_value { + println!( + "[WARN] Overwriting {}: previous value = {}, new value = {}", + aggregation.name, existing_metric.value, agg_value + ); + } + existing_metric.value = agg_value; + } else { + metrics.push(metric); + } + } else { + self.flat_dict.insert(labels, vec![metric]); + } + } + } + } + + self.separate_by_label_types(); + } +} + +fn main() -> Result<(), Box> { + let args = Args::parse(); + let mut db = MetricDb::new(&args.metrics_json)?; + + let mut markdown_output = String::new(); + + if let Some(aggregation_file) = args.aggregation_json { + let aggregations = MetricDb::read_aggregations(&aggregation_file)?; + db.apply_aggregations(&aggregations); + + // Generate aggregation tables + let agg_tables = db.generate_aggregation_tables(&aggregations); + markdown_output.push_str(&agg_tables); + + // Add detailed metrics in a collapsible section + markdown_output.push_str("\n
\nDetailed Metrics\n\n"); + markdown_output.push_str(&db.generate_markdown_tables()); + markdown_output.push_str("
\n\n"); + } else { + markdown_output.push_str(&db.generate_markdown_tables()); + } + + println!("{}", markdown_output); + Ok(()) +} diff --git a/ci/scripts/metric_unify/src/types.rs b/ci/scripts/metric_unify/src/types.rs new file mode 100644 index 0000000000..9fe206b5dc --- /dev/null +++ b/ci/scripts/metric_unify/src/types.rs @@ -0,0 +1,87 @@ +use std::collections::HashMap; + +use num_format::{Locale, ToFormattedString}; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Serialize, Deserialize)] +pub struct MetricsFile { + pub counter: Vec, + pub gauge: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MetricEntry { + pub labels: Vec<[String; 2]>, + pub metric: String, + #[serde(deserialize_with = "deserialize_f64_from_string")] + pub value: f64, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Metric { + pub name: String, + pub value: f64, +} + +impl Metric { + pub fn new(name: String, value: f64) -> Self { + Self { name, value } + } +} + +#[derive(Debug, Clone, Eq)] +pub struct Labels(pub Vec<(String, String)>); + +impl PartialEq for Labels { + fn eq(&self, other: &Self) -> bool { + if self.0.len() != other.0.len() { + return false; + } + let mut self_sorted = self.0.clone(); + let mut other_sorted = other.0.clone(); + self_sorted.sort(); + other_sorted.sort(); + self_sorted == other_sorted + } +} + +impl std::hash::Hash for Labels { + fn hash(&self, state: &mut H) { + let mut sorted = self.0.clone(); + sorted.sort(); + sorted.hash(state); + } +} + +impl From> for Labels { + fn from(v: Vec<[String; 2]>) -> Self { + Labels(v.into_iter().map(|[k, v]| (k, v)).collect()) + } +} + +#[derive(Debug, Default)] +pub struct MetricDb { + pub flat_dict: HashMap>, + pub dict_by_label_types: HashMap, HashMap, Vec>>, +} + +impl MetricDb { + pub fn format_number(value: f64) -> String { + let whole = value.trunc() as i64; + let decimal = (value.fract() * 100.0).abs().round() as i64; + + if decimal == 0 { + whole.to_formatted_string(&Locale::en) + } else { + format!("{}.{:02}", whole.to_formatted_string(&Locale::en), decimal) + } + } +} + +pub fn deserialize_f64_from_string<'de, D>(deserializer: D) -> Result +where + D: serde::Deserializer<'de>, +{ + let s = String::deserialize(deserializer)?; + s.parse::().map_err(serde::de::Error::custom) +}