diff --git a/protocols/gossipsub/src/behaviour.rs b/protocols/gossipsub/src/behaviour.rs index 7fe18d9ed20a..fae45ed452eb 100644 --- a/protocols/gossipsub/src/behaviour.rs +++ b/protocols/gossipsub/src/behaviour.rs @@ -3078,20 +3078,16 @@ where } // Keep track of expired messages for the application layer. + let failed_messages = self.failed_messages.entry(propagation_source).or_default(); + failed_messages.timeout += 1; match rpc { RpcOut::Publish { .. } => { - self.failed_messages - .entry(propagation_source) - .or_default() - .publish += 1; + failed_messages.publish += 1; } RpcOut::Forward { .. } => { - self.failed_messages - .entry(propagation_source) - .or_default() - .forward += 1; + failed_messages.forward += 1; } - _ => {} // + _ => {} } // Record metrics on the failure. @@ -3099,9 +3095,11 @@ where match rpc { RpcOut::Publish { message, .. } => { metrics.publish_msg_dropped(&message.topic); + metrics.timeout_msg_dropped(&message.topic); } RpcOut::Forward { message, .. } => { metrics.forward_msg_dropped(&message.topic); + metrics.timeout_msg_dropped(&message.topic); } _ => {} } diff --git a/protocols/gossipsub/src/metrics.rs b/protocols/gossipsub/src/metrics.rs index 0c762fb0e606..40af1af2caca 100644 --- a/protocols/gossipsub/src/metrics.rs +++ b/protocols/gossipsub/src/metrics.rs @@ -131,6 +131,8 @@ pub(crate) struct Metrics { publish_messages_dropped: Family<TopicHash, Counter>, /// The number of forward messages dropped by the sender. forward_messages_dropped: Family<TopicHash, Counter>, + /// The number of messages that timed out and could not be sent. + timedout_messages_dropped: Family<TopicHash, Counter>, /* Metrics regarding mesh state */ /// Number of peers in our mesh. This metric should be updated with the count of peers for a @@ -241,6 +243,11 @@ impl Metrics { "Number of forward messages dropped per topic" ); + let timedout_messages_dropped = register_family!( + "timedout_messages_dropped_per_topic", + "Number of timedout messages dropped per topic" + ); + let mesh_peer_counts = register_family!( "mesh_peer_counts", "Number of peers in each topic in our mesh" @@ -347,6 +354,7 @@ impl Metrics { rejected_messages, publish_messages_dropped, forward_messages_dropped, + timedout_messages_dropped, mesh_peer_counts, mesh_peer_inclusion_events, mesh_peer_churn_events, @@ -508,6 +516,13 @@ impl Metrics { } } + /// Register dropping a message that timedout over a topic. + pub(crate) fn timeout_msg_dropped(&mut self, topic: &TopicHash) { + if self.register_topic(topic).is_ok() { + self.timedout_messages_dropped.get_or_create(topic).inc(); + } + } + /// Register that a message was received (and was not a duplicate). pub(crate) fn msg_recvd(&mut self, topic: &TopicHash) { if self.register_topic(topic).is_ok() { diff --git a/protocols/gossipsub/src/types.rs b/protocols/gossipsub/src/types.rs index a85f8b5496ee..bb1916fefd08 100644 --- a/protocols/gossipsub/src/types.rs +++ b/protocols/gossipsub/src/types.rs @@ -33,7 +33,7 @@ use crate::rpc_proto::proto; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; -/// The type of messages that have expired while attempting to send to a peer. +/// Messages that have expired while attempting to be sent to a peer. #[derive(Clone, Debug, Default)] pub struct FailedMessages { /// The number of publish messages that failed to be published in a heartbeat. @@ -44,14 +44,11 @@ pub struct FailedMessages { pub priority: usize, /// The number of messages that were failed to be sent to the non-priority queue as it was full. pub non_priority: usize, + /// The number of messages that timed out and could not be sent. + pub timeout: usize, } impl FailedMessages { - /// The total number of messages that expired due a timeout. - pub fn total_timeout(&self) -> usize { - self.publish + self.forward - } - /// The total number of messages that failed due to the queue being full. pub fn total_queue_full(&self) -> usize { self.priority + self.non_priority