From 1309b10355945e0d90c0a9bbf844486d797d1a53 Mon Sep 17 00:00:00 2001
From: Tsvetomir Dimitrov <tsvetomir@parity.io>
Date: Mon, 23 May 2022 11:49:26 +0300
Subject: [PATCH] Logs and formatting

---
 node/core/provisioner/src/disputes/mod.rs | 56 +++++++++++++----------
 1 file changed, 31 insertions(+), 25 deletions(-)
diff --git a/node/core/provisioner/src/disputes/mod.rs b/node/core/provisioner/src/disputes/mod.rs
index 07145c3aa489..719669fe876e 100644
--- a/node/core/provisioner/src/disputes/mod.rs
+++ b/node/core/provisioner/src/disputes/mod.rs
@@ -33,18 +33,20 @@ mod tests;
 
 /// The maximum number of disputes Provisioner will include in the inherent data.
 /// Serves as a protection not to flood the Runtime with excessive data.
-pub const MAX_DISPUTE_VOTES_FORWARDED_TO_RUNTIME: usize = 200 * 1_000; // `VALIDATORS COUNT` * `DISPUTES COUNT`
-
+// The magic numbers below are: `estimated validators count` * `estimated disputes per validator`
+pub const MAX_DISPUTE_VOTES_FORWARDED_TO_RUNTIME: usize = 200 * 1_000;
 pub async fn select_disputes<Sender>(
 	sender: &mut Sender,
 	metrics: &metrics::Metrics,
-	_leaf: &ActivatedLeaf,
+	leaf: &ActivatedLeaf,
 ) -> Result<MultiDisputeStatementSet, Error>
 where
 	Sender: overseer::ProvisionerSenderTrait,
 {
+	gum::trace!(target: LOG_TARGET, ?leaf, "Selecting disputes for inherent data");
+
 	// Fetch the onchain disputes. We'll do a prioritisation based on them.
-	let onchain = match get_onchain_disputes(sender, _leaf.hash.clone()).await {
+	let onchain = match get_onchain_disputes(sender, leaf.hash.clone()).await {
 		Ok(r) => r,
 		Err(e) => {
 			gum::debug!(
@@ -57,7 +59,11 @@ where
 	};
 
 	let recent_disputes = request_disputes(sender).await;
+	gum::trace!(target: LOG_TARGET, ?leaf, "Got {} recent disputes", recent_disputes.len());
+
 	let partitioned = partition_recent_disputes(recent_disputes, &onchain);
+	gum::trace!(target: LOG_TARGET, ?leaf, "ACTIVE disputes after partitioning: without enough votes to conclude onchain: {}; unknown onchain: {}; with enough votes to conclude onchain: {};", partitioned.cant_conclude_onchain.len(), partitioned.unknown_onchain.len(), partitioned.can_conclude_onchain.len());
+	gum::trace!(target: LOG_TARGET, ?leaf, "CONCLUDED disputes after partitioning: concluded_known_onchain (will be dropped): {}; concluded_unknown_onchain: {}", partitioned.concluded_known_onchain.len() , partitioned.concluded_unknown_onchain.len());
 
 	let result = vote_selection(sender, partitioned, &onchain).await;
 
@@ -76,11 +82,6 @@ where
 {
 	const BATCH_SIZE: usize = 1_100;
 
-	// fast path - no need to be smart if there are small number of disputes
-	if partitioned.disputes_count() <= BATCH_SIZE {
-		// fetch everything and return
-	}
-
 	// fetch in batches until there are enough votes
 	let mut disputes = partitioned.into_iter().collect::<Vec<_>>();
 	let mut total_votes_len = 0;
@@ -89,6 +90,7 @@ where
 		let batch_size = std::cmp::min(BATCH_SIZE, disputes.len());
 		let batch = disputes.drain(0..batch_size).collect();
 
+		// Filter votes which are already onchain
 		let votes = request_votes(sender, batch)
 			.await
 			.into_iter()
@@ -106,6 +108,7 @@ where
 			})
 			.collect::<Vec<_>>();
 
+		// Check if votes are within the limit
 		for (session_index, candidate_hash, selected_votes) in votes {
 			let votes_len = selected_votes.valid.len() + selected_votes.invalid.len();
 			if votes_len + total_votes_len > MAX_DISPUTE_VOTES_FORWARDED_TO_RUNTIME {
@@ -116,14 +119,26 @@ where
 			total_votes_len += votes_len
 		}
 	}
+
 	result
 }
 
 struct PartitionedDisputes {
+	// Active disputes without enough onchain votes to conclude. These
+	// will be sent to the Runtime with FIRST priority.
 	pub cant_conclude_onchain: Vec<(SessionIndex, CandidateHash)>,
+	// Active disputes completely unknown onchain. Will be sent to the
+	// Runtime with SECOND priority.
 	pub unknown_onchain: Vec<(SessionIndex, CandidateHash)>,
+	// Active disputes with enough votes to conclude onchain. These will
+	// conclude (or have already concluded) even if we don't send any votes.
+	// Anyway - they will be sent to the Runtime with THIRD priority.
 	pub can_conclude_onchain: Vec<(SessionIndex, CandidateHash)>,
-	pub concluded_known_onchain: Vec<(SessionIndex, CandidateHash)>, // these will be ignored
+	// Concluded disputes which are known onchain. These are not
+	// interesting and won't be sent to the Runtime.
+	pub concluded_known_onchain: Vec<(SessionIndex, CandidateHash)>,
+	// Concluded local disputes which are completely unknown for the Runtime.
+	// Hopefully this should never happen. Will be sent to the Runtime with FOURTH priority.
 	pub concluded_unknown_onchain: Vec<(SessionIndex, CandidateHash)>,
 }
 
@@ -143,23 +158,9 @@ impl PartitionedDisputes {
 			.into_iter()
 			.chain(self.unknown_onchain.into_iter())
 			.chain(self.can_conclude_onchain.into_iter())
-			// .chain(self.concluded_known_onchain.into_iter())
+			// .chain(self.concluded_known_onchain.into_iter()) -> dropped on purpose
 			.chain(self.concluded_unknown_onchain.into_iter())
 	}
-
-	fn disputes_count(&self) -> usize {
-		let PartitionedDisputes {
-			cant_conclude_onchain,
-			unknown_onchain,
-			can_conclude_onchain,
-			concluded_known_onchain: _,
-			concluded_unknown_onchain,
-		} = self;
-		cant_conclude_onchain.len() +
-			unknown_onchain.len() +
-			can_conclude_onchain.len() +
-			concluded_unknown_onchain.len()
-	}
 }
 
 fn partition_recent_disputes(
@@ -167,6 +168,8 @@ fn partition_recent_disputes(
 	onchain: &HashMap<(u32, CandidateHash), DisputeState>,
 ) -> PartitionedDisputes {
 	let mut partitioned = PartitionedDisputes::new();
+
+	// Drop any duplicates
 	let unique_recent = recent
 		.into_iter()
 		.map(|(session_index, candidate_hash, dispute_state)| {
@@ -174,6 +177,7 @@ fn partition_recent_disputes(
 		})
 		.collect::<HashMap<_, _>>();
 
+	// Split ACTIVE from CONCLUDED disputes
 	let (active, concluded): (
 		Vec<(SessionIndex, CandidateHash, DisputeStatus)>,
 		Vec<(SessionIndex, CandidateHash, DisputeStatus)>,
@@ -184,6 +188,7 @@ fn partition_recent_disputes(
 		})
 		.partition(|(_, _, status)| *status == DisputeStatus::Active);
 
+	// Split ACTIVE in three groups...
 	for (session_index, candidate_hash, _) in active {
 		match onchain.get(&(session_index, candidate_hash)) {
 			Some(d) => {
@@ -202,6 +207,7 @@ fn partition_recent_disputes(
 
 	// TODO: Prioritise the votes in each partition. Is it still necessary?
 
+	// ... and CONCLUDED in two more
 	for (session_index, candidate_hash, _) in concluded {
 		match onchain.get(&(session_index, candidate_hash)) {
 			Some(_) => partitioned.concluded_known_onchain.push((session_index, candidate_hash)),