From ad086d544184f94aa1d1213d52f8c9bb7dfdf754 Mon Sep 17 00:00:00 2001 From: HaoranYi Date: Sun, 8 Dec 2024 17:11:35 +0000 Subject: [PATCH 1/2] don't populate uncleaned_roots inline reduce --- accounts-db/src/accounts_db.rs | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index 4eccc311ab64e2..b1dd2290438398 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -8776,24 +8776,13 @@ impl AccountsDb { struct DuplicatePubkeysVisitedInfo { accounts_data_len_from_duplicates: u64, num_duplicate_accounts: u64, - uncleaned_roots: IntSet, duplicates_lt_hash: Option>, } impl DuplicatePubkeysVisitedInfo { - fn reduce(mut a: Self, mut b: Self) -> Self { - if a.uncleaned_roots.len() >= b.uncleaned_roots.len() { - a.merge(b); - a - } else { - b.merge(a); - b - } - } - fn merge(&mut self, other: Self) { + fn reduce(mut self, other: Self) -> Self { self.accounts_data_len_from_duplicates += other.accounts_data_len_from_duplicates; self.num_duplicate_accounts += other.num_duplicate_accounts; - self.uncleaned_roots.extend(other.uncleaned_roots); match ( self.duplicates_lt_hash.is_some(), @@ -8806,16 +8795,20 @@ impl AccountsDb { .unwrap() .0 .mix_in(&other.duplicates_lt_hash.as_ref().unwrap().0); + self } (true, false) => { // nothing to do; `other` doesn't have a duplicates lt hash + self } (false, true) => { // `self` doesn't have a duplicates lt hash, so pilfer from `other` self.duplicates_lt_hash = other.duplicates_lt_hash; + self } (false, false) => { // nothing to do; no duplicates lt hash at all + self } } } @@ -8835,7 +8828,6 @@ impl AccountsDb { let DuplicatePubkeysVisitedInfo { accounts_data_len_from_duplicates, num_duplicate_accounts, - uncleaned_roots, duplicates_lt_hash, } = unique_pubkeys_by_bin .par_iter() @@ -8848,7 +8840,6 @@ impl AccountsDb { let ( accounts_data_len_from_duplicates, accounts_duplicates_num, - uncleaned_roots, duplicates_lt_hash, ) = self.visit_duplicate_pubkeys_during_startup( pubkeys, @@ -8859,7 +8850,6 @@ impl AccountsDb { let intermediate = DuplicatePubkeysVisitedInfo { accounts_data_len_from_duplicates, num_duplicate_accounts: accounts_duplicates_num, - uncleaned_roots, duplicates_lt_hash, }; DuplicatePubkeysVisitedInfo::reduce(accum, intermediate) @@ -8877,7 +8867,6 @@ impl AccountsDb { ); accounts_data_len_dedup_timer.stop(); timings.accounts_data_len_dedup_time_us = accounts_data_len_dedup_timer.as_us(); - timings.slots_to_clean = uncleaned_roots.len() as u64; timings.num_duplicate_accounts = num_duplicate_accounts; accounts_data_len.fetch_sub(accounts_data_len_from_duplicates, Ordering::Relaxed); @@ -9001,10 +8990,9 @@ impl AccountsDb { rent_collector: &RentCollector, timings: &GenerateIndexTimings, should_calculate_duplicates_lt_hash: bool, - ) -> (u64, u64, IntSet, Option>) { + ) -> (u64, u64, Option>) { let mut accounts_data_len_from_duplicates = 0; let mut num_duplicate_accounts = 0_u64; - let mut uncleaned_slots = IntSet::default(); let mut duplicates_lt_hash = should_calculate_duplicates_lt_hash.then(|| Box::new(DuplicatesLtHash::default())); let mut removed_rent_paying = 0; @@ -9022,7 +9010,6 @@ impl AccountsDb { // the slot where duplicate accounts are found in the index need to be in 'uncleaned_slots' list, too. let max = slot_list.iter().map(|(slot, _)| slot).max().unwrap(); slot_list.iter().for_each(|(slot, account_info)| { - uncleaned_slots.insert(*slot); if slot == max { // the info in 'max' is the most recent, current info for this pubkey return; @@ -9080,7 +9067,6 @@ impl AccountsDb { ( accounts_data_len_from_duplicates as u64, num_duplicate_accounts, - uncleaned_slots, duplicates_lt_hash, ) } From 32f03bbb0f1154e6f3f1cb99644d5fc4311540fd Mon Sep 17 00:00:00 2001 From: HaoranYi Date: Thu, 2 Jan 2025 15:38:03 +0000 Subject: [PATCH 2/2] pr --- accounts-db/src/accounts_db.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index b1dd2290438398..8b8c86ba22481f 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -8795,22 +8795,19 @@ impl AccountsDb { .unwrap() .0 .mix_in(&other.duplicates_lt_hash.as_ref().unwrap().0); - self } (true, false) => { // nothing to do; `other` doesn't have a duplicates lt hash - self } (false, true) => { // `self` doesn't have a duplicates lt hash, so pilfer from `other` self.duplicates_lt_hash = other.duplicates_lt_hash; - self } (false, false) => { // nothing to do; no duplicates lt hash at all - self } } + self } }