From eef7c84255fa4f66622f9b46221cf25a7b0341e4 Mon Sep 17 00:00:00 2001
From: Carl Lin
Date: Wed, 17 Apr 2024 18:05:49 -0400
Subject: [PATCH 1/4] Remove validator D

---
 local-cluster/tests/local_cluster.rs | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/local-cluster/tests/local_cluster.rs b/local-cluster/tests/local_cluster.rs
index 79c6fcd08908d5..c0635e7f24f830 100644
--- a/local-cluster/tests/local_cluster.rs
+++ b/local-cluster/tests/local_cluster.rs
@@ -3140,8 +3140,7 @@ fn test_optimistic_confirmation_violation_without_tower() {
 
 // A bit convoluted test case; but this roughly follows this test theoretical scenario:
 // Validator A, B, C have 31, 36, 33 % of stake respectively. Leader schedule is split, first half
-// of the test B is always leader, second half C is. Additionally we have a non voting validator D with 0
-// stake to propagate gossip info.
+// of the test B is always leader, second half C is.
 //
 // Step 1: Kill C, only A, B and D should be running
 //
@@ -3183,7 +3182,6 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b
         31 * DEFAULT_NODE_STAKE,
         36 * DEFAULT_NODE_STAKE,
         33 * DEFAULT_NODE_STAKE,
-        0,
     ];
 
     let base_slot: Slot = 26; // S2

From faf66dfcc574a3666c60aaf7f7cb2c93bd1590e7 Mon Sep 17 00:00:00 2001
From: Carl Lin
Date: Wed, 17 Apr 2024 20:33:34 -0400
Subject: [PATCH 2/4] Fix test

---
 local-cluster/tests/local_cluster.rs | 106 ++++++++++-----------------
 1 file changed, 37 insertions(+), 69 deletions(-)

diff --git a/local-cluster/tests/local_cluster.rs b/local-cluster/tests/local_cluster.rs
index c0635e7f24f830..35d7191a3f7556 100644
--- a/local-cluster/tests/local_cluster.rs
+++ b/local-cluster/tests/local_cluster.rs
@@ -3129,13 +3129,13 @@ fn run_test_load_program_accounts(scan_commitment: CommitmentConfig) {
 #[test]
 #[serial]
 fn test_no_optimistic_confirmation_violation_with_tower() {
-    do_test_optimistic_confirmation_violation_with_or_without_tower(true);
+    do_test_lockout_violation_with_or_without_tower(true);
 }
 
 #[test]
 #[serial]
 fn test_optimistic_confirmation_violation_without_tower() {
-    do_test_optimistic_confirmation_violation_with_or_without_tower(false);
+    do_test_lockout_violation_with_or_without_tower(false);
 }
 
 // A bit convoluted test case; but this roughly follows this test theoretical scenario:
@@ -3173,7 +3173,7 @@ fn test_optimistic_confirmation_violation_without_tower() {
 // With the persisted tower:
 // `A` should not be able to generate a switching proof.
 //
-fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: bool) {
+fn do_test_lockout_violation_with_or_without_tower(with_tower: bool) {
     solana_logger::setup_with("info");
 
     // First set up the cluster with 4 nodes
@@ -3184,21 +3184,15 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b
         33 * DEFAULT_NODE_STAKE,
     ];
 
-    let base_slot: Slot = 26; // S2
-    let next_slot_on_a: Slot = 27; // S3
+    let validator_b_last_leader_slot: Slot = 8;
     let truncated_slots: Slot = 100; // just enough to purge all following slots after the S2 and S3
 
-    // Each pubkeys are prefixed with A, B, C and D.
-    // D is needed to:
-    // 1) Propagate A's votes for S2 to validator C after A shuts down so that
-    // C can avoid NoPropagatedConfirmation errors and continue to generate blocks
-    // 2) Provide gossip discovery for `A` when it restarts because `A` will restart
-    // at a different gossip port than the entrypoint saved in C's gossip table
+    // Each pubkeys are prefixed with A, B, C
+
     let validator_keys = [
         "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
         "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
         "4mx9yoFBeYasDKBGDWCTWGJdWuJCKbgqmuP8bN9umybCh5Jzngw7KQxe99Rf5uzfyzgba1i65rJW4Wqk7Ab5S8ye",
-        "3zsEPEDsjfEay7te9XqNjRTCE7vwuT6u4DHzBJC19yp7GS8BuNRMRjnpVrKCBzb3d44kxc4KPGSHkCmk6tEfswCg",
     ]
     .iter()
     .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
@@ -3220,15 +3214,21 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b
     //
     // 2. Validator A doesn't vote past `next_slot_on_a` before we can kill it. This is essential
     // because if validator A votes past `next_slot_on_a`, and then we copy over validator B's ledger
-    // below only for slots <= `next_slot_on_a`, validator A will not know how it's last vote chains
+    // below only for slots <= `next_slot_on_a`, validator A will not know how its last vote chains
     // to the other forks, and may violate switching proofs on restart.
     let mut default_config = ValidatorConfig::default_for_test();
-    // Ensure B can make leader blocks up till the fork slot, and give the remaining slots to C.
+    // Ensure B can make leader blocks up till the fork slot, and give the remaining slots to C. This is
+    // also important so `C` doesn't run into NoPropagatedConfirmation errors on making its first forked
+    // slot, since `A` will be making a simulated vote that's not actually present in gossip.
+    //
     // Don't give validator A any slots because it's going to be deleting its ledger, so it may create
     // versions of slots it's already created, but on a different fork.
     let validator_to_slots = vec![
         // Ensure validator b is leader for slots <= `next_slot_on_a`
-        (validator_b_pubkey, next_slot_on_a as usize + 1),
+        (
+            validator_b_pubkey,
+            validator_b_last_leader_slot as usize + 1,
+        ),
         (validator_c_pubkey, DEFAULT_SLOTS_PER_EPOCH as usize),
     ];
     // Trick C into not producing any blocks, in case its leader slots come up before it gets killed
@@ -3236,7 +3236,7 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b
     let c_leader_schedule = create_custom_leader_schedule(c_validator_to_slots.into_iter());
 
     let leader_schedule = create_custom_leader_schedule(validator_to_slots.into_iter());
-    for slot in 0..=next_slot_on_a {
+    for slot in 0..=validator_b_last_leader_slot {
         assert_eq!(leader_schedule[slot], validator_b_pubkey);
     }
 
@@ -3246,9 +3246,8 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b
     let mut validator_configs =
         make_identical_validator_configs(&default_config, node_stakes.len());
 
-    // Disable voting on validators C, and D
+    // Disable voting on validator C
     validator_configs[2].voting_disabled = true;
-    validator_configs[3].voting_disabled = true;
     // C should not produce any blocks at this time
     validator_configs[2].fixed_leader_schedule = Some(FixedSchedule {
         leader_schedule: Arc::new(c_leader_schedule),
@@ -3283,55 +3282,38 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b
         validator_c_pubkey, val_c_ledger_path
     );
 
-    // Immediately kill validator C. No need to kill validator A because
-    // 1) It has no slots in the leader schedule, so no way to make forks
-    // 2) We need it to vote
     info!("Exiting validator C");
     let mut validator_c_info = cluster.exit_node(&validator_c_pubkey);
 
-    // Step 1:
-    // Let validator A, B, (D) run. Wait for both `A` and `B` to have voted on `next_slot_on_a` or
-    // one of its descendants
-    info!(
-        "Waiting on both validators A and B to vote on fork at slot {}",
-        next_slot_on_a
-    );
-    let now = Instant::now();
-    let mut last_b_vote = 0;
-    let mut last_a_vote = 0;
-    loop {
-        let elapsed = now.elapsed();
-        assert!(
-            elapsed <= Duration::from_secs(30),
-            "One of the validators failed to vote on a slot >= {} in {} secs,
-            last validator A vote: {},
-            last validator B vote: {}",
-            next_slot_on_a,
-            elapsed.as_secs(),
-            last_a_vote,
-            last_b_vote,
-        );
-        sleep(Duration::from_millis(100));
 
-        if let Some((last_vote, _)) = last_vote_in_tower(&val_b_ledger_path, &validator_b_pubkey) {
-            last_b_vote = last_vote;
-            if last_vote < next_slot_on_a {
-                continue;
-            }
-        }
+    info!("Waiting on validator A to vote");
+    // Step 1: Wait for validator A to vote so the tower file exists, and so we can determine the
+    // `base_slot` and `next_slot_on_a`
 
+    loop {
         if let Some((last_vote, _)) = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey) {
-            last_a_vote = last_vote;
-            if last_vote >= next_slot_on_a {
+            // The vote needs to have a parent so that we validator C can create a fork
+            if last_vote >= 1 {
                 break;
             }
         }
+
+        sleep(Duration::from_millis(100));
     }
 
     // kill A and B
     let _validator_b_info = cluster.exit_node(&validator_b_pubkey);
     let validator_a_info = cluster.exit_node(&validator_a_pubkey);
 
+    let next_slot_on_a = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey)
+        .unwrap()
+        .0; // S3
+    let base_slot = next_slot_on_a - 1; // S2
+
+    info!(
+        "base slot: {}, next_slot_on_a: {}",
+        base_slot, next_slot_on_a
+    );
+
     // Step 2:
     // Truncate ledger, copy over B's ledger to C
     info!("Create validator C's ledger");
@@ -3345,33 +3327,19 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b
         remove_tower(&val_c_ledger_path, &validator_b_pubkey);
 
         let blockstore = open_blockstore(&val_c_ledger_path);
-        purge_slots_with_count(&blockstore, base_slot + 1, truncated_slots);
+        purge_slots_with_count(&blockstore, next_slot_on_a, truncated_slots);
     }
 
     info!("Create validator A's ledger");
     {
-        // Find latest vote in B, and wait for it to reach blockstore
-        let b_last_vote =
-            wait_for_last_vote_in_tower_to_land_in_ledger(&val_b_ledger_path, &validator_b_pubkey)
-                .unwrap();
-
         // Now we copy these blocks to A
         let b_blockstore = open_blockstore(&val_b_ledger_path);
         let a_blockstore = open_blockstore(&val_a_ledger_path);
-        copy_blocks(b_last_vote, &b_blockstore, &a_blockstore, false);
+        copy_blocks(next_slot_on_a, &b_blockstore, &a_blockstore, false);
 
         // Purge uneccessary slots
         purge_slots_with_count(&a_blockstore, next_slot_on_a + 1, truncated_slots);
     }
 
-    // This should be guaranteed because we waited for validator `A` to vote on a slot > `next_slot_on_a`
-    // before killing it earlier.
-    info!("Checking A's tower for a vote on slot descended from slot `next_slot_on_a`");
-    let last_vote_slot = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey)
-        .unwrap()
-        .0;
-    assert!(last_vote_slot >= next_slot_on_a);
-    info!("Success, A voted on slot {}", last_vote_slot);
-
     {
         let blockstore = open_blockstore(&val_a_ledger_path);
         if !with_tower {

From 2b771c95ff01fc1e178a3ca7ee3fca9ef038b995 Mon Sep 17 00:00:00 2001
From: Carl Lin
Date: Tue, 24 Dec 2024 18:58:36 -0500
Subject: [PATCH 3/4] Fixup comments

---
 local-cluster/tests/local_cluster.rs | 32 +++++++++++++-----------------
 1 file changed, 12 insertions(+), 20 deletions(-)

diff --git a/local-cluster/tests/local_cluster.rs b/local-cluster/tests/local_cluster.rs
index 35d7191a3f7556..c1426c42937dfd 100644
--- a/local-cluster/tests/local_cluster.rs
+++ b/local-cluster/tests/local_cluster.rs
@@ -3128,21 +3128,23 @@ fn run_test_load_program_accounts(scan_commitment: CommitmentConfig) {
 
 #[test]
 #[serial]
-fn test_no_optimistic_confirmation_violation_with_tower() {
+fn test_no_lockout_violation_with_tower() {
     do_test_lockout_violation_with_or_without_tower(true);
 }
 
 #[test]
 #[serial]
-fn test_optimistic_confirmation_violation_without_tower() {
+fn test_lockout_violation_without_tower() {
     do_test_lockout_violation_with_or_without_tower(false);
 }
 
 // A bit convoluted test case; but this roughly follows this test theoretical scenario:
 // Validator A, B, C have 31, 36, 33 % of stake respectively. Leader schedule is split, first half
 // of the test B is always leader, second half C is.
+// We don't give validator A any slots because it's going to be deleting its ledger,
+// so it may create versions of slots it's already created on a different fork
 //
-// Step 1: Kill C, only A, B and D should be running
+// Step 1: Kill C, only A, B should be running
 //
 // S0 -> S1 -> S2 -> S3 (A & B vote, optimistically confirmed)
 //
@@ -3188,7 +3190,6 @@ fn do_test_lockout_violation_with_or_without_tower(with_tower: bool) {
     let truncated_slots: Slot = 100; // just enough to purge all following slots after the S2 and S3
 
     // Each pubkeys are prefixed with A, B, C
-
     let validator_keys = [
         "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
         "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
@@ -3205,33 +3206,23 @@ fn do_test_lockout_violation_with_or_without_tower(with_tower: bool) {
     let (validator_a_pubkey, validator_b_pubkey, validator_c_pubkey) =
         (validators[0], validators[1], validators[2]);
 
-    // Disable voting on all validators other than validator B to ensure neither of the below two
-    // scenarios occur:
-    // 1. If the cluster immediately forks on restart while we're killing validators A and C,
-    // with Validator B on one side, and `A` and `C` on a heavier fork, it's possible that the lockouts
-    // on `A` and `C`'s latest votes do not extend past validator B's latest vote. Then validator B
-    // will be stuck unable to vote, but also unable generate a switching proof to the heavier fork.
-    //
-    // 2. Validator A doesn't vote past `next_slot_on_a` before we can kill it. This is essential
-    // because if validator A votes past `next_slot_on_a`, and then we copy over validator B's ledger
-    // below only for slots <= `next_slot_on_a`, validator A will not know how its last vote chains
-    // to the other forks, and may violate switching proofs on restart.
+    // Disable voting on all validators other than validator B
     let mut default_config = ValidatorConfig::default_for_test();
     // Ensure B can make leader blocks up till the fork slot, and give the remaining slots to C. This is
     // also important so `C` doesn't run into NoPropagatedConfirmation errors on making its first forked
-    // slot, since `A` will be making a simulated vote that's not actually present in gossip.
+    // slot.
     //
     // Don't give validator A any slots because it's going to be deleting its ledger, so it may create
     // versions of slots it's already created, but on a different fork.
     let validator_to_slots = vec![
-        // Ensure validator b is leader for slots <= `next_slot_on_a`
         (
             validator_b_pubkey,
             validator_b_last_leader_slot as usize + 1,
         ),
         (validator_c_pubkey, DEFAULT_SLOTS_PER_EPOCH as usize),
     ];
-    // Trick C into not producing any blocks, in case its leader slots come up before it gets killed
+    // Trick C into not producing any blocks during this time, in case its leader slots come up before we can
+    // kill the validator. We don't want any forks during the time validator B is producing its initial blocks.
     let c_validator_to_slots = vec![(validator_b_pubkey, DEFAULT_SLOTS_PER_EPOCH as usize)];
 
     let c_leader_schedule = create_custom_leader_schedule(c_validator_to_slots.into_iter());
@@ -3291,7 +3282,7 @@ fn do_test_lockout_violation_with_or_without_tower(with_tower: bool) {
     loop {
         if let Some((last_vote, _)) = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey) {
-            // The vote needs to have a parent so that we validator C can create a fork
+            // The vote needs to have a parent so that validator C can create a fork
             if last_vote >= 1 {
                 break;
             }
@@ -3301,6 +3292,7 @@ fn do_test_lockout_violation_with_or_without_tower(with_tower: bool) {
     }
 
     // kill A and B
+    info!("Exiting validators A and B");
     let _validator_b_info = cluster.exit_node(&validator_b_pubkey);
     let validator_a_info = cluster.exit_node(&validator_a_pubkey);
 
@@ -3379,7 +3371,7 @@ fn do_test_lockout_violation_with_or_without_tower(with_tower: bool) {
         let elapsed = now.elapsed();
         assert!(
             elapsed <= Duration::from_secs(30),
-            "C failed to create a fork past {} in {} second,s
+            "C failed to create a fork past {} in {} seconds
             last_vote {},
             votes_on_c_fork: {:?}",
             base_slot,

From d9cdfeddf7e1ad5b8d8fe95154cf90f166ad6fd3 Mon Sep 17 00:00:00 2001
From: Carl Lin
Date: Sun, 5 Jan 2025 21:10:30 -0500
Subject: [PATCH 4/4] Clarify comments

---
 local-cluster/tests/local_cluster.rs | 42 ++++++++++++----------------
 1 file changed, 18 insertions(+), 24 deletions(-)

diff --git a/local-cluster/tests/local_cluster.rs b/local-cluster/tests/local_cluster.rs
index c1426c42937dfd..4b77efc35dde95 100644
--- a/local-cluster/tests/local_cluster.rs
+++ b/local-cluster/tests/local_cluster.rs
@@ -3142,35 +3142,30 @@ fn test_lockout_violation_without_tower() {
 // Validator A, B, C have 31, 36, 33 % of stake respectively. Leader schedule is split, first half
 // of the test B is always leader, second half C is.
 // We don't give validator A any slots because it's going to be deleting its ledger,
-// so it may create versions of slots it's already created on a different fork
+// so it may create different blocks for slots it has already created blocks for, on a different fork
 //
-// Step 1: Kill C, only A, B should be running
+// Step 1: Kill C, only A and B should be running
 //
-// S0 -> S1 -> S2 -> S3 (A & B vote, optimistically confirmed)
+// base_slot -> next_slot_on_a (Wait for A to vote)
 //
 // Step 2:
-// Kill A and B once we verify that they have voted on S3 or beyond. Copy B's ledger to C but only
-// up to slot S2
-// Have `C` generate some blocks like:
+// Kill A and B once we verify that A has voted on some `next_slot_on_a` >= 1.
+// Copy B's ledger to A and C but only up to slot `next_slot_on_a`.
 //
-// S0 -> S1 -> S2 -> S4
+// Step 3:
+// Restart validator C to make it produce blocks on a fork from `base_slot`
+// that doesn't include `next_slot_on_a`. Wait for it to vote on its own fork.
 //
-// Step 3: Then restart `A` which had 31% of the stake, and remove S3 from its ledger, so
-// that it only sees `C`'s fork at S2. From `A`'s perspective it sees:
+// base_slot -> next_slot_on_c
 //
-// S0 -> S1 -> S2
-// |
-// -> S4 -> S5 (C's vote for S4)
+// Step 4: Restart `A`, which had 31% of the stake. It's missing `next_slot_on_a` in
+// its ledger since we copied the ledger from B excluding this slot, so it sees
 //
-// The fork choice rule weights look like:
+// base_slot -> next_slot_on_c
 //
-// S0 -> S1 -> S2 (ABC)
-// |
-// -> S4 (C) -> S5
-//
-// Step 4:
+// Step 5:
 // Without the persisted tower:
-// `A` would choose to vote on the fork with `S4 -> S5`.
+// `A` would choose to vote on the new fork from C on `next_slot_on_c`
 //
 // With the persisted tower:
 // `A` should not be able to generate a switching proof.
@@ -3182,7 +3177,7 @@ fn do_test_lockout_violation_with_or_without_tower(with_tower: bool) {
     ];
 
     let validator_b_last_leader_slot: Slot = 8;
-    let truncated_slots: Slot = 100; // just enough to purge all following slots after the S2 and S3
+    let truncated_slots: Slot = 100;
 
     // Each pubkeys are prefixed with A, B, C
     let validator_keys = [
@@ -3282,7 +3277,6 @@ fn do_test_lockout_violation_with_or_without_tower(with_tower: bool) {
     // `base_slot` and `next_slot_on_a`
     loop {
         if let Some((last_vote, _)) = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey) {
-            // The vote needs to have a parent so that validator C can create a fork
            if last_vote >= 1 {
                 break;
             }
@@ -3292,8 +3286,8 @@ fn do_test_lockout_violation_with_or_without_tower(with_tower: bool) {
 
     let next_slot_on_a = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey)
         .unwrap()
-        .0; // S3
-    let base_slot = next_slot_on_a - 1; // S2
+        .0;
+    let base_slot = next_slot_on_a - 1;
 
     info!(
         "base slot: {}, next_slot_on_a: {}",
@@ -3358,7 +3352,7 @@ fn do_test_lockout_violation_with_or_without_tower(with_tower: bool) {
         SocketAddrSpace::Unspecified,
     );
 
-    let mut votes_on_c_fork = std::collections::BTreeSet::new(); // S4 and S5
+    let mut votes_on_c_fork = std::collections::BTreeSet::new();
     let mut last_vote = 0;
     let now = Instant::now();
     loop {
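
For reference, the ledger setup that patches 2-4 converge on, condensed into one place. This is a sketch rather than code from the patches: it assumes the local-cluster test helpers used in the diff (last_vote_in_tower, open_blockstore, copy_blocks, purge_slots_with_count) are in scope with the signatures shown there, that B's ledger has already been copied into C's ledger path (that copy happens in context lines not shown in the hunks), and that truncated_slots covers every slot produced after the fork point.

    use std::{thread::sleep, time::Duration};

    // Wait until validator A's tower records a vote with a parent, then derive the fork point.
    let next_slot_on_a = loop {
        if let Some((last_vote, _)) = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey) {
            if last_vote >= 1 {
                break last_vote;
            }
        }
        sleep(Duration::from_millis(100));
    };
    let base_slot = next_slot_on_a - 1;

    // C's ledger: B's blocks with everything from `next_slot_on_a` onward purged, so the
    // restarted C builds its own fork off `base_slot`.
    {
        let c_blockstore = open_blockstore(&val_c_ledger_path);
        purge_slots_with_count(&c_blockstore, next_slot_on_a, truncated_slots);
    }

    // A's ledger: B's blocks copied up to `next_slot_on_a`, with later slots purged, so A's
    // last vote still chains to the ledger it sees on restart.
    {
        let b_blockstore = open_blockstore(&val_b_ledger_path);
        let a_blockstore = open_blockstore(&val_a_ledger_path);
        copy_blocks(next_slot_on_a, &b_blockstore, &a_blockstore, false);
        purge_slots_with_count(&a_blockstore, next_slot_on_a + 1, truncated_slots);
    }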
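
The 31/36/33 stake split is what the with_tower branch leans on: A only ever sees C voting on the new fork, and switching off a locked-out vote requires more than the switch threshold of other validators' stake on that fork. Assuming the stock 38% switch threshold used by the fork-choice code (the exact constant lives in the consensus crate, not in this test), C's 33% is not enough, so a persisted tower pins A while a deleted tower lets it vote on C's fork. A back-of-the-envelope check:

    fn main() {
        // Assumed stock threshold; not defined anywhere in this test file.
        const SWITCH_FORK_THRESHOLD: f64 = 0.38;
        let total_stake = 31.0 + 36.0 + 33.0; // A + B + C, in DEFAULT_NODE_STAKE units
        let stake_on_c_fork = 33.0; // only C votes there: B is down and A's own stake does not count
        // 0.33 < 0.38, so A cannot generate a switching proof while its tower is intact.
        assert!(stake_on_c_fork / total_stake < SWITCH_FORK_THRESHOLD);
    }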