Simplify persisting tower local cluster tests #875
Changes from 3 commits
@@ -3128,22 +3128,23 @@ fn run_test_load_program_accounts(scan_commitment: CommitmentConfig) {

#[test]
#[serial]
fn test_no_optimistic_confirmation_violation_with_tower() {
    do_test_optimistic_confirmation_violation_with_or_without_tower(true);
fn test_no_lockout_violation_with_tower() {
    do_test_lockout_violation_with_or_without_tower(true);
}

#[test]
#[serial]
fn test_optimistic_confirmation_violation_without_tower() {
    do_test_optimistic_confirmation_violation_with_or_without_tower(false);
fn test_lockout_violation_without_tower() {
    do_test_lockout_violation_with_or_without_tower(false);
}

// A bit convoluted test case; but this roughly follows this test theoretical scenario:
// Validator A, B, C have 31, 36, 33 % of stake respectively. Leader schedule is split, first half
// of the test B is always leader, second half C is. Additionally we have a non voting validator D with 0
// stake to propagate gossip info.
// of the test B is always leader, second half C is.
// We don't give validator A any slots because it's going to be deleting its ledger,
// so it may create versions of slots it's already created on a different fork
//
// Step 1: Kill C, only A, B and D should be running
// Step 1: Kill C, only A, B should be running
//
// S0 -> S1 -> S2 -> S3 (A & B vote, optimistically confirmed)
Review comment: I'm confused, we are now checking: is it possible we only have S2 and S3 (no S0 and S1)?

Reply: Correct, the only important slots are S2 and S3, because S2 is the last common ancestor slot and S3 is the fork that gets deleted but still exists or doesn't exist in the tower depending on which test we run. You're right that the S0, S1, S2, S3 naming is quite confusing; I deleted all of it and replaced it with just the variable names.
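For readers following this thread: later in the diff the hard-coded S2/S3 constants are replaced by values read back from validator A's persisted tower after A is stopped. A minimal sketch of that pattern, pieced together from the added lines further down (last_vote_in_tower, val_a_ledger_path, and validator_a_pubkey are the existing test helpers and variables shown in this PR, not new API):

    // After validator A has voted and been exited, derive the fork point
    // from its tower file instead of hard-coding slot numbers.
    let next_slot_on_a = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey)
        .unwrap()
        .0; // the slot that exists only on A's fork ("S3" in the old comments)
    let base_slot = next_slot_on_a - 1; // its parent, the last common ancestor ("S2")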
//
@@ -3174,7 +3175,7 @@ fn test_optimistic_confirmation_violation_without_tower() {
// With the persisted tower:
// `A` should not be able to generate a switching proof.
//
fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: bool) {
fn do_test_lockout_violation_with_or_without_tower(with_tower: bool) {
    solana_logger::setup_with("info");

    // First set up the cluster with 4 nodes
@@ -3183,24 +3184,16 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b
        31 * DEFAULT_NODE_STAKE,
        36 * DEFAULT_NODE_STAKE,
        33 * DEFAULT_NODE_STAKE,
        0,
    ];

    let base_slot: Slot = 26; // S2
    let next_slot_on_a: Slot = 27; // S3
    let validator_b_last_leader_slot: Slot = 8;
    let truncated_slots: Slot = 100; // just enough to purge all following slots after the S2 and S3

    // Each pubkeys are prefixed with A, B, C and D.
    // D is needed to:
    // 1) Propagate A's votes for S2 to validator C after A shuts down so that
    // C can avoid NoPropagatedConfirmation errors and continue to generate blocks
    // 2) Provide gossip discovery for `A` when it restarts because `A` will restart
    // at a different gossip port than the entrypoint saved in C's gossip table
    // Each pubkeys are prefixed with A, B, C
    let validator_keys = [
        "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
        "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
        "4mx9yoFBeYasDKBGDWCTWGJdWuJCKbgqmuP8bN9umybCh5Jzngw7KQxe99Rf5uzfyzgba1i65rJW4Wqk7Ab5S8ye",
        "3zsEPEDsjfEay7te9XqNjRTCE7vwuT6u4DHzBJC19yp7GS8BuNRMRjnpVrKCBzb3d44kxc4KPGSHkCmk6tEfswCg",
    ]
    .iter()
    .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
@@ -3213,32 +3206,28 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b
    let (validator_a_pubkey, validator_b_pubkey, validator_c_pubkey) =
        (validators[0], validators[1], validators[2]);

    // Disable voting on all validators other than validator B to ensure neither of the below two
    // scenarios occur:
    // 1. If the cluster immediately forks on restart while we're killing validators A and C,
    // with Validator B on one side, and `A` and `C` on a heavier fork, it's possible that the lockouts
    // on `A` and `C`'s latest votes do not extend past validator B's latest vote. Then validator B
    // will be stuck unable to vote, but also unable generate a switching proof to the heavier fork.
    //
    // 2. Validator A doesn't vote past `next_slot_on_a` before we can kill it. This is essential
    // because if validator A votes past `next_slot_on_a`, and then we copy over validator B's ledger
    // below only for slots <= `next_slot_on_a`, validator A will not know how it's last vote chains
    // to the other forks, and may violate switching proofs on restart.
    // Disable voting on all validators other than validator B
    let mut default_config = ValidatorConfig::default_for_test();
    // Ensure B can make leader blocks up till the fork slot, and give the remaining slots to C.
    // Ensure B can make leader blocks up till the fork slot, and give the remaining slots to C. This is
    // also important so `C` doesn't run into NoPropagatedConfirmation errors on making its first forked
    // slot.
    //
    // Don't give validator A any slots because it's going to be deleting its ledger, so it may create
    // versions of slots it's already created, but on a different fork.
    let validator_to_slots = vec![
        // Ensure validator b is leader for slots <= `next_slot_on_a`
        (validator_b_pubkey, next_slot_on_a as usize + 1),
        (
            validator_b_pubkey,
            validator_b_last_leader_slot as usize + 1,
        ),
        (validator_c_pubkey, DEFAULT_SLOTS_PER_EPOCH as usize),
    ];
    // Trick C into not producing any blocks, in case its leader slots come up before it gets killed
    // Trick C into not producing any blocks during this time, in case its leader slots come up before we can
    // kill the validator. We don't want any forks during the time validator B is producing its initial blocks.
    let c_validator_to_slots = vec![(validator_b_pubkey, DEFAULT_SLOTS_PER_EPOCH as usize)];

    let c_leader_schedule = create_custom_leader_schedule(c_validator_to_slots.into_iter());
    let leader_schedule = create_custom_leader_schedule(validator_to_slots.into_iter());
    for slot in 0..=next_slot_on_a {
    for slot in 0..=validator_b_last_leader_slot {
        assert_eq!(leader_schedule[slot], validator_b_pubkey);
    }
@@ -3248,9 +3237,8 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b
    let mut validator_configs =
        make_identical_validator_configs(&default_config, node_stakes.len());

    // Disable voting on validators C, and D
    // Disable voting on validator C
    validator_configs[2].voting_disabled = true;
    validator_configs[3].voting_disabled = true;
    // C should not produce any blocks at this time
    validator_configs[2].fixed_leader_schedule = Some(FixedSchedule {
        leader_schedule: Arc::new(c_leader_schedule),
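Because removed and added lines are interleaved in the two hunks above, here is the post-change schedule and config setup pieced together from the added lines (a sketch of the new state rather than the exact committed hunk; the FixedSchedule literal is cut off in the diff and is closed here on the assumption that leader_schedule is its only required field):

    // B leads every slot up to and including `validator_b_last_leader_slot`,
    // C takes the rest of the epoch, and A gets no leader slots at all.
    let validator_to_slots = vec![
        (
            validator_b_pubkey,
            validator_b_last_leader_slot as usize + 1,
        ),
        (validator_c_pubkey, DEFAULT_SLOTS_PER_EPOCH as usize),
    ];
    // C's own copy of the schedule only ever names B, so C cannot fork early.
    let c_validator_to_slots = vec![(validator_b_pubkey, DEFAULT_SLOTS_PER_EPOCH as usize)];

    let c_leader_schedule = create_custom_leader_schedule(c_validator_to_slots.into_iter());
    let leader_schedule = create_custom_leader_schedule(validator_to_slots.into_iter());
    for slot in 0..=validator_b_last_leader_slot {
        assert_eq!(leader_schedule[slot], validator_b_pubkey);
    }

    // Only B votes; C is silenced and pinned to the B-only schedule.
    validator_configs[2].voting_disabled = true;
    validator_configs[2].fixed_leader_schedule = Some(FixedSchedule {
        leader_schedule: Arc::new(c_leader_schedule),
        // Assumption: no other FixedSchedule fields need to be set here.
    });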
@@ -3285,55 +3273,39 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b
        validator_c_pubkey, val_c_ledger_path
    );

    // Immediately kill validator C. No need to kill validator A because
    // 1) It has no slots in the leader schedule, so no way to make forks
    // 2) We need it to vote
    info!("Exiting validator C");
    let mut validator_c_info = cluster.exit_node(&validator_c_pubkey);

    // Step 1:
    // Let validator A, B, (D) run. Wait for both `A` and `B` to have voted on `next_slot_on_a` or
    // one of its descendants
    info!(
        "Waiting on both validators A and B to vote on fork at slot {}",
        next_slot_on_a
    );
    let now = Instant::now();
    let mut last_b_vote = 0;
    let mut last_a_vote = 0;
    loop {
        let elapsed = now.elapsed();
        assert!(
            elapsed <= Duration::from_secs(30),
            "One of the validators failed to vote on a slot >= {} in {} secs,
            last validator A vote: {},
            last validator B vote: {}",
            next_slot_on_a,
            elapsed.as_secs(),
            last_a_vote,
            last_b_vote,
        );
        sleep(Duration::from_millis(100));

        if let Some((last_vote, _)) = last_vote_in_tower(&val_b_ledger_path, &validator_b_pubkey) {
            last_b_vote = last_vote;
            if last_vote < next_slot_on_a {
                continue;
            }
        }
    info!("Waiting on validator A to vote");

    // Step 1: Wait for validator A to vote so the tower file exists, and so we can determine the
    // `base_slot` and `next_slot_on_a`
    loop {
        if let Some((last_vote, _)) = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey) {
            last_a_vote = last_vote;
            if last_vote >= next_slot_on_a {
            // The vote needs to have a parent so that validator C can create a fork
            if last_vote >= 1 {
                break;
            }
        }

        sleep(Duration::from_millis(100));
    }

    // kill A and B
    info!("Exiting validators A and B");
    let _validator_b_info = cluster.exit_node(&validator_b_pubkey);
    let validator_a_info = cluster.exit_node(&validator_a_pubkey);

    let next_slot_on_a = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey)
        .unwrap()
        .0; // S3
    let base_slot = next_slot_on_a - 1; // S2

    info!(
        "base slot: {}, next_slot_on_a: {}",
        base_slot, next_slot_on_a
    );

    // Step 2:
    // Truncate ledger, copy over B's ledger to C
    info!("Create validator C's ledger");
@@ -3347,33 +3319,19 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b
        remove_tower(&val_c_ledger_path, &validator_b_pubkey);

        let blockstore = open_blockstore(&val_c_ledger_path);
        purge_slots_with_count(&blockstore, base_slot + 1, truncated_slots);
        purge_slots_with_count(&blockstore, next_slot_on_a, truncated_slots);
    }
    info!("Create validator A's ledger");
    {
        // Find latest vote in B, and wait for it to reach blockstore
        let b_last_vote =
            wait_for_last_vote_in_tower_to_land_in_ledger(&val_b_ledger_path, &validator_b_pubkey)
                .unwrap();

        // Now we copy these blocks to A
        let b_blockstore = open_blockstore(&val_b_ledger_path);
        let a_blockstore = open_blockstore(&val_a_ledger_path);
        copy_blocks(b_last_vote, &b_blockstore, &a_blockstore, false);
        copy_blocks(next_slot_on_a, &b_blockstore, &a_blockstore, false);

        // Purge uneccessary slots
        purge_slots_with_count(&a_blockstore, next_slot_on_a + 1, truncated_slots);
    }

    // This should be guaranteed because we waited for validator `A` to vote on a slot > `next_slot_on_a`
    // before killing it earlier.
    info!("Checking A's tower for a vote on slot descended from slot `next_slot_on_a`");
    let last_vote_slot = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey)
        .unwrap()
        .0;
    assert!(last_vote_slot >= next_slot_on_a);
    info!("Success, A voted on slot {}", last_vote_slot);

    {
        let blockstore = open_blockstore(&val_a_ledger_path);
        if !with_tower {

@@ -3413,7 +3371,7 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b
            let elapsed = now.elapsed();
            assert!(
                elapsed <= Duration::from_secs(30),
                "C failed to create a fork past {} in {} second,s
                "C failed to create a fork past {} in {} seconds
                last_vote {},
                votes_on_c_fork: {:?}",
                base_slot,
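The copy/purge lines for validator A's ledger are also interleaved above; the post-change core of that block looks roughly like this (a sketch assembled from the added lines, not the exact committed code):

    {
        // Copy B's blocks up to and including the forked slot over to A ...
        let b_blockstore = open_blockstore(&val_b_ledger_path);
        let a_blockstore = open_blockstore(&val_a_ledger_path);
        copy_blocks(next_slot_on_a, &b_blockstore, &a_blockstore, false);

        // ... then purge everything after it so A's ledger ends exactly at the fork slot.
        purge_slots_with_count(&a_blockstore, next_slot_on_a + 1, truncated_slots);
    }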
Review comment: Do you mean: it may create different versions of blocks for some blocks already created on a different fork?

Reply: Yup, clarified!