From 91d0f966a49e546931588f07ec869f7a557ad6e9 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Wed, 27 Nov 2024 15:13:44 -0500 Subject: [PATCH 01/35] chore: use get_block_burn_view() --- testnet/stacks-node/src/nakamoto_node/miner.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs index 745ae03fc9..c0f8fe52b6 100644 --- a/testnet/stacks-node/src/nakamoto_node/miner.rs +++ b/testnet/stacks-node/src/nakamoto_node/miner.rs @@ -621,7 +621,12 @@ impl BlockMinerThread { return Ok(()); } - let mut sortition_handle = sort_db.index_handle_at_ch(&block.header.consensus_hash)?; + let parent_block_info = + NakamotoChainState::get_block_header(chain_state.db(), &block.header.parent_block_id)? + .ok_or_else(|| ChainstateError::NoSuchBlockError)?; + let burn_view_ch = + NakamotoChainState::get_block_burn_view(sort_db, &block, &parent_block_info)?; + let mut sortition_handle = sort_db.index_handle_at_ch(&burn_view_ch)?; let chainstate_config = chain_state.config(); let (headers_conn, staging_tx) = chain_state.headers_conn_and_staging_tx_begin()?; let accepted = NakamotoChainState::accept_block( From c9f72e48f1e85db9f02b6ed16bd221b0edb28e6f Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Thu, 5 Dec 2024 01:11:42 -0500 Subject: [PATCH 02/35] chore: add new integration test --- .github/workflows/bitcoin-tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/bitcoin-tests.yml b/.github/workflows/bitcoin-tests.yml index 04e74f94e8..74d7f7635b 100644 --- a/.github/workflows/bitcoin-tests.yml +++ b/.github/workflows/bitcoin-tests.yml @@ -144,6 +144,7 @@ jobs: - tests::nakamoto_integrations::signer_chainstate - tests::nakamoto_integrations::clarity_cost_spend_down - tests::nakamoto_integrations::v3_blockbyheight_api_endpoint + - tests::nakamoto_integrations::test_tenure_change_and_extend_from_flashblocks # TODO: enable these once v1 signer is 
supported by a new nakamoto epoch # - tests::signer::v1::dkg # - tests::signer::v1::sign_request_rejected From 853326969bd092fd472f0f0f9afcb3e303497d44 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Thu, 5 Dec 2024 01:11:57 -0500 Subject: [PATCH 03/35] chore: make `MinerReason` debug-printable, and factor out fault injection --- .../stacks-node/src/nakamoto_node/miner.rs | 32 +++++++++++++------ 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs index c0f8fe52b6..a4aac8171a 100644 --- a/testnet/stacks-node/src/nakamoto_node/miner.rs +++ b/testnet/stacks-node/src/nakamoto_node/miner.rs @@ -68,6 +68,7 @@ pub static TEST_SKIP_P2P_BROADCAST: std::sync::Mutex> = std::sync:: const ABORT_TRY_AGAIN_MS: u64 = 200; #[allow(clippy::large_enum_variant)] +#[derive(Debug)] pub enum MinerDirective { /// The miner won sortition so they should begin a new tenure BeginTenure { @@ -272,6 +273,21 @@ impl BlockMinerThread { Ok(()) } + #[cfg(test)] + fn fault_injection_stall_miner() { + if *TEST_MINE_STALL.lock().unwrap() == Some(true) { + // Do an extra check just so we don't log EVERY time. + warn!("Mining is stalled due to testing directive"); + while *TEST_MINE_STALL.lock().unwrap() == Some(true) { + std::thread::sleep(std::time::Duration::from_millis(10)); + } + warn!("Mining is no longer stalled due to testing directive. 
Continuing..."); + } + } + + #[cfg(not(test))] + fn fault_injection_stall_miner() {} + pub fn run_miner( mut self, prior_miner: Option>>, @@ -284,6 +300,7 @@ impl BlockMinerThread { "parent_tenure_id" => %self.parent_tenure_id, "thread_id" => ?thread::current().id(), "burn_block_consensus_hash" => %self.burn_block.consensus_hash, + "burn_election_block_consensus_hash" => %self.burn_election_block.consensus_hash, "reason" => %self.reason, ); if let Some(prior_miner) = prior_miner { @@ -294,15 +311,7 @@ impl BlockMinerThread { // now, actually run this tenure loop { - #[cfg(test)] - if *TEST_MINE_STALL.lock().unwrap() == Some(true) { - // Do an extra check just so we don't log EVERY time. - warn!("Mining is stalled due to testing directive"); - while *TEST_MINE_STALL.lock().unwrap() == Some(true) { - std::thread::sleep(std::time::Duration::from_millis(10)); - } - warn!("Mining is no longer stalled due to testing directive. Continuing..."); - } + Self::fault_injection_stall_miner(); let new_block = loop { // If we're mock mining, we may not have processed the block that the // actual tenure winner committed to yet. 
So, before attempting to @@ -1191,6 +1200,11 @@ impl BlockMinerThread { } }; + debug!( + "make_tenure_start_info: reason = {:?}, tenure_change_tx = {:?}", + &self.reason, &tenure_change_tx + ); + Ok(NakamotoTenureInfo { coinbase_tx, tenure_change_tx, From 2f1674200ad5032d3b3f0949d59ca439ba32dd94 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Thu, 5 Dec 2024 01:12:26 -0500 Subject: [PATCH 04/35] fix: consider the possibility that the miner can neither begin a new tenure nor extend the ongoing tenure, and fail-out of continue_tenure --- .../stacks-node/src/nakamoto_node/relayer.rs | 111 +++++++++++++----- 1 file changed, 81 insertions(+), 30 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/relayer.rs b/testnet/stacks-node/src/nakamoto_node/relayer.rs index b346cdc346..6eaad31e03 100644 --- a/testnet/stacks-node/src/nakamoto_node/relayer.rs +++ b/testnet/stacks-node/src/nakamoto_node/relayer.rs @@ -66,6 +66,9 @@ use crate::run_loop::nakamoto::{Globals, RunLoop}; use crate::run_loop::RegisteredKey; use crate::BitcoinRegtestController; +#[cfg(test)] +pub static TEST_MINER_THREAD_STALL: std::sync::Mutex> = std::sync::Mutex::new(None); + /// Command types for the Nakamoto relayer thread, issued to it by other threads #[allow(clippy::large_enum_variant)] pub enum RelayerDirective { @@ -487,6 +490,7 @@ impl RelayerThread { } self.globals.set_last_sortition(sn.clone()); self.globals.counters.bump_blocks_processed(); + self.globals.counters.bump_sortitions_processed(); // there may be a bufferred stacks block to process, so wake up the coordinator to check self.globals.coord_comms.announce_new_stacks_block(); @@ -512,6 +516,10 @@ impl RelayerThread { } let directive_opt = self.choose_miner_directive(sn, won_sortition, committed_index_hash); + debug!( + "Relayer: Processed sortition {}: Miner directive is {:?}", + &consensus_hash, &directive_opt + ); Ok(directive_opt) } @@ -767,6 +775,23 @@ impl RelayerThread { )) } + #[cfg(test)] + fn 
fault_injection_stall_miner_startup() { + if *TEST_MINER_THREAD_STALL.lock().unwrap() == Some(true) { + // Do an extra check just so we don't log EVERY time. + warn!("Miner thread startup is stalled due to testing directive"); + while *TEST_MINER_THREAD_STALL.lock().unwrap() == Some(true) { + std::thread::sleep(std::time::Duration::from_millis(10)); + } + warn!( + "Miner thread startup is no longer stalled due to testing directive. Continuing..." + ); + } + } + + #[cfg(not(test))] + fn fault_injection_stall_miner_startup() {} + /// Create the block miner thread state. /// Only proceeds if all of the following are true: /// * the miner is not blocked @@ -790,6 +815,7 @@ impl RelayerThread { ); return Err(NakamotoNodeError::FaultInjection); } + Self::fault_injection_stall_miner_startup(); let burn_header_hash = burn_tip.burn_header_hash; let burn_chain_sn = SortitionDB::get_canonical_burn_chain_tip(self.sortdb.conn()) @@ -940,30 +966,44 @@ impl RelayerThread { } /// Determine the type of tenure change to issue based on whether this - /// miner was the last successful miner (miner of the canonical tip). + /// miner was the last successful miner (miner of the canonical Stacks tip). fn determine_tenure_type( &self, - canonical_snapshot: BlockSnapshot, - last_snapshot: BlockSnapshot, - new_burn_view: ConsensusHash, + canonical_stacks_snapshot: BlockSnapshot, + last_good_block_election_snapshot: BlockSnapshot, + burn_view_snapshot: BlockSnapshot, mining_pkh: Hash160, - ) -> (StacksBlockId, BlockSnapshot, MinerReason) { - if canonical_snapshot.miner_pk_hash != Some(mining_pkh) { - debug!("Relayer: Miner was not the last successful miner. 
Issue a new tenure change payload."); - ( - StacksBlockId(last_snapshot.winning_stacks_block_hash.0), - last_snapshot, - MinerReason::EmptyTenure, - ) + ) -> Option<(StacksBlockId, BlockSnapshot, MinerReason)> { + let mining_pkh_opt = Some(mining_pkh); + if canonical_stacks_snapshot.miner_pk_hash != mining_pkh_opt { + // miner didn't build the current Stacks chain tip, but we can only start a *new* + // tenure if we won sortition in the canonical burnchain snapshot + if last_good_block_election_snapshot.consensus_hash == burn_view_snapshot.consensus_hash + && burn_view_snapshot.sortition + { + debug!("Relayer(determine_tenure_type): Miner was not the last successful Stacks miner, but it won the last sortition. Issue a new tenure change payload."); + Some(( + StacksBlockId( + last_good_block_election_snapshot + .winning_stacks_block_hash + .0, + ), + last_good_block_election_snapshot, + MinerReason::EmptyTenure, + )) + } else { + debug!("Relayer(determine_tenure_type): Miner was not the last successful Stacks miner, and did NOT win the last sortition, so it cannot mine."); + None + } } else { - debug!("Relayer: Miner was the last successful miner. Issue a tenure extend from the chain tip."); - ( + debug!("Relayer(determine_tenure_type): Miner was the last successful miner. 
Issue a tenure extend from the chain tip."); + Some(( self.sortdb.get_canonical_stacks_tip_block_id(), - canonical_snapshot, + canonical_stacks_snapshot, MinerReason::Extended { - burn_view_consensus_hash: new_burn_view, + burn_view_consensus_hash: burn_view_snapshot.consensus_hash, }, - ) + )) } } @@ -1016,7 +1056,7 @@ impl RelayerThread { error!("Relayer: Failed to stop tenure: {e:?}"); return Ok(()); } - debug!("Relayer: successfully stopped tenure."); + debug!("Relayer: successfully stopped tenure; will try to continue."); // Get the necessary snapshots and state let burn_tip = @@ -1058,7 +1098,7 @@ impl RelayerThread { return Ok(()); } - let canonical_snapshot = SortitionDB::get_block_snapshot_consensus( + let canonical_stacks_snapshot = SortitionDB::get_block_snapshot_consensus( self.sortdb.conn(), &canonical_stacks_tip_ch, )? @@ -1066,22 +1106,32 @@ impl RelayerThread { error!("Relayer: failed to get block snapshot for canonical tip"); NakamotoNodeError::SnapshotNotFoundForChainTip })?; - let (parent_tenure_start, block_election_snapshot, reason) = self.determine_tenure_type( - canonical_snapshot, - last_good_block_election_snapshot, - new_burn_view, - mining_pkh, - ); + + let Some((parent_tenure_start, block_election_snapshot, reason)) = self + .determine_tenure_type( + canonical_stacks_snapshot, + last_good_block_election_snapshot, + burn_tip.clone(), + mining_pkh, + ) + else { + info!("Relayer: Not the last Stacks miner, and not the sortition winner of the current burn view. 
Cannot continue tenure."); + return Ok(()); + }; if let Err(e) = self.start_new_tenure( - parent_tenure_start, - block_election_snapshot, - burn_tip, - reason, + parent_tenure_start.clone(), + block_election_snapshot.clone(), + burn_tip.clone(), + reason.clone(), ) { error!("Relayer: Failed to start new tenure: {e:?}"); } else { - debug!("Relayer: successfully started new tenure."); + debug!("Relayer: successfully started new tenure."; + "parent_tenure_start" => %parent_tenure_start, + "burn_tip" => %burn_tip.consensus_hash, + "block_election_snapshot" => %block_election_snapshot.consensus_hash, + "reason" => %reason); } Ok(()) } @@ -1142,6 +1192,7 @@ impl RelayerThread { }, } + self.globals.counters.bump_naka_miner_directives(); true } From 3b811556dde5a7151495e271c5eb9d80a7316b62 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Thu, 5 Dec 2024 01:12:59 -0500 Subject: [PATCH 05/35] chore: track the number of miner directives --- testnet/stacks-node/src/run_loop/neon.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/testnet/stacks-node/src/run_loop/neon.rs b/testnet/stacks-node/src/run_loop/neon.rs index 5e021e50ab..ab5b664e28 100644 --- a/testnet/stacks-node/src/run_loop/neon.rs +++ b/testnet/stacks-node/src/run_loop/neon.rs @@ -114,6 +114,8 @@ pub struct Counters { pub missed_microblock_tenures: RunLoopCounter, pub cancelled_commits: RunLoopCounter, + pub sortitions_processed: RunLoopCounter, + pub naka_submitted_vrfs: RunLoopCounter, pub naka_submitted_commits: RunLoopCounter, pub naka_mined_blocks: RunLoopCounter, @@ -121,6 +123,7 @@ pub struct Counters { pub naka_proposed_blocks: RunLoopCounter, pub naka_mined_tenures: RunLoopCounter, pub naka_signer_pushed_blocks: RunLoopCounter, + pub naka_miner_directives: RunLoopCounter, #[cfg(test)] pub naka_skip_commit_op: TestFlag, @@ -151,6 +154,10 @@ impl Counters { Counters::inc(&self.blocks_processed); } + pub fn bump_sortitions_processed(&self) { + Counters::inc(&self.sortitions_processed); + } + 
pub fn bump_microblocks_processed(&self) { Counters::inc(&self.microblocks_processed); } @@ -195,6 +202,10 @@ impl Counters { Counters::inc(&self.naka_mined_tenures); } + pub fn bump_naka_miner_directives(&self) { + Counters::inc(&self.naka_miner_directives); + } + pub fn set_microblocks_processed(&self, value: u64) { Counters::set(&self.microblocks_processed, value) } From 08fa52a90a9cadd248d7ede6812687227d415dd2 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Thu, 5 Dec 2024 01:13:14 -0500 Subject: [PATCH 06/35] chore: integration test to verify that a continue-tenure might not be possible --- .../src/tests/nakamoto_integrations.rs | 355 +++++++++++++++++- testnet/stacks-node/src/tests/signer/mod.rs | 9 +- 2 files changed, 358 insertions(+), 6 deletions(-) diff --git a/testnet/stacks-node/src/tests/nakamoto_integrations.rs b/testnet/stacks-node/src/tests/nakamoto_integrations.rs index ef6199d331..5c3523e6a9 100644 --- a/testnet/stacks-node/src/tests/nakamoto_integrations.rs +++ b/testnet/stacks-node/src/tests/nakamoto_integrations.rs @@ -98,6 +98,7 @@ use crate::config::{EventKeyType, InitialBalance}; use crate::nakamoto_node::miner::{ TEST_BLOCK_ANNOUNCE_STALL, TEST_BROADCAST_STALL, TEST_MINE_STALL, TEST_SKIP_P2P_BROADCAST, }; +use crate::nakamoto_node::relayer::TEST_MINER_THREAD_STALL; use crate::neon::{Counters, RunLoopCounter}; use crate::operations::BurnchainOpSigner; use crate::run_loop::boot_nakamoto; @@ -698,11 +699,30 @@ pub fn next_block_and_mine_commit( timeout_secs, &[coord_channels], &[commits_submitted], + true, ) } /// Mine a bitcoin block, and wait until: -/// (1) a new block has been processed by the coordinator +/// (1) a new block commit has been issued. +/// Does not wait for a Stacks block to be processed. +pub fn next_block_and_commits_only( + btc_controller: &mut BitcoinRegtestController, + timeout_secs: u64, + coord_channels: &Arc>, + commits_submitted: &Arc, +) -> Result<(), String> { + next_block_and_wait_for_commits( +
btc_controller, + timeout_secs, + &[coord_channels], + &[commits_submitted], + false, + ) +} + +/// Mine a bitcoin block, and wait until: +/// (1) a new block has been processed by the coordinator (if `wait_for_stacks_block` is true) /// (2) 2 block commits have been issued ** or ** more than 10 seconds have /// passed since (1) occurred /// This waits for this check to pass on *all* supplied channels @@ -711,6 +731,7 @@ pub fn next_block_and_wait_for_commits( timeout_secs: u64, coord_channels: &[&Arc>], commits_submitted: &[&Arc], + wait_for_stacks_block: bool, ) -> Result<(), String> { let commits_submitted: Vec<_> = commits_submitted.to_vec(); let blocks_processed_before: Vec<_> = coord_channels @@ -746,6 +767,24 @@ pub fn next_block_and_wait_for_commits( } } + if !wait_for_stacks_block { + for i in 0..commits_submitted.len() { + // just wait for the commit + let commits_sent = commits_submitted[i].load(Ordering::SeqCst); + if commits_sent <= commits_before[i] { + return Ok(false); + } + + // if two commits have been sent, one of them must have been after + if commits_sent >= commits_before[i] + 1 { + continue; + } + return Ok(false); + } + return Ok(true); + } + + // waiting for both commit and stacks block for i in 0..commits_submitted.len() { let blocks_processed = coord_channels[i] .lock() @@ -754,15 +793,17 @@ pub fn next_block_and_wait_for_commits( let commits_sent = commits_submitted[i].load(Ordering::SeqCst); if blocks_processed > blocks_processed_before[i] { + // either we don't care about the stacks block count, or the block count advanced. + // Check the block-commits. 
let block_processed_time = block_processed_time[i] .as_ref() - .ok_or("TEST-ERROR: Processed time wasn't set")?; + .ok_or("TEST-ERROR: Processed block time wasn't set")?; if commits_sent <= commits_before[i] { return Ok(false); } let commit_sent_time = commit_sent_time[i] .as_ref() - .ok_or("TEST-ERROR: Processed time wasn't set")?; + .ok_or("TEST-ERROR: Processed commit time wasn't set")?; // try to ensure the commit was sent after the block was processed if commit_sent_time > block_processed_time { continue; @@ -9650,8 +9691,6 @@ fn test_shadow_recovery() { let coord_channel = signer_test.running_nodes.coord_channel.clone(); let commits_submitted = signer_test.running_nodes.commits_submitted.clone(); - let burnchain = naka_conf.get_burnchain(); - // make another tenure next_block_and_mine_commit( btc_regtest_controller, @@ -10109,3 +10148,309 @@ fn clarity_cost_spend_down() { run_loop_thread.join().unwrap(); } + +/// If we get a flash block -- a sortition in which we win, immediately followed by a different +/// sortition, make sure we first mine a tenure-change block and then a tenure-extend block. 
+#[test] +#[ignore] +fn test_tenure_change_and_extend_from_flashblocks() { + if env::var("BITCOIND_TEST") != Ok("1".into()) { + return; + } + + let mut account_keys: Vec<_> = (0..11) + .map(|i| StacksPrivateKey::from_seed(&[6, 6, 6, i as u8])) + .collect(); + let initial_balances: Vec<_> = account_keys + .iter() + .map(|privk| { + let address = to_addr(&privk).into(); + (address, 1_000_000) + }) + .collect(); + + let deployer_sk = account_keys.pop().unwrap(); + let deployer_addr = tests::to_addr(&deployer_sk); + + let mut signer_test: SignerTest = SignerTest::new_with_config_modifications( + 1, + initial_balances, + |_config| {}, + |_| {}, + None, + None, + ); + signer_test.boot_to_epoch_3(); + + let naka_conf = signer_test.running_nodes.conf.clone(); + let http_origin = format!("http://{}", &naka_conf.node.rpc_bind); + let btc_regtest_controller = &mut signer_test.running_nodes.btc_regtest_controller; + let coord_channel = signer_test.running_nodes.coord_channel.clone(); + let commits_submitted = signer_test.running_nodes.commits_submitted.clone(); + let sortitions_processed = signer_test.running_nodes.sortitions_processed.clone(); + let nakamoto_test_skip_commit_op = signer_test + .running_nodes + .nakamoto_test_skip_commit_op + .clone(); + let nakamoto_miner_directives = signer_test.running_nodes.nakamoto_miner_directives.clone(); + + let tx_fee = 1_000; + + let burnchain = naka_conf.get_burnchain(); + let mut sortdb = burnchain.open_sortition_db(true).unwrap(); + for _ in 0..3 { + next_block_and_mine_commit( + btc_regtest_controller, + 60, + &coord_channel, + &commits_submitted, + ) + .unwrap(); + } + + let burn_view_contract = r#" +(define-data-var my-var uint u0) +(define-public (f) (begin (var-set my-var burn-block-height) (ok 1))) (begin (f)) +"# + .to_string(); + + let contract_tx = make_contract_publish( + &deployer_sk, + 0, + tx_fee, + naka_conf.burnchain.chain_id, + "burn-view-contract", + &burn_view_contract, + ); + submit_tx(&http_origin, 
&contract_tx); + + let blocks_processed_before = coord_channel + .lock() + .expect("Mutex poisoned") + .get_stacks_blocks_processed(); + + wait_for(120, || { + let blocks_processed = coord_channel + .lock() + .expect("Mutex poisoned") + .get_stacks_blocks_processed(); + Ok(blocks_processed > blocks_processed_before) + }) + .expect("Timed out waiting for interim blocks to be mined"); + + next_block_and_mine_commit( + btc_regtest_controller, + 60, + &coord_channel, + &commits_submitted, + ) + .unwrap(); + + // stall miner and relayer + TEST_MINE_STALL.lock().unwrap().replace(true); + + let mut accounts_before = vec![]; + + // fill mempool with transactions that depend on the burn view + for sender_sk in account_keys.iter() { + let sender_addr = tests::to_addr(&sender_sk); + let account = loop { + let Ok(account) = get_account_result(&http_origin, &sender_addr) else { + debug!("follower_bootup: Failed to load miner account"); + thread::sleep(Duration::from_millis(100)); + continue; + }; + break account; + }; + + // Fill up the mempool with contract calls + let contract_tx = make_contract_call( + &sender_sk, + account.nonce, + tx_fee, + naka_conf.burnchain.chain_id, + &deployer_addr, + "burn-view-contract", + "f", + &[], + ); + submit_tx(&http_origin, &contract_tx); + accounts_before.push(account); + } + + // make tenure but don't wait for a stacks block + next_block_and_commits_only( + btc_regtest_controller, + 60, + &coord_channel, + &commits_submitted, + ) + .unwrap(); + + // prevent the relayer from spawning a new thread just yet + TEST_MINER_THREAD_STALL.lock().unwrap().replace(true); + nakamoto_test_skip_commit_op.set(true); + + // mine another Bitcoin block right away, since it will contain a block-commit + btc_regtest_controller.bootstrap_chain(1); + + // make sure the relayer processes both sortitions + let sortitions_processed_before = sortitions_processed.load(Ordering::SeqCst); + wait_for(60, || { + sleep_ms(100); + let sortitions_cnt = 
sortitions_processed.load(Ordering::SeqCst); + Ok(sortitions_cnt > sortitions_processed_before) + }) + .unwrap(); + + // HACK: simulate the presence of a different miner. + // Make it so that from the perspective of this node's miner, a *different* miner produced the + // canonical Stacks chain tip. This triggers the `None` return value in + // `RelayerThread::determine_tenure_type`. + { + let tx = sortdb.tx_begin().unwrap(); + + let (canonical_stacks_tip_ch, _) = + SortitionDB::get_canonical_stacks_chain_tip_hash(&tx).unwrap(); + tx.execute( + "UPDATE snapshots SET miner_pk_hash = ?1 WHERE consensus_hash = ?2", + rusqlite::params![&Hash160([0x11; 20]), &canonical_stacks_tip_ch], + ) + .unwrap(); + tx.commit().unwrap(); + } + + // mine another Bitcoin block right away, and force it to be a flash block + btc_regtest_controller.bootstrap_chain(1); + + let miner_directives_before = nakamoto_miner_directives.load(Ordering::SeqCst); + TEST_MINER_THREAD_STALL.lock().unwrap().replace(false); + + let sortitions_processed_before = sortitions_processed.load(Ordering::SeqCst); + wait_for(60, || { + sleep_ms(100); + let sortitions_cnt = sortitions_processed.load(Ordering::SeqCst); + Ok(sortitions_cnt > sortitions_processed_before) + }) + .unwrap(); + + // re-enable block-commits and unstall the miner + nakamoto_test_skip_commit_op.set(false); + TEST_MINE_STALL.lock().unwrap().replace(false); + + sleep_ms(10_000); + + // wait for the miner directive to be processed + wait_for(60, || { + sleep_ms(100); + let directives_cnt = nakamoto_miner_directives.load(Ordering::SeqCst); + Ok(directives_cnt > miner_directives_before) + }) + .unwrap(); + + // start up the next tenure + next_block_and_commits_only( + btc_regtest_controller, + 60, + &coord_channel, + &commits_submitted, + ) + .unwrap(); + + // wait for all of the aforementioned transactions to get mined + wait_for(120, || { + // check that every sender's nonce has advanced past its pre-stall value + for (sender_sk, account_before) in
account_keys.iter().zip(accounts_before.iter()) { + let sender_addr = tests::to_addr(&sender_sk); + let account = loop { + let Ok(account) = get_account_result(&http_origin, &sender_addr) else { + thread::sleep(Duration::from_millis(100)); + continue; + }; + break account; + }; + + if account.nonce > account_before.nonce { + continue; + } + return Ok(false); + } + Ok(true) + }) + .unwrap(); + + // see if we can boot a follower off of this node now + let mut follower_conf = naka_conf.clone(); + follower_conf.node.miner = false; + follower_conf.events_observers.clear(); + follower_conf.node.working_dir = format!("{}-follower", &naka_conf.node.working_dir); + follower_conf.node.seed = vec![0x01; 32]; + follower_conf.node.local_peer_seed = vec![0x02; 32]; + + let rpc_port = gen_random_port(); + let p2p_port = gen_random_port(); + + let localhost = "127.0.0.1"; + follower_conf.node.rpc_bind = format!("{localhost}:{rpc_port}"); + follower_conf.node.p2p_bind = format!("{localhost}:{p2p_port}"); + follower_conf.node.data_url = format!("http://{localhost}:{rpc_port}"); + follower_conf.node.p2p_address = format!("{localhost}:{p2p_port}"); + follower_conf.node.pox_sync_sample_secs = 30; + + let node_info = get_chain_info(&naka_conf); + follower_conf.node.add_bootstrap_node( + &format!( + "{}@{}", + &node_info.node_public_key.unwrap(), + naka_conf.node.p2p_bind + ), + naka_conf.burnchain.chain_id, + PEER_VERSION_TESTNET, + ); + + let mut follower_run_loop = boot_nakamoto::BootRunLoop::new(follower_conf.clone()).unwrap(); + let follower_run_loop_stopper = follower_run_loop.get_termination_switch(); + let follower_coord_channel = follower_run_loop.coordinator_channels(); + + debug!( + "Booting follower-thread ({},{})", + &follower_conf.node.p2p_bind, &follower_conf.node.rpc_bind + ); + debug!( + "Booting follower-thread: neighbors = {:?}", + &follower_conf.node.bootstrap_node + ); + + // spawn a follower thread + let follower_thread = thread::Builder::new() + 
.name("follower-thread".into()) + .spawn(move || follower_run_loop.start(None, 0)) + .unwrap(); + + debug!("Booted follower-thread"); + + let miner_info = get_chain_info_result(&naka_conf).unwrap(); + + wait_for(300, || { + let Ok(info) = get_chain_info_result(&follower_conf) else { + sleep_ms(1000); + return Ok(false); + }; + Ok(miner_info.stacks_tip == info.stacks_tip + && miner_info.stacks_tip_consensus_hash == info.stacks_tip_consensus_hash) + }) + .unwrap(); + + coord_channel + .lock() + .expect("Mutex poisoned") + .stop_chains_coordinator(); + + follower_coord_channel + .lock() + .expect("Mutex poisoned") + .stop_chains_coordinator(); + follower_run_loop_stopper.store(false, Ordering::SeqCst); + + follower_thread.join().unwrap(); +} diff --git a/testnet/stacks-node/src/tests/signer/mod.rs b/testnet/stacks-node/src/tests/signer/mod.rs index 946a566c13..bb854b6bef 100644 --- a/testnet/stacks-node/src/tests/signer/mod.rs +++ b/testnet/stacks-node/src/tests/signer/mod.rs @@ -84,11 +84,13 @@ pub struct RunningNodes { pub vrfs_submitted: Arc, pub commits_submitted: Arc, pub blocks_processed: Arc, + pub sortitions_processed: Arc, pub nakamoto_blocks_proposed: Arc, pub nakamoto_blocks_mined: Arc, pub nakamoto_blocks_rejected: Arc, pub nakamoto_blocks_signer_pushed: Arc, pub nakamoto_test_skip_commit_op: TestFlag, + pub nakamoto_miner_directives: Arc, pub coord_channel: Arc>, pub conf: NeonConfig, } @@ -122,7 +124,7 @@ impl + Send + 'static, T: SignerEventTrait + 'static> SignerTest( + pub fn new_with_config_modifications( num_signers: usize, initial_balances: Vec<(StacksAddress, u64)>, mut signer_config_modifier: F, @@ -341,6 +343,7 @@ impl + Send + 'static, T: SignerEventTrait + 'static> SignerTest( let run_loop_stopper = run_loop.get_termination_switch(); let Counters { blocks_processed, + sortitions_processed, naka_submitted_vrfs: vrfs_submitted, naka_submitted_commits: commits_submitted, naka_proposed_blocks: naka_blocks_proposed, naka_mined_blocks: 
naka_blocks_mined, naka_rejected_blocks: naka_blocks_rejected, + naka_miner_directives, naka_skip_commit_op: nakamoto_test_skip_commit_op, naka_signer_pushed_blocks, .. @@ -750,11 +755,13 @@ fn setup_stx_btc_node( vrfs_submitted: vrfs_submitted.0, commits_submitted: commits_submitted.0, blocks_processed: blocks_processed.0, + sortitions_processed: sortitions_processed.0, nakamoto_blocks_proposed: naka_blocks_proposed.0, nakamoto_blocks_mined: naka_blocks_mined.0, nakamoto_blocks_rejected: naka_blocks_rejected.0, nakamoto_blocks_signer_pushed: naka_signer_pushed_blocks.0, nakamoto_test_skip_commit_op, + nakamoto_miner_directives: naka_miner_directives.0, coord_channel, conf: naka_conf, } From b110f66cef101544f0c214f92bb45ab89028acee Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Thu, 5 Dec 2024 18:22:48 -0500 Subject: [PATCH 07/35] chore: more fixes to differentiate the miner's burn view from the burn tip --- .../stacks-node/src/nakamoto_node/miner.rs | 119 +++++++++++++++--- .../stacks-node/src/nakamoto_node/relayer.rs | 54 ++++---- .../src/nakamoto_node/sign_coordinator.rs | 28 ++++- .../src/tests/nakamoto_integrations.rs | 92 ++++++++------ 4 files changed, 207 insertions(+), 86 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs index a4aac8171a..63df64dee4 100644 --- a/testnet/stacks-node/src/nakamoto_node/miner.rs +++ b/testnet/stacks-node/src/nakamoto_node/miner.rs @@ -23,13 +23,14 @@ use libsigner::v0::messages::{MinerSlotID, SignerMessage}; use libsigner::StackerDBSession; use rand::{thread_rng, Rng}; use stacks::burnchains::Burnchain; -use stacks::chainstate::burn::db::sortdb::SortitionDB; +use stacks::chainstate::burn::db::sortdb::{get_ancestor_sort_id, SortitionDB}; use stacks::chainstate::burn::{BlockSnapshot, ConsensusHash}; use stacks::chainstate::coordinator::OnChainRewardSetProvider; use stacks::chainstate::nakamoto::coordinator::load_nakamoto_reward_set; use 
stacks::chainstate::nakamoto::miner::{NakamotoBlockBuilder, NakamotoTenureInfo}; use stacks::chainstate::nakamoto::staging_blocks::NakamotoBlockObtainMethod; -use stacks::chainstate::nakamoto::{NakamotoBlock, NakamotoChainState}; +use stacks::chainstate::nakamoto::tenure::NakamotoTenureEventId; +use stacks::chainstate::nakamoto::{NakamotoBlock, NakamotoChainState, StacksDBIndexed}; use stacks::chainstate::stacks::boot::{RewardSet, MINERS_NAME}; use stacks::chainstate::stacks::db::{StacksChainState, StacksHeaderInfo}; use stacks::chainstate::stacks::{ @@ -110,7 +111,10 @@ pub enum MinerReason { /// sortition. burn_view_consensus_hash: ConsensusHash, }, - /// The miner thread was spawned to initialize a prior empty tenure + /// The miner thread was spawned to initialize a prior empty tenure. + /// It may be the case that the tenure to be initialized is no longer the canonical burnchain + /// tip, so if this is the miner reason, the miner thread will not exit on its own unless it + /// first mines a `BlockFound` tenure change. EmptyTenure, } @@ -156,6 +160,9 @@ pub struct BlockMinerThread { event_dispatcher: EventDispatcher, /// The reason the miner thread was spawned reason: MinerReason, + /// Whether or not we sent our initial block with a tenure-change + /// (only applies if self.reason is MinerReason::EmptyTenure) + sent_initial_block: bool, /// Handle to the p2p thread for block broadcast p2p_handle: NetworkHandle, signer_set_cache: Option, @@ -183,6 +190,7 @@ impl BlockMinerThread { event_dispatcher: rt.event_dispatcher.clone(), parent_tenure_id, reason, + sent_initial_block: false, p2p_handle: rt.get_p2p_handle(), signer_set_cache: None, } @@ -249,6 +257,11 @@ impl BlockMinerThread { false } + /// Does this miner need to send its tenure's initial block still? 
+ fn needs_initial_block(&self) -> bool { + !self.sent_initial_block && self.reason == MinerReason::EmptyTenure + } + /// Stop a miner tenure by blocking the miner and then joining the tenure thread pub fn stop_miner( globals: &Globals, @@ -307,6 +320,8 @@ impl BlockMinerThread { Self::stop_miner(&self.globals, prior_miner)?; } let mut stackerdbs = StackerDBs::connect(&self.config.get_stacker_db_file_path(), true)?; + let mut chain_state = neon_node::open_chainstate_with_faults(&self.config) + .expect("FATAL: could not open chainstate DB"); let mut last_block_rejected = false; // now, actually run this tenure @@ -324,9 +339,7 @@ impl BlockMinerThread { self.burnchain.pox_constants.clone(), ) .expect("FATAL: could not open sortition DB"); - let burn_tip_changed = self.check_burn_tip_changed(&burn_db); - let mut chain_state = neon_node::open_chainstate_with_faults(&self.config) - .expect("FATAL: could not open chainstate DB"); + let burn_tip_changed = self.check_burn_tip_changed(&burn_db, &mut chain_state); match burn_tip_changed .and_then(|_| self.load_block_parent_info(&mut burn_db, &mut chain_state)) { @@ -447,6 +460,7 @@ impl BlockMinerThread { Self::fault_injection_block_announce_stall(&new_block); self.globals.coord().announce_new_stacks_block(); + self.sent_initial_block = true; self.last_block_mined = Some(new_block); } @@ -462,7 +476,10 @@ impl BlockMinerThread { let wait_start = Instant::now(); while wait_start.elapsed() < self.config.miner.wait_on_interim_blocks { thread::sleep(Duration::from_millis(ABORT_TRY_AGAIN_MS)); - if self.check_burn_tip_changed(&sort_db).is_err() { + if self + .check_burn_tip_changed(&sort_db, &mut chain_state) + .is_err() + { return Err(NakamotoNodeError::BurnchainTipChanged); } } @@ -565,6 +582,7 @@ impl BlockMinerThread { let mut coordinator = SignCoordinator::new( &reward_set, miner_privkey, + self.needs_initial_block(), &self.config, self.globals.should_keep_running.clone(), self.event_dispatcher.stackerdb_channel.clone(), @@ 
-1023,12 +1041,12 @@ impl BlockMinerThread { SortitionDB::open(&burn_db_path, true, self.burnchain.pox_constants.clone()) .expect("FATAL: could not open sortition DB"); - self.check_burn_tip_changed(&burn_db)?; - neon_node::fault_injection_long_tenure(); - let mut chain_state = neon_node::open_chainstate_with_faults(&self.config) .expect("FATAL: could not open chainstate DB"); + self.check_burn_tip_changed(&burn_db, &mut chain_state)?; + neon_node::fault_injection_long_tenure(); + let mut mem_pool = self .config .connect_mempool_db() @@ -1129,7 +1147,7 @@ impl BlockMinerThread { // last chance -- confirm that the stacks tip is unchanged (since it could have taken long // enough to build this block that another block could have arrived), and confirm that all // Stacks blocks with heights higher than the canonical tip are processed. - self.check_burn_tip_changed(&burn_db)?; + self.check_burn_tip_changed(&burn_db, &mut chain_state)?; Ok(block) } @@ -1201,8 +1219,8 @@ impl BlockMinerThread { }; debug!( - "make_tenure_start_info: reason = {:?}, tenure_change_tx = {:?}", - &self.reason, &tenure_change_tx + "make_tenure_start_info: reason = {:?}, burn_view = {:?}, tenure_change_tx = {:?}", + &self.reason, &self.burn_block.consensus_hash, &tenure_change_tx ); Ok(NakamotoTenureInfo { @@ -1211,9 +1229,80 @@ impl BlockMinerThread { }) } + /// Check to see if the given burn view is at or ahead of the stacks blockchain's burn view. + /// If so, then return Ok(()) + /// If not, then return Err(NakamotoNodeError::BurnchainTipChanged) + pub fn check_burn_view_changed( + sortdb: &SortitionDB, + chain_state: &mut StacksChainState, + burn_view: &BlockSnapshot, + ) -> Result<(), NakamotoNodeError> { + // if the local burn view has advanced, then this miner thread is defunct. Someone else + // extended their tenure in a sortition at or after our burn view, and the node accepted + // it, so we should stop. 
+ let cur_stacks_tip_header = + NakamotoChainState::get_canonical_block_header(chain_state.db(), sortdb)? + .ok_or_else(|| NakamotoNodeError::UnexpectedChainState)?; + + let cur_stacks_tip_id = cur_stacks_tip_header.index_block_hash(); + let ongoing_tenure_id = if let Some(tenure_id) = chain_state + .index_conn() + .get_ongoing_tenure_id(&cur_stacks_tip_id)? + { + // ongoing tenure is a Nakamoto tenure + tenure_id + } else { + // ongoing tenure is an epoch 2.x tenure, so it's the same as the canonical stacks 2.x + // tip + NakamotoTenureEventId { + burn_view_consensus_hash: cur_stacks_tip_header.consensus_hash, + block_id: cur_stacks_tip_id, + } + }; + + if ongoing_tenure_id.burn_view_consensus_hash != burn_view.consensus_hash { + let ongoing_tenure_sortition = SortitionDB::get_block_snapshot_consensus( + sortdb.conn(), + &ongoing_tenure_id.burn_view_consensus_hash, + )? + .ok_or_else(|| NakamotoNodeError::UnexpectedChainState)?; + + // it's possible that our burn view is higher than the ongoing tenure's burn view, but + // if this *isn't* the case, then the Stacks burn view has necessarily advanced + let burn_view_tenure_handle = sortdb.index_handle_at_ch(&burn_view.consensus_hash)?; + if get_ancestor_sort_id( + &burn_view_tenure_handle, + ongoing_tenure_sortition.block_height, + &burn_view_tenure_handle.context.chain_tip, + )? + .is_none() + { + // ongoing tenure is not an ancestor of the given burn view, so it must have + // advanced (or forked) relative to the given burn view. Either way, this burn + // view has changed. 
+ info!("Nakamoto chainstate burn view has changed from miner burn view"; + "nakamoto_burn_view" => %ongoing_tenure_id.burn_view_consensus_hash, + "miner_burn_view" => %burn_view.consensus_hash); + + return Err(NakamotoNodeError::BurnchainTipChanged); + } + } + Ok(()) + } + /// Check if the tenure needs to change -- if so, return a BurnchainTipChanged error - /// The tenure should change if there is a new burnchain tip with a valid sortition - fn check_burn_tip_changed(&self, sortdb: &SortitionDB) -> Result<(), NakamotoNodeError> { + /// The tenure should change if there is a new burnchain tip with a valid sortition, + /// or if the stacks chain state's burn view has advanced beyond our burn view. + fn check_burn_tip_changed( + &self, + sortdb: &SortitionDB, + chain_state: &mut StacksChainState, + ) -> Result<(), NakamotoNodeError> { + Self::check_burn_view_changed(sortdb, chain_state, &self.burn_block)?; + if self.needs_initial_block() { + // don't abandon this tenure until our tenure-change has been mined! 
+ return Ok(()); + } let cur_burn_chain_tip = SortitionDB::get_canonical_burn_chain_tip(sortdb.conn()) .expect("FATAL: failed to query sortition DB for canonical burn chain tip"); diff --git a/testnet/stacks-node/src/nakamoto_node/relayer.rs b/testnet/stacks-node/src/nakamoto_node/relayer.rs index 6eaad31e03..912855a0f2 100644 --- a/testnet/stacks-node/src/nakamoto_node/relayer.rs +++ b/testnet/stacks-node/src/nakamoto_node/relayer.rs @@ -804,6 +804,7 @@ impl RelayerThread { &mut self, registered_key: RegisteredKey, burn_election_block: BlockSnapshot, + burn_view: BlockSnapshot, burn_tip: BlockSnapshot, parent_tenure_id: StacksBlockId, reason: MinerReason, @@ -838,6 +839,7 @@ impl RelayerThread { "parent_tenure_id" => %parent_tenure_id, "reason" => %reason, "burn_election_block.consensus_hash" => %burn_election_block.consensus_hash, + "burn_view.consensus_hash" => %burn_view.consensus_hash, "burn_tip.consensus_hash" => %burn_tip.consensus_hash, ); @@ -845,7 +847,7 @@ impl RelayerThread { self, registered_key, burn_election_block, - burn_tip, + burn_view, parent_tenure_id, reason, ); @@ -856,6 +858,7 @@ impl RelayerThread { &mut self, parent_tenure_start: StacksBlockId, block_election_snapshot: BlockSnapshot, + burn_view: BlockSnapshot, burn_tip: BlockSnapshot, reason: MinerReason, ) -> Result<(), NakamotoNodeError> { @@ -873,6 +876,7 @@ impl RelayerThread { let new_miner_state = self.create_block_miner( vrf_key, block_election_snapshot, + burn_view, burn_tip, parent_tenure_start, reason, @@ -973,37 +977,30 @@ impl RelayerThread { last_good_block_election_snapshot: BlockSnapshot, burn_view_snapshot: BlockSnapshot, mining_pkh: Hash160, - ) -> Option<(StacksBlockId, BlockSnapshot, MinerReason)> { + ) -> (StacksBlockId, BlockSnapshot, BlockSnapshot, MinerReason) { let mining_pkh_opt = Some(mining_pkh); if canonical_stacks_snapshot.miner_pk_hash != mining_pkh_opt { - // miner didn't build the current Stacks chain tip, but we can only start a *new* - // tenure if we won 
sortition in the canonical burnchain snapshot - if last_good_block_election_snapshot.consensus_hash == burn_view_snapshot.consensus_hash - && burn_view_snapshot.sortition - { - debug!("Relayer(determine_tenure_type): Miner was not the last successful Stacks miner, but it won the last sortition. Issue a new tenure change payload."); - Some(( - StacksBlockId( - last_good_block_election_snapshot - .winning_stacks_block_hash - .0, - ), - last_good_block_election_snapshot, - MinerReason::EmptyTenure, - )) - } else { - debug!("Relayer(determine_tenure_type): Miner was not the last successful Stacks miner, and did NOT win the last sortition, so it cannot mine."); - None - } + debug!("Relayer(determine_tenure_type): Miner was not the last successful Stacks miner, but it won the last sortition. Issue a new tenure change payload."); + ( + StacksBlockId( + last_good_block_election_snapshot + .winning_stacks_block_hash + .0, + ), + last_good_block_election_snapshot.clone(), + last_good_block_election_snapshot, + MinerReason::EmptyTenure, + ) } else { debug!("Relayer(determine_tenure_type): Miner was the last successful miner. Issue a tenure extend from the chain tip."); - Some(( + ( self.sortdb.get_canonical_stacks_tip_block_id(), canonical_stacks_snapshot, + burn_view_snapshot.clone(), MinerReason::Extended { burn_view_consensus_hash: burn_view_snapshot.consensus_hash, }, - )) + ) } } @@ -1107,21 +1104,18 @@ impl RelayerThread { NakamotoNodeError::SnapshotNotFoundForChainTip })?; - let Some((parent_tenure_start, block_election_snapshot, reason)) = self + let (parent_tenure_start, block_election_snapshot, burn_view_snapshot, reason) = self .determine_tenure_type( canonical_stacks_snapshot, last_good_block_election_snapshot, burn_tip.clone(), mining_pkh, - ) - else { - info!("Relayer: Not the last Stacks miner, and not the sortition winner of the current burn view. 
Cannot continue tenure."); - return Ok(()); - }; + ); if let Err(e) = self.start_new_tenure( parent_tenure_start.clone(), block_election_snapshot.clone(), + burn_view_snapshot.clone(), burn_tip.clone(), reason.clone(), ) { @@ -1130,6 +1124,7 @@ impl RelayerThread { debug!("Relayer: successfully started new tenure."; "parent_tenure_start" => %parent_tenure_start, "burn_tip" => %burn_tip.consensus_hash, + "burn_view_snapshot" => %burn_view_snapshot.consensus_hash, "block_election_snapshot" => %block_election_snapshot.consensus_hash, "reason" => %reason); } @@ -1161,6 +1156,7 @@ impl RelayerThread { } => match self.start_new_tenure( parent_tenure_start, burnchain_tip.clone(), + burnchain_tip.clone(), burnchain_tip, MinerReason::BlockFound, ) { diff --git a/testnet/stacks-node/src/nakamoto_node/sign_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/sign_coordinator.rs index 2b1efcbfc5..fecb4b8955 100644 --- a/testnet/stacks-node/src/nakamoto_node/sign_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/sign_coordinator.rs @@ -44,6 +44,7 @@ use stacks_common::types::chainstate::{StacksPrivateKey, StacksPublicKey}; use super::Error as NakamotoNodeError; use crate::event_dispatcher::StackerDBChannel; +use crate::nakamoto_node::miner::BlockMinerThread; use crate::neon::Counters; use crate::Config; @@ -61,6 +62,7 @@ static EVENT_RECEIVER_POLL: Duration = Duration::from_millis(500); pub struct SignCoordinator { receiver: Option>, message_key: StacksPrivateKey, + needs_initial_block: bool, is_mainnet: bool, miners_session: StackerDBSession, signer_entries: HashMap, @@ -90,6 +92,7 @@ impl SignCoordinator { pub fn new( reward_set: &RewardSet, message_key: StacksPrivateKey, + needs_initial_block: bool, config: &Config, keep_running: Arc, stackerdb_channel: Arc>, @@ -164,8 +167,9 @@ impl SignCoordinator { warn!("Replaced the miner/coordinator receiver of a prior thread. 
Prior thread may have crashed."); } let sign_coordinator = Self { - message_key, receiver: Some(receiver), + message_key, + needs_initial_block, is_mainnet, miners_session, next_signer_bitvec, @@ -190,6 +194,7 @@ impl SignCoordinator { Ok(Self { receiver: Some(receiver), message_key, + needs_initial_block, is_mainnet, miners_session, next_signer_bitvec, @@ -268,7 +273,22 @@ impl SignCoordinator { } /// Check if the tenure needs to change - fn check_burn_tip_changed(sortdb: &SortitionDB, burn_block: &BlockSnapshot) -> bool { + fn check_burn_tip_changed( + sortdb: &SortitionDB, + chain_state: &mut StacksChainState, + burn_block: &BlockSnapshot, + needs_initial_block: bool, + ) -> bool { + if BlockMinerThread::check_burn_view_changed(sortdb, chain_state, burn_block).is_err() { + // can't continue mining -- burn view changed, or a DB error occurred + return true; + } + + if !needs_initial_block { + // must get that first initial block in, assuming the burn view is still valid. + return false; + } + let cur_burn_chain_tip = SortitionDB::get_canonical_burn_chain_tip(sortdb.conn()) .expect("FATAL: failed to query sortition DB for canonical burn chain tip"); @@ -314,6 +334,8 @@ impl SignCoordinator { reward_cycle: reward_cycle_id, }; + let needs_initial_block = self.needs_initial_block; + let block_proposal_message = SignerMessageV0::BlockProposal(block_proposal); debug!("Sending block proposal message to signers"; "signer_signature_hash" => %block.header.signer_signature_hash(), @@ -382,7 +404,7 @@ impl SignCoordinator { return Ok(stored_block.header.signer_signature); } - if Self::check_burn_tip_changed(sortdb, burn_tip) { + if Self::check_burn_tip_changed(sortdb, chain_state, burn_tip, needs_initial_block) { debug!("SignCoordinator: Exiting due to new burnchain tip"); return Err(NakamotoNodeError::BurnchainTipChanged); } diff --git a/testnet/stacks-node/src/tests/nakamoto_integrations.rs b/testnet/stacks-node/src/tests/nakamoto_integrations.rs index 
5c3523e6a9..fd7b811f1a 100644 --- a/testnet/stacks-node/src/tests/nakamoto_integrations.rs +++ b/testnet/stacks-node/src/tests/nakamoto_integrations.rs @@ -10210,7 +10210,19 @@ fn test_tenure_change_and_extend_from_flashblocks() { let burn_view_contract = r#" (define-data-var my-var uint u0) -(define-public (f) (begin (var-set my-var burn-block-height) (ok 1))) (begin (f)) +(define-data-var my-counter uint u0) + +(define-public (f) + (begin + (var-set my-var burn-block-height) + (if (is-eq u0 (mod burn-block-height u2)) + (var-set my-counter (+ u1 (var-get my-counter))) + (var-set my-counter (+ u2 (var-get my-counter)))) + (ok 1) + ) +) + +(begin (f)) "# .to_string(); @@ -10249,35 +10261,6 @@ fn test_tenure_change_and_extend_from_flashblocks() { // stall miner and relayer TEST_MINE_STALL.lock().unwrap().replace(true); - let mut accounts_before = vec![]; - - // fill mempool with transactions that depend on the burn view - for sender_sk in account_keys.iter() { - let sender_addr = tests::to_addr(&sender_sk); - let account = loop { - let Ok(account) = get_account_result(&http_origin, &sender_addr) else { - debug!("follower_bootup: Failed to load miner account"); - thread::sleep(Duration::from_millis(100)); - continue; - }; - break account; - }; - - // Fill up the mempool with contract calls - let contract_tx = make_contract_call( - &sender_sk, - account.nonce, - tx_fee, - naka_conf.burnchain.chain_id, - &deployer_addr, - "burn-view-contract", - "f", - &[], - ); - submit_tx(&http_origin, &contract_tx); - accounts_before.push(account); - } - // make tenure but don't wait for a stacks block next_block_and_commits_only( btc_regtest_controller, @@ -10334,6 +10317,37 @@ fn test_tenure_change_and_extend_from_flashblocks() { }) .unwrap(); + let mut accounts_before = vec![]; + let mut sent_txids = vec![]; + + // fill mempool with transactions that depend on the burn view + for sender_sk in account_keys.iter() { + let sender_addr = tests::to_addr(&sender_sk); + let account = 
loop { + let Ok(account) = get_account_result(&http_origin, &sender_addr) else { + debug!("follower_bootup: Failed to load miner account"); + thread::sleep(Duration::from_millis(100)); + continue; + }; + break account; + }; + + // Fill up the mempool with contract calls + let contract_tx = make_contract_call( + &sender_sk, + account.nonce, + tx_fee, + naka_conf.burnchain.chain_id, + &deployer_addr, + "burn-view-contract", + "f", + &[], + ); + let txid = submit_tx(&http_origin, &contract_tx); + sent_txids.push(txid); + accounts_before.push(account); + } + // unstall miner and relayer nakamoto_test_skip_commit_op.set(false); TEST_MINE_STALL.lock().unwrap().replace(false); @@ -10348,15 +10362,6 @@ fn test_tenure_change_and_extend_from_flashblocks() { }) .unwrap(); - // start up the next tenure - next_block_and_commits_only( - btc_regtest_controller, - 60, - &coord_channel, - &commits_submitted, - ) - .unwrap(); - // wait for all of the aforementioned transactions to get mined wait_for(120, || { // fill mempool with transactions that depend on the burn view @@ -10379,6 +10384,15 @@ fn test_tenure_change_and_extend_from_flashblocks() { }) .unwrap(); + // start up the next tenure + next_block_and_commits_only( + btc_regtest_controller, + 60, + &coord_channel, + &commits_submitted, + ) + .unwrap(); + // see if we can boot a follower off of this node now let mut follower_conf = naka_conf.clone(); follower_conf.node.miner = false; From 9b53d70cdd4c03acfd117f77afd2e9960184b6f3 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Fri, 6 Dec 2024 16:48:12 -0500 Subject: [PATCH 08/35] chore: more checks on burn view changes --- .../stacks-node/src/nakamoto_node/miner.rs | 56 +++++++++++-------- .../stacks-node/src/nakamoto_node/relayer.rs | 21 ++++++- .../src/nakamoto_node/sign_coordinator.rs | 2 +- 3 files changed, 55 insertions(+), 24 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs index 
63df64dee4..7f7d59bb13 100644 --- a/testnet/stacks-node/src/nakamoto_node/miner.rs +++ b/testnet/stacks-node/src/nakamoto_node/miner.rs @@ -935,6 +935,7 @@ impl BlockMinerThread { match ParentStacksBlockInfo::lookup( chain_state, burn_db, + &self.reason, &self.burn_block, miner_address, &self.parent_tenure_id, @@ -1229,17 +1230,11 @@ impl BlockMinerThread { }) } - /// Check to see if the given burn view is at or ahead of the stacks blockchain's burn view. - /// If so, then return Ok(()) - /// If not, then return Err(NakamotoNodeError::BurnchainTipChanged) - pub fn check_burn_view_changed( + /// Get the ongoing burn view in the chain state + pub fn get_ongoing_tenure_id( sortdb: &SortitionDB, chain_state: &mut StacksChainState, - burn_view: &BlockSnapshot, - ) -> Result<(), NakamotoNodeError> { - // if the local burn view has advanced, then this miner thread is defunct. Someone else - // extended their tenure in a sortition at or after our burn view, and the node accepted - // it, so we should stop. + ) -> Result { let cur_stacks_tip_header = NakamotoChainState::get_canonical_block_header(chain_state.db(), sortdb)? .ok_or_else(|| NakamotoNodeError::UnexpectedChainState)?; @@ -1259,7 +1254,21 @@ impl BlockMinerThread { block_id: cur_stacks_tip_id, } }; + Ok(ongoing_tenure_id) + } + /// Check to see if the given burn view is at or ahead of the stacks blockchain's burn view. + /// If so, then return Ok(()) + /// If not, then return Err(NakamotoNodeError::BurnchainTipChanged) + pub fn check_burn_view_changed( + sortdb: &SortitionDB, + chain_state: &mut StacksChainState, + burn_view: &BlockSnapshot, + ) -> Result<(), NakamotoNodeError> { + // if the local burn view has advanced, then this miner thread is defunct. Someone else + // extended their tenure in a sortition at or after our burn view, and the node accepted + // it, so we should stop. 
+ let ongoing_tenure_id = Self::get_ongoing_tenure_id(sortdb, chain_state)?; if ongoing_tenure_id.burn_view_consensus_hash != burn_view.consensus_hash { let ongoing_tenure_sortition = SortitionDB::get_block_snapshot_consensus( sortdb.conn(), @@ -1328,6 +1337,7 @@ impl ParentStacksBlockInfo { pub fn lookup( chain_state: &mut StacksChainState, burn_db: &mut SortitionDB, + reason: &MinerReason, check_burn_block: &BlockSnapshot, miner_address: StacksAddress, parent_tenure_id: &StacksBlockId, @@ -1341,19 +1351,21 @@ impl ParentStacksBlockInfo { .expect("Failed to look up block's parent snapshot") .expect("Failed to look up block's parent snapshot"); - // don't mine off of an old burnchain block - let burn_chain_tip = SortitionDB::get_canonical_burn_chain_tip(burn_db.conn()) - .expect("FATAL: failed to query sortition DB for canonical burn chain tip"); - - if burn_chain_tip.consensus_hash != check_burn_block.consensus_hash { - info!( - "New canonical burn chain tip detected. Will not try to mine."; - "new_consensus_hash" => %burn_chain_tip.consensus_hash, - "old_consensus_hash" => %check_burn_block.consensus_hash, - "new_burn_height" => burn_chain_tip.block_height, - "old_burn_height" => check_burn_block.block_height - ); - return Err(NakamotoNodeError::BurnchainTipChanged); + if *reason != MinerReason::EmptyTenure { + // don't mine off of an old burnchain block + let burn_chain_tip = SortitionDB::get_canonical_burn_chain_tip(burn_db.conn()) + .expect("FATAL: failed to query sortition DB for canonical burn chain tip"); + + if burn_chain_tip.consensus_hash != check_burn_block.consensus_hash { + info!( + "New canonical burn chain tip detected. 
Will not try to mine."; + "new_consensus_hash" => %burn_chain_tip.consensus_hash, + "old_consensus_hash" => %check_burn_block.consensus_hash, + "new_burn_height" => burn_chain_tip.block_height, + "old_burn_height" => check_burn_block.block_height + ); + return Err(NakamotoNodeError::BurnchainTipChanged); + } } let Ok(Some(parent_tenure_header)) = diff --git a/testnet/stacks-node/src/nakamoto_node/relayer.rs b/testnet/stacks-node/src/nakamoto_node/relayer.rs index 912855a0f2..14746f24f0 100644 --- a/testnet/stacks-node/src/nakamoto_node/relayer.rs +++ b/testnet/stacks-node/src/nakamoto_node/relayer.rs @@ -239,6 +239,8 @@ pub struct RelayerThread { /// Information about the last-sent block commit, and the relayer's view of the chain at the /// time it was sent. last_committed: Option, + /// Last burnchain view for which we considered starting a miner + last_burn_view: Option, /// Timeout for waiting for the first block in a tenure before submitting a block commit new_tenure_timeout: Option, } @@ -298,6 +300,7 @@ impl RelayerThread { is_miner, next_initiative: Instant::now() + Duration::from_millis(next_initiative_delay), last_committed: None, + last_burn_view: None, new_tenure_timeout: None, } } @@ -904,7 +907,7 @@ impl RelayerThread { new_miner_handle.thread().id() ); self.miner_thread.replace(new_miner_handle); - + self.last_burn_view.replace(burn_view); Ok(()) } @@ -1286,6 +1289,8 @@ impl RelayerThread { /// * If this isn't a miner, then it's always nothing. /// * Otherwise, if we haven't done so already, go register a VRF public key /// * If the stacks chain tip or burnchain tip has changed, then issue a block-commit + /// * If the last burn view we started a miner for is not the canonical burn view, then + /// try and start a new tenure (or continue an existing one). 
fn initiative(&mut self) -> Option { if !self.is_miner { return None; @@ -1329,6 +1334,20 @@ impl RelayerThread { }; let stacks_tip = StacksBlockId::new(&stacks_tip_ch, &stacks_tip_bh); + // see if we have to try and continue a tenure + if let Ok(ongoing_tenure_id) = + BlockMinerThread::get_ongoing_tenure_id(&self.sortdb, &mut self.chainstate).map_err( + |e| { + error!("Failed to get ongoing tenure ID: {:?}", &e); + e + }, + ) + { + if ongoing_tenure_id.burn_view_consensus_hash != sort_tip.consensus_hash { + todo!(); + } + } + // check stacks and sortition tips to see if any chainstate change has happened. // did our view of the sortition history change? // if so, then let's try and confirm the highest tenure so far. diff --git a/testnet/stacks-node/src/nakamoto_node/sign_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/sign_coordinator.rs index fecb4b8955..b4311a53d9 100644 --- a/testnet/stacks-node/src/nakamoto_node/sign_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/sign_coordinator.rs @@ -284,7 +284,7 @@ impl SignCoordinator { return true; } - if !needs_initial_block { + if needs_initial_block { // must get that first initial block in, assuming the burn view is still valid. 
return false; } From 4c9155b4aae59e93781f1f2aef5724c325cb5c61 Mon Sep 17 00:00:00 2001 From: Jacinta Ferrant Date: Tue, 10 Dec 2024 16:50:33 -0500 Subject: [PATCH 09/35] Cargo fmt Signed-off-by: Jacinta Ferrant --- testnet/stacks-node/src/nakamoto_node/miner.rs | 5 ++++- .../stacks-node/src/nakamoto_node/signer_coordinator.rs | 7 ++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs index f14e51c8d7..7cf55ed438 100644 --- a/testnet/stacks-node/src/nakamoto_node/miner.rs +++ b/testnet/stacks-node/src/nakamoto_node/miner.rs @@ -538,7 +538,10 @@ impl BlockMinerThread { let wait_start = Instant::now(); while wait_start.elapsed() < self.config.miner.wait_on_interim_blocks { thread::sleep(Duration::from_millis(ABORT_TRY_AGAIN_MS)); - if self.check_burn_tip_changed(&sort_db, &mut chain_state).is_err() { + if self + .check_burn_tip_changed(&sort_db, &mut chain_state) + .is_err() + { return Err(NakamotoNodeError::BurnchainTipChanged); } } diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index 3736af1d85..06a5318516 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -313,7 +313,12 @@ impl SignerCoordinator { return Ok(stored_block.header.signer_signature); } - if Self::check_burn_tip_changed(sortdb, chain_state, burn_tip, self.needs_initial_block) { + if Self::check_burn_tip_changed( + sortdb, + chain_state, + burn_tip, + self.needs_initial_block, + ) { debug!("SignCoordinator: Exiting due to new burnchain tip"); return Err(NakamotoNodeError::BurnchainTipChanged); } From eb6262809a7019e8afe58308ef47bd88e6572413 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Sat, 14 Dec 2024 23:45:17 -0500 Subject: [PATCH 10/35] chore: record last sortition --- stacks-signer/src/chainstate.rs | 3 ++- 1 
file changed, 2 insertions(+), 1 deletion(-) diff --git a/stacks-signer/src/chainstate.rs b/stacks-signer/src/chainstate.rs index f2f042dffb..fca9be3827 100644 --- a/stacks-signer/src/chainstate.rs +++ b/stacks-signer/src/chainstate.rs @@ -322,7 +322,7 @@ impl SortitionsView { return Ok(false); } } - ProposedBy::LastSortition(_last_sortition) => { + ProposedBy::LastSortition(last_sortition) => { // should only consider blocks from the last sortition if the new sortition was invalidated // before we signed their first block. if self.cur_sortition.miner_status @@ -333,6 +333,7 @@ impl SortitionsView { "proposed_block_consensus_hash" => %block.header.consensus_hash, "proposed_block_signer_sighash" => %block.header.signer_signature_hash(), "current_sortition_miner_status" => ?self.cur_sortition.miner_status, + "last_sortition" => %last_sortition.consensus_hash ); return Ok(false); } From 93cf523b9edff3c80a31cb58321f439cd51d58bd Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Sat, 14 Dec 2024 23:45:36 -0500 Subject: [PATCH 11/35] chore: remove EmptyTenure miner reason, since it shouldn't ever be used --- .../stacks-node/src/nakamoto_node/miner.rs | 58 ++++++------------- 1 file changed, 19 insertions(+), 39 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs index f14e51c8d7..3f383ac95b 100644 --- a/testnet/stacks-node/src/nakamoto_node/miner.rs +++ b/testnet/stacks-node/src/nakamoto_node/miner.rs @@ -111,11 +111,6 @@ pub enum MinerReason { /// sortition. burn_view_consensus_hash: ConsensusHash, }, - /// The miner thread was spawned to initialize a prior empty tenure. - /// It may be the case that the tenure to be initialized is no longer the canonical burnchain - /// tip, so if this is the miner reason, the miner thread will not exit on its own unless it - /// first mines a `BlockFound` tenure change. 
- EmptyTenure, } impl std::fmt::Display for MinerReason { @@ -128,7 +123,6 @@ impl std::fmt::Display for MinerReason { f, "Extended: burn_view_consensus_hash = {burn_view_consensus_hash:?}", ), - MinerReason::EmptyTenure => write!(f, "EmptyTenure"), } } } @@ -162,9 +156,6 @@ pub struct BlockMinerThread { event_dispatcher: EventDispatcher, /// The reason the miner thread was spawned reason: MinerReason, - /// Whether or not we sent our initial block with a tenure-change - /// (only applies if self.reason is MinerReason::EmptyTenure) - sent_initial_block: bool, /// Handle to the p2p thread for block broadcast p2p_handle: NetworkHandle, signer_set_cache: Option, @@ -193,7 +184,6 @@ impl BlockMinerThread { event_dispatcher: rt.event_dispatcher.clone(), parent_tenure_id, reason, - sent_initial_block: false, p2p_handle: rt.get_p2p_handle(), signer_set_cache: None, } @@ -260,11 +250,6 @@ impl BlockMinerThread { false } - /// Does this miner need to send its tenure's initial block still? - fn needs_initial_block(&self) -> bool { - !self.sent_initial_block && self.reason == MinerReason::EmptyTenure - } - /// Stop a miner tenure by blocking the miner and then joining the tenure thread pub fn stop_miner( globals: &Globals, @@ -346,7 +331,6 @@ impl BlockMinerThread { self.globals.should_keep_running.clone(), &reward_set, &burn_tip, - self.needs_initial_block(), &self.burnchain, miner_privkey, &self.config, @@ -450,6 +434,7 @@ impl BlockMinerThread { if let Some(mut new_block) = new_block { Self::fault_injection_block_broadcast_stall(&new_block); + let signer_signature = match self.propose_block( coordinator, &mut new_block, @@ -521,7 +506,6 @@ impl BlockMinerThread { Self::fault_injection_block_announce_stall(&new_block); self.globals.coord().announce_new_stacks_block(); - self.sent_initial_block = true; self.last_block_mined = Some(new_block); self.mined_blocks += 1; } @@ -538,7 +522,10 @@ impl BlockMinerThread { let wait_start = Instant::now(); while wait_start.elapsed() < 
self.config.miner.wait_on_interim_blocks { thread::sleep(Duration::from_millis(ABORT_TRY_AGAIN_MS)); - if self.check_burn_tip_changed(&sort_db, &mut chain_state).is_err() { + if self + .check_burn_tip_changed(&sort_db, &mut chain_state) + .is_err() + { return Err(NakamotoNodeError::BurnchainTipChanged); } } @@ -967,7 +954,6 @@ impl BlockMinerThread { match ParentStacksBlockInfo::lookup( chain_state, burn_db, - &self.reason, &self.burn_block, miner_address, &self.parent_tenure_id, @@ -994,6 +980,7 @@ impl BlockMinerThread { self.burn_election_block.sortition_hash.as_bytes(), ) } else { + // TODO: shouldn't this be self.burn_block.sortition_hash? self.keychain.generate_proof( self.registered_key.target_block_height, self.burn_election_block.sortition_hash.as_bytes(), @@ -1246,7 +1233,7 @@ impl BlockMinerThread { }; let (tenure_change_tx, coinbase_tx) = match &self.reason { - MinerReason::BlockFound | MinerReason::EmptyTenure => { + MinerReason::BlockFound => { let tenure_change_tx = self.generate_tenure_change_tx(current_miner_nonce, payload)?; let coinbase_tx = @@ -1366,10 +1353,6 @@ impl BlockMinerThread { chain_state: &mut StacksChainState, ) -> Result<(), NakamotoNodeError> { Self::check_burn_view_changed(sortdb, chain_state, &self.burn_block)?; - if self.needs_initial_block() { - // don't abandon this tenure until our tenure-change has been mined! 
- return Ok(()); - } let cur_burn_chain_tip = SortitionDB::get_canonical_burn_chain_tip(sortdb.conn()) .expect("FATAL: failed to query sortition DB for canonical burn chain tip"); @@ -1402,7 +1385,6 @@ impl ParentStacksBlockInfo { pub fn lookup( chain_state: &mut StacksChainState, burn_db: &mut SortitionDB, - reason: &MinerReason, check_burn_block: &BlockSnapshot, miner_address: StacksAddress, parent_tenure_id: &StacksBlockId, @@ -1416,21 +1398,19 @@ impl ParentStacksBlockInfo { .expect("Failed to look up block's parent snapshot") .expect("Failed to look up block's parent snapshot"); - if *reason != MinerReason::EmptyTenure { - // don't mine off of an old burnchain block - let burn_chain_tip = SortitionDB::get_canonical_burn_chain_tip(burn_db.conn()) - .expect("FATAL: failed to query sortition DB for canonical burn chain tip"); + // don't mine off of an old burnchain block + let burn_chain_tip = SortitionDB::get_canonical_burn_chain_tip(burn_db.conn()) + .expect("FATAL: failed to query sortition DB for canonical burn chain tip"); - if burn_chain_tip.consensus_hash != check_burn_block.consensus_hash { - info!( - "New canonical burn chain tip detected. Will not try to mine."; - "new_consensus_hash" => %burn_chain_tip.consensus_hash, - "old_consensus_hash" => %check_burn_block.consensus_hash, - "new_burn_height" => burn_chain_tip.block_height, - "old_burn_height" => check_burn_block.block_height - ); - return Err(NakamotoNodeError::BurnchainTipChanged); - } + if burn_chain_tip.consensus_hash != check_burn_block.consensus_hash { + info!( + "New canonical burn chain tip detected. 
Will not try to mine."; + "new_consensus_hash" => %burn_chain_tip.consensus_hash, + "old_consensus_hash" => %check_burn_block.consensus_hash, + "new_burn_height" => burn_chain_tip.block_height, + "old_burn_height" => check_burn_block.block_height + ); + return Err(NakamotoNodeError::BurnchainTipChanged); } let Ok(Some(parent_tenure_header)) = From a7a0b19a650b839e6417dbcf773a75514c580361 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Sat, 14 Dec 2024 23:46:01 -0500 Subject: [PATCH 12/35] chore: factor logic for checking for a tenure-extend into a single function, and drop unused code --- .../stacks-node/src/nakamoto_node/relayer.rs | 352 +++++++----------- 1 file changed, 141 insertions(+), 211 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/relayer.rs b/testnet/stacks-node/src/nakamoto_node/relayer.rs index 644d178d2d..86aed60325 100644 --- a/testnet/stacks-node/src/nakamoto_node/relayer.rs +++ b/testnet/stacks-node/src/nakamoto_node/relayer.rs @@ -66,9 +66,15 @@ use crate::run_loop::nakamoto::{Globals, RunLoop}; use crate::run_loop::RegisteredKey; use crate::BitcoinRegtestController; +/// Mutex to stall the relayer thread right before it creates a miner thread. #[cfg(test)] pub static TEST_MINER_THREAD_STALL: std::sync::Mutex> = std::sync::Mutex::new(None); +/// Mutex to stall the miner thread right after it starts up (does not block the relayer thread) +#[cfg(test)] +pub static TEST_MINER_THREAD_START_STALL: std::sync::Mutex> = + std::sync::Mutex::new(None); + /// Command types for the Nakamoto relayer thread, issued to it by other threads #[allow(clippy::large_enum_variant)] pub enum RelayerDirective { @@ -239,8 +245,6 @@ pub struct RelayerThread { /// Information about the last-sent block commit, and the relayer's view of the chain at the /// time it was sent. 
last_committed: Option, - /// Last burnchain view for which we considered starting a miner - last_burn_view: Option, /// Timeout for waiting for the first block in a tenure before submitting a block commit new_tenure_timeout: Option, } @@ -300,7 +304,6 @@ impl RelayerThread { is_miner, next_initiative: Instant::now() + Duration::from_millis(next_initiative_delay), last_committed: None, - last_burn_view: None, new_tenure_timeout: None, } } @@ -388,7 +391,7 @@ impl RelayerThread { /// parent block could be an epoch 2 block. In this case, the right thing to do is to wait for /// the next block-commit. pub(crate) fn choose_miner_directive( - &self, + &mut self, sn: BlockSnapshot, won_sortition: bool, committed_index_hash: StacksBlockId, @@ -398,51 +401,42 @@ impl RelayerThread { .expect("FATAL: failed to query sortition DB for stacks tip"); let stacks_tip = StacksBlockId::new(&cur_stacks_tip_ch, &cur_stacks_tip_bh); - let highest_tenure_start_block_header = NakamotoChainState::get_tenure_start_block_header( - &mut self.chainstate.index_conn(), - &stacks_tip, - &cur_stacks_tip_ch, - ) - .expect( - "Relayer: Failed to get tenure-start block header for stacks tip {stacks_tip}: {e:?}", - ) - .expect("Relayer: Failed to find tenure-start block header for stacks tip {stacks_tip}"); + + let stacks_tip_sortition = + SortitionDB::get_block_snapshot_consensus(&self.sortdb.conn(), &cur_stacks_tip_ch) + .expect("Relayer: Failed to load canonical Stacks tip's tenure snapshot") + .expect("Relayer: Canonical Stacks tip has no tenure snapshot"); let directive = if sn.sortition { - Some( - if won_sortition || self.config.get_node_config(false).mock_mining { - info!("Relayer: Won sortition; begin tenure."); - MinerDirective::BeginTenure { - parent_tenure_start: committed_index_hash, - burnchain_tip: sn, - } - } else if committed_index_hash - != highest_tenure_start_block_header.index_block_hash() - { - info!( - "Relayer: Winner of sortition {} did not commit to the correct parent 
tenure. Attempt to continue tenure.", - &sn.consensus_hash - ); - // We didn't win the sortition, but the miner that did win - // did not commit to the correct parent tenure. This means - // it will be unable to produce a valid block, so we should - // continue our tenure. - MinerDirective::ContinueTenure { + if won_sortition || self.config.get_node_config(false).mock_mining { + info!("Relayer: Won sortition; begin tenure."); + return Some(MinerDirective::BeginTenure { + parent_tenure_start: committed_index_hash, + burnchain_tip: sn, + }); + } + match Self::can_continue_tenure( + &self.sortdb, + sn.consensus_hash, + self.get_mining_key_pkh(), + ) { + Ok(Some(_)) => { + return Some(MinerDirective::ContinueTenure { new_burn_view: sn.consensus_hash, - } - } else { - info!("Relayer: Stop tenure"); - MinerDirective::StopTenure - }, - ) + }); + } + Ok(None) => { + return Some(MinerDirective::StopTenure); + } + Err(e) => { + warn!("Relayer: failed to check to see if we can continue tenure: {e:?}"); + return Some(MinerDirective::StopTenure); + } + } } else { // find out what epoch the Stacks tip is in. // If it's in epoch 2.x, then we must always begin a new tenure, but we can't do so // right now since this sortition has no winner. 
- let (cur_stacks_tip_ch, _cur_stacks_tip_bh) = - SortitionDB::get_canonical_stacks_chain_tip_hash(self.sortdb.conn()) - .expect("FATAL: failed to query sortition DB for stacks tip"); - let stacks_tip_sn = SortitionDB::get_block_snapshot_consensus(self.sortdb.conn(), &cur_stacks_tip_ch) .expect("FATAL: failed to query sortiiton DB for epoch") @@ -487,7 +481,8 @@ impl RelayerThread { .expect("FATAL: unknown consensus hash"); // always clear this even if this isn't the latest sortition - let won_sortition = sn.sortition && self.last_commits.remove(&sn.winning_block_txid); + let cleared = self.last_commits.remove(&sn.winning_block_txid); + let won_sortition = sn.sortition && cleared; if won_sortition { increment_stx_blocks_mined_counter(); } @@ -782,10 +777,27 @@ impl RelayerThread { fn fault_injection_stall_miner_startup() { if *TEST_MINER_THREAD_STALL.lock().unwrap() == Some(true) { // Do an extra check just so we don't log EVERY time. - warn!("Miner thread startup is stalled due to testing directive"); + warn!("Relayer miner thread startup is stalled due to testing directive to stall the miner"); while *TEST_MINER_THREAD_STALL.lock().unwrap() == Some(true) { std::thread::sleep(std::time::Duration::from_millis(10)); } + warn!( + "Relayer miner thread startup is no longer stalled due to testing directive. Continuing..." + ); + } + } + + #[cfg(not(test))] + fn fault_injection_stall_miner_startup() {} + + #[cfg(test)] + fn fault_injection_stall_miner_thread_startup() { + if *TEST_MINER_THREAD_START_STALL.lock().unwrap() == Some(true) { + // Do an extra check just so we don't log EVERY time. + warn!("Miner thread startup is stalled due to testing directive"); + while *TEST_MINER_THREAD_START_STALL.lock().unwrap() == Some(true) { + std::thread::sleep(std::time::Duration::from_millis(10)); + } warn!( "Miner thread startup is no longer stalled due to testing directive. Continuing..." 
); @@ -807,7 +819,6 @@ impl RelayerThread { &mut self, registered_key: RegisteredKey, burn_election_block: BlockSnapshot, - burn_view: BlockSnapshot, burn_tip: BlockSnapshot, parent_tenure_id: StacksBlockId, reason: MinerReason, @@ -842,7 +853,6 @@ impl RelayerThread { "parent_tenure_id" => %parent_tenure_id, "reason" => %reason, "burn_election_block.consensus_hash" => %burn_election_block.consensus_hash, - "burn_view.consensus_hash" => %burn_view.consensus_hash, "burn_tip.consensus_hash" => %burn_tip.consensus_hash, ); @@ -850,7 +860,7 @@ impl RelayerThread { self, registered_key, burn_election_block, - burn_view, + burn_tip, parent_tenure_id, reason, ); @@ -861,7 +871,6 @@ impl RelayerThread { &mut self, parent_tenure_start: StacksBlockId, block_election_snapshot: BlockSnapshot, - burn_view: BlockSnapshot, burn_tip: BlockSnapshot, reason: MinerReason, ) -> Result<(), NakamotoNodeError> { @@ -879,7 +888,6 @@ impl RelayerThread { let new_miner_state = self.create_block_miner( vrf_key, block_election_snapshot, - burn_view.clone(), burn_tip, parent_tenure_start, reason, @@ -891,6 +899,7 @@ impl RelayerThread { .name(format!("miner.{parent_tenure_start}",)) .stack_size(BLOCK_PROCESSOR_STACK_SIZE) .spawn(move || { + Self::fault_injection_stall_miner_thread_startup(); if let Err(e) = new_miner_state.run_miner(prior_tenure_thread) { info!("Miner thread failed: {e:?}"); Err(e) @@ -907,7 +916,6 @@ impl RelayerThread { new_miner_handle.thread().id() ); self.miner_thread.replace(new_miner_handle); - self.last_burn_view.replace(burn_view); Ok(()) } @@ -944,113 +952,74 @@ impl RelayerThread { )) } - /// Get the tenure-start block header hash of a given consensus hash. - /// For Nakamoto blocks, this is the first block in the tenure identified by the consensus - /// hash. - /// For epoch2 blocks, this is simply the block whose winning sortition happened in the - /// sortition identified by the consensus hash. 
+ /// Determine if the miner can continue an existing tenure with the new sortition (identified + /// by `new_burn_view`) /// - /// `tip_block_id` is the chain tip from which to perform the query. - fn get_tenure_bhh( - &self, - tip_block_id: &StacksBlockId, - ch: &ConsensusHash, - ) -> Result { - let highest_tenure_start_block_header = NakamotoChainState::get_tenure_start_block_header( - &mut self.chainstate.index_conn(), - tip_block_id, - &ch, - )? - .ok_or_else(|| { - error!( - "Relayer: Failed to find tenure-start block header for stacks tip {tip_block_id}" - ); - NakamotoNodeError::ParentNotFound - })?; - Ok(BlockHeaderHash( - highest_tenure_start_block_header.index_block_hash().0, - )) - } - - /// Determine the type of tenure change to issue based on whether this - /// miner was the last successful miner (miner of the canonical Stacks tip). - fn determine_tenure_type( - &self, - canonical_stacks_snapshot: BlockSnapshot, - last_good_block_election_snapshot: BlockSnapshot, - burn_view_snapshot: BlockSnapshot, - mining_pkh: Hash160, - ) -> (StacksBlockId, BlockSnapshot, BlockSnapshot, MinerReason) { - let mining_pkh_opt = Some(mining_pkh); - if canonical_stacks_snapshot.miner_pk_hash != mining_pkh_opt { - debug!("Relayer(determine_tenure_type): Miner was not the last successful Stacks miner, but it won the last sortition. Issue a new tenure change payload."); - ( - StacksBlockId( - last_good_block_election_snapshot - .winning_stacks_block_hash - .0, - ), - last_good_block_election_snapshot.clone(), - last_good_block_election_snapshot, - MinerReason::EmptyTenure, - ) - } else { - debug!("Relayer(determine_tenure_type): Miner was the last successful miner. 
Issue a tenure extend from the chain tip."); - ( - self.sortdb.get_canonical_stacks_tip_block_id(), - canonical_stacks_snapshot, - burn_view_snapshot.clone(), - MinerReason::Extended { - burn_view_consensus_hash: burn_view_snapshot.consensus_hash, - }, - ) - } - } + /// Assumes that the caller has already checked that the given miner has _not_ won the new + /// sortition. + /// + /// Will return Ok(Some(..)) even if `new_burn_view`'s sortition had a winner that was not this + /// miner. It's on signers to either accept the resulting tenure-extend from this miner, or a + /// block-found from the other winning miner. + /// + /// Returns Ok(Some(stacks-tip-election-snapshot)) if so + /// Returns Ok(None) if not. + /// Returns Err(..) on DB error + pub(crate) fn can_continue_tenure( + sortdb: &SortitionDB, + new_burn_view: ConsensusHash, + mining_key_opt: Option, + ) -> Result, NakamotoNodeError> { + let Some(mining_pkh) = mining_key_opt else { + return Ok(None); + }; - /// Get the block snapshot of the most recent sortition that committed to - /// the canonical tip. If the latest sortition did not commit to the - /// canonical tip, then the tip's tenure is the last good sortition. - fn get_last_good_block_snapshot( - &self, - burn_tip: &BlockSnapshot, - highest_tenure_bhh: &BlockHeaderHash, - canonical_stacks_tip_ch: &ConsensusHash, - ) -> Result { - let ih = self.sortdb.index_handle(&burn_tip.sortition_id); - let sn = ih - .get_last_snapshot_with_sortition(burn_tip.block_height) - .map_err(|e| { - error!("Relayer: failed to get last snapshot with sortition: {e:?}"); + // Get the necessary snapshots and state + let burn_tip = SortitionDB::get_block_snapshot_consensus(sortdb.conn(), &new_burn_view)? 
+ .ok_or_else(|| { + error!("Relayer: failed to get block snapshot for new burn view"); NakamotoNodeError::SnapshotNotFoundForChainTip })?; - if &sn.winning_stacks_block_hash != highest_tenure_bhh { - info!( - "Relayer: Sortition winner is not committed to the canonical tip; allowing last miner to extend"; - "burn_block_height" => burn_tip.block_height, - "consensus_hash" => %burn_tip.consensus_hash, - ); - SortitionDB::get_block_snapshot_consensus(self.sortdb.conn(), canonical_stacks_tip_ch) - .map_err(|e| { - error!("Relayer: failed to get block snapshot for canonical tip: {e:?}"); - NakamotoNodeError::SnapshotNotFoundForChainTip - })? + let (canonical_stacks_tip_ch, canonical_stacks_tip_bh) = + SortitionDB::get_canonical_stacks_chain_tip_hash(sortdb.conn()).unwrap(); + + let canonical_stacks_tip = + StacksBlockId::new(&canonical_stacks_tip_ch, &canonical_stacks_tip_bh); + + let canonical_stacks_snapshot = + SortitionDB::get_block_snapshot_consensus(sortdb.conn(), &canonical_stacks_tip_ch)? .ok_or_else(|| { error!("Relayer: failed to get block snapshot for canonical tip"); NakamotoNodeError::SnapshotNotFoundForChainTip - }) - } else { - Ok(sn) + })?; + + let won_last_good_sortition = canonical_stacks_snapshot.miner_pk_hash == Some(mining_pkh); + + info!( + "Relayer: Checking for tenure continuation."; + "won_last_good_sortition" => won_last_good_sortition, + "current_mining_pkh" => %mining_pkh, + "canonical_stacks_tip_id" => %canonical_stacks_tip, + "canonical_stacks_tip_ch" => %canonical_stacks_tip_ch, + "canonical_stacks_tip_miner" => ?canonical_stacks_snapshot.miner_pk_hash, + "burn_view_ch" => %new_burn_view, + ); + + if !won_last_good_sortition { + info!("Relayer: Did not win the last sortition that commits to our Stacks fork. Cannot continue tenure."); + return Ok(None); } + + Ok(Some(canonical_stacks_snapshot)) } /// Attempt to continue a miner's tenure into the next burn block. 
- /// This is allowed if the miner won the last good sortition and one of the - /// following conditions is met: - /// - There was no sortition in the latest burn block - /// - The winner of the latest sortition did not commit to the canonical tip - /// - The winner of the latest sortition did not mine any blocks within the - /// timeout period (not yet implemented) + /// This is allowed if the miner won the last good sortition -- that is, the sortition which + /// elected the local view of the canonical Stacks fork's ongoing tenure. + /// + /// This function assumes that the caller has checked that the sortition referred to by + /// `new_burn_view` does not have a sortition winner. fn continue_tenure(&mut self, new_burn_view: ConsensusHash) -> Result<(), NakamotoNodeError> { if let Err(e) = self.stop_tenure() { error!("Relayer: Failed to stop tenure: {e:?}"); @@ -1058,6 +1027,19 @@ impl RelayerThread { } debug!("Relayer: successfully stopped tenure; will try to continue."); + let Some(mining_pkh) = self.get_mining_key_pkh() else { + return Ok(()); + }; + + let Some(canonical_stacks_tip_election_snapshot) = Self::can_continue_tenure( + &self.sortdb, + new_burn_view.clone(), + self.get_mining_key_pkh(), + )? + else { + return Ok(()); + }; + // Get the necessary snapshots and state let burn_tip = SortitionDB::get_block_snapshot_consensus(self.sortdb.conn(), &new_burn_view)? 
@@ -1069,66 +1051,24 @@ impl RelayerThread { SortitionDB::get_canonical_stacks_chain_tip_hash(self.sortdb.conn()).unwrap(); let canonical_stacks_tip = StacksBlockId::new(&canonical_stacks_tip_ch, &canonical_stacks_tip_bh); - let Some(mining_pkh) = self.get_mining_key_pkh() else { - return Ok(()); - }; - let highest_tenure_bhh = - self.get_tenure_bhh(&canonical_stacks_tip, &canonical_stacks_tip_ch)?; - let last_good_block_election_snapshot = self.get_last_good_block_snapshot( - &burn_tip, - &highest_tenure_bhh, - &canonical_stacks_tip_ch, - )?; - - let won_last_sortition = - last_good_block_election_snapshot.miner_pk_hash == Some(mining_pkh); - info!( - "Relayer: Current burn block had no sortition or a bad sortition. Checking for tenure continuation."; - "won_last_sortition" => won_last_sortition, - "current_mining_pkh" => %mining_pkh, - "last_good_block_election_snapshot.consensus_hash" => %last_good_block_election_snapshot.consensus_hash, - "last_good_block_election_snapshot.miner_pk_hash" => ?last_good_block_election_snapshot.miner_pk_hash, - "canonical_stacks_tip_id" => %canonical_stacks_tip, - "canonical_stacks_tip_ch" => %canonical_stacks_tip_ch, - "burn_view_ch" => %new_burn_view, - ); - - if !won_last_sortition { - info!("Relayer: Did not win the last sortition. Cannot continue tenure."); - return Ok(()); - } - let canonical_stacks_snapshot = SortitionDB::get_block_snapshot_consensus( - self.sortdb.conn(), - &canonical_stacks_tip_ch, - )? 
- .ok_or_else(|| { - error!("Relayer: failed to get block snapshot for canonical tip"); - NakamotoNodeError::SnapshotNotFoundForChainTip - })?; - - let (parent_tenure_start, block_election_snapshot, burn_view_snapshot, reason) = self - .determine_tenure_type( - canonical_stacks_snapshot, - last_good_block_election_snapshot, - burn_tip.clone(), - mining_pkh, - ); + let reason = MinerReason::Extended { + burn_view_consensus_hash: new_burn_view, + }; if let Err(e) = self.start_new_tenure( - parent_tenure_start.clone(), - block_election_snapshot.clone(), - burn_view_snapshot.clone(), + canonical_stacks_tip.clone(), + canonical_stacks_tip_election_snapshot.clone(), burn_tip.clone(), reason.clone(), ) { error!("Relayer: Failed to start new tenure: {e:?}"); } else { debug!("Relayer: successfully started new tenure."; - "parent_tenure_start" => %parent_tenure_start, + "parent_tenure_start" => %canonical_stacks_tip, "burn_tip" => %burn_tip.consensus_hash, - "burn_view_snapshot" => %burn_view_snapshot.consensus_hash, - "block_election_snapshot" => %block_election_snapshot.consensus_hash, + "burn_view_snapshot" => %burn_tip.consensus_hash, + "block_election_snapshot" => %canonical_stacks_tip_election_snapshot.consensus_hash, "reason" => %reason); } Ok(()) @@ -1160,11 +1100,15 @@ impl RelayerThread { parent_tenure_start, burnchain_tip.clone(), burnchain_tip.clone(), - burnchain_tip, MinerReason::BlockFound, ) { Ok(()) => { - debug!("Relayer: successfully started new tenure."); + debug!("Relayer: successfully started new tenure."; + "parent_tenure_start" => %parent_tenure_start, + "burn_tip" => %burnchain_tip.consensus_hash, + "burn_view_snapshot" => %burnchain_tip.consensus_hash, + "block_election_snapshot" => %burnchain_tip.consensus_hash, + "reason" => %MinerReason::BlockFound); } Err(e) => { error!("Relayer: Failed to start new tenure: {e:?}"); @@ -1334,20 +1278,6 @@ impl RelayerThread { }; let stacks_tip = StacksBlockId::new(&stacks_tip_ch, &stacks_tip_bh); - // see if we 
have to try and continue a tenure - if let Ok(ongoing_tenure_id) = - BlockMinerThread::get_ongoing_tenure_id(&self.sortdb, &mut self.chainstate).map_err( - |e| { - error!("Failed to get ongoing tenure ID: {:?}", &e); - e - }, - ) - { - if ongoing_tenure_id.burn_view_consensus_hash != sort_tip.consensus_hash { - todo!(); - } - } - // check stacks and sortition tips to see if any chainstate change has happened. // did our view of the sortition history change? // if so, then let's try and confirm the highest tenure so far. From a2f010e3d66c6536b00a0e436a6e1982a1a52972 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Sat, 14 Dec 2024 23:46:28 -0500 Subject: [PATCH 13/35] chore; drop needs_initial_block --- .../src/nakamoto_node/signer_coordinator.rs | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index 3736af1d85..8927df484a 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -61,8 +61,6 @@ pub struct SignerCoordinator { keep_running: Arc, /// Handle for the signer DB listener thread listener_thread: Option>, - /// whether or not we need to wait for the signer to receive the initial block from this tenure - needs_initial_block: bool, } impl SignerCoordinator { @@ -73,7 +71,6 @@ impl SignerCoordinator { node_keep_running: Arc, reward_set: &RewardSet, burn_tip: &BlockSnapshot, - needs_initial_block: bool, burnchain: &Burnchain, message_key: StacksPrivateKey, config: &Config, @@ -105,7 +102,6 @@ impl SignerCoordinator { total_weight: listener.total_weight, weight_threshold: listener.weight_threshold, stackerdb_comms: listener.get_comms(), - needs_initial_block, keep_running, listener_thread: None, }; @@ -313,7 +309,7 @@ impl SignerCoordinator { return Ok(stored_block.header.signer_signature); } - if 
Self::check_burn_tip_changed(sortdb, chain_state, burn_tip, self.needs_initial_block) { + if Self::check_burn_tip_changed(sortdb, chain_state, burn_tip) { debug!("SignCoordinator: Exiting due to new burnchain tip"); return Err(NakamotoNodeError::BurnchainTipChanged); } @@ -359,18 +355,12 @@ impl SignerCoordinator { sortdb: &SortitionDB, chain_state: &mut StacksChainState, burn_block: &BlockSnapshot, - needs_initial_block: bool, ) -> bool { if BlockMinerThread::check_burn_view_changed(sortdb, chain_state, burn_block).is_err() { // can't continue mining -- burn view changed, or a DB error occurred return true; } - if needs_initial_block { - // must get that first initial block in, assuming the burn view is still valid. - return false; - } - let cur_burn_chain_tip = SortitionDB::get_canonical_burn_chain_tip(sortdb.conn()) .expect("FATAL: failed to query sortition DB for canonical burn chain tip"); From 48e74681cc5948a308617fab445609e9e54a2659 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Sat, 14 Dec 2024 23:46:46 -0500 Subject: [PATCH 14/35] test: finish check that the hotfix ensures that the correct burn view will be used --- .../src/tests/nakamoto_integrations.rs | 135 +++++++++++++----- 1 file changed, 97 insertions(+), 38 deletions(-) diff --git a/testnet/stacks-node/src/tests/nakamoto_integrations.rs b/testnet/stacks-node/src/tests/nakamoto_integrations.rs index 84192ecfa4..c1d0c41eff 100644 --- a/testnet/stacks-node/src/tests/nakamoto_integrations.rs +++ b/testnet/stacks-node/src/tests/nakamoto_integrations.rs @@ -97,9 +97,12 @@ use stacks_signer::v0::SpawnedSigner; use super::bitcoin_regtest::BitcoinCoreController; use crate::config::{EventKeyType, InitialBalance}; use crate::nakamoto_node::miner::{ - TEST_BLOCK_ANNOUNCE_STALL, TEST_BROADCAST_STALL, TEST_MINE_STALL, TEST_SKIP_P2P_BROADCAST, + MinerReason, TEST_BLOCK_ANNOUNCE_STALL, TEST_BROADCAST_STALL, TEST_MINE_STALL, + TEST_SKIP_P2P_BROADCAST, +}; +use crate::nakamoto_node::relayer::{ + RelayerThread, 
TEST_MINER_THREAD_STALL, TEST_MINER_THREAD_START_STALL, }; -use crate::nakamoto_node::relayer::TEST_MINER_THREAD_STALL; use crate::neon::{Counters, RunLoopCounter}; use crate::operations::BurnchainOpSigner; use crate::run_loop::boot_nakamoto; @@ -10351,11 +10354,14 @@ fn clarity_cost_spend_down() { run_loop_thread.join().unwrap(); } -/// If we get a flash block -- a sortition in which we win, immediately followed by a different -/// sortition, make sure we first mine a tenure-change block and then a tenure-extend block. +/// Miner wins sortition at Bitcoin height N +/// Relayer processes sortition N +/// Miner wins sortition at Bitcoin height N+1 +/// A flash block at height N+2 happens before the miner can publish its block-found for N+1 +/// Result: the miner issues a tenure-extend from N+1 with burn view for N+2 #[test] #[ignore] -fn test_tenure_change_and_extend_from_flashblocks() { +fn test_tenure_extend_from_flashblocks() { if env::var("BITCOIND_TEST") != Ok("1".into()) { return; } @@ -10385,6 +10391,9 @@ fn test_tenure_change_and_extend_from_flashblocks() { signer_test.boot_to_epoch_3(); let naka_conf = signer_test.running_nodes.conf.clone(); + let mining_key = naka_conf.miner.mining_key.clone().unwrap(); + let mining_key_pkh = Hash160::from_node_public_key(&StacksPublicKey::from_private(&mining_key)); + let http_origin = format!("http://{}", &naka_conf.node.rpc_bind); let btc_regtest_controller = &mut signer_test.running_nodes.btc_regtest_controller; let coord_channel = signer_test.running_nodes.coord_channel.clone(); @@ -10399,7 +10408,7 @@ fn test_tenure_change_and_extend_from_flashblocks() { let tx_fee = 1_000; let burnchain = naka_conf.get_burnchain(); - let mut sortdb = burnchain.open_sortition_db(true).unwrap(); + let sortdb = burnchain.open_sortition_db(true).unwrap(); for _ in 0..3 { next_block_and_mine_commit( btc_regtest_controller, @@ -10461,7 +10470,6 @@ fn test_tenure_change_and_extend_from_flashblocks() { .unwrap(); // stall miner and relayer 
- TEST_MINE_STALL.lock().unwrap().replace(true); // make tenure but don't wait for a stacks block next_block_and_commits_only( @@ -10472,15 +10480,21 @@ fn test_tenure_change_and_extend_from_flashblocks() { ) .unwrap(); - // prevent the relayer from spawning a new thread just yet - TEST_MINER_THREAD_STALL.lock().unwrap().replace(true); + // prevent the miner from sending another block-commit nakamoto_test_skip_commit_op.set(true); - // mine another Bitcoin block right away, since it will contain a block-commit - btc_regtest_controller.bootstrap_chain(1); + // make sure we get a block-found tenure change + let blocks_processed_before = coord_channel + .lock() + .expect("Mutex poisoned") + .get_stacks_blocks_processed(); // make sure the relayer processes both sortitions let sortitions_processed_before = sortitions_processed.load(Ordering::SeqCst); + + // mine another Bitcoin block right away, since it will contain a block-commit + btc_regtest_controller.bootstrap_chain(1); + wait_for(60, || { sleep_ms(100); let sortitions_cnt = sortitions_processed.load(Ordering::SeqCst); @@ -10488,27 +10502,38 @@ fn test_tenure_change_and_extend_from_flashblocks() { }) .unwrap(); - // HACK: simulate the presence of a different miner. - // Make it so that from the perspective of this node's miner, a *different* miner produced the - // canonical Stacks chain tip. This triggers the `None` return value in - // `Relayer::determine_tenure_type`. 
- { - let tx = sortdb.tx_begin().unwrap(); + wait_for(120, || { + let blocks_processed = coord_channel + .lock() + .expect("Mutex poisoned") + .get_stacks_blocks_processed(); + Ok(blocks_processed > blocks_processed_before) + }) + .expect("Timed out waiting for interim blocks to be mined"); - let (canonical_stacks_tip_ch, _) = - SortitionDB::get_canonical_stacks_chain_tip_hash(&tx).unwrap(); - tx.execute( - "UPDATE snapshots SET miner_pk_hash = ?1 WHERE consensus_hash = ?2", - rusqlite::params![&Hash160([0x11; 20]), &canonical_stacks_tip_ch], - ) - .unwrap(); - tx.commit().unwrap(); - } + let (canonical_stacks_tip_ch, _) = + SortitionDB::get_canonical_stacks_chain_tip_hash(&sortdb.conn()).unwrap(); + let election_tip = + SortitionDB::get_block_snapshot_consensus(&sortdb.conn(), &canonical_stacks_tip_ch) + .unwrap() + .unwrap(); + let sort_tip = SortitionDB::get_canonical_burn_chain_tip(&sortdb.conn()).unwrap(); + + // Stacks chain tip originates from the tenure started at the burnchain tip + assert!(sort_tip.sortition); + assert_eq!(sort_tip.consensus_hash, election_tip.consensus_hash); + + // stop the relayer thread from starting a miner thread, and stop the miner thread from mining + TEST_MINE_STALL.lock().unwrap().replace(true); + TEST_MINER_THREAD_STALL.lock().unwrap().replace(true); // mine another Bitcoin block right away, and force it to be a flash block btc_regtest_controller.bootstrap_chain(1); let miner_directives_before = nakamoto_miner_directives.load(Ordering::SeqCst); + + // unblock the relayer so it can process the flash block sortition. + // Given the above, this will be an `Extend` tenure. 
TEST_MINER_THREAD_STALL.lock().unwrap().replace(false); let sortitions_processed_before = sortitions_processed.load(Ordering::SeqCst); @@ -10519,6 +10544,41 @@ fn test_tenure_change_and_extend_from_flashblocks() { }) .unwrap(); + let (new_canonical_stacks_tip_ch, _) = + SortitionDB::get_canonical_stacks_chain_tip_hash(&sortdb.conn()).unwrap(); + let election_tip = + SortitionDB::get_block_snapshot_consensus(&sortdb.conn(), &new_canonical_stacks_tip_ch) + .unwrap() + .unwrap(); + let sort_tip = SortitionDB::get_canonical_burn_chain_tip(&sortdb.conn()).unwrap(); + + // this was a flash block -- no sortition + assert!(!sort_tip.sortition); + // canonical stacks tip burn view has not advanced + assert_eq!(new_canonical_stacks_tip_ch, canonical_stacks_tip_ch); + // the sortition that elected the ongoing tenure is not the canonical sortition tip + assert_ne!(sort_tip.consensus_hash, election_tip.consensus_hash); + + // we can, however, continue the tenure + let canonical_stacks_tip = RelayerThread::can_continue_tenure( + &sortdb, + sort_tip.consensus_hash.clone(), + Some(mining_key_pkh.clone()), + ) + .unwrap() + .unwrap(); + assert_eq!(canonical_stacks_tip, election_tip); + + // if we didn't win the last block -- tantamount to the sortition winner miner key being + // different -- then we can't continue the tenure. 
+ assert!(RelayerThread::can_continue_tenure( + &sortdb, + sort_tip.consensus_hash.clone(), + Some(Hash160([0x11; 20])) + ) + .unwrap() + .is_none()); + let mut accounts_before = vec![]; let mut sent_txids = vec![]; @@ -10550,15 +10610,13 @@ fn test_tenure_change_and_extend_from_flashblocks() { accounts_before.push(account); } - // unstall miner and relayer + // unstall miner thread and allow block-commits again nakamoto_test_skip_commit_op.set(false); TEST_MINE_STALL.lock().unwrap().replace(false); - sleep_ms(10_000); - // wait for the miner directive to be processed wait_for(60, || { - sleep_ms(100); + sleep_ms(10_000); let directives_cnt = nakamoto_miner_directives.load(Ordering::SeqCst); Ok(directives_cnt > miner_directives_before) }) @@ -10586,14 +10644,8 @@ fn test_tenure_change_and_extend_from_flashblocks() { }) .unwrap(); - // start up the next tenure - next_block_and_commits_only( - btc_regtest_controller, - 60, - &coord_channel, - &commits_submitted, - ) - .unwrap(); + // boot a follower. 
it should reach the chain tip + info!("----- BEGIN FOLLOWR BOOTUP ------"); // see if we can boot a follower off of this node now let mut follower_conf = naka_conf.clone(); @@ -10652,6 +10704,13 @@ fn test_tenure_change_and_extend_from_flashblocks() { sleep_ms(1000); return Ok(false); }; + debug!( + "Miner tip is {}/{}; follower tip is {}/{}", + &miner_info.stacks_tip_consensus_hash, + &miner_info.stacks_tip, + &info.stacks_tip_consensus_hash, + &info.stacks_tip + ); Ok(miner_info.stacks_tip == info.stacks_tip && miner_info.stacks_tip_consensus_hash == info.stacks_tip_consensus_hash) }) From f488b35c9e86fd179e9753254275c2b40c1a4101 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Sat, 14 Dec 2024 23:47:10 -0500 Subject: [PATCH 15/35] chore: delete old code --- .../src/nakamoto_node/sign_coordinator.rs | 637 ------------------ 1 file changed, 637 deletions(-) delete mode 100644 testnet/stacks-node/src/nakamoto_node/sign_coordinator.rs diff --git a/testnet/stacks-node/src/nakamoto_node/sign_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/sign_coordinator.rs deleted file mode 100644 index b4311a53d9..0000000000 --- a/testnet/stacks-node/src/nakamoto_node/sign_coordinator.rs +++ /dev/null @@ -1,637 +0,0 @@ -// Copyright (C) 2024 Stacks Open Internet Foundation -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . 
- -use std::collections::BTreeMap; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::mpsc::Receiver; -use std::sync::{Arc, Mutex}; -use std::time::Duration; - -use hashbrown::{HashMap, HashSet}; -use libsigner::v0::messages::{ - BlockAccepted, BlockResponse, MinerSlotID, SignerMessage as SignerMessageV0, -}; -use libsigner::{BlockProposal, SignerEntries, SignerEvent, SignerSession, StackerDBSession}; -use stacks::burnchains::Burnchain; -use stacks::chainstate::burn::db::sortdb::SortitionDB; -use stacks::chainstate::burn::{BlockSnapshot, ConsensusHash}; -use stacks::chainstate::nakamoto::{NakamotoBlock, NakamotoBlockHeader, NakamotoChainState}; -use stacks::chainstate::stacks::boot::{NakamotoSignerEntry, RewardSet, MINERS_NAME, SIGNERS_NAME}; -use stacks::chainstate::stacks::db::StacksChainState; -use stacks::chainstate::stacks::events::StackerDBChunksEvent; -use stacks::chainstate::stacks::Error as ChainstateError; -use stacks::libstackerdb::StackerDBChunkData; -use stacks::net::stackerdb::StackerDBs; -use stacks::types::PublicKey; -use stacks::util::hash::MerkleHashFunc; -use stacks::util::secp256k1::MessageSignature; -use stacks::util_lib::boot::boot_code_id; -use stacks_common::bitvec::BitVec; -use stacks_common::codec::StacksMessageCodec; -use stacks_common::types::chainstate::{StacksPrivateKey, StacksPublicKey}; - -use super::Error as NakamotoNodeError; -use crate::event_dispatcher::StackerDBChannel; -use crate::nakamoto_node::miner::BlockMinerThread; -use crate::neon::Counters; -use crate::Config; - -/// Fault injection flag to prevent the miner from seeing enough signer signatures. -/// Used to test that the signers will broadcast a block if it gets enough signatures -#[cfg(test)] -pub static TEST_IGNORE_SIGNERS: std::sync::Mutex> = std::sync::Mutex::new(None); - -/// How long should the coordinator poll on the event receiver before -/// waking up to check timeouts? 
-static EVENT_RECEIVER_POLL: Duration = Duration::from_millis(500); - -/// The `SignCoordinator` struct sole function is to serve as the coordinator for Nakamoto block signing. -/// This struct is used by Nakamoto miners to act as the coordinator for the blocks they produce. -pub struct SignCoordinator { - receiver: Option>, - message_key: StacksPrivateKey, - needs_initial_block: bool, - is_mainnet: bool, - miners_session: StackerDBSession, - signer_entries: HashMap, - weight_threshold: u32, - total_weight: u32, - keep_running: Arc, - pub next_signer_bitvec: BitVec<4000>, - stackerdb_channel: Arc>, -} - -impl Drop for SignCoordinator { - fn drop(&mut self) { - let stackerdb_channel = self - .stackerdb_channel - .lock() - .expect("FATAL: failed to lock stackerdb channel"); - stackerdb_channel.replace_receiver(self.receiver.take().expect( - "FATAL: lost possession of the StackerDB channel before dropping SignCoordinator", - )); - } -} - -impl SignCoordinator { - /// * `reward_set` - the active reward set data, used to construct the signer - /// set parameters. 
- /// * `aggregate_public_key` - the active aggregate key for this cycle - pub fn new( - reward_set: &RewardSet, - message_key: StacksPrivateKey, - needs_initial_block: bool, - config: &Config, - keep_running: Arc, - stackerdb_channel: Arc>, - ) -> Result { - let is_mainnet = config.is_mainnet(); - let Some(ref reward_set_signers) = reward_set.signers else { - error!("Could not initialize signing coordinator for reward set without signer"); - debug!("reward set: {reward_set:?}"); - return Err(ChainstateError::NoRegisteredSigners(0)); - }; - - let signer_entries = SignerEntries::parse(is_mainnet, reward_set_signers).map_err(|e| { - ChainstateError::InvalidStacksBlock(format!( - "Failed to parse NakamotoSignerEntries: {e:?}" - )) - })?; - let rpc_socket = config - .node - .get_rpc_loopback() - .ok_or_else(|| ChainstateError::MinerAborted)?; - let miners_contract_id = boot_code_id(MINERS_NAME, is_mainnet); - let miners_session = StackerDBSession::new(&rpc_socket.to_string(), miners_contract_id); - - let next_signer_bitvec: BitVec<4000> = BitVec::zeros( - reward_set_signers - .clone() - .len() - .try_into() - .expect("FATAL: signer set length greater than u16"), - ) - .expect("FATAL: unable to construct initial bitvec for signer set"); - - debug!( - "Initializing miner/coordinator"; - "num_signers" => signer_entries.signer_pks.len(), - "signer_public_keys" => ?signer_entries.signer_pks, - ); - - let total_weight = reward_set.total_signing_weight().map_err(|e| { - warn!("Failed to calculate total weight for the reward set: {e:?}"); - ChainstateError::NoRegisteredSigners(0) - })?; - - let threshold = NakamotoBlockHeader::compute_voting_weight_threshold(total_weight)?; - - let signer_public_keys = reward_set_signers - .iter() - .cloned() - .enumerate() - .map(|(idx, signer)| { - let Ok(slot_id) = u32::try_from(idx) else { - return Err(ChainstateError::InvalidStacksBlock( - "Signer index exceeds u32".into(), - )); - }; - Ok((slot_id, signer)) - }) - .collect::, 
ChainstateError>>()?; - #[cfg(test)] - { - // In test mode, short-circuit spinning up the SignCoordinator if the TEST_SIGNING - // channel has been created. This allows integration tests for the stacks-node - // independent of the stacks-signer. - use crate::tests::nakamoto_integrations::TEST_SIGNING; - if TEST_SIGNING.lock().unwrap().is_some() { - debug!("Short-circuiting spinning up coordinator from signer commitments. Using test signers channel."); - let (receiver, replaced_other) = stackerdb_channel - .lock() - .expect("FATAL: failed to lock StackerDB channel") - .register_miner_coordinator(); - if replaced_other { - warn!("Replaced the miner/coordinator receiver of a prior thread. Prior thread may have crashed."); - } - let sign_coordinator = Self { - receiver: Some(receiver), - message_key, - needs_initial_block, - is_mainnet, - miners_session, - next_signer_bitvec, - signer_entries: signer_public_keys, - weight_threshold: threshold, - total_weight, - keep_running, - stackerdb_channel, - }; - return Ok(sign_coordinator); - } - } - - let (receiver, replaced_other) = stackerdb_channel - .lock() - .expect("FATAL: failed to lock StackerDB channel") - .register_miner_coordinator(); - if replaced_other { - warn!("Replaced the miner/coordinator receiver of a prior thread. 
Prior thread may have crashed."); - } - - Ok(Self { - receiver: Some(receiver), - message_key, - needs_initial_block, - is_mainnet, - miners_session, - next_signer_bitvec, - signer_entries: signer_public_keys, - weight_threshold: threshold, - total_weight, - keep_running, - stackerdb_channel, - }) - } - - /// Send a message over the miners contract using a `StacksPrivateKey` - #[allow(clippy::too_many_arguments)] - pub fn send_miners_message( - miner_sk: &StacksPrivateKey, - sortdb: &SortitionDB, - tip: &BlockSnapshot, - stackerdbs: &StackerDBs, - message: M, - miner_slot_id: MinerSlotID, - is_mainnet: bool, - miners_session: &mut StackerDBSession, - election_sortition: &ConsensusHash, - ) -> Result<(), String> { - let Some(slot_range) = NakamotoChainState::get_miner_slot(sortdb, tip, election_sortition) - .map_err(|e| format!("Failed to read miner slot information: {e:?}"))? - else { - return Err("No slot for miner".into()); - }; - - let slot_id = slot_range - .start - .saturating_add(miner_slot_id.to_u8().into()); - if !slot_range.contains(&slot_id) { - return Err("Not enough slots for miner messages".into()); - } - // Get the LAST slot version number written to the DB. If not found, use 0. - // Add 1 to get the NEXT version number - // Note: we already check above for the slot's existence - let miners_contract_id = boot_code_id(MINERS_NAME, is_mainnet); - let slot_version = stackerdbs - .get_slot_version(&miners_contract_id, slot_id) - .map_err(|e| format!("Failed to read slot version: {e:?}"))? - .unwrap_or(0) - .saturating_add(1); - let mut chunk = StackerDBChunkData::new(slot_id, slot_version, message.serialize_to_vec()); - chunk - .sign(miner_sk) - .map_err(|_| "Failed to sign StackerDB chunk")?; - - match miners_session.put_chunk(&chunk) { - Ok(ack) => { - if ack.accepted { - debug!("Wrote message to stackerdb: {ack:?}"); - Ok(()) - } else { - Err(format!("{ack:?}")) - } - } - Err(e) => Err(format!("{e:?}")), - } - } - - /// Do we ignore signer signatures? 
- #[cfg(test)] - fn fault_injection_ignore_signatures() -> bool { - if *TEST_IGNORE_SIGNERS.lock().unwrap() == Some(true) { - return true; - } - false - } - - #[cfg(not(test))] - fn fault_injection_ignore_signatures() -> bool { - false - } - - /// Check if the tenure needs to change - fn check_burn_tip_changed( - sortdb: &SortitionDB, - chain_state: &mut StacksChainState, - burn_block: &BlockSnapshot, - needs_initial_block: bool, - ) -> bool { - if BlockMinerThread::check_burn_view_changed(sortdb, chain_state, burn_block).is_err() { - // can't continue mining -- burn view changed, or a DB error occurred - return true; - } - - if needs_initial_block { - // must get that first initial block in, assuming the burn view is still valid. - return false; - } - - let cur_burn_chain_tip = SortitionDB::get_canonical_burn_chain_tip(sortdb.conn()) - .expect("FATAL: failed to query sortition DB for canonical burn chain tip"); - - if cur_burn_chain_tip.consensus_hash != burn_block.consensus_hash { - info!("SignCoordinator: Cancel signature aggregation; burnchain tip has changed"); - true - } else { - false - } - } - - /// Start gathering signatures for a Nakamoto block. - /// This function begins by sending a `BlockProposal` message - /// to the signers, and then waits for the signers to respond - /// with their signatures. It does so in two ways, concurrently: - /// * It waits for signer StackerDB messages with signatures. If enough signatures can be - /// found, then the block can be broadcast. - /// * It waits for the chainstate to contain the relayed block. If so, then its signatures are - /// loaded and returned. This can happen if the node receives the block via a signer who - /// fetched all signatures and assembled the signature vector, all before we could. - // Mutants skip here: this function is covered via integration tests, - // which the mutation testing does not see. 
- #[cfg_attr(test, mutants::skip)] - #[allow(clippy::too_many_arguments)] - pub fn run_sign_v0( - &mut self, - block: &NakamotoBlock, - burn_tip: &BlockSnapshot, - burnchain: &Burnchain, - sortdb: &SortitionDB, - chain_state: &mut StacksChainState, - stackerdbs: &StackerDBs, - counters: &Counters, - election_sortition: &ConsensusHash, - ) -> Result, NakamotoNodeError> { - let reward_cycle_id = burnchain - .block_height_to_reward_cycle(burn_tip.block_height) - .expect("FATAL: tried to initialize coordinator before first burn block height"); - - let block_proposal = BlockProposal { - block: block.clone(), - burn_height: burn_tip.block_height, - reward_cycle: reward_cycle_id, - }; - - let needs_initial_block = self.needs_initial_block; - - let block_proposal_message = SignerMessageV0::BlockProposal(block_proposal); - debug!("Sending block proposal message to signers"; - "signer_signature_hash" => %block.header.signer_signature_hash(), - ); - Self::send_miners_message::( - &self.message_key, - sortdb, - burn_tip, - stackerdbs, - block_proposal_message, - MinerSlotID::BlockProposal, - self.is_mainnet, - &mut self.miners_session, - election_sortition, - ) - .map_err(NakamotoNodeError::SigningCoordinatorFailure)?; - counters.bump_naka_proposed_blocks(); - - #[cfg(test)] - { - info!( - "SignCoordinator: sent block proposal to .miners, waiting for test signing channel" - ); - // In test mode, short-circuit waiting for the signers if the TEST_SIGNING - // channel has been created. This allows integration tests for the stacks-node - // independent of the stacks-signer. 
- if let Some(signatures) = - crate::tests::nakamoto_integrations::TestSigningChannel::get_signature() - { - debug!("Short-circuiting waiting for signers, using test signature"); - return Ok(signatures); - } - } - - let Some(ref mut receiver) = self.receiver else { - return Err(NakamotoNodeError::SigningCoordinatorFailure( - "Failed to obtain the StackerDB event receiver".into(), - )); - }; - - let mut total_weight_signed: u32 = 0; - let mut total_reject_weight: u32 = 0; - let mut responded_signers = HashSet::new(); - let mut gathered_signatures = BTreeMap::new(); - - info!("SignCoordinator: beginning to watch for block signatures OR posted blocks."; - "threshold" => self.weight_threshold, - ); - - loop { - // look in the nakamoto staging db -- a block can only get stored there if it has - // enough signing weight to clear the threshold - if let Ok(Some((stored_block, _sz))) = chain_state - .nakamoto_blocks_db() - .get_nakamoto_block(&block.block_id()) - .map_err(|e| { - warn!( - "Failed to query chainstate for block {}: {e:?}", - &block.block_id() - ); - e - }) - { - debug!("SignCoordinator: Found signatures in relayed block"); - counters.bump_naka_signer_pushed_blocks(); - return Ok(stored_block.header.signer_signature); - } - - if Self::check_burn_tip_changed(sortdb, chain_state, burn_tip, needs_initial_block) { - debug!("SignCoordinator: Exiting due to new burnchain tip"); - return Err(NakamotoNodeError::BurnchainTipChanged); - } - - // one of two things can happen: - // * we get enough signatures from stackerdb from the signers, OR - // * we see our block get processed in our chainstate (meaning, the signers broadcasted - // the block and our node got it and processed it) - let event = match receiver.recv_timeout(EVENT_RECEIVER_POLL) { - Ok(event) => event, - Err(std::sync::mpsc::RecvTimeoutError::Timeout) => { - continue; - } - Err(std::sync::mpsc::RecvTimeoutError::Disconnected) => { - return Err(NakamotoNodeError::SigningCoordinatorFailure( - "StackerDB 
event receiver disconnected".into(), - )) - } - }; - - // was the node asked to stop? - if !self.keep_running.load(Ordering::SeqCst) { - info!("SignerCoordinator: received node exit request. Aborting"); - return Err(NakamotoNodeError::ChannelClosed); - } - - // check to see if this event we got is a signer event - let is_signer_event = - event.contract_id.name.starts_with(SIGNERS_NAME) && event.contract_id.is_boot(); - - if !is_signer_event { - debug!("Ignoring StackerDB event for non-signer contract"; "contract" => %event.contract_id); - continue; - } - - let modified_slots = &event.modified_slots.clone(); - - let Ok(signer_event) = SignerEvent::::try_from(event).map_err(|e| { - warn!("Failure parsing StackerDB event into signer event. Ignoring message."; "err" => ?e); - }) else { - continue; - }; - let SignerEvent::SignerMessages(signer_set, messages) = signer_event else { - debug!("Received signer event other than a signer message. Ignoring."); - continue; - }; - if signer_set != u32::try_from(reward_cycle_id % 2).unwrap() { - debug!("Received signer event for other reward cycle. Ignoring."); - continue; - }; - let slot_ids = modified_slots - .iter() - .map(|chunk| chunk.slot_id) - .collect::>(); - - debug!("SignCoordinator: Received messages from signers"; - "count" => messages.len(), - "slot_ids" => ?slot_ids, - "threshold" => self.weight_threshold - ); - - for (message, slot_id) in messages.into_iter().zip(slot_ids) { - let Some(signer_entry) = &self.signer_entries.get(&slot_id) else { - return Err(NakamotoNodeError::SignerSignatureError( - "Signer entry not found".into(), - )); - }; - let Ok(signer_pubkey) = StacksPublicKey::from_slice(&signer_entry.signing_key) - else { - return Err(NakamotoNodeError::SignerSignatureError( - "Failed to parse signer public key".into(), - )); - }; - - if responded_signers.contains(&signer_pubkey) { - debug!( - "Signer {slot_id} already responded for block {}. 
Ignoring {message:?}.", block.header.signer_signature_hash(); - "stacks_block_hash" => %block.header.block_hash(), - "stacks_block_id" => %block.header.block_id() - ); - continue; - } - - match message { - SignerMessageV0::BlockResponse(BlockResponse::Accepted(accepted)) => { - let BlockAccepted { - signer_signature_hash: response_hash, - signature, - metadata, - } = accepted; - let block_sighash = block.header.signer_signature_hash(); - if block_sighash != response_hash { - warn!( - "Processed signature for a different block. Will try to continue."; - "signature" => %signature, - "block_signer_signature_hash" => %block_sighash, - "response_hash" => %response_hash, - "slot_id" => slot_id, - "reward_cycle_id" => reward_cycle_id, - "response_hash" => %response_hash, - "server_version" => %metadata.server_version - ); - continue; - } - debug!("SignCoordinator: Received valid signature from signer"; "slot_id" => slot_id, "signature" => %signature); - let Ok(valid_sig) = signer_pubkey.verify(block_sighash.bits(), &signature) - else { - warn!("Got invalid signature from a signer. Ignoring."); - continue; - }; - if !valid_sig { - warn!( - "Processed signature but didn't validate over the expected block. 
Ignoring"; - "signature" => %signature, - "block_signer_signature_hash" => %block_sighash, - "slot_id" => slot_id, - ); - continue; - } - - if Self::fault_injection_ignore_signatures() { - warn!("SignCoordinator: fault injection: ignoring well-formed signature for block"; - "block_signer_sighash" => %block_sighash, - "signer_pubkey" => signer_pubkey.to_hex(), - "signer_slot_id" => slot_id, - "signature" => %signature, - "signer_weight" => signer_entry.weight, - "total_weight_signed" => total_weight_signed, - "stacks_block_hash" => %block.header.block_hash(), - "stacks_block_id" => %block.header.block_id() - ); - continue; - } - - if !gathered_signatures.contains_key(&slot_id) { - total_weight_signed = total_weight_signed - .checked_add(signer_entry.weight) - .expect("FATAL: total weight signed exceeds u32::MAX"); - } - - info!("SignCoordinator: Signature Added to block"; - "block_signer_sighash" => %block_sighash, - "signer_pubkey" => signer_pubkey.to_hex(), - "signer_slot_id" => slot_id, - "signature" => %signature, - "signer_weight" => signer_entry.weight, - "total_weight_signed" => total_weight_signed, - "stacks_block_hash" => %block.header.block_hash(), - "stacks_block_id" => %block.header.block_id(), - "server_version" => metadata.server_version, - ); - gathered_signatures.insert(slot_id, signature); - responded_signers.insert(signer_pubkey); - } - SignerMessageV0::BlockResponse(BlockResponse::Rejected(rejected_data)) => { - let block_sighash = block.header.signer_signature_hash(); - if block_sighash != rejected_data.signer_signature_hash { - warn!( - "Processed rejection for a different block. 
Will try to continue."; - "block_signer_signature_hash" => %block_sighash, - "rejected_data.signer_signature_hash" => %rejected_data.signer_signature_hash, - "slot_id" => slot_id, - "reward_cycle_id" => reward_cycle_id, - ); - continue; - } - let rejected_pubkey = match rejected_data.recover_public_key() { - Ok(rejected_pubkey) => { - if rejected_pubkey != signer_pubkey { - warn!("Recovered public key from rejected data does not match signer's public key. Ignoring."); - continue; - } - rejected_pubkey - } - Err(e) => { - warn!("Failed to recover public key from rejected data: {e:?}. Ignoring."); - continue; - } - }; - responded_signers.insert(rejected_pubkey); - debug!( - "Signer {slot_id} rejected our block {}/{}", - &block.header.consensus_hash, - &block.header.block_hash() - ); - total_reject_weight = total_reject_weight - .checked_add(signer_entry.weight) - .expect("FATAL: total weight rejected exceeds u32::MAX"); - - if total_reject_weight.saturating_add(self.weight_threshold) - > self.total_weight - { - debug!( - "{total_reject_weight}/{} signers vote to reject our block {}/{}", - self.total_weight, - &block.header.consensus_hash, - &block.header.block_hash() - ); - counters.bump_naka_rejected_blocks(); - return Err(NakamotoNodeError::SignersRejected); - } - continue; - } - SignerMessageV0::BlockProposal(_) => { - debug!("Received block proposal message. Ignoring."); - continue; - } - SignerMessageV0::BlockPushed(_) => { - debug!("Received block pushed message. Ignoring."); - continue; - } - SignerMessageV0::MockSignature(_) - | SignerMessageV0::MockProposal(_) - | SignerMessageV0::MockBlock(_) => { - debug!("Received mock message. Ignoring."); - continue; - } - }; - } - // After gathering all signatures, return them if we've hit the threshold - if total_weight_signed >= self.weight_threshold { - info!("SignCoordinator: Received enough signatures. 
Continuing."; - "stacks_block_hash" => %block.header.block_hash(), - "stacks_block_id" => %block.header.block_id() - ); - return Ok(gathered_signatures.values().cloned().collect()); - } - } - } -} From 06e2764227318bf3003f693b88e6b40c453e8175 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Mon, 16 Dec 2024 14:46:31 -0500 Subject: [PATCH 16/35] chore: clean up compile error and warnings --- .../stacks-node/src/nakamoto_node/relayer.rs | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/relayer.rs b/testnet/stacks-node/src/nakamoto_node/relayer.rs index 86aed60325..a438e188b2 100644 --- a/testnet/stacks-node/src/nakamoto_node/relayer.rs +++ b/testnet/stacks-node/src/nakamoto_node/relayer.rs @@ -400,13 +400,6 @@ impl RelayerThread { SortitionDB::get_canonical_stacks_chain_tip_hash(self.sortdb.conn()) .expect("FATAL: failed to query sortition DB for stacks tip"); - let stacks_tip = StacksBlockId::new(&cur_stacks_tip_ch, &cur_stacks_tip_bh); - - let stacks_tip_sortition = - SortitionDB::get_block_snapshot_consensus(&self.sortdb.conn(), &cur_stacks_tip_ch) - .expect("Relayer: Failed to load canonical Stacks tip's tenure snapshot") - .expect("Relayer: Canonical Stacks tip has no tenure snapshot"); - let directive = if sn.sortition { if won_sortition || self.config.get_node_config(false).mock_mining { info!("Relayer: Won sortition; begin tenure."); @@ -805,7 +798,7 @@ impl RelayerThread { } #[cfg(not(test))] - fn fault_injection_stall_miner_startup() {} + fn fault_injection_stall_miner_thread_startup() {} /// Create the block miner thread state. /// Only proceeds if all of the following are true: @@ -975,12 +968,6 @@ impl RelayerThread { }; // Get the necessary snapshots and state - let burn_tip = SortitionDB::get_block_snapshot_consensus(sortdb.conn(), &new_burn_view)? 
- .ok_or_else(|| { - error!("Relayer: failed to get block snapshot for new burn view"); - NakamotoNodeError::SnapshotNotFoundForChainTip - })?; - let (canonical_stacks_tip_ch, canonical_stacks_tip_bh) = SortitionDB::get_canonical_stacks_chain_tip_hash(sortdb.conn()).unwrap(); @@ -1027,10 +1014,6 @@ impl RelayerThread { } debug!("Relayer: successfully stopped tenure; will try to continue."); - let Some(mining_pkh) = self.get_mining_key_pkh() else { - return Ok(()); - }; - let Some(canonical_stacks_tip_election_snapshot) = Self::can_continue_tenure( &self.sortdb, new_burn_view.clone(), From 7abaaca4ff13c748219bfedebc58aa73b2de0c1e Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Fri, 20 Dec 2024 14:47:57 -0500 Subject: [PATCH 17/35] feat: tenure_extend_wait_secs: a config option to wait for a block-found before extending the ongoing tenure if the miner produced the ongoing tenure --- testnet/stacks-node/src/config.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/testnet/stacks-node/src/config.rs b/testnet/stacks-node/src/config.rs index 4ad793a4c3..d000c466f2 100644 --- a/testnet/stacks-node/src/config.rs +++ b/testnet/stacks-node/src/config.rs @@ -93,6 +93,7 @@ const DEFAULT_FIRST_REJECTION_PAUSE_MS: u64 = 5_000; const DEFAULT_SUBSEQUENT_REJECTION_PAUSE_MS: u64 = 10_000; const DEFAULT_BLOCK_COMMIT_DELAY_MS: u64 = 20_000; const DEFAULT_TENURE_COST_LIMIT_PER_BLOCK_PERCENTAGE: u8 = 25; +const DEFAULT_TENURE_EXTEND_WAIT_SECS: u64 = 30; #[derive(Clone, Deserialize, Default, Debug)] #[serde(deny_unknown_fields)] @@ -2145,6 +2146,8 @@ pub struct MinerConfig { pub block_commit_delay: Duration, /// The percentage of the remaining tenure cost limit to consume each block. 
pub tenure_cost_limit_per_block_percentage: Option, + /// The number of seconds to wait to try to continue a tenure if a BlockFound is expected + pub tenure_extend_wait_secs: Duration, } impl Default for MinerConfig { @@ -2181,6 +2184,7 @@ impl Default for MinerConfig { tenure_cost_limit_per_block_percentage: Some( DEFAULT_TENURE_COST_LIMIT_PER_BLOCK_PERCENTAGE, ), + tenure_extend_wait_secs: Duration::from_secs(DEFAULT_TENURE_EXTEND_WAIT_SECS), } } } @@ -2566,6 +2570,7 @@ pub struct MinerConfigFile { pub subsequent_rejection_pause_ms: Option, pub block_commit_delay_ms: Option, pub tenure_cost_limit_per_block_percentage: Option, + pub tenure_extend_wait_secs: Option, } impl MinerConfigFile { @@ -2706,6 +2711,7 @@ impl MinerConfigFile { subsequent_rejection_pause_ms: self.subsequent_rejection_pause_ms.unwrap_or(miner_default_config.subsequent_rejection_pause_ms), block_commit_delay: self.block_commit_delay_ms.map(Duration::from_millis).unwrap_or(miner_default_config.block_commit_delay), tenure_cost_limit_per_block_percentage, + tenure_extend_wait_secs: self.tenure_extend_wait_secs.map(Duration::from_secs).unwrap_or(miner_default_config.tenure_extend_wait_secs), }) } } From 06096eed8df090d0e78f13b157553cbbff373475 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Fri, 20 Dec 2024 14:48:36 -0500 Subject: [PATCH 18/35] fix: allow a BlockFound to be produced if the Relayer determines that the miner is "late" in doing so -- e.g. 
because a flashblock arrived --- .../stacks-node/src/nakamoto_node/miner.rs | 41 +++++++++++++++---- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs index 3f383ac95b..049dd12c6e 100644 --- a/testnet/stacks-node/src/nakamoto_node/miner.rs +++ b/testnet/stacks-node/src/nakamoto_node/miner.rs @@ -75,6 +75,7 @@ pub enum MinerDirective { BeginTenure { parent_tenure_start: StacksBlockId, burnchain_tip: BlockSnapshot, + late: bool, }, /// The miner should try to continue their tenure if they are the active miner ContinueTenure { new_burn_view: ConsensusHash }, @@ -104,7 +105,7 @@ struct ParentStacksBlockInfo { #[derive(PartialEq, Clone, Debug)] pub enum MinerReason { /// The miner thread was spawned to begin a new tenure - BlockFound, + BlockFound { late: bool }, /// The miner thread was spawned to extend an existing tenure Extended { /// Current consensus hash on the underlying burnchain. Corresponds to the last-seen @@ -116,7 +117,9 @@ pub enum MinerReason { impl std::fmt::Display for MinerReason { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - MinerReason::BlockFound => write!(f, "BlockFound"), + MinerReason::BlockFound { late } => { + write!(f, "BlockFound({})", if *late { "late" } else { "current" }) + } MinerReason::Extended { burn_view_consensus_hash, } => write!( @@ -498,6 +501,7 @@ impl BlockMinerThread { // update mined-block counters and mined-tenure counters self.globals.counters.bump_naka_mined_blocks(); if self.last_block_mined.is_some() { + // TODO: reviewers: should this be .is_none()? 
// this is the first block of the tenure, bump tenure counter self.globals.counters.bump_naka_mined_tenures(); } @@ -958,6 +962,7 @@ impl BlockMinerThread { miner_address, &self.parent_tenure_id, stacks_tip_header, + &self.reason, ) { Ok(parent_info) => Ok(parent_info), Err(NakamotoNodeError::BurnchainTipChanged) => { @@ -1233,7 +1238,7 @@ impl BlockMinerThread { }; let (tenure_change_tx, coinbase_tx) = match &self.reason { - MinerReason::BlockFound => { + MinerReason::BlockFound { .. } => { let tenure_change_tx = self.generate_tenure_change_tx(current_miner_nonce, payload)?; let coinbase_tx = @@ -1253,6 +1258,8 @@ impl BlockMinerThread { "parent_block_id" => %parent_block_id, "num_blocks_so_far" => num_blocks_so_far, ); + + // NOTE: this switches payload.cause to TenureChangeCause::Extend payload = payload.extend( *burn_view_consensus_hash, parent_block_id, @@ -1353,11 +1360,22 @@ impl BlockMinerThread { chain_state: &mut StacksChainState, ) -> Result<(), NakamotoNodeError> { Self::check_burn_view_changed(sortdb, chain_state, &self.burn_block)?; + + if let MinerReason::BlockFound { late } = &self.reason { + if *late && self.last_block_mined.is_none() { + // this is a late BlockFound tenure change that ought to be appended to the Stacks + // chain tip, and we haven't submitted it yet. 
+ return Ok(()); + } + } + let cur_burn_chain_tip = SortitionDB::get_canonical_burn_chain_tip(sortdb.conn()) .expect("FATAL: failed to query sortition DB for canonical burn chain tip"); if cur_burn_chain_tip.consensus_hash != self.burn_block.consensus_hash { - info!("Miner: Cancel block assembly; burnchain tip has changed"); + info!("Miner: Cancel block assembly; burnchain tip has changed"; + "new_tip" => %cur_burn_chain_tip.consensus_hash, + "local_tip" => %self.burn_block.consensus_hash); self.globals.counters.bump_missed_tenures(); Err(NakamotoNodeError::BurnchainTipChanged) } else { @@ -1377,7 +1395,7 @@ impl ParentStacksBlockInfo { // TODO: add tests from mutation testing results #4869 #[cfg_attr(test, mutants::skip)] /// Determine where in the set of forks to attempt to mine the next anchored block. - /// `mine_tip_ch` and `mine_tip_bhh` identify the parent block on top of which to mine. + /// `parent_tenure_id` and `stacks_tip_header` identify the parent block on top of which to mine. /// `check_burn_block` identifies what we believe to be the burn chain's sortition history tip. 
/// This is used to mitigate (but not eliminate) a TOCTTOU issue with mining: the caller's /// conception of the sortition history tip may have become stale by the time they call this @@ -1389,6 +1407,7 @@ impl ParentStacksBlockInfo { miner_address: StacksAddress, parent_tenure_id: &StacksBlockId, stacks_tip_header: StacksHeaderInfo, + reason: &MinerReason, ) -> Result { // the stacks block I'm mining off of's burn header hash and vtxindex: let parent_snapshot = SortitionDB::get_block_snapshot_consensus( @@ -1398,11 +1417,17 @@ impl ParentStacksBlockInfo { .expect("Failed to look up block's parent snapshot") .expect("Failed to look up block's parent snapshot"); - // don't mine off of an old burnchain block + // don't mine off of an old burnchain block, unless we're late let burn_chain_tip = SortitionDB::get_canonical_burn_chain_tip(burn_db.conn()) .expect("FATAL: failed to query sortition DB for canonical burn chain tip"); - if burn_chain_tip.consensus_hash != check_burn_block.consensus_hash { + let allow_late = if let MinerReason::BlockFound { late } = reason { + *late + } else { + false + }; + + if !allow_late && burn_chain_tip.consensus_hash != check_burn_block.consensus_hash { info!( "New canonical burn chain tip detected. 
Will not try to mine."; "new_consensus_hash" => %burn_chain_tip.consensus_hash, @@ -1476,6 +1501,8 @@ impl ParentStacksBlockInfo { "stacks_tip_consensus_hash" => %parent_snapshot.consensus_hash, "stacks_tip_burn_hash" => %parent_snapshot.burn_header_hash, "stacks_tip_burn_height" => parent_snapshot.block_height, + "parent_tenure_info" => ?parent_tenure_info, + "reason" => %reason ); let coinbase_nonce = { From 7631f41644bf143e325d9f1604227feb31773314 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Fri, 20 Dec 2024 14:49:09 -0500 Subject: [PATCH 19/35] chore: fix choose_miner_directive() to attempt to continue a tenure if the miner produced the ongoing tenure (but only after a deadline), and to stop the tenure thread if the miner did not win sortition (even if continuation is later possible). If continuation is possible, then start a continuation thread if the ongoing tenure is still active. --- .../stacks-node/src/nakamoto_node/relayer.rs | 344 +++++++++++++++--- 1 file changed, 290 insertions(+), 54 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/relayer.rs b/testnet/stacks-node/src/nakamoto_node/relayer.rs index a438e188b2..3de86b526c 100644 --- a/testnet/stacks-node/src/nakamoto_node/relayer.rs +++ b/testnet/stacks-node/src/nakamoto_node/relayer.rs @@ -238,6 +238,9 @@ pub struct RelayerThread { /// handle to the subordinate miner thread miner_thread: Option>>, + /// miner thread's burn view + miner_thread_burn_view: Option, + /// The relayer thread reads directives from the relay_rcv, but it also periodically wakes up /// to check if it should issue a block commit or try to register a VRF key next_initiative: Instant, @@ -247,6 +250,8 @@ pub struct RelayerThread { last_committed: Option, /// Timeout for waiting for the first block in a tenure before submitting a block commit new_tenure_timeout: Option, + /// Timeout for waiting for a BlockFound in a subsequent tenure before trying to extend our own + tenure_extend_timeout: Option, } impl 
RelayerThread { @@ -301,10 +306,12 @@ impl RelayerThread { relayer, miner_thread: None, + miner_thread_burn_view: None, is_miner, next_initiative: Instant::now() + Duration::from_millis(next_initiative_delay), last_committed: None, new_tenure_timeout: None, + tenure_extend_timeout: None, } } @@ -387,6 +394,35 @@ impl RelayerThread { } /// Choose a miner directive based on the outcome of a sortition. + /// + /// The decision process is a little tricky, because the right decision depends on: + /// * whether or not we won the _given_ sortition (`sn`) + /// * whether or not we won the sortition that started the ongoing Stacks tenure + /// * whether or not we won the last sortition with a winner + /// * whether or not the last sortition winner has produced a Stacks block + /// * whether or not the ongoing Stacks tenure is at or descended from the last-winning + /// sortition + /// + /// Specifically: + /// + /// If we won the given sortition `sn`, then we can start mining immediately with a `BlockFound` + /// tenure-change. Otherwise, if we won the tenure which started the ongoing Stacks tenure + /// (i.e. we're the active miner), then we _may_ start mining after a timeout _if_ the winning + /// miner (not us) fails to submit a `BlockFound` tenure-change block for `sn`. + /// + /// Otherwise, if the given sortition `sn` has no winner, then find out who won the last sortition + /// with a winner. If it was us, and if we haven't yet submitted a `BlockFound` tenure-change + /// for it (which can happen if this given sortition is from a flash block), then start mining + /// immediately with a "late" `BlockFound` tenure, _and_ prepare to start mining right afterwards + /// with an `Extended` tenure-change so as to represent the given sortition `sn`'s burn view in + /// the Stacks chain. + /// + /// Otherwise, if this sortition has no winner, and we did not win the last-winning sortition, + /// then check to see if we're the ongoing Stacks tenure's miner. 
If so, then we _may_ start + /// mining after a timeout _if_ the winner of the last-good sortition (not us) fails to submit + /// a `BlockFound` tenure-change block. This can happen if `sn` was a flash block, and the + /// remote miner has yet to process it. + /// /// We won't always be able to mine -- for example, this could be an empty sortition, but the /// parent block could be an epoch 2 block. In this case, the right thing to do is to wait for /// the next block-commit. @@ -396,27 +432,37 @@ impl RelayerThread { won_sortition: bool, committed_index_hash: StacksBlockId, ) -> Option { - let (cur_stacks_tip_ch, cur_stacks_tip_bh) = + let (cur_stacks_tip_ch, _) = SortitionDB::get_canonical_stacks_chain_tip_hash(self.sortdb.conn()) .expect("FATAL: failed to query sortition DB for stacks tip"); - let directive = if sn.sortition { + self.tenure_extend_timeout = None; + + if sn.sortition { + // a sortition happened if won_sortition || self.config.get_node_config(false).mock_mining { - info!("Relayer: Won sortition; begin tenure."); + // a sortition happened, and we won + info!("Relayer: Won sortition; begin tenure."; + "winning_sortition" => %sn.consensus_hash); return Some(MinerDirective::BeginTenure { parent_tenure_start: committed_index_hash, burnchain_tip: sn, + late: false, }); } + + // a sortition happened, but we didn't win. match Self::can_continue_tenure( &self.sortdb, sn.consensus_hash, self.get_mining_key_pkh(), ) { Ok(Some(_)) => { - return Some(MinerDirective::ContinueTenure { - new_burn_view: sn.consensus_hash, - }); + // we can continue our ongoing tenure, but we should give the new winning miner + // a chance to send their BlockFound first. + debug!("Relayer: Did not win sortition, but am mining the ongoing tenure. 
Allowing the new miner some time to come online before trying to continue."); + self.tenure_extend_timeout = Some(Instant::now()); + return Some(MinerDirective::StopTenure); } Ok(None) => { return Some(MinerDirective::StopTenure); @@ -426,34 +472,140 @@ impl RelayerThread { return Some(MinerDirective::StopTenure); } } - } else { - // find out what epoch the Stacks tip is in. - // If it's in epoch 2.x, then we must always begin a new tenure, but we can't do so - // right now since this sortition has no winner. - let stacks_tip_sn = - SortitionDB::get_block_snapshot_consensus(self.sortdb.conn(), &cur_stacks_tip_ch) - .expect("FATAL: failed to query sortiiton DB for epoch") - .expect("FATAL: no sortition for canonical stacks tip"); - - let cur_epoch = - SortitionDB::get_stacks_epoch(self.sortdb.conn(), stacks_tip_sn.block_height) - .expect("FATAL: failed to query sortition DB for epoch") - .expect("FATAL: no epoch defined for existing sortition"); - - if cur_epoch.epoch_id < StacksEpochId::Epoch30 { - debug!( - "As of sortition {}, there has not yet been a Nakamoto tip. Cannot mine.", + } + + // no sortition happened. + // find out what epoch the Stacks tip is in. + // If it's in epoch 2.x, then we must always begin a new tenure, but we can't do so + // right now since this sortition has no winner. + let stacks_tip_sn = + SortitionDB::get_block_snapshot_consensus(self.sortdb.conn(), &cur_stacks_tip_ch) + .expect("FATAL: failed to query sortiiton DB for epoch") + .expect("FATAL: no sortition for canonical stacks tip"); + + let cur_epoch = + SortitionDB::get_stacks_epoch(self.sortdb.conn(), stacks_tip_sn.block_height) + .expect("FATAL: failed to query sortition DB for epoch") + .expect("FATAL: no epoch defined for existing sortition"); + + if cur_epoch.epoch_id < StacksEpochId::Epoch30 { + debug!( + "As of sortition {}, there has not yet been a Nakamoto tip. 
Cannot mine.", + &stacks_tip_sn.consensus_hash + ); + return None; + } + + // find out who won the last non-empty sortition. It may have been us. + let Ok(last_winning_snapshot) = Self::get_last_winning_snapshot(&self.sortdb, &sn) + .inspect_err(|e| { + warn!("Relayer: Failed to load last winning snapshot: {e:?}"); + }) + else { + // this should be unreachable, but don't tempt fate. + info!("Relayer: No prior snapshots have a winning sortition. Will not try to mine."); + return None; + }; + + if last_winning_snapshot.miner_pk_hash == self.get_mining_key_pkh() { + debug!( + "Relayer: we won the last winning sortition {}", + &last_winning_snapshot.consensus_hash + ); + + // we won the last non-empty sortition. Has there been a BlockFound issued for it? + // This would be true if the stacks tip's tenure is at or descends from this snapshot. + // If there has _not_ been a BlockFound, then we should issue one. + let ih = self + .sortdb + .index_handle(&last_winning_snapshot.sortition_id); + let need_blockfound = if stacks_tip_sn.block_height > last_winning_snapshot.block_height + { + // stacks tip is ahead of this snapshot, so no BlockFound can be issued. + test_debug!("Relayer: stacks_tip_sn.block_height ({}) > last_winning_snapshot.block_height ({})", stacks_tip_sn.block_height, last_winning_snapshot.block_height); + false + } else if stacks_tip_sn.block_height == last_winning_snapshot.block_height + && stacks_tip_sn.consensus_hash == last_winning_snapshot.consensus_hash + { + // this is the ongoing tenure snapshot. A BlockFound has already been issued. 
We + // can instead opt to Extend + test_debug!( + "Relayer: ongoing tenure {} already represents last-winning snapshot", &stacks_tip_sn.consensus_hash ); - None + self.tenure_extend_timeout = Some(Instant::now()); + false } else { - info!("Relayer: No sortition; continue tenure."); - Some(MinerDirective::ContinueTenure { - new_burn_view: sn.consensus_hash, + // stacks tip's snapshot may be an ancestor of the last-won sortition. + // If so, then we can issue a BlockFound. + SortitionDB::get_ancestor_snapshot( + &ih, + stacks_tip_sn.block_height, + &last_winning_snapshot.sortition_id, + ) + .map_err(|e| { + error!("Relayer: Failed to load ancestor snapshot: {e:?}"); + e + }) + .ok() + .flatten() + .map(|sn| { + let need_blockfound = sn.consensus_hash == stacks_tip_sn.consensus_hash; + if !need_blockfound { + test_debug!( + "Relayer: stacks_tip_sn.consensus_hash ({}) != sn.consensus_hash ({})", + &stacks_tip_sn.consensus_hash, + &sn.consensus_hash + ); + } + need_blockfound + }) + .unwrap_or_else(|| { + test_debug!( + "Relayer: no ancestor at height {} off of sortition {} height {}", + stacks_tip_sn.block_height, + &last_winning_snapshot.consensus_hash, + last_winning_snapshot.block_height + ); + false }) + }; + if need_blockfound { + info!( + "Relayer: will submit late BlockFound for {}", + &last_winning_snapshot.consensus_hash + ); + // prepare to extend after our BlockFound gets mined. + self.tenure_extend_timeout = Some(Instant::now()); + return Some(MinerDirective::BeginTenure { + parent_tenure_start: StacksBlockId( + last_winning_snapshot.winning_stacks_block_hash.clone().0, + ), + burnchain_tip: last_winning_snapshot, + late: true, + }); } - }; - directive + } + + // try to continue our tenure if we produced the canonical Stacks tip. + if stacks_tip_sn.miner_pk_hash == self.get_mining_key_pkh() { + info!("Relayer: No sortition, but we produced the canonical Stacks tip. 
Will continue tenure."); + + if last_winning_snapshot.miner_pk_hash != self.get_mining_key_pkh() { + // delay trying to continue since the last snapshot with a sortition was won + // by someone else -- there's a chance that this other miner will produce a + // BlockFound in the interim. + debug!("Relayer: Did not win last winning snapshot despite mining the ongoing tenure, so allowing the new miner some time to come online."); + self.tenure_extend_timeout = Some(Instant::now()); + return None; + } + return Some(MinerDirective::ContinueTenure { + new_burn_view: sn.consensus_hash, + }); + } + + info!("Relayer: No sortition, and we did not produce the last Stacks tip. Will not mine."); + return None; } /// Given the pointer to a recently processed sortition, see if we won the sortition, and @@ -474,8 +626,8 @@ impl RelayerThread { .expect("FATAL: unknown consensus hash"); // always clear this even if this isn't the latest sortition - let cleared = self.last_commits.remove(&sn.winning_block_txid); - let won_sortition = sn.sortition && cleared; + self.last_commits.remove(&sn.winning_block_txid); + let won_sortition = sn.sortition; // && cleared; if won_sortition { increment_stx_blocks_mined_counter(); } @@ -831,7 +983,13 @@ impl RelayerThread { let burn_chain_tip = burn_chain_sn.burn_header_hash; - if burn_chain_tip != burn_header_hash { + let allow_late = if let MinerReason::BlockFound { late } = &reason { + *late + } else { + false + }; + + if burn_chain_tip != burn_header_hash && !allow_late { debug!( "Relayer: Drop stale RunTenure for {burn_header_hash}: current sortition is for {burn_chain_tip}" ); @@ -870,6 +1028,8 @@ impl RelayerThread { // when starting a new tenure, block the mining thread if its currently running. 
// the new mining thread will join it (so that the new mining thread stalls, not the relayer) let prior_tenure_thread = self.miner_thread.take(); + self.miner_thread_burn_view = None; + let vrf_key = self .globals .get_leader_key_registration_state() @@ -881,7 +1041,7 @@ impl RelayerThread { let new_miner_state = self.create_block_miner( vrf_key, block_election_snapshot, - burn_tip, + burn_tip.clone(), parent_tenure_start, reason, )?; @@ -909,6 +1069,7 @@ impl RelayerThread { new_miner_handle.thread().id() ); self.miner_thread.replace(new_miner_handle); + self.miner_thread_burn_view.replace(burn_tip); Ok(()) } @@ -919,6 +1080,8 @@ impl RelayerThread { debug!("Relayer: no tenure thread to stop"); return Ok(()); }; + self.miner_thread_burn_view = None; + let id = prior_tenure_thread.thread().id(); let globals = self.globals.clone(); @@ -945,6 +1108,15 @@ impl RelayerThread { )) } + /// Helper method to get the last snapshot with a winner + fn get_last_winning_snapshot( + sortdb: &SortitionDB, + sort_tip: &BlockSnapshot, + ) -> Result { + let ih = sortdb.index_handle(&sort_tip.sortition_id); + Ok(ih.get_last_snapshot_with_sortition(sort_tip.block_height)?) 
+ } + /// Determine if the miner can contine an existing tenure with the new sortition (identified /// by `new_burn_view`) /// @@ -981,11 +1153,12 @@ impl RelayerThread { NakamotoNodeError::SnapshotNotFoundForChainTip })?; - let won_last_good_sortition = canonical_stacks_snapshot.miner_pk_hash == Some(mining_pkh); + let won_ongoing_tenure_sortition = + canonical_stacks_snapshot.miner_pk_hash == Some(mining_pkh); info!( "Relayer: Checking for tenure continuation."; - "won_last_good_sortition" => won_last_good_sortition, + "won_ongoing_tenure_sortition" => won_ongoing_tenure_sortition, "current_mining_pkh" => %mining_pkh, "canonical_stacks_tip_id" => %canonical_stacks_tip, "canonical_stacks_tip_ch" => %canonical_stacks_tip_ch, @@ -993,7 +1166,7 @@ impl RelayerThread { "burn_view_ch" => %new_burn_view, ); - if !won_last_good_sortition { + if !won_ongoing_tenure_sortition { info!("Relayer: Did not win the last sortition that commits to our Stacks fork. Cannot continue tenure."); return Ok(None); } @@ -1079,11 +1252,12 @@ impl RelayerThread { MinerDirective::BeginTenure { parent_tenure_start, burnchain_tip, + late, } => match self.start_new_tenure( parent_tenure_start, burnchain_tip.clone(), burnchain_tip.clone(), - MinerReason::BlockFound, + MinerReason::BlockFound { late }, ) { Ok(()) => { debug!("Relayer: successfully started new tenure."; @@ -1091,7 +1265,7 @@ impl RelayerThread { "burn_tip" => %burnchain_tip.consensus_hash, "burn_view_snapshot" => %burnchain_tip.consensus_hash, "block_election_snapshot" => %burnchain_tip.consensus_hash, - "reason" => %MinerReason::BlockFound); + "reason" => %MinerReason::BlockFound { late }); } Err(e) => { error!("Relayer: Failed to start new tenure: {e:?}"); @@ -1324,16 +1498,80 @@ impl RelayerThread { )) } + /// Try to start up a tenure-extend, after a delay has passed. + /// We would do this if we were the miner of the ongoing tenure, but did not win the last + /// sortition, and the winning miner never produced a block. 
+ fn try_continue_tenure(&mut self) { + if self.tenure_extend_timeout.is_none() { + return; + } + + let deadline_passed = self + .tenure_extend_timeout + .map(|tenure_extend_timeout| { + let deadline_passed = + tenure_extend_timeout.elapsed() > self.config.miner.tenure_extend_wait_secs; + if !deadline_passed { + test_debug!( + "Relayer: will not try to tenure-extend yet ({} <= {})", + tenure_extend_timeout.elapsed().as_secs(), + self.config.miner.tenure_extend_wait_secs.as_secs() + ); + } + deadline_passed + }) + .unwrap_or(false); + + if !deadline_passed { + return; + } + + // reset timer so we can try again if for some reason a miner was already running (e.g. a + // blockfound from earlier). + self.tenure_extend_timeout = Some(Instant::now()); + + // try to extend, but only if we aren't already running a thread for the current or newer + // burnchain view + let Ok(sn) = + SortitionDB::get_canonical_burn_chain_tip(self.sortdb.conn()).inspect_err(|e| { + error!("Relayer: failed to read canonical burnchain sortition: {e:?}"); + }) + else { + return; + }; + + if let Some(miner_thread_burn_view) = self.miner_thread_burn_view.as_ref() { + // a miner thread is already running. If its burn view is the same as the canonical + // tip, then do nothing + if sn.consensus_hash == miner_thread_burn_view.consensus_hash { + info!("Relayer: will not try to start a tenure extend -- the current miner thread's burn view matches the sortition tip"; "sortition tip" => %sn.consensus_hash); + return; + } + } + + if let Err(e) = self.continue_tenure(sn.consensus_hash.clone()) { + warn!( + "Relayer: failed to continue tenure for burn view {}: {e:?}", + &sn.consensus_hash + ); + } + } + /// Main loop of the relayer. /// Runs in a separate thread. - /// Continuously receives + /// Continuously receives from `relay_rcv`. + /// Wakes up once per second to see if we need to continue mining an ongoing tenure. 
pub fn main(mut self, relay_rcv: Receiver) { debug!("relayer thread ID is {:?}", std::thread::current().id()); self.next_initiative = Instant::now() + Duration::from_millis(self.config.node.next_initiative_delay); + // how often we perform a loop pass below + let poll_frequency_ms = 1_000; + while self.globals.keep_running() { + self.try_continue_tenure(); let raised_initiative = self.globals.take_initiative(); let timed_out = Instant::now() >= self.next_initiative; let mut initiative_directive = if raised_initiative.is_some() || timed_out { @@ -1344,33 +1582,31 @@ impl RelayerThread { None }; - let directive = if let Some(directive) = initiative_directive.take() { - directive + let directive_opt = if let Some(directive) = initiative_directive.take() { + Some(directive) } else { // channel was drained, so do a time-bound recv - match relay_rcv.recv_timeout(Duration::from_millis( - self.config.node.next_initiative_delay, - )) { + match relay_rcv.recv_timeout(Duration::from_millis(poll_frequency_ms)) { Ok(directive) => { // only do this once, so we can call .initiative() again - directive - } - Err(RecvTimeoutError::Timeout) => { - continue; + Some(directive) } + Err(RecvTimeoutError::Timeout) => None, Err(RecvTimeoutError::Disconnected) => { break; } } }; - debug!("Relayer: main loop directive"; - "directive" => %directive, - "raised_initiative" => ?raised_initiative, - "timed_out" => %timed_out); + if let Some(directive) = directive_opt { + debug!("Relayer: main loop directive"; + "directive" => %directive, + "raised_initiative" => ?raised_initiative, + "timed_out" => %timed_out); - if !self.handle_directive(directive) { - break; + if !self.handle_directive(directive) { + break; + } } } From 0f7ada422da6ebc46d5275ead40f81fa37d41edc Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Fri, 20 Dec 2024 14:50:10 -0500 Subject: [PATCH 20/35] chore: fix tests --- .../stacks-node/src/tests/nakamoto_integrations.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 
deletions(-) diff --git a/testnet/stacks-node/src/tests/nakamoto_integrations.rs b/testnet/stacks-node/src/tests/nakamoto_integrations.rs index c1d0c41eff..2fe8dc5d2f 100644 --- a/testnet/stacks-node/src/tests/nakamoto_integrations.rs +++ b/testnet/stacks-node/src/tests/nakamoto_integrations.rs @@ -97,12 +97,9 @@ use stacks_signer::v0::SpawnedSigner; use super::bitcoin_regtest::BitcoinCoreController; use crate::config::{EventKeyType, InitialBalance}; use crate::nakamoto_node::miner::{ - MinerReason, TEST_BLOCK_ANNOUNCE_STALL, TEST_BROADCAST_STALL, TEST_MINE_STALL, - TEST_SKIP_P2P_BROADCAST, -}; -use crate::nakamoto_node::relayer::{ - RelayerThread, TEST_MINER_THREAD_STALL, TEST_MINER_THREAD_START_STALL, + TEST_BLOCK_ANNOUNCE_STALL, TEST_BROADCAST_STALL, TEST_MINE_STALL, TEST_SKIP_P2P_BROADCAST, }; +use crate::nakamoto_node::relayer::{RelayerThread, TEST_MINER_THREAD_STALL}; use crate::neon::{Counters, RunLoopCounter}; use crate::operations::BurnchainOpSigner; use crate::run_loop::boot_nakamoto; @@ -10383,8 +10380,10 @@ fn test_tenure_extend_from_flashblocks() { let mut signer_test: SignerTest = SignerTest::new_with_config_modifications( 1, initial_balances, - |_config| {}, |_| {}, + |config| { + config.miner.tenure_extend_wait_secs = Duration::from_secs(15); + }, None, None, ); @@ -10616,7 +10615,7 @@ fn test_tenure_extend_from_flashblocks() { // wait for the miner directive to be processed wait_for(60, || { - sleep_ms(10_000); + sleep_ms(30_000); let directives_cnt = nakamoto_miner_directives.load(Ordering::SeqCst); Ok(directives_cnt > miner_directives_before) }) From 6b1842916030231e256cf8dd0096913dd5a60ed5 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Fri, 20 Dec 2024 14:50:20 -0500 Subject: [PATCH 21/35] chore: expect a TenureExtend for a flash block --- testnet/stacks-node/src/tests/signer/v0.rs | 58 ++++++++++++++++++++-- 1 file changed, 55 insertions(+), 3 deletions(-) diff --git a/testnet/stacks-node/src/tests/signer/v0.rs 
b/testnet/stacks-node/src/tests/signer/v0.rs index 00276b09ee..59afef42c6 100644 --- a/testnet/stacks-node/src/tests/signer/v0.rs +++ b/testnet/stacks-node/src/tests/signer/v0.rs @@ -6321,6 +6321,9 @@ fn continue_after_fast_block_no_sortition() { let node_2_rpc = gen_random_port(); let node_2_p2p = gen_random_port(); + debug!("Node 1 bound at (p2p={}, rpc={})", node_1_p2p, node_1_rpc); + debug!("Node 2 bound at (p2p={}, rpc={})", node_2_p2p, node_2_rpc); + let localhost = "127.0.0.1"; let node_1_rpc_bind = format!("{localhost}:{node_1_rpc}"); let node_2_rpc_bind = format!("{localhost}:{node_2_rpc}"); @@ -6357,6 +6360,8 @@ fn continue_after_fast_block_no_sortition() { config.burnchain.local_mining_public_key = Some(btc_miner_1_pk.to_hex()); config.miner.mining_key = Some(Secp256k1PrivateKey::from_seed(&[1])); + config.miner.tenure_extend_wait_secs = Duration::from_secs(10); + config.events_observers.retain(|listener| { let Ok(addr) = std::net::SocketAddr::from_str(&listener.endpoint) else { warn!( @@ -6659,7 +6664,11 @@ fn continue_after_fast_block_no_sortition() { .unwrap() .replace(Vec::new()); - info!("------------------------- Wait for Miner B's Block N -------------------------"); + info!("------------------------- Wait for Miner B's Block N -------------------------"; + "blocks_processed_before_2" => %blocks_processed_before_2, + "stacks_height_before" => %stacks_height_before, + "nmb_old_blocks" => %nmb_old_blocks); + // wait for the new block to be processed wait_for(30, || { let stacks_height = signer_test @@ -6667,6 +6676,15 @@ fn continue_after_fast_block_no_sortition() { .get_peer_info() .expect("Failed to get peer info") .stacks_tip_height; + + let blocks_mined1_val = blocks_mined1.load(Ordering::SeqCst); + let blocks_mined2_val = blocks_mined2.load(Ordering::SeqCst); + info!("Waiting for Miner B's Block N"; + "blocks_mined1_val" => %blocks_mined1_val, + "blocks_mined2_val" => %blocks_mined2_val, + "stacks_height" => %stacks_height, + "observed_blocks" 
=> %test_observer::get_blocks().len()); + Ok( blocks_mined2.load(Ordering::SeqCst) > blocks_processed_before_2 && stacks_height > stacks_height_before @@ -6701,13 +6719,47 @@ fn continue_after_fast_block_no_sortition() { ); submit_tx(&http_origin, &transfer_tx); - // wait for the new block to be processed + // wait for the tenure-extend block to be processed + wait_for(30, || { + let stacks_height = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + Ok( + blocks_mined2.load(Ordering::SeqCst) > blocks_processed_before_2 + && stacks_height > stacks_height_before + && test_observer::get_blocks().len() > nmb_old_blocks, + ) + }) + .expect("Timed out waiting for block to be mined and processed"); + + verify_last_block_contains_tenure_change_tx(TenureChangeCause::Extended); + + let nmb_old_blocks = test_observer::get_blocks().len(); + let blocks_processed_before_2 = blocks_mined2.load(Ordering::SeqCst); + let stacks_height_before = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + + // wait for the new block with the STX transfer to be processed wait_for(30, || { let stacks_height = signer_test .stacks_client .get_peer_info() .expect("Failed to get peer info") .stacks_tip_height; + + let blocks_mined1_val = blocks_mined1.load(Ordering::SeqCst); + let blocks_mined2_val = blocks_mined2.load(Ordering::SeqCst); + info!("Waiting for Miner B's Block N"; + "blocks_mined1_val" => %blocks_mined1_val, + "blocks_mined2_val" => %blocks_mined2_val, + "stacks_height" => %stacks_height, + "observed_blocks" => %test_observer::get_blocks().len()); + Ok( blocks_mined2.load(Ordering::SeqCst) > blocks_processed_before_2 && stacks_height > stacks_height_before @@ -6780,7 +6832,7 @@ fn continue_after_fast_block_no_sortition() { .expect("Failed to get peer info"); assert_eq!(get_burn_height(), starting_burn_height + btc_blocks_mined); - assert_eq!(peer_info.stacks_tip_height, 
starting_peer_height + 5); + assert_eq!(peer_info.stacks_tip_height, starting_peer_height + 6); info!("------------------------- Shutdown -------------------------"); rl2_coord_channels From 70833cdceab2682d6610631119cc417f83e597b7 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Thu, 9 Jan 2025 14:20:16 -0500 Subject: [PATCH 22/35] refactor: use `TestFlag` for more flags --- stackslib/src/net/api/postblock_proposal.rs | 17 +++-- .../stacks-node/src/nakamoto_node/miner.rs | 26 ++++--- .../stacks-node/src/nakamoto_node/relayer.rs | 18 +++-- .../src/tests/nakamoto_integrations.rs | 28 ++++---- testnet/stacks-node/src/tests/signer/v0.rs | 70 +++++++++---------- 5 files changed, 84 insertions(+), 75 deletions(-) diff --git a/stackslib/src/net/api/postblock_proposal.rs b/stackslib/src/net/api/postblock_proposal.rs index 515836814a..d8144a73fa 100644 --- a/stackslib/src/net/api/postblock_proposal.rs +++ b/stackslib/src/net/api/postblock_proposal.rs @@ -15,6 +15,8 @@ // along with this program. If not, see . 
use std::io::{Read, Write}; +#[cfg(any(test, feature = "testing"))] +use std::sync::LazyLock; use std::thread::{self, JoinHandle, Thread}; #[cfg(any(test, feature = "testing"))] use std::time::Duration; @@ -35,6 +37,8 @@ use stacks_common::types::net::PeerHost; use stacks_common::types::StacksPublicKeyBuffer; use stacks_common::util::hash::{hex_bytes, to_hex, Hash160, Sha256Sum, Sha512Trunc256Sum}; use stacks_common::util::retry::BoundReader; +#[cfg(any(test, feature = "testing"))] +use stacks_common::util::tests::TestFlag; use stacks_common::util::{get_epoch_time_ms, get_epoch_time_secs}; use crate::burnchains::affirmation::AffirmationMap; @@ -67,11 +71,11 @@ use crate::net::{ use crate::util_lib::db::Error as DBError; #[cfg(any(test, feature = "testing"))] -pub static TEST_VALIDATE_STALL: std::sync::Mutex> = std::sync::Mutex::new(None); +pub static TEST_VALIDATE_STALL: LazyLock> = LazyLock::new(TestFlag::default); #[cfg(any(test, feature = "testing"))] /// Artificial delay to add to block validation. -pub static TEST_VALIDATE_DELAY_DURATION_SECS: std::sync::Mutex> = - std::sync::Mutex::new(None); +pub static TEST_VALIDATE_DELAY_DURATION_SECS: LazyLock> = + LazyLock::new(TestFlag::default); // This enum is used to supply a `reason_code` for validation // rejection responses. This is serialized as an enum with string @@ -353,10 +357,10 @@ impl NakamotoBlockProposal { ) -> Result { #[cfg(any(test, feature = "testing"))] { - if *TEST_VALIDATE_STALL.lock().unwrap() == Some(true) { + if TEST_VALIDATE_STALL.get() { // Do an extra check just so we don't log EVERY time. 
warn!("Block validation is stalled due to testing directive."); - while *TEST_VALIDATE_STALL.lock().unwrap() == Some(true) { + while TEST_VALIDATE_STALL.get() { std::thread::sleep(std::time::Duration::from_millis(10)); } info!( @@ -368,7 +372,8 @@ impl NakamotoBlockProposal { #[cfg(any(test, feature = "testing"))] { - if let Some(delay) = *TEST_VALIDATE_DELAY_DURATION_SECS.lock().unwrap() { + let delay = TEST_VALIDATE_DELAY_DURATION_SECS.get(); + if delay > 0 { warn!("Sleeping for {} seconds to simulate slow processing", delay); thread::sleep(Duration::from_secs(delay)); } diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs index c75ca67a00..1b2bf7f6cd 100644 --- a/testnet/stacks-node/src/nakamoto_node/miner.rs +++ b/testnet/stacks-node/src/nakamoto_node/miner.rs @@ -13,6 +13,8 @@ // // You should have received a copy of the GNU General Public License // along with this program. If not, see . +#[cfg(test)] +use std::sync::LazyLock; use std::thread; use std::thread::JoinHandle; use std::time::{Duration, Instant}; @@ -45,6 +47,8 @@ use stacks::util::get_epoch_time_secs; use stacks::util::secp256k1::MessageSignature; use stacks_common::types::chainstate::{StacksAddress, StacksBlockId}; use stacks_common::types::{PrivateKey, StacksEpochId}; +#[cfg(test)] +use stacks_common::util::tests::TestFlag; use stacks_common::util::vrf::VRFProof; use super::relayer::RelayerThread; @@ -56,13 +60,13 @@ use crate::run_loop::nakamoto::Globals; use crate::run_loop::RegisteredKey; #[cfg(test)] -pub static TEST_MINE_STALL: std::sync::Mutex> = std::sync::Mutex::new(None); +pub static TEST_MINE_STALL: LazyLock> = LazyLock::new(TestFlag::default); #[cfg(test)] -pub static TEST_BROADCAST_STALL: std::sync::Mutex> = std::sync::Mutex::new(None); +pub static TEST_BROADCAST_STALL: LazyLock> = LazyLock::new(TestFlag::default); #[cfg(test)] -pub static TEST_BLOCK_ANNOUNCE_STALL: std::sync::Mutex> = std::sync::Mutex::new(None); +pub static 
TEST_BLOCK_ANNOUNCE_STALL: LazyLock> = LazyLock::new(TestFlag::default); #[cfg(test)] -pub static TEST_SKIP_P2P_BROADCAST: std::sync::Mutex> = std::sync::Mutex::new(None); +pub static TEST_SKIP_P2P_BROADCAST: LazyLock> = LazyLock::new(TestFlag::default); /// If the miner was interrupted while mining a block, how long should the /// miner thread sleep before trying again? @@ -197,7 +201,7 @@ impl BlockMinerThread { #[cfg(test)] fn fault_injection_block_broadcast_stall(new_block: &NakamotoBlock) { - if *TEST_BROADCAST_STALL.lock().unwrap() == Some(true) { + if TEST_BROADCAST_STALL.get() { // Do an extra check just so we don't log EVERY time. warn!("Fault injection: Broadcasting is stalled due to testing directive."; "stacks_block_id" => %new_block.block_id(), @@ -205,7 +209,7 @@ impl BlockMinerThread { "height" => new_block.header.chain_length, "consensus_hash" => %new_block.header.consensus_hash ); - while *TEST_BROADCAST_STALL.lock().unwrap() == Some(true) { + while TEST_BROADCAST_STALL.get() { std::thread::sleep(std::time::Duration::from_millis(10)); } info!("Fault injection: Broadcasting is no longer stalled due to testing directive."; @@ -221,7 +225,7 @@ impl BlockMinerThread { #[cfg(test)] fn fault_injection_block_announce_stall(new_block: &NakamotoBlock) { - if *TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap() == Some(true) { + if TEST_BLOCK_ANNOUNCE_STALL.get() { // Do an extra check just so we don't log EVERY time. 
warn!("Fault injection: Block announcement is stalled due to testing directive."; "stacks_block_id" => %new_block.block_id(), @@ -229,7 +233,7 @@ impl BlockMinerThread { "height" => new_block.header.chain_length, "consensus_hash" => %new_block.header.consensus_hash ); - while *TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap() == Some(true) { + while TEST_BLOCK_ANNOUNCE_STALL.get() { std::thread::sleep(std::time::Duration::from_millis(10)); } info!("Fault injection: Block announcement is no longer stalled due to testing directive."; @@ -245,7 +249,7 @@ impl BlockMinerThread { #[cfg(test)] fn fault_injection_skip_block_broadcast() -> bool { - if *TEST_SKIP_P2P_BROADCAST.lock().unwrap() == Some(true) { + if TEST_SKIP_P2P_BROADCAST.get() { return true; } false @@ -282,10 +286,10 @@ impl BlockMinerThread { #[cfg(test)] fn fault_injection_stall_miner() { - if *TEST_MINE_STALL.lock().unwrap() == Some(true) { + if TEST_MINE_STALL.get() { // Do an extra check just so we don't log EVERY time. warn!("Mining is stalled due to testing directive"); - while *TEST_MINE_STALL.lock().unwrap() == Some(true) { + while TEST_MINE_STALL.get() { std::thread::sleep(std::time::Duration::from_millis(10)); } warn!("Mining is no longer stalled due to testing directive. 
Continuing..."); diff --git a/testnet/stacks-node/src/nakamoto_node/relayer.rs b/testnet/stacks-node/src/nakamoto_node/relayer.rs index 3de86b526c..6b5f27ade6 100644 --- a/testnet/stacks-node/src/nakamoto_node/relayer.rs +++ b/testnet/stacks-node/src/nakamoto_node/relayer.rs @@ -18,6 +18,8 @@ use std::collections::HashSet; use std::fs; use std::io::Read; use std::sync::mpsc::{Receiver, RecvTimeoutError}; +#[cfg(test)] +use std::sync::LazyLock; use std::thread::JoinHandle; use std::time::{Duration, Instant}; @@ -50,6 +52,8 @@ use stacks_common::types::chainstate::{ use stacks_common::types::StacksEpochId; use stacks_common::util::get_epoch_time_ms; use stacks_common::util::hash::Hash160; +#[cfg(test)] +use stacks_common::util::tests::TestFlag; use stacks_common::util::vrf::VRFPublicKey; use super::miner::MinerReason; @@ -68,12 +72,12 @@ use crate::BitcoinRegtestController; /// Mutex to stall the relayer thread right before it creates a miner thread. #[cfg(test)] -pub static TEST_MINER_THREAD_STALL: std::sync::Mutex> = std::sync::Mutex::new(None); +pub static TEST_MINER_THREAD_STALL: LazyLock> = LazyLock::new(TestFlag::default); /// Mutex to stall the miner thread right after it starts up (does not block the relayer thread) #[cfg(test)] -pub static TEST_MINER_THREAD_START_STALL: std::sync::Mutex> = - std::sync::Mutex::new(None); +pub static TEST_MINER_THREAD_START_STALL: LazyLock> = + LazyLock::new(TestFlag::default); /// Command types for the Nakamoto relayer thread, issued to it by other threads #[allow(clippy::large_enum_variant)] @@ -920,10 +924,10 @@ impl RelayerThread { #[cfg(test)] fn fault_injection_stall_miner_startup() { - if *TEST_MINER_THREAD_STALL.lock().unwrap() == Some(true) { + if TEST_MINER_THREAD_STALL.get() { // Do an extra check just so we don't log EVERY time. 
warn!("Relayer miner thread startup is stalled due to testing directive to stall the miner"); - while *TEST_MINER_THREAD_STALL.lock().unwrap() == Some(true) { + while TEST_MINER_THREAD_STALL.get() { std::thread::sleep(std::time::Duration::from_millis(10)); } warn!( @@ -937,10 +941,10 @@ impl RelayerThread { #[cfg(test)] fn fault_injection_stall_miner_thread_startup() { - if *TEST_MINER_THREAD_START_STALL.lock().unwrap() == Some(true) { + if TEST_MINER_THREAD_START_STALL.get() { // Do an extra check just so we don't log EVERY time. warn!("Miner thread startup is stalled due to testing directive"); - while *TEST_MINER_THREAD_START_STALL.lock().unwrap() == Some(true) { + while TEST_MINER_THREAD_START_STALL.get() { std::thread::sleep(std::time::Duration::from_millis(10)); } warn!( diff --git a/testnet/stacks-node/src/tests/nakamoto_integrations.rs b/testnet/stacks-node/src/tests/nakamoto_integrations.rs index c4da71600e..48a1bf507c 100644 --- a/testnet/stacks-node/src/tests/nakamoto_integrations.rs +++ b/testnet/stacks-node/src/tests/nakamoto_integrations.rs @@ -5020,8 +5020,8 @@ fn forked_tenure_is_ignored() { // For the next tenure, submit the commit op but do not allow any stacks blocks to be broadcasted. // Stall the miner thread; only wait until the number of submitted commits increases. - TEST_BROADCAST_STALL.lock().unwrap().replace(true); - TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap().replace(true); + TEST_BROADCAST_STALL.set(true); + TEST_BLOCK_ANNOUNCE_STALL.set(true); let blocks_before = mined_blocks.load(Ordering::SeqCst); let commits_before = commits_submitted.load(Ordering::SeqCst); @@ -5038,7 +5038,7 @@ fn forked_tenure_is_ignored() { // Unpause the broadcast of Tenure B's block, do not submit commits, and do not allow blocks to // be processed test_skip_commit_op.set(true); - TEST_BROADCAST_STALL.lock().unwrap().replace(false); + TEST_BROADCAST_STALL.set(false); // Wait for a stacks block to be broadcasted. // However, it will not be processed. 
@@ -5091,7 +5091,7 @@ fn forked_tenure_is_ignored() { .get_stacks_blocks_processed(); next_block_and(&mut btc_regtest_controller, 60, || { test_skip_commit_op.set(false); - TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap().replace(false); + TEST_BLOCK_ANNOUNCE_STALL.set(false); let commits_count = commits_submitted.load(Ordering::SeqCst); let blocks_count = mined_blocks.load(Ordering::SeqCst); let blocks_processed = coord_channel @@ -6129,7 +6129,7 @@ fn clarity_burn_state() { result.expect_result_ok().expect("Read-only call failed"); // Pause mining to prevent the stacks block from being mined before the tenure change is processed - TEST_MINE_STALL.lock().unwrap().replace(true); + TEST_MINE_STALL.set(true); // Submit a tx for the next block (the next block will be a new tenure, so the burn block height will increment) let call_tx = tests::make_contract_call( &sender_sk, @@ -6154,7 +6154,7 @@ fn clarity_burn_state() { Ok(commits_submitted.load(Ordering::SeqCst) > commits_before) }) .unwrap(); - TEST_MINE_STALL.lock().unwrap().replace(false); + TEST_MINE_STALL.set(false); wait_for(20, || { Ok(coord_channel .lock() @@ -9749,7 +9749,7 @@ fn skip_mining_long_tx() { }) .unwrap(); - TEST_SKIP_P2P_BROADCAST.lock().unwrap().replace(true); + TEST_SKIP_P2P_BROADCAST.set(true); let tx = make_contract_publish( &sender_2_sk, 0, @@ -9776,7 +9776,7 @@ fn skip_mining_long_tx() { }) .unwrap(); - TEST_SKIP_P2P_BROADCAST.lock().unwrap().replace(false); + TEST_SKIP_P2P_BROADCAST.set(false); } else { let transfer_tx = make_stacks_transfer( &sender_1_sk, @@ -10435,7 +10435,7 @@ fn clarity_cost_spend_down() { .expect("Mutex poisoned") .get_stacks_blocks_processed(); // Pause mining so we can add all our transactions to the mempool at once. 
- TEST_MINE_STALL.lock().unwrap().replace(true); + TEST_MINE_STALL.set(true); let mut submitted_txs = vec![]; for _nmb_tx in 0..nmb_txs_per_signer { for sender_sk in sender_sks.iter() { @@ -10464,7 +10464,7 @@ fn clarity_cost_spend_down() { } } } - TEST_MINE_STALL.lock().unwrap().replace(false); + TEST_MINE_STALL.set(false); wait_for(120, || { let blocks_processed = coord_channel .lock() @@ -10693,8 +10693,8 @@ fn test_tenure_extend_from_flashblocks() { assert_eq!(sort_tip.consensus_hash, election_tip.consensus_hash); // stop the relayer thread from starting a miner thread, and stop the miner thread from mining - TEST_MINE_STALL.lock().unwrap().replace(true); - TEST_MINER_THREAD_STALL.lock().unwrap().replace(true); + TEST_MINE_STALL.set(true); + TEST_MINER_THREAD_STALL.set(true); // mine another Bitcoin block right away, and force it to be a flash block btc_regtest_controller.bootstrap_chain(1); @@ -10703,7 +10703,7 @@ fn test_tenure_extend_from_flashblocks() { // unblock the relayer so it can process the flash block sortition. // Given the above, this will be an `Extend` tenure. 
- TEST_MINER_THREAD_STALL.lock().unwrap().replace(false); + TEST_MINER_THREAD_STALL.set(false); let sortitions_processed_before = sortitions_processed.load(Ordering::SeqCst); wait_for(60, || { @@ -10781,7 +10781,7 @@ fn test_tenure_extend_from_flashblocks() { // unstall miner thread and allow block-commits again nakamoto_test_skip_commit_op.set(false); - TEST_MINE_STALL.lock().unwrap().replace(false); + TEST_MINE_STALL.set(false); // wait for the miner directive to be processed wait_for(60, || { diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs index 422de6dc02..f97fb46781 100644 --- a/testnet/stacks-node/src/tests/signer/v0.rs +++ b/testnet/stacks-node/src/tests/signer/v0.rs @@ -585,9 +585,7 @@ fn miner_gather_signatures() { // Disable p2p broadcast of the nakamoto blocks, so that we rely // on the signer's using StackerDB to get pushed blocks - *nakamoto_node::miner::TEST_SKIP_P2P_BROADCAST - .lock() - .unwrap() = Some(true); + nakamoto_node::miner::TEST_SKIP_P2P_BROADCAST.set(true); info!("------------------------- Test Setup -------------------------"); let num_signers = 5; @@ -1018,8 +1016,8 @@ fn forked_tenure_testing( .unwrap(); // For the next tenure, submit the commit op but do not allow any stacks blocks to be broadcasted - TEST_BROADCAST_STALL.lock().unwrap().replace(true); - TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap().replace(true); + TEST_BROADCAST_STALL.set(true); + TEST_BLOCK_ANNOUNCE_STALL.set(true); let blocks_before = mined_blocks.load(Ordering::SeqCst); let commits_before = commits_submitted.load(Ordering::SeqCst); @@ -1042,7 +1040,7 @@ fn forked_tenure_testing( .running_nodes .nakamoto_test_skip_commit_op .set(true); - TEST_BROADCAST_STALL.lock().unwrap().replace(false); + TEST_BROADCAST_STALL.set(false); // Wait for a stacks block to be broadcasted let start_time = Instant::now(); @@ -1096,7 +1094,7 @@ fn forked_tenure_testing( if !expect_tenure_c { // allow B to process, so it'll be distinct 
from C - TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap().replace(false); + TEST_BLOCK_ANNOUNCE_STALL.set(false); sleep_ms(1000); } @@ -1122,7 +1120,7 @@ fn forked_tenure_testing( let commits_count = commits_submitted.load(Ordering::SeqCst); if commits_count > commits_before { // now allow block B to process if it hasn't already. - TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap().replace(false); + TEST_BLOCK_ANNOUNCE_STALL.set(false); } let rejected_count = rejected_blocks.load(Ordering::SeqCst); let (blocks_count, rbf_count, has_reject_count) = if expect_tenure_c { @@ -1944,7 +1942,7 @@ fn miner_forking() { info!("------------------------- RL1 Wins Sortition -------------------------"); info!("Pausing stacks block proposal to force an empty tenure commit from RL2"); - TEST_BROADCAST_STALL.lock().unwrap().replace(true); + TEST_BROADCAST_STALL.set(true); let rl1_commits_before = commits_submitted_rl1.load(Ordering::SeqCst); info!("Unpausing commits from RL1"); @@ -1998,7 +1996,7 @@ fn miner_forking() { // unblock block mining let blocks_len = test_observer::get_blocks().len(); - TEST_BROADCAST_STALL.lock().unwrap().replace(false); + TEST_BROADCAST_STALL.set(false); // Wait for the block to be broadcasted and processed wait_for(30, || Ok(test_observer::get_blocks().len() > blocks_len)) @@ -2084,7 +2082,7 @@ fn miner_forking() { info!("------------------------- RL1 RBFs its Own Commit -------------------------"); info!("Pausing stacks block proposal to test RBF capability"); - TEST_BROADCAST_STALL.lock().unwrap().replace(true); + TEST_BROADCAST_STALL.set(true); let rl1_commits_before = commits_submitted_rl1.load(Ordering::SeqCst); info!("Unpausing commits from RL1"); @@ -2122,7 +2120,7 @@ fn miner_forking() { let rl1_commits_before = commits_submitted_rl1.load(Ordering::SeqCst); // unblock block mining let blocks_len = test_observer::get_blocks().len(); - TEST_BROADCAST_STALL.lock().unwrap().replace(false); + TEST_BROADCAST_STALL.set(false); // Wait for the block to be broadcasted 
and processed wait_for(30, || Ok(test_observer::get_blocks().len() > blocks_len)) @@ -2263,7 +2261,7 @@ fn end_of_tenure() { ); info!("------------------------- Test Block Validation Stalled -------------------------"); - TEST_VALIDATE_STALL.lock().unwrap().replace(true); + TEST_VALIDATE_STALL.set(true); let proposals_before = signer_test .running_nodes @@ -2335,7 +2333,7 @@ fn end_of_tenure() { info!("Unpausing block validation and waiting for block to be processed"); // Disable the stall and wait for the block to be processed - TEST_VALIDATE_STALL.lock().unwrap().replace(false); + TEST_VALIDATE_STALL.set(false); wait_for(short_timeout.as_secs(), || { let processed_now = get_chain_info(&signer_test.running_nodes.conf).stacks_tip_height; Ok(processed_now > blocks_before) @@ -2831,7 +2829,7 @@ fn stx_transfers_dont_effect_idle_timeout() { signer_test.boot_to_epoch_3(); // Add a delay to the block validation process - TEST_VALIDATE_DELAY_DURATION_SECS.lock().unwrap().replace(5); + TEST_VALIDATE_DELAY_DURATION_SECS.set(5); let info_before = signer_test.get_peer_info(); let blocks_before = signer_test.running_nodes.nakamoto_blocks_mined.get(); @@ -2975,7 +2973,7 @@ fn idle_tenure_extend_active_mining() { signer_test.boot_to_epoch_3(); // Add a delay to the block validation process - TEST_VALIDATE_DELAY_DURATION_SECS.lock().unwrap().replace(3); + TEST_VALIDATE_DELAY_DURATION_SECS.set(3); signer_test.mine_nakamoto_block(Duration::from_secs(30), true); @@ -3217,7 +3215,7 @@ fn empty_sortition() { signer_test.boot_to_epoch_3(); - TEST_BROADCAST_STALL.lock().unwrap().replace(true); + TEST_BROADCAST_STALL.set(true); info!("------------------------- Test Mine Regular Tenure A -------------------------"); let commits_before = signer_test @@ -3264,7 +3262,7 @@ fn empty_sortition() { .unwrap(); info!("Pausing stacks block proposal to force an empty tenure"); - TEST_BROADCAST_STALL.lock().unwrap().replace(true); + TEST_BROADCAST_STALL.set(true); info!("Pausing commit op to 
prevent tenure C from starting..."); signer_test @@ -3297,7 +3295,7 @@ fn empty_sortition() { std::thread::sleep(block_proposal_timeout.add(Duration::from_secs(1))); - TEST_BROADCAST_STALL.lock().unwrap().replace(false); + TEST_BROADCAST_STALL.set(false); info!("------------------------- Test Delayed Block is Rejected -------------------------"); let reward_cycle = signer_test.get_current_reward_cycle(); @@ -3575,7 +3573,7 @@ fn empty_sortition_before_proposal() { .replace(true); info!("Pause miner so it doesn't propose a block before the next tenure arrives"); - TEST_MINE_STALL.lock().unwrap().replace(true); + TEST_MINE_STALL.set(true); let burn_height_before = get_chain_info(&signer_test.running_nodes.conf).burn_block_height; @@ -3595,7 +3593,7 @@ fn empty_sortition_before_proposal() { sleep_ms(5_000); info!("Unpause miner"); - TEST_MINE_STALL.lock().unwrap().replace(false); + TEST_MINE_STALL.set(false); info!("Unpause block commits"); signer_test @@ -4437,9 +4435,7 @@ fn duplicate_signers() { // Disable p2p broadcast of the nakamoto blocks, so that we rely // on the signer's using StackerDB to get pushed blocks - *nakamoto_node::miner::TEST_SKIP_P2P_BROADCAST - .lock() - .unwrap() = Some(true); + nakamoto_node::miner::TEST_SKIP_P2P_BROADCAST.set(true); info!("------------------------- Test Setup -------------------------"); let num_signers = 5; @@ -7642,7 +7638,7 @@ fn block_validation_response_timeout() { info!("------------------------- Test Mine and Verify Confirmed Nakamoto Block -------------------------"); signer_test.mine_and_verify_confirmed_naka_block(timeout, num_signers, true); info!("------------------------- Test Block Validation Stalled -------------------------"); - TEST_VALIDATE_STALL.lock().unwrap().replace(true); + TEST_VALIDATE_STALL.set(true); let validation_stall_start = Instant::now(); let proposals_before = signer_test @@ -7744,7 +7740,7 @@ fn block_validation_response_timeout() { let info_before = info_after; info!("Unpausing block 
validation"); // Disable the stall and wait for the block to be processed successfully - TEST_VALIDATE_STALL.lock().unwrap().replace(false); + TEST_VALIDATE_STALL.set(false); wait_for(30, || { let info = get_chain_info(&signer_test.running_nodes.conf); Ok(info.stacks_tip_height > info_before.stacks_tip_height) @@ -8022,7 +8018,7 @@ fn tenure_extend_after_failed_miner() { .expect("Timed out waiting for block to be mined and processed"); info!("------------------------- Pause Block Proposals -------------------------"); - TEST_MINE_STALL.lock().unwrap().replace(true); + TEST_MINE_STALL.set(true); // Unpause miner 2's block commits let rl2_commits_before = rl2_commits.load(Ordering::SeqCst); @@ -8067,7 +8063,7 @@ fn tenure_extend_after_failed_miner() { info!("------------------------- Miner 1 Extends Tenure A -------------------------"); // Re-enable block mining - TEST_MINE_STALL.lock().unwrap().replace(false); + TEST_MINE_STALL.set(false); // wait for a tenure extend block from miner 1 to be processed wait_for(60, || { @@ -8432,7 +8428,7 @@ fn tenure_extend_after_bad_commit() { .expect("Timed out waiting for block to be mined and processed"); info!("------------------------- Pause Block Proposals -------------------------"); - TEST_MINE_STALL.lock().unwrap().replace(true); + TEST_MINE_STALL.set(true); // Unpause miner 1's block commits let rl1_commits_before = rl1_commits.load(Ordering::SeqCst); @@ -8483,7 +8479,7 @@ fn tenure_extend_after_bad_commit() { info!("----------------------------- Resume Block Production -----------------------------"); - TEST_MINE_STALL.lock().unwrap().replace(false); + TEST_MINE_STALL.set(false); wait_for(60, || { let stacks_height = signer_test @@ -8906,7 +8902,7 @@ fn tenure_extend_after_2_bad_commits() { .expect("Timed out waiting for block to be mined and processed"); info!("------------------------- Pause Block Proposals -------------------------"); - TEST_MINE_STALL.lock().unwrap().replace(true); + TEST_MINE_STALL.set(true); // 
Unpause miner 1's block commits let rl1_commits_before = rl1_commits.load(Ordering::SeqCst); @@ -8957,7 +8953,7 @@ fn tenure_extend_after_2_bad_commits() { info!("----------------------------- Resume Block Production -----------------------------"); - TEST_MINE_STALL.lock().unwrap().replace(false); + TEST_MINE_STALL.set(false); wait_for(60, || { let stacks_height = signer_test @@ -8986,7 +8982,7 @@ fn tenure_extend_after_2_bad_commits() { // Pause block production again so that we can make sure miner 2 commits // to the wrong block again. - TEST_MINE_STALL.lock().unwrap().replace(true); + TEST_MINE_STALL.set(true); next_block_and( &mut signer_test.running_nodes.btc_regtest_controller, @@ -9015,7 +9011,7 @@ fn tenure_extend_after_2_bad_commits() { info!("------------------------- Miner 1 Extends Tenure B -------------------------"); - TEST_MINE_STALL.lock().unwrap().replace(false); + TEST_MINE_STALL.set(false); // wait for a tenure extend block from miner 1 to be processed // (miner 2's proposals will be rejected) @@ -9101,7 +9097,7 @@ fn tenure_extend_after_2_bad_commits() { info!("---------------------- Miner 1 Extends Tenure B (again) ---------------------"); - TEST_MINE_STALL.lock().unwrap().replace(false); + TEST_MINE_STALL.set(false); // wait for a tenure extend block from miner 1 to be processed // (miner 2's proposals will be rejected) @@ -9820,7 +9816,7 @@ fn no_reorg_due_to_successive_block_validation_ok() { debug!("Miner 1 mined block N: {block_n_signature_hash}"); info!("------------------------- Pause Block Validation Response of N+1 -------------------------"); - TEST_VALIDATE_STALL.lock().unwrap().replace(true); + TEST_VALIDATE_STALL.set(true); let proposals_before_2 = rl2_proposals.load(Ordering::SeqCst); let rejections_before_2 = rl2_rejections.load(Ordering::SeqCst); let blocks_before = test_observer::get_blocks().len(); @@ -9955,7 +9951,7 @@ fn no_reorg_due_to_successive_block_validation_ok() { info!("------------------------- Unpause Block 
Validation Response of N+1 -------------------------"); - TEST_VALIDATE_STALL.lock().unwrap().replace(false); + TEST_VALIDATE_STALL.set(false); // Verify that the node accepted the proposed N+1, sending back a validate ok response wait_for(30, || { From 6fe5d2dfba21b401bfcd50cc5a8f36ced3b0e9a8 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Thu, 9 Jan 2025 14:24:24 -0500 Subject: [PATCH 23/35] fix: pause Stacks mining while mining blocks for miner eligibility --- testnet/stacks-node/src/tests/signer/v0.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs index f97fb46781..92cafea5a8 100644 --- a/testnet/stacks-node/src/tests/signer/v0.rs +++ b/testnet/stacks-node/src/tests/signer/v0.rs @@ -1357,6 +1357,8 @@ fn bitcoind_forking_test() { info!("Wait for block off of shallow fork"); + TEST_MINE_STALL.set(true); + // we need to mine some blocks to get back to being considered a frequent miner for i in 0..3 { let current_burn_height = get_chain_info(&signer_test.running_nodes.conf).burn_block_height; @@ -1400,8 +1402,10 @@ fn bitcoind_forking_test() { let post_fork_1_nonce = get_account(&http_origin, &miner_address).nonce; + // We should have forked 1 block (-2 nonces) assert_eq!(post_fork_1_nonce, pre_fork_1_nonce - 2); + TEST_MINE_STALL.set(false); for i in 0..5 { info!("Mining post-fork tenure {} of 5", i + 1); signer_test.mine_nakamoto_block(Duration::from_secs(30), true); @@ -1434,6 +1438,7 @@ fn bitcoind_forking_test() { info!("Wait for block off of deep fork"); // we need to mine some blocks to get back to being considered a frequent miner + TEST_MINE_STALL.set(true); for i in 0..3 { let current_burn_height = get_chain_info(&signer_test.running_nodes.conf).burn_block_height; info!( @@ -1478,6 +1483,8 @@ fn bitcoind_forking_test() { assert_eq!(post_fork_2_nonce, pre_fork_2_nonce - 4 * 2); + TEST_MINE_STALL.set(false); + for i in 0..5 { info!("Mining post-fork tenure {} 
of 5", i + 1); signer_test.mine_nakamoto_block(Duration::from_secs(30), true); From 911560ca5d1519c1efae20399f1ef97e1aee5a72 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Thu, 9 Jan 2025 16:01:44 -0500 Subject: [PATCH 24/35] test: add wait to ensure tip has advanced --- .../stacks-node/src/tests/nakamoto_integrations.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/testnet/stacks-node/src/tests/nakamoto_integrations.rs b/testnet/stacks-node/src/tests/nakamoto_integrations.rs index 48a1bf507c..80d2a819bc 100644 --- a/testnet/stacks-node/src/tests/nakamoto_integrations.rs +++ b/testnet/stacks-node/src/tests/nakamoto_integrations.rs @@ -5105,6 +5105,17 @@ fn forked_tenure_is_ignored() { .unwrap(); info!("Tenure C produced a block!"); + wait_for(30, || { + let block_tenure_c = + NakamotoChainState::get_canonical_block_header(chainstate.db(), &sortdb) + .unwrap() + .unwrap(); + let blocks = test_observer::get_mined_nakamoto_blocks(); + let block_c = blocks.last().unwrap(); + Ok(block_tenure_c.index_block_hash().to_string() == block_c.block_id) + }) + .expect("Failed to wait for block processing"); + let block_tenure_c = NakamotoChainState::get_canonical_block_header(chainstate.db(), &sortdb) .unwrap() .unwrap(); From 1c3109079d555049a4c53ca6c204f0fe07a29127 Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Thu, 9 Jan 2025 16:55:42 -0500 Subject: [PATCH 25/35] test: add new test for tenure extend Also correct name of existing test case. 
--- .github/workflows/bitcoin-tests.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/bitcoin-tests.yml b/.github/workflows/bitcoin-tests.yml index eab3b21f75..c6ff87d4f5 100644 --- a/.github/workflows/bitcoin-tests.yml +++ b/.github/workflows/bitcoin-tests.yml @@ -125,6 +125,7 @@ jobs: - tests::signer::v0::continue_after_tenure_extend - tests::signer::v0::tenure_extend_after_idle_signers - tests::signer::v0::tenure_extend_after_idle_miner + - tests::signer::v0::tenure_extend_after_failed_miner - tests::signer::v0::tenure_extend_succeeds_after_rejected_attempt - tests::signer::v0::stx_transfers_dont_effect_idle_timeout - tests::signer::v0::idle_tenure_extend_active_mining @@ -155,7 +156,7 @@ jobs: - tests::nakamoto_integrations::sip029_coinbase_change - tests::nakamoto_integrations::clarity_cost_spend_down - tests::nakamoto_integrations::v3_blockbyheight_api_endpoint - - tests::nakamoto_integrations::test_tenure_change_and_extend_from_flashblocks + - tests::nakamoto_integrations::test_tenure_extend_from_flashblocks # TODO: enable these once v1 signer is supported by a new nakamoto epoch # - tests::signer::v1::dkg # - tests::signer::v1::sign_request_rejected From 9de3f8412d8db6bfdf43fb2226ef596170c0437c Mon Sep 17 00:00:00 2001 From: Brice Dobry Date: Mon, 13 Jan 2025 14:33:23 -0500 Subject: [PATCH 26/35] fix: `won_sortition` calculation in relayer --- testnet/stacks-node/src/nakamoto_node/relayer.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/relayer.rs b/testnet/stacks-node/src/nakamoto_node/relayer.rs index 6b5f27ade6..edb1c01b96 100644 --- a/testnet/stacks-node/src/nakamoto_node/relayer.rs +++ b/testnet/stacks-node/src/nakamoto_node/relayer.rs @@ -630,8 +630,8 @@ impl RelayerThread { .expect("FATAL: unknown consensus hash"); // always clear this even if this isn't the latest sortition - self.last_commits.remove(&sn.winning_block_txid); - let won_sortition = 
sn.sortition; // && cleared; + let cleared = self.last_commits.remove(&sn.winning_block_txid); + let won_sortition = sn.sortition && cleared; if won_sortition { increment_stx_blocks_mined_counter(); } From 5590ec08bc8f630c7b15dd9de2e75cf5d5a74859 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Mon, 13 Jan 2025 15:55:21 -0500 Subject: [PATCH 27/35] chore: get tenure_extend_after_failed_miner to pass --- libstackerdb/src/libstackerdb.rs | 6 ++ stacks-signer/src/chainstate.rs | 1 + testnet/stacks-node/src/nakamoto_node.rs | 4 ++ .../stacks-node/src/nakamoto_node/miner.rs | 55 +++++++++++++------ .../stacks-node/src/nakamoto_node/relayer.rs | 23 ++++---- .../src/nakamoto_node/signer_coordinator.rs | 39 +++++++++---- testnet/stacks-node/src/neon_node.rs | 4 +- .../src/tests/nakamoto_integrations.rs | 16 +++++- testnet/stacks-node/src/tests/signer/v0.rs | 36 +----------- 9 files changed, 107 insertions(+), 77 deletions(-) diff --git a/libstackerdb/src/libstackerdb.rs b/libstackerdb/src/libstackerdb.rs index 714ef838c4..36d7dd3643 100644 --- a/libstackerdb/src/libstackerdb.rs +++ b/libstackerdb/src/libstackerdb.rs @@ -135,6 +135,12 @@ pub struct StackerDBChunkAckData { pub code: Option, } +impl fmt::Display for StackerDBChunkAckData { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:?}", self) + } +} + impl SlotMetadata { /// Make a new unsigned slot metadata pub fn new_unsigned( diff --git a/stacks-signer/src/chainstate.rs b/stacks-signer/src/chainstate.rs index fbd57afdc8..d8e4bcf81f 100644 --- a/stacks-signer/src/chainstate.rs +++ b/stacks-signer/src/chainstate.rs @@ -202,6 +202,7 @@ impl SortitionsView { info!( "Current miner timed out, marking as invalid."; "block_height" => block.header.chain_length, + "block_proposal_timeout" => ?self.config.block_proposal_timeout, "current_sortition_consensus_hash" => ?self.cur_sortition.consensus_hash, ); self.cur_sortition.miner_status = SortitionMinerStatus::InvalidatedBeforeFirstBlock; diff --git 
a/testnet/stacks-node/src/nakamoto_node.rs b/testnet/stacks-node/src/nakamoto_node.rs index 09f8c7285f..c49e0bbc73 100644 --- a/testnet/stacks-node/src/nakamoto_node.rs +++ b/testnet/stacks-node/src/nakamoto_node.rs @@ -23,6 +23,7 @@ use stacks::burnchains::{BurnchainSigner, Txid}; use stacks::chainstate::burn::db::sortdb::SortitionDB; use stacks::chainstate::burn::BlockSnapshot; use stacks::chainstate::stacks::Error as ChainstateError; +use stacks::libstackerdb::StackerDBChunkAckData; use stacks::monitoring; use stacks::monitoring::update_active_miners_count_gauge; use stacks::net::atlas::AtlasConfig; @@ -130,6 +131,9 @@ pub enum Error { /// An error occurred while operating as the signing coordinator #[error("An error occurred while operating as the signing coordinator: {0}")] SigningCoordinatorFailure(String), + /// An error occurred on StackerDB post + #[error("An error occurred while uploading data to StackerDB: {0}")] + StackerDBUploadError(StackerDBChunkAckData), // The thread that we tried to send to has closed #[error("The thread that we tried to send to has closed")] ChannelClosed, diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs index 1b2bf7f6cd..475b132655 100644 --- a/testnet/stacks-node/src/nakamoto_node/miner.rs +++ b/testnet/stacks-node/src/nakamoto_node/miner.rs @@ -40,6 +40,7 @@ use stacks::chainstate::stacks::{ TenureChangeCause, TenureChangePayload, TransactionAnchorMode, TransactionPayload, TransactionVersion, }; +use stacks::net::api::poststackerdbchunk::StackerDBErrorCodes; use stacks::net::p2p::NetworkHandle; use stacks::net::stackerdb::StackerDBs; use stacks::net::{NakamotoBlocksData, StacksMessageType}; @@ -367,6 +368,29 @@ impl BlockMinerThread { } } + /// Pause the miner thread and retry to mine + fn pause_and_retry( + &self, + new_block: &NakamotoBlock, + last_block_rejected: &mut bool, + e: NakamotoNodeError, + ) { + // Sleep for a bit to allow signers to catch up + let 
pause_ms = if *last_block_rejected { + self.config.miner.subsequent_rejection_pause_ms + } else { + self.config.miner.first_rejection_pause_ms + }; + + error!("Error while gathering signatures: {e:?}. Will try mining again in {pause_ms}."; + "signer_sighash" => %new_block.header.signer_signature_hash(), + "block_height" => new_block.header.chain_length, + "consensus_hash" => %new_block.header.consensus_hash, + ); + thread::sleep(Duration::from_millis(pause_ms)); + *last_block_rejected = true; + } + /// The main loop for the miner thread. This is where the miner will mine /// blocks and then attempt to sign and broadcast them. fn miner_main_loop( @@ -469,21 +493,20 @@ impl BlockMinerThread { ); return Err(e); } + NakamotoNodeError::StackerDBUploadError(ref ack) => { + if ack.code == Some(StackerDBErrorCodes::BadSigner.code()) { + error!("Error while gathering signatures: failed to upload miner StackerDB data: {ack:?}. Giving up."; + "signer_sighash" => %new_block.header.signer_signature_hash(), + "block_height" => new_block.header.chain_length, + "consensus_hash" => %new_block.header.consensus_hash, + ); + return Err(e); + } + self.pause_and_retry(&new_block, last_block_rejected, e); + return Ok(()); + } _ => { - // Sleep for a bit to allow signers to catch up - let pause_ms = if *last_block_rejected { - self.config.miner.subsequent_rejection_pause_ms - } else { - self.config.miner.first_rejection_pause_ms - }; - - error!("Error while gathering signatures: {e:?}. 
Will try mining again in {pause_ms}."; - "signer_sighash" => %new_block.header.signer_signature_hash(), - "block_height" => new_block.header.chain_length, - "consensus_hash" => %new_block.header.consensus_hash, - ); - thread::sleep(Duration::from_millis(pause_ms)); - *last_block_rejected = true; + self.pause_and_retry(&new_block, last_block_rejected, e); return Ok(()); } }, @@ -507,8 +530,7 @@ impl BlockMinerThread { // update mined-block counters and mined-tenure counters self.globals.counters.bump_naka_mined_blocks(); - if self.last_block_mined.is_some() { - // TODO: reviewers: should this be .is_none()? + if self.last_block_mined.is_none() { // this is the first block of the tenure, bump tenure counter self.globals.counters.bump_naka_mined_tenures(); } @@ -778,7 +800,6 @@ impl BlockMinerThread { &mut miners_session, &self.burn_election_block.consensus_hash, ) - .map_err(NakamotoNodeError::SigningCoordinatorFailure) } /// Get the coinbase recipient address, if set in the config and if allowed in this epoch diff --git a/testnet/stacks-node/src/nakamoto_node/relayer.rs b/testnet/stacks-node/src/nakamoto_node/relayer.rs index 6b5f27ade6..f460062fd4 100644 --- a/testnet/stacks-node/src/nakamoto_node/relayer.rs +++ b/testnet/stacks-node/src/nakamoto_node/relayer.rs @@ -19,10 +19,11 @@ use std::fs; use std::io::Read; use std::sync::mpsc::{Receiver, RecvTimeoutError}; #[cfg(test)] -use std::sync::LazyLock; use std::thread::JoinHandle; use std::time::{Duration, Instant}; +use lazy_static::lazy_static; +use rand::{thread_rng, Rng}; use stacks::burnchains::{Burnchain, Txid}; use stacks::chainstate::burn::db::sortdb::SortitionDB; use stacks::chainstate::burn::operations::leader_block_commit::{ @@ -70,14 +71,14 @@ use crate::run_loop::nakamoto::{Globals, RunLoop}; use crate::run_loop::RegisteredKey; use crate::BitcoinRegtestController; -/// Mutex to stall the relayer thread right before it creates a miner thread. 
#[cfg(test)] -pub static TEST_MINER_THREAD_STALL: LazyLock> = LazyLock::new(TestFlag::default); +lazy_static! { + /// Mutex to stall the relayer thread right before it creates a miner thread. + pub static ref TEST_MINER_THREAD_STALL: TestFlag = TestFlag::default(); -/// Mutex to stall the miner thread right after it starts up (does not block the relayer thread) -#[cfg(test)] -pub static TEST_MINER_THREAD_START_STALL: LazyLock> = - LazyLock::new(TestFlag::default); + /// Mutex to stall the miner thread right after it starts up (does not block the relayer thread) + pub static ref TEST_MINER_THREAD_START_STALL: TestFlag = TestFlag::default(); +} /// Command types for the Nakamoto relayer thread, issued to it by other threads #[allow(clippy::large_enum_variant)] @@ -630,8 +631,8 @@ impl RelayerThread { .expect("FATAL: unknown consensus hash"); // always clear this even if this isn't the latest sortition - self.last_commits.remove(&sn.winning_block_txid); - let won_sortition = sn.sortition; // && cleared; + let cleared = self.last_commits.remove(&sn.winning_block_txid); + let won_sortition = sn.sortition && cleared; if won_sortition { increment_stx_blocks_mined_counter(); } @@ -1052,8 +1053,10 @@ impl RelayerThread { debug!("Relayer: starting new tenure thread"); + let rand_id = thread_rng().gen::(); + let new_miner_handle = std::thread::Builder::new() - .name(format!("miner.{parent_tenure_start}",)) + .name(format!("miner.{parent_tenure_start}.{rand_id}",)) .stack_size(BLOCK_PROCESSOR_STACK_SIZE) .spawn(move || { Self::fault_injection_stall_miner_thread_startup(); diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index 8927df484a..7e11adfc27 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -136,18 +136,26 @@ impl SignerCoordinator { is_mainnet: bool, miners_session: &mut StackerDBSession, 
election_sortition: &ConsensusHash, - ) -> Result<(), String> { + ) -> Result<(), NakamotoNodeError> { let Some(slot_range) = NakamotoChainState::get_miner_slot(sortdb, tip, election_sortition) - .map_err(|e| format!("Failed to read miner slot information: {e:?}"))? + .map_err(|e| { + NakamotoNodeError::SigningCoordinatorFailure(format!( + "Failed to read miner slot information: {e:?}" + )) + })? else { - return Err("No slot for miner".into()); + return Err(NakamotoNodeError::SigningCoordinatorFailure( + "No slot for miner".into(), + )); }; let slot_id = slot_range .start .saturating_add(miner_slot_id.to_u8().into()); if !slot_range.contains(&slot_id) { - return Err("Not enough slots for miner messages".into()); + return Err(NakamotoNodeError::SigningCoordinatorFailure( + "Not enough slots for miner messages".into(), + )); } // Get the LAST slot version number written to the DB. If not found, use 0. // Add 1 to get the NEXT version number @@ -155,13 +163,19 @@ impl SignerCoordinator { let miners_contract_id = boot_code_id(MINERS_NAME, is_mainnet); let slot_version = stackerdbs .get_slot_version(&miners_contract_id, slot_id) - .map_err(|e| format!("Failed to read slot version: {e:?}"))? + .map_err(|e| { + NakamotoNodeError::SigningCoordinatorFailure(format!( + "Failed to read slot version: {e:?}" + )) + })? 
.unwrap_or(0) .saturating_add(1); let mut chunk = StackerDBChunkData::new(slot_id, slot_version, message.serialize_to_vec()); - chunk - .sign(miner_sk) - .map_err(|_| "Failed to sign StackerDB chunk")?; + chunk.sign(miner_sk).map_err(|e| { + NakamotoNodeError::SigningCoordinatorFailure(format!( + "Failed to sign StackerDB chunk: {e:?}" + )) + })?; match miners_session.put_chunk(&chunk) { Ok(ack) => { @@ -169,10 +183,12 @@ impl SignerCoordinator { debug!("Wrote message to stackerdb: {ack:?}"); Ok(()) } else { - Err(format!("{ack:?}")) + Err(NakamotoNodeError::StackerDBUploadError(ack)) } } - Err(e) => Err(format!("{e:?}")), + Err(e) => Err(NakamotoNodeError::SigningCoordinatorFailure(format!( + "{e:?}" + ))), } } @@ -227,8 +243,7 @@ impl SignerCoordinator { self.is_mainnet, &mut self.miners_session, election_sortition, - ) - .map_err(NakamotoNodeError::SigningCoordinatorFailure)?; + )?; counters.bump_naka_proposed_blocks(); #[cfg(test)] diff --git a/testnet/stacks-node/src/neon_node.rs b/testnet/stacks-node/src/neon_node.rs index 2d4dc7fadd..070837997d 100644 --- a/testnet/stacks-node/src/neon_node.rs +++ b/testnet/stacks-node/src/neon_node.rs @@ -2376,7 +2376,7 @@ impl BlockMinerThread { ) .map_err(|e| { warn!("Failed to write mock proposal to stackerdb."); - e + e.to_string() })?; // Retrieve any MockSignatures from stackerdb @@ -2404,7 +2404,7 @@ impl BlockMinerThread { ) .map_err(|e| { warn!("Failed to write mock block to stackerdb."); - e + e.to_string() })?; Ok(()) } diff --git a/testnet/stacks-node/src/tests/nakamoto_integrations.rs b/testnet/stacks-node/src/tests/nakamoto_integrations.rs index 18e21ef1b3..94b78b229e 100644 --- a/testnet/stacks-node/src/tests/nakamoto_integrations.rs +++ b/testnet/stacks-node/src/tests/nakamoto_integrations.rs @@ -711,9 +711,8 @@ pub fn next_block_and_mine_commit( ) } -/// Mine a bitcoin block, and wait until: -/// (1) 2 block commits have been issued ** or ** more than 10 seconds have -/// passed since (1) occurred +/// Mine 
a bitcoin block, and wait until a block-commit has been issued, **or** a timeout occurs +/// (timeout_secs) pub fn next_block_and_commits_only( btc_controller: &mut BitcoinRegtestController, timeout_secs: u64, @@ -10548,7 +10547,9 @@ fn clarity_cost_spend_down() { /// Miner wins sortition at Bitcoin height N /// Relayer processes sortition N /// Miner wins sortition at Bitcoin height N+1 +/// Transactions that depend on the burn view get submitted to the mempool /// A flash block at height N+2 happens before the miner can publish its block-found for N+1 +/// The miner mines these transactions with a burn view for height N+2 /// Result: the miner issues a tenure-extend from N+1 with burn view for N+2 #[test] #[ignore] @@ -10622,6 +10623,7 @@ fn test_tenure_extend_from_flashblocks() { (if (is-eq u0 (mod burn-block-height u2)) (var-set my-counter (+ u1 (var-get my-counter))) (var-set my-counter (+ u2 (var-get my-counter)))) + (print burn-block-height) (ok 1) ) ) @@ -10837,6 +10839,14 @@ fn test_tenure_extend_from_flashblocks() { }) .unwrap(); + // transactions are all mined, and all reflect the flash block's burn view + let mut blocks = test_observer::get_blocks(); + blocks.sort_by_key(|block| block["block_height"].as_u64().unwrap()); + + for block in blocks.iter() { + eprintln!("block: {:#?}", &block); + } + // boot a follower. it should reach the chain tip info!("----- BEGIN FOLLOWR BOOTUP ------"); diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs index 758b514bd2..062d334dbd 100644 --- a/testnet/stacks-node/src/tests/signer/v0.rs +++ b/testnet/stacks-node/src/tests/signer/v0.rs @@ -8080,7 +8080,8 @@ fn tenure_extend_after_failed_miner() { info!("------------------------- Miner 1 Extends Tenure A -------------------------"); - // Re-enable block mining + // Re-enable block mining, for both miners. + // Since miner B has been offline, it won't be able to mine. 
TEST_MINE_STALL.set(false); // wait for a tenure extend block from miner 1 to be processed @@ -8136,38 +8137,6 @@ fn tenure_extend_after_failed_miner() { }) .expect("Timed out waiting for block to be mined and processed"); - // Re-enable block commits for miner 2 - let rl2_commits_before = rl2_commits.load(Ordering::SeqCst); - rl2_skip_commit_op.set(true); - - // Wait for block commit from miner 2 - wait_for(30, || { - Ok(rl2_commits.load(Ordering::SeqCst) > rl2_commits_before) - }) - .expect("Timed out waiting for block commit from miner 2"); - - info!("------------------------- Miner 2 Mines the Next Tenure -------------------------"); - - let stacks_height_before = signer_test - .stacks_client - .get_peer_info() - .expect("Failed to get peer info") - .stacks_tip_height; - - next_block_and( - &mut signer_test.running_nodes.btc_regtest_controller, - 60, - || { - let stacks_height = signer_test - .stacks_client - .get_peer_info() - .expect("Failed to get peer info") - .stacks_tip_height; - Ok(stacks_height > stacks_height_before) - }, - ) - .expect("Timed out waiting for final block to be mined and processed"); - info!("------------------------- Shutdown -------------------------"); rl2_coord_channels .lock() @@ -8365,6 +8334,7 @@ fn tenure_extend_after_bad_commit() { }; info!("------------------------- Pause Miner 1's Block Commit -------------------------"); + // Make sure miner 1 doesn't submit any further block commits for the next tenure BEFORE mining the bitcoin block rl1_skip_commit_op.set(true); From 27519c3d95c4c9fd7d9c1e0e48b9304350f431d2 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Mon, 13 Jan 2025 16:45:18 -0500 Subject: [PATCH 28/35] chore: expand test_tenure_extend_from_flashblocks to check that all burn view-sensitive transactions get mined, and that a tenure extend happens --- .../src/tests/nakamoto_integrations.rs | 34 +++++++++++++++++-- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git 
a/testnet/stacks-node/src/tests/nakamoto_integrations.rs b/testnet/stacks-node/src/tests/nakamoto_integrations.rs index 94b78b229e..1803dffa25 100644 --- a/testnet/stacks-node/src/tests/nakamoto_integrations.rs +++ b/testnet/stacks-node/src/tests/nakamoto_integrations.rs @@ -10801,7 +10801,7 @@ fn test_tenure_extend_from_flashblocks() { &[], ); let txid = submit_tx(&http_origin, &contract_tx); - sent_txids.push(txid); + sent_txids.push(format!("0x{}", &txid.to_string())); accounts_before.push(account); } @@ -10839,12 +10839,40 @@ fn test_tenure_extend_from_flashblocks() { }) .unwrap(); - // transactions are all mined, and all reflect the flash block's burn view + // transactions are all mined, and all reflect the flash block's burn view. + // we had a tenure-extend as well. let mut blocks = test_observer::get_blocks(); blocks.sort_by_key(|block| block["block_height"].as_u64().unwrap()); + let mut included_txids = HashSet::new(); + let mut has_extend = false; for block in blocks.iter() { - eprintln!("block: {:#?}", &block); + for tx in block.get("transactions").unwrap().as_array().unwrap() { + let txid_str = tx.get("txid").unwrap().as_str().unwrap().to_string(); + included_txids.insert(txid_str); + + let raw_tx = tx.get("raw_tx").unwrap().as_str().unwrap(); + if raw_tx == "0x00" { + continue; + } + let tx_bytes = hex_bytes(&raw_tx[2..]).unwrap(); + let parsed = StacksTransaction::consensus_deserialize(&mut &tx_bytes[..]).unwrap(); + + if let TransactionPayload::TenureChange(payload) = &parsed.payload { + if payload.cause == TenureChangeCause::Extended { + has_extend = true; + } + } + } + } + + assert!(has_extend); + + let expected_txids: HashSet<_> = sent_txids.clone().into_iter().collect(); + for expected_txid in expected_txids.iter() { + if !included_txids.contains(expected_txid) { + panic!("Missing {}", expected_txid); + } } // boot a follower. 
it should reach the chain tip From 17d6edc58fe3da3923eff90c07212df77088cfa5 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Mon, 13 Jan 2025 17:52:40 -0500 Subject: [PATCH 29/35] fix: build issue; fix relayer to always start a new tenure if the current sortition was won by the node's miner (even if continuing the prior tenure is possible) --- .../stacks-node/src/nakamoto_node/miner.rs | 3 ++- .../stacks-node/src/nakamoto_node/relayer.rs | 11 ++++++++- .../src/tests/nakamoto_integrations.rs | 24 +++++++++++++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs index 475b132655..1608541aed 100644 --- a/testnet/stacks-node/src/nakamoto_node/miner.rs +++ b/testnet/stacks-node/src/nakamoto_node/miner.rs @@ -1016,7 +1016,8 @@ impl BlockMinerThread { // TODO: shouldn't this be self.burn_block.sortition_hash? self.keychain.generate_proof( self.registered_key.target_block_height, - self.burn_election_block.sortition_hash.as_bytes(), + // self.burn_election_block.sortition_hash.as_bytes(), + self.burn_block.sortition_hash.as_bytes(), ) }; diff --git a/testnet/stacks-node/src/nakamoto_node/relayer.rs b/testnet/stacks-node/src/nakamoto_node/relayer.rs index f460062fd4..d91589716c 100644 --- a/testnet/stacks-node/src/nakamoto_node/relayer.rs +++ b/testnet/stacks-node/src/nakamoto_node/relayer.rs @@ -18,10 +18,10 @@ use std::collections::HashSet; use std::fs; use std::io::Read; use std::sync::mpsc::{Receiver, RecvTimeoutError}; -#[cfg(test)] use std::thread::JoinHandle; use std::time::{Duration, Instant}; +#[cfg(test)] use lazy_static::lazy_static; use rand::{thread_rng, Rng}; use stacks::burnchains::{Burnchain, Txid}; @@ -1163,9 +1163,13 @@ impl RelayerThread { let won_ongoing_tenure_sortition = canonical_stacks_snapshot.miner_pk_hash == Some(mining_pkh); + let sort_tip = SortitionDB::get_canonical_burn_chain_tip(sortdb.conn()).unwrap(); + let won_current_tip = 
sort_tip.miner_pk_hash == Some(mining_pkh); + info!( "Relayer: Checking for tenure continuation."; "won_ongoing_tenure_sortition" => won_ongoing_tenure_sortition, + "won_current_tip" => won_current_tip, "current_mining_pkh" => %mining_pkh, "canonical_stacks_tip_id" => %canonical_stacks_tip, "canonical_stacks_tip_ch" => %canonical_stacks_tip_ch, @@ -1178,6 +1182,11 @@ impl RelayerThread { return Ok(None); } + if won_current_tip { + info!("Relayer: Won current sortition, so no need to continue tenure. Just start a new one."); + return Ok(None); + } + Ok(Some(canonical_stacks_snapshot)) } diff --git a/testnet/stacks-node/src/tests/nakamoto_integrations.rs b/testnet/stacks-node/src/tests/nakamoto_integrations.rs index 1803dffa25..c314f386ed 100644 --- a/testnet/stacks-node/src/tests/nakamoto_integrations.rs +++ b/testnet/stacks-node/src/tests/nakamoto_integrations.rs @@ -10875,6 +10875,30 @@ fn test_tenure_extend_from_flashblocks() { } } + // mine one additional tenure, to verify that we're on track + let commits_before = commits_submitted.load(Ordering::SeqCst); + let node_info_before = get_chain_info_opt(&naka_conf).unwrap(); + + btc_regtest_controller.bootstrap_chain(1); + + wait_for(20, || { + Ok(commits_submitted.load(Ordering::SeqCst) > commits_before) + }) + .unwrap(); + + // there was a sortition winner + let sort_tip = SortitionDB::get_canonical_burn_chain_tip(&sortdb.conn()).unwrap(); + assert!(sort_tip.sortition); + + wait_for(20, || { + let node_info = get_chain_info_opt(&naka_conf).unwrap(); + Ok( + node_info.burn_block_height > node_info_before.burn_block_height + && node_info.stacks_tip_height > node_info_before.stacks_tip_height, + ) + }) + .unwrap(); + // boot a follower. 
it should reach the chain tip info!("----- BEGIN FOLLOWR BOOTUP ------"); From 99d3eff7c8f147a58bd26fd0b03909ba3f316788 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Mon, 13 Jan 2025 17:55:03 -0500 Subject: [PATCH 30/35] test: change VRF proof calculation to test a comment from @obycode --- testnet/stacks-node/src/nakamoto_node/miner.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs index 1608541aed..8f4e8b4a9f 100644 --- a/testnet/stacks-node/src/nakamoto_node/miner.rs +++ b/testnet/stacks-node/src/nakamoto_node/miner.rs @@ -1013,10 +1013,8 @@ impl BlockMinerThread { self.burn_election_block.sortition_hash.as_bytes(), ) } else { - // TODO: shouldn't this be self.burn_block.sortition_hash? self.keychain.generate_proof( self.registered_key.target_block_height, - // self.burn_election_block.sortition_hash.as_bytes(), self.burn_block.sortition_hash.as_bytes(), ) }; From 262ee7db0661754595f84bb32dff2ee6355ec42c Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Mon, 13 Jan 2025 23:21:54 -0500 Subject: [PATCH 31/35] chore: revert to LazyStatic --- testnet/stacks-node/src/nakamoto_node/relayer.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/relayer.rs b/testnet/stacks-node/src/nakamoto_node/relayer.rs index d91589716c..f77991798e 100644 --- a/testnet/stacks-node/src/nakamoto_node/relayer.rs +++ b/testnet/stacks-node/src/nakamoto_node/relayer.rs @@ -18,11 +18,11 @@ use std::collections::HashSet; use std::fs; use std::io::Read; use std::sync::mpsc::{Receiver, RecvTimeoutError}; +#[cfg(test)] +use std::sync::LazyLock; use std::thread::JoinHandle; use std::time::{Duration, Instant}; -#[cfg(test)] -use lazy_static::lazy_static; use rand::{thread_rng, Rng}; use stacks::burnchains::{Burnchain, Txid}; use stacks::chainstate::burn::db::sortdb::SortitionDB; @@ -72,13 +72,13 @@ use 
crate::run_loop::RegisteredKey; use crate::BitcoinRegtestController; #[cfg(test)] -lazy_static! { - /// Mutex to stall the relayer thread right before it creates a miner thread. - pub static ref TEST_MINER_THREAD_STALL: TestFlag = TestFlag::default(); +/// Mutex to stall the relayer thread right before it creates a miner thread. +pub static TEST_MINER_THREAD_STALL: LazyLock> = LazyLock::new(TestFlag::default); - /// Mutex to stall the miner thread right after it starts up (does not block the relayer thread) - pub static ref TEST_MINER_THREAD_START_STALL: TestFlag = TestFlag::default(); -} +#[cfg(test)] +/// Mutex to stall the miner thread right after it starts up (does not block the relayer thread) +pub static TEST_MINER_THREAD_START_STALL: LazyLock> = + LazyLock::new(TestFlag::default); /// Command types for the Nakamoto relayer thread, issued to it by other threads #[allow(clippy::large_enum_variant)] From 62c9f1311768162f01af37863d9d373d7b12ca96 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Tue, 14 Jan 2025 23:32:08 -0500 Subject: [PATCH 32/35] chore: add docstrings, and (to test) disable the check_burn_view_change() function --- .../stacks-node/src/nakamoto_node/miner.rs | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs index 8f4e8b4a9f..04a241aa2c 100644 --- a/testnet/stacks-node/src/nakamoto_node/miner.rs +++ b/testnet/stacks-node/src/nakamoto_node/miner.rs @@ -78,8 +78,14 @@ const ABORT_TRY_AGAIN_MS: u64 = 200; pub enum MinerDirective { /// The miner won sortition so they should begin a new tenure BeginTenure { + /// This is the block ID of the first block in the parent tenure parent_tenure_start: StacksBlockId, + /// This is the snapshot that this miner won, and will produce a tenure for burnchain_tip: BlockSnapshot, + /// This is `true` if the snapshot above is known not to be the the latest burnchain tip, + /// but an 
ancestor of it (for example, the burnchain tip could be an empty flash block, but the + /// miner may nevertheless need to produce a Stacks block with a BlockFound tenure-change + /// transaction for the tenure began by winning `burnchain_tip`'s sortition). late: bool, }, /// The miner should try to continue their tenure if they are the active miner @@ -110,7 +116,17 @@ struct ParentStacksBlockInfo { #[derive(PartialEq, Clone, Debug)] pub enum MinerReason { /// The miner thread was spawned to begin a new tenure - BlockFound { late: bool }, + BlockFound { + /// `late` indicates whether or not the tenure that is about to be started corresponds to + /// an ancestor of the canonical tip. This can happen if this miner won the highest + /// sortition, but that sortition's snapshot is not the canonical tip (e.g. the canonical + /// tip may have no sortition, but its parent (or Nth ancestor) would have had a sortition + /// that this miner won, and it would be the latest non-empty sortition ancestor of the + /// tip). This indication is important because the miner would issue a BlockFound + /// tenure-change, and then issue an Extended tenure-change right afterwards in order to + /// update the burnchain view exposed to Clarity for the highest sortition. + late: bool + }, /// The miner thread was spawned to extend an existing tenure Extended { /// Current consensus hash on the underlying burnchain. Corresponds to the last-seen @@ -1015,7 +1031,7 @@ impl BlockMinerThread { } else { self.keychain.generate_proof( self.registered_key.target_block_height, - self.burn_block.sortition_hash.as_bytes(), + self.burn_election_block.sortition_hash.as_bytes(), ) }; @@ -1372,7 +1388,7 @@ impl BlockMinerThread { // ongoing tenure is not an ancestor of the given burn view, so it must have // advanced (or forked) relative to the given burn view. Either way, this burn // view has changed. 
- info!("Nakamoto chainstate burn view has changed from miner burn view"; + info!("Nakamoto chainstate burn view has advanced from miner burn view"; "nakamoto_burn_view" => %ongoing_tenure_id.burn_view_consensus_hash, "miner_burn_view" => %burn_view.consensus_hash); @@ -1390,8 +1406,6 @@ impl BlockMinerThread { sortdb: &SortitionDB, chain_state: &mut StacksChainState, ) -> Result<(), NakamotoNodeError> { - Self::check_burn_view_changed(sortdb, chain_state, &self.burn_block)?; - if let MinerReason::BlockFound { late } = &self.reason { if *late && self.last_block_mined.is_none() { // this is a late BlockFound tenure change that ought to be appended to the Stacks From 618c3a0879d624caa246d8f90a6f1f4e3823e7b7 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Wed, 15 Jan 2025 00:15:45 -0500 Subject: [PATCH 33/35] chore: cargo fmt --- testnet/stacks-node/src/nakamoto_node/miner.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs index 04a241aa2c..eef91265f1 100644 --- a/testnet/stacks-node/src/nakamoto_node/miner.rs +++ b/testnet/stacks-node/src/nakamoto_node/miner.rs @@ -80,7 +80,7 @@ pub enum MinerDirective { BeginTenure { /// This is the block ID of the first block in the parent tenure parent_tenure_start: StacksBlockId, - /// This is the snapshot that this miner won, and will produce a tenure for + /// This is the snapshot that this miner won, and will produce a tenure for burnchain_tip: BlockSnapshot, /// This is `true` if the snapshot above is known not to be the the latest burnchain tip, /// but an ancestor of it (for example, the burnchain tip could be an empty flash block, but the @@ -125,7 +125,7 @@ pub enum MinerReason { /// tip). 
This indication is important because the miner would issue a BlockFound /// tenure-change, and then issue an Extended tenure-change right afterwards in order to /// update the burnchain view exposed to Clarity for the highest sortition. - late: bool + late: bool, }, /// The miner thread was spawned to extend an existing tenure Extended { From fa823b15abb4ec19d4e124d5ca5747ab6b73216d Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Wed, 15 Jan 2025 00:19:15 -0500 Subject: [PATCH 34/35] test: disable check_burn_view_changed() --- testnet/stacks-node/src/nakamoto_node/miner.rs | 2 ++ testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs | 2 ++ 2 files changed, 4 insertions(+) diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs index eef91265f1..872138c8c4 100644 --- a/testnet/stacks-node/src/nakamoto_node/miner.rs +++ b/testnet/stacks-node/src/nakamoto_node/miner.rs @@ -1406,6 +1406,8 @@ impl BlockMinerThread { sortdb: &SortitionDB, chain_state: &mut StacksChainState, ) -> Result<(), NakamotoNodeError> { + // BlockMinerThread::check_burn_view_changed(sortdb, chain_state, &self.burn_block)?; + if let MinerReason::BlockFound { late } = &self.reason { if *late && self.last_block_mined.is_none() { // this is a late BlockFound tenure change that ought to be appended to the Stacks diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index f26ed35aea..36a02e0d6f 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -371,10 +371,12 @@ impl SignerCoordinator { chain_state: &mut StacksChainState, burn_block: &BlockSnapshot, ) -> bool { + /* if BlockMinerThread::check_burn_view_changed(sortdb, chain_state, burn_block).is_err() { // can't continue mining -- burn view changed, or a DB error occurred return true; } + */ let cur_burn_chain_tip = 
SortitionDB::get_canonical_burn_chain_tip(sortdb.conn()) .expect("FATAL: failed to query sortition DB for canonical burn chain tip"); From 8e9303aac28c16c4456990a06aad3ea784d00bd5 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Wed, 15 Jan 2025 00:47:43 -0500 Subject: [PATCH 35/35] fix: remove compile warnings that prevent CI from running --- testnet/stacks-node/src/nakamoto_node/miner.rs | 2 +- testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs index 872138c8c4..01d6e494bb 100644 --- a/testnet/stacks-node/src/nakamoto_node/miner.rs +++ b/testnet/stacks-node/src/nakamoto_node/miner.rs @@ -1404,7 +1404,7 @@ impl BlockMinerThread { fn check_burn_tip_changed( &self, sortdb: &SortitionDB, - chain_state: &mut StacksChainState, + _chain_state: &mut StacksChainState, ) -> Result<(), NakamotoNodeError> { // BlockMinerThread::check_burn_view_changed(sortdb, chain_state, &self.burn_block)?; diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index 36a02e0d6f..a6b9c2c41a 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -37,7 +37,7 @@ use stacks::util_lib::boot::boot_code_id; use super::stackerdb_listener::StackerDBListenerComms; use super::Error as NakamotoNodeError; use crate::event_dispatcher::StackerDBChannel; -use crate::nakamoto_node::miner::BlockMinerThread; +// use crate::nakamoto_node::miner::BlockMinerThread; use crate::nakamoto_node::stackerdb_listener::{StackerDBListener, EVENT_RECEIVER_POLL}; use crate::neon::Counters; use crate::Config; @@ -368,7 +368,7 @@ impl SignerCoordinator { /// Check if the tenure needs to change fn check_burn_tip_changed( sortdb: &SortitionDB, - chain_state: &mut StacksChainState, + 
_chain_state: &mut StacksChainState, burn_block: &BlockSnapshot, ) -> bool { /*