Skip to content

Commit

Permalink
Do not crash on block gap in displaced_leaves_after_finalizing (par…
Browse files Browse the repository at this point in the history
…itytech#4997)

After the merge of paritytech#4922 we saw failing zombienet tests with the
following error:
```
2024-07-09 10:30:09 Error applying finality to block (0xb9e1d3d9cb2047fe61667e28a0963e0634a7b29781895bc9ca40c898027b4c09, 56685): UnknownBlock: Header was not found in the database: 0x0000000000000000000000000000000000000000000000000000000000000000    
2024-07-09 10:30:09 GRANDPA voter error: could not complete a round on disk: UnknownBlock: Header was not found in the database: 0x0000000000000000000000000000000000000000000000000000000000000000    
```

[Example](https://gitlab.parity.io/parity/mirrors/polkadot-sdk/-/jobs/6662262)

The crashing situation is warp-sync related. After warp syncing, it can
happen that there are gaps in block ancestry where we don't have the
header. At the same time, the genesis hash is in the set of leaves. In
`displaced_leaves_after_finalizing` we then iterate from the finalized
block backwards until we hit an unknown block, crashing the node.

This PR makes the detection of displaced branches resilient against
unknown block in the finalized block chain.

cc @nazar-pc (github won't let me request a review from you)

---------

Co-authored-by: Bastian Köcher <[email protected]>
Co-authored-by: command-bot <>
  • Loading branch information
2 people authored and Jay Pan committed Dec 27, 2024
1 parent 0eedfd3 commit 2d48c47
Show file tree
Hide file tree
Showing 6 changed files with 228 additions and 15 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions prdoc/pr_4997.prdoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Schema: Polkadot SDK PRDoc Schema (prdoc) v1.0.0
# See doc at https://raw.githubusercontent.com/paritytech/polkadot-sdk/master/prdoc/schema_user.json

title: Do not crash on block gap in displaced_leaves_after_finalizing

doc:
- audience:
- Node Operator
- Node Dev
description: |
After recent changes, crashes where occuring when calculating displaced branches after a block was finalized.
The reason are block gaps in the finalized chain. When encountering unknown blocks, the node was panicking.
This PR introduces changes to tolerate unknown blocks. Leafs that are separated by a gap from the to-be-finalized
block are not marked as displaced.

crates:
- name: sc-client-db
bump: none
- name: sp-blockchain
bump: patch
155 changes: 155 additions & 0 deletions substrate/client/db/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2613,6 +2613,35 @@ pub(crate) mod tests {
Ok(header.hash())
}

pub fn insert_disconnected_header(
backend: &Backend<Block>,
number: u64,
parent_hash: H256,
extrinsics_root: H256,
best: bool,
) -> H256 {
use sp_runtime::testing::Digest;

let digest = Digest::default();
let header =
Header { number, parent_hash, state_root: Default::default(), digest, extrinsics_root };

let mut op = backend.begin_operation().unwrap();

op.set_block_data(
header.clone(),
Some(vec![]),
None,
None,
if best { NewBlockState::Best } else { NewBlockState::Normal },
)
.unwrap();

backend.commit_operation(op).unwrap();

header.hash()
}

pub fn insert_header_no_head(
backend: &Backend<Block>,
number: u64,
Expand Down Expand Up @@ -3112,6 +3141,123 @@ pub(crate) mod tests {
}
}

#[test]
fn displaced_leaves_after_finalizing_works_with_disconnect() {
// In this test we will create a situation that can typically happen after warp sync.
// The situation looks like this:
// g -> <unimported> -> a3 -> a4
// Basically there is a gap of unimported blocks at some point in the chain.
let backend = Backend::<Block>::new_test(1000, 100);
let blockchain = backend.blockchain();
let genesis_number = 0;
let genesis_hash =
insert_header(&backend, genesis_number, Default::default(), None, Default::default());

let a3_number = 3;
let a3_hash = insert_disconnected_header(
&backend,
a3_number,
H256::from([200; 32]),
H256::from([1; 32]),
true,
);

let a4_number = 4;
let a4_hash =
insert_disconnected_header(&backend, a4_number, a3_hash, H256::from([2; 32]), true);
{
let displaced =
blockchain.displaced_leaves_after_finalizing(a3_hash, a3_number).unwrap();
assert_eq!(blockchain.leaves().unwrap(), vec![a4_hash, genesis_hash]);
assert_eq!(displaced.displaced_leaves, vec![]);
assert_eq!(displaced.displaced_blocks, vec![]);
}

{
let displaced =
blockchain.displaced_leaves_after_finalizing(a4_hash, a4_number).unwrap();
assert_eq!(blockchain.leaves().unwrap(), vec![a4_hash, genesis_hash]);
assert_eq!(displaced.displaced_leaves, vec![]);
assert_eq!(displaced.displaced_blocks, vec![]);
}

// Import block a1 which has the genesis block as parent.
// g -> a1 -> <unimported> -> a3(f) -> a4
let a1_number = 1;
let a1_hash = insert_disconnected_header(
&backend,
a1_number,
genesis_hash,
H256::from([123; 32]),
false,
);
{
let displaced =
blockchain.displaced_leaves_after_finalizing(a3_hash, a3_number).unwrap();
assert_eq!(blockchain.leaves().unwrap(), vec![a4_hash, a1_hash]);
assert_eq!(displaced.displaced_leaves, vec![]);
assert_eq!(displaced.displaced_blocks, vec![]);
}

// Import block b1 which has the genesis block as parent.
// g -> a1 -> <unimported> -> a3(f) -> a4
// \-> b1
let b1_number = 1;
let b1_hash = insert_disconnected_header(
&backend,
b1_number,
genesis_hash,
H256::from([124; 32]),
false,
);
{
let displaced =
blockchain.displaced_leaves_after_finalizing(a3_hash, a3_number).unwrap();
assert_eq!(blockchain.leaves().unwrap(), vec![a4_hash, a1_hash, b1_hash]);
assert_eq!(displaced.displaced_leaves, vec![]);
assert_eq!(displaced.displaced_blocks, vec![]);
}

// If branch of b blocks is higher in number than a branch, we
// should still not prune disconnected leafs.
// g -> a1 -> <unimported> -> a3(f) -> a4
// \-> b1 -> b2 ----------> b3 ----> b4 -> b5
let b2_number = 2;
let b2_hash =
insert_disconnected_header(&backend, b2_number, b1_hash, H256::from([40; 32]), false);
let b3_number = 3;
let b3_hash =
insert_disconnected_header(&backend, b3_number, b2_hash, H256::from([41; 32]), false);
let b4_number = 4;
let b4_hash =
insert_disconnected_header(&backend, b4_number, b3_hash, H256::from([42; 32]), false);
let b5_number = 5;
let b5_hash =
insert_disconnected_header(&backend, b5_number, b4_hash, H256::from([43; 32]), false);
{
let displaced =
blockchain.displaced_leaves_after_finalizing(a3_hash, a3_number).unwrap();
assert_eq!(blockchain.leaves().unwrap(), vec![b5_hash, a4_hash, a1_hash]);
assert_eq!(displaced.displaced_leaves, vec![]);
assert_eq!(displaced.displaced_blocks, vec![]);
}

// Even though there is a disconnect, diplace should still detect
// branches above the block gap.
// /-> c4
// g -> a1 -> <unimported> -> a3 -> a4(f)
// \-> b1 -> b2 ----------> b3 -> b4 -> b5
let c4_number = 4;
let c4_hash =
insert_disconnected_header(&backend, c4_number, a3_hash, H256::from([44; 32]), false);
{
let displaced =
blockchain.displaced_leaves_after_finalizing(a4_hash, a4_number).unwrap();
assert_eq!(blockchain.leaves().unwrap(), vec![b5_hash, a4_hash, c4_hash, a1_hash]);
assert_eq!(displaced.displaced_leaves, vec![(c4_number, c4_hash)]);
assert_eq!(displaced.displaced_blocks, vec![c4_hash]);
}
}
#[test]
fn displaced_leaves_after_finalizing_works() {
let backend = Backend::<Block>::new_test(1000, 100);
Expand Down Expand Up @@ -3156,6 +3302,15 @@ pub(crate) mod tests {
assert_eq!(displaced_a3.displaced_leaves, vec![]);
assert_eq!(displaced_a3.displaced_blocks, vec![]);
}
{
// Finalized block is above leaves and not imported yet.
// We will not be able to make a connection,
// nothing can be marked as displaced.
let displaced =
blockchain.displaced_leaves_after_finalizing(H256::from([57; 32]), 10).unwrap();
assert_eq!(displaced.displaced_leaves, vec![]);
assert_eq!(displaced.displaced_blocks, vec![]);
}

// fork from genesis: 2 prong.
let b1_number = 1;
Expand Down
2 changes: 1 addition & 1 deletion substrate/primitives/blockchain/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ targets = ["x86_64-unknown-linux-gnu"]
[dependencies]
codec = { features = ["derive"], workspace = true }
futures = { workspace = true }
log = { workspace = true, default-features = true }
parking_lot = { workspace = true, default-features = true }
schnellru = { workspace = true }
thiserror = { workspace = true }
Expand All @@ -29,3 +28,4 @@ sp-consensus = { workspace = true, default-features = true }
sp-database = { workspace = true, default-features = true }
sp-runtime = { workspace = true, default-features = true }
sp-state-machine = { workspace = true, default-features = true }
tracing = { workspace = true, default-features = true }
62 changes: 49 additions & 13 deletions substrate/primitives/blockchain/src/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@

//! Substrate blockchain trait
use log::warn;
use parking_lot::RwLock;
use sp_runtime::{
generic::BlockId,
traits::{Block as BlockT, Header as HeaderT, NumberFor, Zero},
Justifications,
};
use std::collections::{btree_set::BTreeSet, HashMap, VecDeque};
use tracing::{debug, warn};

use crate::{
error::{Error, Result},
Expand Down Expand Up @@ -228,6 +228,7 @@ pub trait Backend<Block: BlockT>:
//
// FIXME #1558 only issue this warning when not on a dead fork
warn!(
target: crate::LOG_TARGET,
"Block {:?} exists in chain but not found when following all leaves backwards",
base_hash,
);
Expand All @@ -254,16 +255,35 @@ pub trait Backend<Block: BlockT>:
) -> std::result::Result<DisplacedLeavesAfterFinalization<Block>, Error> {
let leaves = self.leaves()?;

debug!(
target: crate::LOG_TARGET,
?leaves,
%finalized_block_hash,
?finalized_block_number,
"Checking for displaced leaves after finalization."
);

// If we have only one leaf there are no forks, and we can return early.
if finalized_block_number == Zero::zero() || leaves.len() == 1 {
return Ok(DisplacedLeavesAfterFinalization::default())
}

// Store hashes of finalized blocks for quick checking later, the last block if the
// Store hashes of finalized blocks for quick checking later, the last block is the
// finalized one
let mut finalized_chain = VecDeque::new();
finalized_chain
.push_front(MinimalBlockMetadata::from(&self.header_metadata(finalized_block_hash)?));
let current_finalized = match self.header_metadata(finalized_block_hash) {
Ok(metadata) => metadata,
Err(Error::UnknownBlock(_)) => {
debug!(
target: crate::LOG_TARGET,
hash = ?finalized_block_hash,
"Tried to fetch unknown block, block ancestry has gaps."
);
return Ok(DisplacedLeavesAfterFinalization::default());
},
Err(e) => Err(e)?,
};
finalized_chain.push_front(MinimalBlockMetadata::from(&current_finalized));

// Local cache is a performance optimization in case of finalized block deep below the
// tip of the chain with a lot of leaves above finalized block
Expand All @@ -273,6 +293,7 @@ pub trait Backend<Block: BlockT>:
displaced_leaves: Vec::with_capacity(leaves.len()),
displaced_blocks: Vec::with_capacity(leaves.len()),
};

let mut displaced_blocks_candidates = Vec::new();

for leaf_hash in leaves {
Expand Down Expand Up @@ -306,21 +327,34 @@ pub trait Backend<Block: BlockT>:
continue;
}

// Otherwise the whole leaf branch needs to be pruned, track it all the way to the
// point of branching from the finalized chain
result.displaced_leaves.push((leaf_number, leaf_hash));
result.displaced_blocks.extend(displaced_blocks_candidates.drain(..));
result.displaced_blocks.push(current_header_metadata.hash);
// We reuse `displaced_blocks_candidates` to store the current metadata.
// This block is not displaced if there is a gap in the ancestry. We
// check for this gap later.
displaced_blocks_candidates.push(current_header_metadata.hash);

// Collect the rest of the displaced blocks of leaf branch
for distance_from_finalized in 1_u32.. {
// Find block at `distance_from_finalized` from finalized block
let (finalized_chain_block_number, finalized_chain_block_hash) =
match finalized_chain.iter().rev().nth(distance_from_finalized as usize) {
Some(header) => (header.number, header.hash),
None => {
let metadata = MinimalBlockMetadata::from(&self.header_metadata(
finalized_chain.front().expect("Not empty; qed").parent,
)?);
let to_fetch = finalized_chain.front().expect("Not empty; qed");
let metadata = match self.header_metadata(to_fetch.parent) {
Ok(metadata) => metadata,
Err(Error::UnknownBlock(_)) => {
debug!(
target: crate::LOG_TARGET,
distance_from_finalized,
hash = ?to_fetch.parent,
number = ?to_fetch.number,
"Tried to fetch unknown block, block ancestry has gaps."
);
break;
},
Err(e) => Err(e)?,
};
let metadata = MinimalBlockMetadata::from(&metadata);
let result = (metadata.number, metadata.hash);
finalized_chain.push_front(metadata);
result
Expand All @@ -336,11 +370,13 @@ pub trait Backend<Block: BlockT>:
let parent_hash = current_header_metadata.parent;
if finalized_chain_block_hash == parent_hash {
// Reached finalized chain, nothing left to do
result.displaced_blocks.extend(displaced_blocks_candidates.drain(..));
result.displaced_leaves.push((leaf_number, leaf_hash));
break;
}

// Store displaced block and look deeper for block on finalized chain
result.displaced_blocks.push(parent_hash);
displaced_blocks_candidates.push(parent_hash);
current_header_metadata =
MinimalBlockMetadata::from(&self.header_metadata(parent_hash)?);
}
Expand Down
2 changes: 2 additions & 0 deletions substrate/primitives/blockchain/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,5 @@ mod header_metadata;
pub use backend::*;
pub use error::*;
pub use header_metadata::*;

const LOG_TARGET: &str = "db::blockchain";

0 comments on commit 2d48c47

Please sign in to comment.