From a32af0c57241462996b9c51f31bfe9fec4041384 Mon Sep 17 00:00:00 2001
From: Etan Kissling
Date: Wed, 7 Feb 2024 17:52:08 +0100
Subject: [PATCH] fix checkpoint block potentially not getting backfilled into
 DB

When using checkpoint sync, only the checkpoint state is available; the
corresponding block is not downloaded and has to be backfilled later.

`dag.backfill` normally tracks the slot of the earliest synced block and
the `parent_root` for which no block has been synced yet. Checkpoint sync
breaks this assumption: the initial `dag.backfill.slot` is derived from
the checkpoint state's slot, even though the checkpoint block itself is
not available either.

The sync manager in backward mode does request `dag.backfill.slot`, and
`block_clearance` then backfills the checkpoint block once it is synced.
However, there is no guarantee that a peer ever sends us that block: a
peer could send us all parent blocks while omitting the checkpoint block
itself. In that situation we would accept the parent blocks, advance
`dag.backfill`, and subsequently never request the checkpoint block
again, leaving a gap in the blocks DB that is never filled.

To mitigate that, the assumption is restored that `dag.backfill.slot` is
the slot of the earliest synced block and that `dag.backfill.parent_root`
is the root of the next block that needs to be synced. By setting `slot`
to `tail.slot + 1` and `parent_root` to `tail.root`, we put a fake
summary into `dag.backfill` so that `block_clearance` only proceeds once
the checkpoint block exists.
---
 AllTests-mainnet.md | 9 +-
 beacon_chain/beacon_chain_db.nim | 3 +
 .../block_clearance.nim | 24 +++--
 .../block_pools_types.nim | 6 ++
 .../consensus_object_pools/blockchain_dag.nim | 21 +++--
 .../blockchain_dag_light_client.nim | 39 ++++----
 beacon_chain/nimbus_beacon_node.nim | 5 +-
 beacon_chain/trusted_node_sync.nim | 4 +-
 tests/test_blockchain_dag.nim | 91 ++++++++++++++++---
 9 files changed, 144 insertions(+), 58 deletions(-)

diff --git a/AllTests-mainnet.md b/AllTests-mainnet.md
index 4357e69d7b..535866412a 100644
--- a/AllTests-mainnet.md
+++ b/AllTests-mainnet.md
@@ -23,11 +23,12 @@ OK: 1/1 Fail: 0/1 Skip: 0/1
 OK: 12/12 Fail: 0/12 Skip: 0/12
 ## Backfill
 ```diff
++ Backfill to genesis OK
 + Init without genesis / block OK
-+ backfill to genesis OK
-+ reload backfill position OK
++ Reload backfill position OK
++ Restart after each block OK
 ```
-OK: 3/3 Fail: 0/3 Skip: 0/3
+OK: 4/4 Fail: 0/4 Skip: 0/4
 ## Beacon chain DB [Preset: mainnet]
 ```diff
 + empty database [Preset: mainnet] OK
@@ -986,4 +987,4 @@ OK: 2/2 Fail: 0/2 Skip: 0/2
 OK: 9/9 Fail: 0/9 Skip: 0/9

 ---TOTAL---
-OK: 667/672 Fail: 0/672 Skip: 5/672
+OK: 668/673 Fail: 0/673 Skip: 5/673

diff --git a/beacon_chain/beacon_chain_db.nim b/beacon_chain/beacon_chain_db.nim
index 85ac99ab83..ddb531bc27 100644
--- a/beacon_chain/beacon_chain_db.nim
+++ b/beacon_chain/beacon_chain_db.nim
@@ -204,6 +204,9 @@ type
     slot*: Slot
     parent_root*: Eth2Digest

+func shortLog*(v: BeaconBlockSummary): auto =
+  (v.slot, shortLog(v.parent_root))
+
 # Subkeys essentially create "tables" within the key-value store by prefixing
 # each entry with a table id

diff --git a/beacon_chain/consensus_object_pools/block_clearance.nim b/beacon_chain/consensus_object_pools/block_clearance.nim
index 378a3d263d..196e94ab5b 100644
--- a/beacon_chain/consensus_object_pools/block_clearance.nim
+++ b/beacon_chain/consensus_object_pools/block_clearance.nim
@@ -349,7 +349,7 @@ proc addBackfillBlock*(
     blockRoot = shortLog(signedBlock.root)
     blck = shortLog(signedBlock.message)
     signature = shortLog(signedBlock.signature)
-    backfill = (dag.backfill.slot, shortLog(dag.backfill.parent_root))
+    backfill = shortLog(dag.backfill)

   template blck(): untyped = signedBlock.message # shortcuts without copy
   template blockRoot(): untyped = signedBlock.root
@@ -393,18 +393,22 @@
     if existing.isSome:
       if existing.get().bid.slot == blck.slot and
           existing.get().bid.root == blockRoot:
-
-        # Special case: when starting with only a checkpoint state, we will not
-        # have the head block data in the database
-        if dag.getForkedBlock(existing.get().bid).isNone():
+        let isDuplicate = dag.containsBlock(existing.get().bid)
+        if isDuplicate:
+          debug "Duplicate block"
+        else:
           checkSignature()
-
-          debug "Block backfilled (checkpoint)"
+          debug "Block backfilled (known BlockId)"
           dag.putBlock(signedBlock.asTrusted())
-          return ok()

-        debug "Duplicate block"
-        return err(VerifierError.Duplicate)
+        if blockRoot == dag.backfill.parent_root:
+          dag.backfill = blck.toBeaconBlockSummary()
+
+        return
+          if isDuplicate:
+            err(VerifierError.Duplicate)
+          else:
+            ok()

       # Block is older than finalized, but different from the block in our
       # canonical history: it must be from an unviable branch
diff --git a/beacon_chain/consensus_object_pools/block_pools_types.nim b/beacon_chain/consensus_object_pools/block_pools_types.nim
index f8d3850147..0a450325a0 100644
--- a/beacon_chain/consensus_object_pools/block_pools_types.nim
+++ b/beacon_chain/consensus_object_pools/block_pools_types.nim
@@ -156,6 +156,12 @@ type
       ## The backfill points to the oldest block with an unbroken ancestry from
       ## dag.tail - when backfilling, we'll move backwards in time starting
      ## with the parent of this block until we reach `frontfill`.
+      ##
+      ## - `backfill.slot` points to the earliest block that has been synced,
+      ##   or, if no blocks have been synced yet, to `checkpoint_state.slot + 1`
+      ##   which is the earliest slot that may have `parent_root` as ancestor.
+      ## - `backfill.parent_root` is the latest block that is not yet synced.
+      ## - Once backfill completes, `backfill.slot` refers to `GENESIS_SLOT`.

     frontfillBlocks*: seq[Eth2Digest]
       ## A temporary cache of blocks that we could load from era files, once
diff --git a/beacon_chain/consensus_object_pools/blockchain_dag.nim b/beacon_chain/consensus_object_pools/blockchain_dag.nim
index 8a92dab87d..170ff61ccd 100644
--- a/beacon_chain/consensus_object_pools/blockchain_dag.nim
+++ b/beacon_chain/consensus_object_pools/blockchain_dag.nim
@@ -117,7 +117,7 @@ proc updateFrontfillBlocks*(dag: ChainDAGRef) =
   # era files match the chain
   if dag.db.db.readOnly: return # TODO abstraction leak - where to put this?

-  if dag.frontfillBlocks.len == 0 or dag.backfill.slot > 0:
+  if dag.frontfillBlocks.len == 0 or dag.backfill.slot > GENESIS_SLOT:
     return

   info "Writing frontfill index", slots = dag.frontfillBlocks.len
@@ -211,6 +211,9 @@ proc containsBlock(
     cfg: RuntimeConfig, db: BeaconChainDB, slot: Slot, root: Eth2Digest): bool =
   db.containsBlock(root, cfg.consensusForkAtEpoch(slot.epoch))

+proc containsBlock*(dag: ChainDAGRef, bid: BlockId): bool =
+  dag.cfg.containsBlock(dag.db, bid.slot, bid.root)
+
 proc getForkedBlock*(
     db: BeaconChainDB, root: Eth2Digest): Opt[ForkedTrustedSignedBeaconBlock] =
   # When we only have a digest, we don't know which fork it's from so we try
@@ -1179,9 +1182,14 @@ proc init*(T: type ChainDAGRef, cfg: RuntimeConfig, db: BeaconChainDB,
       db.getBeaconBlockSummary(backfillRoot).expect(
         "Backfill block must have a summary: " & $backfillRoot)
-    else:
+    elif dag.containsBlock(dag.tail):
       db.getBeaconBlockSummary(dag.tail.root).expect(
         "Tail block must have a summary: " & $dag.tail.root)
+    else:
+      # Checkpoint sync, checkpoint block unavailable
+      BeaconBlockSummary(
+        slot: dag.tail.slot + 1,
+        parent_root: dag.tail.root)

   dag.forkDigests = newClone ForkDigests.init(
     cfg, getStateField(dag.headState, genesis_validators_root))

@@ -1193,8 +1201,9 @@ proc init*(T: type ChainDAGRef, cfg: RuntimeConfig, db: BeaconChainDB,

   let finalizedTick = Moment.now()

-  if dag.backfill.slot > 0: # See if we can frontfill blocks from era files
-    dag.frontfillBlocks = newSeqOfCap[Eth2Digest](dag.backfill.slot.int)
+  if dag.backfill.slot > GENESIS_SLOT: # Try frontfill from era files
+    let backfillSlot = dag.backfill.slot - 1
+    dag.frontfillBlocks = newSeqOfCap[Eth2Digest](backfillSlot.int)

     let
       historical_roots = getStateField(dag.headState, historical_roots).asSeq()
@@ -1207,7 +1216,7 @@ proc init*(T: type ChainDAGRef, cfg: RuntimeConfig, db: BeaconChainDB,
       # blocks from genesis to backfill, if possible.
       for bid in dag.era.getBlockIds(
           historical_roots, historical_summaries, Slot(0), Eth2Digest()):
-        if bid.slot >= dag.backfill.slot:
+        if bid.slot >= backfillSlot:
           # If we end up in here, we failed the root comparison just below in
           # an earlier iteration
           fatal "Era summaries don't lead up to backfill, database or era files corrupt?",
@@ -1256,7 +1265,7 @@ proc init*(T: type ChainDAGRef, cfg: RuntimeConfig, db: BeaconChainDB,
     head = shortLog(dag.head),
     finalizedHead = shortLog(dag.finalizedHead),
     tail = shortLog(dag.tail),
-    backfill = (dag.backfill.slot, shortLog(dag.backfill.parent_root)),
+    backfill = shortLog(dag.backfill),

     loadDur = loadTick - startTick,
     summariesDur = summariesTick - loadTick,
diff --git a/beacon_chain/consensus_object_pools/blockchain_dag_light_client.nim b/beacon_chain/consensus_object_pools/blockchain_dag_light_client.nim
index 5aaad018a8..af1369fa6d 100644
--- a/beacon_chain/consensus_object_pools/blockchain_dag_light_client.nim
+++ b/beacon_chain/consensus_object_pools/blockchain_dag_light_client.nim
@@ -28,7 +28,8 @@ proc updateExistingState(
   ## Wrapper around `updateState` for states expected to exist.
   let ok = dag.updateState(state, bsi, save, cache)
   if not ok:
-    error "State failed to load unexpectedly", bsi, tail = dag.tail.slot
+    error "State failed to load unexpectedly",
+      bsi, tail = dag.tail.slot, backfill = shortLog(dag.backfill)
   doAssert strictVerification notin dag.updateFlags
   ok

@@ -41,7 +42,8 @@ template withUpdatedExistingState(
   dag.withUpdatedState(stateParam, bsiParam) do:
     okBody
   do:
-    error "State failed to load unexpectedly", bsi, tail = dag.tail.slot
+    error "State failed to load unexpectedly",
+      bsi, tail = dag.tail.slot, backfill = shortLog(dag.backfill)
     doAssert strictVerification notin dag.updateFlags
     failureBody

@@ -49,7 +51,8 @@ proc getExistingBlockIdAtSlot(dag: ChainDAGRef, slot: Slot): Opt[BlockSlotId] =
   ## Wrapper around `getBlockIdAtSlot` for blocks expected to exist.
   let bsi = dag.getBlockIdAtSlot(slot)
   if bsi.isNone:
-    error "Block failed to load unexpectedly", slot, tail = dag.tail.slot
+    error "Block failed to load unexpectedly",
+      slot, tail = dag.tail.slot, backfill = shortLog(dag.backfill)
   doAssert strictVerification notin dag.updateFlags
   bsi

@@ -57,7 +60,8 @@ proc existingParent(dag: ChainDAGRef, bid: BlockId): Opt[BlockId] =
   ## Wrapper around `parent` for parents known to exist.
   let parent = dag.parent(bid)
   if parent.isNone:
-    error "Parent failed to load unexpectedly", bid, tail = dag.tail.slot
+    error "Parent failed to load unexpectedly",
+      bid, tail = dag.tail.slot, backfill = shortLog(dag.backfill)
   doAssert strictVerification notin dag.updateFlags
   parent

@@ -66,7 +70,8 @@ proc getExistingForkedBlock(
   ## Wrapper around `getForkedBlock` for blocks expected to exist.
   let bdata = dag.getForkedBlock(bid)
   if bdata.isNone:
-    error "Block failed to load unexpectedly", bid, tail = dag.tail.slot
+    error "Block failed to load unexpectedly",
+      bid, tail = dag.tail.slot, backfill = shortLog(dag.backfill)
   doAssert strictVerification notin dag.updateFlags
   bdata

@@ -78,7 +83,7 @@ proc existingCurrentSyncCommitteeForPeriod(
   let syncCommittee = dag.currentSyncCommitteeForPeriod(tmpState, period)
   if syncCommittee.isNone:
     error "Current sync committee failed to load unexpectedly",
-      period, tail = dag.tail.slot
+      period, tail = dag.tail.slot, backfill = shortLog(dag.backfill)
   doAssert strictVerification notin dag.updateFlags
   syncCommittee

@@ -360,7 +365,7 @@ proc initLightClientUpdateForPeriod(
           continue
       finalizedSlot = finalizedEpoch.start_slot
       finalizedBsi =
-        if finalizedSlot >= dag.tail.slot:
+        if finalizedSlot >= max(dag.tail.slot, dag.backfill.slot):
          dag.getExistingBlockIdAtSlot(finalizedSlot).valueOr:
            dag.handleUnexpectedLightClientError(finalizedSlot)
            res.err()
@@ -542,7 +547,7 @@ proc assignLightClientData(
     when lcDataFork > LightClientDataFork.None:
       if finalized_slot == forkyObject.finalized_header.beacon.slot:
         forkyObject.finality_branch = attested_data.finality_branch
-      elif finalized_slot < dag.tail.slot:
+      elif finalized_slot < max(dag.tail.slot, dag.backfill.slot):
         forkyObject.finalized_header.reset()
         forkyObject.finality_branch.reset()
       else:
@@ -632,13 +637,13 @@ proc createLightClientUpdate(
   let
     finalized_slot = attested_data.finalized_slot
     finalized_bsi =
-      if finalized_slot >= dag.tail.slot:
+      if finalized_slot >= max(dag.tail.slot, dag.backfill.slot):
         dag.getExistingBlockIdAtSlot(finalized_slot)
       else:
         Opt.none(BlockSlotId)
     has_finality =
       finalized_bsi.isSome and
-        finalized_bsi.get.bid.slot >= dag.tail.slot
+        finalized_bsi.get.bid.slot >= max(dag.tail.slot, dag.backfill.slot)
     meta = LightClientUpdateMetadata(
       attested_slot: attested_slot,
       finalized_slot: finalized_slot,
@@ -722,19 +727,7 @@ proc initLightClientDataCache*(dag: ChainDAGRef) =
       # State availability, needed for `cacheLightClientData`
       dag.tail.slot,
       # Block availability, needed for `LightClientHeader.execution_branch`
-      if dag.backfill.slot != GENESIS_SLOT:
-        let existing = dag.getBlockIdAtSlot(dag.backfill.slot)
-        if existing.isSome:
-          if dag.getForkedBlock(existing.get.bid).isNone:
-            # Special case: when starting with only a checkpoint state,
-            # we will not have the head block data in the database
-            dag.backfill.slot + 1
-          else:
-            dag.backfill.slot
-        else:
-          dag.backfill.slot
-      else:
-        dag.backfill.slot))
+      dag.backfill.slot))

   dag.lcDataStore.cache.tailSlot = max(dag.head.slot, targetTailSlot)
   if dag.head.slot < dag.lcDataStore.cache.tailSlot:
diff --git a/beacon_chain/nimbus_beacon_node.nim b/beacon_chain/nimbus_beacon_node.nim
index 66d2ff8660..bbe4b232ea 100644
--- a/beacon_chain/nimbus_beacon_node.nim
+++ b/beacon_chain/nimbus_beacon_node.nim
@@ -323,7 +323,10 @@ proc initFullNode(
       dag.finalizedHead.slot

   func getBackfillSlot(): Slot =
-    dag.backfill.slot
+    if dag.backfill.parent_root != dag.tail.root:
+      dag.backfill.slot
+    else:
+      dag.tail.slot

   func getFrontfillSlot(): Slot =
     max(dag.frontfill.get(BlockId()).slot, dag.horizon)
diff --git a/beacon_chain/trusted_node_sync.nim b/beacon_chain/trusted_node_sync.nim
index ea9220e5fe..27abe734c7 100644
--- a/beacon_chain/trusted_node_sync.nim
+++ b/beacon_chain/trusted_node_sync.nim
@@ -407,7 +407,7 @@ proc doTrustedNodeSync*(
   let
     validatorMonitor = newClone(ValidatorMonitor.init(false, false))
     dag = ChainDAGRef.init(cfg, db, validatorMonitor, {}, eraPath = eraDir)
-    backfillSlot = dag.backfill.slot
+    backfillSlot = max(dag.backfill.slot, 1.Slot) - 1
     horizon = max(dag.horizon, dag.frontfill.valueOr(BlockId()).slot)

   let canReindex = if backfillSlot <= horizon:
@@ -418,7 +418,7 @@ proc doTrustedNodeSync*(
       # detection to kick in, in addBackfillBlock
       let missingSlots = dag.backfill.slot - horizon + 1

-      notice "Downloading historical blocks - you can interrupt this process at any time and it automatically be completed when you start the beacon node",
+      notice "Downloading historical blocks - you can interrupt this process at any time and it will automatically be completed when you start the beacon node",
         backfillSlot, horizon, missingSlots

       var # Same averaging as SyncManager
diff --git a/tests/test_blockchain_dag.nim b/tests/test_blockchain_dag.nim
index dde5dab31f..5f0a3a7540 100644
--- a/tests/test_blockchain_dag.nim
+++ b/tests/test_blockchain_dag.nim
@@ -817,7 +817,7 @@ suite "Backfill":
     let
       db = BeaconChainDB.new("", inMemory = true)

-  test "backfill to genesis":
+  test "Backfill to genesis":
     let
       tailBlock = blocks[^1]
       genBlock = get_initial_beacon_block(genState[])
@@ -860,15 +860,17 @@ suite "Backfill":

       dag.getFinalizedEpochRef() != nil

-      dag.backfill == tailBlock.phase0Data.message.toBeaconBlockSummary()
+      # Checkpoint block is unavailable, and should be backfilled first
+      not dag.containsBlock(dag.tail)
+      dag.backfill == BeaconBlockSummary(
+        slot: dag.tail.slot + 1,
+        parent_root: dag.tail.root)

       # Check that we can propose right from the checkpoint state
       dag.getProposalState(dag.head, dag.head.slot + 1, cache).isOk()

-    var
-      badBlock = blocks[^2].phase0Data
-    badBlock.signature = blocks[^3].phase0Data.signature
-
+    var badBlock = blocks[^1].phase0Data
+    badBlock.signature = blocks[^2].phase0Data.signature
     check:
       dag.addBackfillBlock(badBlock) ==
         AddBackRes.err VerifierError.Invalid
@@ -878,6 +880,8 @@ suite "Backfill":
"Backfill": dag.addBackfillBlock(genBlock.phase0Data.asSigned()) == AddBackRes.err VerifierError.MissingParent + dag.addBackfillBlock(blocks[^2].phase0Data) == + AddBackRes.err VerifierError.MissingParent dag.addBackfillBlock(tailBlock.phase0Data).isOk() check: @@ -917,7 +921,10 @@ suite "Backfill": check: dag.getFinalizedEpochRef() != nil - test "reload backfill position": + for i in 0.. 1: + blocks[^(i - 1)].phase0Data.message.toBeaconBlockSummary() + else: + BeaconBlockSummary( + slot: blocks[^1].phase0Data.message.slot + 1, + parent_root: blocks[^1].phase0Data.root)) + + for j in 1..blocks.len: + if j < i: + check dag.addBackfillBlock(blocks[^j].phase0Data) == + AddBackRes.err VerifierError.Duplicate + elif j > i: + check dag.addBackfillBlock(blocks[^j].phase0Data) == + AddBackRes.err VerifierError.MissingParent + else: + discard + + check: + dag.addBackfillBlock(blocks[^i].phase0Data).isOk() + dag.backfill == blocks[^i].phase0Data.message.toBeaconBlockSummary() + + block: + let + validatorMonitor = newClone(ValidatorMonitor.init()) + dag = init(ChainDAGRef, defaultRuntimeConfig, db, validatorMonitor, {}) + genBlock = get_initial_beacon_block(genState[]) + check: + dag.addBackfillBlock(genBlock.phase0Data.asSigned()).isOk() + dag.backfill == default(BeaconBlockSummary) + + let + validatorMonitor = newClone(ValidatorMonitor.init()) + dag = init(ChainDAGRef, defaultRuntimeConfig, db, validatorMonitor, {}) + check dag.backfill == default(BeaconBlockSummary) + suite "Starting states": setup: let @@ -1055,14 +1114,17 @@ suite "Starting states": dag.getFinalizedEpochRef() != nil - dag.backfill == tailBlock.phase0Data.message.toBeaconBlockSummary() + # Checkpoint block is unavailable, and should be backfileld first + not dag.containsBlock(dag.tail) + dag.backfill == BeaconBlockSummary( + slot: dag.tail.slot + 1, + parent_root: dag.tail.root) # Check that we can propose right from the checkpoint state dag.getProposalState(dag.head, dag.head.slot + 1, cache).isOk() - var - badBlock = blocks[^2].phase0Data - badBlock.signature = blocks[^3].phase0Data.signature + var badBlock = blocks[^1].phase0Data + badBlock.signature = blocks[^2].phase0Data.signature check: dag.addBackfillBlock(badBlock) == AddBackRes.err VerifierError.Invalid @@ -1072,7 +1134,9 @@ suite "Starting states": dag.addBackfillBlock(genBlock.phase0Data.asSigned()) == AddBackRes.err VerifierError.MissingParent - dag.addBackfillBlock(tailBlock.phase0Data) == AddBackRes.ok() + dag.addBackfillBlock(blocks[^2].phase0Data) == + AddBackRes.err VerifierError.MissingParent + dag.addBackfillBlock(tailBlock.phase0Data).isOk() check: dag.addBackfillBlock(blocks[^2].phase0Data).isOk() @@ -1084,6 +1148,9 @@ suite "Starting states": dag.getBlockId(blocks[^2].root).get().root == blocks[^2].root dag.getBlockIdAtSlot(dag.tail.slot).get().bid == dag.tail + dag.getBlockIdAtSlot(dag.tail.slot - 1).get() == + blocks[^2].toBlockId().atSlot() + dag.getBlockIdAtSlot(dag.tail.slot - 2).isNone dag.backfill == blocks[^2].phase0Data.message.toBeaconBlockSummary()