fix checkpoint block potentially not getting backfilled into DB
When using checkpoint sync, initially only the checkpoint state is
available; the checkpoint block is not downloaded and is backfilled later.

`dag.backfill` tracks the latest filled `slot`, and the latest `parent_root`
for which no block has been synced yet.

In checkpoint sync, this assumption is broken: there, the initial
`dag.backfill.slot` is set based on the checkpoint state slot, and the
corresponding block is likewise not available.

However, the sync manager in backward mode also requests `dag.backfill.slot`,
and `block_clearance` then backfills the checkpoint block once it is synced.
But there is no guarantee that a peer ever sends us that block. A peer could
send us all parent blocks and omit only the checkpoint block itself. In that
situation, we would accept the parent blocks and advance `dag.backfill`, and
subsequently never request the checkpoint block again, resulting in a gap in
the blocks DB that is never filled.

To mitigate that, the assumption is restored that `dag.backfill.slot`
is the latest filled `slot`, and `dag.backfill.parent_root` is the next
block that needs to be synced. By setting `slot` to `tail.slot + 1` and
`parent_root` to `tail.root`, we put a fake summary into `dag.backfill`
so that `block_clearance` only proceeds once the checkpoint block exists.
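
For illustration, a condensed sketch of the resulting `dag.backfill` seeding,
based on the `blockchain_dag.nim` hunk below (the surrounding `ChainDAGRef.init`
code, including the explicit `backfillRoot` case, is abbreviated):

```nim
# Condensed sketch of how `dag.backfill` is seeded in `ChainDAGRef.init`.
dag.backfill =
  if dag.containsBlock(dag.tail):
    # Tail block is present in the DB: resume from its stored summary.
    db.getBeaconBlockSummary(dag.tail.root).expect(
      "Tail block must have a summary: " & $dag.tail.root)
  else:
    # Checkpoint sync: only the checkpoint state is available, the block is
    # not. Insert a fake summary so that `slot` remains "latest filled slot"
    # and `parent_root` remains "next block to sync"; backfill then keeps
    # requesting the checkpoint block until it is actually stored.
    BeaconBlockSummary(
      slot: dag.tail.slot + 1,
      parent_root: dag.tail.root)
```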
etan-status committed Feb 7, 2024
1 parent 94ba0a9 commit a32af0c
Showing 9 changed files with 144 additions and 58 deletions.
9 changes: 5 additions & 4 deletions AllTests-mainnet.md
@@ -23,11 +23,12 @@ OK: 1/1 Fail: 0/1 Skip: 0/1
OK: 12/12 Fail: 0/12 Skip: 0/12
## Backfill
```diff
+ Backfill to genesis OK
+ Init without genesis / block OK
+ backfill to genesis OK
+ reload backfill position OK
+ Reload backfill position OK
+ Restart after each block OK
```
OK: 3/3 Fail: 0/3 Skip: 0/3
OK: 4/4 Fail: 0/4 Skip: 0/4
## Beacon chain DB [Preset: mainnet]
```diff
+ empty database [Preset: mainnet] OK
@@ -986,4 +987,4 @@ OK: 2/2 Fail: 0/2 Skip: 0/2
OK: 9/9 Fail: 0/9 Skip: 0/9

---TOTAL---
OK: 667/672 Fail: 0/672 Skip: 5/672
OK: 668/673 Fail: 0/673 Skip: 5/673
3 changes: 3 additions & 0 deletions beacon_chain/beacon_chain_db.nim
@@ -204,6 +204,9 @@ type
slot*: Slot
parent_root*: Eth2Digest

func shortLog*(v: BeaconBlockSummary): auto =
(v.slot, shortLog(v.parent_root))

# Subkeys essentially create "tables" within the key-value store by prefixing
# each entry with a table id

24 changes: 14 additions & 10 deletions beacon_chain/consensus_object_pools/block_clearance.nim
@@ -349,7 +349,7 @@ proc addBackfillBlock*(
blockRoot = shortLog(signedBlock.root)
blck = shortLog(signedBlock.message)
signature = shortLog(signedBlock.signature)
backfill = (dag.backfill.slot, shortLog(dag.backfill.parent_root))
backfill = shortLog(dag.backfill)

template blck(): untyped = signedBlock.message # shortcuts without copy
template blockRoot(): untyped = signedBlock.root
@@ -393,18 +393,22 @@ proc addBackfillBlock*(
if existing.isSome:
if existing.get().bid.slot == blck.slot and
existing.get().bid.root == blockRoot:

# Special case: when starting with only a checkpoint state, we will not
# have the head block data in the database
if dag.getForkedBlock(existing.get().bid).isNone():
let isDuplicate = dag.containsBlock(existing.get().bid)
if isDuplicate:
debug "Duplicate block"
else:
checkSignature()

debug "Block backfilled (checkpoint)"
debug "Block backfilled (known BlockId)"
dag.putBlock(signedBlock.asTrusted())
return ok()

debug "Duplicate block"
return err(VerifierError.Duplicate)
if blockRoot == dag.backfill.parent_root:
dag.backfill = blck.toBeaconBlockSummary()

return
if isDuplicate:
err(VerifierError.Duplicate)
else:
ok()

# Block is older than finalized, but different from the block in our
# canonical history: it must be from an unviable branch
6 changes: 6 additions & 0 deletions beacon_chain/consensus_object_pools/block_pools_types.nim
@@ -156,6 +156,12 @@ type
## The backfill points to the oldest block with an unbroken ancestry from
## dag.tail - when backfilling, we'll move backwards in time starting
## with the parent of this block until we reach `frontfill`.
##
## - `backfill.slot` points to the earliest block that has been synced,
## or, if no blocks have been synced yet, to `checkpoint_state.slot + 1`
## which is the earliest slot that may have `parent_root` as ancestor.
## - `backfill.parent_root` is the latest block that is not yet synced.
## - Once backfill completes, `backfill.slot` refers to `GENESIS_SLOT`.

frontfillBlocks*: seq[Eth2Digest]
## A temporary cache of blocks that we could load from era files, once
21 changes: 15 additions & 6 deletions beacon_chain/consensus_object_pools/blockchain_dag.nim
@@ -117,7 +117,7 @@ proc updateFrontfillBlocks*(dag: ChainDAGRef) =
# era files match the chain
if dag.db.db.readOnly: return # TODO abstraction leak - where to put this?

if dag.frontfillBlocks.len == 0 or dag.backfill.slot > 0:
if dag.frontfillBlocks.len == 0 or dag.backfill.slot > GENESIS_SLOT:
return

info "Writing frontfill index", slots = dag.frontfillBlocks.len
@@ -211,6 +211,9 @@ proc containsBlock(
cfg: RuntimeConfig, db: BeaconChainDB, slot: Slot, root: Eth2Digest): bool =
db.containsBlock(root, cfg.consensusForkAtEpoch(slot.epoch))

proc containsBlock*(dag: ChainDAGRef, bid: BlockId): bool =
dag.cfg.containsBlock(dag.db, bid.slot, bid.root)

proc getForkedBlock*(db: BeaconChainDB, root: Eth2Digest):
Opt[ForkedTrustedSignedBeaconBlock] =
# When we only have a digest, we don't know which fork it's from so we try
@@ -1179,9 +1182,14 @@ proc init*(T: type ChainDAGRef, cfg: RuntimeConfig, db: BeaconChainDB,

db.getBeaconBlockSummary(backfillRoot).expect(
"Backfill block must have a summary: " & $backfillRoot)
else:
elif dag.containsBlock(dag.tail):
db.getBeaconBlockSummary(dag.tail.root).expect(
"Tail block must have a summary: " & $dag.tail.root)
else:
# Checkpoint sync, checkpoint block unavailable
BeaconBlockSummary(
slot: dag.tail.slot + 1,
parent_root: dag.tail.root)

dag.forkDigests = newClone ForkDigests.init(
cfg, getStateField(dag.headState, genesis_validators_root))
@@ -1193,8 +1201,9 @@ proc init*(T: type ChainDAGRef, cfg: RuntimeConfig, db: BeaconChainDB,

let finalizedTick = Moment.now()

if dag.backfill.slot > 0: # See if we can frontfill blocks from era files
dag.frontfillBlocks = newSeqOfCap[Eth2Digest](dag.backfill.slot.int)
if dag.backfill.slot > GENESIS_SLOT: # Try frontfill from era files
let backfillSlot = dag.backfill.slot - 1
dag.frontfillBlocks = newSeqOfCap[Eth2Digest](backfillSlot.int)

let
historical_roots = getStateField(dag.headState, historical_roots).asSeq()
@@ -1207,7 +1216,7 @@ proc init*(T: type ChainDAGRef, cfg: RuntimeConfig, db: BeaconChainDB,
# blocks from genesis to backfill, if possible.
for bid in dag.era.getBlockIds(
historical_roots, historical_summaries, Slot(0), Eth2Digest()):
if bid.slot >= dag.backfill.slot:
if bid.slot >= backfillSlot:
# If we end up in here, we failed the root comparison just below in
# an earlier iteration
fatal "Era summaries don't lead up to backfill, database or era files corrupt?",
@@ -1256,7 +1265,7 @@ proc init*(T: type ChainDAGRef, cfg: RuntimeConfig, db: BeaconChainDB,
head = shortLog(dag.head),
finalizedHead = shortLog(dag.finalizedHead),
tail = shortLog(dag.tail),
backfill = (dag.backfill.slot, shortLog(dag.backfill.parent_root)),
backfill = shortLog(dag.backfill),

loadDur = loadTick - startTick,
summariesDur = summariesTick - loadTick,
39 changes: 16 additions & 23 deletions beacon_chain/consensus_object_pools/blockchain_dag_light_client.nim
@@ -28,7 +28,8 @@ proc updateExistingState(
## Wrapper around `updateState` for states expected to exist.
let ok = dag.updateState(state, bsi, save, cache)
if not ok:
error "State failed to load unexpectedly", bsi, tail = dag.tail.slot
error "State failed to load unexpectedly",
bsi, tail = dag.tail.slot, backfill = shortLog(dag.backfill)
doAssert strictVerification notin dag.updateFlags
ok

@@ -41,23 +42,26 @@ template withUpdatedExistingState(
dag.withUpdatedState(stateParam, bsiParam) do:
okBody
do:
error "State failed to load unexpectedly", bsi, tail = dag.tail.slot
error "State failed to load unexpectedly",
bsi, tail = dag.tail.slot, backfill = shortLog(dag.backfill)
doAssert strictVerification notin dag.updateFlags
failureBody

proc getExistingBlockIdAtSlot(dag: ChainDAGRef, slot: Slot): Opt[BlockSlotId] =
## Wrapper around `getBlockIdAtSlot` for blocks expected to exist.
let bsi = dag.getBlockIdAtSlot(slot)
if bsi.isNone:
error "Block failed to load unexpectedly", slot, tail = dag.tail.slot
error "Block failed to load unexpectedly",
slot, tail = dag.tail.slot, backfill = shortLog(dag.backfill)
doAssert strictVerification notin dag.updateFlags
bsi

proc existingParent(dag: ChainDAGRef, bid: BlockId): Opt[BlockId] =
## Wrapper around `parent` for parents known to exist.
let parent = dag.parent(bid)
if parent.isNone:
error "Parent failed to load unexpectedly", bid, tail = dag.tail.slot
error "Parent failed to load unexpectedly",
bid, tail = dag.tail.slot, backfill = shortLog(dag.backfill)
doAssert strictVerification notin dag.updateFlags
parent

@@ -66,7 +70,8 @@ proc getExistingForkedBlock(
## Wrapper around `getForkedBlock` for blocks expected to exist.
let bdata = dag.getForkedBlock(bid)
if bdata.isNone:
error "Block failed to load unexpectedly", bid, tail = dag.tail.slot
error "Block failed to load unexpectedly",
bid, tail = dag.tail.slot, backfill = shortLog(dag.backfill)
doAssert strictVerification notin dag.updateFlags
bdata

@@ -78,7 +83,7 @@ proc existingCurrentSyncCommitteeForPeriod(
let syncCommittee = dag.currentSyncCommitteeForPeriod(tmpState, period)
if syncCommittee.isNone:
error "Current sync committee failed to load unexpectedly",
period, tail = dag.tail.slot
period, tail = dag.tail.slot, backfill = shortLog(dag.backfill)
doAssert strictVerification notin dag.updateFlags
syncCommittee

@@ -360,7 +365,7 @@ proc initLightClientUpdateForPeriod(
continue
finalizedSlot = finalizedEpoch.start_slot
finalizedBsi =
if finalizedSlot >= dag.tail.slot:
if finalizedSlot >= max(dag.tail.slot, dag.backfill.slot):
dag.getExistingBlockIdAtSlot(finalizedSlot).valueOr:
dag.handleUnexpectedLightClientError(finalizedSlot)
res.err()
@@ -542,7 +547,7 @@ proc assignLightClientData(
when lcDataFork > LightClientDataFork.None:
if finalized_slot == forkyObject.finalized_header.beacon.slot:
forkyObject.finality_branch = attested_data.finality_branch
elif finalized_slot < dag.tail.slot:
elif finalized_slot < max(dag.tail.slot, dag.backfill.slot):
forkyObject.finalized_header.reset()
forkyObject.finality_branch.reset()
else:
@@ -632,13 +637,13 @@ proc createLightClientUpdate(
let
finalized_slot = attested_data.finalized_slot
finalized_bsi =
if finalized_slot >= dag.tail.slot:
if finalized_slot >= max(dag.tail.slot, dag.backfill.slot):
dag.getExistingBlockIdAtSlot(finalized_slot)
else:
Opt.none(BlockSlotId)
has_finality =
finalized_bsi.isSome and
finalized_bsi.get.bid.slot >= dag.tail.slot
finalized_bsi.get.bid.slot >= max(dag.tail.slot, dag.backfill.slot)
meta = LightClientUpdateMetadata(
attested_slot: attested_slot,
finalized_slot: finalized_slot,
@@ -722,19 +727,7 @@ proc initLightClientDataCache*(dag: ChainDAGRef) =
# State availability, needed for `cacheLightClientData`
dag.tail.slot,
# Block availability, needed for `LightClientHeader.execution_branch`
if dag.backfill.slot != GENESIS_SLOT:
let existing = dag.getBlockIdAtSlot(dag.backfill.slot)
if existing.isSome:
if dag.getForkedBlock(existing.get.bid).isNone:
# Special case: when starting with only a checkpoint state,
# we will not have the head block data in the database
dag.backfill.slot + 1
else:
dag.backfill.slot
else:
dag.backfill.slot
else:
dag.backfill.slot))
dag.backfill.slot))

dag.lcDataStore.cache.tailSlot = max(dag.head.slot, targetTailSlot)
if dag.head.slot < dag.lcDataStore.cache.tailSlot:
5 changes: 4 additions & 1 deletion beacon_chain/nimbus_beacon_node.nim
@@ -323,7 +323,10 @@ proc initFullNode(
dag.finalizedHead.slot

func getBackfillSlot(): Slot =
dag.backfill.slot
if dag.backfill.parent_root != dag.tail.root:
dag.backfill.slot
else:
dag.tail.slot

func getFrontfillSlot(): Slot =
max(dag.frontfill.get(BlockId()).slot, dag.horizon)
4 changes: 2 additions & 2 deletions beacon_chain/trusted_node_sync.nim
@@ -407,7 +407,7 @@ proc doTrustedNodeSync*(
let
validatorMonitor = newClone(ValidatorMonitor.init(false, false))
dag = ChainDAGRef.init(cfg, db, validatorMonitor, {}, eraPath = eraDir)
backfillSlot = dag.backfill.slot
backfillSlot = max(dag.backfill.slot, 1.Slot) - 1
horizon = max(dag.horizon, dag.frontfill.valueOr(BlockId()).slot)

let canReindex = if backfillSlot <= horizon:
@@ -418,7 +418,7 @@ proc doTrustedNodeSync*(
# detection to kick in, in addBackfillBlock
let missingSlots = dag.backfill.slot - horizon + 1

notice "Downloading historical blocks - you can interrupt this process at any time and it automatically be completed when you start the beacon node",
notice "Downloading historical blocks - you can interrupt this process at any time and it will automatically be completed when you start the beacon node",
backfillSlot, horizon, missingSlots

var # Same averaging as SyncManager
Expand Down