Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[1.0.2 -> main] Prevent sync ahead in irreversible mode #814

Merged
merged 5 commits into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions plugins/net_plugin/net_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2206,10 +2206,12 @@ namespace eosio {
auto forkdb_head = cc.fork_db_head();
auto calculated_lib = forkdb_head.irreversible_blocknum();
auto num_blocks_that_can_be_applied = calculated_lib > head_num ? calculated_lib - head_num : 0;
// add blocks that can potentially be applied as they are not in the forkdb yet
num_blocks_that_can_be_applied += blk_num > forkdb_head.block_num() ? blk_num - forkdb_head.block_num() : 0;
if (num_blocks_that_can_be_applied < sync_fetch_span) {
if (head_num )
fc_ilog(logger, "sync ahead allowed past sync-fetch-span ${sp}, block ${bn} for paused LIB ${l}, chain_lib ${cl}, forkdb size ${s}",
("bn", blk_num)("sp", sync_fetch_span)("l", calculated_lib)("cl", head_num)("s", cc.fork_db_size()));
fc_ilog(logger, "sync ahead allowed past sync-fetch-span ${sp}, block ${bn} for paused LIB ${l}, chain_lib ${cl}, forkdb size ${s}",
("bn", blk_num)("sp", sync_fetch_span)("l", calculated_lib)("cl", head_num)("s", cc.fork_db_size()));
return true;
}
}
Expand Down
21 changes: 21 additions & 0 deletions tests/TestHarness/Node.py
Original file line number Diff line number Diff line change
Expand Up @@ -687,6 +687,27 @@ def findInLog(self, searchStr):
return True
return False

def linesInLog(self, searchStr):
    """Return every line from this node's stderr log files that contains searchStr.

    Scans all stderr files found under the node's data directory and collects
    matching lines (newline included) in file order.
    """
    data_dir = Utils.getNodeDataDir(self.nodeId)
    matches = []
    for stderr_file in Node.findStderrFiles(data_dir):
        with open(stderr_file, 'r') as log:
            # extend with a generator so we never build a per-file temp list
            matches.extend(line for line in log if searchStr in line)
    return matches

def countInLog(self, searchStr) -> int:
    """Count non-overlapping occurrences of searchStr across this node's stderr logs.

    Reads each stderr file under the node's data directory whole and sums
    str.count over all of them.
    """
    data_dir = Utils.getNodeDataDir(self.nodeId)
    total = 0
    for stderr_file in Node.findStderrFiles(data_dir):
        with open(stderr_file, 'r') as log:
            total += log.read().count(searchStr)
    return total

# verify only one or two 'Starting block' per block number unless block is restarted
def verifyStartingBlockMessages(self):
dataDir=Utils.getNodeDataDir(self.nodeId)
Expand Down
67 changes: 49 additions & 18 deletions tests/nodeos_startup_catchup.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,9 +135,9 @@ def waitForNodeStarted(node):
transactionsPerBlock=targetTpsPerGenerator*trxGeneratorCnt*timePerBlock/1000
steadyStateWait=20
startBlockNum=blockNum+steadyStateWait
numBlocks=20
numBlocks=400
endBlockNum=startBlockNum+numBlocks
waitForBlock(node0, endBlockNum)
waitForBlock(node0, endBlockNum, timeout=numBlocks)
steadyStateWindowTrxs=0
steadyStateAvg=0
steadyStateWindowBlks=0
Expand Down Expand Up @@ -182,23 +182,18 @@ def waitForNodeStarted(node):
Print("Shutdown catchup node and validate exit code")
catchupNode.interruptAndVerifyExitStatus(60)

# every other catchup make a lib catchup
if catchup_num % 2 == 0:
Print(f"Wait for producer to advance lib past head of catchup {catchupHead}")
# catchupHead+5 to allow for advancement of head during shutdown of catchupNode
waitForBlock(node0, catchupHead+5, timeout=twoRoundsTimeout*2, blockType=BlockType.lib)
Print(f"Wait for producer to advance lib past head of catchup {catchupHead}")
# catchupHead+5 to allow for advancement of head during shutdown of catchupNode
waitForBlock(node0, catchupHead+5, timeout=twoRoundsTimeout*2, blockType=BlockType.lib)

Print("Restart catchup node")
addSwapFlags = None
if catchup_num % 3 == 0:
addSwapFlags = {"--block-log-retain-blocks": "0", "--delete-all": ""}
catchupNode.relaunch(skipGenesis=False, addSwapFlags=addSwapFlags)
catchupNode.relaunch(skipGenesis=False)
waitForNodeStarted(catchupNode)
lastCatchupLibNum=lib(catchupNode)

Print("Verify catchup node is advancing")
# verify catchup node is advancing to producer
waitForBlock(catchupNode, lastCatchupLibNum+1, timeout=twoRoundsTimeout, blockType=BlockType.lib)
waitForBlock(catchupNode, lastLibNum, timeout=twoRoundsTimeout, blockType=BlockType.lib)

Print("Verify producer is still advancing LIB")
lastLibNum=lib(node0)
Expand All @@ -209,20 +204,56 @@ def waitForNodeStarted(node):
# verify catchup node is advancing to producer
waitForBlock(catchupNode, lastLibNum, timeout=(numBlocksToCatchup/2 + 60), blockType=BlockType.lib)
catchupNode.interruptAndVerifyExitStatus(60)

Print("Verify catchup without a block log")
addSwapFlags = {"--block-log-retain-blocks": "0", "--delete-all": ""}
catchupNode.relaunch(skipGenesis=False, addSwapFlags=addSwapFlags)
waitForNodeStarted(catchupNode)
lastCatchupLibNum=lib(catchupNode)

Print("Verify catchup node is advancing without block log")
# verify catchup node is advancing to producer
waitForBlock(catchupNode, lastLibNum+1, timeout=twoRoundsTimeout, blockType=BlockType.lib)
catchupNode.interruptAndVerifyExitStatus(60)
catchupNode.popenProc=None

logFile = Utils.getNodeDataDir(catchupNodeNum) + "/stderr.txt"
f = open(logFile)
contents = f.read()
# Verify not syncing ahead of sync-fetch-span
sync_fetch_span = 1000 # default
irreversible = False
if catchupNode.nodeId in specificExtraNodeosArgs:
m = re.search(r"sync-fetch-span (\d+)", specificExtraNodeosArgs[catchupNode.nodeId])
if m is not None:
sync_fetch_span = int(m.group(1))
irreversible = re.search(r"irreversible", specificExtraNodeosArgs[catchupNode.nodeId]) is not None
Print(f"Verify request span for sync-fetch-span {sync_fetch_span} of {catchupNode.data_dir}")
lines = catchupNode.linesInLog("requesting range")
for line in lines:
m = re.search(r"requesting range (\d+) to (\d+), fhead (\d+), lib (\d+)", line)
if m is not None:
startBlockNum=int(m.group(1))
endBlockNum=int(m.group(2))
fhead=int(m.group(3))
libNum=int(m.group(4))
if endBlockNum-startBlockNum > sync_fetch_span:
errorExit(f"Requested range exceeds sync-fetch-span {sync_fetch_span}: {line}")
if irreversible:
# for now just use a larger tolerance, later when the logs include calculated lib this can be more precise
# See https://github.com/AntelopeIO/spring/issues/806
if endBlockNum > fhead and fhead > libNum and endBlockNum - fhead > (sync_fetch_span*10):
errorExit(f"Requested range too far head of fork head {fhead} in irreversible mode, sync-fetch-span {sync_fetch_span}: {line}")
else:
if endBlockNum > fhead and fhead > libNum and endBlockNum - fhead > (sync_fetch_span*2-1):
errorExit(f"Requested range too far head of fork head {fhead} sync-fetch-span {sync_fetch_span}: {line}")

# See https://github.com/AntelopeIO/spring/issues/81 for fix to reduce the number of expected unlinkable blocks
# Test verifies LIB is advancing, check to see that not too many unlinkable block exceptions are generated
# while syncing up to head.
numUnlinkable = contents.count("3030001 unlinkable_block_exception: Unlinkable block")
numUnlinkable = catchupNode.countInLog("unlinkable_block")
numUnlinkableAllowed = 500
Print(f"Node{catchupNodeNum} has {numUnlinkable} unlinkable_block_exception in {logFile}")
Print(f"Node{catchupNodeNum} has {numUnlinkable} unlinkable_block in {catchupNode.data_dir}")
if numUnlinkable > numUnlinkableAllowed:
errorExit(f"Node{catchupNodeNum} has {numUnlinkable} which is more than the configured "
f"allowed {numUnlinkableAllowed} unlinkable blocks: {logFile}.")
f"allowed {numUnlinkableAllowed} unlinkable blocks: {catchupNode.data_dir}.")

testSuccessful=True

Expand Down