diff --git a/plugins/net_plugin/net_plugin.cpp b/plugins/net_plugin/net_plugin.cpp index 40467a49c9..6b04257dcb 100644 --- a/plugins/net_plugin/net_plugin.cpp +++ b/plugins/net_plugin/net_plugin.cpp @@ -2206,10 +2206,12 @@ namespace eosio { auto forkdb_head = cc.fork_db_head(); auto calculated_lib = forkdb_head.irreversible_blocknum(); auto num_blocks_that_can_be_applied = calculated_lib > head_num ? calculated_lib - head_num : 0; + // add blocks that can potentially be applied as they are not in the forkdb yet + num_blocks_that_can_be_applied += blk_num > forkdb_head.block_num() ? blk_num - forkdb_head.block_num() : 0; if (num_blocks_that_can_be_applied < sync_fetch_span) { if (head_num ) - fc_ilog(logger, "sync ahead allowed past sync-fetch-span ${sp}, block ${bn} for paused LIB ${l}, chain_lib ${cl}, forkdb size ${s}", - ("bn", blk_num)("sp", sync_fetch_span)("l", calculated_lib)("cl", head_num)("s", cc.fork_db_size())); + fc_ilog(logger, "sync ahead allowed past sync-fetch-span ${sp}, block ${bn} for paused LIB ${l}, chain_lib ${cl}, forkdb size ${s}", + ("bn", blk_num)("sp", sync_fetch_span)("l", calculated_lib)("cl", head_num)("s", cc.fork_db_size())); return true; } } diff --git a/tests/TestHarness/Node.py b/tests/TestHarness/Node.py index 0ccedfda0c..8cfab82bac 100644 --- a/tests/TestHarness/Node.py +++ b/tests/TestHarness/Node.py @@ -687,6 +687,27 @@ def findInLog(self, searchStr): return True return False + def linesInLog(self, searchStr): + dataDir=Utils.getNodeDataDir(self.nodeId) + files=Node.findStderrFiles(dataDir) + lines=[] + for file in files: + with open(file, 'r') as f: + for line in f: + if searchStr in line: + lines.append(line) + return lines + + def countInLog(self, searchStr) -> int: + dataDir=Utils.getNodeDataDir(self.nodeId) + files=Node.findStderrFiles(dataDir) + count = 0 + for file in files: + with open(file, 'r') as f: + contents = f.read() + count += contents.count(searchStr) + return count + # verify only one or two 'Starting block' per block number unless block is restarted def verifyStartingBlockMessages(self): dataDir=Utils.getNodeDataDir(self.nodeId) diff --git a/tests/nodeos_startup_catchup.py b/tests/nodeos_startup_catchup.py index feca64f4dc..669e1d7233 100755 --- a/tests/nodeos_startup_catchup.py +++ b/tests/nodeos_startup_catchup.py @@ -135,9 +135,9 @@ def waitForNodeStarted(node): transactionsPerBlock=targetTpsPerGenerator*trxGeneratorCnt*timePerBlock/1000 steadyStateWait=20 startBlockNum=blockNum+steadyStateWait - numBlocks=20 + numBlocks=400 endBlockNum=startBlockNum+numBlocks - waitForBlock(node0, endBlockNum) + waitForBlock(node0, endBlockNum, timeout=numBlocks) steadyStateWindowTrxs=0 steadyStateAvg=0 steadyStateWindowBlks=0 @@ -182,23 +182,18 @@ def waitForNodeStarted(node): Print("Shutdown catchup node and validate exit code") catchupNode.interruptAndVerifyExitStatus(60) - # every other catchup make a lib catchup - if catchup_num % 2 == 0: - Print(f"Wait for producer to advance lib past head of catchup {catchupHead}") - # catchupHead+5 to allow for advancement of head during shutdown of catchupNode - waitForBlock(node0, catchupHead+5, timeout=twoRoundsTimeout*2, blockType=BlockType.lib) + Print(f"Wait for producer to advance lib past head of catchup {catchupHead}") + # catchupHead+5 to allow for advancement of head during shutdown of catchupNode + waitForBlock(node0, catchupHead+5, timeout=twoRoundsTimeout*2, blockType=BlockType.lib) Print("Restart catchup node") - addSwapFlags = None - if catchup_num % 3 == 0: - addSwapFlags = {"--block-log-retain-blocks": "0", "--delete-all": ""} - catchupNode.relaunch(skipGenesis=False, addSwapFlags=addSwapFlags) + catchupNode.relaunch(skipGenesis=False) waitForNodeStarted(catchupNode) lastCatchupLibNum=lib(catchupNode) Print("Verify catchup node is advancing") # verify catchup node is advancing to producer - waitForBlock(catchupNode, lastCatchupLibNum+1, timeout=twoRoundsTimeout, blockType=BlockType.lib) + waitForBlock(catchupNode, lastLibNum, timeout=twoRoundsTimeout, blockType=BlockType.lib) Print("Verify producer is still advancing LIB") lastLibNum=lib(node0) @@ -209,20 +204,56 @@ def waitForNodeStarted(node): # verify catchup node is advancing to producer waitForBlock(catchupNode, lastLibNum, timeout=(numBlocksToCatchup/2 + 60), blockType=BlockType.lib) catchupNode.interruptAndVerifyExitStatus(60) + + Print("Verify catchup without a block log") + addSwapFlags = {"--block-log-retain-blocks": "0", "--delete-all": ""} + catchupNode.relaunch(skipGenesis=False, addSwapFlags=addSwapFlags) + waitForNodeStarted(catchupNode) + lastCatchupLibNum=lib(catchupNode) + + Print("Verify catchup node is advancing without block log") + # verify catchup node is advancing to producer + waitForBlock(catchupNode, lastLibNum+1, timeout=twoRoundsTimeout, blockType=BlockType.lib) + catchupNode.interruptAndVerifyExitStatus(60) catchupNode.popenProc=None - logFile = Utils.getNodeDataDir(catchupNodeNum) + "/stderr.txt" - f = open(logFile) - contents = f.read() + # Verify not syncing ahead of sync-fetch-span + sync_fetch_span = 1000 # default + irreversible = False + if catchupNode.nodeId in specificExtraNodeosArgs: + m = re.search(r"sync-fetch-span (\d+)", specificExtraNodeosArgs[catchupNode.nodeId]) + if m is not None: + sync_fetch_span = int(m.group(1)) + irreversible = re.search(r"irreversible", specificExtraNodeosArgs[catchupNode.nodeId]) is not None + Print(f"Verify request span for sync-fetch-span {sync_fetch_span} of {catchupNode.data_dir}") + lines = catchupNode.linesInLog("requesting range") + for line in lines: + m = re.search(r"requesting range (\d+) to (\d+), fhead (\d+), lib (\d+)", line) + if m is not None: + startBlockNum=int(m.group(1)) + endBlockNum=int(m.group(2)) + fhead=int(m.group(3)) + libNum=int(m.group(4)) + if endBlockNum-startBlockNum > sync_fetch_span: + errorExit(f"Requested range exceeds sync-fetch-span {sync_fetch_span}: {line}") + if irreversible: + # for now just use a larger tolerance, later when the logs include calculated lib this can be more precise + # See https://github.com/AntelopeIO/spring/issues/806 + if endBlockNum > fhead and fhead > libNum and endBlockNum - fhead > (sync_fetch_span*10): + errorExit(f"Requested range too far head of fork head {fhead} in irreversible mode, sync-fetch-span {sync_fetch_span}: {line}") + else: + if endBlockNum > fhead and fhead > libNum and endBlockNum - fhead > (sync_fetch_span*2-1): + errorExit(f"Requested range too far head of fork head {fhead} sync-fetch-span {sync_fetch_span}: {line}") + # See https://github.com/AntelopeIO/spring/issues/81 for fix to reduce the number of expected unlinkable blocks # Test verifies LIB is advancing, check to see that not too many unlinkable block exceptions are generated # while syncing up to head. - numUnlinkable = contents.count("3030001 unlinkable_block_exception: Unlinkable block") + numUnlinkable = catchupNode.countInLog("unlinkable_block") numUnlinkableAllowed = 500 - Print(f"Node{catchupNodeNum} has {numUnlinkable} unlinkable_block_exception in {logFile}") + Print(f"Node{catchupNodeNum} has {numUnlinkable} unlinkable_block in {catchupNode.data_dir}") if numUnlinkable > numUnlinkableAllowed: errorExit(f"Node{catchupNodeNum} has {numUnlinkable} which is more than the configured " - f"allowed {numUnlinkableAllowed} unlinkable blocks: {logFile}.") + f"allowed {numUnlinkableAllowed} unlinkable blocks: {catchupNode.data_dir}.") testSuccessful=True