[1.0] Normally process blocks from the forkdb on startup #572
Changes from all commits: 5a6f76c, 1eb8318, 61ebd0f, a48dc06, 03589b9
@@ -2224,6 +2224,8 @@ namespace eosio {
         set_state( lib_catchup );
         sync_last_requested_num = 0;
         sync_next_expected_num = chain_info.lib_num + 1;
+      } else if (sync_next_expected_num >= sync_last_requested_num) {
+         // break
      } else {
         peer_dlog(c, "already syncing, start sync ignored");
         return;

Review comment: This change causes the net_plugin to request more blocks if possible instead of remaining in a mode where it thinks it is already syncing.
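To make that behaviour concrete, here is a minimal Python model of the three-way decision the hunk above implements. It is an illustration only, not the net_plugin code; the returned action names are invented for the example.

```python
# Illustrative Python model of the sync-start decision above (not the net_plugin code).
def start_sync_decision(is_lib_catchup, sync_next_expected_num, sync_last_requested_num):
    """Return what the sync logic does when a start-sync request arrives."""
    if not is_lib_catchup:
        # Not yet catching up: enter lib_catchup and request blocks from just past LIB.
        return "enter_lib_catchup"
    elif sync_next_expected_num >= sync_last_requested_num:
        # Everything previously requested has already arrived, so request more
        # blocks instead of treating the node as "already syncing".
        return "request_more_blocks"
    else:
        # An earlier request is still outstanding; ignore this start-sync call.
        return "already_syncing_ignored"

if __name__ == "__main__":
    print(start_sync_decision(True, 120, 120))  # request_more_blocks (new behaviour)
    print(start_sync_decision(True, 130, 150))  # already_syncing_ignored (unchanged)
```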
@@ -43,7 +43,7 @@
def executeTest(cluster, testNodeId, testNodeArgs, resultMsgs):
    testNode = None
    testResult = False
-    resultDesc = "!!!BUG IS CONFIRMED ON TEST CASE #{} ({})".format(
+    resultDesc = "!!!BUG IS CONFIRMED ON TEST CASE #{} ({})".format(
        testNodeId,
        testNodeArgs
    )
@@ -58,6 +58,7 @@ def executeTest(cluster, testNodeId, testNodeArgs, resultMsgs):

    testNode = cluster.getNode(testNodeId)
    assert not testNode.verifyAlive() # resets pid so relaunch works
    peers = testNode.rmFromCmd('--p2p-peer-address')
    testNode.relaunch(addSwapFlags={"--terminate-at-block": "9999999"})

    # Wait for node to start up.
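The `assert not testNode.verifyAlive()` line matters because the harness can only relaunch a node once it has confirmed the previous process is gone and has dropped its stale pid. The snippet below is a self-contained toy model of that pattern, not the TestHarness implementation; the `ToyNode` class and its methods are invented for illustration.

```python
import signal
import subprocess
import time

class ToyNode:
    """Toy stand-in for a harness node; models only the process-handle bookkeeping."""
    def __init__(self, cmd):
        self.cmd = cmd
        self.popen = None

    def launch(self):
        self.popen = subprocess.Popen(self.cmd)

    def kill(self):
        if self.popen:
            self.popen.send_signal(signal.SIGTERM)

    def verify_alive(self):
        # poll() is None while the process runs; once it has exited, drop the
        # stale handle so a later relaunch starts from a clean slate.
        if self.popen and self.popen.poll() is None:
            return True
        self.popen = None
        return False

    def relaunch(self):
        assert self.popen is None, "old process handle must be cleared first"
        self.launch()

node = ToyNode(["sleep", "60"])
node.launch()
node.kill()
time.sleep(0.5)
assert not node.verify_alive()  # clears the stale handle, mirroring the pid reset
node.relaunch()
node.kill()
```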
@@ -75,9 +76,9 @@ def executeTest(cluster, testNodeId, testNodeArgs, resultMsgs):
    checkReplay(testNode, testNodeArgs)

    # verify node can be restarted after a replay
-    checkRestart(testNode, "--replay-blockchain")
+    checkRestart(testNode, "--replay-blockchain", peers)

-    resultDesc = "!!!TEST CASE #{} ({}) IS SUCCESSFUL".format(
+    resultDesc = "!!!TEST CASE #{} ({}) IS SUCCESSFUL".format(
        testNodeId,
        testNodeArgs
    )
@@ -144,12 +145,12 @@ def checkReplay(testNode, testNodeArgs):
    head, lib = getBlockNumInfo(testNode)
    assert head == termAtBlock, f"head {head} termAtBlock {termAtBlock}"

-def checkRestart(testNode, rmChainArgs):
+def checkRestart(testNode, rmChainArgs, peers):
    """Test restart of node continues"""
    if testNode and not testNode.killed:
        assert testNode.kill(signal.SIGTERM)

-    if not testNode.relaunch(rmArgs=rmChainArgs):
+    if not testNode.relaunch(chainArg=peers, rmArgs=rmChainArgs):
        Utils.errorExit(f"Unable to relaunch after {rmChainArgs}")

    assert testNode.verifyAlive(), f"relaunch failed after {rmChainArgs}"
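The new `peers` parameter exists because `rmFromCmd('--p2p-peer-address')` strips the peer addresses from the node's stored command line, so a plain relaunch would come back up with no peers. Below is a self-contained sketch of that strip-and-restore round trip; the `strip_flag` helper is hypothetical and only mirrors the idea behind `rmFromCmd` plus `relaunch(chainArg=peers, ...)`, it is not the harness code.

```python
import shlex

def strip_flag(cmd, flag):
    """Remove every `flag value` pair from cmd; return (new_cmd, removed_part)."""
    args = shlex.split(cmd)
    kept, removed = [], []
    i = 0
    while i < len(args):
        if args[i] == flag and i + 1 < len(args):
            removed += args[i:i + 2]
            i += 2
        else:
            kept.append(args[i])
            i += 1
    return " ".join(kept), " ".join(removed)

cmd = "nodeos --plugin eosio::chain_api_plugin --p2p-peer-address localhost:9876"
cmd, peers = strip_flag(cmd, "--p2p-peer-address")
print(cmd)    # nodeos --plugin eosio::chain_api_plugin
print(peers)  # --p2p-peer-address localhost:9876

# On relaunch the saved peers string is appended back, mirroring
# testNode.relaunch(chainArg=peers, rmArgs=...) in the hunk above.
print(f"{cmd} --replay-blockchain {peers}")
```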
@@ -201,7 +202,7 @@ def checkHeadOrSpeculative(head, lib):
def executeSnapshotBlocklogTest(cluster, testNodeId, resultMsgs, nodeArgs, termAtBlock):
    testNode = cluster.getNode(testNodeId)
    testResult = False
-    resultDesc = "!!!BUG IS CONFIRMED ON TEST CASE #{} ({})".format(
+    resultDesc = "!!!BUG IS CONFIRMED ON TEST CASE #{} ({})".format(
        testNodeId,
        f"replay block log, {nodeArgs} --terminate-at-block {termAtBlock}"
    )
@@ -221,7 +222,7 @@ def executeSnapshotBlocklogTest(cluster, testNodeId, resultMsgs, nodeArgs, termA
        m=re.search(r"Block ([\d]+) reached configured maximum block", line)
        if m:
            assert int(m.group(1)) == termAtBlock, f"actual terminating block number {m.group(1)} not equal to expected termAtBlock {termAtBlock}"
-            resultDesc = f"!!!TEST CASE #{testNodeId} (replay block log, mode {nodeArgs} --terminate-at-block {termAtBlock}) IS SUCCESSFUL"
+            resultDesc = f"!!!TEST CASE #{testNodeId} (replay block log, mode {nodeArgs} --terminate-at-block {termAtBlock}) IS SUCCESSFUL"
            testResult = True

    Print(resultDesc)
@@ -266,10 +267,10 @@ def executeSnapshotBlocklogTest(cluster, testNodeId, resultMsgs, nodeArgs, termA
    0 : "--enable-stale-production"
}
regularNodeosArgs = {
-    1 : "--read-mode irreversible --terminate-at-block 75",
-    2 : "--read-mode head --terminate-at-block 100",
-    3 : "--read-mode speculative --terminate-at-block 125",
-    4 : "--read-mode irreversible --terminate-at-block 155"
+    1 : "--read-mode irreversible --terminate-at-block 100",
+    2 : "--read-mode head --terminate-at-block 125",
+    3 : "--read-mode speculative --terminate-at-block 150",
+    4 : "--read-mode irreversible --terminate-at-block 180"
}
replayNodeosArgs = {
    5 : "--read-mode irreversible",

Review comment: These block values don't really matter; I could revert to the old values if desired. They were changed when I was doing some initial testing to try to reproduce the issue in this test. The changes below are what is needed.
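Each entry in these dicts is the extra nodeos argument string for one test node, and the test later checks that the node's head matches the block named in `--terminate-at-block` (the `assert head == termAtBlock` seen earlier). Here is a small self-contained sketch of that parameterisation; the `term_at_block` helper is hypothetical.

```python
import re

regularNodeosArgs = {
    1: "--read-mode irreversible --terminate-at-block 100",
    2: "--read-mode head --terminate-at-block 125",
    3: "--read-mode speculative --terminate-at-block 150",
    4: "--read-mode irreversible --terminate-at-block 180",
}

def term_at_block(nodeArgs):
    """Hypothetical helper: pull the --terminate-at-block value out of an args string."""
    m = re.search(r"--terminate-at-block (\d+)", nodeArgs)
    return int(m.group(1)) if m else None

for nodeId, nodeArgs in regularNodeosArgs.items():
    # Prints 1 100, 2 125, 3 150, 4 180 -- the values the head assertion expects.
    print(nodeId, term_at_block(nodeArgs))
```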
@@ -345,6 +346,18 @@ def executeSnapshotBlocklogTest(cluster, testNodeId, resultMsgs, nodeArgs, termA
        if not success:
            break

+    # Test nodes can restart and advance lib
+    if not cluster.biosNode.relaunch():
+        Utils.errorExit("Unable to restart bios node")
+
+    if not producingNode.relaunch():
+        Utils.errorExit("Unable to restart producing node")
+
+    if success:
+        for nodeId, nodeArgs in {**regularNodeosArgs, **replayNodeosArgs}.items():
+            assert cluster.getNode(nodeId).relaunch(), f"Unable to relaunch {nodeId}"
+            assert cluster.getNode(nodeId).waitForLibToAdvance(), f"LIB did not advance for {nodeId}"
+
    testSuccessful = success

    Utils.Print("Script End ................................")

Review comment: This extra section on restarting nodes failed before the fixes in this PR.
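"LIB did not advance" here means the node's last irreversible block number stops growing after the relaunch, which is the symptom this PR's forkdb-startup fix addresses. The harness call `waitForLibToAdvance()` does the actual waiting; the loop below is only a hypothetical model of that kind of check, not the TestHarness code.

```python
import time

def wait_for_lib_to_advance(get_lib_num, timeout=30.0, poll=0.5):
    """Hypothetical model: return True once LIB rises above its starting value."""
    start_lib = get_lib_num()
    deadline = time.time() + timeout
    while time.time() < deadline:
        if get_lib_num() > start_lib:
            return True
        time.sleep(poll)
    return False

# Toy usage with a fake LIB counter that advances on every call.
state = {"lib": 100}
def fake_lib_num():
    state["lib"] += 1
    return state["lib"]

assert wait_for_lib_to_advance(fake_lib_num)
```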
Review comment: This compromise came after a long conversation with @arhag on potential alternatives.