From af074243bd8e2d176fdda1f49af6c55f79cc8cd9 Mon Sep 17 00:00:00 2001
From: Lin Huang <lin.huang@eosnetwork.com>
Date: Tue, 29 Oct 2024 15:47:33 -0400
Subject: [PATCH 01/17] initial version of tests for SHiP restarting from
 corrupted log and index files

---
 tests/CMakeLists.txt       |   3 +
 tests/ship_restart_test.py | 314 +++++++++++++++++++++++++++++++++++++
 2 files changed, 317 insertions(+)
 create mode 100755 tests/ship_restart_test.py

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index a6194c13f1..e84d653199 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -53,6 +53,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/full-version-label.sh ${CMAKE_CURRENT
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/nodeos_producer_watermark_test.py ${CMAKE_CURRENT_BINARY_DIR}/nodeos_producer_watermark_test.py COPYONLY)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cli_test.py ${CMAKE_CURRENT_BINARY_DIR}/cli_test.py COPYONLY)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/ship_reqs_across_svnn_test.py ${CMAKE_CURRENT_BINARY_DIR}/ship_reqs_across_svnn_test.py COPYONLY)
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/ship_restart_test.py ${CMAKE_CURRENT_BINARY_DIR}/ship_restart_test.py COPYONLY)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/ship_test.py ${CMAKE_CURRENT_BINARY_DIR}/ship_test.py COPYONLY)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/ship_streamer_test.py ${CMAKE_CURRENT_BINARY_DIR}/ship_streamer_test.py COPYONLY)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/ship_kill_client_test.py ${CMAKE_CURRENT_BINARY_DIR}/ship_kill_client_test.py COPYONLY)
@@ -175,6 +176,8 @@ set_property(TEST production_restart PROPERTY LABELS nonparallelizable_tests)
 
 add_test(NAME ship_reqs_across_svnn_test COMMAND tests/ship_reqs_across_svnn_test.py -v ${UNSHARE} WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
 set_property(TEST ship_reqs_across_svnn_test PROPERTY LABELS nonparallelizable_tests)
+add_test(NAME ship_restart_test COMMAND tests/ship_restart_test.py -v ${UNSHARE} WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
+set_property(TEST ship_restart_test PROPERTY LABELS nonparallelizable_tests)
 add_test(NAME ship_test COMMAND tests/ship_test.py -v --num-clients 10 --num-requests 5000 ${UNSHARE} WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
 set_property(TEST ship_test PROPERTY LABELS nonparallelizable_tests)
 add_test(NAME ship_test_unix COMMAND tests/ship_test.py -v --num-clients 10 --num-requests 5000 ${UNSHARE} --unix-socket WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
diff --git a/tests/ship_restart_test.py b/tests/ship_restart_test.py
new file mode 100755
index 0000000000..47312f29d8
--- /dev/null
+++ b/tests/ship_restart_test.py
@@ -0,0 +1,314 @@
+#!/usr/bin/env python3
+
+import os
+import tempfile
+import shutil
+import signal
+
+from TestHarness import Cluster, TestHelper, Utils, WalletMgr
+
+###############################################################################
+# ship_restart_test
+# 
+# This test verifies SHiP shuts down gracefully or recovers when restarting
+# with various scenarios of corrupted log and/or index files.
+# 
+###############################################################################
+
+Print=Utils.Print
+
+args = TestHelper.parse_args({"--dump-error-details","--keep-logs","-v","--leave-running","--unshared"})
+
+Utils.Debug=args.v
+cluster=Cluster(unshared=args.unshared, keepRunning=args.leave_running, keepLogs=args.keep_logs)
+dumpErrorDetails=args.dump_error_details
+walletPort=TestHelper.DEFAULT_WALLET_PORT
+
+totalProducerNodes=1
+totalNonProducerNodes=1 # for SHiP node
+totalNodes=totalProducerNodes+totalNonProducerNodes
+
+walletMgr=WalletMgr(True, port=walletPort)
+testSuccessful=False
+
+prodNodeId = 0
+shipNodeId = 1
+
+origStateHistoryLog   = ""
+stateHistoryLog       = ""
+origStateHistoryIndex = ""
+stateHistoryIndex     = ""
+
+# Returns True if file1 and file2 contain the same content
+def equalFiles(file1, file2):
+    # not the same if sizes are different
+    if os.path.getsize(file1) != os.path.getsize(file2):
+        return False
+
+    readSize=1024
+    with open(file1, 'rb') as f1, open(file2, 'rb') as f2:
+        while True:
+            bytes1 = f1.read(readSize)
+            bytes2 = f2.read(readSize)
+
+            if bytes1 != bytes2:
+                return False
+
+            # end of both files
+            if not bytes1:
+                return True
+
+# Verifies that SHiP should fail to restart with a corrupted first entry header
+def corruptedHeaderTest(pos, curruptedValue, shipNode):
+    # restore log and index
+    shutil.copyfile(origStateHistoryLog, stateHistoryLog)
+    shutil.copyfile(origStateHistoryIndex, stateHistoryIndex)
+
+    with open(stateHistoryLog, 'rb+') as f: # opened as binary file
+        f.seek(pos) # seek to the position to corrupt
+        f.write(curruptedValue) # corrupt it
+
+    isRelaunchSuccess = shipNode.relaunch()
+    assert not isRelaunchSuccess, "SHiP node should have failed to relaunch"
+
+try:
+    TestHelper.printSystemInfo("BEGIN")
+
+    cluster.setWalletMgr(walletMgr)
+    Print("Stand up cluster")
+
+    specificExtraNodeosArgs={}
+    specificExtraNodeosArgs[shipNodeId]="--plugin eosio::state_history_plugin --trace-history --chain-state-history --finality-data-history --state-history-stride 200 --plugin eosio::net_api_plugin --plugin eosio::producer_api_plugin"
+
+    if cluster.launch(topo="mesh", pnodes=totalProducerNodes, totalNodes=totalNodes,
+                      activateIF=True,
+                      specificExtraNodeosArgs=specificExtraNodeosArgs) is False:
+        Utils.cmdError("launcher")
+        Utils.errorExit("Failed to stand up cluster.")
+
+    # Verify nodes are in sync and advancing
+    cluster.waitOnClusterSync(blockAdvancing=5)
+    Print("Cluster in Sync")
+
+    Print("Shutdown unneeded bios node")
+    cluster.biosNode.kill(signal.SIGTERM)
+
+    prodNode = cluster.getNode(prodNodeId)
+    shipNode = cluster.getNode(shipNodeId)
+
+    Print("Shutdown producer and SHiP nodes")
+    prodNode.kill(signal.SIGTERM)
+    shipNode.kill(signal.SIGTERM)
+
+    shipDir               = os.path.join(Utils.getNodeDataDir(shipNodeId), "state-history")
+    stateHistoryLog       = os.path.join(shipDir, "chain_state_history.log")
+    stateHistoryIndex     = os.path.join(shipDir, "chain_state_history.index")
+    tmpDir                = tempfile.mkdtemp()
+    origStateHistoryLog   = os.path.join(tmpDir, "chain_state_history.log")
+    origStateHistoryIndex = os.path.join(tmpDir, "chain_state_history.index")
+
+    # save original chain_state_history log and index files
+    Print("Save original SHiP log and index")
+    shutil.copyfile(stateHistoryLog, origStateHistoryLog)
+    shutil.copyfile(stateHistoryIndex, origStateHistoryIndex)
+
+    ############## Part 1: restart tests while producer node is down  #################
+    
+    #-------- Index file is removed. It should be regenerated at restart.
+    Print("index file removed test")
+
+    os.remove(stateHistoryIndex)
+
+    isRelaunchSuccess = shipNode.relaunch()
+    assert isRelaunchSuccess, "Failed to relaunch shipNode"
+    assert equalFiles(stateHistoryLog, origStateHistoryLog) # log unchanged
+    assert equalFiles(stateHistoryIndex, origStateHistoryIndex) # index regenerated
+
+    shipNode.kill(signal.SIGTERM) # shut down ship node for next test
+
+    '''
+    Test failure 1: index file was not regenerated. Reenable this after issue is fixed.
+
+    #-------- Index file last entry is corrupted. It should be regenerated at restart.
+    with open(stateHistoryIndex, 'rb+') as stateHistoryIndexFile: # opened as binary file
+        # seek to last entry (8 bytes before the end of file)
+        stateHistoryIndexFile.seek(-8, 2) # -8 for backward, 2 for starting at end
+
+        # set the index to a random value
+        stateHistoryIndexFile.write(b'\x00\x01\x02\x03\x04\x05\x06\x07')
+
+    isRelaunchSuccess = shipNode.relaunch()
+    assert isRelaunchSuccess, "Failed to relaunch shipNode"
+    assert equalFiles(stateHistoryLog, origStateHistoryLog)
+    assert equalFiles(stateHistoryIndex, origStateHistoryIndex)
+    '''
+
+    #-------- Truncate index file. It should be regenerated
+    #         because index size is not the same as expected size
+    Print("Truncated index file test")
+
+    # restore log and index
+    shutil.copyfile(origStateHistoryLog, stateHistoryLog)
+    shutil.copyfile(origStateHistoryIndex, stateHistoryIndex)
+
+    with open(stateHistoryIndex, 'rb+') as f:
+        indexFileSize = os.path.getsize(stateHistoryIndex)
+        newSize       = indexFileSize - 8
+        f.truncate(newSize)
+
+    isRelaunchSuccess = shipNode.relaunch()
+    assert isRelaunchSuccess, "Failed to relaunch shipNode"
+    assert equalFiles(stateHistoryLog, origStateHistoryLog) # log file unchanged
+    assert equalFiles(stateHistoryIndex, origStateHistoryIndex) # index file regenerated
+
+    shipNode.kill(signal.SIGTERM) # shut down it for next test
+
+    #-------- Add an extra entry to index file. It should be regenerated
+    #         because index size is not the same as expected size
+    Print("Extra entry index file test")
+
+    # restore log and index
+    shutil.copyfile(origStateHistoryLog, stateHistoryLog)
+    shutil.copyfile(origStateHistoryIndex, stateHistoryIndex)
+
+    with open(stateHistoryIndex, 'rb+') as stateHistoryIndexFile: # opened as binary file
+        # seek to last index, 8 bytes before the end of file
+        stateHistoryIndexFile.seek(0, 2) # -8 for backward, 2 for starting at end
+
+        # write a small value
+        stateHistoryIndexFile.write(b'\x00\x00\x00\x00\x00\x00\x01\x0F')
+
+    isRelaunchSuccess = shipNode.relaunch()
+    assert isRelaunchSuccess, "Failed to relaunch shipNode"
+    assert equalFiles(stateHistoryLog, origStateHistoryLog) # log file not changed
+    assert equalFiles(stateHistoryIndex, origStateHistoryIndex) # index file regenerated
+
+    shipNode.kill(signal.SIGTERM) # shut down it for next test
+
+    #-------- Remove log file. The log file should be reconstructed from state
+    Print("Removed log file test")
+
+    # restore index
+    shutil.copyfile(origStateHistoryIndex, stateHistoryIndex)
+
+    os.remove(stateHistoryLog)
+
+    isRelaunchSuccess = shipNode.relaunch()
+    assert isRelaunchSuccess, "Failed to relaunch shipNode"
+
+    shipNode.kill(signal.SIGTERM) # shut down it for next test
+
+    #-------- Corrupt first entry's magic. Relaunch should fail
+    Print("first entry magic corruption test")
+
+    corruptedHeaderTest(0, b'\x00\x01\x02\x03\x04\x05\x06\x07', shipNode) # 0 is magic's position
+
+    #-------- Corrupt first entry's block_id. Relaunch should fail
+    Print("first entry block_id corruption test")
+
+    corruptedHeaderTest(8, b'\x00\x01\x02\x03\x04\x05\x06\x07', shipNode) # 8 is block_id's position
+
+    '''
+    # Test failure 1: Reenable this after issue is fixed.
+    #-------- Corrupt first entry's payload_size. Relaunch should fail
+    Print("first entry payload_size corruption test")
+
+    corruptedHeaderTest(40, b'\x00\x00\x00\x00\x00\x00\x00\x01', shipNode) # 40 is payload_size position
+    '''
+
+    '''
+    # Test failure 2: Reenable this after issue is fixed.
+    #-------- Corrupt last entry's position . It should be repaired.
+    # After producer node restarts, head on SHiP node should advance.
+    Print("last entry postion corruption test")
+
+    shutil.copyfile(origStateHistoryLog, stateHistoryLog)
+    shutil.copyfile(origStateHistoryIndex, stateHistoryIndex)
+
+    with open(stateHistoryLog, 'rb+') as stateHistoryLogFile: # opened as binary file
+        # seek to last index (8 bytes before the end of file)
+        stateHistoryLogFile.seek(-8, 2) # -8 for backward, 2 for starting at end
+
+        # set the index to a random value
+        stateHistoryLogFile.write(b'\x00\x01\x02\x03\x04\x05\x06\x07')
+
+    isRelaunchSuccess = shipNode.relaunch()
+    assert isRelaunchSuccess, "Failed to relaunch shipNode"
+    isRelaunchSuccess = prodNode.relaunch(chainArg="--enable-stale-production")
+    assert isRelaunchSuccess, "Failed to relaunch prodNode"
+
+    assert shipNode.waitForHeadToAdvance(), "Head did not advance on shipNode"
+    prodNode.kill(signal.SIGTERM)
+    shipNode.kill(signal.SIGTERM)
+    '''
+
+    '''
+    # Test failure 3: Reenable this after issue is fixed.
+    #-------- Corrupt last entry's header. It should be repaired.
+    # After producer node restarts, head on SHiP node should advance.
+    Print("last entry header corruption test")
+
+    shutil.copyfile(origStateHistoryLog, stateHistoryLog)
+    shutil.copyfile(origStateHistoryIndex, stateHistoryIndex)
+
+    with open(stateHistoryLog, 'rb+') as f: # opened as binary file
+        # seek to last index (8 bytes before the end of file)
+        f.seek(-8, 2) # -8 for backward, 2 for starting at end
+
+        data = f.read(8)
+        integer_value = int.from_bytes(data, byteorder='little')
+        f.seek(integer_value)
+
+        # corrupt the header
+        f.write(b'\x00\x01\x02\x03\x04\x05\x06\x07')
+
+    isRelaunchSuccess = shipNode.relaunch()
+    assert isRelaunchSuccess, "Failed to relaunch shipNode"
+    isRelaunchSuccess = prodNode.relaunch(chainArg="--enable-stale-production")
+    assert isRelaunchSuccess, "Failed to relaunch prodNode"
+
+    assert shipNode.waitForHeadToAdvance(), "Head did not advance on shipNode"
+    prodNode.kill(signal.SIGTERM)
+    shipNode.kill(signal.SIGTERM)
+    '''
+
+    ############## Part 2: restart tests while producer node is up  #################
+
+    isRelaunchSuccess = prodNode.relaunch(chainArg="--enable-stale-production")
+    assert isRelaunchSuccess, "Failed to relaunch prodNode"
+
+    shutil.copyfile(origStateHistoryLog, stateHistoryLog)
+    shutil.copyfile(origStateHistoryIndex, stateHistoryIndex)
+
+    #-------- Index file is removed. It should be regenerated at restart
+    os.remove(stateHistoryIndex)
+
+    isRelaunchSuccess = shipNode.relaunch()
+    assert isRelaunchSuccess, "Failed to relaunch shipNode"
+    assert shipNode.waitForHeadToAdvance(), "Head did not advance on shipNode"
+
+    shipNode.kill(signal.SIGTERM) # shut down it for next test
+
+    '''
+    #-------- Corrupt last entry of log file. It should be repaired
+    # and LIB should advance
+    with open(stateHistoryLog, 'rb+') as stateHistoryLogFile: # opened as binary file
+        # seek to last index, 8 bytes before the end of file
+        stateHistoryLogFile.seek(-8, 2) # -8 for backward, 2 for starting at end
+
+        # set the index to a random value
+        stateHistoryLogFile.write(b'\x00\x01\x02\x03\x04\x05\x06\x07')
+
+    isRelaunchSuccess = shipNode.relaunch()
+    assert isRelaunchSuccess, "Failed to relaunch shipNode"
+    assert shipNode.waitForLibToAdvance(), "LIB did not advance on shipNode"
+    '''
+
+    testSuccessful = True
+finally:
+    TestHelper.shutdown(cluster, walletMgr, testSuccessful=testSuccessful, dumpErrorDetails=dumpErrorDetails)
+    if tmpDir is not None:
+        shutil.rmtree(tmpDir, ignore_errors=True)
+
+errorCode = 0 if testSuccessful else 1
+exit(errorCode)

From d47c1f09f7d6654ae3d224c4c2aae1dde3ceef1d Mon Sep 17 00:00:00 2001
From: Lin Huang <lin.huang@eosnetwork.com>
Date: Wed, 30 Oct 2024 09:59:31 -0400
Subject: [PATCH 02/17] Update with github issues for failed tests and minor
 comment changes

---
 tests/ship_restart_test.py | 40 ++++++++++++++------------------------
 1 file changed, 15 insertions(+), 25 deletions(-)

diff --git a/tests/ship_restart_test.py b/tests/ship_restart_test.py
index 47312f29d8..fbc410c50d 100755
--- a/tests/ship_restart_test.py
+++ b/tests/ship_restart_test.py
@@ -112,7 +112,7 @@ def corruptedHeaderTest(pos, curruptedValue, shipNode):
     shutil.copyfile(stateHistoryLog, origStateHistoryLog)
     shutil.copyfile(stateHistoryIndex, origStateHistoryIndex)
 
-    ############## Part 1: restart tests while producer node is down  #################
+    ############## Part 1: tests while producer node is down  #################
     
     #-------- Index file is removed. It should be regenerated at restart.
     Print("index file removed test")
@@ -127,7 +127,7 @@ def corruptedHeaderTest(pos, curruptedValue, shipNode):
     shipNode.kill(signal.SIGTERM) # shut down ship node for next test
 
     '''
-    Test failure 1: index file was not regenerated. Reenable this after issue is fixed.
+    Test failure 1: index file was not regenerated. Reenable this after https://github.com/AntelopeIO/spring/issues/990 is fixed.
 
     #-------- Index file last entry is corrupted. It should be regenerated at restart.
     with open(stateHistoryIndex, 'rb+') as stateHistoryIndexFile: # opened as binary file
@@ -153,7 +153,7 @@ def corruptedHeaderTest(pos, curruptedValue, shipNode):
 
     with open(stateHistoryIndex, 'rb+') as f:
         indexFileSize = os.path.getsize(stateHistoryIndex)
-        newSize       = indexFileSize - 8
+        newSize       = indexFileSize - 8 # truncate 8 bytes
         f.truncate(newSize)
 
     isRelaunchSuccess = shipNode.relaunch()
@@ -165,18 +165,15 @@ def corruptedHeaderTest(pos, curruptedValue, shipNode):
 
     #-------- Add an extra entry to index file. It should be regenerated
     #         because index size is not the same as expected size
-    Print("Extra entry index file test")
+    Print("Extra entry in index file test")
 
     # restore log and index
     shutil.copyfile(origStateHistoryLog, stateHistoryLog)
     shutil.copyfile(origStateHistoryIndex, stateHistoryIndex)
 
     with open(stateHistoryIndex, 'rb+') as stateHistoryIndexFile: # opened as binary file
-        # seek to last index, 8 bytes before the end of file
-        stateHistoryIndexFile.seek(0, 2) # -8 for backward, 2 for starting at end
-
-        # write a small value
-        stateHistoryIndexFile.write(b'\x00\x00\x00\x00\x00\x00\x01\x0F')
+        stateHistoryIndexFile.seek(0, 2) # seek to end of file
+        stateHistoryIndexFile.write(b'\x00\x00\x00\x00\x00\x00\x01\x0F') # write a small value
 
     isRelaunchSuccess = shipNode.relaunch()
     assert isRelaunchSuccess, "Failed to relaunch shipNode"
@@ -186,9 +183,9 @@ def corruptedHeaderTest(pos, curruptedValue, shipNode):
     shipNode.kill(signal.SIGTERM) # shut down it for next test
 
     #-------- Remove log file. The log file should be reconstructed from state
+    #         and restart succeeds
     Print("Removed log file test")
 
-    # restore index
     shutil.copyfile(origStateHistoryIndex, stateHistoryIndex)
 
     os.remove(stateHistoryLog)
@@ -200,24 +197,14 @@ def corruptedHeaderTest(pos, curruptedValue, shipNode):
 
     #-------- Corrupt first entry's magic. Relaunch should fail
     Print("first entry magic corruption test")
-
     corruptedHeaderTest(0, b'\x00\x01\x02\x03\x04\x05\x06\x07', shipNode) # 0 is magic's position
 
     #-------- Corrupt first entry's block_id. Relaunch should fail
     Print("first entry block_id corruption test")
-
     corruptedHeaderTest(8, b'\x00\x01\x02\x03\x04\x05\x06\x07', shipNode) # 8 is block_id's position
 
     '''
-    # Test failure 1: Reenable this after issue is fixed.
-    #-------- Corrupt first entry's payload_size. Relaunch should fail
-    Print("first entry payload_size corruption test")
-
-    corruptedHeaderTest(40, b'\x00\x00\x00\x00\x00\x00\x00\x01', shipNode) # 40 is payload_size position
-    '''
-
-    '''
-    # Test failure 2: Reenable this after issue is fixed.
+    # Test failure 2: Reenable this after https://github.com/AntelopeIO/spring/issues/989 is fixed.
     #-------- Corrupt last entry's position . It should be repaired.
     # After producer node restarts, head on SHiP node should advance.
     Print("last entry postion corruption test")
@@ -243,7 +230,7 @@ def corruptedHeaderTest(pos, curruptedValue, shipNode):
     '''
 
     '''
-    # Test failure 3: Reenable this after issue is fixed.
+    # Test failure 3: Reenable this after https://github.com/AntelopeIO/spring/issues/989 is fixed.
     #-------- Corrupt last entry's header. It should be repaired.
     # After producer node restarts, head on SHiP node should advance.
     Print("last entry header corruption test")
@@ -272,7 +259,7 @@ def corruptedHeaderTest(pos, curruptedValue, shipNode):
     shipNode.kill(signal.SIGTERM)
     '''
 
-    ############## Part 2: restart tests while producer node is up  #################
+    ############## Part 2: tests while producer node is up  #################
 
     isRelaunchSuccess = prodNode.relaunch(chainArg="--enable-stale-production")
     assert isRelaunchSuccess, "Failed to relaunch prodNode"
@@ -281,6 +268,8 @@ def corruptedHeaderTest(pos, curruptedValue, shipNode):
     shutil.copyfile(origStateHistoryIndex, stateHistoryIndex)
 
     #-------- Index file is removed. It should be regenerated at restart
+    Print("Index file removed while producer node is up test")
+
     os.remove(stateHistoryIndex)
 
     isRelaunchSuccess = shipNode.relaunch()
@@ -290,8 +279,9 @@ def corruptedHeaderTest(pos, curruptedValue, shipNode):
     shipNode.kill(signal.SIGTERM) # shut down it for next test
 
     '''
+    # Test failure 4: Reenable this after issue https://github.com/AntelopeIO/spring/issues/989 fixed.
     #-------- Corrupt last entry of log file. It should be repaired
-    # and LIB should advance
+    # and head should advance
     with open(stateHistoryLog, 'rb+') as stateHistoryLogFile: # opened as binary file
         # seek to last index, 8 bytes before the end of file
         stateHistoryLogFile.seek(-8, 2) # -8 for backward, 2 for starting at end
@@ -301,7 +291,7 @@ def corruptedHeaderTest(pos, curruptedValue, shipNode):
 
     isRelaunchSuccess = shipNode.relaunch()
     assert isRelaunchSuccess, "Failed to relaunch shipNode"
-    assert shipNode.waitForLibToAdvance(), "LIB did not advance on shipNode"
+    assert shipNode.waitForHeadToAdvance(), "Head did not advance on shipNode"
     '''
 
     testSuccessful = True

From 73120f5bdf822b7dc1205a47b4be59592ac3db39 Mon Sep 17 00:00:00 2001
From: Kevin Heifner <heifnerk@objectcomputing.com>
Date: Wed, 30 Oct 2024 13:52:22 -0500
Subject: [PATCH 03/17] GH-985 Add ability to interrupt transaction in
 apply_block

---
 libraries/chain/controller.cpp                | 37 +++++++++++++++++--
 .../chain/include/eosio/chain/controller.hpp  |  3 ++
 .../chain/include/eosio/chain/exceptions.hpp  |  8 ++--
 .../include/eosio/chain/platform_timer.hpp    |  1 +
 .../chain/platform_timer_asio_fallback.cpp    |  5 +++
 libraries/chain/platform_timer_kqueue.cpp     |  5 +++
 libraries/chain/platform_timer_posix.cpp      |  5 +++
 libraries/chain/transaction_context.cpp       |  5 ++-
 programs/nodeos/main.cpp                      |  5 +++
 9 files changed, 66 insertions(+), 8 deletions(-)

diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp
index fd953c585b..8fa55cadf0 100644
--- a/libraries/chain/controller.cpp
+++ b/libraries/chain/controller.cpp
@@ -1006,6 +1006,8 @@ struct controller_impl {
    async_t                         async_aggregation = async_t::yes; // by default we process incoming votes asynchronously
    my_finalizers_t                 my_finalizers;
    std::atomic<bool>               writing_snapshot = false;
+   std::atomic<bool>               applying_block = false;
+   platform_timer&                 main_thread_timer;
 
    thread_local static platform_timer timer; // a copy for main thread and each read-only thread
 #if defined(EOSIO_EOS_VM_RUNTIME_ENABLED) || defined(EOSIO_EOS_VM_JIT_RUNTIME_ENABLED)
@@ -1285,6 +1287,7 @@ struct controller_impl {
     read_mode( cfg.read_mode ),
     thread_pool(),
     my_finalizers(cfg.finalizers_dir / config::safety_filename),
+    main_thread_timer(timer), // assumes constructor is called from main thread
     wasmif( conf.wasm_runtime, conf.eosvmoc_tierup, db, conf.state_dir, conf.eosvmoc_config, !conf.profile_accounts.empty() )
    {
       assert(cfg.chain_thread_pool_size > 0);
@@ -3780,6 +3783,9 @@ struct controller_impl {
                }
             }
 
+            applying_block = true;
+            auto apply = fc::make_scoped_exit([&](){ applying_block = false; });
+
             transaction_trace_ptr trace;
 
             size_t packed_idx = 0;
@@ -3806,7 +3812,11 @@ struct controller_impl {
                                            std::holds_alternative<transaction_id_type>(receipt.trx);
 
                if( transaction_failed && !transaction_can_fail) {
-                  edump((*trace));
+                  if (trace->except->code() == interrupt_exception::code_value) {
+                     ilog("Interrupt of trx: ${t}", ("t", *trace));
+                  } else {
+                     edump((*trace));
+                  }
                   throw *trace->except;
                }
 
@@ -3875,7 +3885,8 @@ struct controller_impl {
          } catch ( const boost::interprocess::bad_alloc& ) {
             throw;
          } catch ( const fc::exception& e ) {
-            edump((e.to_detail_string()));
+            if (e.code() != interrupt_exception::code_value)
+               edump((e.to_detail_string()));
             abort_block();
             throw;
          } catch ( const std::exception& e ) {
@@ -4431,8 +4442,12 @@ struct controller_impl {
             } catch ( const boost::interprocess::bad_alloc& ) {
                throw;
             } catch (const fc::exception& e) {
-               elog("exception thrown while applying block ${bn} : ${id}, previous ${p}, error: ${e}",
-                    ("bn", bsp->block_num())("id", bsp->id())("p", bsp->previous())("e", e.to_detail_string()));
+               if (e.code() == interrupt_exception::code_value) {
+                  ilog("interrupt while applying block ${bn} : ${id}", ("bn", bsp->block_num())("id", bsp->id()));
+               } else {
+                  elog("exception thrown while applying block ${bn} : ${id}, previous ${p}, error: ${e}",
+                       ("bn", bsp->block_num())("id", bsp->id())("p", bsp->previous())("e", e.to_detail_string()));
+               }
                except = std::current_exception();
             } catch (const std::exception& e) {
                elog("exception thrown while applying block ${bn} : ${id}, previous ${p}, error: ${e}",
@@ -4495,6 +4510,16 @@ struct controller_impl {
       return applied_trxs;
    }
 
+   void interrupt_transaction() {
+      // Only interrupt transaction if applying a block. Speculative trxs already have a deadline set so they
+      // have limited run time already. This is to allow killing a long-running transaction in a block being
+      // validated.
+      if (applying_block) {
+         ilog("Interrupting apply block");
+         main_thread_timer.expire_now();
+      }
+   }
+
    // @param if_active true if instant finality is active
    static checksum256_type calc_merkle( deque<digest_type>&& digests, bool if_active ) {
       if (if_active) {
@@ -5255,6 +5280,10 @@ deque<transaction_metadata_ptr> controller::abort_block() {
    return my->abort_block();
 }
 
+void controller::interrupt_transaction() {
+   my->interrupt_transaction();
+}
+
 boost::asio::io_context& controller::get_thread_pool() {
    return my->thread_pool.get_executor();
 }
diff --git a/libraries/chain/include/eosio/chain/controller.hpp b/libraries/chain/include/eosio/chain/controller.hpp
index cf8f12e7e4..fe58f39f95 100644
--- a/libraries/chain/include/eosio/chain/controller.hpp
+++ b/libraries/chain/include/eosio/chain/controller.hpp
@@ -206,6 +206,9 @@ namespace eosio::chain {
           */
          deque<transaction_metadata_ptr> abort_block();
 
+         /// Expected to be called from signal handler
+         void interrupt_transaction();
+
        /**
         *
         */
diff --git a/libraries/chain/include/eosio/chain/exceptions.hpp b/libraries/chain/include/eosio/chain/exceptions.hpp
index 1539b7c3be..fc6ef200b3 100644
--- a/libraries/chain/include/eosio/chain/exceptions.hpp
+++ b/libraries/chain/include/eosio/chain/exceptions.hpp
@@ -381,6 +381,9 @@ namespace eosio { namespace chain {
                                     3080005, "Transaction CPU usage is too much for the remaining allowable usage of the current block" )
       FC_DECLARE_DERIVED_EXCEPTION( deadline_exception, resource_exhausted_exception,
                                     3080006, "Transaction took too long" )
+         FC_DECLARE_DERIVED_EXCEPTION( leeway_deadline_exception, deadline_exception,
+                                       3081001, "Transaction reached the deadline set due to leeway on account CPU limits" )
+
       FC_DECLARE_DERIVED_EXCEPTION( greylist_net_usage_exceeded, resource_exhausted_exception,
                                     3080007, "Transaction exceeded the current greylisted account network usage limit" )
       FC_DECLARE_DERIVED_EXCEPTION( greylist_cpu_usage_exceeded, resource_exhausted_exception,
@@ -389,9 +392,8 @@ namespace eosio { namespace chain {
                                     3080009, "Read-only transaction eos-vm-oc compile temporary failure" )
       FC_DECLARE_DERIVED_EXCEPTION( ro_trx_vm_oc_compile_permanent_failure, resource_exhausted_exception,
                                     3080010, "Read-only transaction eos-vm-oc compile permanent failure" )
-
-      FC_DECLARE_DERIVED_EXCEPTION( leeway_deadline_exception, deadline_exception,
-                                    3081001, "Transaction reached the deadline set due to leeway on account CPU limits" )
+      FC_DECLARE_DERIVED_EXCEPTION( interrupt_exception, resource_exhausted_exception,
+                                    3080011, "Transaction interrupted by signal" )
 
    FC_DECLARE_DERIVED_EXCEPTION( authorization_exception, chain_exception,
                                  3090000, "Authorization exception" )
diff --git a/libraries/chain/include/eosio/chain/platform_timer.hpp b/libraries/chain/include/eosio/chain/platform_timer.hpp
index 29a8d62d46..72fb8d1fef 100644
--- a/libraries/chain/include/eosio/chain/platform_timer.hpp
+++ b/libraries/chain/include/eosio/chain/platform_timer.hpp
@@ -17,6 +17,7 @@ struct platform_timer {
 
    void start(fc::time_point tp);
    void stop();
+   void expire_now();
 
    /* Sets a callback for when timer expires. Be aware this could might fire from a signal handling context and/or
       on any particular thread. Only a single callback can be registered at once; trying to register more will
diff --git a/libraries/chain/platform_timer_asio_fallback.cpp b/libraries/chain/platform_timer_asio_fallback.cpp
index 28525c7968..7ec808e128 100644
--- a/libraries/chain/platform_timer_asio_fallback.cpp
+++ b/libraries/chain/platform_timer_asio_fallback.cpp
@@ -84,6 +84,11 @@ void platform_timer::start(fc::time_point tp) {
    }
 }
 
+void platform_timer::expire_now() {
+   expired = 1;
+   call_expiration_callback();
+}
+
 void platform_timer::stop() {
    if(expired)
       return;
diff --git a/libraries/chain/platform_timer_kqueue.cpp b/libraries/chain/platform_timer_kqueue.cpp
index 3cb341a031..3e9dc68104 100644
--- a/libraries/chain/platform_timer_kqueue.cpp
+++ b/libraries/chain/platform_timer_kqueue.cpp
@@ -106,6 +106,11 @@ void platform_timer::start(fc::time_point tp) {
    }
 }
 
+void platform_timer::expire_now() {
+   expired = 1;
+   call_expiration_callback();
+}
+
 void platform_timer::stop() {
    if(expired)
       return;
diff --git a/libraries/chain/platform_timer_posix.cpp b/libraries/chain/platform_timer_posix.cpp
index bb000de5c3..0a50ebcade 100644
--- a/libraries/chain/platform_timer_posix.cpp
+++ b/libraries/chain/platform_timer_posix.cpp
@@ -71,6 +71,11 @@ void platform_timer::start(fc::time_point tp) {
    }
 }
 
+void platform_timer::expire_now() {
+   expired = 1;
+   call_expiration_callback();
+}
+
 void platform_timer::stop() {
    if(expired)
       return;
diff --git a/libraries/chain/transaction_context.cpp b/libraries/chain/transaction_context.cpp
index c59a9f2f8b..279b64a982 100644
--- a/libraries/chain/transaction_context.cpp
+++ b/libraries/chain/transaction_context.cpp
@@ -459,7 +459,10 @@ namespace eosio::chain {
          return;
 
       auto now = fc::time_point::now();
-      if( explicit_billed_cpu_time || deadline_exception_code == deadline_exception::code_value ) {
+      if (explicit_billed_cpu_time) {
+         EOS_THROW( interrupt_exception, "interrupt signaled, ran ${bt}us, start ${s}",
+                    ("bt", now - pseudo_start)("s", start) );
+      } else if( deadline_exception_code == deadline_exception::code_value ) {
          EOS_THROW( deadline_exception, "deadline exceeded ${billing_timer}us",
                      ("billing_timer", now - pseudo_start)("now", now)("deadline", _deadline)("start", start) );
       } else if( deadline_exception_code == block_cpu_usage_exceeded::code_value ) {
diff --git a/programs/nodeos/main.cpp b/programs/nodeos/main.cpp
index a73e7cc582..a3533da6ec 100644
--- a/programs/nodeos/main.cpp
+++ b/programs/nodeos/main.cpp
@@ -171,6 +171,8 @@ int main(int argc, char** argv)
       app->set_stop_executor_cb([&app]() {
          ilog("appbase quit called");
          app->get_io_context().stop();
+         auto& chain = app->get_plugin<chain_plugin>().chain();
+         chain.interrupt_transaction();
       });
       app->set_version(htonl(short_hash));
       app->set_version_string(eosio::version::version_client());
@@ -220,6 +222,9 @@ int main(int argc, char** argv)
             elog( "database dirty flag set (likely due to unclean shutdown): replay required" );
             return DATABASE_DIRTY;
          }
+      } else if (e.code() == interrupt_exception::code_value) {
+         ilog("Interrupted, successfully exiting");
+         return SUCCESS;
       }
       elog( "${e}", ("e", e.to_detail_string()));
       return OTHER_FAIL;

From 2ff14e76cec8a8c5717dd0d1966a9a3648749aa0 Mon Sep 17 00:00:00 2001
From: Kevin Heifner <heifnerk@objectcomputing.com>
Date: Wed, 30 Oct 2024 13:53:39 -0500
Subject: [PATCH 04/17] GH-985 Add unittest for interrupt_transaction

---
 tests/chain_test_utils.hpp    | 35 ++++++++++++++++++++-----
 unittests/checktime_tests.cpp | 49 +++++++++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+), 6 deletions(-)

diff --git a/tests/chain_test_utils.hpp b/tests/chain_test_utils.hpp
index 67f4e5f30c..2e4de93272 100644
--- a/tests/chain_test_utils.hpp
+++ b/tests/chain_test_utils.hpp
@@ -49,6 +49,17 @@ struct reqactivated {
    }
 };
 
+inline private_key_type get_private_key( name keyname, string role ) {
+   if (keyname == config::system_account_name)
+      return private_key_type::regenerate<fc::ecc::private_key_shim>(fc::sha256::hash(std::string("nathan")));
+
+   return private_key_type::regenerate<fc::ecc::private_key_shim>(fc::sha256::hash(keyname.to_string()+role));
+}
+
+inline public_key_type  get_public_key( name keyname, string role ){
+   return get_private_key( keyname, role ).get_public_key();
+}
+
 // Create a read-only trx that works with bios reqactivated action
 inline auto make_bios_ro_trx(eosio::chain::controller& control) {
    const auto& pfm = control.get_protocol_feature_manager();
@@ -66,12 +77,7 @@ inline auto make_bios_ro_trx(eosio::chain::controller& control) {
 inline auto push_input_trx(appbase::scoped_app& app, eosio::chain::controller& control, account_name account, signed_transaction& trx) {
    trx.expiration = fc::time_point_sec{fc::time_point::now() + fc::seconds(30)};
    trx.set_reference_block( control.head().id() );
-   if (account == config::system_account_name) {
-      auto default_priv_key = private_key_type::regenerate<fc::ecc::private_key_shim>(fc::sha256::hash(std::string("nathan")));
-      trx.sign(default_priv_key, control.get_chain_id());
-   } else {
-      trx.sign(testing::tester::get_private_key(account, "active"), control.get_chain_id());
-   }
+   trx.sign(get_private_key(account, "active"), control.get_chain_id());
    auto ptrx = std::make_shared<packed_transaction>( trx, packed_transaction::compression_type::zlib );
 
    auto trx_promise = std::make_shared<std::promise<transaction_trace_ptr>>();
@@ -120,6 +126,23 @@ inline auto set_code(appbase::scoped_app& app, eosio::chain::controller& control
    return push_input_trx(app, control, account, trx);
 }
 
+inline transaction_trace_ptr create_account(appbase::scoped_app& app, eosio::chain::controller& control, account_name a, account_name creator) {
+   signed_transaction trx;
+
+   authority owner_auth{ get_public_key( a, "owner" ) };
+   authority active_auth{ get_public_key( a, "active" ) };
+
+   trx.actions.emplace_back( vector<permission_level>{{creator,config::active_name}},
+                             chain::newaccount{
+                                .creator  = creator,
+                                .name     = a,
+                                .owner    = owner_auth,
+                                .active   = active_auth,
+                             });
+
+   return push_input_trx(app, control, creator, trx);
+}
+
 inline void activate_protocol_features_set_bios_contract(appbase::scoped_app& app, chain_plugin* chain_plug) {
    using namespace appbase;
 
diff --git a/unittests/checktime_tests.cpp b/unittests/checktime_tests.cpp
index 37da24b8d7..dae5521b77 100644
--- a/unittests/checktime_tests.cpp
+++ b/unittests/checktime_tests.cpp
@@ -122,6 +122,55 @@ BOOST_AUTO_TEST_CASE_TEMPLATE( checktime_fail_tests, T, validating_testers ) { t
    BOOST_REQUIRE_EQUAL( t.validate(), true );
 } FC_LOG_AND_RETHROW() }
 
+BOOST_AUTO_TEST_CASE( checktime_interrupt_test) { try {
+   savanna_tester t;
+   savanna_tester other;
+   auto block = t.produce_block();
+   other.push_block(block);
+   t.create_account( "testapi"_n );
+   t.set_code( "testapi"_n, test_contracts::test_api_wasm() );
+   block = t.produce_block();
+   other.push_block(block);
+
+   auto [trace, b] = CALL_TEST_FUNCTION_WITH_BLOCK( t, "test_checktime", "checktime_pass", {});
+   BOOST_REQUIRE_EQUAL( b->transactions.size(), 1 );
+
+   // Make a copy of the valid block and swicth the checktime_pass transaction with checktime_failure
+   auto copy_b = std::make_shared<signed_block>(b->clone());
+   auto signed_tx = std::get<packed_transaction>(copy_b->transactions.back().trx).get_signed_transaction();
+   auto& act = signed_tx.actions.back();
+   constexpr chain::name checktime_fail_n{WASM_TEST_ACTION("test_checktime", "checktime_failure")};
+   act.name = checktime_fail_n;
+   act.data = fc::raw::pack(10000000000000000000ULL);
+   // Re-sign the transaction
+   signed_tx.signatures.clear();
+   signed_tx.sign(t.get_private_key("testapi"_n, "active"), t.get_chain_id());
+   // Replace the transaction
+   auto new_packed_tx = packed_transaction(signed_tx);
+   copy_b->transactions.back().trx = std::move(new_packed_tx);
+
+   // Re-calculate the transaction merkle
+   deque<digest_type> trx_digests;
+   const auto& trxs = copy_b->transactions;
+   for( const auto& a : trxs )
+      trx_digests.emplace_back( a.digest() );
+   copy_b->transaction_mroot = calculate_merkle( std::move(trx_digests) );
+   // Re-sign the block
+   copy_b->producer_signature = t.get_private_key(config::system_account_name, "active").sign(copy_b->calculate_id());
+
+   std::thread th( [&c=*other.control]() {
+      std::this_thread::sleep_for( std::chrono::milliseconds(50) );
+      c.interrupt_transaction();
+   } );
+
+   // apply block, caught in an "infinite" loop
+   BOOST_CHECK_EXCEPTION( other.push_block(copy_b), fc::exception,
+                          [](const fc::exception& e) { return e.code() == interrupt_exception::code_value; } );
+
+   th.join();
+
+} FC_LOG_AND_RETHROW() }
+
 BOOST_AUTO_TEST_CASE_TEMPLATE( checktime_pause_max_trx_cpu_extended_test, T, testers ) { try {
    fc::temp_directory tempdir;
    auto conf_genesis = tester::default_config( tempdir );

From a21cf18a6e43e564ed4c8f8a4af26078f0582aa1 Mon Sep 17 00:00:00 2001
From: Kevin Heifner <heifnerk@objectcomputing.com>
Date: Wed, 30 Oct 2024 13:54:34 -0500
Subject: [PATCH 05/17] GH-985 Add integration test for interrupt_transaction

---
 .../test_control_api_plugin.cpp               |   4 +
 .../test_control_plugin.hpp                   |  13 ++-
 .../test_control_plugin.cpp                   |  91 ++++++++++++++--
 tests/CMakeLists.txt                          |   3 +
 tests/interrupt_trx_test.py                   |  99 ++++++++++++++++++
 .../payloadless/payloadless.abi               |  10 ++
 .../payloadless/payloadless.cpp               |  14 +++
 .../payloadless/payloadless.hpp               |   3 +
 .../payloadless/payloadless.wasm              | Bin 2918 -> 3192 bytes
 9 files changed, 230 insertions(+), 7 deletions(-)
 create mode 100755 tests/interrupt_trx_test.py
 mode change 100644 => 100755 unittests/test-contracts/payloadless/payloadless.wasm

diff --git a/plugins/test_control_api_plugin/test_control_api_plugin.cpp b/plugins/test_control_api_plugin/test_control_api_plugin.cpp
index e2b5a6892a..ce13bd6408 100644
--- a/plugins/test_control_api_plugin/test_control_api_plugin.cpp
+++ b/plugins/test_control_api_plugin/test_control_api_plugin.cpp
@@ -51,6 +51,10 @@ void test_control_api_plugin::plugin_startup() {
       TEST_CONTROL_RW_CALL(throw_on, 202, http_params_types::params_required)
    }, appbase::exec_queue::read_write);
 
+   app().get_plugin<http_plugin>().add_api({
+      TEST_CONTROL_RW_CALL(swap_action, 202, http_params_types::params_required)
+   }, appbase::exec_queue::read_write);
+
 }
 
 void test_control_api_plugin::plugin_shutdown() {}
diff --git a/plugins/test_control_plugin/include/eosio/test_control_plugin/test_control_plugin.hpp b/plugins/test_control_plugin/include/eosio/test_control_plugin/test_control_plugin.hpp
index 970e711673..3f772f4bc2 100644
--- a/plugins/test_control_plugin/include/eosio/test_control_plugin/test_control_plugin.hpp
+++ b/plugins/test_control_plugin/include/eosio/test_control_plugin/test_control_plugin.hpp
@@ -33,7 +33,17 @@ class read_write {
       };
       empty throw_on(const throw_on_params& params) const;
 
-   private:
+      // produce a next block with `from` action replaced with `to` action
+      // requires Savanna to be active, this assumes blocks are is_proper_svnn_block
+      struct swap_action_params {
+         chain::name from;  // replace from action in block to `to` action
+         chain::name to;
+         fc::crypto::private_key trx_priv_key;
+         fc::crypto::private_key blk_priv_key;
+      };
+      empty swap_action(const swap_action_params& params) const;
+
+private:
       test_control_ptr my;
 };
 
@@ -68,3 +78,4 @@ class test_control_plugin : public plugin<test_control_plugin> {
 FC_REFLECT(eosio::test_control_apis::empty, )
 FC_REFLECT(eosio::test_control_apis::read_write::kill_node_on_producer_params, (producer)(where_in_sequence)(based_on_lib) )
 FC_REFLECT(eosio::test_control_apis::read_write::throw_on_params, (signal)(exception) )
+FC_REFLECT(eosio::test_control_apis::read_write::swap_action_params, (from)(to)(trx_priv_key)(blk_priv_key) )
diff --git a/plugins/test_control_plugin/test_control_plugin.cpp b/plugins/test_control_plugin/test_control_plugin.cpp
index 8402623e30..8644e56fa9 100644
--- a/plugins/test_control_plugin/test_control_plugin.cpp
+++ b/plugins/test_control_plugin/test_control_plugin.cpp
@@ -12,10 +12,11 @@ class test_control_plugin_impl {
    void kill_on_head(account_name prod, uint32_t where_in_seq);
 
    void set_throw_on_options(const test_control_apis::read_write::throw_on_params& throw_options);
+   void set_swap_action_options(const test_control_apis::read_write::swap_action_params& swap_options);
 private:
    void block_start(chain::block_num_type block_num);
    void accepted_block_header(const chain::block_id_type& id);
-   void accepted_block(const chain::block_id_type& id);
+   void accepted_block(const chain::block_id_type& id, const chain::signed_block_ptr& block);
    void irreversible_block(const chain::block_id_type& id);
    void applied_transaction();
    void voted_block();
@@ -25,6 +26,9 @@ class test_control_plugin_impl {
    void reset_throw();
    void process_next_block_state(const chain::block_id_type& id);
 
+   void swap_action_in_block(const chain::signed_block_ptr& b);
+   void reset_swap_action() { _swap_on_options = {}; }
+
    chain::controller&  _chain;
    struct kill_options {
       account_name        _producer;
@@ -35,7 +39,8 @@ class test_control_plugin_impl {
       bool                _track_head{false};
    } _kill_options;
 
-   test_control_apis::read_write::throw_on_params _throw_options;
+   test_control_apis::read_write::throw_on_params    _throw_options;
+   test_control_apis::read_write::swap_action_params _swap_on_options;
 
    std::optional<boost::signals2::scoped_connection> _block_start_connection;
    std::optional<boost::signals2::scoped_connection> _accepted_block_header_connection;
@@ -59,7 +64,7 @@ void test_control_plugin_impl::connect() {
    _accepted_block_connection =
          _chain.accepted_block().connect( [&]( const chain::block_signal_params& t ) {
             const auto& [ block, id ] = t;
-            accepted_block( id );
+            accepted_block( id, block );
          } );
    _irreversible_block_connection.emplace(
          _chain.irreversible_block().connect( [&]( const chain::block_signal_params& t ) {
@@ -96,6 +101,71 @@ void test_control_plugin_impl::reset_throw() {
    _throw_options = test_control_apis::read_write::throw_on_params{};
 }
 
+void test_control_plugin_impl::swap_action_in_block(const chain::signed_block_ptr& b) {
+   if (b->transactions.empty())
+      return;
+
+   bool found = std::find_if(b->transactions.cbegin(), b->transactions.cend(), [&](const auto& t) {
+      return std::visit(chain::overloaded{
+                    [](const transaction_id_type&) { return false; },
+                    [&](const chain::packed_transaction& pt) {
+                       for (const auto& a : pt.get_transaction().actions) {
+                          if (a.name == _swap_on_options.from)
+                             return true;
+                       }
+                       return false;
+                    }
+                 }, t.trx);
+   }) != b->transactions.cend();
+   if (!found)
+      return;
+
+   if (!b->is_proper_svnn_block()) {
+      elog("Block is not a Savanna block, swap_action failed.");
+      return;
+   }
+
+   auto copy_b = std::make_shared<chain::signed_block>(b->clone());
+   copy_b->previous = b->calculate_id();
+   copy_b->block_extensions.clear(); // remove QC extension since header will claim same as previous block
+   copy_b->timestamp = b->timestamp.next();
+   // swap out action
+   for (auto& t : copy_b->transactions) {
+      std::visit(chain::overloaded{
+                    [](const transaction_id_type&) {},
+                    [&](chain::packed_transaction& pt) {
+                       for (auto& a : pt.get_transaction().actions) {
+                          if (a.name == _swap_on_options.from) {
+                             auto signed_tx = pt.get_signed_transaction();
+                             auto& act = signed_tx.actions.back();
+                             act.name = _swap_on_options.to;
+                             // Re-sign the transaction
+                             signed_tx.signatures.clear();
+                             signed_tx.sign(_swap_on_options.trx_priv_key, _chain.get_chain_id());
+                             // Replace the transaction
+                             auto new_packed_tx = packed_transaction(signed_tx);
+                             const_cast<packed_transaction&>(pt) = std::move(new_packed_tx);
+                          }
+                       }
+                    }
+                 }, t.trx);
+   }
+   // Re-calculate the transaction merkle
+   std::deque<chain::digest_type> trx_digests;
+   const auto& trxs = copy_b->transactions;
+   for( const auto& tr : trxs )
+      trx_digests.emplace_back( tr.digest() );
+   copy_b->transaction_mroot = chain::calculate_merkle( std::move(trx_digests) );
+   // Re-sign the block
+   copy_b->producer_signature = _swap_on_options.blk_priv_key.sign(copy_b->calculate_id());
+
+   // will be processed on the next start_block if is_new_best_head
+   const auto&[is_new_best_head, bh] = _chain.accept_block(copy_b->calculate_id(), copy_b);
+   ilog("Swapped action ${f} to ${t}, is_new_best_head ${bh}, block ${bn}",
+        ("f", _swap_on_options.from)("t", _swap_on_options.to)("bh", is_new_best_head)("bn", bh ? bh->block_num() : 0));
+   reset_swap_action();
+}
+
 void test_control_plugin_impl::block_start(chain::block_num_type block_num) {
    if (_throw_options.signal == "block_start")
       throw_exception();
@@ -106,11 +176,13 @@ void test_control_plugin_impl::accepted_block_header(const chain::block_id_type&
       throw_exception();
 }
 
-void test_control_plugin_impl::accepted_block(const chain::block_id_type& id) {
+void test_control_plugin_impl::accepted_block(const chain::block_id_type& id, const chain::signed_block_ptr& block) {
    if (_kill_options._track_head)
       process_next_block_state(id);
    if (_throw_options.signal == "accepted_block")
       throw_exception();
+   if (!_swap_on_options.from.empty())
+      swap_action_in_block(block);
 }
 
 void test_control_plugin_impl::irreversible_block(const chain::block_id_type& id) {
@@ -185,12 +257,13 @@ void test_control_plugin_impl::kill_on_head(account_name prod, uint32_t where_in
    _kill_options._track_head = true;
 }
 
-// ----------------- throw_on --------------------------------
-
 void test_control_plugin_impl::set_throw_on_options(const test_control_apis::read_write::throw_on_params& throw_options) {
    _throw_options = throw_options;
 }
 
+void test_control_plugin_impl::set_swap_action_options(const test_control_apis::read_write::swap_action_params& swap_options) {
+   _swap_on_options = swap_options;
+}
 
 test_control_plugin::test_control_plugin() = default;
 
@@ -230,5 +303,11 @@ empty read_write::throw_on(const read_write::throw_on_params& params) const {
    return {};
 }
 
+empty read_write::swap_action(const read_write::swap_action_params& params) const {
+   ilog("received swap_action: ${p}", ("p", params));
+   my->set_swap_action_options(params);
+   return {};
+}
+
 } // namespace test_control_apis
 } // namespace eosio
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 2f4dd643ed..b50e052d29 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -89,6 +89,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/nodeos_contrl_c_test.py ${CMAKE_CURRE
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/read_only_trx_test.py ${CMAKE_CURRENT_BINARY_DIR}/read_only_trx_test.py COPYONLY)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/resource_monitor_plugin_test.py ${CMAKE_CURRENT_BINARY_DIR}/resource_monitor_plugin_test.py COPYONLY)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/light_validation_sync_test.py ${CMAKE_CURRENT_BINARY_DIR}/light_validation_sync_test.py COPYONLY)
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/interrupt_trx_test.py ${CMAKE_CURRENT_BINARY_DIR}/interrupt_trx_test.py COPYONLY)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/trace_plugin_test.py ${CMAKE_CURRENT_BINARY_DIR}/trace_plugin_test.py COPYONLY)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/nested_container_multi_index_test.py ${CMAKE_CURRENT_BINARY_DIR}/nested_container_multi_index_test.py COPYONLY)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/auto_bp_peering_test.py ${CMAKE_CURRENT_BINARY_DIR}/auto_bp_peering_test.py COPYONLY)
@@ -416,6 +417,8 @@ add_test(NAME light_validation_sync_test COMMAND tests/light_validation_sync_tes
 set_property(TEST light_validation_sync_test PROPERTY LABELS nonparallelizable_tests)
 add_test(NAME light_validation_sync_if_test COMMAND tests/light_validation_sync_test.py --activate-if -v ${UNSHARE} WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
 set_property(TEST light_validation_sync_if_test PROPERTY LABELS nonparallelizable_tests)
+add_test(NAME interrupt_trx_test COMMAND tests/interrupt_trx_test.py -v ${UNSHARE} WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
+set_property(TEST interrupt_trx_test PROPERTY LABELS nonparallelizable_tests)
 
 add_test(NAME auto_bp_peering_test COMMAND tests/auto_bp_peering_test.py -v ${UNSHARE} WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
 set_property(TEST auto_bp_peering_test PROPERTY LABELS long_running_tests)
diff --git a/tests/interrupt_trx_test.py b/tests/interrupt_trx_test.py
new file mode 100755
index 0000000000..ceddfddfd1
--- /dev/null
+++ b/tests/interrupt_trx_test.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+
+import json
+import signal
+
+from TestHarness import Account, Cluster, Node, ReturnType, TestHelper, Utils, WalletMgr
+from TestHarness.TestHelper import AppArgs
+
+###############################################################
+# interrupt_trx_test
+#
+# Test applying a block with an infinite trx and verify SIGTERM kill
+# interrupts the transaction and aborts the block.
+# 
+###############################################################
+
+# Parse command line arguments
+args = TestHelper.parse_args({"-v","--dump-error-details","--leave-running","--keep-logs","--unshared"})
+Utils.Debug = args.v
+dumpErrorDetails=args.dump_error_details
+dontKill=args.leave_running
+keepLogs=args.keep_logs
+
+EOSIO_ACCT_PRIVATE_DEFAULT_KEY = "5KQwrPbwdL6PhXujxW37FSSQZ1JiwsST4cqQzDeyXtP79zkvFD3"
+EOSIO_ACCT_PUBLIC_DEFAULT_KEY = "EOS6MRyAjQq8ud7hVNYcfnVPJqcVpscN5So8BhtHuGYqET5GDW5CV"
+
+walletMgr=WalletMgr(True)
+cluster=Cluster(unshared=args.unshared, keepRunning=args.leave_running, keepLogs=args.keep_logs)
+cluster.setWalletMgr(walletMgr)
+
+testSuccessful = False
+try:
+    TestHelper.printSystemInfo("BEGIN")
+    assert cluster.launch(
+        pnodes=1,
+        prodCount=1,
+        totalProducers=1,
+        totalNodes=2,
+        loadSystemContract=False,
+        activateIF=True,
+        extraNodeosArgs="--plugin eosio::test_control_api_plugin")
+
+    prodNode = cluster.getNode(0)
+    validationNode = cluster.getNode(1)
+
+    # Create a transaction to create an account
+    Utils.Print("create a new account payloadless from the producer node")
+    payloadlessAcc = Account("payloadless")
+    payloadlessAcc.ownerPublicKey = EOSIO_ACCT_PUBLIC_DEFAULT_KEY
+    payloadlessAcc.activePublicKey = EOSIO_ACCT_PUBLIC_DEFAULT_KEY
+    prodNode.createAccount(payloadlessAcc, cluster.eosioAccount)
+
+    contractDir="unittests/test-contracts/payloadless"
+    wasmFile="payloadless.wasm"
+    abiFile="payloadless.abi"
+    Utils.Print("Publish payloadless contract")
+    trans = prodNode.publishContract(payloadlessAcc, contractDir, wasmFile, abiFile, waitForTransBlock=True)
+
+    contract="payloadless"
+    action="doit"
+    data="{}"
+    opts="--permission payloadless@active"
+    trans=prodNode.pushMessage(contract, action, data, opts)
+    assert trans and trans[0], "Failed to push doit action"
+
+    action="doitslow"
+    trans=prodNode.pushMessage(contract, action, data, opts)
+    assert trans and trans[0], "Failed to push doitslow action"
+
+    action="doitforever"
+    trans=prodNode.pushMessage(contract, action, data, opts, silentErrors=True)
+    assert trans and not trans[0], "push doitforever action did not fail as expected"
+
+    prodNode.processUrllibRequest("test_control", "swap_action", {"from": "doitslow", "to": "doitforever"})
+
+    action="doitslow"
+    trans=prodNode.pushMessage(contract, action, data, opts)
+    assert trans and trans[0], "Failed to push doitslow action"
+
+    prodNode.waitForProducer("defproducera")
+
+    # Needs https://github.com/AntelopeIO/spring/issues/876
+    # prodNode.processUrllibRequest("test_control", "swap_action",
+    #                               {"from":"doitslow", "to":"doitforever",
+    #                                "trx_priv_key":EOSIO_ACCT_PRIVATE_DEFAULT_KEY,
+    #                                "blk_priv_key":cluster.defproduceraAccount.activePrivateKey})
+    #
+    # assert not prodNode.waitForHeadToAdvance(3), f"prodNode did advance head after doitforever action"
+    #
+    # prodNode.popenProc.send_signal(signal.SIGTERM)
+    #
+    # assert not prodNode.verifyAlive(), "prodNode did not exit from SIGTERM"
+
+    testSuccessful = True
+finally:
+    TestHelper.shutdown(cluster, walletMgr, testSuccessful, dumpErrorDetails)
+
+exitCode = 0 if testSuccessful else 1
+exit(exitCode)
diff --git a/unittests/test-contracts/payloadless/payloadless.abi b/unittests/test-contracts/payloadless/payloadless.abi
index aafa35c171..f78c5c42f0 100644
--- a/unittests/test-contracts/payloadless/payloadless.abi
+++ b/unittests/test-contracts/payloadless/payloadless.abi
@@ -8,6 +8,11 @@
             "base": "",
             "fields": []
         },
+        {
+            "name": "doitforever",
+            "base": "",
+            "fields": []
+        },
         {
             "name": "doitslow",
             "base": "",
@@ -20,6 +25,11 @@
             "type": "doit",
             "ricardian_contract": ""
         },
+        {
+            "name": "doitforever",
+            "type": "doitforever",
+            "ricardian_contract": ""
+        },
         {
             "name": "doitslow",
             "type": "doitslow",
diff --git a/unittests/test-contracts/payloadless/payloadless.cpp b/unittests/test-contracts/payloadless/payloadless.cpp
index e9556ca5ed..d7dcc99d4d 100644
--- a/unittests/test-contracts/payloadless/payloadless.cpp
+++ b/unittests/test-contracts/payloadless/payloadless.cpp
@@ -50,3 +50,17 @@ void payloadless::doitslow() {
    }
 }
 
+void payloadless::doitforever() {
+   print("Im a payloadless forever action");
+   constexpr size_t max_cpu_prime = std::numeric_limits<size_t>::max();
+
+   while (true) {
+      for (size_t p = 2; p <= max_cpu_prime; p += 1) {
+         if (is_prime(p) && is_mersenne_prime(p)) {
+            // We need to keep an eye on this to make sure it doesn't get optimized out. So far so good.
+            //eosio::print_f(" %u", p);
+         }
+      }
+   }
+}
+
diff --git a/unittests/test-contracts/payloadless/payloadless.hpp b/unittests/test-contracts/payloadless/payloadless.hpp
index 0fea87a29b..10f696fded 100644
--- a/unittests/test-contracts/payloadless/payloadless.hpp
+++ b/unittests/test-contracts/payloadless/payloadless.hpp
@@ -11,4 +11,7 @@ class [[eosio::contract]] payloadless : public eosio::contract {
 
    [[eosio::action]]
    void doitslow();
+
+   [[eosio::action]]
+   void doitforever();
 };
diff --git a/unittests/test-contracts/payloadless/payloadless.wasm b/unittests/test-contracts/payloadless/payloadless.wasm
old mode 100644
new mode 100755
index 53cc07f5603f905f772a3a84bbac90ce58f901b1..6c8396afb64ee4691b9380a46ca213cc3ad5f5ac
GIT binary patch
delta 408
zcmX|)F-rqM5QS%E_dLzsnImclsdf`B>@6&BB@qLH*xMZO1Y_czQN)0F`2$Meu<$2n
zA%cjY7V!sYX=P!fg<xT4=_a7l%zV5z?|tNd3Ri2m;s|?mEMp4*$8orYF(O(F0Y~yI
zLEY_xNI)My{x3U6BdxF%wL1h>5Xa39IGKk@!NJRl%k&(laUy-jd|}dB=)QiwT;9E(
zJb|QD?n{v9O{MQxzP9PcPI|~F!9z;)S06mkq>%aKOVqUJ8n4D3MFUzgL{%4PsNQ;R
ziJetoIB&Xc1j#NGcTniAnFGaz<eHf;Z7E_VP?A1sL{--uk}oUEFg0=}+;TyPK~qDa
z1LFFAYz~(yNJ$w=R3zWS68kG=y31>WE^l0sDKlA2XB#@+*(%XA4B2BvVGKEwdWz|H
rW)Z9DyqKA-wPYaUpwo<k^=7!cD>tHcxF5D<P~Y2(wgEH<-je{oaIaBZ

delta 141
zcmew%@l0$(45Kd>Cp!lx7dtlt4-X3~V*w*0BP%l_10$Olb3LPD!vO~FdIrb02#Q^Z
zi7huZH@~QoiGek-pdhD`frsm?5GM<R06+J%$*UPv7=<QZU=(8%p8Stdd9xDJ7ItPY
mM&`-;x$Gvd<od+KT{`(Ow;QACWGx<5P6kH?Zk7TC1_l7F)*!_I


From ecf2d0a6126cb4a1a6a5d92be14b1ff8b2d98aef Mon Sep 17 00:00:00 2001
From: Kevin Heifner <heifnerk@objectcomputing.com>
Date: Thu, 31 Oct 2024 09:49:51 -0500
Subject: [PATCH 06/17] GH-985 Better logic for determining if deadline timer
 was because of interrupt

---
 libraries/chain/transaction_context.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libraries/chain/transaction_context.cpp b/libraries/chain/transaction_context.cpp
index 279b64a982..420b22ebec 100644
--- a/libraries/chain/transaction_context.cpp
+++ b/libraries/chain/transaction_context.cpp
@@ -459,10 +459,10 @@ namespace eosio::chain {
          return;
 
       auto now = fc::time_point::now();
-      if (explicit_billed_cpu_time) {
+      if (explicit_billed_cpu_time && block_deadline > now) {
          EOS_THROW( interrupt_exception, "interrupt signaled, ran ${bt}us, start ${s}",
                     ("bt", now - pseudo_start)("s", start) );
-      } else if( deadline_exception_code == deadline_exception::code_value ) {
+      } else if( explicit_billed_cpu_time || deadline_exception_code == deadline_exception::code_value ) {
          EOS_THROW( deadline_exception, "deadline exceeded ${billing_timer}us",
                      ("billing_timer", now - pseudo_start)("now", now)("deadline", _deadline)("start", start) );
       } else if( deadline_exception_code == block_cpu_usage_exceeded::code_value ) {

From d0ea755794df1a64381bbc4514acc571ec2c6cc8 Mon Sep 17 00:00:00 2001
From: Kevin Heifner <heifnerk@objectcomputing.com>
Date: Thu, 31 Oct 2024 09:50:21 -0500
Subject: [PATCH 07/17] GH-985 Avoid multiple expiration callback calls.

---
 .../chain/platform_timer_asio_fallback.cpp    | 22 +++++++------
 libraries/chain/platform_timer_kqueue.cpp     | 22 +++++++------
 libraries/chain/platform_timer_posix.cpp      | 32 +++++++++++--------
 3 files changed, 45 insertions(+), 31 deletions(-)

diff --git a/libraries/chain/platform_timer_asio_fallback.cpp b/libraries/chain/platform_timer_asio_fallback.cpp
index 7ec808e128..b82552e412 100644
--- a/libraries/chain/platform_timer_asio_fallback.cpp
+++ b/libraries/chain/platform_timer_asio_fallback.cpp
@@ -57,36 +57,40 @@ platform_timer::~platform_timer() {
 
 void platform_timer::start(fc::time_point tp) {
    if(tp == fc::time_point::maximum()) {
-      expired = 0;
+      expired = false;
       return;
    }
    fc::microseconds x = tp.time_since_epoch() - fc::time_point::now().time_since_epoch();
    if(x.count() <= 0)
-      expired = 1;
+      expired = true;
    else {
 #if 0
       std::promise<void> p;
       auto f = p.get_future();
       checktime_ios->post([&p,this]() {
-         expired = 0;
+         expired = false;
          p.set_value();
       });
       f.get();
 #endif
-      expired = 0;
+      expired = false;
       my->timer->expires_after(std::chrono::microseconds(x.count()));
       my->timer->async_wait([this](const boost::system::error_code& ec) {
          if(ec)
             return;
-         expired = 1;
-         call_expiration_callback();
+         bool expected = false;
+         if (expired.compare_exchange_strong(expected, true)) {
+            call_expiration_callback();
+         }
       });
    }
 }
 
 void platform_timer::expire_now() {
-   expired = 1;
-   call_expiration_callback();
+   bool expected = false;
+   if (expired.compare_exchange_strong(expected, true)) {
+      call_expiration_callback();
+   }
 }
 
 void platform_timer::stop() {
@@ -94,7 +98,7 @@ void platform_timer::stop() {
       return;
 
    my->timer->cancel();
-   expired = 1;
+   expired = true;
 }
 
 }}
diff --git a/libraries/chain/platform_timer_kqueue.cpp b/libraries/chain/platform_timer_kqueue.cpp
index 3e9dc68104..823428c804 100644
--- a/libraries/chain/platform_timer_kqueue.cpp
+++ b/libraries/chain/platform_timer_kqueue.cpp
@@ -58,8 +58,10 @@ platform_timer::platform_timer() {
 
             if(c == 1 && anEvent.filter == EVFILT_TIMER) {
                platform_timer* self = (platform_timer*)anEvent.udata;
-               self->expired = 1;
-               self->call_expiration_callback();
+               bool expected = false;
+               if (self->expired.compare_exchange_strong(expected, true)) {
+                  self->call_expiration_callback();
+               }
             }
             else if(c == 1 && anEvent.filter == EVFILT_USER)
                return;
@@ -90,25 +92,27 @@ platform_timer::~platform_timer() {
 
 void platform_timer::start(fc::time_point tp) {
    if(tp == fc::time_point::maximum()) {
-      expired = 0;
+      expired = false;
       return;
    }
    fc::microseconds x = tp.time_since_epoch() - fc::time_point::now().time_since_epoch();
    if(x.count() <= 0)
-      expired = 1;
+      expired = true;
    else {
       struct kevent64_s aTimerEvent;
       EV_SET64(&aTimerEvent, my->timerid, EVFILT_TIMER, EV_ADD|EV_ENABLE|EV_ONESHOT, NOTE_USECONDS|NOTE_CRITICAL, x.count(), (uint64_t)this, 0, 0);
 
-      expired = 0;
+      expired = false;
       if(kevent64(kqueue_fd, &aTimerEvent, 1, NULL, 0, KEVENT_FLAG_IMMEDIATE, NULL) != 0)
-         expired = 1;
+         expired = true;
    }
 }
 
 void platform_timer::expire_now() {
-   expired = 1;
-   call_expiration_callback();
+   bool expected = false;
+   if (expired.compare_exchange_strong(expected, true)) {
+      call_expiration_callback();
+   }
 }
 
 void platform_timer::stop() {
@@ -118,7 +122,7 @@ void platform_timer::stop() {
    struct kevent64_s stop_timer_event;
    EV_SET64(&stop_timer_event, my->timerid, EVFILT_TIMER, EV_DELETE, 0, 0, 0, 0, 0);
    kevent64(kqueue_fd, &stop_timer_event, 1, NULL, 0, KEVENT_FLAG_IMMEDIATE, NULL);
-   expired = 1;
+   expired = true;
 }
 
 }}
diff --git a/libraries/chain/platform_timer_posix.cpp b/libraries/chain/platform_timer_posix.cpp
index 0a50ebcade..3702333c32 100644
--- a/libraries/chain/platform_timer_posix.cpp
+++ b/libraries/chain/platform_timer_posix.cpp
@@ -5,12 +5,14 @@
 #include <fc/fwd_impl.hpp>
 #include <fc/exception/exception.hpp>
 
+#include <atomic>
 #include <mutex>
 
 #include <signal.h>
 #include <time.h>
+#include <sys/types.h>
 
-namespace eosio { namespace chain {
+namespace eosio::chain {
 
 static_assert(std::atomic_bool::is_always_lock_free, "Only lock-free atomics AS-safe.");
 
@@ -19,8 +21,10 @@ struct platform_timer::impl {
 
    static void sig_handler(int, siginfo_t* si, void*) {
       platform_timer* self = (platform_timer*)si->si_value.sival_ptr;
-      self->expired = 1;
-      self->call_expiration_callback();
+      bool expected = false;
+      if (self->expired.compare_exchange_strong(expected, true)) {
+         self->call_expiration_callback();
+      }
    }
 };
 
@@ -28,9 +32,9 @@ platform_timer::platform_timer() {
    static_assert(sizeof(impl) <= fwd_size);
 
    static bool initialized;
-   static std::mutex initalized_mutex;
+   static std::mutex initialized_mutex;
 
-   if(std::lock_guard guard(initalized_mutex); !initialized) {
+   if(std::lock_guard guard(initialized_mutex); !initialized) {
       struct sigaction act;
       sigemptyset(&act.sa_mask);
       act.sa_sigaction = impl::sig_handler;
@@ -55,25 +59,27 @@ platform_timer::~platform_timer() {
 
 void platform_timer::start(fc::time_point tp) {
    if(tp == fc::time_point::maximum()) {
-      expired = 0;
+      expired = false;
       return;
    }
    fc::microseconds x = tp.time_since_epoch() - fc::time_point::now().time_since_epoch();
    if(x.count() <= 0)
-      expired = 1;
+      expired = true;
    else {
       time_t secs = x.count() / 1000000;
       long nsec = (x.count() - (secs*1000000)) * 1000;
       struct itimerspec enable = {{0, 0}, {secs, nsec}};
-      expired = 0;
+      expired = false;
       if(timer_settime(my->timerid, 0, &enable, NULL) != 0)
-         expired = 1;
+         expired = true;
    }
 }
 
 void platform_timer::expire_now() {
-   expired = 1;
-   call_expiration_callback();
+   bool expected = false;
+   if (expired.compare_exchange_strong(expected, true)) {
+      call_expiration_callback();
+   }
 }
 
 void platform_timer::stop() {
@@ -81,7 +87,7 @@ void platform_timer::stop() {
       return;
    struct itimerspec disable = {{0, 0}, {0, 0}};
    timer_settime(my->timerid, 0, &disable, NULL);
-   expired = 1;
+   expired = true;
 }
 
-}}
+}

From 078781af1794767cbf8b6d13275d9fe07104fe55 Mon Sep 17 00:00:00 2001
From: Kevin Heifner <heifnerk@objectcomputing.com>
Date: Thu, 31 Oct 2024 13:54:59 -0500
Subject: [PATCH 08/17] GH-985 Better logging for interrupted apply_block trx

---
 libraries/chain/controller.cpp | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp
index 8fa55cadf0..474de6503e 100644
--- a/libraries/chain/controller.cpp
+++ b/libraries/chain/controller.cpp
@@ -3813,7 +3813,7 @@ struct controller_impl {
 
                if( transaction_failed && !transaction_can_fail) {
                   if (trace->except->code() == interrupt_exception::code_value) {
-                     ilog("Interrupt of trx: ${t}", ("t", *trace));
+                     ilog("Interrupt of trx id: ${id}", ("id", trace->id));
                   } else {
                      edump((*trace));
                   }
@@ -4370,7 +4370,15 @@ struct controller_impl {
          log_irreversible();
          transition_to_savanna_if_needed();
          return controller::apply_blocks_result::complete;
-      } FC_LOG_AND_RETHROW( )
+      } catch (fc::exception& e) {
+         if (e.code() != interrupt_exception::code_value) {
+            wlog("${d}", ("d",e.to_detail_string()));
+            FC_RETHROW_EXCEPTION(e, warn, "rethrow");
+         }
+         throw;
+      } catch (...) {
+         try { throw; } FC_LOG_AND_RETHROW()
+      }
    }
 
    controller::apply_blocks_result maybe_apply_blocks( const forked_callback_t& forked_cb, const trx_meta_cache_lookup& trx_lookup )

From 7fef19b97fdd8b3d349e725439f76e13754128c0 Mon Sep 17 00:00:00 2001
From: Kevin Heifner <heifnerk@objectcomputing.com>
Date: Fri, 1 Nov 2024 08:16:02 -0500
Subject: [PATCH 09/17] Update to appbase with signal handling on different
 thread

---
 libraries/appbase | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libraries/appbase b/libraries/appbase
index 13090992fe..5e09479dde 160000
--- a/libraries/appbase
+++ b/libraries/appbase
@@ -1 +1 @@
-Subproject commit 13090992fe675234716fa69d8bd037dbc4755787
+Subproject commit 5e09479dde4ce251b2dde9f17537ea234e599df6

From f8f327c0cb057331eca932fbe3a923a1fcd0e2b3 Mon Sep 17 00:00:00 2001
From: Kevin Heifner <heifnerk@objectcomputing.com>
Date: Fri, 1 Nov 2024 08:30:23 -0500
Subject: [PATCH 10/17] GH-985 Update test now that appbase supports interrupt
 off main thread

---
 tests/interrupt_trx_test.py | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/tests/interrupt_trx_test.py b/tests/interrupt_trx_test.py
index ceddfddfd1..b39172f4c1 100755
--- a/tests/interrupt_trx_test.py
+++ b/tests/interrupt_trx_test.py
@@ -79,17 +79,16 @@
 
     prodNode.waitForProducer("defproducera")
 
-    # Needs https://github.com/AntelopeIO/spring/issues/876
-    # prodNode.processUrllibRequest("test_control", "swap_action",
-    #                               {"from":"doitslow", "to":"doitforever",
-    #                                "trx_priv_key":EOSIO_ACCT_PRIVATE_DEFAULT_KEY,
-    #                                "blk_priv_key":cluster.defproduceraAccount.activePrivateKey})
-    #
-    # assert not prodNode.waitForHeadToAdvance(3), f"prodNode did advance head after doitforever action"
-    #
-    # prodNode.popenProc.send_signal(signal.SIGTERM)
-    #
-    # assert not prodNode.verifyAlive(), "prodNode did not exit from SIGTERM"
+    prodNode.processUrllibRequest("test_control", "swap_action",
+                                  {"from":"doitslow", "to":"doitforever",
+                                   "trx_priv_key":EOSIO_ACCT_PRIVATE_DEFAULT_KEY,
+                                   "blk_priv_key":cluster.defproduceraAccount.activePrivateKey})
+
+    assert not prodNode.waitForHeadToAdvance(3), f"prodNode did advance head after doitforever action"
+
+    prodNode.interruptAndVerifyExitStatus()
+
+    assert not prodNode.verifyAlive(), "prodNode did not exit from SIGINT"
 
     testSuccessful = True
 finally:

From 8012d037b0ac8cd90850da14ce62128439912029 Mon Sep 17 00:00:00 2001
From: Kevin Heifner <heifnerk@objectcomputing.com>
Date: Fri, 1 Nov 2024 09:49:41 -0500
Subject: [PATCH 11/17] GH-985 chain is not available until after initialize

---
 programs/nodeos/main.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/programs/nodeos/main.cpp b/programs/nodeos/main.cpp
index a3533da6ec..bf161cf79a 100644
--- a/programs/nodeos/main.cpp
+++ b/programs/nodeos/main.cpp
@@ -168,12 +168,6 @@ int main(int argc, char** argv)
       uint32_t short_hash = 0;
       fc::from_hex(eosio::version::version_hash(), (char*)&short_hash, sizeof(short_hash));
 
-      app->set_stop_executor_cb([&app]() {
-         ilog("appbase quit called");
-         app->get_io_context().stop();
-         auto& chain = app->get_plugin<chain_plugin>().chain();
-         chain.interrupt_transaction();
-      });
       app->set_version(htonl(short_hash));
       app->set_version_string(eosio::version::version_client());
       app->set_full_version_string(eosio::version::version_full());
@@ -194,6 +188,12 @@ int main(int argc, char** argv)
          }
          return INITIALIZE_FAIL;
       }
+      controller& chain = app->get_plugin<chain_plugin>().chain();
+      app->set_stop_executor_cb([&app, &chain]() {
+         ilog("appbase quit called");
+         chain.interrupt_transaction();
+         app->get_io_context().stop();
+      });
       if (auto resmon_plugin = app->find_plugin<resource_monitor_plugin>()) {
          resmon_plugin->monitor_directory(app->data_dir());
       } else {

From cae2e950552a2b8ce430271477f956ffada749fd Mon Sep 17 00:00:00 2001
From: Lin Huang <lin.huang@eosnetwork.com>
Date: Fri, 1 Nov 2024 11:29:52 -0400
Subject: [PATCH 12/17] Fix a typo

---
 tests/ship_restart_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/ship_restart_test.py b/tests/ship_restart_test.py
index fbc410c50d..9637d2313e 100755
--- a/tests/ship_restart_test.py
+++ b/tests/ship_restart_test.py
@@ -59,14 +59,14 @@ def equalFiles(file1, file2):
                 return True
 
 # Verifies that SHiP should fail to restart with a corrupted first entry header
-def corruptedHeaderTest(pos, curruptedValue, shipNode):
+def corruptedHeaderTest(pos, corruptedValue, shipNode):
     # restore log and index
     shutil.copyfile(origStateHistoryLog, stateHistoryLog)
     shutil.copyfile(origStateHistoryIndex, stateHistoryIndex)
 
     with open(stateHistoryLog, 'rb+') as f: # opened as binary file
         f.seek(pos) # seek to the position to corrupt
-        f.write(curruptedValue) # corrupt it
+        f.write(corruptedValue) # corrupt it
 
     isRelaunchSuccess = shipNode.relaunch()
     assert not isRelaunchSuccess, "SHiP node should have failed to relaunch"

From bb4d159218eb5fb6603f0c82702800c7bd4dae90 Mon Sep 17 00:00:00 2001
From: Kevin Heifner <heifnerk@objectcomputing.com>
Date: Fri, 1 Nov 2024 10:38:54 -0500
Subject: [PATCH 13/17] GH-985 Remove duplicate code

---
 libraries/chain/platform_timer_asio_fallback.cpp | 14 +-------------
 libraries/chain/platform_timer_kqueue.cpp        |  5 +----
 libraries/chain/platform_timer_posix.cpp         |  5 +----
 3 files changed, 3 insertions(+), 21 deletions(-)

diff --git a/libraries/chain/platform_timer_asio_fallback.cpp b/libraries/chain/platform_timer_asio_fallback.cpp
index b82552e412..547bc5dcc7 100644
--- a/libraries/chain/platform_timer_asio_fallback.cpp
+++ b/libraries/chain/platform_timer_asio_fallback.cpp
@@ -64,24 +64,12 @@ void platform_timer::start(fc::time_point tp) {
    if(x.count() <= 0)
       expired = true;
    else {
-#if 0
-      std::promise<void> p;
-      auto f = p.get_future();
-      checktime_ios->post([&p,this]() {
-         expired = false;
-         p.set_value();
-      });
-      f.get();
-#endif
       expired = false;
       my->timer->expires_after(std::chrono::microseconds(x.count()));
       my->timer->async_wait([this](const boost::system::error_code& ec) {
          if(ec)
             return;
-         bool expected = false;
-         if (expired.compare_exchange_strong(expected, true)) {
-            call_expiration_callback();
-         }
+         expire_now();
       });
    }
 }
diff --git a/libraries/chain/platform_timer_kqueue.cpp b/libraries/chain/platform_timer_kqueue.cpp
index 823428c804..16f076a0cb 100644
--- a/libraries/chain/platform_timer_kqueue.cpp
+++ b/libraries/chain/platform_timer_kqueue.cpp
@@ -58,10 +58,7 @@ platform_timer::platform_timer() {
 
             if(c == 1 && anEvent.filter == EVFILT_TIMER) {
                platform_timer* self = (platform_timer*)anEvent.udata;
-               bool expected = false;
-               if (self->expired.compare_exchange_strong(expected, true)) {
-                  self->call_expiration_callback();
-               }
+               self->expire_now();
             }
             else if(c == 1 && anEvent.filter == EVFILT_USER)
                return;
diff --git a/libraries/chain/platform_timer_posix.cpp b/libraries/chain/platform_timer_posix.cpp
index 3702333c32..4388fa18b9 100644
--- a/libraries/chain/platform_timer_posix.cpp
+++ b/libraries/chain/platform_timer_posix.cpp
@@ -21,10 +21,7 @@ struct platform_timer::impl {
 
    static void sig_handler(int, siginfo_t* si, void*) {
       platform_timer* self = (platform_timer*)si->si_value.sival_ptr;
-      bool expected = false;
-      if (self->expired.compare_exchange_strong(expected, true)) {
-         self->call_expiration_callback();
-      }
+      self->expire_now();
    }
 };
 

From 7cc8ea4ec67a512551ed3f9968c861550d1ac159 Mon Sep 17 00:00:00 2001
From: Lin Huang <lin.huang@eosnetwork.com>
Date: Fri, 1 Nov 2024 12:05:20 -0400
Subject: [PATCH 14/17] Add file open mode as an argument to
 Utils.compareFiles(); use Utils.compareFiles() instead of own equalFiles()

---
 tests/TestHarness/testUtils.py |  6 +++---
 tests/ship_restart_test.py     | 35 ++++++++--------------------------
 2 files changed, 11 insertions(+), 30 deletions(-)

diff --git a/tests/TestHarness/testUtils.py b/tests/TestHarness/testUtils.py
index 3253715c60..f44f0b8ead 100644
--- a/tests/TestHarness/testUtils.py
+++ b/tests/TestHarness/testUtils.py
@@ -514,9 +514,9 @@ def compare(obj1,obj2,context):
         return "comparison of %s type is not supported, context=%s" % (typeName,context)
 
     @staticmethod
-    def compareFiles(file1: str, file2: str):
-        f1 = open(file1)
-        f2 = open(file2)
+    def compareFiles(file1: str, file2: str, mode="r"):
+        f1 = open(file1, mode)
+        f2 = open(file2, mode)
 
         i = 0
         same = True
diff --git a/tests/ship_restart_test.py b/tests/ship_restart_test.py
index 9637d2313e..de059460f3 100755
--- a/tests/ship_restart_test.py
+++ b/tests/ship_restart_test.py
@@ -39,25 +39,6 @@
 origStateHistoryIndex = ""
 stateHistoryIndex     = ""
 
-# Returns True if file1 and file2 contain the same content
-def equalFiles(file1, file2):
-    # not the same if sizes are different
-    if os.path.getsize(file1) != os.path.getsize(file2):
-        return False
-
-    readSize=1024
-    with open(file1, 'rb') as f1, open(file2, 'rb') as f2:
-        while True:
-            bytes1 = f1.read(readSize)
-            bytes2 = f2.read(readSize)
-
-            if bytes1 != bytes2:
-                return False
-
-            # end of both files
-            if not bytes1:
-                return True
-
 # Verifies that SHiP should fail to restart with a corrupted first entry header
 def corruptedHeaderTest(pos, corruptedValue, shipNode):
     # restore log and index
@@ -121,8 +102,8 @@ def corruptedHeaderTest(pos, corruptedValue, shipNode):
 
     isRelaunchSuccess = shipNode.relaunch()
     assert isRelaunchSuccess, "Failed to relaunch shipNode"
-    assert equalFiles(stateHistoryLog, origStateHistoryLog) # log unchanged
-    assert equalFiles(stateHistoryIndex, origStateHistoryIndex) # index regenerated
+    assert Utils.compareFiles(stateHistoryLog, origStateHistoryLog, mode="rb") # log unchanged
+    assert Utils.compareFiles(stateHistoryIndex, origStateHistoryIndex, mode="rb") # index regenerated
 
     shipNode.kill(signal.SIGTERM) # shut down ship node for next test
 
@@ -139,8 +120,8 @@ def corruptedHeaderTest(pos, corruptedValue, shipNode):
 
     isRelaunchSuccess = shipNode.relaunch()
     assert isRelaunchSuccess, "Failed to relaunch shipNode"
-    assert equalFiles(stateHistoryLog, origStateHistoryLog)
-    assert equalFiles(stateHistoryIndex, origStateHistoryIndex)
+    assert Utils.compareFiles(stateHistoryLog, origStateHistoryLog, mode="rb")
+    assert Utils.compareFiles(stateHistoryIndex, origStateHistoryIndex, mode="rb")
     '''
 
     #-------- Truncate index file. It should be regenerated
@@ -158,8 +139,8 @@ def corruptedHeaderTest(pos, corruptedValue, shipNode):
 
     isRelaunchSuccess = shipNode.relaunch()
     assert isRelaunchSuccess, "Failed to relaunch shipNode"
-    assert equalFiles(stateHistoryLog, origStateHistoryLog) # log file unchanged
-    assert equalFiles(stateHistoryIndex, origStateHistoryIndex) # index file regenerated
+    assert Utils.compareFiles(stateHistoryLog, origStateHistoryLog, mode="rb") # log file unchanged
+    assert Utils.compareFiles(stateHistoryIndex, origStateHistoryIndex, mode="rb") # index file regenerated
 
     shipNode.kill(signal.SIGTERM) # shut down it for next test
 
@@ -177,8 +158,8 @@ def corruptedHeaderTest(pos, corruptedValue, shipNode):
 
     isRelaunchSuccess = shipNode.relaunch()
     assert isRelaunchSuccess, "Failed to relaunch shipNode"
-    assert equalFiles(stateHistoryLog, origStateHistoryLog) # log file not changed
-    assert equalFiles(stateHistoryIndex, origStateHistoryIndex) # index file regenerated
+    assert Utils.compareFiles(stateHistoryLog, origStateHistoryLog, mode="rb") # log file not changed
+    assert Utils.compareFiles(stateHistoryIndex, origStateHistoryIndex, mode="rb") # index file regenerated
 
     shipNode.kill(signal.SIGTERM) # shut down it for next test
 

From e636de3e89917e0778195aa40f44c7f95d1c7ed8 Mon Sep 17 00:00:00 2001
From: Kevin Heifner <heifnerk@objectcomputing.com>
Date: Mon, 4 Nov 2024 10:00:59 -0600
Subject: [PATCH 15/17] GH-1003 Advance forkdb root when pending lib greater
 than head

---
 libraries/chain/controller.cpp | 9 ++++++---
 unittests/fork_db_tests.cpp    | 8 ++++++++
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp
index bda29fa837..07c7c2133a 100644
--- a/libraries/chain/controller.cpp
+++ b/libraries/chain/controller.cpp
@@ -1497,9 +1497,12 @@ struct controller_impl {
          assert(!irreversible_mode() || forkdb.head());
          const auto& head_id = irreversible_mode() ? forkdb.head()->id() : chain_head.id();
          // verifies lib is on head branch, otherwise returns an empty branch
-         // The new lib needs to be on the head branch because the forkdb.advance_root() below could purge blocks that
-         // would be needed to be re-applied on a fork switch from the exiting chain_head.
-         auto branch = forkdb.fetch_branch(head_id, new_lib_id);
+         //   The new lib needs to be on the head branch because the forkdb.advance_root() below could purge blocks that
+         //   would be needed to be re-applied on a fork switch from the exiting chain_head.
+         // Pending LIB can be greater than chain head, for example when syncing, in that case fetch branch from the
+         // pending LIB. If the pending LIB not found on the head branch then fetch_branch returns an empty branch.
+         // Otherwise fetch_branch will return from chain_head to root iff chain_head on pending LIB branch.
+         auto branch = new_lib_num <= chain_head.block_num() ? forkdb.fetch_branch(head_id, new_lib_id) : forkdb.fetch_branch(new_lib_id, head_id);
          try {
             auto should_process = [&](auto& bsp) {
                // Only make irreversible blocks that have been validated. Blocks in the fork database may not be on our current best head
diff --git a/unittests/fork_db_tests.cpp b/unittests/fork_db_tests.cpp
index 410fe3c63d..31e9fed7a9 100644
--- a/unittests/fork_db_tests.cpp
+++ b/unittests/fork_db_tests.cpp
@@ -138,6 +138,14 @@ BOOST_FIXTURE_TEST_CASE(add_remove_test, generate_forkdb_state) try {
    BOOST_REQUIRE(branch.size() == 2);
    BOOST_TEST(branch[0] == bsp12a);
    BOOST_TEST(branch[1] == bsp11a);
+
+   // test fetch branch when lib is greater than head
+   branch = forkdb.fetch_branch(bsp13b->id(), bsp12a->id());
+   BOOST_TEST(branch.empty());
+   branch = forkdb.fetch_branch(bsp13b->id(), bsp12b->id());
+   BOOST_REQUIRE(branch.size() == 2);
+   BOOST_TEST(branch[0] == bsp12b);
+   BOOST_TEST(branch[1] == bsp11b);
 } FC_LOG_AND_RETHROW();
 
 
From cef82331be203a4f4d59e7853f75a9a491b679c0 Mon Sep 17 00:00:00 2001
From: Kevin Heifner <heifnerk@objectcomputing.com>
Date: Mon, 4 Nov 2024 11:45:45 -0600
Subject: [PATCH 16/17] GH-1003 Use forkdb head in irreversible mode

---
 libraries/chain/controller.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp
index 07c7c2133a..161aac3191 100644
--- a/libraries/chain/controller.cpp
+++ b/libraries/chain/controller.cpp
@@ -1496,13 +1496,14 @@ struct controller_impl {
       auto mark_branch_irreversible = [&, this](auto& forkdb) {
          assert(!irreversible_mode() || forkdb.head());
          const auto& head_id = irreversible_mode() ? forkdb.head()->id() : chain_head.id();
+         const auto head_num = block_header::num_from_id(head_id);
          // verifies lib is on head branch, otherwise returns an empty branch
          //   The new lib needs to be on the head branch because the forkdb.advance_root() below could purge blocks that
          //   would be needed to be re-applied on a fork switch from the exiting chain_head.
          // Pending LIB can be greater than chain head, for example when syncing, in that case fetch branch from the
          // pending LIB. If the pending LIB not found on the head branch then fetch_branch returns an empty branch.
          // Otherwise fetch_branch will return from chain_head to root iff chain_head on pending LIB branch.
-         auto branch = new_lib_num <= chain_head.block_num() ? forkdb.fetch_branch(head_id, new_lib_id) : forkdb.fetch_branch(new_lib_id, head_id);
+         auto branch = new_lib_num <= head_num ? forkdb.fetch_branch(head_id, new_lib_id) : forkdb.fetch_branch(new_lib_id, head_id);
          try {
             auto should_process = [&](auto& bsp) {
                // Only make irreversible blocks that have been validated. Blocks in the fork database may not be on our current best head

From b519e9c6e403762cee6e63a36780cd855b5c71f7 Mon Sep 17 00:00:00 2001
From: Kevin Heifner <heifnerk@objectcomputing.com>
Date: Tue, 5 Nov 2024 15:30:48 -0600
Subject: [PATCH 17/17] Update to appbase main with signal handling off the
 main thread

---
 libraries/appbase | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libraries/appbase b/libraries/appbase
index 5e09479dde..0b2f151bea 160000
--- a/libraries/appbase
+++ b/libraries/appbase
@@ -1 +1 @@
-Subproject commit 5e09479dde4ce251b2dde9f17537ea234e599df6
+Subproject commit 0b2f151beaceb1340e537bb43766428391ceddc7