From 96bd7a5c4a286ba96f9083c6a70e1e8a9ba32e7b Mon Sep 17 00:00:00 2001 From: Benjamin Himes Date: Wed, 20 Nov 2024 21:50:20 +0200 Subject: [PATCH 1/9] Adds a timeout to extrinsic submissions with handling for polling the results. --- bittensor/core/extrinsics/utils.py | 64 ++++++++++++++++++++++++++++-- 1 file changed, 60 insertions(+), 4 deletions(-) diff --git a/bittensor/core/extrinsics/utils.py b/bittensor/core/extrinsics/utils.py index 6c896372b6..a9478962d8 100644 --- a/bittensor/core/extrinsics/utils.py +++ b/bittensor/core/extrinsics/utils.py @@ -1,21 +1,34 @@ """Module with helper functions for extrinsics.""" -from typing import TYPE_CHECKING -from substrateinterface.exceptions import SubstrateRequestException +import signal +import time +from typing import TYPE_CHECKING, Optional + +from substrateinterface.exceptions import SubstrateRequestException, ExtrinsicNotFound + from bittensor.utils.btlogging import logging from bittensor.utils import format_error_message if TYPE_CHECKING: - from substrateinterface import SubstrateInterface + from substrateinterface import SubstrateInterface, ExtrinsicReceipt from scalecodec.types import GenericExtrinsic +class _SignalTimeoutException(Exception): + """ + Exception raised for timeout. Different than TimeoutException because this also triggers + a websocket failure. This exception should only be used with `signal.alarm`. + """ + + pass + + def submit_extrinsic( substrate: "SubstrateInterface", extrinsic: "GenericExtrinsic", wait_for_inclusion: bool, wait_for_finalization: bool, -): +) -> Optional["ExtrinsicReceipt"]: """ Submits an extrinsic to the substrate blockchain and handles potential exceptions. @@ -35,15 +48,58 @@ def submit_extrinsic( Raises: SubstrateRequestException: If the submission of the extrinsic fails, the error is logged and re-raised. """ + extrinsic_hash = extrinsic.extrinsic_hash + starting_block = substrate.get_block() + + def _handler(signum, frame): + """ + Timeout handler for signal. 
Will raise a TimeoutError if timeout is exceeded. + """ + logging.error("Timed out waiting for extrinsic submission.") + raise _SignalTimeoutException + try: + # sets a timeout timer for the next call to 20 seconds + # will raise a _SignalTimeoutException if it reaches this point + signal.signal(signal.SIGALRM, _handler) + signal.alarm(120) # two minute timeout + response = substrate.submit_extrinsic( extrinsic, wait_for_inclusion=wait_for_inclusion, wait_for_finalization=wait_for_finalization, ) + signal.alarm(0) # remove timeout timer except SubstrateRequestException as e: logging.error(format_error_message(e.args[0], substrate=substrate)) # Re-rise the exception for retrying of the extrinsic call. If we remove the retry logic, the raise will need # to be removed. + signal.alarm(0) # remove timeout timer raise + + except _SignalTimeoutException: + after_timeout_block = substrate.get_block() + if ( + after_timeout_block["header"]["number"] + == starting_block["header"]["number"] + ): + # if we immediately reconnect (unlikely), we will wait for one full block to check + time.sleep(12) + after_timeout_block = substrate.get_block() + + response = None + for block_num in range( + starting_block["header"]["number"], + after_timeout_block["header"]["number"] + 1, + ): + block_hash = substrate.get_block_hash(block_num) + try: + response = substrate.retrieve_extrinsic_by_hash( + block_hash, f"0x{extrinsic_hash.hex()}" + ) + except ExtrinsicNotFound: + continue + if response: + break + return response From f7796e68a6ef91d894946abd57719397572de761 Mon Sep 17 00:00:00 2001 From: Benjamin Himes Date: Wed, 20 Nov 2024 21:54:05 +0200 Subject: [PATCH 2/9] Typo --- bittensor/core/extrinsics/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bittensor/core/extrinsics/utils.py b/bittensor/core/extrinsics/utils.py index a9478962d8..b21be50af7 100644 --- a/bittensor/core/extrinsics/utils.py +++ b/bittensor/core/extrinsics/utils.py @@ -59,7 +59,7 @@ def 
_handler(signum, frame): raise _SignalTimeoutException try: - # sets a timeout timer for the next call to 20 seconds + # sets a timeout timer for the next call to 120 seconds # will raise a _SignalTimeoutException if it reaches this point signal.signal(signal.SIGALRM, _handler) signal.alarm(120) # two minute timeout From 4546ccc559feb19e14a45fd19336ede26a0a4eb1 Mon Sep 17 00:00:00 2001 From: Benjamin Himes Date: Wed, 20 Nov 2024 22:23:25 +0200 Subject: [PATCH 3/9] Changed all sync extrinsics to use the submit_extrinsics util --- bittensor/core/extrinsics/registration.py | 15 +++++++-------- bittensor/core/extrinsics/root.py | 7 +++++-- bittensor/core/extrinsics/serving.py | 3 ++- bittensor/core/extrinsics/utils.py | 1 + 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/bittensor/core/extrinsics/registration.py b/bittensor/core/extrinsics/registration.py index fdb67619e6..e9836938d8 100644 --- a/bittensor/core/extrinsics/registration.py +++ b/bittensor/core/extrinsics/registration.py @@ -13,11 +13,8 @@ from bittensor.utils import format_error_message, unlock_key from bittensor.utils.btlogging import logging from bittensor.utils.networking import ensure_connected -from bittensor.utils.registration import ( - create_pow, - torch, - log_no_torch_error, -) +from bittensor.utils.registration import create_pow, torch, log_no_torch_error +from bittensor.core.extrinsics.utils import submit_extrinsic # For annotation and lazy import purposes if TYPE_CHECKING: @@ -68,8 +65,9 @@ def _do_pow_register( }, ) extrinsic = self.substrate.create_signed_extrinsic(call=call, keypair=wallet.hotkey) - response = self.substrate.submit_extrinsic( - extrinsic, + response = submit_extrinsic( + substrate=self.substrate, + extrinsic=extrinsic, wait_for_inclusion=wait_for_inclusion, wait_for_finalization=wait_for_finalization, ) @@ -298,7 +296,8 @@ def _do_burned_register( extrinsic = self.substrate.create_signed_extrinsic( call=call, keypair=wallet.coldkey ) - response = 
self.substrate.submit_extrinsic( + response = submit_extrinsic( + self.substrate, extrinsic, wait_for_inclusion=wait_for_inclusion, wait_for_finalization=wait_for_finalization, diff --git a/bittensor/core/extrinsics/root.py b/bittensor/core/extrinsics/root.py index a90c1b94d6..a9ad3a00fa 100644 --- a/bittensor/core/extrinsics/root.py +++ b/bittensor/core/extrinsics/root.py @@ -5,6 +5,7 @@ from numpy.typing import NDArray from bittensor.core.settings import version_as_int +from bittensor.core.extrinsics.utils import submit_extrinsic from bittensor.utils import format_error_message, weight_utils, unlock_key from bittensor.utils.btlogging import logging from bittensor.utils.networking import ensure_connected @@ -31,7 +32,8 @@ def _do_root_register( extrinsic = self.substrate.create_signed_extrinsic( call=call, keypair=wallet.coldkey ) - response = self.substrate.submit_extrinsic( + response = submit_extrinsic( + self.substrate, extrinsic, wait_for_inclusion=wait_for_inclusion, wait_for_finalization=wait_for_finalization, @@ -155,7 +157,8 @@ def _do_set_root_weights( keypair=wallet.coldkey, era={"period": 5}, ) - response = self.substrate.submit_extrinsic( + response = submit_extrinsic( + self.substrate, extrinsic, wait_for_inclusion=wait_for_inclusion, wait_for_finalization=wait_for_finalization, diff --git a/bittensor/core/extrinsics/serving.py b/bittensor/core/extrinsics/serving.py index 864726b97c..1c26b87e76 100644 --- a/bittensor/core/extrinsics/serving.py +++ b/bittensor/core/extrinsics/serving.py @@ -288,7 +288,8 @@ def publish_metadata( ) extrinsic = substrate.create_signed_extrinsic(call=call, keypair=wallet.hotkey) - response = substrate.submit_extrinsic( + response = submit_extrinsic( + substrate, extrinsic, wait_for_inclusion=wait_for_inclusion, wait_for_finalization=wait_for_finalization, diff --git a/bittensor/core/extrinsics/utils.py b/bittensor/core/extrinsics/utils.py index b21be50af7..4d3e5ff7c1 100644 --- a/bittensor/core/extrinsics/utils.py +++ 
b/bittensor/core/extrinsics/utils.py @@ -61,6 +61,7 @@ def _handler(signum, frame): try: # sets a timeout timer for the next call to 120 seconds # will raise a _SignalTimeoutException if it reaches this point + logging.warning("OKAY BUDDY RETARD") signal.signal(signal.SIGALRM, _handler) signal.alarm(120) # two minute timeout From b2d0e3782d556a8edcf82476ee5f755cadd498fa Mon Sep 17 00:00:00 2001 From: Benjamin Himes Date: Wed, 20 Nov 2024 23:20:16 +0200 Subject: [PATCH 4/9] Removed typo --- bittensor/core/extrinsics/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bittensor/core/extrinsics/utils.py b/bittensor/core/extrinsics/utils.py index 4d3e5ff7c1..b21be50af7 100644 --- a/bittensor/core/extrinsics/utils.py +++ b/bittensor/core/extrinsics/utils.py @@ -61,7 +61,6 @@ def _handler(signum, frame): try: # sets a timeout timer for the next call to 120 seconds # will raise a _SignalTimeoutException if it reaches this point - logging.warning("OKAY BUDDY RETARD") signal.signal(signal.SIGALRM, _handler) signal.alarm(120) # two minute timeout From dc6ea3a9b2fcbe96526c891e55a0c1c9ed5031c8 Mon Sep 17 00:00:00 2001 From: Benjamin Himes Date: Wed, 20 Nov 2024 23:32:50 +0200 Subject: [PATCH 5/9] Edge case --- bittensor/core/extrinsics/utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/bittensor/core/extrinsics/utils.py b/bittensor/core/extrinsics/utils.py index b21be50af7..0cdf817ae2 100644 --- a/bittensor/core/extrinsics/utils.py +++ b/bittensor/core/extrinsics/utils.py @@ -2,7 +2,7 @@ import signal import time -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING from substrateinterface.exceptions import SubstrateRequestException, ExtrinsicNotFound @@ -28,7 +28,7 @@ def submit_extrinsic( extrinsic: "GenericExtrinsic", wait_for_inclusion: bool, wait_for_finalization: bool, -) -> Optional["ExtrinsicReceipt"]: +) -> "ExtrinsicReceipt": """ Submits an extrinsic to the substrate blockchain and handles potential 
exceptions. @@ -102,4 +102,8 @@ def _handler(signum, frame): if response: break + if response is None: + logging.error("Extrinsic not submitted.") + raise SubstrateRequestException + return response From ce3a03287e21e2de791c6ff3e4a9225865799f5d Mon Sep 17 00:00:00 2001 From: Benjamin Himes Date: Wed, 20 Nov 2024 23:41:19 +0200 Subject: [PATCH 6/9] Removed testing stuff. --- bittensor/core/extrinsics/utils.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/bittensor/core/extrinsics/utils.py b/bittensor/core/extrinsics/utils.py index 0cdf817ae2..d656df1278 100644 --- a/bittensor/core/extrinsics/utils.py +++ b/bittensor/core/extrinsics/utils.py @@ -79,13 +79,6 @@ def _handler(signum, frame): except _SignalTimeoutException: after_timeout_block = substrate.get_block() - if ( - after_timeout_block["header"]["number"] - == starting_block["header"]["number"] - ): - # if we immediately reconnect (unlikely), we will wait for one full block to check - time.sleep(12) - after_timeout_block = substrate.get_block() response = None for block_num in range( From fe01bbf45bd34152d6e057189f87f948d7700f92 Mon Sep 17 00:00:00 2001 From: Benjamin Himes Date: Wed, 20 Nov 2024 23:47:29 +0200 Subject: [PATCH 7/9] import cleanup --- bittensor/core/extrinsics/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bittensor/core/extrinsics/utils.py b/bittensor/core/extrinsics/utils.py index d656df1278..a935e61eea 100644 --- a/bittensor/core/extrinsics/utils.py +++ b/bittensor/core/extrinsics/utils.py @@ -1,7 +1,6 @@ """Module with helper functions for extrinsics.""" import signal -import time from typing import TYPE_CHECKING from substrateinterface.exceptions import SubstrateRequestException, ExtrinsicNotFound From 0346f1b3fa94c9b6a31ec234955dbaf698e414df Mon Sep 17 00:00:00 2001 From: Benjamin Himes Date: Thu, 21 Nov 2024 17:28:42 +0200 Subject: [PATCH 8/9] Expanded extrinsic timeout to 200 seconds. Added better logging including extrinsic hash and block submission. 
--- bittensor/core/extrinsics/utils.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/bittensor/core/extrinsics/utils.py b/bittensor/core/extrinsics/utils.py index a935e61eea..9235a571b6 100644 --- a/bittensor/core/extrinsics/utils.py +++ b/bittensor/core/extrinsics/utils.py @@ -58,10 +58,10 @@ def _handler(signum, frame): raise _SignalTimeoutException try: - # sets a timeout timer for the next call to 120 seconds + # sets a timeout timer for the next call to 200 seconds # will raise a _SignalTimeoutException if it reaches this point signal.signal(signal.SIGALRM, _handler) - signal.alarm(120) # two minute timeout + signal.alarm(200) # two minute timeout response = substrate.submit_extrinsic( extrinsic, @@ -95,7 +95,10 @@ def _handler(signum, frame): break if response is None: - logging.error("Extrinsic not submitted.") + logging.error( + f"Extrinsic '0x{extrinsic_hash.hex()}' not submitted. " + f"Initially attempted to submit at block {starting_block['header']['number']}." + ) raise SubstrateRequestException return response From 5a69f4f965a95c14f3bcf5321fd33b0fc0d74a57 Mon Sep 17 00:00:00 2001 From: Benjamin Himes Date: Fri, 22 Nov 2024 11:52:01 +0200 Subject: [PATCH 9/9] Changed signal to threading because signal only works in the main thread, and our multithreaded e2e tests don't work well with it. 
--- bittensor/core/extrinsics/utils.py | 30 ++++++++++++------------- tests/e2e_tests/utils/e2e_test_utils.py | 1 + 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/bittensor/core/extrinsics/utils.py b/bittensor/core/extrinsics/utils.py index 9235a571b6..6af14b25fb 100644 --- a/bittensor/core/extrinsics/utils.py +++ b/bittensor/core/extrinsics/utils.py @@ -1,6 +1,6 @@ """Module with helper functions for extrinsics.""" -import signal +import threading from typing import TYPE_CHECKING from substrateinterface.exceptions import SubstrateRequestException, ExtrinsicNotFound @@ -13,10 +13,10 @@ from scalecodec.types import GenericExtrinsic -class _SignalTimeoutException(Exception): +class _ThreadingTimeoutException(Exception): """ - Exception raised for timeout. Different than TimeoutException because this also triggers - a websocket failure. This exception should only be used with `signal.alarm`. + Exception raised for timeout. Different from TimeoutException because this also triggers + a websocket failure. This exception should only be used with `threading` timer. """ pass @@ -50,33 +50,31 @@ def submit_extrinsic( extrinsic_hash = extrinsic.extrinsic_hash starting_block = substrate.get_block() - def _handler(signum, frame): + def _handler(): """ - Timeout handler for signal. Will raise a TimeoutError if timeout is exceeded. + Timeout handler for threading. Will raise a _ThreadingTimeoutException if timeout is exceeded. 
""" logging.error("Timed out waiting for extrinsic submission.") - raise _SignalTimeoutException + raise _ThreadingTimeoutException - try: - # sets a timeout timer for the next call to 200 seconds - # will raise a _SignalTimeoutException if it reaches this point - signal.signal(signal.SIGALRM, _handler) - signal.alarm(200) # two minute timeout + # sets a timeout timer for the next call to 200 seconds + # will raise a _ThreadingTimeoutException if it reaches this point + timer = threading.Timer(200, _handler) + try: + timer.start() response = substrate.submit_extrinsic( extrinsic, wait_for_inclusion=wait_for_inclusion, wait_for_finalization=wait_for_finalization, ) - signal.alarm(0) # remove timeout timer except SubstrateRequestException as e: logging.error(format_error_message(e.args[0], substrate=substrate)) # Re-rise the exception for retrying of the extrinsic call. If we remove the retry logic, the raise will need # to be removed. - signal.alarm(0) # remove timeout timer raise - except _SignalTimeoutException: + except _ThreadingTimeoutException: after_timeout_block = substrate.get_block() response = None @@ -93,6 +91,8 @@ def _handler(signum, frame): continue if response: break + finally: + timer.cancel() if response is None: logging.error( diff --git a/tests/e2e_tests/utils/e2e_test_utils.py b/tests/e2e_tests/utils/e2e_test_utils.py index 2568908bea..7aac17a016 100644 --- a/tests/e2e_tests/utils/e2e_test_utils.py +++ b/tests/e2e_tests/utils/e2e_test_utils.py @@ -50,6 +50,7 @@ def clone_or_update_templates(specific_commit=None): os.chdir(install_dir) for repo, git_link in repo_mapping.items(): + print(os.path.abspath(repo)) if not os.path.exists(repo): print(f"\033[94mCloning {repo}...\033[0m") subprocess.run(["git", "clone", git_link, repo], check=True)