From 70da202da8fcc36f63c7fb71e682a8192619ebea Mon Sep 17 00:00:00 2001 From: Rares Stanciu Date: Thu, 10 Jun 2021 12:30:56 +0300 Subject: [PATCH] fix: use global keyword when updating global variables --- monitor.py | 82 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 59 insertions(+), 23 deletions(-) diff --git a/monitor.py b/monitor.py index b4e0e1d..2a56d1b 100644 --- a/monitor.py +++ b/monitor.py @@ -5,34 +5,45 @@ from time import sleep -logging.basicConfig(format='[%(asctime)s] %(levelname)s:%(message)s', level=logging.DEBUG) +logging.basicConfig( + format="[%(asctime)s] %(levelname)s:%(message)s", level=logging.DEBUG +) client = docker.from_env() MAX_RETRY_COUNT = 5 -GRAPHQL_URI="http://node:3085/graphql" +GRAPHQL_URI = "http://node:3085/graphql" INITIAL_STATUS_COUNT = { "SYNCED": 0, "CONNECTING": 0, "OFFLINE": 0, "CATCHUP": 0, - 'BOOTSTRAP': 0 + "BOOTSTRAP": 0, } STATUS_COUNT = INITIAL_STATUS_COUNT OUTOFSYNC_COUNT = 0 + class NodeOutOfSyncException(Exception): """Exception for triggering the node restart.""" + pass + class NodeNotReachableException(Exception): """Exception for waiting the node to be reachable.""" + pass + def check_mina_node_status(): """ Fetch Mina node status using the GraphQL client. """ logging.debug("Fetching node status") + global MAX_RETRY_COUNT + global GRAPHQL_URI + global STATUS_COUNT + retry_count = 0 while retry_count < MAX_RETRY_COUNT: @@ -54,9 +65,14 @@ def check_mina_node_status(): } """ - # Fetch node status using the GraphQL API + # Fetch node status using the GraphQL API try: - r = requests.post(GRAPHQL_URI, json={'query': query}, headers={'Content-Type': 'application/json'}, timeout=60) + r = requests.post( + GRAPHQL_URI, + json={"query": query}, + headers={"Content-Type": "application/json"}, + timeout=60, + ) except requests.exceptions.ConnectionError: # Node is not reachable. # Raise NodeOutOfSyncException in order to skip a few syncs @@ -66,19 +82,21 @@ def check_mina_node_status(): # Check response status if r.status_code == 200: logging.debug("Status fetched successfully") - response = r.json()['data']['daemonStatus'] + response = r.json()["data"]["daemonStatus"] logging.debug(response) # Node sync status - sync_status = response['syncStatus'] + sync_status = response["syncStatus"] # Node uptime (in seconds) - uptime = response['uptimeSecs'] + uptime = response["uptimeSecs"] # Blockchain length - blockchain_length = response['blockchainLength'] + blockchain_length = response["blockchainLength"] # Highest block - highest_block = response['highestBlockLengthReceived'] + highest_block = response["highestBlockLengthReceived"] # Highest unvalidated block - highest_unvalidated_block = response['highestUnvalidatedBlockLengthReceived'] + highest_unvalidated_block = response[ + "highestUnvalidatedBlockLengthReceived" + ] # Compute difference between unvalidated and validated blocks blocks_validated_diff = highest_unvalidated_block - highest_block @@ -86,24 +104,32 @@ def check_mina_node_status(): STATUS_COUNT[sync_status] += 1 logging.debug(STATUS_COUNT) - if STATUS_COUNT['CONNECTING'] > 60: - logging.error("Node has been too long in the CONNECTING state. (more than 5 minutes") + if STATUS_COUNT["CONNECTING"] > 60: + logging.error( + "Node has been too long in the CONNECTING state. (more than 5 minutes" + ) raise NodeOutOfSyncException() - if STATUS_COUNT['CATCHUP'] > 540: - logging.debug("Node has been too long in the CATHUP state (more than 45 minutes).") + if STATUS_COUNT["CATCHUP"] > 540: + logging.debug( + "Node has been too long in the CATHUP state (more than 45 minutes)." + ) raise NodeOutOfSyncException() - if STATUS_COUNT['BOOTSTRAP'] > 240: - logging.error("Node has been too long in the BOOTSTRAP state (more than 20 minutes).") + if STATUS_COUNT["BOOTSTRAP"] > 240: + logging.error( + "Node has been too long in the BOOTSTRAP state (more than 20 minutes)." + ) raise NodeOutOfSyncException() - if sync_status == 'BOOTSTRAP': + if sync_status == "BOOTSTRAP": logging.debug("Node is bootstrapping...") return if blocks_validated_diff > 2: - logging.error("Difference between highest validated block and highest unvalidated block. (delta > 2)") + logging.error( + "Difference between highest validated block and highest unvalidated block. (delta > 2)" + ) raise NodeOutOfSyncException() logging.info("Node is synced.") @@ -116,27 +142,37 @@ def check_mina_node_status(): # Raise NodeOutOfSyncException in order to restart the node raise NodeOutOfSyncException() + def restart_node(): """Restart Mina node""" logging.debug("Restarting node") + global STATUS_COUNT + global INITIAL_STATUS_COUNT + global client + for item in client.containers.list(): - if item.name == 'node' or item.name == 'sidecar': + if item.name == "node" or item.name == "sidecar": item.stop() break STATUS_COUNT = INITIAL_STATUS_COUNT + def start_monitor(): """Main event loop""" logging.info("mina-monitor started") + global OUTOFSYNC_COUNT + while True: try: check_mina_node_status() except NodeOutOfSyncException: OUTOFSYNC_COUNT += 1 - logging.error("Node is out of sync. (OUTOFSYNC_COUNT={})".format(OUTOFSYNC_COUNT)) + logging.error( + "Node is out of sync. (OUTOFSYNC_COUNT={})".format(OUTOFSYNC_COUNT) + ) restart_node() sleep(30) except NodeNotReachableException: @@ -145,6 +181,6 @@ def start_monitor(): finally: sleep(5) -if __name__ == '__main__': - start_monitor() +if __name__ == "__main__": + start_monitor()