Skip to content

Commit

Permalink
container_checker on RP should check containers based on asic presence
Browse files Browse the repository at this point in the history
    On a Supervisor/RP card, some application containers may not run if
    an asic is not present because its Fabric card is missing. The container
    checker should skip those container instances.
    Container instances that run only if the asic is present: swss, syncd,
    lldp, teamd.
    Exception: all instances of the database and bgp containers run
    irrespective of asic presence.

Signed-off-by: anamehra [email protected]
  • Loading branch information
anamehra authored Jul 13, 2022
1 parent 850e456 commit 579ca2b
Showing 1 changed file with 40 additions and 4 deletions.
44 changes: 40 additions & 4 deletions files/image_config/monit/container_checker
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,34 @@ import docker
import sys

import swsssdk
from sonic_py_common import multi_asic, device_info
from sonic_py_common import multi_asic, device_info, daemon_base
from swsscommon import swsscommon

def get_asic_presence_list():
    """
    @summary: Build the list of numeric asic ids that are considered present.
              - Not a multi-asic system: the list is empty.
              - Multi-asic, not a supervisor: every asic id from 0 to
                num_asics - 1 is reported, since there are no FRU Fabric
                cards that could be missing.
              - Multi-asic supervisor: the ids are derived from the keys of
                CHASSIS_ASIC_TABLE in CHASSIS_STATE_DB. Each key is expected
                to look like "asic<N>"; the text after the first four
                characters is converted to an int.
    @return: List of asic ids (ints) present
    """
    if not multi_asic.is_multi_asic():
        return []

    if not device_info.is_supervisor():
        # Only the supervisor carries FRU Fabric cards, so on any other card
        # all asics 0 .. num_asics - 1 are taken to be present.
        return list(range(0, multi_asic.get_num_asics()))

    # Supervisor: read the asic keys published in CHASSIS_ASIC_TABLE.
    state_db = daemon_base.db_connect("CHASSIS_STATE_DB")
    presence_table = swsscommon.Table(state_db, 'CHASSIS_ASIC_TABLE')
    if not presence_table:
        return []

    # Keys are asic names (asic0, asic1, ... asicN); keep the numeric suffix.
    return [int(asic_name[4:]) for asic_name in presence_table.getKeys()]

def get_expected_running_containers():
"""
Expand All @@ -41,7 +66,15 @@ def get_expected_running_containers():

expected_running_containers = set()
always_running_containers = set()


# Get current asic presence list. For multi_asic system, multi instance containers
# should be checked only for asics present.
asics_id_presence = get_asic_presence_list()

# Some services, like database and bgp run all the instances irrespective of asic presence.
# Add those to exception list.
run_all_instance_list = ['database', 'bgp']

for container_name in feature_table.keys():
if feature_table[container_name]["state"] not in ["disabled", "always_disabled"]:
if multi_asic.is_multi_asic():
Expand All @@ -50,7 +83,8 @@ def get_expected_running_containers():
if feature_table[container_name]["has_per_asic_scope"] == "True":
num_asics = multi_asic.get_num_asics()
for asic_id in range(num_asics):
expected_running_containers.add(container_name + str(asic_id))
if asic_id in asics_id_presence or container_name in run_all_instance_list:
expected_running_containers.add(container_name + str(asic_id))
else:
expected_running_containers.add(container_name)
if feature_table[container_name]["state"] == 'always_enabled':
Expand All @@ -60,9 +94,11 @@ def get_expected_running_containers():
if feature_table[container_name]["has_per_asic_scope"] == "True":
num_asics = multi_asic.get_num_asics()
for asic_id in range(num_asics):
always_running_containers.add(container_name + str(asic_id))
if asic_id in asics_id_presence or container_name in run_all_instance_list:
always_running_containers.add(container_name + str(asic_id))
else:
always_running_containers.add(container_name)

if device_info.is_supervisor():
always_running_containers.add("database-chassis")
return expected_running_containers, always_running_containers
Expand Down

0 comments on commit 579ca2b

Please sign in to comment.