Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Auto Techsupport] Event driven Techsupport Changes #15

Closed
wants to merge 29 commits into from
Closed
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
436f4ec
AUTO_TECHUPPORT yang model added
vivekrnv Aug 10, 2021
c88529f
auto TS changes
vivekrnv Aug 11, 2021
ba435fb
Flag added to slave.mk
vivekrnv Aug 11, 2021
9c68bc2
auto_ts model updated
vivekrnv Aug 11, 2021
995034b
Merge branch 'master' of https://github.com/Azure/sonic-buildimage in…
vivekrnv Aug 11, 2021
acca347
init_cfg is modified
vivekrnv Aug 12, 2021
09dd71a
Merge branch 'master' of https://github.com/Azure/sonic-buildimage in…
vivekrnv Aug 12, 2021
f2754d6
YANG model edited
vivekrnv Aug 12, 2021
9128b6e
init_cfg edit
vivekrnv Aug 12, 2021
91a1468
init_cfg corrected
vivekrnv Aug 13, 2021
ea0966e
tests modified
vivekrnv Aug 13, 2021
3531d47
Made the Compile time flag interactive
vivekrnv Aug 13, 2021
2359bb8
exit-listener updated
vivekrnv Aug 16, 2021
b375827
Yang model updated
vivekrnv Aug 17, 2021
750afc9
Table Name Change for proc_exit_event
vivekrnv Aug 20, 2021
8324fc3
Yang Updated for comments
vivekrnv Aug 29, 2021
7479a1f
Merge Conflict Resolved
vivekrnv Aug 29, 2021
e10bf07
init_cfg and YANG updated
vivekrnv Aug 30, 2021
ae855e0
Script updated for backward compatibility
vivekrnv Aug 30, 2021
301fb48
Description tag added to YANG
vivekrnv Aug 30, 2021
9b29fca
Merge branch 'master' of https://github.com/Azure/sonic-buildimage in…
vivekrnv Aug 30, 2021
fc7b979
Indentation changes and minor edits
vivekrnv Aug 31, 2021
6dd95b8
YANG model updated
vivekrnv Aug 31, 2021
d0c70ed
init_cfg edited
vivekrnv Sep 1, 2021
c822b76
init_cfg updated
vivekrnv Sep 1, 2021
8bc38f9
YANG Models Updated
vivekrnv Sep 1, 2021
661065c
Merge branch 'master' into event_driven_ts
vivekrnv Sep 2, 2021
1f957c9
Updated the YANG model
vivekrnv Sep 2, 2021
2b79d52
Merge branch 'event_driven_ts' of https://github.com/vivekreddynv/son…
vivekrnv Sep 2, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Makefile.work
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@
# * SONIC_DPKG_CACHE_SOURCE: Debian package cache location when cache enabled for debian packages
# * BUILD_LOG_TIMESTAMP: Set timestamp in the build log (simple/none)
# * DOCKER_EXTRA_OPTS: Extra command line arguments for dockerd running in slave container.
# * ENABLE_AUTO_TECH_SUPPORT: Enable the configuration for event-driven techsupport & coredump mgmt feature
# * Default: y
# * Values: y,n
#
###############################################################################

Expand Down Expand Up @@ -279,6 +282,7 @@ SONIC_BUILD_INSTRUCTION := make \
SONIC_ENABLE_IMAGE_SIGNATURE=$(ENABLE_IMAGE_SIGNATURE) \
ENABLE_HOST_SERVICE_ON_START=$(ENABLE_HOST_SERVICE_ON_START) \
SLAVE_DIR=$(SLAVE_DIR) \
ENABLE_AUTO_TECH_SUPPORT=$(ENABLE_AUTO_TECH_SUPPORT) \
$(SONIC_OVERRIDE_BUILD_VARS)

.PHONY: sonic-slave-build sonic-slave-bash init reset
Expand Down
24 changes: 23 additions & 1 deletion files/build_templates/init_cfg.json.j2
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,30 @@
{%- if feature in ["lldp", "pmon", "radv", "snmp", "telemetry"] %}
"set_owner": "kube", {% else %}
"set_owner": "local", {% endif %} {% endif %}
"high_mem_alert": "disabled"
"high_mem_alert": "disabled",
{%- if enable_auto_tech_support == "y" %}
"auto_techsupport" : "enabled", {% else %}
"auto_techsupport" : "disabled", {% endif %}
"cooloff" : "300"
vivekrnv marked this conversation as resolved.
Show resolved Hide resolved
}{% if not loop.last %},{% endif -%}
{% endfor %}
},
"AUTO_TECHSUPPORT" : {
"global": {
{%- if enable_auto_tech_support == "y" %}
"auto_invoke_ts" : "enabled",
"coredump_cleanup" : "enabled",
"techsupport_cleanup" : "enabled",
{% else %}
"auto_invoke_ts" : "disabled",
"coredump_cleanup" : "disabled",
"techsupport_cleanup" : "disabled",
{% endif %}
"cooloff" : "180",
"max_techsupport_size" : "10.0",
"core_usage" : "5.0",
"since" : "2 days ago"
}
}

}
64 changes: 55 additions & 9 deletions files/scripts/supervisor-proc-exit-listener
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@ import sys
import syslog
import time
from collections import defaultdict
import subprocess
from swsscommon.swsscommon import DBConnector

import swsssdk

from supervisor import childutils

# Each line of this file should specify either one critical process or one
Expand All @@ -31,6 +32,48 @@ SELECT_TIMEOUT_SECS = 1.0
# Alerting message will be written into syslog in the following interval
ALERTING_INTERVAL_SECS = 60

# Cache of PID -> command/executable name. handle_proc_event() fills it from
# /proc/<pid>/comm on PROCESS_STATE_RUNNING and consumes it on PROCESS_STATE_EXITED.
PID_COMM_FILE_MAP = {}
vivekrnv marked this conversation as resolved.
Show resolved Hide resolved


def subprocess_exec(cmd):
    """
    @summary: Run a command to completion with its output captured as text.
    @param cmd: Command and arguments as a list, passed to subprocess.run
                without a shell.
    @return: Tuple of (return code, captured stdout, captured stderr).
    """
    completed = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True
    )
    return completed.returncode, completed.stdout, completed.stderr


def handle_proc_event(dbconn, event, supervisor_proc_name, pid, container_name):
    """
    @summary:
        1) Set/Del the entry in the AUTO_TECHSUPPORT|FEATURE_PROC_INFO table of STATE DB
        2) Update the PID_COMM_FILE_MAP dict
        Table Schema:
            key = "AUTO_TECHSUPPORT|FEATURE_PROC_INFO"
            <container_name;supervisor_proc_name> = <pid;comm>
    @param dbconn: DB connection object exposing hset(key, field, value) and
                   hdel(key, field)
    @param event: supervisor event name; only PROCESS_STATE_EXITED and
                  PROCESS_STATE_RUNNING are acted upon, anything else is ignored
    @param supervisor_proc_name: process name as reported by supervisor
    @param pid: PID of the process taken from the event payload
    @param container_name: name of the container this listener runs in
    @return: None. Any exception is logged to syslog and not propagated.
    """
    table_key = "AUTO_TECHSUPPORT|FEATURE_PROC_INFO"
    field = "{};{}".format(container_name, supervisor_proc_name)
    try:
        if event == "PROCESS_STATE_EXITED":
            # Fall back to a placeholder when the comm name was never recorded
            comm_name = PID_COMM_FILE_MAP.get(pid, "") or "<unknown>"
            # Create an entry in the Table
            dbconn.hset(table_key, field, "{};{}".format(pid, comm_name))
            # The PID may be absent (e.g. reading comm failed when the process
            # started); pop() avoids a KeyError that would otherwise be logged
            # as a failure even though the entry was written successfully.
            PID_COMM_FILE_MAP.pop(pid, None)
        elif event == "PROCESS_STATE_RUNNING":
            rc, stdout, stderr = subprocess_exec(["cat", "/proc/{}/comm".format(pid)])
            if not rc:
                PID_COMM_FILE_MAP[pid] = stdout.strip()  # Update the comm<->pid map
            # Delete the Event Entry, since the process has restarted
            dbconn.hdel(table_key, field)
    except Exception as e:
        syslog.syslog(syslog.LOG_NOTICE, "handle_proc_event method failed, ERR: {}".format(e))
vivekrnv marked this conversation as resolved.
Show resolved Hide resolved


def get_critical_group_and_process_list():
"""
Expand Down Expand Up @@ -124,20 +167,26 @@ def main(argv):
# Transition from ACKNOWLEDGED to READY
childutils.listener.ready()

# STATE_DB dbconnector
dbconn = DBConnector('STATE_DB', 0)
vivekrnv marked this conversation as resolved.
Show resolved Hide resolved

while True:
file_descriptor_list = select.select([sys.stdin], [], [], SELECT_TIMEOUT_SECS)[0]
if len(file_descriptor_list) > 0:
line = file_descriptor_list[0].readline()
headers = childutils.get_headers(line)
payload = sys.stdin.read(int(headers['len']))
payload_headers, payload_data = childutils.eventdata(payload + '\n')
process_name = payload_headers['processname']
vivekrnv marked this conversation as resolved.
Show resolved Hide resolved
group_name = payload_headers['groupname']
pid = payload_headers['pid']

if process_name in critical_process_list or group_name in critical_group_list:
handle_proc_event(dbconn, headers['eventname'], process_name, pid, container_name)

# Handle the PROCESS_STATE_EXITED event
if headers['eventname'] == 'PROCESS_STATE_EXITED':
payload_headers, payload_data = childutils.eventdata(payload + '\n')

expected = int(payload_headers['expected'])
process_name = payload_headers['processname']
group_name = payload_headers['groupname']

if (process_name in critical_process_list or group_name in critical_group_list) and expected == 0:
is_auto_restart = get_autorestart_state(container_name)
Expand All @@ -152,9 +201,6 @@ def main(argv):

# Handle the PROCESS_STATE_RUNNING event
elif headers['eventname'] == 'PROCESS_STATE_RUNNING':
payload_headers, payload_data = childutils.eventdata(payload + '\n')
process_name = payload_headers['processname']

if process_name in process_under_alerting:
process_under_alerting.pop(process_name)

Expand All @@ -176,4 +222,4 @@ def main(argv):


if __name__ == "__main__":
main(sys.argv[1:])
main(sys.argv[1:])
vivekrnv marked this conversation as resolved.
Show resolved Hide resolved
3 changes: 3 additions & 0 deletions rules/config
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ INCLUDE_NAT = y
# INCLUDE_DHCP_RELAY - build and install dhcp-relay package
INCLUDE_DHCP_RELAY = y

# ENABLE_AUTO_TECH_SUPPORT - Enable the configuration for event-driven techsupport & coredump mgmt feature
ENABLE_AUTO_TECH_SUPPORT = y

# TELEMETRY_WRITABLE - Enable write/config operations via the gNMI interface.
# Uncomment to enable:
# TELEMETRY_WRITABLE = y
Expand Down
6 changes: 6 additions & 0 deletions slave.mk
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,10 @@ ifeq ($(SONIC_INCLUDE_MACSEC),y)
INCLUDE_MACSEC = y
endif

ifeq ($(ENABLE_AUTO_TECH_SUPPORT),y)
ENABLE_AUTO_TECH_SUPPORT = y
endif

include $(RULES_PATH)/functions

ifeq ($(SONIC_USE_PDDF_FRAMEWORK),y)
Expand Down Expand Up @@ -269,6 +273,7 @@ $(info "INCLUDE_DHCP_RELAY" : "$(INCLUDE_DHCP_RELAY)")
$(info "INCLUDE_KUBERNETES" : "$(INCLUDE_KUBERNETES)")
$(info "INCLUDE_MACSEC" : "$(INCLUDE_MACSEC)")
$(info "TELEMETRY_WRITABLE" : "$(TELEMETRY_WRITABLE)")
$(info "ENABLE_AUTO_TECH_SUPPORT" : "$(ENABLE_AUTO_TECH_SUPPORT)")
$(info "PDDF_SUPPORT" : "$(PDDF_SUPPORT)")
$(info "MULTIARCH_QEMU_ENVIRON" : "$(MULTIARCH_QEMU_ENVIRON)")
$(info "SONIC_VERSION_CONTROL_COMPONENTS": "$(SONIC_VERSION_CONTROL_COMPONENTS)")
Expand Down Expand Up @@ -935,6 +940,7 @@ $(addprefix $(TARGET_PATH)/, $(SONIC_INSTALLERS)) : $(TARGET_PATH)/% : \
export include_restapi="$(INCLUDE_RESTAPI)"
export include_nat="$(INCLUDE_NAT)"
export include_sflow="$(INCLUDE_SFLOW)"
export enable_auto_tech_support="$(ENABLE_AUTO_TECH_SUPPORT)"
export include_macsec="$(INCLUDE_MACSEC)"
export include_mgmt_framework="$(INCLUDE_MGMT_FRAMEWORK)"
export include_iccpd="$(INCLUDE_ICCPD)"
Expand Down
1 change: 1 addition & 0 deletions src/sonic-yang-models/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
version='1.0',
data_files=[
('yang-models', ['./yang-models/sonic-acl.yang',
'./yang-models/sonic-auto_techsupport.yang',
'./yang-models/sonic-bgp-common.yang',
'./yang-models/sonic-bgp-global.yang',
'./yang-models/sonic-bgp-neighbor.yang',
Expand Down
12 changes: 11 additions & 1 deletion src/sonic-yang-models/tests/files/sample_config_db.json
Original file line number Diff line number Diff line change
Expand Up @@ -1041,10 +1041,20 @@
"trap_ids": "ip2me",
"trap_group": "queue1_group1"
}
},
"AUTO_TECHSUPPORT": {
"global": {
"auto_invoke_ts" : "enabled",
"coredump_cleanup" : "enabled",
"techsupport_cleanup" : "enabled",
"cooloff" : "180",
"max_techsupport_size" : "10.0",
"core_usage" : "5.0",
"since" : "2 days ago"
}
}

},

"SAMPLE_CONFIG_DB_UNKNOWN": {
"UNKNOWN_TABLE": {
"Error": "This Table is for testing, This Table does not have YANG models."
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"AUTO_TECHSUPPORT_TEST":{
"desc": "Configure auto techsupport params in AUTO_TECHSUPPORT Table"
},
"AUTO_TECHSUPPORT_WRONG_AUTO_INVOKE_TS_VALUE": {
"desc": "Configure state key with invalid value",
"eStrKey": "InvalidValue"
},
"AUTO_TECHSUPPORT_INVALID_COOLOFF_FORMAT": {
"desc" : "Configure cooloff with a value of invalid format",
"eStrKey": "InvalidValue"
},
"AUTO_TECHSUPPORT_OUT_OF_RANGE_DECIMAL": {
"desc" : "Configure a value for core_usage outside the range [0, 100)",
"eStr": "Value \"100.00\" does not satisfy the constraint \"0..99.99\" (range, length, or pattern)."
},
"AUTO_TECHSUPPORT_INVALID_FRACTION_DIGITS": {
"desc" : "Configure a value for max_techsupport_size inside the range [0, 100) but with 3 fractional digits",
"eStrKey": "InvalidValue"
},
"AUTO_TECHSUPPORT_VALID_DECIMAL_VALUE": {
"desc" : "Configure a valid value for max_techsupport_size inside the range [0, 100) with 2 fractional digits"
}
}
vivekrnv marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{
"AUTO_TECHSUPPORT_TEST": {
"sonic-auto_techsupport:sonic-auto_techsupport": {
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
"global": {
"auto_invoke_ts" : "enabled",
"coredump_cleanup" : "enabled",
"techsupport_cleanup" : "enabled",
"cooloff" : "180",
"max_techsupport_size" : "10",
"core_usage" : "5",
"since" : "2 days ago"
}
}
}
},
"AUTO_TECHSUPPORT_WRONG_AUTO_INVOKE_TS_VALUE": {
"sonic-auto_techsupport:sonic-auto_techsupport": {
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
"global": {
"auto_invoke_ts" : "start"
}
}
}
},
"AUTO_TECHSUPPORT_INVALID_COOLOFF_FORMAT": {
"sonic-auto_techsupport:sonic-auto_techsupport": {
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
"global": {
"cooloff" : "whatever"
}
}
}
},
"AUTO_TECHSUPPORT_OUT_OF_RANGE_DECIMAL": {
"sonic-auto_techsupport:sonic-auto_techsupport": {
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
"global": {
"core_usage" : "100.00"
}
}
}
},
"AUTO_TECHSUPPORT_VALID_DECIMAL_VALUE": {
"sonic-auto_techsupport:sonic-auto_techsupport": {
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
"global": {
"max_techsupport_size" : "11.23"
}
}
}
},
"AUTO_TECHSUPPORT_INVALID_FRACTION_DIGITS": {
"sonic-auto_techsupport:sonic-auto_techsupport": {
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
"global": {
"max_techsupport_size" : "11.111"
}
}
}
}
}
vivekrnv marked this conversation as resolved.
Show resolved Hide resolved
Loading