Skip to content

Commit

Permalink
hw-mgmt: infra: Extend leakage events and add infrastructure for L1 s…
Browse files Browse the repository at this point in the history
…witch systems

Extend leakage events and rules.
Extend power events.

Signed-off-by: Vadim Pasternak <[email protected]>
  • Loading branch information
vadimp-nvidia committed Jan 17, 2024
1 parent 0889f39 commit 21aab05
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 13 deletions.
6 changes: 4 additions & 2 deletions usr/lib/udev/rules.d/50-hw-management-events.rules
Original file line number Diff line number Diff line change
Expand Up @@ -476,8 +476,10 @@ SUBSYSTEM=="hwmon", DEVPATH=="/devices/platform/mlxplat/mlxreg-hotplug/hwmon/hwm
SUBSYSTEM=="hwmon", DEVPATH=="/devices/platform/mlxplat/mlxreg-hotplug/hwmon/hwmon*", ENV{LEAKAGE7}=="1", ACTION=="change", RUN+="/usr/bin/hw-management-chassis-events.sh hotplug-event LEAKAGE7 1"
SUBSYSTEM=="hwmon", DEVPATH=="/devices/platform/mlxplat/mlxreg-hotplug/hwmon/hwmon*", ENV{LEAKAGE8}=="0", ACTION=="change", RUN+="/usr/bin/hw-management-chassis-events.sh hotplug-event LEAKAGE8 0"
SUBSYSTEM=="hwmon", DEVPATH=="/devices/platform/mlxplat/mlxreg-hotplug/hwmon/hwmon*", ENV{LEAKAGE8}=="1", ACTION=="change", RUN+="/usr/bin/hw-management-chassis-events.sh hotplug-event LEAKAGE8 1"
SUBSYSTEM=="hwmon", DEVPATH=="/devices/platform/mlxplat/mlxreg-hotplug/hwmon/hwmon*", ENV{LEAKAGE_ROPE}=="0", ACTION=="change", RUN+="/usr/bin/hw-management-chassis-events.sh hotplug-event LEAKAGE_ROPE 0"
SUBSYSTEM=="hwmon", DEVPATH=="/devices/platform/mlxplat/mlxreg-hotplug/hwmon/hwmon*", ENV{LEAKAGE_ROPE}=="1", ACTION=="change", RUN+="/usr/bin/hw-management-chassis-events.sh hotplug-event LEAKAGE_ROPE 1"
SUBSYSTEM=="hwmon", DEVPATH=="/devices/platform/mlxplat/mlxreg-hotplug/hwmon/hwmon*", ENV{LEAKAGE_ROPE1}=="0", ACTION=="change", RUN+="/usr/bin/hw-management-chassis-events.sh hotplug-event LEAKAGE_ROPE1 0"
SUBSYSTEM=="hwmon", DEVPATH=="/devices/platform/mlxplat/mlxreg-hotplug/hwmon/hwmon*", ENV{LEAKAGE_ROPE1}=="1", ACTION=="change", RUN+="/usr/bin/hw-management-chassis-events.sh hotplug-event LEAKAGE_ROPE1 1"
SUBSYSTEM=="hwmon", DEVPATH=="/devices/platform/mlxplat/mlxreg-hotplug/hwmon/hwmon*", ENV{LEAKAGE_ROPE2}=="0", ACTION=="change", RUN+="/usr/bin/hw-management-chassis-events.sh hotplug-event LEAKAGE_ROPE2 0"
SUBSYSTEM=="hwmon", DEVPATH=="/devices/platform/mlxplat/mlxreg-hotplug/hwmon/hwmon*", ENV{LEAKAGE_ROPE2}=="1", ACTION=="change", RUN+="/usr/bin/hw-management-chassis-events.sh hotplug-event LEAKAGE_ROPE2 1"

# External Root of Trust devices events.
SUBSYSTEM=="hwmon", DEVPATH=="/devices/platform/mlxplat/mlxreg-hotplug/hwmon/hwmon*", ENV{EROT1_AP}=="0", ACTION=="change", RUN+="/usr/bin/hw-management-chassis-events.sh hotplug-event EROT1_AP 0"
Expand Down
2 changes: 2 additions & 0 deletions usr/usr/bin/hw-management-helpers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ system_ver_file=/sys/devices/virtual/dmi/id/product_version
devtree_file=$config_path/devtree
dpu2host_events_file=$config_path/dpu_to_host_events
dpu_events_file=$config_path/dpu_events
power_events_file=$config_path/power_events
i2c_bus_def_off_eeprom_cpu_file=$config_path/i2c_bus_def_off_eeprom_cpu
i2c_comex_mon_bus_default_file=$config_path/i2c_comex_mon_bus_default
l1_switch_health_events=("intrusion" "pwm_pg" "thermal1_pdb" "thermal2_pdb")
Expand All @@ -64,6 +65,7 @@ smart_switch_dpu_events=("pg_1v8" "pg_dvdd" "pg_vdd pg_vddio" "thermal_trip" \
"ufm_upgrade_done" "vdd_cpu_hot_alert" "vddq_hot_alert" \
"pg_comparator" "pg_hvdd pg_vdd_cpu" "pg_vddq" \
"vdd_cpu_alert" "vddq_alert")
l1_power_events=("power_button")
ui_tree_sku=`cat $sku_file`
ui_tree_archive="/etc/hw-management-sensors/ui_tree_$ui_tree_sku.tar.gz"
udev_event_log="/var/log/udev_events.log"
Expand Down
21 changes: 13 additions & 8 deletions usr/usr/bin/hw-management-thermal-events.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ max_pwm=4
max_lcs=8
max_erots=2
max_leakage=8
max_leakage_rope=2
max_health_events=4
max_power_events=1
min_module_gbox_ind=2
Expand Down Expand Up @@ -629,21 +630,23 @@ if [ "$1" == "add" ]; then
fi
done
for ((i=1; i<=max_leakage; i+=1)); do
if [ -f "$3""$4"/leakage$i ]; then
if [ -f "$3""$4"/leakage"$i" ]; then
check_n_link "$3""$4"/leakage$i $system_path/leakage"$i"
event=$(< $system_path/leakage"$i")
if [ "$event" -eq 1 ]; then
echo 1 > $events_path/leakage"$i"
fi
fi
done
if [ -f "$3""$4"/leakage_rope ]; then
check_n_link "$3""$4"/leakage_rope $system_path/leakage_rope
event=$(< $system_path/leakage_rope)
if [ "$event" -eq 1 ]; then
echo 1 > $events_path/leakage_rope
for ((i=1; i<=max_leakage_rope; i+=1)); do
if [ -f "$3""$4"/leakage_rope"$i" ]; then
check_n_link "$3""$4"/leakage_rope"$i" $system_path/leakage_rope"$i"
event=$(< $system_path/leakage_rope"$i")
if [ "$event" -eq 1 ]; then
echo 1 > $events_path/leakage_rope"$i"
fi
fi
fi
done
for ((i=0; i<=max_health_events; i+=1)); do
if [ -f "$3""$4"/${l1_switch_health_events[$i]} ]; then
check_n_link "$3""$4"/${l1_switch_health_events[$i]} $system_path/${l1_switch_health_events[$i]}
Expand Down Expand Up @@ -1237,7 +1240,9 @@ else
for ((i=1; i<=max_leakage; i+=1)); do
check_n_unlink $system_path/leakage"$i"
done
check_n_unlink $system_path/leakage_rope
for ((i=1; i<=max_leakage_rope; i+=1)); do
check_n_unlink $system_path/leakage_rope"$i"
done
if [ -d /sys/module/mlxsw_pci ]; then
exit 0
fi
Expand Down
54 changes: 51 additions & 3 deletions usr/usr/bin/hw-management.sh
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ nv4_pci_id=22a3
nv4_rev_a1_pci_id=22a4
dpu_bf3_pci_id=c2d5
leakage_count=0
leakage_rope_count=0
asic_chipup_retry=2
chipup_log_size=4096
reset_dflt_attr_num=18
Expand Down Expand Up @@ -1602,6 +1603,7 @@ mqm9510_specific()
max_tachos=2
hotplug_fans=2
leakage_count=3
leakage_rope_count=1
echo 4 > $config_path/cpld_num
lm_sensors_config="$lm_sensors_configs_path/mqm9510_sensors.conf"
thermal_control_config="$thermal_control_configs_path/tc_config_not_supported.json"
Expand Down Expand Up @@ -1629,6 +1631,7 @@ mqm9520_specific()
max_tachos=2
hotplug_fans=2
leakage_count=8
leakage_rope_count=1
echo 5 > $config_path/cpld_num
lm_sensors_config="$lm_sensors_configs_path/mqm9520_sensors.conf"
thermal_control_config="$thermal_control_configs_path/tc_config_not_supported.json"
Expand Down Expand Up @@ -2130,6 +2133,38 @@ smart_switch_common()
i2c_bus_def_off_eeprom_cpu=$((smart_switch_cpu_bus_offset+6))
}

# Platform-specific initialisation, dispatched for the VMOD0020 case.
#
# Globals read:
#   devtree_file, config_path, power_events_file, reset_dflt_attr_num,
#   lm_sensors_configs_path, thermal_control_configs_path,
#   ndr_cpu_bus_offset, p4300_base_connect_table, p4300_named_busses,
#   p43002_dynamic_i2c_bus_connect_table, l1_power_events
# Globals written:
#   connect_table, named_busses, hotplug_fans, leakage_count,
#   leakage_rope_count, max_tachos, hotplug_pwrs, hotplug_psus, erot_count,
#   asic_control, health_events_count, pwr_events_count,
#   i2c_comex_mon_bus_default, i2c_bus_def_off_eeprom_cpu,
#   lm_sensors_config, thermal_control_config
# Files written (under $config_path unless noted):
#   global_wp_wait_step, global_wp_timeout, cpld_num, named_busses,
#   reset_attr_num, and $power_events_file
#
# All scalar path expansions are quoted: an unquoted redirection target that
# word-splits (e.g. a config path containing a space) makes bash fail with
# "ambiguous redirect" and the file is silently not written.
p4977_ns_specific()
{
	local cpu_bus_offset=18

	# The static connection table is only populated when no devtree file
	# exists.
	if [ ! -e "$devtree_file" ]; then
		# Deliberately unquoted: keeps the original word-splitting of the
		# table entries.
		connect_table+=(${p4300_base_connect_table[@]})
		add_cpu_board_to_connection_table "$cpu_bus_offset"
	fi

	echo 1 > "$config_path"/global_wp_wait_step
	echo 20 > "$config_path"/global_wp_timeout
	echo 2 > "$config_path"/cpld_num

	# Counters and limits consumed later by the common initialisation code.
	hotplug_fans=4
	leakage_count=4
	leakage_rope_count=2
	max_tachos=4
	hotplug_pwrs=0
	hotplug_psus=0
	erot_count=1
	asic_control=0
	health_events_count=2
	pwr_events_count=1
	i2c_comex_mon_bus_default=23
	i2c_bus_def_off_eeprom_cpu=24

	lm_sensors_config="$lm_sensors_configs_path/p4300_sensors.conf"
	thermal_control_config="$thermal_control_configs_path/tc_config_not_supported.json"

	add_i2c_dynamic_bus_dev_connection_table "${p43002_dynamic_i2c_bus_connect_table[@]}"

	# Deliberately unquoted: keeps the original word-splitting of the entries.
	named_busses+=(${p4300_named_busses[@]})
	# NOTE(review): this passes the global ndr_cpu_bus_offset rather than the
	# local cpu_bus_offset declared above - confirm this is intended.
	add_come_named_busses "$ndr_cpu_bus_offset"

	# echo joins the array elements with single spaces; -n omits the newline.
	echo -n "${named_busses[@]}" > "$config_path"/named_busses
	echo -n "${l1_power_events[@]}" > "$power_events_file"
	echo "$reset_dflt_attr_num" > "$config_path"/reset_attr_num
}

check_system()
{
# Check ODM
Expand Down Expand Up @@ -2182,6 +2217,9 @@ check_system()
VMOD0019)
smart_switch_common
;;
VMOD0020)
p4977_ns_specific
;;
*)
product=$(< /sys/devices/virtual/dmi/id/product_name)
case $product in
Expand Down Expand Up @@ -2312,15 +2350,25 @@ create_event_files()
fi
if [ $leakage_count -ne 0 ]; then
for ((i=1; i<=leakage_count; i+=1)); do
check_n_init $events_path/leakage$i 0
check_n_init $events_path/leakage"$i" 0
done
fi
if [ $leakage_rope_count -ne 0 ]; then
for ((i=1; i<=leakage_rope_count; i+=1)); do
check_n_init $events_path/leakage_rope"$i" 0
done
check_n_init $events_path/leakage_rope 0
fi
for ((i=0; i<health_events_count; i+=1)); do
check_n_init $events_path/${l1_switch_health_events[$i]}
done
if [ $pwr_events_count -ne 0 ]; then
check_n_init $events_path/power_button 0
if [ -f "$power_events_file" ]; then
for ((i=0; i<=pwr_events_count; i+=1)); do
check_n_init $events_path/${power_events[$i]} 0
done
else
check_n_init $events_path/power_button 0
fi
fi
if [ $dpu_count -ne 0 ]; then
create_hotplug_smart_switch_event_files "$dpu2host_events_file" "$dpu_events_file"
Expand Down

0 comments on commit 21aab05

Please sign in to comment.