Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

hw-mgmt: services: fix fast sysfs monitor service #1624

Merged
merged 1 commit into from
Mar 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ Options:
start Start hw-mngmt-fast-sysfs-monitor.
stop Stop hw-mngmt-fast-sysfs-monitor.
restart
force-reload Performs hw-mngmt-fast-sysfs-monitor 'stop' and the 'start.
force-reload Performs hw-mngmt-fast-sysfs-monitor 'stop' and the 'start.
"

$EXECUTABLE $ACTION
2 changes: 1 addition & 1 deletion debian/hw-management.hw-management-sysfs-monitor.init
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ Options:
start Start hw-mngmt-sysfs-monitor.
stop Stop hw-mngmt-sysfs-monitor.
restart
force-reload Performs hw-mngmt-sysfs-monitor 'stop' and the 'start.
force-reload Performs hw-mngmt-sysfs-monitor 'stop' and the 'start.
"

$EXECUTABLE $ACTION
2 changes: 1 addition & 1 deletion usr/lib/udev/rules.d/49-hw-management-fast-events.rules
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

###########################################################################
# Copyright (c) 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
Expand Down
66 changes: 44 additions & 22 deletions usr/usr/bin/hw-management-fast-sysfs-monitor.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
##################################################################################
# Copyright (c) 2020 - 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
Expand Down Expand Up @@ -41,49 +41,71 @@ Usage: $(basename "$0") [Options]

Options:
start Start hw-mngmt-fast-sysfs-monitor.
stop Stop hw-mngmt-fast-sysfs-monitor.
stop Stop hw-mngmt-fast-sysfs-monitor.
restart
force-reload Performs hw-mngmt-fast-sysfs-monitor 'stop' and the 'start.
force-reload Performs hw-mngmt-fast-sysfs-monitor 'stop' and the 'start.
"

do_start_fast_sysfs_monitor()
{
log_info "Starting hw-mngmt-fast-sysfs-monitor logic."
# Extract file paths from JSON manually (removes brackets, quotes, and spaces)
# Extract file paths from JSON manually (removes brackets, quotes, and spaces).
FILES=($(grep -o '"[^"]*"' "$FAST_SYSFS_MONITOR_LABELS_JSON" | tr -d '"' ))
# Extract the last element (filename) from each JSON path.
DEV_FILES=($(for file in "${FILES[@]}"; do basename "$file"; done))
# Get the total number of files to check.
TOTAL_FILES=${#FILES[@]}
declare -A FOUND_FILES
declare -A DEVICE_ADDED # Track added devices per file name.
ELAPSED=0
log_info "Monitoring ${TOTAL_FILES} files..."
# Loop until all files exist or timeout is reached.
while (( $(echo "$ELAPSED < $FAST_SYSFS_MONITOR_TIMEOUT" | bc -l) )); do
for FILE in "${FILES[@]}"; do
if [[ -f "$FILE" ]]; then
FOUND_FILES["$FILE"]=1
while (( ELAPSED < FAST_SYSFS_MONITOR_TIMEOUT )); do
# Check and add missing devices from devtree_file.
if [ -e "$devtree_file" ] && [[ ${#DEVICE_ADDED[@]} -lt ${#DEV_FILES[@]} ]]; then
# Read the entire content into an array (space-separated tokens).
read -ra DEVTREE_ENTRIES < "$devtree_file"
# Process every 4 tokens as one device entry.
for ((i = 0; i < ${#DEVTREE_ENTRIES[@]}; i += 4)); do
driver_name=${DEVTREE_ENTRIES[i]}
address=${DEVTREE_ENTRIES[i+1]}
bus=${DEVTREE_ENTRIES[i+2]}
file_name=${DEVTREE_ENTRIES[i+3]}
# Check if file_name is in monitored devices and also hasn't been added yet.
if [[ " ${DEV_FILES[@]} " =~ " $file_name " && -z "${DEVICE_ADDED[$file_name]}" ]]; then
log_info "Adding device: $driver_name $address $bus $file_name"
echo "$driver_name $address" > "/sys/bus/i2c/devices/i2c-$bus/new_device"
sleep 1 # Let the filesystem relax.
DEVICE_ADDED[$file_name]=1
fi
done
# Check if all files are found.
if [[ ${#FOUND_FILES[@]} -eq $TOTAL_FILES ]]; then
log_info "All fast sysfs lables exist. Done."
# Get the current time with milliseconds.
local current_time=$(awk '{print int($1 * 1000)}' /proc/uptime)
# Write the current time into the fast sysfs ready file.
echo "$current_time" > "$FAST_SYSFS_MONITOR_RDY_FILE"
exit 0
fi
# Check monitored files.
for FILE in "${FILES[@]}"; do
if [[ -f "$FILE" ]]; then
FOUND_FILES["$FILE"]=1
fi
# Wait and increment elapsed time
sleep "$FAST_SYSFS_MONITOR_INTERVAL"
ELAPSED=$(echo "$ELAPSED + $FAST_SYSFS_MONITOR_INTERVAL" | bc)
done

# Exit if all monitored files exist.
if [[ ${#FOUND_FILES[@]} -eq $TOTAL_FILES ]]; then
log_info "All fast sysfs labels exist. Done."
# Get the current time in milliseconds.
local current_time=$(awk '{print int($1 * 1000)}' /proc/uptime)
# Write the current time into the fast sysfs ready file.
echo "$current_time" > "$FAST_SYSFS_MONITOR_RDY_FILE"
exit 0
fi
# Sleep for the defined interval.
sleep "$FAST_SYSFS_MONITOR_INTERVAL"
# Increment elapsed time.
(( ELAPSED += FAST_SYSFS_MONITOR_INTERVAL ))
done
log_info "Timeout reached. Not all files were found."
exit 1
}

do_stop_fast_sysfs_monitor()
{
# Remove older WD process if it exists.
# Remove older fast-sysfs-monitor process if it exists.
if [ -f "$FAST_SYSFS_MONITOR_PID_FILE" ]; then
local FAST_MONITOR_PID
FAST_MONITOR_PID=$(cat "$FAST_SYSFS_MONITOR_PID_FILE")
Expand Down
2 changes: 1 addition & 1 deletion usr/usr/bin/hw-management-helpers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ SYSFS_MONITOR_RESET_FILE_B="/tmp/sysfs_monitor_time_b"
SYSFS_MONITOR_PID_FILE="/tmp/sysfs_monitor.pid"

# hw-mngmt-fast-sysfs-monitor GLOBALS
FAST_SYSFS_MONITOR_INTERVAL=0.5 # 500 milliseconds
FAST_SYSFS_MONITOR_INTERVAL=1 # 1 second
FAST_SYSFS_MONITOR_TIMEOUT=120 # 2 minutes
FAST_SYSFS_MONITOR_LABELS_JSON="/etc/hw-management-fast-sysfs-monitor/fast_sysfs_labels.json"
FAST_SYSFS_MONITOR_PID_FILE="/tmp/fast_sysfs_monitor.pid"
Expand Down
8 changes: 4 additions & 4 deletions usr/usr/bin/hw-management-sysfs-monitor.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
##################################################################################
# Copyright (c) 2020 - 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
Expand Down Expand Up @@ -41,9 +41,9 @@ Usage: $(basename "$0") [Options]

Options:
start Start hw-mngmt-sysfs-monitor.
stop Stop hw-mngmt-sysfs-monitor.
stop Stop hw-mngmt-sysfs-monitor.
restart
force-reload Performs hw-mngmt-sysfs-monitor 'stop' and the 'start.
force-reload Performs hw-mngmt-sysfs-monitor 'stop' and the 'start.
"

do_start_sysfs_monitor()
Expand Down Expand Up @@ -85,7 +85,7 @@ do_start_sysfs_monitor()

do_stop_sysfs_monitor()
{
# Remove older WD process if it exists.
# Remove older sysfs-monitor process if it exists.
if [ -f "$SYSFS_MONITOR_PID_FILE" ]; then
local MONITOR_PID
MONITOR_PID=$(cat "$SYSFS_MONITOR_PID_FILE")
Expand Down