Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Portsorch]: Add lag-name and oid map to counter table COUNTERS_PORT_NAME_MAP #510

Closed
wants to merge 10 commits into from
1 change: 1 addition & 0 deletions orchagent/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ dist_swss_DATA = \
pfc_detect_mellanox.lua \
pfc_detect_broadcom.lua \
pfc_detect_barefoot.lua \
pfc_detect_nephos.lua \
pfc_restore.lua \
watermark_queue.lua \
watermark_pg.lua
Expand Down
1 change: 1 addition & 0 deletions orchagent/orch.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ const char state_db_key_delimiter = '|';
#define BRCM_PLATFORM_SUBSTRING "broadcom"
#define BFN_PLATFORM_SUBSTRING "barefoot"
#define VS_PLATFORM_SUBSTRING "vs"
#define NPS_PLATFORM_SUBSTRING "nephos"

#define CONFIGDB_KEY_SEPARATOR "|"
#define DEFAULT_KEY_SEPARATOR ":"
Expand Down
3 changes: 2 additions & 1 deletion orchagent/orchdaemon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,8 @@ bool OrchDaemon::init()
CFG_PFC_WD_TABLE_NAME
};

if (platform == MLNX_PLATFORM_SUBSTRING)
if (platform == MLNX_PLATFORM_SUBSTRING
|| platform == NPS_PLATFORM_SUBSTRING)
{

static const vector<sai_port_stat_t> portStatIds =
Expand Down
99 changes: 99 additions & 0 deletions orchagent/pfc_detect_nephos.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
-- PFC watchdog detection script (pfc_detect_nephos.lua).
--
-- Runs inside Redis; `redis`, `KEYS` and `ARGV` are supplied by the host.
--
-- KEYS    - queue IDs
-- ARGV[1] - counters db index
-- ARGV[2] - counters table name
-- ARGV[3] - poll time interval
--
-- Results are delivered by publishing '["<queue id>","storm"]' or
-- '["<queue id>","restore"]' on the 'PFC_WD' channel. The returned table is
-- never populated by this script; it is kept so the reply shape seen by the
-- caller stays the same (always an empty list).

local counters_db = ARGV[1]
local counters_table_name = ARGV[2]
local poll_time = tonumber(ARGV[3])

local rets = {}

redis.call('SELECT', counters_db)

-- Evaluate one queue: read its watchdog configuration and counters, publish
-- a storm/restore event when warranted, and store the counters for the next
-- poll. Returns nothing.
local function detect_queue(queue_id)
    local queue_key = counters_table_name .. ':' .. queue_id

    local pfc_wd_status = redis.call('HGET', queue_key, 'PFC_WD_STATUS')
    local pfc_wd_action = redis.call('HGET', queue_key, 'PFC_WD_ACTION')
    local big_red_switch_mode = redis.call('HGET', queue_key, 'BIG_RED_SWITCH_MODE')

    -- Only queues without BIG_RED_SWITCH_MODE set, and which are either
    -- operational or configured with the 'alert' action, are examined.
    if big_red_switch_mode or not (pfc_wd_status == 'operational' or pfc_wd_action == 'alert') then
        return
    end

    local detection_time = redis.call('HGET', queue_key, 'PFC_WD_DETECTION_TIME')
    if not detection_time then
        return
    end
    detection_time = tonumber(detection_time)

    -- The countdown starts from the full detection time when none is stored.
    local time_left = redis.call('HGET', queue_key, 'PFC_WD_DETECTION_TIME_LEFT')
    if time_left then
        time_left = tonumber(time_left)
    else
        time_left = detection_time
    end

    local queue_index = redis.call('HGET', 'COUNTERS_QUEUE_INDEX_MAP', queue_id)
    local port_id = redis.call('HGET', 'COUNTERS_QUEUE_PORT_MAP', queue_id)
    -- Redis hands a missing hash field to Lua as `false`. Concatenating it
    -- below would raise an error and abort the script for every remaining
    -- queue, so a queue without map entries is skipped instead.
    if not (queue_index and port_id) then
        return
    end

    local port_key = counters_table_name .. ':' .. port_id
    local pfc_rx_pkt_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_RX_PKTS'
    local pfc_duration_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_RX_PAUSE_DURATION'

    -- Get all counters
    local occupancy_bytes = redis.call('HGET', queue_key, 'SAI_QUEUE_STAT_CURR_OCCUPANCY_BYTES')
    local packets = redis.call('HGET', queue_key, 'SAI_QUEUE_STAT_PACKETS')
    local pfc_rx_packets = redis.call('HGET', port_key, pfc_rx_pkt_key)
    local pfc_duration = redis.call('HGET', port_key, pfc_duration_key)

    -- Without a full set of current counters nothing is evaluated or saved.
    if not (occupancy_bytes and packets and pfc_rx_packets and pfc_duration) then
        return
    end
    occupancy_bytes = tonumber(occupancy_bytes)
    packets = tonumber(packets)
    pfc_rx_packets = tonumber(pfc_rx_packets)
    pfc_duration = tonumber(pfc_duration)

    local packets_last = redis.call('HGET', queue_key, 'SAI_QUEUE_STAT_PACKETS_last')
    local pfc_rx_packets_last = redis.call('HGET', port_key, pfc_rx_pkt_key .. '_last')
    local pfc_duration_last = redis.call('HGET', port_key, pfc_duration_key .. '_last')
    -- Debug hook: a queue whose DEBUG_STORM field equals "enabled" is
    -- treated as being in a storm regardless of its counters.
    local debug_storm = redis.call('HGET', queue_key, 'DEBUG_STORM')

    -- If this is not a first run, then we have last values available
    if packets_last and pfc_rx_packets_last and pfc_duration_last then
        packets_last = tonumber(packets_last)
        pfc_rx_packets_last = tonumber(pfc_rx_packets_last)
        pfc_duration_last = tonumber(pfc_duration_last)

        local packets_stalled = (packets - packets_last == 0)
        -- Storm condition, any of:
        --   * queue occupied, packet counter unchanged, PFC RX packets grew
        --   * debug override
        --   * queue empty, packet counter unchanged, pause duration grew by
        --     more than 80% of the poll interval
        local in_storm =
            (occupancy_bytes > 0 and packets_stalled and pfc_rx_packets - pfc_rx_packets_last > 0) or
            (debug_storm == "enabled") or
            (occupancy_bytes == 0 and packets_stalled and (pfc_duration - pfc_duration_last) > poll_time * 0.8)

        if in_storm then
            if time_left <= poll_time then
                -- Countdown expired: report the storm and re-arm the timer.
                redis.call('PUBLISH', 'PFC_WD', '["' .. queue_id .. '","storm"]')
                time_left = detection_time
            else
                time_left = time_left - poll_time
            end
        else
            if pfc_wd_action == 'alert' and pfc_wd_status ~= 'operational' then
                redis.call('PUBLISH', 'PFC_WD', '["' .. queue_id .. '","restore"]')
            end
            time_left = detection_time
        end
    end

    -- Save values for next run
    redis.call('HSET', queue_key, 'SAI_QUEUE_STAT_PACKETS_last', packets)
    redis.call('HSET', queue_key, 'PFC_WD_DETECTION_TIME_LEFT', time_left)
    redis.call('HSET', port_key, pfc_rx_pkt_key .. '_last', pfc_rx_packets)
    redis.call('HDEL', port_key, pfc_duration_key .. '_last')
    redis.call('HSET', port_key, pfc_duration_key .. '_last', pfc_duration)
end

-- Iterate through each queue (last to first, as before)
for i = #KEYS, 1, -1 do
    detect_queue(KEYS[i])
end

return rets

8 changes: 8 additions & 0 deletions orchagent/portsorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2748,6 +2748,12 @@ bool PortsOrch::addLag(string lag_alias)
lag.m_members = set<string>();
m_portList[lag_alias] = lag;


/* Add lag name map to counter table */
FieldValueTuple tuple(lag_alias, sai_serialize_object_id(lag_id));
vector<FieldValueTuple> fields;
fields.push_back(tuple);
m_counterTable->set("", fields);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we need the remove part as well.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The modification has been uploaded; please take a moment to review it. I would like to continue uploading code here (sonic-swss), thanks.

PortUpdate update = { lag, true };
notify(SUBJECT_TYPE_PORT_CHANGE, static_cast<void *>(&update));

Expand Down Expand Up @@ -2779,6 +2785,8 @@ bool PortsOrch::removeLag(Port lag)

SWSS_LOG_NOTICE("Remove LAG %s lid:%lx", lag.m_alias.c_str(), lag.m_lag_id);

m_counterTable->del(lag.m_alias);

m_portList.erase(lag.m_alias);

PortUpdate update = { lag, false };
Expand Down