Skip to content

Commit

Permalink
Add support for reconciliation after warm restart (#76)
Browse files Browse the repository at this point in the history
Description of PR
Summary:
Fixes # (issue)

This PR is to add support for linkmgrd process reconciliation after warm restart.

sign-off: Jing Zhang [email protected]

Type of change
 Bug fix
 New feature
 Doc/Design
 Unit test
Approach
What is the motivation for this PR?
One step of the warm reboot procedure for dual ToR is to configure the switch into manual mode. Before the warm reboot finalizer executes config save, we want to configure the switch back into auto mode, so config_db.json will be consistent before and after the reboot.

How did you do it?
When linkmgrd is initializing, get the systemwide warm reboot flag from WARM_RESTART_ENABLE_TABLE. If flag == true, start a reconciliation timer.
Maintain a mux port count based on the MUX_CABLE|PORTNAME count. When a port completes reconciliation and the warm restart flag is true, configure it back into auto mode and reduce the reconciliation port count by 1.
If reconciliation timer expires or port count == 0, set state to reconciled in WARM_RESTART_TABLE|linkmgrd.
How did you verify/test it?
Unit tests
Tested on dual ToR testbed. Ports were auto mode after warm restart completed. Entry WARM_RESTART_TABLE|linkmgrd was added as expected.
  • Loading branch information
zjswhhh authored Jul 18, 2022
1 parent 58d8aae commit 9044962
Show file tree
Hide file tree
Showing 13 changed files with 352 additions and 2 deletions.
49 changes: 49 additions & 0 deletions src/DbInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -715,6 +715,7 @@ void DbInterface::getServerIpAddress(std::shared_ptr<swss::DBConnector> configDb
std::vector<swss::KeyOpFieldsValuesTuple> entries;

configDbMuxCableTable.getContent(entries);
mMuxManagerPtr->updateWarmRestartReconciliationCount(entries.size());
processServerIpAddress(entries);
}

Expand Down Expand Up @@ -817,6 +818,54 @@ void DbInterface::getSoCIpAddress(std::shared_ptr<swss::DBConnector> configDbCon
processSoCIpAddress(entries);
}

//
// ---> warmRestartReconciliation(const std::string &portName);
//
// per-port reconciliation step run after a warm restart
//
void DbInterface::warmRestartReconciliation(const std::string &portName)
{
    MUXLOGDEBUG(portName);

    // Nothing to reconcile unless the warm start flag is set.
    if (!isWarmStart()) {
        return;
    }

    // Request the switch of this port to "auto" mode, then report one fewer
    // port pending reconciliation to the MuxManager.
    setMuxMode(portName, "auto");
    mMuxManagerPtr->updateWarmRestartReconciliationCount(-1);
}

//
// ---> setMuxMode
//
// set config db mux mode
//
void DbInterface::setMuxMode(const std::string &portName, const std::string state)
{
MUXLOGDEBUG(portName);

boost::asio::io_service &ioService = mStrand.context();
ioService.post(mStrand.wrap(boost::bind(
&DbInterface::handleSetMuxMode,
this,
portName,
state
)));
}

//
// ---> handleSetMuxmode
//
// handle set mux mode
//
void DbInterface::handleSetMuxMode(const std::string &portName, const std::string state)
{
MUXLOGWARNING(boost::format("%s: configuring mux mode to %s after warm restart") % portName % state);

std::shared_ptr<swss::DBConnector> configDbPtr = std::make_shared<swss::DBConnector> ("CONFIG_DB", 0);
std::shared_ptr<swss::Table> configDbMuxCableTablePtr = std::make_shared<swss::Table> (
configDbPtr.get(), CFG_MUX_CABLE_TABLE_NAME
);
configDbMuxCableTablePtr->hset(portName, "state", state);
}

//
// ---> processMuxPortConfigNotifiction(std::deque<swss::KeyOpFieldsValuesTuple> &entries);
//
Expand Down
63 changes: 63 additions & 0 deletions src/DbInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "swss/dbconnector.h"
#include "swss/producerstatetable.h"
#include "swss/subscriberstatetable.h"
#include "swss/warm_restart.h"

#include "link_manager/LinkManagerStateMachineActiveStandby.h"
#include "mux_state/MuxState.h"
Expand Down Expand Up @@ -274,6 +275,56 @@ class DbInterface
*/
void stopSwssNotificationPoll() {mPollSwssNotifcation = false;};

/**
* @method setMuxMode
*
* @brief set config db mux mode
*
* @param portName (in) MUX port name
* @param state (in) MUX mode state
*
* @return none
*/
void setMuxMode(const std::string &portName, const std::string state);

/**
* @method warmRestartReconciliation
*
* @brief port warm restart reconciliation procedure
*
* @param portName(in) Mux port name
*
* @return none
*/
void warmRestartReconciliation(const std::string &portName);

/**
* @method isWarmStart
*
* @brief report whether the process is running in a warm start context;
*        virtual so a derived class can override the answer
*
* @return value returned by swss::WarmStart::isWarmStart()
*/
virtual bool isWarmStart()
{
    return swss::WarmStart::isWarmStart();
}

/**
* @method getWarmStartTimer
*
* @brief fetch the warm start timer value registered for application
*        "linkmgrd" in docker "mux"
*
* @return timeout in sec, as returned by swss::WarmStart::getWarmStartTimer()
*/
virtual uint32_t getWarmStartTimer()
{
    return swss::WarmStart::getWarmStartTimer("linkmgrd", "mux");
}

/**
* @method setWarmStartStateReconciled
*
* @brief mark the "linkmgrd" application as RECONCILED through
*        swss::WarmStart::setWarmStartState()
*
* @return none
*/
virtual void setWarmStartStateReconciled()
{
    swss::WarmStart::setWarmStartState("linkmgrd", swss::WarmStart::RECONCILED);
}

private:
friend class test::MuxManagerTest;

Expand Down Expand Up @@ -399,6 +450,18 @@ class DbInterface
const uint64_t expectedPacketCount
);

/**
* @method handleSetMuxMode
*
* @brief handle set mux mode
*
* @param portName (in) MUX port name
* @param state (in) MUX mode state
*
* @return none
*/
virtual void handleSetMuxMode(const std::string &portName, const std::string state);

/**
*@method processTorMacAddress
*
Expand Down
9 changes: 9 additions & 0 deletions src/LinkMgrdMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
#include <boost/lexical_cast.hpp>
#include <boost/program_options.hpp>

#include "swss/warm_restart.h"

#include "MuxManager.h"
#include "MuxPort.h"
#include "common/MuxConfig.h"
Expand Down Expand Up @@ -123,6 +125,13 @@ int main(int argc, const char* argv[])
// initialize static data
link_prober::IcmpPayload::generateGuid();

// warm restart static
swss::WarmStart::initialize("linkmgrd", "mux");
swss::WarmStart::checkWarmStart("linkmgrd", "mux");
if (swss::WarmStart::isWarmStart()) {
swss::WarmStart::setWarmStartState("linkmgrd", swss::WarmStart::INITIALIZED);
}

std::shared_ptr<mux::MuxManager> muxManagerPtr = std::make_shared<mux::MuxManager> ();
muxManagerPtr->initialize(measureSwitchover, defaultRoute);
muxManagerPtr->run();
Expand Down
70 changes: 69 additions & 1 deletion src/MuxManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ MuxManager::MuxManager() :
mMuxConfig(),
mWork(mIoService),
mSignalSet(boost::asio::signal_set(mIoService, SIGINT, SIGTERM)),
mDbInterfacePtr(std::make_shared<mux::DbInterface> (this, &mIoService))
mDbInterfacePtr(std::make_shared<mux::DbInterface> (this, &mIoService)),
mStrand(mIoService),
mReconciliationTimer(mIoService)
{
mSignalSet.add(SIGUSR1);
mSignalSet.add(SIGUSR2);
Expand Down Expand Up @@ -85,6 +87,11 @@ void MuxManager::initialize(bool enable_feature_measurement, bool enable_feature

mDbInterfacePtr->initialize();

if (mDbInterfacePtr->isWarmStart()) {
MUXLOGINFO("Detected warm restart context, starting reconciliation timer.");
startWarmRestartReconciliationTimer(mDbInterfacePtr->getWarmStartTimer());
}

mMuxConfig.enableSwitchoverMeasurement(enable_feature_measurement);
mMuxConfig.enableDefaultRouteFeature(enable_feature_default_route);
}
Expand Down Expand Up @@ -507,4 +514,65 @@ void MuxManager::generateServerMac(uint16_t serverId, std::array<uint8_t, ETHER_
}
}

// ---> updateWarmRestartReconciliationCount(int increment);
//
// update warm restart reconciliation count
//
void MuxManager::updateWarmRestartReconciliationCount(int increment)
{
MUXLOGDEBUG(increment);

boost::asio::io_service &ioService = mStrand.context();

ioService.post(mStrand.wrap(boost::bind(
&MuxManager::handleUpdateReconciliationCount,
this,
increment
)));
}

//
// ---> handleUpdateReconciliationCount(int increment);
//
// apply an increment to the reconciliation port count and cancel the
// reconciliation timer once the count is zero
//
void MuxManager::handleUpdateReconciliationCount(int increment)
{
    // The value logged is the count before the increment is applied.
    MUXLOGDEBUG(mPortReconciliationCount);

    // The counter is an unsigned 16-bit member, so a negative increment
    // larger than the current count wraps around rather than going negative.
    mPortReconciliationCount += increment;

    if (mPortReconciliationCount != 0) {
        return;
    }

    // No ports left pending: stop waiting on the reconciliation timer.
    mReconciliationTimer.cancel();
}

// ---> startWarmRestartReconciliationTimer
//
// start warm restart reconciliation timer
//
void MuxManager::startWarmRestartReconciliationTimer(uint32_t timeout)
{
mReconciliationTimer.expires_from_now(boost::posix_time::seconds(
timeout == 0? mMuxConfig.getMuxReconciliationTimeout_sec():timeout
));
mReconciliationTimer.async_wait(mStrand.wrap(boost::bind(
&MuxManager::handleWarmRestartReconciliationTimeout,
this,
boost::asio::placeholders::error
)));
}

//
// ---> handleWarmRestartReconciliationTimeout(const boost::system::error_code errorCode);
//
// completion handler of the warm restart reconciliation timer
//
void MuxManager::handleWarmRestartReconciliationTimeout(const boost::system::error_code errorCode)
{
    // A success code means the wait ran to expiry; only that case is logged.
    const bool timerExpired = (errorCode == boost::system::errc::success);
    if (timerExpired) {
        MUXLOGWARNING("Reconciliation timed out after warm restart, set service to reconciled now.");
    }

    // The reconciled state is set for every error code, not only on expiry.
    mDbInterfacePtr->setWarmStartStateReconciled();
}

} /* namespace mux */
47 changes: 47 additions & 0 deletions src/MuxManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,17 @@ class MuxManager
*/
void addOrUpdateDefaultRouteState(bool is_v4, const std::string &routeState);

/**
* @method updateWarmRestartReconciliationCount
*
* @brief update warm restart reconciliation count; the change is posted
*        to the io service and applied by handleUpdateReconciliationCount
*
* @param increment (in) signed amount added to the reconciliation port
*                       count (negative values decrease it)
*
* @return none
*/
void updateWarmRestartReconciliationCount(int increment);

private:
/**
*@method getMuxPortCableType
Expand Down Expand Up @@ -505,6 +516,38 @@ class MuxManager
*/
void setDbInterfacePtr(std::shared_ptr<mux::DbInterface> dbInterfacePtr) {mDbInterfacePtr = dbInterfacePtr;};

private:
/**
* @method startWarmRestartReconciliationTimer
*
* @brief start warm restart reconciliation timer
*
* @param timeout (in) timeout in sec; 0 (the default) selects the value
*                     returned by MuxConfig::getMuxReconciliationTimeout_sec()
*
* @return none
*/
void startWarmRestartReconciliationTimer(uint32_t timeout=0);

/**
* @method handleWarmRestartReconciliationTimeout
*
* @brief handle warm restart reconciliationTimeout
*
* @param errorCode (in) Boost error code
*
* @return none
*/
void handleWarmRestartReconciliationTimeout(const boost::system::error_code errorCode);

/**
* @method handleUpdateReconciliationCount
*
* @brief handler of updating reconciliation port count
*
* @param increment
*
* @return none
*/
void handleUpdateReconciliationCount(int increment);

private:
common::MuxConfig mMuxConfig;

Expand All @@ -513,6 +556,10 @@ class MuxManager
boost::thread_group mThreadGroup;
boost::asio::signal_set mSignalSet;

boost::asio::io_service::strand mStrand;
boost::asio::deadline_timer mReconciliationTimer;
uint16_t mPortReconciliationCount = 0;

std::shared_ptr<mux::DbInterface> mDbInterfacePtr;

PortMap mPortMap;
Expand Down
12 changes: 12 additions & 0 deletions src/MuxPort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -405,4 +405,16 @@ void MuxPort::probeMuxState()
}
}

//
// ---> warmRestartReconciliation();
//
// port warm restart reconciliation procedure
//
void MuxPort::warmRestartReconciliation()
{
    // Ports whose configured mode is already Auto need no reconciliation.
    // NOTE(review): such ports never reach DbInterface::warmRestartReconciliation,
    // which is where the pending port count is decremented - confirm this
    // is intended.
    const bool alreadyAuto = (mMuxPortConfig.getMode() == common::MuxPortConfig::Mode::Auto);
    if (alreadyAuto) {
        return;
    }

    mDbInterfacePtr->warmRestartReconciliation(mMuxPortConfig.getPortName());
}

} /* namespace mux */
9 changes: 9 additions & 0 deletions src/MuxPort.h
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,15 @@ class MuxPort: public std::enable_shared_from_this<MuxPort>
*/
void resetPckLossCount();

/**
* @method warmRestartReconciliation
*
* @brief port warm restart reconciliation procedure
*
* @return none
*/
void warmRestartReconciliation();

protected:
friend class test::MuxManagerTest;
friend class test::FakeMuxPort;
Expand Down
11 changes: 11 additions & 0 deletions src/common/MuxConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,15 @@ class MuxConfig
*/
inline bool getIfEnableUseTorMac() {return mEnableUseTorMac;};

/**
* @method getMuxReconciliationTimeout_sec
*
* @brief getter of the mux reconciliation timeout
*
* @return timeout in sec
*/
inline uint32_t getMuxReconciliationTimeout_sec()
{
    return mMuxReconciliationTimeout_sec;
}

private:
uint8_t mNumberOfThreads = 5;
uint32_t mTimeoutIpv4_msec = 100;
Expand All @@ -387,6 +396,8 @@ class MuxConfig
bool mEnableSwitchoverMeasurement = false;
uint32_t mDecreasedTimeoutIpv4_msec = 10;

uint32_t mMuxReconciliationTimeout_sec = 10;

bool mEnableDefaultRouteFeature = false;
bool mUseWellKnownMacActiveActive = true;

Expand Down
2 changes: 2 additions & 0 deletions src/link_manager/LinkManagerStateMachineActiveStandby.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,8 @@ void ActiveStandbyStateMachine::activateStateMachine()
mStartProbingFnPtr();

updateMuxLinkmgrState();

mMuxPortPtr->warmRestartReconciliation();
}
}

Expand Down
Loading

0 comments on commit 9044962

Please sign in to comment.