From 904496288307a9648c8ead2ca05ede03f4af83a9 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Mon, 18 Jul 2022 15:38:04 -0700 Subject: [PATCH] Add support for reconciliation after warm restart (#76) Description of PR Summary: Fixes # (issue) This PR adds support for linkmgrd process reconciliation after warm restart. sign-off: Jing Zhang zhangjing@microsoft.com Type of change Bug fix New feature Doc/Design Unit test Approach What is the motivation for this PR? One step of the warm reboot procedure for dual ToR is to configure the switch into manual mode. Before the warm reboot finalizer executes config save, we want to configure the switch back into auto mode, so that config_db.json will be consistent before and after the reboot. How did you do it? When linkmgrd is initializing, get the system-wide warm reboot flag from WARM_RESTART_ENABLE_TABLE. If flag == true, start a reconciliation timer. Maintain a mux port count based on the MUX_CABLE|PORTNAME count. When one port completes reconciliation, if warm restart flag == true, configure it back into auto mode and reduce the reconciliation port count by 1. If the reconciliation timer expires or port count == 0, set state to reconciled in WARM_RESTART_TABLE|linkmgrd. How did you verify/test it? Unit tests. Tested on a dual ToR testbed. Ports were in auto mode after warm restart completed. Entry WARM_RESTART_TABLE|linkmgrd was added as expected. 
--- src/DbInterface.cpp | 49 +++++++++++++ src/DbInterface.h | 63 +++++++++++++++++ src/LinkMgrdMain.cpp | 9 +++ src/MuxManager.cpp | 70 ++++++++++++++++++- src/MuxManager.h | 47 +++++++++++++ src/MuxPort.cpp | 12 ++++ src/MuxPort.h | 9 +++ src/common/MuxConfig.h | 11 +++ .../LinkManagerStateMachineActiveStandby.cpp | 2 + test/FakeDbInterface.cpp | 20 ++++++ test/FakeDbInterface.h | 9 ++- test/MuxManagerTest.cpp | 50 +++++++++++++ test/MuxManagerTest.h | 3 + 13 files changed, 352 insertions(+), 2 deletions(-) diff --git a/src/DbInterface.cpp b/src/DbInterface.cpp index c10db44e..b1401eb2 100644 --- a/src/DbInterface.cpp +++ b/src/DbInterface.cpp @@ -715,6 +715,7 @@ void DbInterface::getServerIpAddress(std::shared_ptr configDb std::vector entries; configDbMuxCableTable.getContent(entries); + mMuxManagerPtr->updateWarmRestartReconciliationCount(entries.size()); processServerIpAddress(entries); } @@ -817,6 +818,54 @@ void DbInterface::getSoCIpAddress(std::shared_ptr configDbCon processSoCIpAddress(entries); } +// ---> warmRestartReconciliation(const std::string &portName); +// +// port warm restart reconciliation procedure +// +void DbInterface::warmRestartReconciliation(const std::string &portName) +{ + MUXLOGDEBUG(portName); + + if (isWarmStart()) { + setMuxMode(portName, "auto"); + mMuxManagerPtr->updateWarmRestartReconciliationCount(-1); + } +} + +// +// ---> setMuxMode +// +// set config db mux mode +// +void DbInterface::setMuxMode(const std::string &portName, const std::string state) +{ + MUXLOGDEBUG(portName); + + boost::asio::io_service &ioService = mStrand.context(); + ioService.post(mStrand.wrap(boost::bind( + &DbInterface::handleSetMuxMode, + this, + portName, + state + ))); +} + +// +// ---> handleSetMuxmode +// +// handle set mux mode +// +void DbInterface::handleSetMuxMode(const std::string &portName, const std::string state) +{ + MUXLOGWARNING(boost::format("%s: configuring mux mode to %s after warm restart") % portName % state); + + std::shared_ptr 
configDbPtr = std::make_shared ("CONFIG_DB", 0); + std::shared_ptr configDbMuxCableTablePtr = std::make_shared ( + configDbPtr.get(), CFG_MUX_CABLE_TABLE_NAME + ); + configDbMuxCableTablePtr->hset(portName, "state", state); +} + // // ---> processMuxPortConfigNotifiction(std::deque &entries); // diff --git a/src/DbInterface.h b/src/DbInterface.h index f9b4a613..0a288543 100644 --- a/src/DbInterface.h +++ b/src/DbInterface.h @@ -34,6 +34,7 @@ #include "swss/dbconnector.h" #include "swss/producerstatetable.h" #include "swss/subscriberstatetable.h" +#include "swss/warm_restart.h" #include "link_manager/LinkManagerStateMachineActiveStandby.h" #include "mux_state/MuxState.h" @@ -274,6 +275,56 @@ class DbInterface */ void stopSwssNotificationPoll() {mPollSwssNotifcation = false;}; + /** + * @method setMuxMode + * + * @brief set config db mux mode + * + * @param portName (in) MUX port name + * @param state (in) MUX mode state + * + * @return none + */ + void setMuxMode(const std::string &portName, const std::string state); + + /** + * @method warmRestartReconciliation + * + * @brief port warm restart reconciliation procedure + * + * @param portName(in) Mux port name + * + * @return none + */ + void warmRestartReconciliation(const std::string &portName); + + /** + * @method isWarmStart + * + * @brief is warm start or not + * + * @return system flag for warm start context + */ + virtual bool isWarmStart(){return swss::WarmStart::isWarmStart();}; + + /** + * @method getWarmStartTimer + * + * @brief get warm start time out in sec + * + * @return timeout in sec + */ + virtual uint32_t getWarmStartTimer(){return swss::WarmStart::getWarmStartTimer("linkmgrd", "mux");}; + + /** + * @method setWarmStartStateReconciled + * + * @brief set warm start state reconciled + * + * @return none + */ + virtual void setWarmStartStateReconciled(){swss::WarmStart::setWarmStartState("linkmgrd", swss::WarmStart::RECONCILED);}; + private: friend class test::MuxManagerTest; @@ -399,6 +450,18 @@ 
class DbInterface const uint64_t expectedPacketCount ); + /** + * @method handleSetMuxMode + * + * @brief handle set mux mode + * + * @param portName (in) MUX port name + * @param state (in) MUX mode state + * + * @return none + */ + virtual void handleSetMuxMode(const std::string &portName, const std::string state); + /** *@method processTorMacAddress * diff --git a/src/LinkMgrdMain.cpp b/src/LinkMgrdMain.cpp index dbf8f429..9d070f2a 100644 --- a/src/LinkMgrdMain.cpp +++ b/src/LinkMgrdMain.cpp @@ -26,6 +26,8 @@ #include #include +#include "swss/warm_restart.h" + #include "MuxManager.h" #include "MuxPort.h" #include "common/MuxConfig.h" @@ -123,6 +125,13 @@ int main(int argc, const char* argv[]) // initialize static data link_prober::IcmpPayload::generateGuid(); + // warm restart static + swss::WarmStart::initialize("linkmgrd", "mux"); + swss::WarmStart::checkWarmStart("linkmgrd", "mux"); + if (swss::WarmStart::isWarmStart()) { + swss::WarmStart::setWarmStartState("linkmgrd", swss::WarmStart::INITIALIZED); + } + std::shared_ptr muxManagerPtr = std::make_shared (); muxManagerPtr->initialize(measureSwitchover, defaultRoute); muxManagerPtr->run(); diff --git a/src/MuxManager.cpp b/src/MuxManager.cpp index 566d7821..11e64b1f 100644 --- a/src/MuxManager.cpp +++ b/src/MuxManager.cpp @@ -43,7 +43,9 @@ MuxManager::MuxManager() : mMuxConfig(), mWork(mIoService), mSignalSet(boost::asio::signal_set(mIoService, SIGINT, SIGTERM)), - mDbInterfacePtr(std::make_shared (this, &mIoService)) + mDbInterfacePtr(std::make_shared (this, &mIoService)), + mStrand(mIoService), + mReconciliationTimer(mIoService) { mSignalSet.add(SIGUSR1); mSignalSet.add(SIGUSR2); @@ -85,6 +87,11 @@ void MuxManager::initialize(bool enable_feature_measurement, bool enable_feature mDbInterfacePtr->initialize(); + if (mDbInterfacePtr->isWarmStart()) { + MUXLOGINFO("Detected warm restart context, starting reconciliation timer."); + startWarmRestartReconciliationTimer(mDbInterfacePtr->getWarmStartTimer()); + } + 
mMuxConfig.enableSwitchoverMeasurement(enable_feature_measurement); mMuxConfig.enableDefaultRouteFeature(enable_feature_default_route); } @@ -507,4 +514,65 @@ void MuxManager::generateServerMac(uint16_t serverId, std::array updateWarmRestartReconciliationCount(int increment); +// +// update warm restart reconciliation count +// +void MuxManager::updateWarmRestartReconciliationCount(int increment) +{ + MUXLOGDEBUG(increment); + + boost::asio::io_service &ioService = mStrand.context(); + + ioService.post(mStrand.wrap(boost::bind( + &MuxManager::handleUpdateReconciliationCount, + this, + increment + ))); +} + +// ---> handleUpdateReconciliationCount(int increment); +// +// handler of updating reconciliation port count +// +void MuxManager::handleUpdateReconciliationCount(int increment) +{ + MUXLOGDEBUG(mPortReconciliationCount); + + mPortReconciliationCount += increment; + + if(mPortReconciliationCount == 0) { + mReconciliationTimer.cancel(); + } +} + +// ---> startWarmRestartReconciliationTimer +// +// start warm restart reconciliation timer +// +void MuxManager::startWarmRestartReconciliationTimer(uint32_t timeout) +{ + mReconciliationTimer.expires_from_now(boost::posix_time::seconds( + timeout == 0? 
mMuxConfig.getMuxReconciliationTimeout_sec():timeout + )); + mReconciliationTimer.async_wait(mStrand.wrap(boost::bind( + &MuxManager::handleWarmRestartReconciliationTimeout, + this, + boost::asio::placeholders::error + ))); +} + +// ---> handleWarmRestartReconciliationTimeout +// +// handle warm restart reconciliationTimeout +// +void MuxManager::handleWarmRestartReconciliationTimeout(const boost::system::error_code errorCode) +{ + if (errorCode == boost::system::errc::success) { + MUXLOGWARNING("Reconciliation timed out after warm restart, set service to reconciled now."); + } + + mDbInterfacePtr->setWarmStartStateReconciled(); +} + } /* namespace mux */ diff --git a/src/MuxManager.h b/src/MuxManager.h index ae6b4407..59b75165 100644 --- a/src/MuxManager.h +++ b/src/MuxManager.h @@ -441,6 +441,17 @@ class MuxManager */ void addOrUpdateDefaultRouteState(bool is_v4, const std::string &routeState); + /** + * @method updateWarmRestartReconciliationCount + * + * @brief update warm restart reconciliation count + * + * @param increment + * + * @return none + */ + void updateWarmRestartReconciliationCount(int increment); + private: /** *@method getMuxPortCableType @@ -505,6 +516,38 @@ class MuxManager */ void setDbInterfacePtr(std::shared_ptr dbInterfacePtr) {mDbInterfacePtr = dbInterfacePtr;}; +private: + /** + * @method startWarmRestartReconciliationTimer + * + * @brief start warm restart reconciliation timer + * + * @return none + */ + void startWarmRestartReconciliationTimer(uint32_t timeout=0); + + /** + * @method handleWarmRestartReconciliationTimeout + * + * @brief handle warm restart reconciliationTimeout + * + * @param errorCode (in) Boost error code + * + * @return none + */ + void handleWarmRestartReconciliationTimeout(const boost::system::error_code errorCode); + + /** + * @method handleUpdateReconciliationCount + * + * @brief handler of updating reconciliation port count + * + * @param increment + * + * @return none + */ + void 
handleUpdateReconciliationCount(int increment); + private: common::MuxConfig mMuxConfig; @@ -513,6 +556,10 @@ class MuxManager boost::thread_group mThreadGroup; boost::asio::signal_set mSignalSet; + boost::asio::io_service::strand mStrand; + boost::asio::deadline_timer mReconciliationTimer; + uint16_t mPortReconciliationCount = 0; + std::shared_ptr mDbInterfacePtr; PortMap mPortMap; diff --git a/src/MuxPort.cpp b/src/MuxPort.cpp index 9375d170..b6bb8fd8 100644 --- a/src/MuxPort.cpp +++ b/src/MuxPort.cpp @@ -405,4 +405,16 @@ void MuxPort::probeMuxState() } } +// +// ---> warmRestartReconciliation(); +// +// brief port warm restart reconciliation procedure +// +void MuxPort::warmRestartReconciliation() +{ + if (mMuxPortConfig.getMode() != common::MuxPortConfig::Mode::Auto) { + mDbInterfacePtr->warmRestartReconciliation(mMuxPortConfig.getPortName()); + } +} + } /* namespace mux */ diff --git a/src/MuxPort.h b/src/MuxPort.h index 128f7358..02252707 100644 --- a/src/MuxPort.h +++ b/src/MuxPort.h @@ -386,6 +386,15 @@ class MuxPort: public std::enable_shared_from_this */ void resetPckLossCount(); + /** + * @method warmRestartReconciliation + * + * @brief port warm restart reconciliation procedure + * + * @return none + */ + void warmRestartReconciliation(); + protected: friend class test::MuxManagerTest; friend class test::FakeMuxPort; diff --git a/src/common/MuxConfig.h b/src/common/MuxConfig.h index 2ab20232..afd9f1aa 100644 --- a/src/common/MuxConfig.h +++ b/src/common/MuxConfig.h @@ -374,6 +374,15 @@ class MuxConfig */ inline bool getIfEnableUseTorMac() {return mEnableUseTorMac;}; + /** + * @method getMuxReconciliationTimeout + * + * @brief getter of mux reconciliation time out + * + * @return timeout in sec + */ + inline uint32_t getMuxReconciliationTimeout_sec(){return mMuxReconciliationTimeout_sec;}; + private: uint8_t mNumberOfThreads = 5; uint32_t mTimeoutIpv4_msec = 100; @@ -387,6 +396,8 @@ class MuxConfig bool mEnableSwitchoverMeasurement = false; uint32_t 
mDecreasedTimeoutIpv4_msec = 10; + uint32_t mMuxReconciliationTimeout_sec = 10; + bool mEnableDefaultRouteFeature = false; bool mUseWellKnownMacActiveActive = true; diff --git a/src/link_manager/LinkManagerStateMachineActiveStandby.cpp b/src/link_manager/LinkManagerStateMachineActiveStandby.cpp index 751068d1..aa09bd96 100644 --- a/src/link_manager/LinkManagerStateMachineActiveStandby.cpp +++ b/src/link_manager/LinkManagerStateMachineActiveStandby.cpp @@ -426,6 +426,8 @@ void ActiveStandbyStateMachine::activateStateMachine() mStartProbingFnPtr(); updateMuxLinkmgrState(); + + mMuxPortPtr->warmRestartReconciliation(); } } diff --git a/test/FakeDbInterface.cpp b/test/FakeDbInterface.cpp index a02c4b9c..1b559280 100644 --- a/test/FakeDbInterface.cpp +++ b/test/FakeDbInterface.cpp @@ -97,4 +97,24 @@ void FakeDbInterface::postPckLossRatio( mExpectedPacketCount = expectedPacketCount; } +void FakeDbInterface::handleSetMuxMode(const std::string &portName, const std::string state) +{ + mSetMuxModeInvokeCount += 1; +} + +bool FakeDbInterface::isWarmStart() +{ + return mWarmStartFlag; +} + +uint32_t FakeDbInterface::getWarmStartTimer() +{ + return 0; +} + +void FakeDbInterface::setWarmStartStateReconciled() +{ + mSetWarmStartStateReconciledInvokeCount++; +} + } /* namespace test */ diff --git a/test/FakeDbInterface.h b/test/FakeDbInterface.h index b2381ae0..5b64c1ff 100644 --- a/test/FakeDbInterface.h +++ b/test/FakeDbInterface.h @@ -59,10 +59,14 @@ class FakeDbInterface: public mux::DbInterface const uint64_t unknownEventCount, const uint64_t expectedPacketCount ) override; - + virtual bool isWarmStart() override; + virtual uint32_t getWarmStartTimer() override; + virtual void setWarmStartStateReconciled() override; void setNextMuxState(mux_state::MuxState::Label label) {mNextMuxState = label;}; +private: + virtual void handleSetMuxMode(const std::string &portName, const std::string state) override; public: mux_state::MuxState::Label mNextMuxState; @@ -82,6 +86,9 @@ class 
FakeDbInterface: public mux::DbInterface uint32_t mPostLinkProberMetricsInvokeCount = 0; uint64_t mUnknownEventCount = 0; uint64_t mExpectedPacketCount = 0; + uint32_t mSetMuxModeInvokeCount = 0; + uint32_t mSetWarmStartStateReconciledInvokeCount = 0; + bool mWarmStartFlag = false; }; } /* namespace test */ diff --git a/test/MuxManagerTest.cpp b/test/MuxManagerTest.cpp index 25e3da3b..aea4d378 100644 --- a/test/MuxManagerTest.cpp +++ b/test/MuxManagerTest.cpp @@ -245,6 +245,26 @@ void MuxManagerTest::updatePortCableType(const std::string &port, const std::str mMuxManagerPtr->updatePortCableType(port, cableType); } +void MuxManagerTest::warmRestartReconciliation(const std::string &portName) +{ + std::shared_ptr muxPortPtr = mMuxManagerPtr->mPortMap[portName]; + + muxPortPtr->warmRestartReconciliation(); +} + +void MuxManagerTest::updatePortReconciliationCount(int increment) +{ + mMuxManagerPtr->updateWarmRestartReconciliationCount(increment); + runIoService(1); +} + +void MuxManagerTest::startWarmRestartReconciliationTimer(uint32_t timeout) +{ + mMuxManagerPtr->startWarmRestartReconciliationTimer( + timeout + ); +} + void MuxManagerTest::initLinkProberActiveActive(std::shared_ptr linkManagerStateMachineActiveActive) { mFakeLinkProber = std::make_shared (linkManagerStateMachineActiveActive->getLinkProberStateMachinePtr().get()); @@ -818,4 +838,34 @@ INSTANTIATE_TEST_CASE_P( ) ); +TEST_F(MuxManagerTest, WarmRestart) +{ + std::string port = "Ethernet0"; + + createPort(port); + + mDbInterfacePtr->mWarmStartFlag = true; + startWarmRestartReconciliationTimer(UINT32_MAX); + updatePortReconciliationCount(1); + warmRestartReconciliation(port); + + runIoService(3); + + EXPECT_EQ(mDbInterfacePtr->mSetMuxModeInvokeCount, 1); + EXPECT_EQ(mDbInterfacePtr->mSetWarmStartStateReconciledInvokeCount, 1); +} + +TEST_F(MuxManagerTest, WarmRestartTimeout) +{ + std::string port = "Ethernet0"; + + createPort(port); + + mDbInterfacePtr->mWarmStartFlag = true; + 
startWarmRestartReconciliationTimer(mDbInterfacePtr->getWarmStartTimer()); + + runIoService(1); + EXPECT_EQ(mDbInterfacePtr->mSetWarmStartStateReconciledInvokeCount, 1); +} + } /* namespace test */ diff --git a/test/MuxManagerTest.h b/test/MuxManagerTest.h index 038a3512..22024eae 100644 --- a/test/MuxManagerTest.h +++ b/test/MuxManagerTest.h @@ -83,6 +83,9 @@ class MuxManagerTest: public testing::Test void initLinkProberActiveStandby(std::shared_ptr linkManagerStateMachine); void generateServerMac(const std::string &portName, std::array &address); void createPort(std::string port, common::MuxPortConfig::PortCableType portCableType = common::MuxPortConfig::PortCableType::ActiveStandby); + void warmRestartReconciliation(const std::string &portName); + void updatePortReconciliationCount(int increment); + void startWarmRestartReconciliationTimer(uint32_t timeout); public: static const std::string PortName;