diff --git a/fpmsyncd/Makefile.am b/fpmsyncd/Makefile.am index bae2fd73a7..3727f531b8 100644 --- a/fpmsyncd/Makefile.am +++ b/fpmsyncd/Makefile.am @@ -1,4 +1,4 @@ -INCLUDES = -I $(top_srcdir) -I $(FPM_PATH) +INCLUDES = -I $(top_srcdir) -I $(top_srcdir)/warmrestart -I $(FPM_PATH) bin_PROGRAMS = fpmsyncd @@ -8,9 +8,8 @@ else DBGFLAGS = -g endif -fpmsyncd_SOURCES = fpmsyncd.cpp fpmlink.cpp routesync.cpp +fpmsyncd_SOURCES = fpmsyncd.cpp fpmlink.cpp routesync.cpp $(top_srcdir)/warmrestart/warmRestartHelper.cpp $(top_srcdir)/warmrestart/warmRestartHelper.h $(top_srcdir)/warmrestart/warm_restart.cpp $(top_srcdir)/warmrestart/warm_restart.h fpmsyncd_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) fpmsyncd_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) fpmsyncd_LDADD = -lnl-3 -lnl-route-3 -lswsscommon - diff --git a/fpmsyncd/fpmsyncd.cpp b/fpmsyncd/fpmsyncd.cpp index d5a54f8fee..8083b27273 100644 --- a/fpmsyncd/fpmsyncd.cpp +++ b/fpmsyncd/fpmsyncd.cpp @@ -1,13 +1,24 @@ #include #include "logger.h" #include "select.h" +#include "selectabletimer.h" #include "netdispatcher.h" +#include "warmRestartHelper.h" #include "fpmsyncd/fpmlink.h" #include "fpmsyncd/routesync.h" + using namespace std; using namespace swss; + +/* + * Default warm-restart timer interval for routing-stack app. To be used only if + * no explicit value has been defined in configuration. + */ +const uint32_t DEFAULT_ROUTING_RESTART_INTERVAL = 120; + + int main(int argc, char **argv) { swss::Logger::linkToDbNative("fpmsyncd"); @@ -18,25 +29,70 @@ int main(int argc, char **argv) NetDispatcher::getInstance().registerMessageHandler(RTM_NEWROUTE, &sync); NetDispatcher::getInstance().registerMessageHandler(RTM_DELROUTE, &sync); - while (1) + while (true) { try { FpmLink fpm; Select s; - cout << "Waiting for connection..." << endl; + SelectableTimer warmStartTimer(timespec{0, 0}); + + cout << "Waiting for fpm-client connection..." << endl; fpm.accept(); cout << "Connected!" << endl; s.addSelectable(&fpm); + + /* Initialize warm-restart logic if this one is enabled */ + bool warmStartEnabled = sync.m_warmStartHelper.isEnabled(); + if (warmStartEnabled) + { + /* Obtain warm-restart timer defined for routing application */ + uint32_t warmRestartIval = sync.m_warmStartHelper.getRestartTimer(); + if (!warmRestartIval) + { + warmStartTimer.setInterval(timespec{DEFAULT_ROUTING_RESTART_INTERVAL, 0}); + } + else + { + warmStartTimer.setInterval(timespec{warmRestartIval, 0}); + } + + /* Execute recovery instruction and kick off warm-restart timer */ + if (sync.m_warmStartHelper.runRecovery()) + { + warmStartTimer.start(); + s.addSelectable(&warmStartTimer); + } + } + while (true) { Selectable *temps; - /* Reading FPM messages forever (and calling "readData" to read them) */ + + /* Reading FPM messages forever (and calling "readMe" to read them) */ s.select(&temps); - pipeline.flush(); - SWSS_LOG_DEBUG("Pipeline flushed"); + + /* + * Upon expiration of the warm-restart timer, proceed to run the + * reconciliation process and remove warm-restart timer from + * select() loop. + */ + if (warmStartEnabled && temps == &warmStartTimer) + { + SWSS_LOG_NOTICE("Warm-Restart timer expired."); + sync.m_warmStartHelper.reconciliate(); + s.removeSelectable(&warmStartTimer); + + pipeline.flush(); + SWSS_LOG_NOTICE("Pipeline flushed"); + } + else if (!warmStartEnabled || sync.m_warmStartHelper.isReconciled()) + { + pipeline.flush(); + SWSS_LOG_NOTICE("Pipeline flushed"); + } } } catch (FpmLink::FpmConnectionClosedException &e) diff --git a/fpmsyncd/routesync.cpp b/fpmsyncd/routesync.cpp index 488410c9e3..b4fef3f783 100644 --- a/fpmsyncd/routesync.cpp +++ b/fpmsyncd/routesync.cpp @@ -14,7 +14,8 @@ using namespace std; using namespace swss; RouteSync::RouteSync(RedisPipeline *pipeline) : - m_routeTable(pipeline, APP_ROUTE_TABLE_NAME, true) + m_routeTable(pipeline, APP_ROUTE_TABLE_NAME, true), + m_warmStartHelper(pipeline, &m_routeTable, "bgp", "bgp") { m_nl_sock = nl_socket_alloc(); nl_connect(m_nl_sock, NETLINK_ROUTE); @@ -38,10 +39,30 @@ void RouteSync::onMsg(int nlmsg_type, struct nl_object *obj) return; } + /* + * Upon arrival of a delete msg we could either push the change right away, + * or we could opt to defer it if we are in the middle of a warm-reboot + * process. The goal here is to avoid unnecessary churn in swss/syncd layers. + */ + auto warmState = m_warmStartHelper.getState(); + if (nlmsg_type == RTM_DELROUTE) { - m_routeTable.del(destipprefix); - return; + if (warmState == WarmStart::INITIALIZED || + warmState == WarmStart::RECONCILED) + { + m_routeTable.del(destipprefix); + return; + } + else + { + SWSS_LOG_INFO("Warm-Restart: Receiving delete msg: %s\n", destipprefix); + + vector fvVector; + const KeyOpFieldsValuesTuple kfv = std::make_tuple(destipprefix, "", fvVector); + m_warmStartHelper.removeRecoveryMap(kfv, WarmStartHelper::DELETE); + return; + } } else if (nlmsg_type != RTM_NEWROUTE) { @@ -118,8 +139,28 @@ void RouteSync::onMsg(int nlmsg_type, struct nl_object *obj) vector fvVector; FieldValueTuple nh("nexthop", nexthops); FieldValueTuple idx("ifname", ifnames); + fvVector.push_back(nh); fvVector.push_back(idx); - m_routeTable.set(destipprefix, fvVector); - SWSS_LOG_DEBUG("RoutTable set: %s %s %s\n", destipprefix, nexthops.c_str(), ifnames.c_str()); + + if (warmState == WarmStart::INITIALIZED || + warmState == WarmStart::RECONCILED) + { + m_routeTable.set(destipprefix, fvVector); + SWSS_LOG_DEBUG("RouteTable set msg: %s %s %s\n", + destipprefix, nexthops.c_str(), ifnames.c_str()); + } + + /* + * During routing-stack restarting scenarios route-updates will be temporarily + * put on hold by warm-reboot logic. + */ + else + { + SWSS_LOG_INFO("Warm-Restart: RouteTable set msg: %s %s %s\n", + destipprefix, nexthops.c_str(), ifnames.c_str()); + + const KeyOpFieldsValuesTuple kfv = std::make_tuple(destipprefix, "", fvVector); + m_warmStartHelper.insertRecoveryMap(kfv, WarmStartHelper::CLEAN); + } } diff --git a/fpmsyncd/routesync.h b/fpmsyncd/routesync.h index 43b6305287..1652bedee7 100644 --- a/fpmsyncd/routesync.h +++ b/fpmsyncd/routesync.h @@ -4,6 +4,8 @@ #include "dbconnector.h" #include "producerstatetable.h" #include "netmsg.h" +#include "warmRestartHelper.h" + namespace swss { @@ -16,10 +18,12 @@ class RouteSync : public NetMsg virtual void onMsg(int nlmsg_type, struct nl_object *obj); + WarmStartHelper m_warmStartHelper; + private: - ProducerStateTable m_routeTable; - struct nl_cache *m_link_cache; - struct nl_sock *m_nl_sock; + ProducerStateTable m_routeTable; + struct nl_cache *m_link_cache; + struct nl_sock *m_nl_sock; }; } diff --git a/warmrestart/warmRestartHelper.cpp b/warmrestart/warmRestartHelper.cpp new file mode 100644 index 0000000000..6ad65097d6 --- /dev/null +++ b/warmrestart/warmRestartHelper.cpp @@ -0,0 +1,472 @@ +#include +#include + +#include "warmRestartHelper.h" + + +using namespace swss; + + +WarmStartHelper::WarmStartHelper(RedisPipeline *pipeline, + ProducerStateTable *syncTable, + const std::string &dockerName, + const std::string &appName) : + m_recoveryTable(pipeline, APP_ROUTE_TABLE_NAME, false), + m_syncTable(syncTable), + m_dockName(dockerName), + m_appName(appName) +{ + WarmStart::initialize(appName, dockerName); +} + + +WarmStartHelper::~WarmStartHelper() +{ +} + + +void WarmStartHelper::setState(WarmStart::WarmStartState state) +{ + WarmStart::setWarmStartState(m_appName, state); + + m_state = state; +} + + +WarmStart::WarmStartState WarmStartHelper::getState(void) const +{ + return m_state; +} + + +bool WarmStartHelper::isEnabled(void) const +{ + return WarmStart::checkWarmStart(m_appName, m_dockName); +} + + +bool WarmStartHelper::isReconciled(void) const +{ + return (m_state == WarmStart::RECONCILED); +} + + +uint32_t WarmStartHelper::getRestartTimer(void) const +{ + return WarmStart::getWarmStartTimer(m_appName, m_dockName); +} + + +/* + * Invoked by warmStartHelper clients during initialization. All interested parties + * are expected to call this method to upload their associated redisDB state into + * a temporary buffer, which will eventually serve to resolve any conflict between + * 'old' and 'new' state. + */ +bool WarmStartHelper::runRecovery() +{ + bool state_available; + + SWSS_LOG_NOTICE("Initiating AppDB restoration process"); + + if (buildRecoveryMap()) + { + setState(WarmStart::RESTORED); + state_available = true; + } + else + { + setState( WarmStart::RECONCILED); + state_available = false; + } + + SWSS_LOG_NOTICE("Completed AppDB restoration process"); + + return state_available; +} + + +bool WarmStartHelper::buildRecoveryMap(void) +{ + std::vector recoveryVector; + + m_recoveryTable.getContent(recoveryVector); + if (!recoveryVector.size()) + { + SWSS_LOG_NOTICE("Warm-Restart: No records received from AppDB\n"); + return false; + } + SWSS_LOG_NOTICE("Warm-Restart: Received %d records from AppDB\n", + static_cast(recoveryVector.size())); + + /* Proceed to insert every recovered element into the reconciliation buffer */ + for (auto &elem : recoveryVector) + { + insertRecoveryMap(elem, STALE); + } + + return true; +} + + +/* + * Method in charge of populating the recoveryMap with old/new state. This state + * can either come from southbound data-stores (old/existing state) or from any + * of the applications (new state) interested in graceful-restart capabilities. + */ +void WarmStartHelper::insertRecoveryMap(const KeyOpFieldsValuesTuple &kfv, + fvState_t state) +{ + std::string key = kfvKey(kfv); + std::vector fieldValues = kfvFieldsValues(kfv); + + fieldValuesTupleVoV fvVector; + + /* + * Iterate through all the fieldValue-tuples present in this kfv entry to + * split its values into separated tuples. Store these separated tuples in + * a temporary fieldValue vector. + * + * Here we are simply converting from KFV format to a split-based layout + * represented by the fvVector variable. + * + * input kfv: 1.1.1.1/30, vector{nexthop: 10.1.1.1, 10.1.1.2, ifname: eth1, eth2} + * + * output fvVector: vector{v1{nexthop: 10.1.1.1, ifname: eth1}, + * v2{nexthop: 10.1.1.2, ifname: eth2}} + */ + for (auto &fv : fieldValues) + { + std::string field = fvField(fv); + std::vector splitValues = tokenize(fvValue(fv), ','); + + /* + * Dealing with tuples with empty values. Example: directly connected + * intfs will show up as [ nexthop = "" ] + */ + if (!splitValues.size()) + { + splitValues.push_back(""); + } + + for (int j = 0; j < static_cast(splitValues.size()); ++j) + { + if (j < static_cast(fvVector.size())) + { + fvVector[j].emplace_back(field, splitValues[j]); + } + else + { + fvVector.emplace_back(std::vector{make_pair(field, splitValues[j])}); + } + } + } + + /* + * Now that we have a fvVector with separated fieldvalue-tuples, let's proceed + * to insert/update its fieldvalue entries into our recoveryMap. + */ + fvRecoveryMap fvMap; + + if (m_recoveryMap.count(key)) + { + fvMap = m_recoveryMap[key]; + } + + /* + * Let's now deal with transient best-path selections, which is only required + * when we are receiving new/refreshed state from north-bound apps (CLEAN + * flag). + */ + if (state == CLEAN) + { + adjustRecoveryMap(fvMap, fvVector, key); + } + + /* + * We will iterate through each of the fieldvalue-tuples in the fvVector to + * either insert or update the corresponding entry in the map. + */ + for (auto &elem : fvVector) + { + if (fvMap.find(elem) == fvMap.end()) + { + if (state == STALE) + { + fvMap[elem] = STALE; + } + else if (state == CLEAN) + { + fvMap[elem] = NEW; + } + } + else + { + fvMap[elem] = state; + } + } + + m_recoveryMap[key] = fvMap; +} + + +/* + * Method takes care of marking eliminated entries (e.g. route paths) within the + * recoveryMap buffer. + */ +void WarmStartHelper::removeRecoveryMap(const KeyOpFieldsValuesTuple &kfv, + fvState_t state) +{ + fvRecoveryMap fvMap; + + /* + * Notice that there's no point in processing bgp-withdrawal if an associated + * entry doesn't exist in the recoveryMap. + */ + std::string key = kfvKey(kfv); + if (!m_recoveryMap.count(key)) + { + return; + } + + fvMap = m_recoveryMap[key]; + + /* + * Iterate through all elements in the map and update the state of the + * entries being withdrawwn (i.e. 'paths' in routing case) with the proper + * flag. + */ + for (auto &fv : fvMap) + { + fv.second = state; + } + + m_recoveryMap[key] = fvMap; +} + + +/* + * This method is currently required to deal with a specific limitation of quagga + * and frr routing-stacks, which causes transient best-path selections to arrive + * at fpmSyncd during bgp's initial peering establishments. In these scenarios we + * must identify the 'transient' character of a routing-update and eliminate it + * from the recoveryMap whenever a better one is received. + * + * As this issue is only observed when interacting with the routing-stack, we can + * safely avoid this call when collecting state from AppDB (restoration phase); + * hence caller should invoke this method only if/when the state of the new entry + * to add is set to CLEAN. + */ +void WarmStartHelper::adjustRecoveryMap(fvRecoveryMap &fvMap, + const fieldValuesTupleVoV &fvVector, + const std::string &key) +{ + /* + * Iterate through all field-value entries in the fvMap and determine if there's + * matching entry in the fvVector. If that's not the case, and this entry has + * been recently added by the north-bound app (NEW flag), then proceed to + * eliminate it from the fvMap. + * + * Notice that even though this is an O(n^2) logic, 'n' here is small (number + * of ecmp-paths per prefix), and this is only executed during restarting + * events. + */ + for (auto it = fvMap.begin(); it != fvMap.end(); ) + { + bool found = false; + /* + * Transient best-path selections would only apply to entries marked as + * NEW. + */ + if (it->second != NEW) + { + it++; + continue; + } + + for (auto const &fv : fvVector) + { + if (it->first == fv) + { + found = true; + break; + } + } + + if (!found) + { + SWSS_LOG_INFO("Warm-Restart: Deleting transient best-path selection " + "for entry %s\n", key.c_str()); + it = fvMap.erase(it); + } + else + { + it++; + } + } +} + + +/* + * Reconciliation process takes place here. + * + * In a nutshell, the process relies on the following basic guidelines: + * + * - An element in the recoveryMap with all its entries in the fvRecMap showing + * as STALE, will be eliminated from AppDB. + * + * - An element in the recoveryMap with all its entries in the fvRecMap showing + * as CLEAN, will have a NO-OP associated with it -- no changes in AppDB. + * + * - An element in the recoveryMap with all its entries in the fvRecMap showing + * as NEW, will correspond to a brand-new state, and as such, will be pushed to + * AppDB. + * + * - An element in the recoveryMap with some of its entries in the fvRecMap + * showing as CLEAN, will have these CLEAN entries, along with any NEW one, + * being pushed down to AppDB. + * + * - An element in the recoveryMap with some of its entries in the fvRecMap + * showing as NEW, will have these new entries, along with any CLEAN one, + * being pushed to AppDB. + * + * - An element in the recoveryMap with some/all of its entries in the + * fvRecMap showing as DELETE, will have these entries being eliminated + * from AppDB. + * + */ +void WarmStartHelper::reconciliate(void) +{ + SWSS_LOG_NOTICE("Initiating AppDB reconciliation process..."); + + assert(getState() == WarmStart::RESTORED); + + /* + * Iterate through all the entries in the recoveryMap and take note of the + * attributes associated to each. + */ + auto it = m_recoveryMap.begin(); + while (it != m_recoveryMap.end()) + { + std::string key = it->first; + fvRecoveryMap fvMap = it->second; + + int totalRecElems, staleRecElems, cleanRecElems, newRecElems, deleteRecElems; + totalRecElems = staleRecElems = cleanRecElems = newRecElems = deleteRecElems = 0; + + std::vector fvVector; + + for (auto itMap = fvMap.begin(); itMap != fvMap.end(); ) + { + totalRecElems++; + + auto recElem = itMap->first; + auto recElemState = itMap->second; + + if (recElemState == STALE) + { + itMap = fvMap.erase(itMap); + staleRecElems++; + } + else if (recElemState == CLEAN) + { + cleanRecElems++; + transformKFV(recElem, fvVector); + ++itMap; + } + else if (recElemState == NEW) + { + newRecElems++; + transformKFV(recElem, fvVector); + ++itMap; + } + else if (recElemState == DELETE) + { + deleteRecElems++; + ++itMap; + } + } + + if (staleRecElems == totalRecElems) + { + m_syncTable->del(key); + SWSS_LOG_NOTICE("Route reconciliation: deleting stale prefix %s\n", + key.c_str()); + } + else if (cleanRecElems == totalRecElems) + { + SWSS_LOG_NOTICE("Route reconciliation: no changes needed for existing" + " prefix %s\n", key.c_str()); + } + else if (newRecElems == totalRecElems) + { + m_syncTable->set(key, fvVector); + SWSS_LOG_NOTICE("Route reconciliation: creating new prefix %s\n", + key.c_str()); + } + else if (cleanRecElems) + { + m_syncTable->set(key, fvVector); + SWSS_LOG_NOTICE("Route reconciliation: updating attributes for prefix" + " %s\n", key.c_str()); + } + else if (newRecElems) + { + m_syncTable->set(key, fvVector); + SWSS_LOG_NOTICE("Route reconciliation: creating new attributes for " + "prefix %s\n", key.c_str()); + } + else if (deleteRecElems) + { + m_syncTable->del(key); + SWSS_LOG_NOTICE("Route reconciliation: deleting withdrawn prefix %s\n", + key.c_str()); + } + + it = m_recoveryMap.erase(it); + } + + /* Recovery map should be entirely empty by now */ + assert(m_recoveryMap.size() == 0); + + setState(WarmStart::RECONCILED); +} + + +/* + * Method useful to transform fieldValueTuples from the split-format utilized + * in warmStartHelper's reconciliation process to the regular format used + * everywhere else. + */ +void WarmStartHelper::transformKFV(const std::vector &data, + std::vector &fvVector) +{ + bool emptyVector = false; + + /* + * Both input vectors should contain the same number of elements, with the + * exception of fvVector not being initialized yet. + */ + if (data.size() != fvVector.size() && fvVector.size()) + { + return; + } + else if (!fvVector.size()) + { + emptyVector = true; + } + + /* Define fields in fvVector result-parameter */ + for (int i = 0; i < static_cast(data.size()); ++i) + { + if (emptyVector) + { + fvVector.push_back(data[i]); + continue; + } + + std::string newVal = fvValue(fvVector[i]) + "," + fvValue(data[i]); + + fvVector[i].second = newVal; + } +} diff --git a/warmrestart/warmRestartHelper.h b/warmrestart/warmRestartHelper.h new file mode 100644 index 0000000000..77e7789598 --- /dev/null +++ b/warmrestart/warmRestartHelper.h @@ -0,0 +1,128 @@ +#ifndef __WARMRESTART_HELPER__ +#define __WARMRESTART_HELPER__ + + +#include +#include +#include + +#include "dbconnector.h" +#include "producerstatetable.h" +#include "netmsg.h" +#include "table.h" +#include "tokenize.h" +#include "warm_restart.h" + + +namespace swss { + + +/* FieldValueTuple functor to serve as comparator for recoveryMap */ +struct fvComparator +{ + bool operator()(const std::vector &left, + const std::vector &right) const + { + /* + * The sizes of both tuple-vectors should always match within any given + * application, otherwise we are running into some form of bug. + */ + assert(left.size() == right.size()); + + /* + * Iterate through all the tuples present in left-vector and compare them + * with those in the right one. + */ + for (auto &fvLeft : left) + { + /* + * Notice that we are forced to iterate through all the tuples in the + * right-vector given that the order of fields within a tuple is not + * fully deterministic (i.e. 'left' could hold 'nh: 1.1.1.1 / if: eth0' + * and 'right' could be 'if: eth0, nh: 1.1.1.1'). + */ + for (auto &fvRight : right) + { + if (fvField(fvRight) == fvField(fvLeft)) + { + return fvLeft < fvRight; + } + } + } + + return true; + } +}; + + +class WarmStartHelper { + public: + + WarmStartHelper(RedisPipeline *pipeline, + ProducerStateTable *syncTable, + const std::string &dockerName, + const std::string &appName); + + ~WarmStartHelper(); + + /* State of collected fieldValue tuples */ + enum fvState_t + { + INVALID = 0, + STALE = 1, + CLEAN = 2, + NEW = 3, + DELETE = 4 + }; + + /* + * RecoveryMap types serve as the buffer data-struct where to hold the state + * over which to run the reconciliation logic. + */ + using fvRecoveryMap = std::map, fvState_t, fvComparator>; + using recoveryMap = std::unordered_map; + + /* Useful type for recoveryMap manipulation */ + using fieldValuesTupleVoV = std::vector>; + + + void setState(WarmStart::WarmStartState state); + + WarmStart::WarmStartState getState(void) const; + + bool isEnabled(void) const; + + bool isReconciled(void) const; + + uint32_t getRestartTimer(void) const; + + bool runRecovery(void); + + bool buildRecoveryMap(void); + + void insertRecoveryMap(const KeyOpFieldsValuesTuple &kfv, fvState_t state); + + void removeRecoveryMap(const KeyOpFieldsValuesTuple &kfv, fvState_t state); + + void adjustRecoveryMap(fvRecoveryMap &fvMap, + const fieldValuesTupleVoV &fvVector, + const std::string &key); + + void reconciliate(void); + + void transformKFV(const std::vector &data, + std::vector &fvVector); + + private: + Table m_recoveryTable; // redis table to import current-state from + ProducerStateTable *m_syncTable; // producer-table to sync/push state to + recoveryMap m_recoveryMap; // buffer struct to hold old&new state + WarmStart::WarmStartState m_state; // cached value of warmStart's FSM state + std::string m_dockName; // sonic-docker requesting warmStart services + std::string m_appName; // sonic-app requesting warmStart services +}; + + +} + +#endif