From cd959723749a660fdc1e850ae029e21ee2a48033 Mon Sep 17 00:00:00 2001 From: Stephen Sun <5379172+stephenxs@users.noreply.github.com> Date: Thu, 19 Jan 2023 23:27:54 +0800 Subject: [PATCH] Fix issue #13341 ARP entry can be out of sync between kernel and APPL_DB if multiple updates are received from RTNL (#2619) - What I did Fix issue sonic-net/sonic-buildimage#13341 the issue that ARP entry is out of sync between kernel and APPL_DB In AppRestartAssist::insertToMap, in case an entry has been updated more than once with the same value but different from the stored one, keep the state as NEW. Eg. Assume the entry's value that is restored from the warm reboot is V0, the following events received. The first update with value V1 is received and handled by the if (found != appTableCacheMap[tableName].end()) branch, * the state is set to NEW * value is updated to V1 The second update with the same value V1 is received and handled by this branch Originally, the state was set to SAME, which is wrong because V1 is different from the stored value V0 The correct logic should be: set the state to SAME only if the state is not NEW This is a very rare case because most of times, the entry won't be updated multiple times - Why I did it To fix the issue. - How I verified it Mock test is added to cover the case. Signed-off-by: Stephen Sun --- tests/mock_tests/Makefile.am | 6 ++- tests/mock_tests/warmrestartassist_ut.cpp | 64 +++++++++++++++++++++++ warmrestart/warmRestartAssist.cpp | 29 ++++++++-- 3 files changed, 93 insertions(+), 6 deletions(-) create mode 100644 tests/mock_tests/warmrestartassist_ut.cpp diff --git a/tests/mock_tests/Makefile.am b/tests/mock_tests/Makefile.am index 8faab837c6ed..685a30b53a4e 100644 --- a/tests/mock_tests/Makefile.am +++ b/tests/mock_tests/Makefile.am @@ -21,7 +21,7 @@ LDADD_GTEST = -L/usr/src/gtest ## Orchagent Unit Tests -tests_INCLUDES = -I $(FLEX_CTR_DIR) -I $(DEBUG_CTR_DIR) -I $(top_srcdir)/lib -I$(top_srcdir)/cfgmgr -I$(top_srcdir)/orchagent -I$(P4_ORCH_DIR)/tests +tests_INCLUDES = -I $(FLEX_CTR_DIR) -I $(DEBUG_CTR_DIR) -I $(top_srcdir)/lib -I$(top_srcdir)/cfgmgr -I$(top_srcdir)/orchagent -I$(P4_ORCH_DIR)/tests -I$(top_srcdir)/warmrestart tests_SOURCES = aclorch_ut.cpp \ portsorch_ut.cpp \ @@ -49,6 +49,7 @@ tests_SOURCES = aclorch_ut.cpp \ swssnet_ut.cpp \ flowcounterrouteorch_ut.cpp \ orchdaemon_ut.cpp \ + warmrestartassist_ut.cpp \ $(top_srcdir)/lib/gearboxutils.cpp \ $(top_srcdir)/lib/subintf.cpp \ $(top_srcdir)/orchagent/orchdaemon.cpp \ @@ -105,7 +106,8 @@ tests_SOURCES = aclorch_ut.cpp \ $(top_srcdir)/orchagent/srv6orch.cpp \ $(top_srcdir)/orchagent/nvgreorch.cpp \ $(top_srcdir)/cfgmgr/portmgr.cpp \ - $(top_srcdir)/cfgmgr/buffermgrdyn.cpp + $(top_srcdir)/cfgmgr/buffermgrdyn.cpp \ + $(top_srcdir)/warmrestart/warmRestartAssist.cpp tests_SOURCES += $(FLEX_CTR_DIR)/flex_counter_manager.cpp $(FLEX_CTR_DIR)/flex_counter_stat_manager.cpp $(FLEX_CTR_DIR)/flow_counter_handler.cpp $(FLEX_CTR_DIR)/flowcounterrouteorch.cpp tests_SOURCES += $(DEBUG_CTR_DIR)/debug_counter.cpp $(DEBUG_CTR_DIR)/drop_counter.cpp diff --git a/tests/mock_tests/warmrestartassist_ut.cpp b/tests/mock_tests/warmrestartassist_ut.cpp new file mode 100644 index 000000000000..6adcd08baf79 --- /dev/null +++ b/tests/mock_tests/warmrestartassist_ut.cpp @@ -0,0 +1,64 @@ +#define protected public +#include "orch.h" +#undef protected +#include "ut_helper.h" +//#include "mock_orchagent_main.h" +#include "mock_table.h" +#include "warm_restart.h" +#define private public +#include "warmRestartAssist.h" +#undef private + +#define APP_WRA_TEST_TABLE_NAME "TEST_TABLE" + +namespace warmrestartassist_test +{ + using namespace std; + + shared_ptr m_app_db = make_shared("APPL_DB", 0); + shared_ptr m_app_db_pipeline = make_shared(m_app_db.get()); + shared_ptr m_wra_test_table = make_shared(m_app_db.get(), APP_WRA_TEST_TABLE_NAME); + + AppRestartAssist *appRestartAssist; + + struct WarmrestartassistTest : public ::testing::Test + { + WarmrestartassistTest() + { + appRestartAssist = new AppRestartAssist(m_app_db_pipeline.get(), "testsyncd", "swss", 0); + appRestartAssist->m_warmStartInProgress = true; + appRestartAssist->registerAppTable(APP_WRA_TEST_TABLE_NAME, m_wra_test_table.get()); + } + + void SetUp() override + { + testing_db::reset(); + + Table testTable = Table(m_app_db.get(), APP_WRA_TEST_TABLE_NAME); + testTable.set("key", + { + {"field", "value0"}, + }); + } + + void TearDown() override + { + } + }; + + TEST_F(WarmrestartassistTest, warmRestartAssistTest) + { + appRestartAssist->readTablesToMap(); + vector fvVector; + fvVector.emplace_back("field", "value1"); + appRestartAssist->insertToMap(APP_WRA_TEST_TABLE_NAME, "key", fvVector, false); + appRestartAssist->insertToMap(APP_WRA_TEST_TABLE_NAME, "key", fvVector, false); + appRestartAssist->reconcile(); + + fvVector.clear(); + Table testTable = Table(m_app_db.get(), APP_WRA_TEST_TABLE_NAME); + ASSERT_TRUE(testTable.get("key", fvVector)); + ASSERT_EQ(fvField(fvVector[0]), "field"); + ASSERT_EQ(fvValue(fvVector[0]), "value1"); + } +} diff --git a/warmrestart/warmRestartAssist.cpp b/warmrestart/warmRestartAssist.cpp index 988f8279dba0..9b1a8dfddd6b 100644 --- a/warmrestart/warmRestartAssist.cpp +++ b/warmrestart/warmRestartAssist.cpp @@ -208,10 +208,31 @@ void AppRestartAssist::insertToMap(string tableName, string key, vectorsecond, SAME); + auto state = getCacheEntryState(found->second); + /* + * In case an entry has been updated for more than once with the same value but different from the stored one, + * keep the state as NEW. + * Eg. + * Assume the entry's value that is restored from last warm reboot is V0. + * 1. The first update with value V1 is received and handled by the above `if (found != appTableCacheMap[tableName].end())` branch, + * - state is set to NEW + * - value is updated to V1 + * 2. The second update with the same value V1 is received and handled by this branch + * - Originally, state was set to SAME, which is wrong because V1 is different from the stored value V0 + * - The correct logic should be: set the state to same only if the state is not NEW + * This is a very rare case because in most of times the entry won't be updated for multiple times + */ + if (state == NEW) + { + SWSS_LOG_NOTICE("%s, found key: %s, it has been updated for the second time, keep state as NEW", + tableName.c_str(), key.c_str()); + } + else + { + SWSS_LOG_INFO("%s, found key: %s, same value", tableName.c_str(), key.c_str()); + // mark as SAME flag + setCacheEntryState(found->second, SAME); + } } } else