From 7ba4e4327c21d32efe4fd2d1cede64e1831d908e Mon Sep 17 00:00:00 2001 From: weixchen1215 <44277328+weixchen1215@users.noreply.github.com> Date: Wed, 30 Dec 2020 11:34:18 -0800 Subject: [PATCH] [fgnhgorch] add warm reboot support for fgnhg (#1538) - Add bake() functions in FgnhgOrch bake() function retrieves the state database information so that when the new route is added, fgnhg orch is able to recover the nexthop and bucket the same as before warm reboot. - Add priority to fgnhg tables so that fgnhg tables can be loaded prior to route table. Co-authored-by: Ubuntu --- orchagent/fgnhgorch.cpp | 72 ++++++++++++- orchagent/fgnhgorch.h | 12 ++- orchagent/orchdaemon.cpp | 10 +- tests/mock_tests/aclorch_ut.cpp | 10 +- tests/test_fgnhg.py | 181 +++++++++++++++++++++++++++----- 5 files changed, 246 insertions(+), 39 deletions(-) diff --git a/orchagent/fgnhgorch.cpp b/orchagent/fgnhgorch.cpp index 4e03c8462ce3..051f59d51e9b 100644 --- a/orchagent/fgnhgorch.cpp +++ b/orchagent/fgnhgorch.cpp @@ -6,6 +6,7 @@ #include "swssnet.h" #include "crmorch.h" #include +#include #define LINK_DOWN 0 #define LINK_UP 1 @@ -20,7 +21,7 @@ extern RouteOrch *gRouteOrch; extern CrmOrch *gCrmOrch; extern PortsOrch *gPortsOrch; -FgNhgOrch::FgNhgOrch(DBConnector *db, DBConnector *appDb, DBConnector *stateDb, vector &tableNames, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch) : +FgNhgOrch::FgNhgOrch(DBConnector *db, DBConnector *appDb, DBConnector *stateDb, vector &tableNames, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch) : Orch(db, tableNames), m_neighOrch(neighOrch), m_intfsOrch(intfsOrch), @@ -105,6 +106,39 @@ void FgNhgOrch::update(SubjectType type, void *cntx) } } +bool FgNhgOrch::bake() +{ + SWSS_LOG_ENTER(); + + deque entries; + vector keys; + m_stateWarmRestartRouteTable.getKeys(keys); + + SWSS_LOG_NOTICE("Warm reboot: recovering entry %lu from state", keys.size()); + + for (const auto &key : keys) + { + vector tuples; + m_stateWarmRestartRouteTable.get(key, tuples); + + NextHopIndexMap nhop_index_map(tuples.size(), std::string()); + for (const auto &tuple : tuples) + { + const auto index = stoi(fvField(tuple)); + const auto nextHop = fvValue(tuple); + + nhop_index_map[index] = nextHop; + SWSS_LOG_INFO("Storing next hop %s at index %d", nhop_index_map[index].c_str(), index); + } + + // Recover nexthop with index relationship + m_recoveryMap[key] = nhop_index_map; + + m_stateWarmRestartRouteTable.del(key); + } + + return Orch::bake(); +} /* calculateBankHashBucketStartIndices: generates the hash_bucket_indices for all banks * and stores it in fgNhgEntry for the group. @@ -191,7 +225,6 @@ void FgNhgOrch::setStateDbRouteEntry(const IpPrefix &ipPrefix, uint32_t index, N } - bool FgNhgOrch::writeHashBucketChange(FGNextHopGroupEntry *syncd_fg_route_entry, uint32_t index, sai_object_id_t nh_oid, const IpPrefix &ipPrefix, NextHopKey nextHop) { @@ -881,6 +914,8 @@ bool FgNhgOrch::setNewNhgMembers(FGNextHopGroupEntry &syncd_fg_route_entry, FgNh SWSS_LOG_ENTER(); sai_status_t status; + bool isWarmReboot = false; + auto nexthopsMap = m_recoveryMap.find(ipPrefix.to_string()); for (uint32_t i = 0; i < fgNhgEntry->hash_bucket_indices.size(); i++) { uint32_t bank = i; @@ -913,11 +948,33 @@ bool FgNhgOrch::setNewNhgMembers(FGNextHopGroupEntry &syncd_fg_route_entry, FgNh return false; } + // recover state before warm reboot + if (nexthopsMap != m_recoveryMap.end()) + { + isWarmReboot = true; + } + + SWSS_LOG_INFO("Warm reboot is set to %d", isWarmReboot); + for (uint32_t j = fgNhgEntry->hash_bucket_indices[i].start_index; j <= fgNhgEntry->hash_bucket_indices[i].end_index; j++) { - NextHopKey bank_nh_memb = bank_member_changes[bank].nhs_to_add[j % - bank_member_changes[bank].nhs_to_add.size()]; + NextHopKey bank_nh_memb; + if (isWarmReboot) + { + bank_nh_memb = nexthopsMap->second[j]; + SWSS_LOG_INFO("Recovering nexthop %s with bucket %d", bank_nh_memb.ip_address.to_string().c_str(), j); + // case nhps in bank are all down + if (fgNhgEntry->next_hops[bank_nh_memb.ip_address].bank != i) + { + syncd_fg_route_entry.inactive_to_active_map[i] = fgNhgEntry->next_hops[bank_nh_memb.ip_address].bank; + } + } + else + { + bank_nh_memb = bank_member_changes[bank].nhs_to_add[j % + bank_member_changes[bank].nhs_to_add.size()]; + } // Create a next hop group member sai_attribute_t nhgm_attr; @@ -961,6 +1018,11 @@ bool FgNhgOrch::setNewNhgMembers(FGNextHopGroupEntry &syncd_fg_route_entry, FgNh } } + if (isWarmReboot) + { + m_recoveryMap.erase(nexthopsMap); + } + return true; } @@ -1592,7 +1654,7 @@ bool FgNhgOrch::doTaskFgNhgMember(const KeyOpFieldsValuesTuple & t) } fgNhg_entry->second.next_hops[next_hop] = fg_nh_info; SWSS_LOG_INFO("FG_NHG member added for group %s, next-hop %s", - fgNhg_entry->second.fg_nhg_name.c_str(), next_hop.to_string().c_str()); + fgNhg_entry->second.fg_nhg_name.c_str(), nhk.to_string().c_str()); } } else if (op == DEL_COMMAND) diff --git a/orchagent/fgnhgorch.h b/orchagent/fgnhgorch.h index a45faec4e0ca..c096f7b90ebd 100644 --- a/orchagent/fgnhgorch.h +++ b/orchagent/fgnhgorch.h @@ -83,11 +83,14 @@ typedef struct std::vector active_nhs; } BankMemberChanges; +typedef std::vector NextHopIndexMap; +typedef map WarmBootRecoveryMap; + class FgNhgOrch : public Orch, public Observer { public: FgNhgPrefixes fgNhgPrefixes; - FgNhgOrch(DBConnector *db, DBConnector *appDb, DBConnector *stateDb, vector &tableNames, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch); + FgNhgOrch(DBConnector *db, DBConnector *appDb, DBConnector *stateDb, vector &tableNames, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch); void update(SubjectType type, void *cntx); bool addRoute(sai_object_id_t, const IpPrefix&, const NextHopGroupKey&); @@ -95,6 +98,9 @@ class FgNhgOrch : public Orch, public Observer bool validNextHopInNextHopGroup(const NextHopKey&); bool invalidNextHopInNextHopGroup(const NextHopKey&); + // warm reboot support + bool bake() override; + private: NeighOrch *m_neighOrch; IntfsOrch *m_intfsOrch; @@ -106,6 +112,10 @@ class FgNhgOrch : public Orch, public Observer FgPrefixOpCache m_fgPrefixAddCache; FgPrefixOpCache m_fgPrefixDelCache; + // warm reboot support for recovery + // < ip_prefix, < HashBuckets, nh_ip>> + WarmBootRecoveryMap m_recoveryMap; + bool setNewNhgMembers(FGNextHopGroupEntry &syncd_fg_route_entry, FgNhgEntry *fgNhgEntry, std::vector &bank_member_changes, std::map &nhopgroup_members_set, const IpPrefix&); diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp index 4612dc1ff11d..a1f77854e8ab 100644 --- a/orchagent/orchdaemon.cpp +++ b/orchagent/orchdaemon.cpp @@ -123,10 +123,12 @@ bool OrchDaemon::init() gIntfsOrch = new IntfsOrch(m_applDb, APP_INTF_TABLE_NAME, vrf_orch); gNeighOrch = new NeighOrch(m_applDb, APP_NEIGH_TABLE_NAME, gIntfsOrch, gFdbOrch, gPortsOrch); - vector fgnhg_tables = { - CFG_FG_NHG, - CFG_FG_NHG_PREFIX, - CFG_FG_NHG_MEMBER + const int fgnhgorch_pri = 15; + + vector fgnhg_tables = { + { CFG_FG_NHG, fgnhgorch_pri }, + { CFG_FG_NHG_PREFIX, fgnhgorch_pri }, + { CFG_FG_NHG_MEMBER, fgnhgorch_pri } }; gFgNhgOrch = new FgNhgOrch(m_configDb, m_applDb, m_stateDb, fgnhg_tables, gNeighOrch, gIntfsOrch, vrf_orch); diff --git a/tests/mock_tests/aclorch_ut.cpp b/tests/mock_tests/aclorch_ut.cpp index c1b36337a06a..5f71471a576a 100644 --- a/tests/mock_tests/aclorch_ut.cpp +++ b/tests/mock_tests/aclorch_ut.cpp @@ -323,10 +323,12 @@ namespace aclorch_test gNeighOrch = new NeighOrch(m_app_db.get(), APP_NEIGH_TABLE_NAME, gIntfsOrch, gFdbOrch, gPortsOrch); ASSERT_EQ(gFgNhgOrch, nullptr); - vector fgnhg_tables = { - CFG_FG_NHG, - CFG_FG_NHG_PREFIX, - CFG_FG_NHG_MEMBER + const int fgnhgorch_pri = 15; + + vector fgnhg_tables = { + { CFG_FG_NHG, fgnhgorch_pri }, + { CFG_FG_NHG_PREFIX, fgnhgorch_pri }, + { CFG_FG_NHG_MEMBER, fgnhgorch_pri } }; gFgNhgOrch = new FgNhgOrch(m_config_db.get(), m_app_db.get(), m_state_db.get(), fgnhg_tables, gNeighOrch, gIntfsOrch, gVrfOrch); diff --git a/tests/test_fgnhg.py b/tests/test_fgnhg.py index 16ad325b2765..6b6af86385fd 100644 --- a/tests/test_fgnhg.py +++ b/tests/test_fgnhg.py @@ -192,6 +192,19 @@ def startup_link(dvs, db, port): dvs.servers[port].runcmd("ip link set up dev eth0") == 0 db.wait_for_field_match("PORT_TABLE", "Ethernet%d" % (port * 4), {"oper_status": "up"}) +def run_warm_reboot(dvs): + dvs.runcmd("config warm_restart enable swss") + + # Stop swss before modifing the configDB + dvs.stop_swss() + + # start to apply new port_config.ini + dvs.start_swss() + dvs.runcmd(['sh', '-c', 'supervisorctl start neighsyncd']) + dvs.runcmd(['sh', '-c', 'supervisorctl start restore_neighbors']) + + # Enabling some extra logging for validating the order of orchagent + dvs.runcmd("swssloglevel -l INFO -c orchagent") def verify_programmed_fg_state_db_entry(state_db,nh_memb_exp_count): memb_dict = nh_memb_exp_count @@ -237,6 +250,25 @@ def program_route_and_validate_fine_grained_ecmp(app_db, asic_db, state_db, ip_t validate_fine_grained_asic_n_state_db_entries(asic_db, state_db, ip_to_if_map, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) +def fgnhg_clean_up(config_db, asic_db, app_db, state_db, fg_nhg_name, fg_nhg_prefix, active_nhs): + # remove fgnhg prefix: The fine grained route should transition to regular ECMP/route + remove_entry(config_db, "FG_NHG_PREFIX", fg_nhg_prefix) + + # Validate regular ECMP + validate_asic_nhg_regular_ecmp(asic_db, fg_nhg_prefix) + asic_db.wait_for_n_keys(ASIC_NHG_MEMB, active_nhs) + state_db.wait_for_n_keys("FG_ROUTE_TABLE", 0) + + # clean up route entry + asic_rt_key = get_asic_route_key(asic_db, fg_nhg_prefix) + ps = swsscommon.ProducerStateTable(app_db.db_connection, ROUTE_TB) + ps._del(fg_nhg_prefix) + asic_db.wait_for_deleted_entry(ASIC_ROUTE_TB, asic_rt_key) + asic_db.wait_for_n_keys(ASIC_NHG_MEMB, 0) + + # Cleanup all FG, arp and interface + remove_entry(config_db, "FG_NHG", fg_nhg_name) + class TestFineGrainedNextHopGroup(object): def test_route_fgnhg(self, dvs, testlog): app_db = dvs.get_app_db() @@ -503,23 +535,9 @@ def test_route_fgnhg(self, dvs, testlog): validate_fine_grained_asic_n_state_db_entries(asic_db, state_db, ip_to_if_map, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) - # remove fgnhg prefix: The fine grained route should transition to regular ECMP/route - remove_entry(config_db, "FG_NHG_PREFIX", fg_nhg_prefix) - - # Validate regular ECMP - validate_asic_nhg_regular_ecmp(asic_db, fg_nhg_prefix) - asic_db.wait_for_n_keys(ASIC_NHG_MEMB, 3) - state_db.wait_for_n_keys("FG_ROUTE_TABLE", 0) - - # remove prefix entry - asic_rt_key = get_asic_route_key(asic_db, fg_nhg_prefix) - ps._del(fg_nhg_prefix) - asic_db.wait_for_deleted_entry(ASIC_ROUTE_TB, asic_rt_key) - asic_db.wait_for_n_keys(ASIC_NHG_MEMB, 0) - - # Cleanup all FG, arp and interface - remove_entry(config_db, "FG_NHG", fg_nhg_name) - + fgnhg_clean_up(config_db, asic_db, app_db, state_db, fg_nhg_name, fg_nhg_prefix, 3) + + # clean up nh entries for i in range(0,NUM_NHs): if_name_key = "Ethernet" + str(i*4) vlan_name_key = "Vlan" + str((i+1)*4) @@ -532,7 +550,6 @@ def test_route_fgnhg(self, dvs, testlog): dvs.servers[i].runcmd("ip link set down dev eth0") == 0 remove_entry(config_db, "FG_NHG_MEMBER", "10.0.0." + str(1 + i*2)) - ### Create new set of entries with a greater number of FG members and ### bigger bucket size such that the # of nhs are not divisible by ### bucket size. Different physical interface type for dynamicitiy. @@ -626,12 +643,126 @@ def test_route_fgnhg(self, dvs, testlog): "10.0.0.9":13,"10.0.0.11":64} program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + fgnhg_clean_up(config_db, asic_db, app_db, state_db, fg_nhg_name, fg_nhg_prefix, 6) + # clean up nh entries + for i in range(0,NUM_NHs): + if_name_key = "Ethernet" + str(i*4) + ip_pref_key = "Ethernet" + str(i*4) + "|10.0.0." + str(i*2) + "/31" + remove_entry(config_db, IF_TB, ip_pref_key) + remove_entry(config_db, IF_TB, vlan_name_key) + dvs.runcmd("config interface shutdown " + if_name_key) + dvs.servers[i].runcmd("ip link set down dev eth0") == 0 + remove_entry(config_db, "FG_NHG_MEMBER", "10.0.0." + str(1 + i*2)) - # Remove route - # remove prefix entry - asic_rt_key = get_asic_route_key(asic_db, fg_nhg_prefix) - ps._del(fg_nhg_prefix) - asic_db.wait_for_deleted_entry(ASIC_ROUTE_TB, asic_rt_key) - asic_db.wait_for_n_keys(ASIC_NHG_MEMB, 0) + def test_route_fgnhg_warm_reboot(self, dvs, testlog): + dvs.runcmd("swssloglevel -l INFO -c orchagent") + app_db = dvs.get_app_db() + asic_db = dvs.get_asic_db() + config_db = dvs.get_config_db() + state_db = dvs.get_state_db() + fvs_nul = {"NULL": "NULL"} + NUM_NHs = 6 + fg_nhg_name = "fgnhg_v4" + fg_nhg_prefix = "2.2.2.0/24" + bucket_size = 60 + ip_to_if_map = {} - remove_entry(config_db, "FG_NHG_PREFIX", fg_nhg_prefix) + fvs = {"bucket_size": str(bucket_size)} + create_entry(config_db, FG_NHG, fg_nhg_name, fvs) + + fvs = {"FG_NHG": fg_nhg_name} + create_entry(config_db, FG_NHG_PREFIX, fg_nhg_prefix, fvs) + + for i in range(0,NUM_NHs): + if_name_key = "Ethernet" + str(i*4) + vlan_name_key = "Vlan" + str((i+1)*4) + ip_pref_key = vlan_name_key + "|10.0.0." + str(i*2) + "/31" + fvs = {"vlanid": str((i+1)*4)} + create_entry(config_db, VLAN_TB, vlan_name_key, fvs) + fvs = {"tagging_mode": "untagged"} + create_entry(config_db, VLAN_MEMB_TB, vlan_name_key + "|" + if_name_key, fvs) + create_entry(config_db, VLAN_IF_TB, vlan_name_key, fvs_nul) + create_entry(config_db, VLAN_IF_TB, ip_pref_key, fvs_nul) + dvs.runcmd("config interface startup " + if_name_key) + dvs.servers[i].runcmd("ip link set down dev eth0") == 0 + dvs.servers[i].runcmd("ip link set up dev eth0") == 0 + bank = 0 + if i >= NUM_NHs/2: + bank = 1 + fvs = {"FG_NHG": fg_nhg_name, "bank": str(bank), "link": if_name_key} + create_entry(config_db, FG_NHG_MEMBER, "10.0.0." + str(1 + i*2), fvs) + ip_to_if_map["10.0.0." + str(1 + i*2)] = vlan_name_key + + # Wait for the software to receive the entries + time.sleep(1) + + ps = swsscommon.ProducerStateTable(app_db.db_connection, ROUTE_TB) + fvs = swsscommon.FieldValuePairs([("nexthop","10.0.0.7,10.0.0.9,10.0.0.11"), + ("ifname", "Vlan16,Vlan20,Vlan24")]) + ps.set(fg_nhg_prefix, fvs) + # No ASIC_DB entry we can wait for since ARP is not resolved yet, + # We just use sleep so that the sw receives this entry + time.sleep(1) + + asic_nh_count = len(asic_db.get_keys(ASIC_NH_TB)) + dvs.runcmd("arp -s 10.0.0.1 00:00:00:00:00:01") + dvs.runcmd("arp -s 10.0.0.3 00:00:00:00:00:02") + dvs.runcmd("arp -s 10.0.0.5 00:00:00:00:00:03") + dvs.runcmd("arp -s 10.0.0.7 00:00:00:00:00:04") + dvs.runcmd("arp -s 10.0.0.9 00:00:00:00:00:05") + dvs.runcmd("arp -s 10.0.0.11 00:00:00:00:00:06") + asic_db.wait_for_n_keys(ASIC_NH_TB, asic_nh_count + 6) + + asic_db.wait_for_n_keys(ASIC_NHG_MEMB, bucket_size) + nhgid = validate_asic_nhg_fine_grained_ecmp(asic_db, fg_nhg_prefix, bucket_size) + + nh_oid_map = get_nh_oid_map(asic_db) + + # Now that ARP was resolved, 10.0.0.7 should be added as a valid fg nhg member + nh_memb_exp_count = {"10.0.0.7":20,"10.0.0.9":20,"10.0.0.11":20} + validate_fine_grained_asic_n_state_db_entries(asic_db, state_db, ip_to_if_map, + nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + + run_warm_reboot(dvs) + + asic_db.wait_for_n_keys(ASIC_NHG_MEMB, bucket_size) + nhgid = validate_asic_nhg_fine_grained_ecmp(asic_db, fg_nhg_prefix, bucket_size) + + nh_oid_map = get_nh_oid_map(asic_db) + + nh_memb_exp_count = {"10.0.0.7":20,"10.0.0.9":20,"10.0.0.11":20} + validate_fine_grained_asic_n_state_db_entries(asic_db, state_db, ip_to_if_map, + nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + # Bring down 1 next hop in bank 1 + nh_memb_exp_count = {"10.0.0.7":30,"10.0.0.11":30} + program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, + fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + # Bring down 2 next hop and bring up 1 next hop in bank 1 + nh_memb_exp_count = {"10.0.0.9":60} + program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, + fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + # Bring up 1 next hop in bank 1 + nh_memb_exp_count = {"10.0.0.7":20,"10.0.0.9":20,"10.0.0.11":20} + program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, + fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + # Bring up some next-hops in bank 0 for the 1st time + nh_memb_exp_count = {"10.0.0.1":10,"10.0.0.3":10,"10.0.0.5":10,"10.0.0.7":10,"10.0.0.9":10,"10.0.0.11":10} + program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, + fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + run_warm_reboot(dvs) + + asic_db.wait_for_n_keys(ASIC_NHG_MEMB, bucket_size) + nhgid = validate_asic_nhg_fine_grained_ecmp(asic_db, fg_nhg_prefix, bucket_size) + + nh_oid_map = get_nh_oid_map(asic_db) + + nh_memb_exp_count = {"10.0.0.1":10,"10.0.0.3":10,"10.0.0.5":10,"10.0.0.7":10,"10.0.0.9":10,"10.0.0.11":10} + program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, + fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) \ No newline at end of file