Skip to content

Commit

Permalink
[fgnhgorch] add warm reboot support for fgnhg (sonic-net#1538)
Browse files Browse the repository at this point in the history
- Add a bake() function to FgNhgOrch.
  bake() reads the warm-restart route information from the state database
  so that, when a route is re-added after warm reboot, FgNhgOrch can restore
  the same next-hop-to-hash-bucket assignment as before the reboot.

- Add a priority to the fgnhg tables so that they are loaded before the route table.

Co-authored-by: Ubuntu <weixchen@weixchen-dev.papr3y04rasunohgmh0bxenl2f.xx.internal.cloudapp.net>
  • Loading branch information
weixchen1215 and Ubuntu committed Dec 30, 2020
1 parent 4cf6617 commit 7ba4e43
Show file tree
Hide file tree
Showing 5 changed files with 246 additions and 39 deletions.
72 changes: 67 additions & 5 deletions orchagent/fgnhgorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "swssnet.h"
#include "crmorch.h"
#include <array>
#include <algorithm>

#define LINK_DOWN 0
#define LINK_UP 1
Expand All @@ -20,7 +21,7 @@ extern RouteOrch *gRouteOrch;
extern CrmOrch *gCrmOrch;
extern PortsOrch *gPortsOrch;

FgNhgOrch::FgNhgOrch(DBConnector *db, DBConnector *appDb, DBConnector *stateDb, vector<string> &tableNames, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch) :
FgNhgOrch::FgNhgOrch(DBConnector *db, DBConnector *appDb, DBConnector *stateDb, vector<table_name_with_pri_t> &tableNames, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch) :
Orch(db, tableNames),
m_neighOrch(neighOrch),
m_intfsOrch(intfsOrch),
Expand Down Expand Up @@ -105,6 +106,39 @@ void FgNhgOrch::update(SubjectType type, void *cntx)
}
}

bool FgNhgOrch::bake()
{
SWSS_LOG_ENTER();

deque<KeyOpFieldsValuesTuple> entries;
vector<string> keys;
m_stateWarmRestartRouteTable.getKeys(keys);

SWSS_LOG_NOTICE("Warm reboot: recovering entry %lu from state", keys.size());

for (const auto &key : keys)
{
vector<FieldValueTuple> tuples;
m_stateWarmRestartRouteTable.get(key, tuples);

NextHopIndexMap nhop_index_map(tuples.size(), std::string());
for (const auto &tuple : tuples)
{
const auto index = stoi(fvField(tuple));
const auto nextHop = fvValue(tuple);

nhop_index_map[index] = nextHop;
SWSS_LOG_INFO("Storing next hop %s at index %d", nhop_index_map[index].c_str(), index);
}

// Recover nexthop with index relationship
m_recoveryMap[key] = nhop_index_map;

m_stateWarmRestartRouteTable.del(key);
}

return Orch::bake();
}

/* calculateBankHashBucketStartIndices: generates the hash_bucket_indices for all banks
* and stores it in fgNhgEntry for the group.
Expand Down Expand Up @@ -191,7 +225,6 @@ void FgNhgOrch::setStateDbRouteEntry(const IpPrefix &ipPrefix, uint32_t index, N

}


bool FgNhgOrch::writeHashBucketChange(FGNextHopGroupEntry *syncd_fg_route_entry, uint32_t index, sai_object_id_t nh_oid,
const IpPrefix &ipPrefix, NextHopKey nextHop)
{
Expand Down Expand Up @@ -881,6 +914,8 @@ bool FgNhgOrch::setNewNhgMembers(FGNextHopGroupEntry &syncd_fg_route_entry, FgNh
SWSS_LOG_ENTER();

sai_status_t status;
bool isWarmReboot = false;
auto nexthopsMap = m_recoveryMap.find(ipPrefix.to_string());
for (uint32_t i = 0; i < fgNhgEntry->hash_bucket_indices.size(); i++)
{
uint32_t bank = i;
Expand Down Expand Up @@ -913,11 +948,33 @@ bool FgNhgOrch::setNewNhgMembers(FGNextHopGroupEntry &syncd_fg_route_entry, FgNh
return false;
}

// recover state before warm reboot
if (nexthopsMap != m_recoveryMap.end())
{
isWarmReboot = true;
}

SWSS_LOG_INFO("Warm reboot is set to %d", isWarmReboot);

for (uint32_t j = fgNhgEntry->hash_bucket_indices[i].start_index;
j <= fgNhgEntry->hash_bucket_indices[i].end_index; j++)
{
NextHopKey bank_nh_memb = bank_member_changes[bank].nhs_to_add[j %
bank_member_changes[bank].nhs_to_add.size()];
NextHopKey bank_nh_memb;
if (isWarmReboot)
{
bank_nh_memb = nexthopsMap->second[j];
SWSS_LOG_INFO("Recovering nexthop %s with bucket %d", bank_nh_memb.ip_address.to_string().c_str(), j);
// case nhps in bank are all down
if (fgNhgEntry->next_hops[bank_nh_memb.ip_address].bank != i)
{
syncd_fg_route_entry.inactive_to_active_map[i] = fgNhgEntry->next_hops[bank_nh_memb.ip_address].bank;
}
}
else
{
bank_nh_memb = bank_member_changes[bank].nhs_to_add[j %
bank_member_changes[bank].nhs_to_add.size()];
}

// Create a next hop group member
sai_attribute_t nhgm_attr;
Expand Down Expand Up @@ -961,6 +1018,11 @@ bool FgNhgOrch::setNewNhgMembers(FGNextHopGroupEntry &syncd_fg_route_entry, FgNh
}
}

if (isWarmReboot)
{
m_recoveryMap.erase(nexthopsMap);
}

return true;
}

Expand Down Expand Up @@ -1592,7 +1654,7 @@ bool FgNhgOrch::doTaskFgNhgMember(const KeyOpFieldsValuesTuple & t)
}
fgNhg_entry->second.next_hops[next_hop] = fg_nh_info;
SWSS_LOG_INFO("FG_NHG member added for group %s, next-hop %s",
fgNhg_entry->second.fg_nhg_name.c_str(), next_hop.to_string().c_str());
fgNhg_entry->second.fg_nhg_name.c_str(), nhk.to_string().c_str());
}
}
else if (op == DEL_COMMAND)
Expand Down
12 changes: 11 additions & 1 deletion orchagent/fgnhgorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,18 +83,24 @@ typedef struct
std::vector<NextHopKey> active_nhs;
} BankMemberChanges;

/* Next hops saved before warm reboot, indexed by hash bucket:
 * element i holds the next-hop string recorded for bucket i. */
using NextHopIndexMap = std::vector<std::string>;

/* Per-route recovery data: state-table key (the IP prefix string) mapped to
 * its saved bucket -> next-hop vector. */
using WarmBootRecoveryMap = std::map<std::string, NextHopIndexMap>;

class FgNhgOrch : public Orch, public Observer
{
public:
FgNhgPrefixes fgNhgPrefixes;
FgNhgOrch(DBConnector *db, DBConnector *appDb, DBConnector *stateDb, vector<string> &tableNames, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch);
FgNhgOrch(DBConnector *db, DBConnector *appDb, DBConnector *stateDb, vector<table_name_with_pri_t> &tableNames, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch);

void update(SubjectType type, void *cntx);
bool addRoute(sai_object_id_t, const IpPrefix&, const NextHopGroupKey&);
bool removeRoute(sai_object_id_t, const IpPrefix&);
bool validNextHopInNextHopGroup(const NextHopKey&);
bool invalidNextHopInNextHopGroup(const NextHopKey&);

// warm reboot support
bool bake() override;

private:
NeighOrch *m_neighOrch;
IntfsOrch *m_intfsOrch;
Expand All @@ -106,6 +112,10 @@ class FgNhgOrch : public Orch, public Observer
FgPrefixOpCache m_fgPrefixAddCache;
FgPrefixOpCache m_fgPrefixDelCache;

// warm reboot support for recovery
// < ip_prefix, < HashBuckets, nh_ip>>
WarmBootRecoveryMap m_recoveryMap;

bool setNewNhgMembers(FGNextHopGroupEntry &syncd_fg_route_entry, FgNhgEntry *fgNhgEntry,
std::vector<BankMemberChanges> &bank_member_changes,
std::map<NextHopKey,sai_object_id_t> &nhopgroup_members_set, const IpPrefix&);
Expand Down
10 changes: 6 additions & 4 deletions orchagent/orchdaemon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,12 @@ bool OrchDaemon::init()
gIntfsOrch = new IntfsOrch(m_applDb, APP_INTF_TABLE_NAME, vrf_orch);
gNeighOrch = new NeighOrch(m_applDb, APP_NEIGH_TABLE_NAME, gIntfsOrch, gFdbOrch, gPortsOrch);

vector<string> fgnhg_tables = {
CFG_FG_NHG,
CFG_FG_NHG_PREFIX,
CFG_FG_NHG_MEMBER
const int fgnhgorch_pri = 15;

vector<table_name_with_pri_t> fgnhg_tables = {
{ CFG_FG_NHG, fgnhgorch_pri },
{ CFG_FG_NHG_PREFIX, fgnhgorch_pri },
{ CFG_FG_NHG_MEMBER, fgnhgorch_pri }
};

gFgNhgOrch = new FgNhgOrch(m_configDb, m_applDb, m_stateDb, fgnhg_tables, gNeighOrch, gIntfsOrch, vrf_orch);
Expand Down
10 changes: 6 additions & 4 deletions tests/mock_tests/aclorch_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -323,10 +323,12 @@ namespace aclorch_test
gNeighOrch = new NeighOrch(m_app_db.get(), APP_NEIGH_TABLE_NAME, gIntfsOrch, gFdbOrch, gPortsOrch);

ASSERT_EQ(gFgNhgOrch, nullptr);
vector<string> fgnhg_tables = {
CFG_FG_NHG,
CFG_FG_NHG_PREFIX,
CFG_FG_NHG_MEMBER
const int fgnhgorch_pri = 15;

vector<table_name_with_pri_t> fgnhg_tables = {
{ CFG_FG_NHG, fgnhgorch_pri },
{ CFG_FG_NHG_PREFIX, fgnhgorch_pri },
{ CFG_FG_NHG_MEMBER, fgnhgorch_pri }
};
gFgNhgOrch = new FgNhgOrch(m_config_db.get(), m_app_db.get(), m_state_db.get(), fgnhg_tables, gNeighOrch, gIntfsOrch, gVrfOrch);

Expand Down
Loading

0 comments on commit 7ba4e43

Please sign in to comment.