Skip to content

Commit

Permalink
PFCWD recovery changes using DLR_INIT (sonic-net#2316)
Browse files Browse the repository at this point in the history
  • Loading branch information
vmittal-msft authored and yxieca committed Sep 1, 2022
1 parent 3e888b7 commit 8bb21be
Show file tree
Hide file tree
Showing 9 changed files with 226 additions and 11 deletions.
27 changes: 20 additions & 7 deletions orchagent/orchdaemon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -597,13 +597,26 @@ bool OrchDaemon::init()
SAI_QUEUE_ATTR_PAUSE_STATUS,
};

m_orchList.push_back(new PfcWdSwOrch<PfcWdAclHandler, PfcWdLossyHandler>(
m_configDb,
pfc_wd_tables,
portStatIds,
queueStatIds,
queueAttrIds,
PFC_WD_POLL_MSECS));
if(gSwitchOrch->checkPfcDlrInitEnable())
{
m_orchList.push_back(new PfcWdSwOrch<PfcWdDlrHandler, PfcWdLossyHandler>(
m_configDb,
pfc_wd_tables,
portStatIds,
queueStatIds,
queueAttrIds,
PFC_WD_POLL_MSECS));
}
else
{
m_orchList.push_back(new PfcWdSwOrch<PfcWdAclHandler, PfcWdLossyHandler>(
m_configDb,
pfc_wd_tables,
portStatIds,
queueStatIds,
queueAttrIds,
PFC_WD_POLL_MSECS));
}
} else if (platform == CISCO_8000_PLATFORM_SUBSTRING)
{
static const vector<sai_port_stat_t> portStatIds;
Expand Down
43 changes: 43 additions & 0 deletions orchagent/pfcactionhandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,49 @@ PfcWdSaiDlrInitHandler::~PfcWdSaiDlrInitHandler(void)
}
}

// Builds the lossy handler for the given port/queue and then raises
// SAI_QUEUE_ATTR_PFC_DLR_INIT on that queue to start PFC deadlock recovery.
// A SAI failure is only logged; the handler object is still constructed.
PfcWdDlrHandler::PfcWdDlrHandler(sai_object_id_t port, sai_object_id_t queue,
        uint8_t queueId, shared_ptr<Table> countersTable):
    PfcWdLossyHandler(port, queue, queueId, countersTable)
{
    SWSS_LOG_ENTER();

    // Request DLR init = true on the queue
    sai_attribute_t dlrInitAttr;
    dlrInitAttr.id = SAI_QUEUE_ATTR_PFC_DLR_INIT;
    dlrInitAttr.value.booldata = true;

    const sai_status_t rc = sai_queue_api->set_queue_attribute(queue, &dlrInitAttr);
    if (rc == SAI_STATUS_SUCCESS)
    {
        return;
    }

    SWSS_LOG_ERROR("Failed to set PFC DLR INIT on port 0x%" PRIx64 " queue 0x%" PRIx64
                   " queueId %d : %d",
                   port, queue, queueId, rc);
}

// Clears SAI_QUEUE_ATTR_PFC_DLR_INIT on the handler's queue to stop PFC
// deadlock recovery. A SAI failure is only logged.
PfcWdDlrHandler::~PfcWdDlrHandler(void)
{
    SWSS_LOG_ENTER();

    // Identifiers captured up front; they are only used for the error log
    const sai_object_id_t portOid = getPort();
    const sai_object_id_t queueOid = getQueue();
    const uint8_t queueIndex = getQueueId();

    // Request DLR init = false on the queue
    sai_attribute_t dlrInitAttr;
    dlrInitAttr.id = SAI_QUEUE_ATTR_PFC_DLR_INIT;
    dlrInitAttr.value.booldata = false;

    const sai_status_t rc = sai_queue_api->set_queue_attribute(getQueue(), &dlrInitAttr);
    if (rc == SAI_STATUS_SUCCESS)
    {
        return;
    }

    SWSS_LOG_ERROR("Failed to clear PFC DLR INIT on port 0x%" PRIx64 " queue 0x%" PRIx64
                   " queueId %d : %d", portOid, queueOid, queueIndex, rc);
}

PfcWdAclHandler::PfcWdAclHandler(sai_object_id_t port, sai_object_id_t queue,
uint8_t queueId, shared_ptr<Table> countersTable):
PfcWdLossyHandler(port, queue, queueId, countersTable)
Expand Down
8 changes: 8 additions & 0 deletions orchagent/pfcactionhandler.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,14 @@ class PfcWdAclHandler: public PfcWdLossyHandler
void updatePfcAclRule(shared_ptr<AclRule> rule, uint8_t queueId, string strTable, vector<sai_object_id_t> port);
};

// PFC watchdog handler that drives the queue-level SAI_QUEUE_ATTR_PFC_DLR_INIT
// attribute: the constructor sets it to true and the destructor sets it back
// to false (see pfcactionhandler.cpp).
class PfcWdDlrHandler: public PfcWdLossyHandler
{
    public:
        // port/queue: SAI object ids of the port and queue being handled;
        // queueId: queue index, used in error logs;
        // countersTable: forwarded unchanged to PfcWdLossyHandler.
        PfcWdDlrHandler(
                sai_object_id_t port,
                sai_object_id_t queue,
                uint8_t queueId,
                shared_ptr<Table> countersTable);

        virtual ~PfcWdDlrHandler();
};

// PFC queue that implements drop action by draining queue with buffer of zero size
class PfcWdZeroBufferHandler: public PfcWdLossyHandler
{
Expand Down
34 changes: 34 additions & 0 deletions orchagent/pfcwdorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,12 @@
#define PFC_WD_TC_MAX 8
#define COUNTER_CHECK_POLL_TIMEOUT_SEC 1

extern sai_object_id_t gSwitchId;
extern sai_switch_api_t* sai_switch_api;
extern sai_port_api_t *sai_port_api;
extern sai_queue_api_t *sai_queue_api;

extern SwitchOrch *gSwitchOrch;
extern PortsOrch *gPortsOrch;

template <typename DropHandler, typename ForwardHandler>
Expand Down Expand Up @@ -229,6 +232,36 @@ task_process_status PfcWdOrch<DropHandler, ForwardHandler>::createEntry(const st
SWSS_LOG_ERROR("Unsupported action %s for platform %s", value.c_str(), m_platform.c_str());
return task_process_status::task_invalid_entry;
}
if(m_platform == BRCM_PLATFORM_SUBSTRING)
{
if(gSwitchOrch->checkPfcDlrInitEnable())
{
if(getPfcDlrPacketAction() == PfcWdAction::PFC_WD_ACTION_UNKNOWN)
{
sai_attribute_t attr;
attr.id = SAI_SWITCH_ATTR_PFC_DLR_PACKET_ACTION;
attr.value.u32 = (sai_uint32_t)action;

sai_status_t status = sai_switch_api->set_switch_attribute(gSwitchId, &attr);
if(status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to set switch level PFC DLR packet action rv : %d", status);
return task_process_status::task_invalid_entry;
}
setPfcDlrPacketAction(action);
}
else
{
if(getPfcDlrPacketAction() != action)
{
string DlrPacketAction = serializeAction(getPfcDlrPacketAction());
SWSS_LOG_ERROR("Invalid PFC Watchdog action %s as switch level action %s is set",
value.c_str(), DlrPacketAction.c_str());
return task_process_status::task_invalid_entry;
}
}
}
}
}
else
{
Expand Down Expand Up @@ -1064,4 +1097,5 @@ bool PfcWdSwOrch<DropHandler, ForwardHandler>::bake()
// Trick to keep member functions in a separate file
template class PfcWdSwOrch<PfcWdZeroBufferHandler, PfcWdLossyHandler>;
template class PfcWdSwOrch<PfcWdAclHandler, PfcWdLossyHandler>;
template class PfcWdSwOrch<PfcWdDlrHandler, PfcWdLossyHandler>;
template class PfcWdSwOrch<PfcWdSaiDlrInitHandler, PfcWdActionHandler>;
3 changes: 3 additions & 0 deletions orchagent/pfcwdorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ class PfcWdOrch: public Orch

virtual task_process_status createEntry(const string& key, const vector<FieldValueTuple>& data);
task_process_status deleteEntry(const string& name);
// Returns the switch-level PFC DLR packet action cached in this orch
// (PFC_WD_ACTION_UNKNOWN until setPfcDlrPacketAction() stores one).
// Read-only accessor, so it is const-qualified and usable on const objects.
PfcWdAction getPfcDlrPacketAction() const { return PfcDlrPacketAction; }
// Caches the given PFC DLR packet action in this orch; no SAI call is made here.
void setPfcDlrPacketAction(PfcWdAction action)
{
    PfcDlrPacketAction = action;
}

protected:
virtual bool startWdActionOnQueue(const string &event, sai_object_id_t queueId) = 0;
Expand All @@ -58,6 +60,7 @@ class PfcWdOrch: public Orch

shared_ptr<DBConnector> m_countersDb = nullptr;
shared_ptr<Table> m_countersTable = nullptr;
PfcWdAction PfcDlrPacketAction = PfcWdAction::PFC_WD_ACTION_UNKNOWN;
};

template <typename DropHandler, typename ForwardHandler>
Expand Down
2 changes: 1 addition & 1 deletion orchagent/qosorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1775,7 +1775,7 @@ bool QosOrch::applyDscpToTcMapToSwitch(sai_attr_id_t attr_id, sai_object_id_t ma
SWSS_LOG_ENTER();

/* Query DSCP_TO_TC QoS map at switch capability */
bool rv = gSwitchOrch->querySwitchDscpToTcCapability(SAI_OBJECT_TYPE_SWITCH, SAI_SWITCH_ATTR_QOS_DSCP_TO_TC_MAP);
bool rv = gSwitchOrch->querySwitchCapability(SAI_OBJECT_TYPE_SWITCH, SAI_SWITCH_ATTR_QOS_DSCP_TO_TC_MAP);
if (rv == false)
{
SWSS_LOG_ERROR("Switch level DSCP to TC QoS map configuration is not supported");
Expand Down
24 changes: 23 additions & 1 deletion orchagent/switchorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,27 @@ const map<string, sai_packet_action_t> packet_action_map =

const std::set<std::string> switch_non_sai_attribute_set = {"ordered_ecmp"};

void SwitchOrch::set_switch_pfc_dlr_init_capability()
{
vector<FieldValueTuple> fvVector;

/* Query PFC DLR INIT capability */
bool rv = querySwitchCapability(SAI_OBJECT_TYPE_QUEUE, SAI_QUEUE_ATTR_PFC_DLR_INIT);
if (rv == false)
{
SWSS_LOG_INFO("Queue level PFC DLR INIT configuration is not supported");
m_PfcDlrInitEnable = false;
fvVector.emplace_back(SWITCH_CAPABILITY_TABLE_PFC_DLR_INIT_CAPABLE, "false");
}
else
{
SWSS_LOG_INFO("Queue level PFC DLR INIT configuration is supported");
m_PfcDlrInitEnable = true;
fvVector.emplace_back(SWITCH_CAPABILITY_TABLE_PFC_DLR_INIT_CAPABLE, "true");
}
set_switch_capability(fvVector);
}

SwitchOrch::SwitchOrch(DBConnector *db, vector<TableConnector>& connectors, TableConnector switchTable):
Orch(connectors),
m_switchTable(switchTable.first, switchTable.second),
Expand All @@ -60,6 +81,7 @@ SwitchOrch::SwitchOrch(DBConnector *db, vector<TableConnector>& connectors, Tabl
auto restartCheckNotifier = new Notifier(m_restartCheckNotificationConsumer, this, "RESTARTCHECK");
Orch::addExecutor(restartCheckNotifier);

set_switch_pfc_dlr_init_capability();
initSensorsTable();
querySwitchTpidCapability();
auto executorT = new ExecutableTimer(m_sensorsPollerTimer, this, "ASIC_SENSORS_POLL_TIMER");
Expand Down Expand Up @@ -762,7 +784,7 @@ void SwitchOrch::querySwitchTpidCapability()
}
}

bool SwitchOrch::querySwitchDscpToTcCapability(sai_object_type_t sai_object, sai_attr_id_t attr_id)
bool SwitchOrch::querySwitchCapability(sai_object_type_t sai_object, sai_attr_id_t attr_id)
{
SWSS_LOG_ENTER();

Expand Down
6 changes: 5 additions & 1 deletion orchagent/switchorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#define SWITCH_CAPABILITY_TABLE_PORT_TPID_CAPABLE "PORT_TPID_CAPABLE"
#define SWITCH_CAPABILITY_TABLE_LAG_TPID_CAPABLE "LAG_TPID_CAPABLE"
#define SWITCH_CAPABILITY_TABLE_ORDERED_ECMP_CAPABLE "ORDERED_ECMP_CAPABLE"
#define SWITCH_CAPABILITY_TABLE_PFC_DLR_INIT_CAPABLE "PFC_DLR_INIT_CAPABLE"

struct WarmRestartCheck
{
Expand All @@ -30,7 +31,9 @@ class SwitchOrch : public Orch
void restartCheckReply(const std::string &op, const std::string &data, std::vector<swss::FieldValueTuple> &values);
bool setAgingFDB(uint32_t sec);
void set_switch_capability(const std::vector<swss::FieldValueTuple>& values);
bool querySwitchDscpToTcCapability(sai_object_type_t sai_object, sai_attr_id_t attr_id);
bool querySwitchCapability(sai_object_type_t sai_object, sai_attr_id_t attr_id);
bool checkPfcDlrInitEnable() { return m_PfcDlrInitEnable; }
void set_switch_pfc_dlr_init_capability();

// Return reference to ACL group created for each stage and the bind point is
// the switch
Expand Down Expand Up @@ -80,6 +83,7 @@ class SwitchOrch : public Orch
bool m_sensorsAvgTempSupported = true;
bool m_vxlanSportUserModeEnabled = false;
bool m_orderedEcmpEnable = false;
bool m_PfcDlrInitEnable = false;

// Information contained in the request from
// external program for orchagent pre-shutdown state check
Expand Down
90 changes: 89 additions & 1 deletion tests/mock_tests/portsorch_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ extern redisReply *mockReply;

namespace portsorch_test
{

using namespace std;

sai_port_api_t ut_sai_port_api;
Expand Down Expand Up @@ -83,6 +82,39 @@ namespace portsorch_test
{
sai_port_api = pold_sai_port_api;
}

sai_queue_api_t ut_sai_queue_api;
sai_queue_api_t *pold_sai_queue_api;
int _sai_set_queue_attr_count = 0;

// Test stub for sai_queue_api->set_queue_attribute. It tracks only
// SAI_QUEUE_ATTR_PFC_DLR_INIT writes: +1 on _sai_set_queue_attr_count when the
// attribute is set to true, -1 when set to false. Always reports success.
sai_status_t _ut_stub_sai_set_queue_attribute(sai_object_id_t queue_id, const sai_attribute_t *attr)
{
    // Any other attribute is accepted without being counted
    if (attr->id != SAI_QUEUE_ATTR_PFC_DLR_INIT)
    {
        return SAI_STATUS_SUCCESS;
    }

    _sai_set_queue_attr_count += attr->value.booldata ? 1 : -1;
    return SAI_STATUS_SUCCESS;
}

// Redirects sai_queue_api to a copy of the current API table in which
// set_queue_attribute is replaced by the unit-test stub.
void _hook_sai_queue_api()
{
    // Remember the current table so _unhook_sai_queue_api() can restore it
    pold_sai_queue_api = sai_queue_api;

    // Start from a copy so every other entry keeps its current implementation
    ut_sai_queue_api = *pold_sai_queue_api;
    ut_sai_queue_api.set_queue_attribute = _ut_stub_sai_set_queue_attribute;

    sai_queue_api = &ut_sai_queue_api;
}

// Restores sai_queue_api to the pointer saved by _hook_sai_queue_api().
void _unhook_sai_queue_api()
{
    // pold_sai_queue_api is only meaningful after _hook_sai_queue_api() has run
    sai_queue_api = pold_sai_queue_api;
}

struct PortsOrchTest : public ::testing::Test
{
Expand Down Expand Up @@ -588,6 +620,61 @@ namespace portsorch_test
ASSERT_TRUE(ts.empty());
}

// Verifies that PfcWdDlrHandler sets SAI_QUEUE_ATTR_PFC_DLR_INIT to true on
// construction and back to false on destruction, using the queue API stub
// that counts those writes in _sai_set_queue_attr_count.
TEST_F(PortsOrchTest, PfcDlrHandlerCallingDlrInitAttribute)
{
    // Keep the stub installed only for this test. A scope guard is used because
    // a failing ASSERT_* returns from the test body immediately; a trailing
    // unhook call would then be skipped and the stub would stay installed.
    struct QueueApiHookGuard
    {
        QueueApiHookGuard() { _hook_sai_queue_api(); }
        ~QueueApiHookGuard() { _unhook_sai_queue_api(); }
    } queueApiHookGuard;

    // The counter is a namespace-level global; start from a known value so the
    // assertions below do not depend on earlier state.
    _sai_set_queue_attr_count = 0;

    Table portTable = Table(m_app_db.get(), APP_PORT_TABLE_NAME);

    // Get SAI default ports to populate DB
    auto ports = ut_helper::getInitialSaiPorts();

    // Populate port table with SAI ports
    for (const auto &it : ports)
    {
        portTable.set(it.first, it.second);
    }

    // Set PortConfigDone, PortInitDone
    portTable.set("PortConfigDone", { { "count", to_string(ports.size()) } });
    portTable.set("PortInitDone", { { "lanes", "0" } });

    // refill consumer
    gPortsOrch->addExistingData(&portTable);

    // Apply configuration :
    //  create ports
    static_cast<Orch *>(gPortsOrch)->doTask();

    // Apply configuration
    //  ports
    static_cast<Orch *>(gPortsOrch)->doTask();

    ASSERT_TRUE(gPortsOrch->allPortsReady());

    // No more tasks
    vector<string> ts;
    gPortsOrch->dumpPendingTasks(ts);
    ASSERT_TRUE(ts.empty());

    // Simulate storm drop handler started on Ethernet0 TC 3
    Port port;
    ASSERT_TRUE(gPortsOrch->getPort("Ethernet0", port));
    auto countersTable = make_shared<Table>(m_counters_db.get(), COUNTERS_TABLE);
    auto dropHandler = make_unique<PfcWdDlrHandler>(port.m_port_id, port.m_queue_ids[3], 3, countersTable);

    // Constructor must have written DLR_INIT = true exactly once
    ASSERT_EQ(_sai_set_queue_attr_count, 1);

    // Destructor must have written DLR_INIT = false exactly once
    dropHandler.reset();
    ASSERT_EQ(_sai_set_queue_attr_count, 0);
}

TEST_F(PortsOrchTest, PfcZeroBufferHandler)
{
Table portTable = Table(m_app_db.get(), APP_PORT_TABLE_NAME);
Expand Down Expand Up @@ -1026,4 +1113,5 @@ namespace portsorch_test

ASSERT_FALSE(bridgePortCalledBeforeLagMember); // bridge port created on lag before lag member was created
}

}

0 comments on commit 8bb21be

Please sign in to comment.