Skip to content

Commit

Permalink
Merge branch 'net-sched-retpoline'
Browse files Browse the repository at this point in the history
Pedro Tammela says:

====================
net/sched: retpoline wrappers for tc

In tc all qdiscs, classifiers and actions can be compiled as modules.
This results today in indirect calls in all transitions in the tc hierarchy.
Due to CONFIG_RETPOLINE, CPUs with mitigations=on might pay an extra cost on
indirect calls. For newer Intel cpus with IBRS the extra cost is
nonexistent, but AMD Zen cpus and older x86 cpus still go through the
retpoline thunk.

Known built-in symbols can be optimized into direct calls, thus
avoiding the retpoline thunk. So far, tc has not been leveraging this
build information and leaving out a performance optimization for some
CPUs. In this series we wire up 'tcf_classify()' and 'tcf_action_exec()'
with direct calls when known modules are compiled as built-in as an
opt-in optimization.

We measured these changes in one AMD Zen 4 cpu (Retpoline), one AMD Zen 3 cpu (Retpoline),
one Intel 10th Gen CPU (IBRS), one Intel 3rd Gen cpu (Retpoline) and one
Intel Xeon CPU (IBRS) using pktgen with 64b udp packets. Our test setup is a
dummy device with clsact and matchall in a kernel compiled with every
tc module as built-in.  We observed a 3-8% speed up on the retpoline CPUs,
when going through 1 tc filter, and a 60-100% speed up when going through 100 filters.
For the IBRS cpus we observed a 1-2% degradation in both scenarios. We believe
the extra branch checks introduced a small overhead, so we added
a static key that bypasses the wrapper on kernels that are compiled with
CONFIG_RETPOLINE but are not using the retpoline mitigation.

1 filter:
CPU        | before (pps) | after (pps) | diff
R9 7950X   | 5914980      | 6380227     | +7.8%
R9 5950X   | 4237838      | 4412241     | +4.1%
R9 5950X   | 4265287      | 4413757     | +3.4%   [*]
i5-3337U   | 1580565      | 1682406     | +6.4%
i5-10210U  | 3006074      | 3006857     | +0.0%
i5-10210U  | 3160245      | 3179945     | +0.6%   [*]
Xeon 6230R | 3196906      | 3197059     | +0.0%
Xeon 6230R | 3190392      | 3196153     | +0.2%   [*]

100 filters:
CPU        | before (pps) | after (pps) | diff
R9 7950X   | 373598       | 820396      | +119.59%
R9 5950X   | 313469       | 633303      | +102.03%
R9 5950X   | 313797       | 633150      | +101.77% [*]
i5-3337U   | 127454       | 211210      | +65.71%
i5-10210U  | 389259       | 381765      | -1.9%
i5-10210U  | 408812       | 412730      | +0.9%    [*]
Xeon 6230R | 415420       | 406612      | -2.1%
Xeon 6230R | 416705       | 405869      | -2.6%    [*]

[*] In these tests we ran pktgen with clone set to 1000.

On the 7950x system we also tested how a filter's position in the static iteration order
affects performance, first by compiling a kernel with the filter under test being
the first one in the static iteration and then repeating the test with it being the last (of the 15 classifiers existing today).
We saw a difference of +0.5-1% in pps between being the first in the iteration vs being the last.
Therefore we order the classifiers and actions according to relevance per our current thinking.

v5->v6:
- Address Eric Dumazet suggestions

v4->v5:
- Rebase

v3->v4:
- Address Eric Dumazet suggestions

v2->v3:
- Address suggestions by Jakub, Paolo and Eric
- Dropped RFC tag (I forgot to add it on v2)

v1->v2:
- Fix build errors found by the bots
- Address Kuniyuki Iwashima suggestions

====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
davem330 committed Dec 9, 2022
2 parents 0bdff11 + 9f3101d commit b602d00
Show file tree
Hide file tree
Showing 38 changed files with 391 additions and 72 deletions.
10 changes: 5 additions & 5 deletions include/net/act_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,6 @@ static inline enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats)
return hw_stats;
}

#ifdef CONFIG_NET_CLS_ACT

#define ACT_P_CREATED 1
#define ACT_P_DELETED 1

typedef void (*tc_action_priv_destructor)(void *priv);

struct tc_action_ops {
Expand Down Expand Up @@ -140,6 +135,11 @@ struct tc_action_ops {
struct netlink_ext_ack *extack);
};

#ifdef CONFIG_NET_CLS_ACT

#define ACT_P_CREATED 1
#define ACT_P_DELETED 1

struct tc_action_net {
struct tcf_idrinfo *idrinfo;
const struct tc_action_ops *ops;
Expand Down
251 changes: 251 additions & 0 deletions include/net/tc_wrapper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __NET_TC_WRAPPER_H
#define __NET_TC_WRAPPER_H

#include <net/pkt_cls.h>

#if IS_ENABLED(CONFIG_RETPOLINE)

#include <linux/cpufeature.h>
#include <linux/static_key.h>
#include <linux/indirect_call_wrapper.h>

/*
 * Linkage prefix placed in front of the act/classify handlers in their .c
 * files. It expands to nothing in this CONFIG_RETPOLINE branch, so the
 * handlers are not static and the inline wrappers below can name them
 * directly; the !CONFIG_RETPOLINE branch defines it as "static".
 */
#define TC_INDIRECT_SCOPE

/*
 * Static key tested first by tc_act() and tc_classify(): when enabled, both
 * skip their direct-call comparison chain and go straight to the indirect
 * call. tc_wrapper_init() enables it. Only declared here; the definition is
 * not in this header.
 */
extern struct static_key_false tc_skip_wrapper;

/* TC Actions */
#ifdef CONFIG_NET_CLS_ACT

/*
 * Declare the prototype of one action handler, wrapped in
 * INDIRECT_CALLABLE_DECLARE(). All handlers share the signature
 *   int fname(struct sk_buff *skb, const struct tc_action *a,
 *             struct tcf_result *res)
 * which is what tc_act() compares a->ops->act against and calls directly.
 */
#define TC_INDIRECT_ACTION_DECLARE(fname) \
INDIRECT_CALLABLE_DECLARE(int fname(struct sk_buff *skb, \
const struct tc_action *a, \
struct tcf_result *res))

/* One declaration per action handler that tc_act() may call directly. */
TC_INDIRECT_ACTION_DECLARE(tcf_bpf_act);
TC_INDIRECT_ACTION_DECLARE(tcf_connmark_act);
TC_INDIRECT_ACTION_DECLARE(tcf_csum_act);
TC_INDIRECT_ACTION_DECLARE(tcf_ct_act);
TC_INDIRECT_ACTION_DECLARE(tcf_ctinfo_act);
TC_INDIRECT_ACTION_DECLARE(tcf_gact_act);
TC_INDIRECT_ACTION_DECLARE(tcf_gate_act);
TC_INDIRECT_ACTION_DECLARE(tcf_ife_act);
TC_INDIRECT_ACTION_DECLARE(tcf_ipt_act);
TC_INDIRECT_ACTION_DECLARE(tcf_mirred_act);
TC_INDIRECT_ACTION_DECLARE(tcf_mpls_act);
TC_INDIRECT_ACTION_DECLARE(tcf_nat_act);
TC_INDIRECT_ACTION_DECLARE(tcf_pedit_act);
TC_INDIRECT_ACTION_DECLARE(tcf_police_act);
TC_INDIRECT_ACTION_DECLARE(tcf_sample_act);
TC_INDIRECT_ACTION_DECLARE(tcf_simp_act);
TC_INDIRECT_ACTION_DECLARE(tcf_skbedit_act);
TC_INDIRECT_ACTION_DECLARE(tcf_skbmod_act);
TC_INDIRECT_ACTION_DECLARE(tcf_vlan_act);
TC_INDIRECT_ACTION_DECLARE(tunnel_key_act);

/*
 * tc_act - run the handler of action @a on @skb, avoiding an indirect call
 * when the handler is a known built-in.
 *
 * @skb: packet handed to the action handler
 * @a:   action whose a->ops->act handler is to be run
 * @res: classification result handed to the action handler
 *
 * If the tc_skip_wrapper static key is enabled, the handler is invoked
 * through the function pointer right away. Otherwise a->ops->act is compared
 * against each action handler whose Kconfig option is built-in
 * (IS_BUILTIN(), so "=m" options contribute no comparison) and, on a match,
 * that handler is called by name. If nothing matches, the function falls
 * through to the indirect call.
 *
 * The comparisons are evaluated top to bottom, so handlers listed earlier
 * are reached after fewer tests.
 *
 * Returns whatever the selected handler returns.
 */
static inline int tc_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
/* Key enabled: bypass the comparison chain entirely. */
if (static_branch_likely(&tc_skip_wrapper))
goto skip;

#if IS_BUILTIN(CONFIG_NET_ACT_GACT)
if (a->ops->act == tcf_gact_act)
return tcf_gact_act(skb, a, res);
#endif
#if IS_BUILTIN(CONFIG_NET_ACT_MIRRED)
if (a->ops->act == tcf_mirred_act)
return tcf_mirred_act(skb, a, res);
#endif
#if IS_BUILTIN(CONFIG_NET_ACT_PEDIT)
if (a->ops->act == tcf_pedit_act)
return tcf_pedit_act(skb, a, res);
#endif
#if IS_BUILTIN(CONFIG_NET_ACT_SKBEDIT)
if (a->ops->act == tcf_skbedit_act)
return tcf_skbedit_act(skb, a, res);
#endif
#if IS_BUILTIN(CONFIG_NET_ACT_SKBMOD)
if (a->ops->act == tcf_skbmod_act)
return tcf_skbmod_act(skb, a, res);
#endif
#if IS_BUILTIN(CONFIG_NET_ACT_POLICE)
if (a->ops->act == tcf_police_act)
return tcf_police_act(skb, a, res);
#endif
#if IS_BUILTIN(CONFIG_NET_ACT_BPF)
if (a->ops->act == tcf_bpf_act)
return tcf_bpf_act(skb, a, res);
#endif
#if IS_BUILTIN(CONFIG_NET_ACT_CONNMARK)
if (a->ops->act == tcf_connmark_act)
return tcf_connmark_act(skb, a, res);
#endif
#if IS_BUILTIN(CONFIG_NET_ACT_CSUM)
if (a->ops->act == tcf_csum_act)
return tcf_csum_act(skb, a, res);
#endif
#if IS_BUILTIN(CONFIG_NET_ACT_CT)
if (a->ops->act == tcf_ct_act)
return tcf_ct_act(skb, a, res);
#endif
#if IS_BUILTIN(CONFIG_NET_ACT_CTINFO)
if (a->ops->act == tcf_ctinfo_act)
return tcf_ctinfo_act(skb, a, res);
#endif
#if IS_BUILTIN(CONFIG_NET_ACT_GATE)
if (a->ops->act == tcf_gate_act)
return tcf_gate_act(skb, a, res);
#endif
#if IS_BUILTIN(CONFIG_NET_ACT_MPLS)
if (a->ops->act == tcf_mpls_act)
return tcf_mpls_act(skb, a, res);
#endif
#if IS_BUILTIN(CONFIG_NET_ACT_NAT)
if (a->ops->act == tcf_nat_act)
return tcf_nat_act(skb, a, res);
#endif
#if IS_BUILTIN(CONFIG_NET_ACT_TUNNEL_KEY)
if (a->ops->act == tunnel_key_act)
return tunnel_key_act(skb, a, res);
#endif
#if IS_BUILTIN(CONFIG_NET_ACT_VLAN)
if (a->ops->act == tcf_vlan_act)
return tcf_vlan_act(skb, a, res);
#endif
#if IS_BUILTIN(CONFIG_NET_ACT_IFE)
if (a->ops->act == tcf_ife_act)
return tcf_ife_act(skb, a, res);
#endif
#if IS_BUILTIN(CONFIG_NET_ACT_IPT)
if (a->ops->act == tcf_ipt_act)
return tcf_ipt_act(skb, a, res);
#endif
#if IS_BUILTIN(CONFIG_NET_ACT_SIMP)
if (a->ops->act == tcf_simp_act)
return tcf_simp_act(skb, a, res);
#endif
#if IS_BUILTIN(CONFIG_NET_ACT_SAMPLE)
if (a->ops->act == tcf_sample_act)
return tcf_sample_act(skb, a, res);
#endif

/* No built-in handler matched, or the wrapper is bypassed: indirect call. */
skip:
return a->ops->act(skb, a, res);
}

#endif /* CONFIG_NET_CLS_ACT */

/* TC Filters */
#ifdef CONFIG_NET_CLS

/*
 * Declare the prototype of one classifier handler, wrapped in
 * INDIRECT_CALLABLE_DECLARE(). All handlers share the signature
 *   int fname(struct sk_buff *skb, const struct tcf_proto *tp,
 *             struct tcf_result *res)
 * which is what tc_classify() compares tp->classify against and calls
 * directly.
 */
#define TC_INDIRECT_FILTER_DECLARE(fname) \
INDIRECT_CALLABLE_DECLARE(int fname(struct sk_buff *skb, \
const struct tcf_proto *tp, \
struct tcf_result *res))

/* One declaration per classifier handler that tc_classify() may call directly. */
TC_INDIRECT_FILTER_DECLARE(basic_classify);
TC_INDIRECT_FILTER_DECLARE(cls_bpf_classify);
TC_INDIRECT_FILTER_DECLARE(cls_cgroup_classify);
TC_INDIRECT_FILTER_DECLARE(fl_classify);
TC_INDIRECT_FILTER_DECLARE(flow_classify);
TC_INDIRECT_FILTER_DECLARE(fw_classify);
TC_INDIRECT_FILTER_DECLARE(mall_classify);
TC_INDIRECT_FILTER_DECLARE(route4_classify);
TC_INDIRECT_FILTER_DECLARE(rsvp_classify);
TC_INDIRECT_FILTER_DECLARE(rsvp6_classify);
TC_INDIRECT_FILTER_DECLARE(tcindex_classify);
TC_INDIRECT_FILTER_DECLARE(u32_classify);

/*
 * tc_classify - run the classify handler of filter @tp on @skb, avoiding an
 * indirect call when the handler is a known built-in.
 *
 * @skb: packet handed to the classifier
 * @tp:  filter whose tp->classify handler is to be run
 * @res: classification result handed to the classifier
 *
 * If the tc_skip_wrapper static key is enabled, the handler is invoked
 * through the function pointer right away. Otherwise tp->classify is
 * compared against each classifier whose Kconfig option is built-in
 * (IS_BUILTIN(), so "=m" options contribute no comparison) and, on a match,
 * that classifier is called by name. If nothing matches, the function falls
 * through to the indirect call.
 *
 * The comparisons are evaluated top to bottom, so classifiers listed earlier
 * are reached after fewer tests.
 *
 * Returns whatever the selected classifier returns.
 */
static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
/* Key enabled: bypass the comparison chain entirely. */
if (static_branch_likely(&tc_skip_wrapper))
goto skip;

#if IS_BUILTIN(CONFIG_NET_CLS_BPF)
if (tp->classify == cls_bpf_classify)
return cls_bpf_classify(skb, tp, res);
#endif
#if IS_BUILTIN(CONFIG_NET_CLS_U32)
if (tp->classify == u32_classify)
return u32_classify(skb, tp, res);
#endif
#if IS_BUILTIN(CONFIG_NET_CLS_FLOWER)
if (tp->classify == fl_classify)
return fl_classify(skb, tp, res);
#endif
#if IS_BUILTIN(CONFIG_NET_CLS_FW)
if (tp->classify == fw_classify)
return fw_classify(skb, tp, res);
#endif
#if IS_BUILTIN(CONFIG_NET_CLS_MATCHALL)
if (tp->classify == mall_classify)
return mall_classify(skb, tp, res);
#endif
#if IS_BUILTIN(CONFIG_NET_CLS_BASIC)
if (tp->classify == basic_classify)
return basic_classify(skb, tp, res);
#endif
#if IS_BUILTIN(CONFIG_NET_CLS_CGROUP)
if (tp->classify == cls_cgroup_classify)
return cls_cgroup_classify(skb, tp, res);
#endif
#if IS_BUILTIN(CONFIG_NET_CLS_FLOW)
if (tp->classify == flow_classify)
return flow_classify(skb, tp, res);
#endif
#if IS_BUILTIN(CONFIG_NET_CLS_ROUTE4)
if (tp->classify == route4_classify)
return route4_classify(skb, tp, res);
#endif
#if IS_BUILTIN(CONFIG_NET_CLS_RSVP)
if (tp->classify == rsvp_classify)
return rsvp_classify(skb, tp, res);
#endif
#if IS_BUILTIN(CONFIG_NET_CLS_RSVP6)
if (tp->classify == rsvp6_classify)
return rsvp6_classify(skb, tp, res);
#endif
#if IS_BUILTIN(CONFIG_NET_CLS_TCINDEX)
if (tp->classify == tcindex_classify)
return tcindex_classify(skb, tp, res);
#endif

/* No built-in classifier matched, or the wrapper is bypassed: indirect call. */
skip:
return tp->classify(skb, tp, res);
}

#endif /* CONFIG_NET_CLS */

/*
 * tc_wrapper_init - decide at boot whether the direct-call wrappers are used.
 *
 * On x86, when the CPU is not using the retpoline mitigation
 * (X86_FEATURE_RETPOLINE is not enabled), turn on tc_skip_wrapper so that
 * tc_act() and tc_classify() skip their comparison chains and make the plain
 * indirect call. On other architectures this does nothing and the key keeps
 * its initial state.
 *
 * This definition deliberately lives outside the CONFIG_NET_CLS section:
 * it touches only tc_skip_wrapper, and the !CONFIG_RETPOLINE branch of this
 * header provides tc_wrapper_init() unconditionally. Keeping it inside
 * "#ifdef CONFIG_NET_CLS" left builds with CONFIG_RETPOLINE=y and
 * CONFIG_NET_CLS unset without any definition of this function.
 */
static inline void tc_wrapper_init(void)
{
#ifdef CONFIG_X86
	if (!cpu_feature_enabled(X86_FEATURE_RETPOLINE))
		static_branch_enable(&tc_skip_wrapper);
#endif
}

#else

/*
 * Without CONFIG_RETPOLINE no wrapper names the handlers directly, so the
 * linkage prefix used in front of them in the .c files expands to "static".
 */
#define TC_INDIRECT_SCOPE static

/*
 * tc_act - fallback used when CONFIG_RETPOLINE is disabled.
 *
 * Simply dispatches to the action's own handler through a->ops->act and
 * hands its return value back to the caller.
 */
static inline int tc_act(struct sk_buff *skb, const struct tc_action *a,
			 struct tcf_result *res)
{
	int verdict;

	verdict = a->ops->act(skb, a, res);

	return verdict;
}

/*
 * tc_classify - fallback used when CONFIG_RETPOLINE is disabled.
 *
 * Simply dispatches to the filter's own handler through tp->classify and
 * hands its return value back to the caller.
 */
static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
			      struct tcf_result *res)
{
	int verdict;

	verdict = tp->classify(skb, tp, res);

	return verdict;
}

/*
 * tc_wrapper_init - no-op when CONFIG_RETPOLINE is disabled: this branch has
 * no tc_skip_wrapper key to set up, so callers can invoke it unconditionally.
 */
static inline void tc_wrapper_init(void)
{
}

#endif

#endif /* __NET_TC_WRAPPER_H */
3 changes: 2 additions & 1 deletion net/sched/act_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <net/act_api.h>
#include <net/netlink.h>
#include <net/flow_offload.h>
#include <net/tc_wrapper.h>

#ifdef CONFIG_INET
DEFINE_STATIC_KEY_FALSE(tcf_frag_xmit_count);
Expand Down Expand Up @@ -1080,7 +1081,7 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,

repeat_ttl = 32;
repeat:
ret = a->ops->act(skb, a, res);
ret = tc_act(skb, a, res);
if (unlikely(ret == TC_ACT_REPEAT)) {
if (--repeat_ttl != 0)
goto repeat;
Expand Down
6 changes: 4 additions & 2 deletions net/sched/act_bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include <linux/tc_act/tc_bpf.h>
#include <net/tc_act/tc_bpf.h>
#include <net/tc_wrapper.h>

#define ACT_BPF_NAME_LEN 256

Expand All @@ -31,8 +32,9 @@ struct tcf_bpf_cfg {

static struct tc_action_ops act_bpf_ops;

static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act,
struct tcf_result *res)
TC_INDIRECT_SCOPE int tcf_bpf_act(struct sk_buff *skb,
const struct tc_action *act,
struct tcf_result *res)
{
bool at_ingress = skb_at_tc_ingress(skb);
struct tcf_bpf *prog = to_bpf(act);
Expand Down
6 changes: 4 additions & 2 deletions net/sched/act_connmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,17 @@
#include <net/pkt_cls.h>
#include <uapi/linux/tc_act/tc_connmark.h>
#include <net/tc_act/tc_connmark.h>
#include <net/tc_wrapper.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>

static struct tc_action_ops act_connmark_ops;

static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
TC_INDIRECT_SCOPE int tcf_connmark_act(struct sk_buff *skb,
const struct tc_action *a,
struct tcf_result *res)
{
const struct nf_conntrack_tuple_hash *thash;
struct nf_conntrack_tuple tuple;
Expand Down
6 changes: 4 additions & 2 deletions net/sched/act_csum.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

#include <linux/tc_act/tc_csum.h>
#include <net/tc_act/tc_csum.h>
#include <net/tc_wrapper.h>

static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
[TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
Expand Down Expand Up @@ -563,8 +564,9 @@ static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags)
return 0;
}

static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
TC_INDIRECT_SCOPE int tcf_csum_act(struct sk_buff *skb,
const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_csum *p = to_tcf_csum(a);
bool orig_vlan_tag_present = false;
Expand Down
5 changes: 3 additions & 2 deletions net/sched/act_ct.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <net/ipv6_frag.h>
#include <uapi/linux/tc_act/tc_ct.h>
#include <net/tc_act/tc_ct.h>
#include <net/tc_wrapper.h>

#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
Expand Down Expand Up @@ -1038,8 +1039,8 @@ static int tcf_ct_act_nat(struct sk_buff *skb,
#endif
}

static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct net *net = dev_net(skb->dev);
enum ip_conntrack_info ctinfo;
Expand Down
6 changes: 4 additions & 2 deletions net/sched/act_ctinfo.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <net/pkt_cls.h>
#include <uapi/linux/tc_act/tc_ctinfo.h>
#include <net/tc_act/tc_ctinfo.h>
#include <net/tc_wrapper.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
Expand Down Expand Up @@ -75,8 +76,9 @@ static void tcf_ctinfo_cpmark_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
skb->mark = READ_ONCE(ct->mark) & cp->cpmarkmask;
}

static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
TC_INDIRECT_SCOPE int tcf_ctinfo_act(struct sk_buff *skb,
const struct tc_action *a,
struct tcf_result *res)
{
const struct nf_conntrack_tuple_hash *thash = NULL;
struct tcf_ctinfo *ca = to_ctinfo(a);
Expand Down
Loading

0 comments on commit b602d00

Please sign in to comment.