Commit e9ab2559 authored by Paolo Abeni

Merge branch 'net-sched-transition-actions-to-pcpu-stats-and-rcu'

Pedro Tammela says:

====================
net/sched: transition actions to pcpu stats and rcu

Following the work done for act_pedit[0], transition the remaining tc
actions to percpu stats and rcu, whenever possible.
Percpu stats make updating the action stats very cheap, while combining
it with rcu action parameters makes it possible to get rid of the per
action lock in the datapath.

For act_connmark and act_nat we run the following tests:
- tc filter add dev ens2f0 ingress matchall action connmark
- tc filter add dev ens2f0 ingress matchall action nat ingress any 10.10.10.10

Our setup consists of a 26-core Intel CPU and a 25G NIC.
We use TRex to send 10 Mpps of TCP packets and take perf measurements.
Both actions improved performance as expected since the datapath lock disappeared.

For act_pedit we move the drop counter to percpu, when available.
For act_gate we move the counters to percpu, when available.

[0] https://lore.kernel.org/all/20230131145149.3776656-1-pctammela@mojatatu.com/
====================

Link: https://lore.kernel.org/r/20230214211534.735718-1-pctammela@mojatatu.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents 10d13421 2d2e75d2
......@@ -4,10 +4,15 @@
#include <net/act_api.h>
struct tcf_connmark_info {
struct tc_action common;
struct tcf_connmark_parms {
struct net *net;
u16 zone;
struct rcu_head rcu;
};
struct tcf_connmark_info {
struct tc_action common;
struct tcf_connmark_parms __rcu *parms;
};
#define to_connmark(a) ((struct tcf_connmark_info *)a)
......
......@@ -5,13 +5,17 @@
#include <linux/types.h>
#include <net/act_api.h>
struct tcf_nat {
struct tc_action common;
struct tcf_nat_parms {
__be32 old_addr;
__be32 new_addr;
__be32 mask;
u32 flags;
struct rcu_head rcu;
};
struct tcf_nat {
struct tc_action common;
struct tcf_nat_parms __rcu *parms;
};
#define to_tcf_nat(a) ((struct tcf_nat *)a)
......
......@@ -36,13 +36,15 @@ TC_INDIRECT_SCOPE int tcf_connmark_act(struct sk_buff *skb,
struct nf_conntrack_tuple tuple;
enum ip_conntrack_info ctinfo;
struct tcf_connmark_info *ca = to_connmark(a);
struct tcf_connmark_parms *parms;
struct nf_conntrack_zone zone;
struct nf_conn *c;
int proto;
spin_lock(&ca->tcf_lock);
tcf_lastuse_update(&ca->tcf_tm);
bstats_update(&ca->tcf_bstats, skb);
tcf_action_update_bstats(&ca->common, skb);
parms = rcu_dereference_bh(ca->parms);
switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
......@@ -64,31 +66,29 @@ TC_INDIRECT_SCOPE int tcf_connmark_act(struct sk_buff *skb,
c = nf_ct_get(skb, &ctinfo);
if (c) {
skb->mark = READ_ONCE(c->mark);
/* using overlimits stats to count how many packets marked */
ca->tcf_qstats.overlimits++;
goto out;
goto count;
}
if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
proto, ca->net, &tuple))
if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, parms->net,
&tuple))
goto out;
zone.id = ca->zone;
zone.id = parms->zone;
zone.dir = NF_CT_DEFAULT_ZONE_DIR;
thash = nf_conntrack_find_get(ca->net, &zone, &tuple);
thash = nf_conntrack_find_get(parms->net, &zone, &tuple);
if (!thash)
goto out;
c = nf_ct_tuplehash_to_ctrack(thash);
/* using overlimits stats to count how many packets marked */
ca->tcf_qstats.overlimits++;
skb->mark = READ_ONCE(c->mark);
nf_ct_put(c);
count:
/* using overlimits stats to count how many packets marked */
tcf_action_inc_overlimit_qstats(&ca->common);
out:
spin_unlock(&ca->tcf_lock);
return ca->tcf_action;
return READ_ONCE(ca->tcf_action);
}
static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
......@@ -101,6 +101,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, act_connmark_ops.net_id);
struct tcf_connmark_parms *nparms, *oparms;
struct nlattr *tb[TCA_CONNMARK_MAX + 1];
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct tcf_chain *goto_ch = NULL;
......@@ -120,52 +121,66 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
if (!tb[TCA_CONNMARK_PARMS])
return -EINVAL;
nparms = kzalloc(sizeof(*nparms), GFP_KERNEL);
if (!nparms)
return -ENOMEM;
parm = nla_data(tb[TCA_CONNMARK_PARMS]);
index = parm->index;
ret = tcf_idr_check_alloc(tn, &index, a, bind);
if (!ret) {
ret = tcf_idr_create(tn, index, est, a,
&act_connmark_ops, bind, false, flags);
ret = tcf_idr_create_from_flags(tn, index, est, a,
&act_connmark_ops, bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
err = ret;
goto out_free;
}
ci = to_connmark(*a);
err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch,
extack);
if (err < 0)
goto release_idr;
tcf_action_set_ctrlact(*a, parm->action, goto_ch);
ci->net = net;
ci->zone = parm->zone;
nparms->net = net;
nparms->zone = parm->zone;
ret = ACT_P_CREATED;
} else if (ret > 0) {
ci = to_connmark(*a);
if (bind)
return 0;
if (bind) {
err = 0;
goto out_free;
}
if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
err = -EEXIST;
goto release_idr;
}
nparms->net = rtnl_dereference(ci->parms)->net;
nparms->zone = parm->zone;
ret = 0;
}
err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch,
extack);
err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
if (err < 0)
goto release_idr;
/* replacing action and zone */
spin_lock_bh(&ci->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
ci->zone = parm->zone;
oparms = rcu_replace_pointer(ci->parms, nparms, lockdep_is_held(&ci->tcf_lock));
spin_unlock_bh(&ci->tcf_lock);
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
ret = 0;
}
if (oparms)
kfree_rcu(oparms, rcu);
return ret;
release_idr:
tcf_idr_release(*a, bind);
out_free:
kfree(nparms);
return err;
}
......@@ -179,11 +194,14 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
.refcnt = refcount_read(&ci->tcf_refcnt) - ref,
.bindcnt = atomic_read(&ci->tcf_bindcnt) - bind,
};
struct tcf_connmark_parms *parms;
struct tcf_t t;
spin_lock_bh(&ci->tcf_lock);
parms = rcu_dereference_protected(ci->parms, lockdep_is_held(&ci->tcf_lock));
opt.action = ci->tcf_action;
opt.zone = ci->zone;
opt.zone = parms->zone;
if (nla_put(skb, TCA_CONNMARK_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
......@@ -201,6 +219,16 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
return -1;
}
static void tcf_connmark_cleanup(struct tc_action *a)
{
struct tcf_connmark_info *ci = to_connmark(a);
struct tcf_connmark_parms *parms;
parms = rcu_dereference_protected(ci->parms, 1);
if (parms)
kfree_rcu(parms, rcu);
}
static struct tc_action_ops act_connmark_ops = {
.kind = "connmark",
.id = TCA_ID_CONNMARK,
......@@ -208,6 +236,7 @@ static struct tc_action_ops act_connmark_ops = {
.act = tcf_connmark_act,
.dump = tcf_connmark_dump,
.init = tcf_connmark_init,
.cleanup = tcf_connmark_cleanup,
.size = sizeof(struct tcf_connmark_info),
};
......
......@@ -119,35 +119,37 @@ TC_INDIRECT_SCOPE int tcf_gate_act(struct sk_buff *skb,
struct tcf_result *res)
{
struct tcf_gate *gact = to_gate(a);
spin_lock(&gact->tcf_lock);
int action = READ_ONCE(gact->tcf_action);
tcf_lastuse_update(&gact->tcf_tm);
bstats_update(&gact->tcf_bstats, skb);
tcf_action_update_bstats(&gact->common, skb);
spin_lock(&gact->tcf_lock);
if (unlikely(gact->current_gate_status & GATE_ACT_PENDING)) {
spin_unlock(&gact->tcf_lock);
return gact->tcf_action;
return action;
}
if (!(gact->current_gate_status & GATE_ACT_GATE_OPEN))
if (!(gact->current_gate_status & GATE_ACT_GATE_OPEN)) {
spin_unlock(&gact->tcf_lock);
goto drop;
}
if (gact->current_max_octets >= 0) {
gact->current_entry_octets += qdisc_pkt_len(skb);
if (gact->current_entry_octets > gact->current_max_octets) {
gact->tcf_qstats.overlimits++;
goto drop;
spin_unlock(&gact->tcf_lock);
goto overlimit;
}
}
spin_unlock(&gact->tcf_lock);
return gact->tcf_action;
drop:
gact->tcf_qstats.drops++;
spin_unlock(&gact->tcf_lock);
return action;
overlimit:
tcf_action_inc_overlimit_qstats(&gact->common);
drop:
tcf_action_inc_drop_qstats(&gact->common);
return TC_ACT_SHOT;
}
......@@ -357,8 +359,8 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
return 0;
if (!err) {
ret = tcf_idr_create(tn, index, est, a,
&act_gate_ops, bind, false, flags);
ret = tcf_idr_create_from_flags(tn, index, est, a,
&act_gate_ops, bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
......
......@@ -38,6 +38,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
{
struct tc_action_net *tn = net_generic(net, act_nat_ops.net_id);
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct tcf_nat_parms *nparm, *oparm;
struct nlattr *tb[TCA_NAT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tc_nat *parm;
......@@ -59,8 +60,8 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
index = parm->index;
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
ret = tcf_idr_create(tn, index, est, a,
&act_nat_ops, bind, false, flags);
ret = tcf_idr_create_from_flags(tn, index, est, a, &act_nat_ops,
bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
......@@ -79,19 +80,31 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
if (err < 0)
goto release_idr;
nparm = kzalloc(sizeof(*nparm), GFP_KERNEL);
if (!nparm) {
err = -ENOMEM;
goto release_idr;
}
nparm->old_addr = parm->old_addr;
nparm->new_addr = parm->new_addr;
nparm->mask = parm->mask;
nparm->flags = parm->flags;
p = to_tcf_nat(*a);
spin_lock_bh(&p->tcf_lock);
p->old_addr = parm->old_addr;
p->new_addr = parm->new_addr;
p->mask = parm->mask;
p->flags = parm->flags;
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
oparm = rcu_replace_pointer(p->parms, nparm, lockdep_is_held(&p->tcf_lock));
spin_unlock_bh(&p->tcf_lock);
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
if (oparm)
kfree_rcu(oparm, rcu);
return ret;
release_idr:
tcf_idr_release(*a, bind);
......@@ -103,6 +116,7 @@ TC_INDIRECT_SCOPE int tcf_nat_act(struct sk_buff *skb,
struct tcf_result *res)
{
struct tcf_nat *p = to_tcf_nat(a);
struct tcf_nat_parms *parms;
struct iphdr *iph;
__be32 old_addr;
__be32 new_addr;
......@@ -113,18 +127,16 @@ TC_INDIRECT_SCOPE int tcf_nat_act(struct sk_buff *skb,
int ihl;
int noff;
spin_lock(&p->tcf_lock);
tcf_lastuse_update(&p->tcf_tm);
old_addr = p->old_addr;
new_addr = p->new_addr;
mask = p->mask;
egress = p->flags & TCA_NAT_FLAG_EGRESS;
action = p->tcf_action;
tcf_action_update_bstats(&p->common, skb);
bstats_update(&p->tcf_bstats, skb);
action = READ_ONCE(p->tcf_action);
spin_unlock(&p->tcf_lock);
parms = rcu_dereference_bh(p->parms);
old_addr = parms->old_addr;
new_addr = parms->new_addr;
mask = parms->mask;
egress = parms->flags & TCA_NAT_FLAG_EGRESS;
if (unlikely(action == TC_ACT_SHOT))
goto drop;
......@@ -248,9 +260,7 @@ TC_INDIRECT_SCOPE int tcf_nat_act(struct sk_buff *skb,
return action;
drop:
spin_lock(&p->tcf_lock);
p->tcf_qstats.drops++;
spin_unlock(&p->tcf_lock);
tcf_action_inc_drop_qstats(&p->common);
return TC_ACT_SHOT;
}
......@@ -264,15 +274,20 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
.refcnt = refcount_read(&p->tcf_refcnt) - ref,
.bindcnt = atomic_read(&p->tcf_bindcnt) - bind,
};
struct tcf_nat_parms *parms;
struct tcf_t t;
spin_lock_bh(&p->tcf_lock);
opt.old_addr = p->old_addr;
opt.new_addr = p->new_addr;
opt.mask = p->mask;
opt.flags = p->flags;
opt.action = p->tcf_action;
parms = rcu_dereference_protected(p->parms, lockdep_is_held(&p->tcf_lock));
opt.old_addr = parms->old_addr;
opt.new_addr = parms->new_addr;
opt.mask = parms->mask;
opt.flags = parms->flags;
if (nla_put(skb, TCA_NAT_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
......@@ -289,6 +304,16 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
return -1;
}
static void tcf_nat_cleanup(struct tc_action *a)
{
struct tcf_nat *p = to_tcf_nat(a);
struct tcf_nat_parms *parms;
parms = rcu_dereference_protected(p->parms, 1);
if (parms)
kfree_rcu(parms, rcu);
}
static struct tc_action_ops act_nat_ops = {
.kind = "nat",
.id = TCA_ID_NAT,
......@@ -296,6 +321,7 @@ static struct tc_action_ops act_nat_ops = {
.act = tcf_nat_act,
.dump = tcf_nat_dump,
.init = tcf_nat_init,
.cleanup = tcf_nat_cleanup,
.size = sizeof(struct tcf_nat),
};
......
......@@ -443,9 +443,7 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
goto done;
bad:
spin_lock(&p->tcf_lock);
p->tcf_qstats.overlimits++;
spin_unlock(&p->tcf_lock);
tcf_action_inc_overlimit_qstats(&p->common);
done:
return p->tcf_action;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment