Commit cff82457 authored by Alexei Starovoitov's avatar Alexei Starovoitov Committed by David S. Miller

net_sched: act_bpf: remove spinlock in fast path

Similar to act_gact/act_mirred, act_bpf can be lockless in packet processing
with extra care taken to free bpf programs after rcu grace period.
Replacement of existing act_bpf (very rare) is done with synchronize_rcu()
and final destruction is done from tc_action_ops->cleanup() callback that is
called from tcf_exts_destroy()->tcf_action_destroy()->__tcf_hash_release() when
bind and refcnt reach zero which is only possible when classifier is destroyed.
Previous two patches fixed the last two classifiers (tcindex and rsvp) to
call tcf_exts_destroy() from rcu callback.

Similar to gact/mirred there is a race between prog->filter and
prog->tcf_action. Meaning that the program being replaced may use
previous default action if it happened to return TC_ACT_UNSPEC.
act_mirred race betwen tcf_action and tcfm_dev is similar.
In all cases the race is harmless.
Long term we may want to improve the situation by replacing the whole
tc_action->priv as single pointer instead of updating inner fields one by one.
Signed-off-by: default avatarAlexei Starovoitov <ast@plumgrid.com>
Acked-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 9e528d89
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
struct tcf_bpf { struct tcf_bpf {
struct tcf_common common; struct tcf_common common;
struct bpf_prog *filter; struct bpf_prog __rcu *filter;
union { union {
u32 bpf_fd; u32 bpf_fd;
u16 bpf_num_ops; u16 bpf_num_ops;
......
...@@ -37,25 +37,24 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, ...@@ -37,25 +37,24 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
struct tcf_result *res) struct tcf_result *res)
{ {
struct tcf_bpf *prog = act->priv; struct tcf_bpf *prog = act->priv;
struct bpf_prog *filter;
int action, filter_res; int action, filter_res;
bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS; bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
if (unlikely(!skb_mac_header_was_set(skb))) if (unlikely(!skb_mac_header_was_set(skb)))
return TC_ACT_UNSPEC; return TC_ACT_UNSPEC;
spin_lock(&prog->tcf_lock); tcf_lastuse_update(&prog->tcf_tm);
bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
prog->tcf_tm.lastuse = jiffies;
bstats_update(&prog->tcf_bstats, skb);
/* Needed here for accessing maps. */
rcu_read_lock(); rcu_read_lock();
filter = rcu_dereference(prog->filter);
if (at_ingress) { if (at_ingress) {
__skb_push(skb, skb->mac_len); __skb_push(skb, skb->mac_len);
filter_res = BPF_PROG_RUN(prog->filter, skb); filter_res = BPF_PROG_RUN(filter, skb);
__skb_pull(skb, skb->mac_len); __skb_pull(skb, skb->mac_len);
} else { } else {
filter_res = BPF_PROG_RUN(prog->filter, skb); filter_res = BPF_PROG_RUN(filter, skb);
} }
rcu_read_unlock(); rcu_read_unlock();
...@@ -77,7 +76,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, ...@@ -77,7 +76,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
break; break;
case TC_ACT_SHOT: case TC_ACT_SHOT:
action = filter_res; action = filter_res;
prog->tcf_qstats.drops++; qstats_drop_inc(this_cpu_ptr(prog->common.cpu_qstats));
break; break;
case TC_ACT_UNSPEC: case TC_ACT_UNSPEC:
action = prog->tcf_action; action = prog->tcf_action;
...@@ -87,7 +86,6 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, ...@@ -87,7 +86,6 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
break; break;
} }
spin_unlock(&prog->tcf_lock);
return action; return action;
} }
...@@ -263,7 +261,10 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog, ...@@ -263,7 +261,10 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
struct tcf_bpf_cfg *cfg) struct tcf_bpf_cfg *cfg)
{ {
cfg->is_ebpf = tcf_bpf_is_ebpf(prog); cfg->is_ebpf = tcf_bpf_is_ebpf(prog);
cfg->filter = prog->filter; /* updates to prog->filter are prevented, since it's called either
* with rtnl lock or during final cleanup in rcu callback
*/
cfg->filter = rcu_dereference_protected(prog->filter, 1);
cfg->bpf_ops = prog->bpf_ops; cfg->bpf_ops = prog->bpf_ops;
cfg->bpf_name = prog->bpf_name; cfg->bpf_name = prog->bpf_name;
...@@ -294,7 +295,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, ...@@ -294,7 +295,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
if (!tcf_hash_check(parm->index, act, bind)) { if (!tcf_hash_check(parm->index, act, bind)) {
ret = tcf_hash_create(parm->index, est, act, ret = tcf_hash_create(parm->index, est, act,
sizeof(*prog), bind, false); sizeof(*prog), bind, true);
if (ret < 0) if (ret < 0)
return ret; return ret;
...@@ -325,7 +326,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, ...@@ -325,7 +326,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
goto out; goto out;
prog = to_bpf(act); prog = to_bpf(act);
spin_lock_bh(&prog->tcf_lock); ASSERT_RTNL();
if (res != ACT_P_CREATED) if (res != ACT_P_CREATED)
tcf_bpf_prog_fill_cfg(prog, &old); tcf_bpf_prog_fill_cfg(prog, &old);
...@@ -339,14 +340,15 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, ...@@ -339,14 +340,15 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
prog->bpf_fd = cfg.bpf_fd; prog->bpf_fd = cfg.bpf_fd;
prog->tcf_action = parm->action; prog->tcf_action = parm->action;
prog->filter = cfg.filter; rcu_assign_pointer(prog->filter, cfg.filter);
spin_unlock_bh(&prog->tcf_lock);
if (res == ACT_P_CREATED) if (res == ACT_P_CREATED) {
tcf_hash_insert(act); tcf_hash_insert(act);
else } else {
/* make sure the program being replaced is no longer executing */
synchronize_rcu();
tcf_bpf_cfg_cleanup(&old); tcf_bpf_cfg_cleanup(&old);
}
return res; return res;
out: out:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment