Commit 8c5bbe77 authored by David S. Miller's avatar David S. Miller

Merge branch 'act_bpf_lockless'

Alexei Starovoitov says:

====================
act_bpf: remove spinlock in fast path

v1 version had a race condition in cleanup path of bpf_prog.
I tried to fix it by adding new callback 'cleanup_rcu' to 'struct tcf_common'
and call it out of act_api cleanup path, but Daniel noticed
(thanks for the idea!) that most of the classifiers already do action cleanup
out of rcu callback.
So instead this set of patches converts tcindex and rsvp classifiers to call
tcf_exts_destroy() after rcu grace period and since action cleanup logic
in __tcf_hash_release() is only called when bind and refcnt goes to zero,
it's guaranteed that cleanup() callback is called from rcu callback.
More specifically:
patches 1 and 2 - simple fixes
patches 2 and 3 - convert tcf_exts_destroy in tcindex and rsvp to call_rcu
patch 5 - removes spin_lock from act_bpf

The cleanup of actions is now universally done after rcu grace period
and in the future we can drop (now unnecessary) call_rcu from tcf_hash_destroy()
patch 5 is using synchronize_rcu() in act_bpf replacement path, since it's
very rare and alternative of dynamically allocating 'struct tcf_bpf_cfg' just
to pass it to call_rcu looks even less appealing.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents dc8242f7 cff82457
......@@ -111,7 +111,6 @@ struct tc_action_ops {
};
int tcf_hash_search(struct tc_action *a, u32 index);
void tcf_hash_destroy(struct tc_action *a);
u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo);
int tcf_hash_check(u32 index, struct tc_action *a, int bind);
int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a,
......
......@@ -15,7 +15,7 @@
struct tcf_bpf {
struct tcf_common common;
struct bpf_prog *filter;
struct bpf_prog __rcu *filter;
union {
u32 bpf_fd;
u16 bpf_num_ops;
......
......@@ -36,7 +36,7 @@ static void free_tcf(struct rcu_head *head)
kfree(p);
}
void tcf_hash_destroy(struct tc_action *a)
static void tcf_hash_destroy(struct tc_action *a)
{
struct tcf_common *p = a->priv;
struct tcf_hashinfo *hinfo = a->ops->hinfo;
......@@ -52,7 +52,6 @@ void tcf_hash_destroy(struct tc_action *a)
*/
call_rcu(&p->tcfc_rcu, free_tcf);
}
EXPORT_SYMBOL(tcf_hash_destroy);
int __tcf_hash_release(struct tc_action *a, bool bind, bool strict)
{
......
......@@ -37,25 +37,24 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
struct tcf_result *res)
{
struct tcf_bpf *prog = act->priv;
struct bpf_prog *filter;
int action, filter_res;
bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
if (unlikely(!skb_mac_header_was_set(skb)))
return TC_ACT_UNSPEC;
spin_lock(&prog->tcf_lock);
prog->tcf_tm.lastuse = jiffies;
bstats_update(&prog->tcf_bstats, skb);
tcf_lastuse_update(&prog->tcf_tm);
bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
/* Needed here for accessing maps. */
rcu_read_lock();
filter = rcu_dereference(prog->filter);
if (at_ingress) {
__skb_push(skb, skb->mac_len);
filter_res = BPF_PROG_RUN(prog->filter, skb);
filter_res = BPF_PROG_RUN(filter, skb);
__skb_pull(skb, skb->mac_len);
} else {
filter_res = BPF_PROG_RUN(prog->filter, skb);
filter_res = BPF_PROG_RUN(filter, skb);
}
rcu_read_unlock();
......@@ -77,7 +76,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
break;
case TC_ACT_SHOT:
action = filter_res;
prog->tcf_qstats.drops++;
qstats_drop_inc(this_cpu_ptr(prog->common.cpu_qstats));
break;
case TC_ACT_UNSPEC:
action = prog->tcf_action;
......@@ -87,7 +86,6 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
break;
}
spin_unlock(&prog->tcf_lock);
return action;
}
......@@ -263,7 +261,10 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
struct tcf_bpf_cfg *cfg)
{
cfg->is_ebpf = tcf_bpf_is_ebpf(prog);
cfg->filter = prog->filter;
/* updates to prog->filter are prevented, since it's called either
* with rtnl lock or during final cleanup in rcu callback
*/
cfg->filter = rcu_dereference_protected(prog->filter, 1);
cfg->bpf_ops = prog->bpf_ops;
cfg->bpf_name = prog->bpf_name;
......@@ -294,7 +295,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
if (!tcf_hash_check(parm->index, act, bind)) {
ret = tcf_hash_create(parm->index, est, act,
sizeof(*prog), bind, false);
sizeof(*prog), bind, true);
if (ret < 0)
return ret;
......@@ -325,9 +326,9 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
goto out;
prog = to_bpf(act);
spin_lock_bh(&prog->tcf_lock);
ASSERT_RTNL();
if (ret != ACT_P_CREATED)
if (res != ACT_P_CREATED)
tcf_bpf_prog_fill_cfg(prog, &old);
prog->bpf_ops = cfg.bpf_ops;
......@@ -339,14 +340,15 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
prog->bpf_fd = cfg.bpf_fd;
prog->tcf_action = parm->action;
prog->filter = cfg.filter;
spin_unlock_bh(&prog->tcf_lock);
rcu_assign_pointer(prog->filter, cfg.filter);
if (res == ACT_P_CREATED)
if (res == ACT_P_CREATED) {
tcf_hash_insert(act);
else
} else {
/* make sure the program being replaced is no longer executing */
synchronize_rcu();
tcf_bpf_cfg_cleanup(&old);
}
return res;
out:
......
......@@ -283,12 +283,22 @@ static int rsvp_init(struct tcf_proto *tp)
return -ENOBUFS;
}
static void
rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
static void rsvp_delete_filter_rcu(struct rcu_head *head)
{
tcf_unbind_filter(tp, &f->res);
struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu);
tcf_exts_destroy(&f->exts);
kfree_rcu(f, rcu);
kfree(f);
}
static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
{
tcf_unbind_filter(tp, &f->res);
/* all classifiers are required to call tcf_exts_destroy() after rcu
* grace period, since converted-to-rcu actions are relying on that
* in cleanup() callback
*/
call_rcu(&f->rcu, rsvp_delete_filter_rcu);
}
static bool rsvp_destroy(struct tcf_proto *tp, bool force)
......
......@@ -27,6 +27,7 @@
struct tcindex_filter_result {
struct tcf_exts exts;
struct tcf_result res;
struct rcu_head rcu;
};
struct tcindex_filter {
......@@ -133,8 +134,23 @@ static int tcindex_init(struct tcf_proto *tp)
return 0;
}
static int
tcindex_delete(struct tcf_proto *tp, unsigned long arg)
static void tcindex_destroy_rexts(struct rcu_head *head)
{
struct tcindex_filter_result *r;
r = container_of(head, struct tcindex_filter_result, rcu);
tcf_exts_destroy(&r->exts);
}
static void tcindex_destroy_fexts(struct rcu_head *head)
{
struct tcindex_filter *f = container_of(head, struct tcindex_filter, rcu);
tcf_exts_destroy(&f->result.exts);
kfree(f);
}
static int tcindex_delete(struct tcf_proto *tp, unsigned long arg)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
......@@ -162,9 +178,14 @@ tcindex_delete(struct tcf_proto *tp, unsigned long arg)
rcu_assign_pointer(*walk, rtnl_dereference(f->next));
}
tcf_unbind_filter(tp, &r->res);
tcf_exts_destroy(&r->exts);
/* all classifiers are required to call tcf_exts_destroy() after rcu
* grace period, since converted-to-rcu actions are relying on that
* in cleanup() callback
*/
if (f)
kfree_rcu(f, rcu);
call_rcu(&f->rcu, tcindex_destroy_fexts);
else
call_rcu(&r->rcu, tcindex_destroy_rexts);
return 0;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment