Commit 47e958ea authored by Pavel Emelyanov, committed by David S. Miller

[NET]: Fix the race between sk_filter_(de|at)tach and sk_clone()

The proposed fix is to delay the reference counter decrement until a
quiescent state has passed. This gives sk_clone() a chance to take a
reference on the cloned filter.

The regular sk_filter_uncharge can only happen from sk_free(), so there
is no need to delay the put there - the socket is dead anyway and is
about to be released itself.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent d3904b73
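
For context, the race the patch closes can be pictured as below. This is an
illustrative, simplified sketch in kernel style, not verbatim mainline code;
sock_copy() and sk_filter_charge() are the era's kernel helpers, shown here
only to mark where the cloning CPU touches the filter.

/*
 * Illustrative sketch of the race (simplified, not verbatim kernel code):
 *
 *   CPU 0: sk_detach_filter(sk)            CPU 1: sk_clone(sk)
 *   -------------------------------        -------------------------------
 *   rcu_assign_pointer(sk->sk_filter,
 *                      NULL);
 *                                           sock_copy(newsk, sk);
 *                                             ... newsk->sk_filter still
 *                                             ... points at the old filter
 *   sk_filter_uncharge(sk, filter);
 *     ... refcnt drops to 0, kfree(filter)
 *                                           filter = newsk->sk_filter;
 *                                           sk_filter_charge(newsk, filter);
 *                                             ... use-after-free
 *
 * Deferring the final put with call_rcu_bh() keeps the filter allocated
 * until a quiescent state has passed, so the cloning CPU can still take
 * its reference safely.
 */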
include/net/sock.h
@@ -904,16 +904,6 @@ static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
 	return err;
 }
 
-/**
- *	sk_filter_rcu_free: Free a socket filter
- *	@rcu: rcu_head that contains the sk_filter to free
- */
-static inline void sk_filter_rcu_free(struct rcu_head *rcu)
-{
-	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
-	kfree(fp);
-}
-
 /**
  *	sk_filter_release: Release a socket filter
  *	@sk: socket
@@ -925,7 +915,7 @@ static inline void sk_filter_rcu_free(struct rcu_head *rcu)
 static inline void sk_filter_release(struct sk_filter *fp)
 {
 	if (atomic_dec_and_test(&fp->refcnt))
-		call_rcu_bh(&fp->rcu, sk_filter_rcu_free);
+		kfree(fp);
 }
 
 static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
net/core/filter.c
@@ -386,6 +386,25 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 	return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
 }
 
+/**
+ *	sk_filter_rcu_release: Release a socket filter by rcu_head
+ *	@rcu: rcu_head that contains the sk_filter to free
+ */
+static void sk_filter_rcu_release(struct rcu_head *rcu)
+{
+	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
+
+	sk_filter_release(fp);
+}
+
+static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp)
+{
+	unsigned int size = sk_filter_len(fp);
+
+	atomic_sub(size, &sk->sk_omem_alloc);
+	call_rcu_bh(&fp->rcu, sk_filter_rcu_release);
+}
+
 /**
  *	sk_attach_filter - attach a socket filter
  *	@fprog: the filter program
@@ -428,7 +447,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 	rcu_assign_pointer(sk->sk_filter, fp);
 	rcu_read_unlock_bh();
 
-	sk_filter_uncharge(sk, old_fp);
+	sk_filter_delayed_uncharge(sk, old_fp);
 	return 0;
 }
 
@@ -441,7 +460,7 @@ int sk_detach_filter(struct sock *sk)
 	filter = rcu_dereference(sk->sk_filter);
 	if (filter) {
 		rcu_assign_pointer(sk->sk_filter, NULL);
-		sk_filter_uncharge(sk, filter);
+		sk_filter_delayed_uncharge(sk, filter);
 		ret = 0;
 	}
 	rcu_read_unlock_bh();
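
The consumer that benefits from the delayed put is the socket cloning path.
Roughly, and hedged as a simplified sketch rather than the exact 2.6-era
source, sk_clone() inherits the parent's sk_filter pointer through the
struct copy and then takes its own reference. The function name
sk_clone_filter_sketch() is hypothetical; sock_copy() and sk_filter_charge()
are real kernel helpers used here with approximate signatures.

/* Simplified sketch of the cloning side (not verbatim mainline code). */
static void sk_clone_filter_sketch(struct sock *newsk, const struct sock *sk)
{
	struct sk_filter *filter;

	sock_copy(newsk, sk);		/* newsk inherits sk->sk_filter */

	filter = newsk->sk_filter;
	if (filter != NULL)
		/* Safe only because a concurrent sk_detach_filter() now
		 * defers the final kfree() behind an RCU grace period,
		 * so the filter cannot vanish between the copy above and
		 * this reference bump. */
		sk_filter_charge(newsk, filter);
}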