Commit a4298e45 authored by Eric Dumazet, committed by David S. Miller

net: add SOCK_RCU_FREE socket flag

We want a generic way to insert an RCU grace period before socket
freeing for cases where SLAB_DESTROY_BY_RCU is adding too
much overhead.

SLAB_DESTROY_BY_RCU strict rules force us to take a reference
on the socket sk_refcnt, and it is a performance problem for UDP
encapsulation, or TCP synflood behavior, as many CPUs might
attempt the atomic operations on a shared sk_refcnt.

UDP sockets and TCP listeners can set SOCK_RCU_FREE so that their
lookup can use traditional RCU rules, without refcount changes.
They can set the flag only once hashed and visible by other cpus.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <tom@herbertland.com>
Tested-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 43e2dfb2
...@@ -438,6 +438,7 @@ struct sock { ...@@ -438,6 +438,7 @@ struct sock {
struct sk_buff *skb); struct sk_buff *skb);
void (*sk_destruct)(struct sock *sk); void (*sk_destruct)(struct sock *sk);
struct sock_reuseport __rcu *sk_reuseport_cb; struct sock_reuseport __rcu *sk_reuseport_cb;
struct rcu_head sk_rcu;
}; };
#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
...@@ -720,6 +721,7 @@ enum sock_flags { ...@@ -720,6 +721,7 @@ enum sock_flags {
*/ */
SOCK_FILTER_LOCKED, /* Filter cannot be changed anymore */ SOCK_FILTER_LOCKED, /* Filter cannot be changed anymore */
SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */ SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */
SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */
}; };
#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
......
...@@ -1419,8 +1419,12 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, ...@@ -1419,8 +1419,12 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
} }
EXPORT_SYMBOL(sk_alloc); EXPORT_SYMBOL(sk_alloc);
void sk_destruct(struct sock *sk) /* Sockets having SOCK_RCU_FREE will call this function after one RCU
* grace period. This is the case for UDP sockets and TCP listeners.
*/
static void __sk_destruct(struct rcu_head *head)
{ {
struct sock *sk = container_of(head, struct sock, sk_rcu);
struct sk_filter *filter; struct sk_filter *filter;
if (sk->sk_destruct) if (sk->sk_destruct)
...@@ -1449,6 +1453,14 @@ void sk_destruct(struct sock *sk) ...@@ -1449,6 +1453,14 @@ void sk_destruct(struct sock *sk)
sk_prot_free(sk->sk_prot_creator, sk); sk_prot_free(sk->sk_prot_creator, sk);
} }
/* Destroy a socket, honouring the SOCK_RCU_FREE flag.
 *
 * Without SOCK_RCU_FREE the teardown in __sk_destruct() is invoked
 * directly. With the flag set, __sk_destruct() is instead queued via
 * call_rcu() on the socket's embedded sk_rcu head, so it runs only
 * after an RCU grace period.
 */
void sk_destruct(struct sock *sk)
{
	if (!sock_flag(sk, SOCK_RCU_FREE)) {
		/* No deferral requested: tear down right away. */
		__sk_destruct(&sk->sk_rcu);
		return;
	}

	call_rcu(&sk->sk_rcu, __sk_destruct);
}
static void __sk_free(struct sock *sk) static void __sk_free(struct sock *sk)
{ {
if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt)) if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment