Commit fc01538f authored by Eric Dumazet's avatar Eric Dumazet Committed by David S. Miller

inet: simplify timewait refcounting

timewait sockets have a complex refcounting logic.
Once we realize it should be similar to established and
syn_recv sockets, we can use sk_nulls_del_node_init_rcu()
and remove inet_twsk_unhash()

In particular, deferred inet_twsk_put() added in commit
13475a30 ("tcp: connect() race with timewait reuse")
looks unecessary : When removing a timewait socket from
ehash or bhash, caller must own a reference on the socket
anyway.
Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 3fd2f1b9
...@@ -205,8 +205,8 @@ void inet_put_port(struct sock *sk); ...@@ -205,8 +205,8 @@ void inet_put_port(struct sock *sk);
void inet_hashinfo_init(struct inet_hashinfo *h); void inet_hashinfo_init(struct inet_hashinfo *h);
int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw); void __inet_hash_nolisten(struct sock *sk, struct sock *osk);
int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw); void __inet_hash(struct sock *sk, struct sock *osk);
void inet_hash(struct sock *sk); void inet_hash(struct sock *sk);
void inet_unhash(struct sock *sk); void inet_unhash(struct sock *sk);
......
...@@ -100,9 +100,7 @@ static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) ...@@ -100,9 +100,7 @@ static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk)
void inet_twsk_free(struct inet_timewait_sock *tw); void inet_twsk_free(struct inet_timewait_sock *tw);
void inet_twsk_put(struct inet_timewait_sock *tw); void inet_twsk_put(struct inet_timewait_sock *tw);
int inet_twsk_unhash(struct inet_timewait_sock *tw); void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
int inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
struct inet_hashinfo *hashinfo); struct inet_hashinfo *hashinfo);
struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
......
...@@ -343,7 +343,6 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, ...@@ -343,7 +343,6 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
struct sock *sk2; struct sock *sk2;
const struct hlist_nulls_node *node; const struct hlist_nulls_node *node;
struct inet_timewait_sock *tw = NULL; struct inet_timewait_sock *tw = NULL;
int twrefcnt = 0;
spin_lock(lock); spin_lock(lock);
...@@ -371,12 +370,10 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, ...@@ -371,12 +370,10 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
WARN_ON(!sk_unhashed(sk)); WARN_ON(!sk_unhashed(sk));
__sk_nulls_add_node_rcu(sk, &head->chain); __sk_nulls_add_node_rcu(sk, &head->chain);
if (tw) { if (tw) {
twrefcnt = inet_twsk_unhash(tw); sk_nulls_del_node_init_rcu((struct sock *)tw);
NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
} }
spin_unlock(lock); spin_unlock(lock);
if (twrefcnt)
inet_twsk_put(tw);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
if (twp) { if (twp) {
...@@ -384,7 +381,6 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, ...@@ -384,7 +381,6 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
} else if (tw) { } else if (tw) {
/* Silly. Should hash-dance instead... */ /* Silly. Should hash-dance instead... */
inet_twsk_deschedule(tw); inet_twsk_deschedule(tw);
inet_twsk_put(tw); inet_twsk_put(tw);
} }
return 0; return 0;
...@@ -403,13 +399,12 @@ static u32 inet_sk_port_offset(const struct sock *sk) ...@@ -403,13 +399,12 @@ static u32 inet_sk_port_offset(const struct sock *sk)
inet->inet_dport); inet->inet_dport);
} }
int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw) void __inet_hash_nolisten(struct sock *sk, struct sock *osk)
{ {
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct hlist_nulls_head *list; struct hlist_nulls_head *list;
struct inet_ehash_bucket *head; struct inet_ehash_bucket *head;
spinlock_t *lock; spinlock_t *lock;
int twrefcnt = 0;
WARN_ON(!sk_unhashed(sk)); WARN_ON(!sk_unhashed(sk));
...@@ -420,23 +415,22 @@ int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw) ...@@ -420,23 +415,22 @@ int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw)
spin_lock(lock); spin_lock(lock);
__sk_nulls_add_node_rcu(sk, list); __sk_nulls_add_node_rcu(sk, list);
if (tw) { if (osk) {
WARN_ON(sk->sk_hash != tw->tw_hash); WARN_ON(sk->sk_hash != osk->sk_hash);
twrefcnt = inet_twsk_unhash(tw); sk_nulls_del_node_init_rcu(osk);
} }
spin_unlock(lock); spin_unlock(lock);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
return twrefcnt;
} }
EXPORT_SYMBOL_GPL(__inet_hash_nolisten); EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw) void __inet_hash(struct sock *sk, struct sock *osk)
{ {
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct inet_listen_hashbucket *ilb; struct inet_listen_hashbucket *ilb;
if (sk->sk_state != TCP_LISTEN) if (sk->sk_state != TCP_LISTEN)
return __inet_hash_nolisten(sk, tw); return __inet_hash_nolisten(sk, osk);
WARN_ON(!sk_unhashed(sk)); WARN_ON(!sk_unhashed(sk));
ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
...@@ -445,7 +439,6 @@ int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw) ...@@ -445,7 +439,6 @@ int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw)
__sk_nulls_add_node_rcu(sk, &ilb->head); __sk_nulls_add_node_rcu(sk, &ilb->head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
spin_unlock(&ilb->lock); spin_unlock(&ilb->lock);
return 0;
} }
EXPORT_SYMBOL(__inet_hash); EXPORT_SYMBOL(__inet_hash);
...@@ -492,7 +485,6 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, ...@@ -492,7 +485,6 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
struct inet_bind_bucket *tb; struct inet_bind_bucket *tb;
int ret; int ret;
struct net *net = sock_net(sk); struct net *net = sock_net(sk);
int twrefcnt = 1;
if (!snum) { if (!snum) {
int i, remaining, low, high, port; int i, remaining, low, high, port;
...@@ -560,19 +552,16 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, ...@@ -560,19 +552,16 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
inet_bind_hash(sk, tb, port); inet_bind_hash(sk, tb, port);
if (sk_unhashed(sk)) { if (sk_unhashed(sk)) {
inet_sk(sk)->inet_sport = htons(port); inet_sk(sk)->inet_sport = htons(port);
twrefcnt += __inet_hash_nolisten(sk, tw); __inet_hash_nolisten(sk, (struct sock *)tw);
} }
if (tw) if (tw)
twrefcnt += inet_twsk_bind_unhash(tw, hinfo); inet_twsk_bind_unhash(tw, hinfo);
spin_unlock(&head->lock); spin_unlock(&head->lock);
if (tw) { if (tw) {
inet_twsk_deschedule(tw); inet_twsk_deschedule(tw);
while (twrefcnt) {
twrefcnt--;
inet_twsk_put(tw); inet_twsk_put(tw);
} }
}
ret = 0; ret = 0;
goto out; goto out;
......
...@@ -17,28 +17,6 @@ ...@@ -17,28 +17,6 @@
#include <net/ip.h> #include <net/ip.h>
/**
* inet_twsk_unhash - unhash a timewait socket from established hash
* @tw: timewait socket
*
* unhash a timewait socket from established hash, if hashed.
* ehash lock must be held by caller.
* Returns 1 if caller should call inet_twsk_put() after lock release.
*/
int inet_twsk_unhash(struct inet_timewait_sock *tw)
{
if (hlist_nulls_unhashed(&tw->tw_node))
return 0;
hlist_nulls_del_rcu(&tw->tw_node);
sk_nulls_node_init(&tw->tw_node);
/*
* We cannot call inet_twsk_put() ourself under lock,
* caller must call it for us.
*/
return 1;
}
/** /**
* inet_twsk_bind_unhash - unhash a timewait socket from bind hash * inet_twsk_bind_unhash - unhash a timewait socket from bind hash
* @tw: timewait socket * @tw: timewait socket
...@@ -48,35 +26,29 @@ int inet_twsk_unhash(struct inet_timewait_sock *tw) ...@@ -48,35 +26,29 @@ int inet_twsk_unhash(struct inet_timewait_sock *tw)
* bind hash lock must be held by caller. * bind hash lock must be held by caller.
* Returns 1 if caller should call inet_twsk_put() after lock release. * Returns 1 if caller should call inet_twsk_put() after lock release.
*/ */
int inet_twsk_bind_unhash(struct inet_timewait_sock *tw, void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
struct inet_hashinfo *hashinfo) struct inet_hashinfo *hashinfo)
{ {
struct inet_bind_bucket *tb = tw->tw_tb; struct inet_bind_bucket *tb = tw->tw_tb;
if (!tb) if (!tb)
return 0; return;
__hlist_del(&tw->tw_bind_node); __hlist_del(&tw->tw_bind_node);
tw->tw_tb = NULL; tw->tw_tb = NULL;
inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
/* __sock_put((struct sock *)tw);
* We cannot call inet_twsk_put() ourself under lock,
* caller must call it for us.
*/
return 1;
} }
/* Must be called with locally disabled BHs. */ /* Must be called with locally disabled BHs. */
static void inet_twsk_kill(struct inet_timewait_sock *tw) static void inet_twsk_kill(struct inet_timewait_sock *tw)
{ {
struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo; struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo;
struct inet_bind_hashbucket *bhead;
int refcnt;
/* Unlink from established hashes. */
spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
struct inet_bind_hashbucket *bhead;
spin_lock(lock); spin_lock(lock);
refcnt = inet_twsk_unhash(tw); sk_nulls_del_node_init_rcu((struct sock *)tw);
spin_unlock(lock); spin_unlock(lock);
/* Disassociate with bind bucket. */ /* Disassociate with bind bucket. */
...@@ -84,11 +56,9 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw) ...@@ -84,11 +56,9 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
hashinfo->bhash_size)]; hashinfo->bhash_size)];
spin_lock(&bhead->lock); spin_lock(&bhead->lock);
refcnt += inet_twsk_bind_unhash(tw, hashinfo); inet_twsk_bind_unhash(tw, hashinfo);
spin_unlock(&bhead->lock); spin_unlock(&bhead->lock);
BUG_ON(refcnt >= atomic_read(&tw->tw_refcnt));
atomic_sub(refcnt, &tw->tw_refcnt);
atomic_dec(&tw->tw_dr->tw_count); atomic_dec(&tw->tw_dr->tw_count);
inet_twsk_put(tw); inet_twsk_put(tw);
} }
......
...@@ -207,7 +207,6 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, ...@@ -207,7 +207,6 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
struct sock *sk2; struct sock *sk2;
const struct hlist_nulls_node *node; const struct hlist_nulls_node *node;
struct inet_timewait_sock *tw = NULL; struct inet_timewait_sock *tw = NULL;
int twrefcnt = 0;
spin_lock(lock); spin_lock(lock);
...@@ -234,12 +233,10 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, ...@@ -234,12 +233,10 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
WARN_ON(!sk_unhashed(sk)); WARN_ON(!sk_unhashed(sk));
__sk_nulls_add_node_rcu(sk, &head->chain); __sk_nulls_add_node_rcu(sk, &head->chain);
if (tw) { if (tw) {
twrefcnt = inet_twsk_unhash(tw); sk_nulls_del_node_init_rcu((struct sock *)tw);
NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
} }
spin_unlock(lock); spin_unlock(lock);
if (twrefcnt)
inet_twsk_put(tw);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
if (twp) { if (twp) {
...@@ -247,7 +244,6 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, ...@@ -247,7 +244,6 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
} else if (tw) { } else if (tw) {
/* Silly. Should hash-dance instead... */ /* Silly. Should hash-dance instead... */
inet_twsk_deschedule(tw); inet_twsk_deschedule(tw);
inet_twsk_put(tw); inet_twsk_put(tw);
} }
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment