Commit 230140cf authored by Eric Dumazet, committed by David S. Miller

[INET]: Remove per bucket rwlock in tcp/dccp ehash table.

As done two years ago on IP route cache table (commit
22c047cc), we can avoid using one
lock per hash bucket for the huge TCP/DCCP hash tables.

On a typical x86_64 platform, this saves about 2MB or 4MB of ram, for
little performance difference. (We hit a different cache line for the
rwlock, but then the bucket cache line has a better sharing factor
among cpus, since we dirty it less often.) For netstat or ss commands
that want a full scan of hash table, we perform fewer memory accesses.

Using a 'small' table of hashed rwlocks should be more than enough to
provide correct SMP concurrency between different buckets, without
using too much memory. Sizing of this table depends on
num_possible_cpus() and various CONFIG settings.

This patch provides some locking abstraction that may ease future
work using a different model for the TCP/DCCP table.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent efac5276
...@@ -37,7 +37,6 @@ ...@@ -37,7 +37,6 @@
* I'll experiment with dynamic table growth later. * I'll experiment with dynamic table growth later.
*/ */
struct inet_ehash_bucket { struct inet_ehash_bucket {
rwlock_t lock;
struct hlist_head chain; struct hlist_head chain;
struct hlist_head twchain; struct hlist_head twchain;
}; };
...@@ -100,6 +99,9 @@ struct inet_hashinfo { ...@@ -100,6 +99,9 @@ struct inet_hashinfo {
* TIME_WAIT sockets use a separate chain (twchain). * TIME_WAIT sockets use a separate chain (twchain).
*/ */
struct inet_ehash_bucket *ehash; struct inet_ehash_bucket *ehash;
rwlock_t *ehash_locks;
unsigned int ehash_size;
unsigned int ehash_locks_mask;
/* Ok, let's try this, I give up, we do need a local binding /* Ok, let's try this, I give up, we do need a local binding
* TCP hash as well as the others for fast bind/connect. * TCP hash as well as the others for fast bind/connect.
...@@ -107,7 +109,7 @@ struct inet_hashinfo { ...@@ -107,7 +109,7 @@ struct inet_hashinfo {
struct inet_bind_hashbucket *bhash; struct inet_bind_hashbucket *bhash;
unsigned int bhash_size; unsigned int bhash_size;
unsigned int ehash_size; /* Note : 4 bytes padding on 64 bit arches */
/* All sockets in TCP_LISTEN state will be in here. This is the only /* All sockets in TCP_LISTEN state will be in here. This is the only
* table where wildcard'd TCP sockets can exist. Hash function here * table where wildcard'd TCP sockets can exist. Hash function here
...@@ -134,6 +136,62 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket( ...@@ -134,6 +136,62 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket(
return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)]; return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)];
} }
/*
 * Map an established-hash value to the rwlock that guards its bucket.
 *
 * The lock is picked from hashinfo->ehash_locks by masking @hash with
 * hashinfo->ehash_locks_mask, so the lock table is indexed independently
 * of the bucket table (which is masked with ehash_size - 1).
 *
 * Returns a pointer into hashinfo->ehash_locks; nothing is locked here.
 */
static inline rwlock_t *inet_ehash_lockp(
	struct inet_hashinfo *hashinfo,
	unsigned int hash)
{
	const unsigned int slot = hash & hashinfo->ehash_locks_mask;

	return hashinfo->ehash_locks + slot;
}
/*
 * Allocate and initialise the table of rwlocks used for the ehash buckets.
 *
 * The table has 256 entries by default and grows in powers of two with the
 * number of possible CPUs, up to 4096 entries for 32 CPUs or more.  With
 * CONFIG_PROVE_LOCKING the CPU count is pinned to 2, which keeps the table
 * at its minimum size (presumably because lock debugging makes each lock
 * much larger -- TODO confirm).
 *
 * On success hashinfo->ehash_locks_mask is set to (entries - 1).  When
 * sizeof(rwlock_t) is 0 no memory is allocated and hashinfo->ehash_locks
 * is left untouched, but the mask is still set.
 *
 * Returns 0 on success, or -ENOMEM if the lock table cannot be allocated.
 */
static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
{
	unsigned int i, size = 256;
#if defined(CONFIG_PROVE_LOCKING)
	unsigned int nr_pcpus = 2;
#else
	unsigned int nr_pcpus = num_possible_cpus();
#endif

	if (nr_pcpus >= 4)
		size = 512;
	if (nr_pcpus >= 8)
		size = 1024;
	if (nr_pcpus >= 16)
		size = 2048;
	if (nr_pcpus >= 32)
		size = 4096;

	if (sizeof(rwlock_t) != 0) {
#ifdef CONFIG_NUMA
		/* Tables larger than a page come from vmalloc on NUMA builds;
		 * inet_ehash_locks_free() must apply the same size test.
		 */
		if (size * sizeof(rwlock_t) > PAGE_SIZE)
			hashinfo->ehash_locks = vmalloc(size * sizeof(rwlock_t));
		else
#endif
			hashinfo->ehash_locks = kmalloc(size * sizeof(rwlock_t),
							GFP_KERNEL);
		if (!hashinfo->ehash_locks)
			return -ENOMEM;	/* negative errno, per kernel convention */
		for (i = 0; i < size; i++)
			rwlock_init(&hashinfo->ehash_locks[i]);
	}
	hashinfo->ehash_locks_mask = size - 1;
	return 0;
}
/*
 * Release the rwlock table set up by inet_ehash_locks_alloc().
 *
 * Does nothing if hashinfo->ehash_locks is NULL.  Otherwise the table is
 * freed with the routine matching how it was allocated: on CONFIG_NUMA
 * builds a table larger than PAGE_SIZE came from vmalloc() and goes to
 * vfree(); every other table came from kmalloc() and goes to kfree().
 * The pointer is then cleared so a repeated call is harmless.
 */
static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
{
	if (!hashinfo->ehash_locks)
		return;

#ifdef CONFIG_NUMA
	/* Same size test as the allocation path; the table size is
	 * recovered from the mask (entries - 1).
	 */
	if ((hashinfo->ehash_locks_mask + 1) * sizeof(rwlock_t) > PAGE_SIZE)
		vfree(hashinfo->ehash_locks);
	else
#endif
		kfree(hashinfo->ehash_locks);

	/* Must run on every path, not only as the 'else' branch above. */
	hashinfo->ehash_locks = NULL;
}
extern struct inet_bind_bucket * extern struct inet_bind_bucket *
inet_bind_bucket_create(struct kmem_cache *cachep, inet_bind_bucket_create(struct kmem_cache *cachep,
struct inet_bind_hashbucket *head, struct inet_bind_hashbucket *head,
...@@ -222,7 +280,7 @@ static inline void __inet_hash(struct inet_hashinfo *hashinfo, ...@@ -222,7 +280,7 @@ static inline void __inet_hash(struct inet_hashinfo *hashinfo,
sk->sk_hash = inet_sk_ehashfn(sk); sk->sk_hash = inet_sk_ehashfn(sk);
head = inet_ehash_bucket(hashinfo, sk->sk_hash); head = inet_ehash_bucket(hashinfo, sk->sk_hash);
list = &head->chain; list = &head->chain;
lock = &head->lock; lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
write_lock(lock); write_lock(lock);
} }
__sk_add_node(sk, list); __sk_add_node(sk, list);
...@@ -253,7 +311,7 @@ static inline void inet_unhash(struct inet_hashinfo *hashinfo, struct sock *sk) ...@@ -253,7 +311,7 @@ static inline void inet_unhash(struct inet_hashinfo *hashinfo, struct sock *sk)
inet_listen_wlock(hashinfo); inet_listen_wlock(hashinfo);
lock = &hashinfo->lhash_lock; lock = &hashinfo->lhash_lock;
} else { } else {
lock = &inet_ehash_bucket(hashinfo, sk->sk_hash)->lock; lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
write_lock_bh(lock); write_lock_bh(lock);
} }
...@@ -354,9 +412,10 @@ static inline struct sock * ...@@ -354,9 +412,10 @@ static inline struct sock *
*/ */
unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport); unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport);
struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
rwlock_t *lock = inet_ehash_lockp(hashinfo, hash);
prefetch(head->chain.first); prefetch(head->chain.first);
read_lock(&head->lock); read_lock(lock);
sk_for_each(sk, node, &head->chain) { sk_for_each(sk, node, &head->chain) {
if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
goto hit; /* You sunk my battleship! */ goto hit; /* You sunk my battleship! */
...@@ -369,7 +428,7 @@ static inline struct sock * ...@@ -369,7 +428,7 @@ static inline struct sock *
} }
sk = NULL; sk = NULL;
out: out:
read_unlock(&head->lock); read_unlock(lock);
return sk; return sk;
hit: hit:
sock_hold(sk); sock_hold(sk);
......
...@@ -1072,11 +1072,13 @@ static int __init dccp_init(void) ...@@ -1072,11 +1072,13 @@ static int __init dccp_init(void)
} }
for (i = 0; i < dccp_hashinfo.ehash_size; i++) { for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
rwlock_init(&dccp_hashinfo.ehash[i].lock);
INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain); INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain); INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
} }
if (inet_ehash_locks_alloc(&dccp_hashinfo))
goto out_free_dccp_ehash;
bhash_order = ehash_order; bhash_order = ehash_order;
do { do {
...@@ -1091,7 +1093,7 @@ static int __init dccp_init(void) ...@@ -1091,7 +1093,7 @@ static int __init dccp_init(void)
if (!dccp_hashinfo.bhash) { if (!dccp_hashinfo.bhash) {
DCCP_CRIT("Failed to allocate DCCP bind hash table"); DCCP_CRIT("Failed to allocate DCCP bind hash table");
goto out_free_dccp_ehash; goto out_free_dccp_locks;
} }
for (i = 0; i < dccp_hashinfo.bhash_size; i++) { for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
...@@ -1121,6 +1123,8 @@ static int __init dccp_init(void) ...@@ -1121,6 +1123,8 @@ static int __init dccp_init(void)
out_free_dccp_bhash: out_free_dccp_bhash:
free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
dccp_hashinfo.bhash = NULL; dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash: out_free_dccp_ehash:
free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
dccp_hashinfo.ehash = NULL; dccp_hashinfo.ehash = NULL;
...@@ -1139,6 +1143,7 @@ static void __exit dccp_fini(void) ...@@ -1139,6 +1143,7 @@ static void __exit dccp_fini(void)
free_pages((unsigned long)dccp_hashinfo.ehash, free_pages((unsigned long)dccp_hashinfo.ehash,
get_order(dccp_hashinfo.ehash_size * get_order(dccp_hashinfo.ehash_size *
sizeof(struct inet_ehash_bucket))); sizeof(struct inet_ehash_bucket)));
inet_ehash_locks_free(&dccp_hashinfo);
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
dccp_ackvec_exit(); dccp_ackvec_exit();
dccp_sysctl_exit(); dccp_sysctl_exit();
......
...@@ -747,13 +747,14 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -747,13 +747,14 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
for (i = s_i; i < hashinfo->ehash_size; i++) { for (i = s_i; i < hashinfo->ehash_size; i++) {
struct inet_ehash_bucket *head = &hashinfo->ehash[i]; struct inet_ehash_bucket *head = &hashinfo->ehash[i];
rwlock_t *lock = inet_ehash_lockp(hashinfo, i);
struct sock *sk; struct sock *sk;
struct hlist_node *node; struct hlist_node *node;
if (i > s_i) if (i > s_i)
s_num = 0; s_num = 0;
read_lock_bh(&head->lock); read_lock_bh(lock);
num = 0; num = 0;
sk_for_each(sk, node, &head->chain) { sk_for_each(sk, node, &head->chain) {
struct inet_sock *inet = inet_sk(sk); struct inet_sock *inet = inet_sk(sk);
...@@ -769,7 +770,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -769,7 +770,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
r->id.idiag_dport) r->id.idiag_dport)
goto next_normal; goto next_normal;
if (inet_csk_diag_dump(sk, skb, cb) < 0) { if (inet_csk_diag_dump(sk, skb, cb) < 0) {
read_unlock_bh(&head->lock); read_unlock_bh(lock);
goto done; goto done;
} }
next_normal: next_normal:
...@@ -791,14 +792,14 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -791,14 +792,14 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
r->id.idiag_dport) r->id.idiag_dport)
goto next_dying; goto next_dying;
if (inet_twsk_diag_dump(tw, skb, cb) < 0) { if (inet_twsk_diag_dump(tw, skb, cb) < 0) {
read_unlock_bh(&head->lock); read_unlock_bh(lock);
goto done; goto done;
} }
next_dying: next_dying:
++num; ++num;
} }
} }
read_unlock_bh(&head->lock); read_unlock_bh(lock);
} }
done: done:
......
...@@ -204,12 +204,13 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, ...@@ -204,12 +204,13 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport);
unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
struct sock *sk2; struct sock *sk2;
const struct hlist_node *node; const struct hlist_node *node;
struct inet_timewait_sock *tw; struct inet_timewait_sock *tw;
prefetch(head->chain.first); prefetch(head->chain.first);
write_lock(&head->lock); write_lock(lock);
/* Check TIME-WAIT sockets first. */ /* Check TIME-WAIT sockets first. */
sk_for_each(sk2, node, &head->twchain) { sk_for_each(sk2, node, &head->twchain) {
...@@ -239,7 +240,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, ...@@ -239,7 +240,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
BUG_TRAP(sk_unhashed(sk)); BUG_TRAP(sk_unhashed(sk));
__sk_add_node(sk, &head->chain); __sk_add_node(sk, &head->chain);
sock_prot_inc_use(sk->sk_prot); sock_prot_inc_use(sk->sk_prot);
write_unlock(&head->lock); write_unlock(lock);
if (twp) { if (twp) {
*twp = tw; *twp = tw;
...@@ -255,7 +256,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, ...@@ -255,7 +256,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
return 0; return 0;
not_unique: not_unique:
write_unlock(&head->lock); write_unlock(lock);
return -EADDRNOTAVAIL; return -EADDRNOTAVAIL;
} }
......
...@@ -20,16 +20,16 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, ...@@ -20,16 +20,16 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
struct inet_bind_hashbucket *bhead; struct inet_bind_hashbucket *bhead;
struct inet_bind_bucket *tb; struct inet_bind_bucket *tb;
/* Unlink from established hashes. */ /* Unlink from established hashes. */
struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, tw->tw_hash); rwlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
write_lock(&ehead->lock); write_lock(lock);
if (hlist_unhashed(&tw->tw_node)) { if (hlist_unhashed(&tw->tw_node)) {
write_unlock(&ehead->lock); write_unlock(lock);
return; return;
} }
__hlist_del(&tw->tw_node); __hlist_del(&tw->tw_node);
sk_node_init(&tw->tw_node); sk_node_init(&tw->tw_node);
write_unlock(&ehead->lock); write_unlock(lock);
/* Disassociate with bind bucket. */ /* Disassociate with bind bucket. */
bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)]; bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)];
...@@ -59,6 +59,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, ...@@ -59,6 +59,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
const struct inet_sock *inet = inet_sk(sk); const struct inet_sock *inet = inet_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk); const struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash);
rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
struct inet_bind_hashbucket *bhead; struct inet_bind_hashbucket *bhead;
/* Step 1: Put TW into bind hash. Original socket stays there too. /* Step 1: Put TW into bind hash. Original socket stays there too.
Note, that any socket with inet->num != 0 MUST be bound in Note, that any socket with inet->num != 0 MUST be bound in
...@@ -71,7 +72,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, ...@@ -71,7 +72,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
spin_unlock(&bhead->lock); spin_unlock(&bhead->lock);
write_lock(&ehead->lock); write_lock(lock);
/* Step 2: Remove SK from established hash. */ /* Step 2: Remove SK from established hash. */
if (__sk_del_node_init(sk)) if (__sk_del_node_init(sk))
...@@ -81,7 +82,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, ...@@ -81,7 +82,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
inet_twsk_add_node(tw, &ehead->twchain); inet_twsk_add_node(tw, &ehead->twchain);
atomic_inc(&tw->tw_refcnt); atomic_inc(&tw->tw_refcnt);
write_unlock(&ehead->lock); write_unlock(lock);
} }
EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
......
...@@ -2456,11 +2456,11 @@ void __init tcp_init(void) ...@@ -2456,11 +2456,11 @@ void __init tcp_init(void)
thash_entries ? 0 : 512 * 1024); thash_entries ? 0 : 512 * 1024);
tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size; tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size;
for (i = 0; i < tcp_hashinfo.ehash_size; i++) { for (i = 0; i < tcp_hashinfo.ehash_size; i++) {
rwlock_init(&tcp_hashinfo.ehash[i].lock);
INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain); INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain);
INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].twchain); INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].twchain);
} }
if (inet_ehash_locks_alloc(&tcp_hashinfo))
panic("TCP: failed to alloc ehash_locks");
tcp_hashinfo.bhash = tcp_hashinfo.bhash =
alloc_large_system_hash("TCP bind", alloc_large_system_hash("TCP bind",
sizeof(struct inet_bind_hashbucket), sizeof(struct inet_bind_hashbucket),
......
...@@ -2049,8 +2049,9 @@ static void *established_get_first(struct seq_file *seq) ...@@ -2049,8 +2049,9 @@ static void *established_get_first(struct seq_file *seq)
struct sock *sk; struct sock *sk;
struct hlist_node *node; struct hlist_node *node;
struct inet_timewait_sock *tw; struct inet_timewait_sock *tw;
rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock); read_lock_bh(lock);
sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
if (sk->sk_family != st->family) { if (sk->sk_family != st->family) {
continue; continue;
...@@ -2067,7 +2068,7 @@ static void *established_get_first(struct seq_file *seq) ...@@ -2067,7 +2068,7 @@ static void *established_get_first(struct seq_file *seq)
rc = tw; rc = tw;
goto out; goto out;
} }
read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); read_unlock_bh(lock);
st->state = TCP_SEQ_STATE_ESTABLISHED; st->state = TCP_SEQ_STATE_ESTABLISHED;
} }
out: out:
...@@ -2094,11 +2095,11 @@ static void *established_get_next(struct seq_file *seq, void *cur) ...@@ -2094,11 +2095,11 @@ static void *established_get_next(struct seq_file *seq, void *cur)
cur = tw; cur = tw;
goto out; goto out;
} }
read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
st->state = TCP_SEQ_STATE_ESTABLISHED; st->state = TCP_SEQ_STATE_ESTABLISHED;
if (++st->bucket < tcp_hashinfo.ehash_size) { if (++st->bucket < tcp_hashinfo.ehash_size) {
read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock); read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
} else { } else {
cur = NULL; cur = NULL;
...@@ -2206,7 +2207,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) ...@@ -2206,7 +2207,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
case TCP_SEQ_STATE_TIME_WAIT: case TCP_SEQ_STATE_TIME_WAIT:
case TCP_SEQ_STATE_ESTABLISHED: case TCP_SEQ_STATE_ESTABLISHED:
if (v) if (v)
read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
break; break;
} }
} }
......
...@@ -37,9 +37,8 @@ void __inet6_hash(struct inet_hashinfo *hashinfo, ...@@ -37,9 +37,8 @@ void __inet6_hash(struct inet_hashinfo *hashinfo,
} else { } else {
unsigned int hash; unsigned int hash;
sk->sk_hash = hash = inet6_sk_ehashfn(sk); sk->sk_hash = hash = inet6_sk_ehashfn(sk);
hash &= (hashinfo->ehash_size - 1); list = &inet_ehash_bucket(hashinfo, hash)->chain;
list = &hashinfo->ehash[hash].chain; lock = inet_ehash_lockp(hashinfo, hash);
lock = &hashinfo->ehash[hash].lock;
write_lock(lock); write_lock(lock);
} }
...@@ -70,9 +69,10 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, ...@@ -70,9 +69,10 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo,
*/ */
unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport); unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport);
struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
rwlock_t *lock = inet_ehash_lockp(hashinfo, hash);
prefetch(head->chain.first); prefetch(head->chain.first);
read_lock(&head->lock); read_lock(lock);
sk_for_each(sk, node, &head->chain) { sk_for_each(sk, node, &head->chain) {
/* For IPV6 do the cheaper port and family tests first. */ /* For IPV6 do the cheaper port and family tests first. */
if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif)) if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif))
...@@ -92,12 +92,12 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, ...@@ -92,12 +92,12 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo,
goto hit; goto hit;
} }
} }
read_unlock(&head->lock); read_unlock(lock);
return NULL; return NULL;
hit: hit:
sock_hold(sk); sock_hold(sk);
read_unlock(&head->lock); read_unlock(lock);
return sk; return sk;
} }
EXPORT_SYMBOL(__inet6_lookup_established); EXPORT_SYMBOL(__inet6_lookup_established);
...@@ -175,12 +175,13 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, ...@@ -175,12 +175,13 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
const unsigned int hash = inet6_ehashfn(daddr, lport, saddr, const unsigned int hash = inet6_ehashfn(daddr, lport, saddr,
inet->dport); inet->dport);
struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
struct sock *sk2; struct sock *sk2;
const struct hlist_node *node; const struct hlist_node *node;
struct inet_timewait_sock *tw; struct inet_timewait_sock *tw;
prefetch(head->chain.first); prefetch(head->chain.first);
write_lock(&head->lock); write_lock(lock);
/* Check TIME-WAIT sockets first. */ /* Check TIME-WAIT sockets first. */
sk_for_each(sk2, node, &head->twchain) { sk_for_each(sk2, node, &head->twchain) {
...@@ -216,7 +217,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, ...@@ -216,7 +217,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
__sk_add_node(sk, &head->chain); __sk_add_node(sk, &head->chain);
sk->sk_hash = hash; sk->sk_hash = hash;
sock_prot_inc_use(sk->sk_prot); sock_prot_inc_use(sk->sk_prot);
write_unlock(&head->lock); write_unlock(lock);
if (twp != NULL) { if (twp != NULL) {
*twp = tw; *twp = tw;
...@@ -231,7 +232,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, ...@@ -231,7 +232,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
return 0; return 0;
not_unique: not_unique:
write_unlock(&head->lock); write_unlock(lock);
return -EADDRNOTAVAIL; return -EADDRNOTAVAIL;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment