Commit d40ce48c authored by Jakub Kicinski

Merge branch 'af_unix-replace-unix_table_lock-with-per-hash-locks'

Kuniyuki Iwashima says:

====================
af_unix: Replace unix_table_lock with per-hash locks.

The hash table of AF_UNIX sockets is protected by a single big lock,
unix_table_lock.  This series replaces it with small per-hash locks.

1st -  2nd : Misc refactoring
3rd -  8th : Separate BSD/abstract address logics
9th - 11th : Prep to save a hash in each socket
12th       : Replace the big lock
13th       : Speed up autobind()

Note to maintainers:
The 12th patch adds two kinds of Sparse warnings on patchwork:

  about unix_table_double_lock/unlock()
    We can avoid this by adding two apparent acquires/releases annotations,
    but there are the same kinds of warnings about unix_state_double_lock().

  about unix_next_socket() and unix_seq_stop() (/proc/net/unix)
    This is because Sparse does not understand logic in unix_next_socket(),
    which leaves a spin lock held until it returns NULL.
    Also, tcp_seq_stop() causes a warning for the same reason.

These warnings seem reasonable, but let me know if there is any better way.
Please see [0] for details.

[0]: https://lore.kernel.org/netdev/20211117001611.74123-1-kuniyu@amazon.co.jp/
====================

Link: https://lore.kernel.org/r/20211124021431.48956-1-kuniyu@amazon.co.jp
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 442b03c3 9acbc584
......@@ -20,13 +20,12 @@ struct sock *unix_peer_get(struct sock *sk);
#define UNIX_HASH_BITS 8
extern unsigned int unix_tot_inflight;
extern spinlock_t unix_table_lock;
extern spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE];
extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
struct unix_address {
refcount_t refcnt;
int len;
unsigned int hash;
struct sockaddr_un name[];
};
......
This diff is collapsed.
......@@ -13,13 +13,14 @@
static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb)
{
/* might or might not have unix_table_lock */
/* might or might not have unix_table_locks */
struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
if (!addr)
return 0;
return nla_put(nlskb, UNIX_DIAG_NAME, addr->len - sizeof(short),
return nla_put(nlskb, UNIX_DIAG_NAME,
addr->len - offsetof(struct sockaddr_un, sun_path),
addr->name->sun_path);
}
......@@ -203,13 +204,13 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
s_slot = cb->args[0];
num = s_num = cb->args[1];
spin_lock(&unix_table_lock);
for (slot = s_slot;
slot < ARRAY_SIZE(unix_socket_table);
s_num = 0, slot++) {
struct sock *sk;
num = 0;
spin_lock(&unix_table_locks[slot]);
sk_for_each(sk, &unix_socket_table[slot]) {
if (!net_eq(sock_net(sk), net))
continue;
......@@ -220,14 +221,16 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (sk_diag_dump(sk, skb, req,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI) < 0)
NLM_F_MULTI) < 0) {
spin_unlock(&unix_table_locks[slot]);
goto done;
}
next:
num++;
}
spin_unlock(&unix_table_locks[slot]);
}
done:
spin_unlock(&unix_table_lock);
cb->args[0] = slot;
cb->args[1] = num;
......@@ -236,21 +239,19 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
static struct sock *unix_lookup_by_ino(unsigned int ino)
{
int i;
struct sock *sk;
int i;
spin_lock(&unix_table_lock);
for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) {
spin_lock(&unix_table_locks[i]);
sk_for_each(sk, &unix_socket_table[i])
if (ino == sock_i_ino(sk)) {
sock_hold(sk);
spin_unlock(&unix_table_lock);
spin_unlock(&unix_table_locks[i]);
return sk;
}
spin_unlock(&unix_table_locks[i]);
}
spin_unlock(&unix_table_lock);
return NULL;
}
......
......@@ -49,7 +49,7 @@ int dump_unix(struct bpf_iter__unix *ctx)
sock_i_ino(sk));
if (unix_sk->addr) {
if (!UNIX_ABSTRACT(unix_sk)) {
if (unix_sk->addr->name->sun_path[0]) {
BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
} else {
/* The name of the abstract UNIX domain socket starts
......
......@@ -6,8 +6,6 @@
#define AF_INET6 10
#define __SO_ACCEPTCON (1 << 16)
#define UNIX_HASH_SIZE 256
#define UNIX_ABSTRACT(unix_sk) (unix_sk->addr->hash < UNIX_HASH_SIZE)
#define SOL_TCP 6
#define TCP_CONGESTION 13
......
......@@ -23,7 +23,7 @@ int BPF_PROG(unix_listen, struct socket *sock, int backlog)
if (!unix_sk)
return 0;
if (!UNIX_ABSTRACT(unix_sk))
if (unix_sk->addr->name->sun_path[0])
return 0;
len = unix_sk->addr->len - sizeof(short);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment