Commit 51bae889, authored by Kuniyuki Iwashima, committed by Paolo Abeni

af_unix: Put pathname sockets in the global hash table.

Commit cf2f225e ("af_unix: Put a socket into a per-netns hash table.")
accidentally broke the user API for pathname sockets.  Before that commit,
a socket could connect() to a pathname socket whose file was visible, even
if the two sockets were in different network namespaces.

The commit puts all sockets into a per-netns hash table.  As a result,
connect() to a pathname socket in a different netns fails to find it in the
caller's per-netns hash table and returns -ECONNREFUSED even when the task
can view the peer socket file.

We can reproduce this issue by:

  Console A:

    # python3
    >>> from socket import *
    >>> s = socket(AF_UNIX, SOCK_STREAM, 0)
    >>> s.bind('test')
    >>> s.listen(32)

  Console B:

    # ip netns add test
    # ip netns exec test sh
    # python3
    >>> from socket import *
    >>> s = socket(AF_UNIX, SOCK_STREAM, 0)
    >>> s.connect('test')

Note when dumping sockets by sock_diag, procfs, and bpf_iter, they are
filtered only by netns.  In other words, even if they are visible and
connect()able, all sockets in different netns are skipped while iterating
sockets.  Thus, we need a fix only for finding a peer pathname socket.

This patch adds a global hash table for pathname sockets, links them with
sk_bind_node, and uses it in unix_find_socket_byinode().  By doing so, we
can keep sockets in per-netns hash tables and dump them easily.

Thanks to Sachin Sant and Leonard Crestez for reports, logs and a reproducer.

Fixes: cf2f225e ("af_unix: Put a socket into a per-netns hash table.")
Reported-by: Sachin Sant <sachinp@linux.ibm.com>
Reported-by: Leonard Crestez <cdleonard@gmail.com>
Tested-by: Sachin Sant <sachinp@linux.ibm.com>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Tested-by: Leonard Crestez <cdleonard@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parent 874bdbfe
...@@ -119,6 +119,8 @@ ...@@ -119,6 +119,8 @@
#include "scm.h" #include "scm.h"
static atomic_long_t unix_nr_socks; static atomic_long_t unix_nr_socks;
/* Global hash table for pathname (BSD) sockets, not tied to any single
 * network namespace.  Sockets are linked through sk_bind_node, and each
 * bucket is protected by the spinlock at the same index.
 */
static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
/* SMP locking strategy: /* SMP locking strategy:
* hash table is protected with spinlock. * hash table is protected with spinlock.
...@@ -328,6 +330,24 @@ static void unix_insert_unbound_socket(struct net *net, struct sock *sk) ...@@ -328,6 +330,24 @@ static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
spin_unlock(&net->unx.table.locks[sk->sk_hash]); spin_unlock(&net->unx.table.locks[sk->sk_hash]);
} }
static void unix_insert_bsd_socket(struct sock *sk)
{
spin_lock(&bsd_socket_locks[sk->sk_hash]);
sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
spin_unlock(&bsd_socket_locks[sk->sk_hash]);
}
static void unix_remove_bsd_socket(struct sock *sk)
{
if (!hlist_unhashed(&sk->sk_bind_node)) {
spin_lock(&bsd_socket_locks[sk->sk_hash]);
__sk_del_bind_node(sk);
spin_unlock(&bsd_socket_locks[sk->sk_hash]);
sk_node_init(&sk->sk_bind_node);
}
}
static struct sock *__unix_find_socket_byname(struct net *net, static struct sock *__unix_find_socket_byname(struct net *net,
struct sockaddr_un *sunname, struct sockaddr_un *sunname,
int len, unsigned int hash) int len, unsigned int hash)
...@@ -358,22 +378,22 @@ static inline struct sock *unix_find_socket_byname(struct net *net, ...@@ -358,22 +378,22 @@ static inline struct sock *unix_find_socket_byname(struct net *net,
return s; return s;
} }
static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i) static struct sock *unix_find_socket_byinode(struct inode *i)
{ {
unsigned int hash = unix_bsd_hash(i); unsigned int hash = unix_bsd_hash(i);
struct sock *s; struct sock *s;
spin_lock(&net->unx.table.locks[hash]); spin_lock(&bsd_socket_locks[hash]);
sk_for_each(s, &net->unx.table.buckets[hash]) { sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
struct dentry *dentry = unix_sk(s)->path.dentry; struct dentry *dentry = unix_sk(s)->path.dentry;
if (dentry && d_backing_inode(dentry) == i) { if (dentry && d_backing_inode(dentry) == i) {
sock_hold(s); sock_hold(s);
spin_unlock(&net->unx.table.locks[hash]); spin_unlock(&bsd_socket_locks[hash]);
return s; return s;
} }
} }
spin_unlock(&net->unx.table.locks[hash]); spin_unlock(&bsd_socket_locks[hash]);
return NULL; return NULL;
} }
...@@ -577,6 +597,7 @@ static void unix_release_sock(struct sock *sk, int embrion) ...@@ -577,6 +597,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
int state; int state;
unix_remove_socket(sock_net(sk), sk); unix_remove_socket(sock_net(sk), sk);
unix_remove_bsd_socket(sk);
/* Clear state */ /* Clear state */
unix_state_lock(sk); unix_state_lock(sk);
...@@ -988,8 +1009,8 @@ static int unix_release(struct socket *sock) ...@@ -988,8 +1009,8 @@ static int unix_release(struct socket *sock)
return 0; return 0;
} }
static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr, static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
int addr_len, int type) int type)
{ {
struct inode *inode; struct inode *inode;
struct path path; struct path path;
...@@ -1010,7 +1031,7 @@ static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr, ...@@ -1010,7 +1031,7 @@ static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr,
if (!S_ISSOCK(inode->i_mode)) if (!S_ISSOCK(inode->i_mode))
goto path_put; goto path_put;
sk = unix_find_socket_byinode(net, inode); sk = unix_find_socket_byinode(inode);
if (!sk) if (!sk)
goto path_put; goto path_put;
...@@ -1058,7 +1079,7 @@ static struct sock *unix_find_other(struct net *net, ...@@ -1058,7 +1079,7 @@ static struct sock *unix_find_other(struct net *net,
struct sock *sk; struct sock *sk;
if (sunaddr->sun_path[0]) if (sunaddr->sun_path[0])
sk = unix_find_bsd(net, sunaddr, addr_len, type); sk = unix_find_bsd(sunaddr, addr_len, type);
else else
sk = unix_find_abstract(net, sunaddr, addr_len, type); sk = unix_find_abstract(net, sunaddr, addr_len, type);
...@@ -1179,6 +1200,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, ...@@ -1179,6 +1200,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
u->path.dentry = dget(dentry); u->path.dentry = dget(dentry);
__unix_set_addr_hash(net, sk, addr, new_hash); __unix_set_addr_hash(net, sk, addr, new_hash);
unix_table_double_unlock(net, old_hash, new_hash); unix_table_double_unlock(net, old_hash, new_hash);
unix_insert_bsd_socket(sk);
mutex_unlock(&u->bindlock); mutex_unlock(&u->bindlock);
done_path_create(&parent, dentry); done_path_create(&parent, dentry);
return 0; return 0;
...@@ -3682,10 +3704,15 @@ static void __init bpf_iter_register(void) ...@@ -3682,10 +3704,15 @@ static void __init bpf_iter_register(void)
static int __init af_unix_init(void) static int __init af_unix_init(void)
{ {
int rc = -1; int i, rc = -1;
BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
spin_lock_init(&bsd_socket_locks[i]);
INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
}
rc = proto_register(&unix_dgram_proto, 1); rc = proto_register(&unix_dgram_proto, 1);
if (rc != 0) { if (rc != 0) {
pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment