Commit 1901c924 authored by Josef Bacik's avatar Josef Bacik Committed by Christian Brauner

fs: keep an index of current mount namespaces

In order to allow listmount() to be used on different namespaces we
need a way to look up a mount namespace by its id.  Keep an rbtree of the
current non-anonymous mount namespaces, indexed by id, that we can use to
look up the namespace.
Co-developed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Link: https://lore.kernel.org/r/e5fdd78a90f5b00a75bd893962a70f52a2c015cd.1719243756.git.josef@toxicpanda.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
parent f3107df3
...@@ -16,6 +16,8 @@ struct mnt_namespace { ...@@ -16,6 +16,8 @@ struct mnt_namespace {
u64 event; u64 event;
unsigned int nr_mounts; /* # of mounts in the namespace */ unsigned int nr_mounts; /* # of mounts in the namespace */
unsigned int pending_mounts; unsigned int pending_mounts;
struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */
refcount_t passive; /* number references not pinning @mounts */
} __randomize_layout; } __randomize_layout;
struct mnt_pcp { struct mnt_pcp {
......
...@@ -78,6 +78,8 @@ static struct kmem_cache *mnt_cache __ro_after_init; ...@@ -78,6 +78,8 @@ static struct kmem_cache *mnt_cache __ro_after_init;
static DECLARE_RWSEM(namespace_sem); static DECLARE_RWSEM(namespace_sem);
static HLIST_HEAD(unmounted); /* protected by namespace_sem */ static HLIST_HEAD(unmounted); /* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */ static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
static DEFINE_RWLOCK(mnt_ns_tree_lock);
static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */
struct mount_kattr { struct mount_kattr {
unsigned int attr_set; unsigned int attr_set;
...@@ -103,6 +105,109 @@ EXPORT_SYMBOL_GPL(fs_kobj); ...@@ -103,6 +105,109 @@ EXPORT_SYMBOL_GPL(fs_kobj);
*/ */
__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
/*
 * Three-way comparison of a sequence number against a mount namespace.
 *
 * Returns -1 if @seq is lower than @ns->seq, 1 if it is higher, and 0 if
 * the two are equal.
 */
static int mnt_ns_cmp(u64 seq, const struct mnt_namespace *ns)
{
	const u64 other = ns->seq;

	if (seq == other)
		return 0;
	return seq < other ? -1 : 1;
}
/*
 * Map an rbtree node back to the mount namespace that embeds it through
 * its mnt_ns_tree_node member. A NULL @node yields NULL.
 */
static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node)
{
	return node ? rb_entry(node, struct mnt_namespace, mnt_ns_tree_node) : NULL;
}
static bool mnt_ns_less(struct rb_node *a, const struct rb_node *b)
{
struct mnt_namespace *ns_a = node_to_mnt_ns(a);
struct mnt_namespace *ns_b = node_to_mnt_ns(b);
u64 seq_a = ns_a->seq;
return mnt_ns_cmp(seq_a, ns_b) < 0;
}
/*
 * Link @ns into mnt_ns_tree, ordered by mnt_ns_less(), while holding
 * mnt_ns_tree_lock for writing.
 */
static void mnt_ns_tree_add(struct mnt_namespace *ns)
{
	write_lock(&mnt_ns_tree_lock);
	rb_add(&ns->mnt_ns_tree_node, &mnt_ns_tree, mnt_ns_less);
	write_unlock(&mnt_ns_tree_lock);
}
/*
 * Drop one passive reference on @ns. Dropping the final one puts the user
 * namespace reference held in @ns->user_ns and frees @ns itself.
 *
 * Must not be called with mnt_ns_tree_lock held.
 */
static void mnt_ns_release(struct mnt_namespace *ns)
{
	lockdep_assert_not_held(&mnt_ns_tree_lock);

	/* passive references keep @ns alive for {list,stat}mount() */
	if (!refcount_dec_and_test(&ns->passive))
		return;

	put_user_ns(ns->user_ns);
	kfree(ns);
}
/* Scope-based cleanup helper: releases a non-NULL namespace pointer. */
DEFINE_FREE(mnt_ns_release, struct mnt_namespace *, if (_T) mnt_ns_release(_T))
/*
 * Unlink @ns from the global mount namespace tree (if it is linked) and
 * drop one passive reference on it via mnt_ns_release().
 *
 * Anonymous namespaces are never looked at in the tree. For the others the
 * unlink is done under mnt_ns_tree_lock held for writing; the lock is
 * released before mnt_ns_release() runs, which asserts it is not held.
 */
static void mnt_ns_tree_remove(struct mnt_namespace *ns)
{
	/* remove from global mount namespace list */
	if (!is_anon_ns(ns)) {
		guard(write_lock)(&mnt_ns_tree_lock);
		/*
		 * The node is cleared when the namespace is allocated and only
		 * linked by mnt_ns_tree_add(). NOTE(review): if teardown can be
		 * reached before that insertion (e.g. a failure while the
		 * namespace is still being set up - confirm against callers),
		 * rb_erase() on the unlinked node would corrupt the tree, so
		 * only erase nodes that are actually linked.
		 */
		if (!RB_EMPTY_NODE(&ns->mnt_ns_tree_node)) {
			rb_erase(&ns->mnt_ns_tree_node, &mnt_ns_tree);
			/* mark as unlinked so a repeated removal is harmless */
			RB_CLEAR_NODE(&ns->mnt_ns_tree_node);
		}
	}
	mnt_ns_release(ns);
}
/*
 * Find the mount namespace whose id equals @mnt_ns_id or, failing an exact
 * match, the one with the smallest id greater than @mnt_ns_id. Returns NULL
 * when no such namespace is in the tree.
 *
 * The caller must hold mnt_ns_tree_lock.
 */
static struct mnt_namespace *mnt_ns_find_id_at(u64 mnt_ns_id)
{
	struct mnt_namespace *best = NULL;
	struct rb_node *cur;

	lockdep_assert_held(&mnt_ns_tree_lock);

	for (cur = mnt_ns_tree.rb_node; cur; ) {
		struct mnt_namespace *ns = node_to_mnt_ns(cur);

		if (mnt_ns_id > ns->seq) {
			/* @ns is too small; any match lies to the right */
			cur = cur->rb_right;
			continue;
		}

		/* ns->seq >= mnt_ns_id: remember it, then look for a closer one */
		best = ns;
		if (mnt_ns_id == ns->seq)
			break;
		cur = cur->rb_left;
	}
	return best;
}
/*
 * Look up the mount namespace with exactly the id @mnt_ns_id and take a
 * passive reference on it. Returns NULL if no namespace with that id is in
 * the tree.
 *
 * A passive reference does not stop the namespace from being emptied, e.g.
 * when the last task holding an active reference exits. To access the mounts
 * of the namespace the @namespace_sem must be acquired first; if the
 * namespace has already shut down by then, {list,stat}mount() will see that
 * its mount rbtree is empty.
 */
static struct mnt_namespace *lookup_mnt_ns(u64 mnt_ns_id)
{
	struct mnt_namespace *found;

	guard(read_lock)(&mnt_ns_tree_lock);

	found = mnt_ns_find_id_at(mnt_ns_id);
	if (found && found->seq == mnt_ns_id) {
		refcount_inc(&found->passive);
		return found;
	}
	return NULL;
}
static inline void lock_mount_hash(void) static inline void lock_mount_hash(void)
{ {
write_seqlock(&mount_lock); write_seqlock(&mount_lock);
...@@ -3733,8 +3838,7 @@ static void free_mnt_ns(struct mnt_namespace *ns) ...@@ -3733,8 +3838,7 @@ static void free_mnt_ns(struct mnt_namespace *ns)
if (!is_anon_ns(ns)) if (!is_anon_ns(ns))
ns_free_inum(&ns->ns); ns_free_inum(&ns->ns);
dec_mnt_namespaces(ns->ucounts); dec_mnt_namespaces(ns->ucounts);
put_user_ns(ns->user_ns); mnt_ns_tree_remove(ns);
kfree(ns);
} }
/* /*
...@@ -3773,7 +3877,9 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a ...@@ -3773,7 +3877,9 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
if (!anon) if (!anon)
new_ns->seq = atomic64_add_return(1, &mnt_ns_seq); new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
refcount_set(&new_ns->ns.count, 1); refcount_set(&new_ns->ns.count, 1);
refcount_set(&new_ns->passive, 1);
new_ns->mounts = RB_ROOT; new_ns->mounts = RB_ROOT;
RB_CLEAR_NODE(&new_ns->mnt_ns_tree_node);
init_waitqueue_head(&new_ns->poll); init_waitqueue_head(&new_ns->poll);
new_ns->user_ns = get_user_ns(user_ns); new_ns->user_ns = get_user_ns(user_ns);
new_ns->ucounts = ucounts; new_ns->ucounts = ucounts;
...@@ -3850,6 +3956,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, ...@@ -3850,6 +3956,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
while (p->mnt.mnt_root != q->mnt.mnt_root) while (p->mnt.mnt_root != q->mnt.mnt_root)
p = next_mnt(skip_mnt_tree(p), old); p = next_mnt(skip_mnt_tree(p), old);
} }
mnt_ns_tree_add(new_ns);
namespace_unlock(); namespace_unlock();
if (rootmnt) if (rootmnt)
...@@ -5205,6 +5312,8 @@ static void __init init_mount_tree(void) ...@@ -5205,6 +5312,8 @@ static void __init init_mount_tree(void)
set_fs_pwd(current->fs, &root); set_fs_pwd(current->fs, &root);
set_fs_root(current->fs, &root); set_fs_root(current->fs, &root);
mnt_ns_tree_add(ns);
} }
void __init mnt_init(void) void __init mnt_init(void)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment