Commit 4f9020ff authored by Linus Torvalds

Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs fixes from Al Viro:
 "Assorted fixes that sat in -next for a while, all over the place"

* 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  aio: Fix locking in aio_poll()
  exec: Fix mem leak in kernel_read_file
  copy_mount_string: Limit string length to PATH_MAX
  cgroup: saner refcounting for cgroup_root
  fix cgroup_do_mount() handling of failure exits
parents 736706be d3d6a18d
...@@ -1666,6 +1666,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, ...@@ -1666,6 +1666,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
struct poll_iocb *req = container_of(wait, struct poll_iocb, wait); struct poll_iocb *req = container_of(wait, struct poll_iocb, wait);
struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll); struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
__poll_t mask = key_to_poll(key); __poll_t mask = key_to_poll(key);
unsigned long flags;
req->woken = true; req->woken = true;
...@@ -1674,10 +1675,15 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, ...@@ -1674,10 +1675,15 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
if (!(mask & req->events)) if (!(mask & req->events))
return 0; return 0;
/* try to complete the iocb inline if we can: */ /*
if (spin_trylock(&iocb->ki_ctx->ctx_lock)) { * Try to complete the iocb inline if we can. Use
* irqsave/irqrestore because not all filesystems (e.g. fuse)
* call this function with IRQs disabled and because IRQs
* have to be disabled before ctx_lock is obtained.
*/
if (spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
list_del(&iocb->ki_list); list_del(&iocb->ki_list);
spin_unlock(&iocb->ki_ctx->ctx_lock); spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags);
list_del_init(&req->wait.entry); list_del_init(&req->wait.entry);
aio_poll_complete(iocb, mask); aio_poll_complete(iocb, mask);
......
...@@ -932,7 +932,7 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size, ...@@ -932,7 +932,7 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size,
bytes = kernel_read(file, *buf + pos, i_size - pos, &pos); bytes = kernel_read(file, *buf + pos, i_size - pos, &pos);
if (bytes < 0) { if (bytes < 0) {
ret = bytes; ret = bytes;
goto out; goto out_free;
} }
if (bytes == 0) if (bytes == 0)
......
...@@ -196,8 +196,10 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, ...@@ -196,8 +196,10 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
return dentry; return dentry;
knparent = find_next_ancestor(kn, NULL); knparent = find_next_ancestor(kn, NULL);
if (WARN_ON(!knparent)) if (WARN_ON(!knparent)) {
dput(dentry);
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
}
do { do {
struct dentry *dtmp; struct dentry *dtmp;
...@@ -206,8 +208,10 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, ...@@ -206,8 +208,10 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
if (kn == knparent) if (kn == knparent)
return dentry; return dentry;
kntmp = find_next_ancestor(kn, knparent); kntmp = find_next_ancestor(kn, knparent);
if (WARN_ON(!kntmp)) if (WARN_ON(!kntmp)) {
dput(dentry);
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
}
dtmp = lookup_one_len_unlocked(kntmp->name, dentry, dtmp = lookup_one_len_unlocked(kntmp->name, dentry,
strlen(kntmp->name)); strlen(kntmp->name));
dput(dentry); dput(dentry);
......
...@@ -2744,7 +2744,7 @@ void *copy_mount_options(const void __user * data) ...@@ -2744,7 +2744,7 @@ void *copy_mount_options(const void __user * data)
char *copy_mount_string(const void __user *data) char *copy_mount_string(const void __user *data)
{ {
return data ? strndup_user(data, PAGE_SIZE) : NULL; return data ? strndup_user(data, PATH_MAX) : NULL;
} }
/* /*
......
...@@ -198,7 +198,7 @@ int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, ...@@ -198,7 +198,7 @@ int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
void cgroup_free_root(struct cgroup_root *root); void cgroup_free_root(struct cgroup_root *root);
void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts); void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts);
int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags); int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask);
int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask);
struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
struct cgroup_root *root, unsigned long magic, struct cgroup_root *root, unsigned long magic,
......
...@@ -1116,13 +1116,11 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, ...@@ -1116,13 +1116,11 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
void *data, unsigned long magic, void *data, unsigned long magic,
struct cgroup_namespace *ns) struct cgroup_namespace *ns)
{ {
struct super_block *pinned_sb = NULL;
struct cgroup_sb_opts opts; struct cgroup_sb_opts opts;
struct cgroup_root *root; struct cgroup_root *root;
struct cgroup_subsys *ss; struct cgroup_subsys *ss;
struct dentry *dentry; struct dentry *dentry;
int i, ret; int i, ret;
bool new_root = false;
cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
...@@ -1184,29 +1182,6 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, ...@@ -1184,29 +1182,6 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
if (root->flags ^ opts.flags) if (root->flags ^ opts.flags)
pr_warn("new mount options do not match the existing superblock, will be ignored\n"); pr_warn("new mount options do not match the existing superblock, will be ignored\n");
/*
* We want to reuse @root whose lifetime is governed by its
* ->cgrp. Let's check whether @root is alive and keep it
* that way. As cgroup_kill_sb() can happen anytime, we
* want to block it by pinning the sb so that @root doesn't
* get killed before mount is complete.
*
* With the sb pinned, tryget_live can reliably indicate
* whether @root can be reused. If it's being killed,
* drain it. We can use wait_queue for the wait but this
* path is super cold. Let's just sleep a bit and retry.
*/
pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
if (IS_ERR(pinned_sb) ||
!percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
mutex_unlock(&cgroup_mutex);
if (!IS_ERR_OR_NULL(pinned_sb))
deactivate_super(pinned_sb);
msleep(10);
ret = restart_syscall();
goto out_free;
}
ret = 0; ret = 0;
goto out_unlock; goto out_unlock;
} }
...@@ -1232,15 +1207,20 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, ...@@ -1232,15 +1207,20 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
ret = -ENOMEM; ret = -ENOMEM;
goto out_unlock; goto out_unlock;
} }
new_root = true;
init_cgroup_root(root, &opts); init_cgroup_root(root, &opts);
ret = cgroup_setup_root(root, opts.subsys_mask, PERCPU_REF_INIT_DEAD); ret = cgroup_setup_root(root, opts.subsys_mask);
if (ret) if (ret)
cgroup_free_root(root); cgroup_free_root(root);
out_unlock: out_unlock:
if (!ret && !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
mutex_unlock(&cgroup_mutex);
msleep(10);
ret = restart_syscall();
goto out_free;
}
mutex_unlock(&cgroup_mutex); mutex_unlock(&cgroup_mutex);
out_free: out_free:
kfree(opts.release_agent); kfree(opts.release_agent);
...@@ -1252,25 +1232,13 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, ...@@ -1252,25 +1232,13 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
dentry = cgroup_do_mount(&cgroup_fs_type, flags, root, dentry = cgroup_do_mount(&cgroup_fs_type, flags, root,
CGROUP_SUPER_MAGIC, ns); CGROUP_SUPER_MAGIC, ns);
/* if (!IS_ERR(dentry) && percpu_ref_is_dying(&root->cgrp.self.refcnt)) {
* There's a race window after we release cgroup_mutex and before struct super_block *sb = dentry->d_sb;
* allocating a superblock. Make sure a concurrent process won't dput(dentry);
* be able to re-use the root during this window by delaying the deactivate_locked_super(sb);
* initialization of root refcnt. msleep(10);
*/ dentry = ERR_PTR(restart_syscall());
if (new_root) {
mutex_lock(&cgroup_mutex);
percpu_ref_reinit(&root->cgrp.self.refcnt);
mutex_unlock(&cgroup_mutex);
} }
/*
* If @pinned_sb, we're reusing an existing root and holding an
* extra ref on its sb. Mount is complete. Put the extra ref.
*/
if (pinned_sb)
deactivate_super(pinned_sb);
return dentry; return dentry;
} }
......
...@@ -1927,7 +1927,7 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts) ...@@ -1927,7 +1927,7 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts)
set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
} }
int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags) int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
{ {
LIST_HEAD(tmp_links); LIST_HEAD(tmp_links);
struct cgroup *root_cgrp = &root->cgrp; struct cgroup *root_cgrp = &root->cgrp;
...@@ -1944,7 +1944,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags) ...@@ -1944,7 +1944,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags)
root_cgrp->ancestor_ids[0] = ret; root_cgrp->ancestor_ids[0] = ret;
ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release,
ref_flags, GFP_KERNEL); 0, GFP_KERNEL);
if (ret) if (ret)
goto out; goto out;
...@@ -2033,7 +2033,7 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, ...@@ -2033,7 +2033,7 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
struct cgroup_namespace *ns) struct cgroup_namespace *ns)
{ {
struct dentry *dentry; struct dentry *dentry;
bool new_sb; bool new_sb = false;
dentry = kernfs_mount(fs_type, flags, root->kf_root, magic, &new_sb); dentry = kernfs_mount(fs_type, flags, root->kf_root, magic, &new_sb);
...@@ -2043,6 +2043,7 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, ...@@ -2043,6 +2043,7 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
*/ */
if (!IS_ERR(dentry) && ns != &init_cgroup_ns) { if (!IS_ERR(dentry) && ns != &init_cgroup_ns) {
struct dentry *nsdentry; struct dentry *nsdentry;
struct super_block *sb = dentry->d_sb;
struct cgroup *cgrp; struct cgroup *cgrp;
mutex_lock(&cgroup_mutex); mutex_lock(&cgroup_mutex);
...@@ -2053,12 +2054,14 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, ...@@ -2053,12 +2054,14 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
spin_unlock_irq(&css_set_lock); spin_unlock_irq(&css_set_lock);
mutex_unlock(&cgroup_mutex); mutex_unlock(&cgroup_mutex);
nsdentry = kernfs_node_dentry(cgrp->kn, dentry->d_sb); nsdentry = kernfs_node_dentry(cgrp->kn, sb);
dput(dentry); dput(dentry);
if (IS_ERR(nsdentry))
deactivate_locked_super(sb);
dentry = nsdentry; dentry = nsdentry;
} }
if (IS_ERR(dentry) || !new_sb) if (!new_sb)
cgroup_put(&root->cgrp); cgroup_put(&root->cgrp);
return dentry; return dentry;
...@@ -2118,18 +2121,16 @@ static void cgroup_kill_sb(struct super_block *sb) ...@@ -2118,18 +2121,16 @@ static void cgroup_kill_sb(struct super_block *sb)
struct cgroup_root *root = cgroup_root_from_kf(kf_root); struct cgroup_root *root = cgroup_root_from_kf(kf_root);
/* /*
* If @root doesn't have any mounts or children, start killing it. * If @root doesn't have any children, start killing it.
* This prevents new mounts by disabling percpu_ref_tryget_live(). * This prevents new mounts by disabling percpu_ref_tryget_live().
* cgroup_mount() may wait for @root's release. * cgroup_mount() may wait for @root's release.
* *
* And don't kill the default root. * And don't kill the default root.
*/ */
if (!list_empty(&root->cgrp.self.children) || if (list_empty(&root->cgrp.self.children) && root != &cgrp_dfl_root &&
root == &cgrp_dfl_root) !percpu_ref_is_dying(&root->cgrp.self.refcnt))
cgroup_put(&root->cgrp);
else
percpu_ref_kill(&root->cgrp.self.refcnt); percpu_ref_kill(&root->cgrp.self.refcnt);
cgroup_put(&root->cgrp);
kernfs_kill_sb(sb); kernfs_kill_sb(sb);
} }
...@@ -5399,7 +5400,7 @@ int __init cgroup_init(void) ...@@ -5399,7 +5400,7 @@ int __init cgroup_init(void)
hash_add(css_set_table, &init_css_set.hlist, hash_add(css_set_table, &init_css_set.hlist,
css_set_hash(init_css_set.subsys)); css_set_hash(init_css_set.subsys));
BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0, 0)); BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));
mutex_unlock(&cgroup_mutex); mutex_unlock(&cgroup_mutex);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment