Commit 4cb2c00c authored by Linus Torvalds

Merge tag 'ovl-fixes-5.11-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs

Pull overlayfs fixes from Miklos Szeredi:

 - Fix capability conversion and minor overlayfs bugs that are related
   to the unprivileged overlay mounts introduced in this cycle.

 - Fix two recent (v5.10) bugs and one old (v4.10) bug.

 - Clean up security xattr copy-up (related to a SELinux regression).

* tag 'ovl-fixes-5.11-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs:
  ovl: implement volatile-specific fsync error behaviour
  ovl: skip getxattr of security labels
  ovl: fix dentry leak in ovl_get_redirect
  ovl: avoid deadlock on directory ioctl
  cap: fix conversions on getxattr
  ovl: perform vfs_getxattr() with mounter creds
  ovl: add warning on user_ns mismatch
parents 61556703 335d3fc5
@@ -586,6 +586,14 @@ without significant effort.
 The advantage of mounting with the "volatile" option is that all forms of
 sync calls to the upper filesystem are omitted.
 
+In order to avoid giving a false sense of safety, the syncfs (and fsync)
+semantics of volatile mounts are slightly different from those of the rest of
+the VFS. If any writeback error occurs on the upperdir's filesystem after a
+volatile mount takes place, all sync functions will return an error. Once this
+condition is reached, the filesystem will not recover, and every subsequent sync
+call will return an error, even if the upperdir has not experienced a new error
+since the last sync call.
+
 When overlay is mounted with "volatile" option, the directory
 "$workdir/work/incompat/volatile" is created. During next mount, overlay
 checks for this directory and refuses to mount if present. This is a strong
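The sticky error described in the documentation hunk above is exactly what the kernel's errseq machinery provides: the mount samples the upper super block's writeback error sequence once, and every later check compares against that never-advancing sample. A minimal kernel-style sketch of the pattern (the struct and helper names are hypothetical; errseq_sample() and errseq_check() from <linux/errseq.h> are real):

	#include <linux/errseq.h>

	/* Hypothetical state, mirroring the errseq field added to ovl_fs below. */
	struct volatile_err_state {
		errseq_t since;	/* snapshot of the upper sb's s_wb_err at mount */
	};

	/* At mount time: remember where the upper fs's error cursor is right now. */
	static void volatile_err_init(struct volatile_err_state *vs, errseq_t *wb_err)
	{
		vs->since = errseq_sample(wb_err);
	}

	/*
	 * At every sync: returns 0 while no error has been recorded since the
	 * snapshot, and a negative errno forever after one has been, because
	 * the snapshot is deliberately never advanced.
	 */
	static int volatile_err_check(struct volatile_err_state *vs, errseq_t *wb_err)
	{
		return errseq_check(wb_err, vs->since);
	}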
@@ -84,6 +84,14 @@ int ovl_copy_xattr(struct super_block *sb, struct dentry *old,
 
 		if (ovl_is_private_xattr(sb, name))
 			continue;
+
+		error = security_inode_copy_up_xattr(name);
+		if (error < 0 && error != -EOPNOTSUPP)
+			break;
+		if (error == 1) {
+			error = 0;
+			continue; /* Discard */
+		}
 retry:
 		size = vfs_getxattr(old, name, value, value_size);
 		if (size == -ERANGE)
@@ -107,13 +115,6 @@ int ovl_copy_xattr(struct super_block *sb, struct dentry *old,
 			goto retry;
 		}
 
-		error = security_inode_copy_up_xattr(name);
-		if (error < 0 && error != -EOPNOTSUPP)
-			break;
-		if (error == 1) {
-			error = 0;
-			continue; /* Discard */
-		}
 		error = vfs_setxattr(new, name, value, size, 0);
 		if (error) {
 			if (error != -EOPNOTSUPP || ovl_must_copy_xattr(name))
@@ -992,8 +992,8 @@ static char *ovl_get_redirect(struct dentry *dentry, bool abs_redirect)
 		buflen -= thislen;
 		memcpy(&buf[buflen], name, thislen);
-		tmp = dget_dlock(d->d_parent);
 		spin_unlock(&d->d_lock);
+		tmp = dget_parent(d);
 
 		dput(d);
 		d = tmp;
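The one-line fix above replaces dget_dlock(d->d_parent), taken while holding only the child's d_lock, with dget_parent() called after the lock is dropped; dget_parent() pins the parent with its own locking and reference counting, so the child's reference is dropped exactly once per iteration. The resulting walk-up step is the standard idiom (illustrative sketch, not the overlayfs code itself):

	/* Standard "step to parent" idiom: take the parent's ref, drop ours. */
	static struct dentry *step_to_parent(struct dentry *d)
	{
		struct dentry *parent = dget_parent(d);	/* takes its own reference */

		dput(d);	/* child released exactly once: no leak */
		return parent;
	}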
@@ -398,8 +398,9 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	const struct cred *old_cred;
 	int ret;
 
-	if (!ovl_should_sync(OVL_FS(file_inode(file)->i_sb)))
-		return 0;
+	ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
+	if (ret <= 0)
+		return ret;
 
 	ret = ovl_real_fdget_meta(file, &real, !datasync);
 	if (ret)
@@ -352,7 +352,9 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
 		goto out;
 
 	if (!value && !upperdentry) {
+		old_cred = ovl_override_creds(dentry->d_sb);
 		err = vfs_getxattr(realdentry, name, NULL, 0);
+		revert_creds(old_cred);
 		if (err < 0)
 			goto out_drop_write;
 	}
@@ -324,6 +324,7 @@ int ovl_check_metacopy_xattr(struct ovl_fs *ofs, struct dentry *dentry);
 bool ovl_is_metacopy_dentry(struct dentry *dentry);
 char *ovl_get_redirect_xattr(struct ovl_fs *ofs, struct dentry *dentry,
 			     int padding);
+int ovl_sync_status(struct ovl_fs *ofs);
 
 static inline bool ovl_is_impuredir(struct super_block *sb,
 				    struct dentry *dentry)
@@ -81,6 +81,8 @@ struct ovl_fs {
 	atomic_long_t last_ino;
 	/* Whiteout dentry cache */
 	struct dentry *whiteout;
+	/* r/o snapshot of upperdir sb's errseq, taken only on volatile mounts */
+	errseq_t errseq;
 };
 
 static inline struct vfsmount *ovl_upper_mnt(struct ovl_fs *ofs)
@@ -865,7 +865,7 @@ struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
 
 	struct ovl_dir_file *od = file->private_data;
 	struct dentry *dentry = file->f_path.dentry;
-	struct file *realfile = od->realfile;
+	struct file *old, *realfile = od->realfile;
 
 	if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
 		return want_upper ? NULL : realfile;
@@ -874,29 +874,20 @@ struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
 	 * Need to check if we started out being a lower dir, but got copied up
 	 */
 	if (!od->is_upper) {
-		struct inode *inode = file_inode(file);
-
 		realfile = READ_ONCE(od->upperfile);
 		if (!realfile) {
 			struct path upperpath;
 
 			ovl_path_upper(dentry, &upperpath);
 			realfile = ovl_dir_open_realfile(file, &upperpath);
+			if (IS_ERR(realfile))
+				return realfile;
 
-			inode_lock(inode);
-			if (!od->upperfile) {
-				if (IS_ERR(realfile)) {
-					inode_unlock(inode);
-					return realfile;
-				}
-				smp_store_release(&od->upperfile, realfile);
-			} else {
-				/* somebody has beaten us to it */
-				if (!IS_ERR(realfile))
-					fput(realfile);
-				realfile = od->upperfile;
+			old = cmpxchg_release(&od->upperfile, NULL, realfile);
+			if (old) {
+				fput(realfile);
+				realfile = old;
 			}
-			inode_unlock(inode);
 		}
 	}
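The rewrite above removes the inode_lock()-protected install, which could deadlock against a directory ioctl path that may already hold that lock, and publishes the upper file with a single lock-free compare-and-swap instead. The once-only publication idiom, pulled out of the hunk into a generic sketch (illustrative only, not the overlayfs code):

	/* Publish newf into *slot exactly once; lose gracefully on a race. */
	static struct file *publish_once(struct file **slot, struct file *newf)
	{
		/* Install newf only if *slot is still NULL. */
		struct file *old = cmpxchg_release(slot, NULL, newf);

		if (old) {
			/* Somebody beat us to it: drop ours, use theirs. */
			fput(newf);
			return old;
		}
		return newf;
	}

Readers pair with the release ordering through READ_ONCE(), as in the hunk's READ_ONCE(od->upperfile).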
@@ -909,8 +900,9 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
 	struct file *realfile;
 	int err;
 
-	if (!ovl_should_sync(OVL_FS(file->f_path.dentry->d_sb)))
-		return 0;
+	err = ovl_sync_status(OVL_FS(file->f_path.dentry->d_sb));
+	if (err <= 0)
+		return err;
 
 	realfile = ovl_dir_real_file(file, true);
 	err = PTR_ERR_OR_ZERO(realfile);
@@ -264,11 +264,20 @@ static int ovl_sync_fs(struct super_block *sb, int wait)
 	struct super_block *upper_sb;
 	int ret;
 
-	if (!ovl_upper_mnt(ofs))
-		return 0;
+	ret = ovl_sync_status(ofs);
+	/*
+	 * We always have to set the err here, because the return value isn't
+	 * checked in syncfs; we instead report the error indirectly via the
+	 * sb's writeback errseq, which the VFS inspects after this call.
+	 */
+	if (ret < 0) {
+		errseq_set(&sb->s_wb_err, -EIO);
+		return -EIO;
+	}
+
+	if (!ret)
+		return ret;
 
-	if (!ovl_should_sync(ofs))
-		return 0;
 	/*
	 * Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC).
	 * All the super blocks will be iterated, including upper_sb.
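The comment in the hunk is about how syncfs(2) consumes this: the VFS ignores ->sync_fs()'s return value and reports errors to userspace by advancing the caller's per-file sample of the super block's errseq. Abridged fragments from fs/sync.c of this era (context for the change, not part of this patch):

	/* __sync_filesystem() (abridged): the ->sync_fs() return value is
	 * dropped on the floor here ... */
	if (sb->s_op->sync_fs)
		sb->s_op->sync_fs(sb, wait);

	/* ... while syncfs(2) reports the sb-wide errseq to the caller,
	 * which is why ovl_sync_fs() latches -EIO into sb->s_wb_err. */
	ret = errseq_check_and_advance(&sb->s_wb_err, &f.file->f_sb_err);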
@@ -1923,6 +1932,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	unsigned int numlower;
 	int err;
 
+	err = -EIO;
+	if (WARN_ON(sb->s_user_ns != current_user_ns()))
+		goto out;
+
 	sb->s_d_op = &ovl_dentry_operations;
 
 	err = -ENOMEM;
@@ -1989,6 +2002,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_op = &ovl_super_operations;
 
 	if (ofs->config.upperdir) {
+		struct super_block *upper_sb;
+
 		if (!ofs->config.workdir) {
 			pr_err("missing 'workdir'\n");
 			goto out_err;
@@ -1998,6 +2013,16 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 		if (err)
 			goto out_err;
 
+		upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
+		if (!ovl_should_sync(ofs)) {
+			ofs->errseq = errseq_sample(&upper_sb->s_wb_err);
+			if (errseq_check(&upper_sb->s_wb_err, ofs->errseq)) {
+				err = -EIO;
+				pr_err("Cannot mount volatile when upperdir has an unseen error. Sync upperdir fs to clear state.\n");
+				goto out_err;
+			}
+		}
+
 		err = ovl_get_workdir(sb, ofs, &upperpath);
 		if (err)
 			goto out_err;
@@ -2005,9 +2030,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 		if (!ofs->workdir)
 			sb->s_flags |= SB_RDONLY;
 
-		sb->s_stack_depth = ovl_upper_mnt(ofs)->mnt_sb->s_stack_depth;
-		sb->s_time_gran = ovl_upper_mnt(ofs)->mnt_sb->s_time_gran;
-
+		sb->s_stack_depth = upper_sb->s_stack_depth;
+		sb->s_time_gran = upper_sb->s_time_gran;
 	}
 	oe = ovl_get_lowerstack(sb, splitlower, numlower, ofs, layers);
 	err = PTR_ERR(oe);
@@ -962,3 +962,30 @@ char *ovl_get_redirect_xattr(struct ovl_fs *ofs, struct dentry *dentry,
 	kfree(buf);
 	return ERR_PTR(res);
 }
+
+/*
+ * ovl_sync_status() - Check fs sync status for volatile mounts
+ *
+ * Returns 1 if this is not a volatile mount and a real sync is required.
+ *
+ * Returns 0 if syncing can be skipped because the mount is volatile, and no
+ * errors have occurred on the upperdir since the mount.
+ *
+ * Returns -errno if it is a volatile mount and an error has occurred on the
+ * upperdir since the mount. If the error code changes, the latest error code
+ * is returned.
+ */
+int ovl_sync_status(struct ovl_fs *ofs)
+{
+	struct vfsmount *mnt;
+
+	if (ovl_should_sync(ofs))
+		return 1;
+
+	mnt = ovl_upper_mnt(ofs);
+	if (!mnt)
+		return 0;
+
+	return errseq_check(&mnt->mnt_sb->s_wb_err, ofs->errseq);
+}
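All three callers patched in this series collapse the tri-state result into the same pattern:

	/* Caller pattern, as in ovl_fsync() and ovl_dir_fsync() above. */
	ret = ovl_sync_status(ofs);
	if (ret <= 0)
		return ret;	/* volatile mount: 0, or the sticky -errno */
	/* ret == 1: not a volatile mount, fall through and really sync */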
@@ -371,10 +371,11 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer,
 {
 	int size, ret;
 	kuid_t kroot;
+	u32 nsmagic, magic;
 	uid_t root, mappedroot;
 	char *tmpbuf = NULL;
 	struct vfs_cap_data *cap;
-	struct vfs_ns_cap_data *nscap;
+	struct vfs_ns_cap_data *nscap = NULL;
 	struct dentry *dentry;
 	struct user_namespace *fs_ns;
@@ -396,46 +397,61 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer,
 	fs_ns = inode->i_sb->s_user_ns;
 	cap = (struct vfs_cap_data *) tmpbuf;
 	if (is_v2header((size_t) ret, cap)) {
-		/* If this is sizeof(vfs_cap_data) then we're ok with the
-		 * on-disk value, so return that. */
-		if (alloc)
-			*buffer = tmpbuf;
-		else
-			kfree(tmpbuf);
-		return ret;
-	} else if (!is_v3header((size_t) ret, cap)) {
-		kfree(tmpbuf);
-		return -EINVAL;
+		root = 0;
+	} else if (is_v3header((size_t) ret, cap)) {
+		nscap = (struct vfs_ns_cap_data *) tmpbuf;
+		root = le32_to_cpu(nscap->rootid);
+	} else {
+		size = -EINVAL;
+		goto out_free;
 	}
 
-	nscap = (struct vfs_ns_cap_data *) tmpbuf;
-	root = le32_to_cpu(nscap->rootid);
 	kroot = make_kuid(fs_ns, root);
 
 	/* If the root kuid maps to a valid uid in current ns, then return
 	 * this as a nscap. */
 	mappedroot = from_kuid(current_user_ns(), kroot);
 	if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) {
+		size = sizeof(struct vfs_ns_cap_data);
 		if (alloc) {
-			*buffer = tmpbuf;
+			if (!nscap) {
+				/* v2 -> v3 conversion */
+				nscap = kzalloc(size, GFP_ATOMIC);
+				if (!nscap) {
+					size = -ENOMEM;
+					goto out_free;
+				}
+				nsmagic = VFS_CAP_REVISION_3;
+				magic = le32_to_cpu(cap->magic_etc);
+				if (magic & VFS_CAP_FLAGS_EFFECTIVE)
+					nsmagic |= VFS_CAP_FLAGS_EFFECTIVE;
+				memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
+				nscap->magic_etc = cpu_to_le32(nsmagic);
+			} else {
+				/* use allocated v3 buffer */
+				tmpbuf = NULL;
+			}
 			nscap->rootid = cpu_to_le32(mappedroot);
-		} else
-			kfree(tmpbuf);
-		return size;
+			*buffer = nscap;
+		}
+		goto out_free;
 	}
 
 	if (!rootid_owns_currentns(kroot)) {
-		kfree(tmpbuf);
-		return -EOPNOTSUPP;
+		size = -EOVERFLOW;
+		goto out_free;
 	}
 
 	/* This comes from a parent namespace. Return as a v2 capability */
 	size = sizeof(struct vfs_cap_data);
 	if (alloc) {
-		*buffer = kmalloc(size, GFP_ATOMIC);
-		if (*buffer) {
-			struct vfs_cap_data *cap = *buffer;
-			__le32 nsmagic, magic;
+		if (nscap) {
+			/* v3 -> v2 conversion */
+			cap = kzalloc(size, GFP_ATOMIC);
+			if (!cap) {
+				size = -ENOMEM;
+				goto out_free;
+			}
+			magic = VFS_CAP_REVISION_2;
+			nsmagic = le32_to_cpu(nscap->magic_etc);
+			if (nsmagic & VFS_CAP_FLAGS_EFFECTIVE)
@@ -443,9 +459,12 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer,
 			memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
 			cap->magic_etc = cpu_to_le32(magic);
 		} else {
-			size = -ENOMEM;
+			/* use unconverted v2 */
+			tmpbuf = NULL;
 		}
+		*buffer = cap;
 	}
+out_free:
 	kfree(tmpbuf);
 	return size;
 }
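The v2 <-> v3 conversions above shuffle between two xattr layouts that differ only by a trailing rootid; abridged from include/uapi/linux/capability.h:

	#define VFS_CAP_U32 2			/* __le32 pairs in revisions 2 and 3 */

	struct vfs_cap_data {			/* VFS_CAP_REVISION_2 */
		__le32 magic_etc;		/* revision plus VFS_CAP_FLAGS_EFFECTIVE */
		struct {
			__le32 permitted;	/* little endian */
			__le32 inheritable;	/* little endian */
		} data[VFS_CAP_U32];
	};

	struct vfs_ns_cap_data {		/* VFS_CAP_REVISION_3 */
		__le32 magic_etc;
		struct {
			__le32 permitted;
			__le32 inheritable;
		} data[VFS_CAP_U32];
		__le32 rootid;			/* "root" uid in the writer's user namespace */
	};

This is why both conversion paths memcpy sizeof(__le32) * 2 * VFS_CAP_U32 bytes of payload and then only translate magic_etc and rootid.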