Commit 7c6893e3 authored by Miklos Szeredi's avatar Miklos Szeredi

ovl: don't allow writing ioctl on lower layer

Problem with ioctl() is that it's a file operation, yet often used as an
inode operation (i.e. modify the inode despite the file being opened for
read-only).

mnt_want_write_file() is used by filesystems in such cases to get write
access on an arbitrary open file.

Since overlayfs lets filesystems do all file operations, including ioctl,
this can lead to mnt_want_write_file() returning OK for a lower file and
modification of that lower file.

This patch prevents modification by checking if the file is from an
overlayfs lower layer and returning EPERM in that case.

Need to introduce a mnt_want_write_file_path() variant that still does the
old thing for inode operations that can do the copy up + modification
correctly in such cases (fchown, fsetxattr, fremovexattr).

This does not address the correctness of such ioctls on overlayfs (the
correct way would be to copy up and attempt to perform ioctl on upper
file).

In theory this could be a regression.  We very much hope that nobody is
relying on such a hack in any sane setup.

While this patch meddles in VFS code, it has no effect on non-overlayfs
filesystems.
Reported-by: default avatar"zhangyi (F)" <yi.zhang@huawei.com>
Signed-off-by: default avatarMiklos Szeredi <mszeredi@redhat.com>
parent cd91304e
...@@ -71,8 +71,10 @@ extern void __init mnt_init(void); ...@@ -71,8 +71,10 @@ extern void __init mnt_init(void);
extern int __mnt_want_write(struct vfsmount *); extern int __mnt_want_write(struct vfsmount *);
extern int __mnt_want_write_file(struct file *); extern int __mnt_want_write_file(struct file *);
extern int mnt_want_write_file_path(struct file *);
extern void __mnt_drop_write(struct vfsmount *); extern void __mnt_drop_write(struct vfsmount *);
extern void __mnt_drop_write_file(struct file *); extern void __mnt_drop_write_file(struct file *);
extern void mnt_drop_write_file_path(struct file *);
/* /*
* fs_struct.c * fs_struct.c
......
...@@ -431,13 +431,18 @@ int __mnt_want_write_file(struct file *file) ...@@ -431,13 +431,18 @@ int __mnt_want_write_file(struct file *file)
} }
/** /**
* mnt_want_write_file - get write access to a file's mount * mnt_want_write_file_path - get write access to a file's mount
* @file: the file who's mount on which to take a write * @file: the file who's mount on which to take a write
* *
* This is like mnt_want_write, but it takes a file and can * This is like mnt_want_write, but it takes a file and can
* do some optimisations if the file is open for write already * do some optimisations if the file is open for write already
*
* Called by the vfs for cases when we have an open file at hand, but will do an
* inode operation on it (important distinction for files opened on overlayfs,
* since the file operations will come from the real underlying file, while
* inode operations come from the overlay).
*/ */
int mnt_want_write_file(struct file *file) int mnt_want_write_file_path(struct file *file)
{ {
int ret; int ret;
...@@ -447,6 +452,53 @@ int mnt_want_write_file(struct file *file) ...@@ -447,6 +452,53 @@ int mnt_want_write_file(struct file *file)
sb_end_write(file->f_path.mnt->mnt_sb); sb_end_write(file->f_path.mnt->mnt_sb);
return ret; return ret;
} }
static inline int may_write_real(struct file *file)
{
struct dentry *dentry = file->f_path.dentry;
struct dentry *upperdentry;
/* Writable file? */
if (file->f_mode & FMODE_WRITER)
return 0;
/* Not overlayfs? */
if (likely(!(dentry->d_flags & DCACHE_OP_REAL)))
return 0;
/* File refers to upper, writable layer? */
upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER);
if (upperdentry && file_inode(file) == d_inode(upperdentry))
return 0;
/* Lower layer: can't write to real file, sorry... */
return -EPERM;
}
/**
* mnt_want_write_file - get write access to a file's mount
* @file: the file who's mount on which to take a write
*
* This is like mnt_want_write, but it takes a file and can
* do some optimisations if the file is open for write already
*
* Mostly called by filesystems from their ioctl operation before performing
* modification. On overlayfs this needs to check if the file is on a read-only
* lower layer and deny access in that case.
*/
int mnt_want_write_file(struct file *file)
{
int ret;
ret = may_write_real(file);
if (!ret) {
sb_start_write(file_inode(file)->i_sb);
ret = __mnt_want_write_file(file);
if (ret)
sb_end_write(file_inode(file)->i_sb);
}
return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write_file); EXPORT_SYMBOL_GPL(mnt_want_write_file);
/** /**
...@@ -484,10 +536,16 @@ void __mnt_drop_write_file(struct file *file) ...@@ -484,10 +536,16 @@ void __mnt_drop_write_file(struct file *file)
__mnt_drop_write(file->f_path.mnt); __mnt_drop_write(file->f_path.mnt);
} }
void mnt_drop_write_file(struct file *file) void mnt_drop_write_file_path(struct file *file)
{ {
mnt_drop_write(file->f_path.mnt); mnt_drop_write(file->f_path.mnt);
} }
void mnt_drop_write_file(struct file *file)
{
__mnt_drop_write(file->f_path.mnt);
sb_end_write(file_inode(file)->i_sb);
}
EXPORT_SYMBOL(mnt_drop_write_file); EXPORT_SYMBOL(mnt_drop_write_file);
static int mnt_make_readonly(struct mount *mnt) static int mnt_make_readonly(struct mount *mnt)
......
...@@ -670,12 +670,12 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) ...@@ -670,12 +670,12 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
if (!f.file) if (!f.file)
goto out; goto out;
error = mnt_want_write_file(f.file); error = mnt_want_write_file_path(f.file);
if (error) if (error)
goto out_fput; goto out_fput;
audit_file(f.file); audit_file(f.file);
error = chown_common(&f.file->f_path, user, group); error = chown_common(&f.file->f_path, user, group);
mnt_drop_write_file(f.file); mnt_drop_write_file_path(f.file);
out_fput: out_fput:
fdput(f); fdput(f);
out: out:
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include <linux/posix_acl_xattr.h> #include <linux/posix_acl_xattr.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include "internal.h"
static const char * static const char *
strcmp_prefix(const char *a, const char *a_prefix) strcmp_prefix(const char *a, const char *a_prefix)
...@@ -496,10 +497,10 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, ...@@ -496,10 +497,10 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
if (!f.file) if (!f.file)
return error; return error;
audit_file(f.file); audit_file(f.file);
error = mnt_want_write_file(f.file); error = mnt_want_write_file_path(f.file);
if (!error) { if (!error) {
error = setxattr(f.file->f_path.dentry, name, value, size, flags); error = setxattr(f.file->f_path.dentry, name, value, size, flags);
mnt_drop_write_file(f.file); mnt_drop_write_file_path(f.file);
} }
fdput(f); fdput(f);
return error; return error;
...@@ -728,10 +729,10 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) ...@@ -728,10 +729,10 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
if (!f.file) if (!f.file)
return error; return error;
audit_file(f.file); audit_file(f.file);
error = mnt_want_write_file(f.file); error = mnt_want_write_file_path(f.file);
if (!error) { if (!error) {
error = removexattr(f.file->f_path.dentry, name); error = removexattr(f.file->f_path.dentry, name);
mnt_drop_write_file(f.file); mnt_drop_write_file_path(f.file);
} }
fdput(f); fdput(f);
return error; return error;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment