Commit bb93c5ed authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'vfs-6.8.rw' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs rw updates from Christian Brauner:
 "This contains updates from Amir for read-write backing file helpers
  for stacking filesystems such as overlayfs:

   - Fanotify is currently in the process of introducing pre-content
     events. Roughly, a new permission event will be added indicating
     that it is safe to write to the file being accessed. These events
     are used by hierarchical storage managers to e.g., fill the content
     of files on first access.

     During that work we noticed that our current permission checking is
     inconsistent in rw_verify_area() and remap_verify_area().
     Especially in the splice code permission checking is done multiple
     times. For example, one time for the whole range and then again for
     partial ranges inside the iterator.

     In addition, we mostly do permission checking before we call
     file_start_write() except for a few places where we call it after.
     For pre-content events we need such permission checking to be done
     before file_start_write(). So this is a nice reason to clean this
     all up.

     After this series, all permission checking is done before
     file_start_write().

     As part of this cleanup we also massaged the splice code a bit. We
     got rid of a few helpers because we are already drowning in special
     read-write helpers. We also cleaned up the return types for splice
     helpers.

   - Introduce generic read-write helpers for backing files. This lifts
     some overlayfs code to common code so it can be used by the FUSE
     passthrough work coming in over the next cycles. Make Amir and
     Miklos the maintainers for this new subsystem of the vfs"

* tag 'vfs-6.8.rw' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (30 commits)
  fs: fix __sb_write_started() kerneldoc formatting
  fs: factor out backing_file_mmap() helper
  fs: factor out backing_file_splice_{read,write}() helpers
  fs: factor out backing_file_{read,write}_iter() helpers
  fs: prepare for stackable filesystems backing file helpers
  fsnotify: optionally pass access range in file permission hooks
  fsnotify: assert that file_start_write() is not held in permission hooks
  fsnotify: split fsnotify_perm() into two hooks
  fs: use splice_copy_file_range() inline helper
  splice: return type ssize_t from all helpers
  fs: use do_splice_direct() for nfsd/ksmbd server-side-copy
  fs: move file_start_write() into direct_splice_actor()
  fs: fork splice_file_range() from do_splice_direct()
  fs: create {sb,file}_write_not_started() helpers
  fs: create file_write_started() helper
  fs: create __sb_write_started() helper
  fs: move kiocb_start_write() into vfs_iocb_iter_write()
  fs: move permission hook out of do_iter_read()
  fs: move permission hook out of do_iter_write()
  fs: move file_start_write() into vfs_iter_write()
  ...
parents 8c9440fe c39e2ae3
...@@ -8143,6 +8143,15 @@ S: Supported ...@@ -8143,6 +8143,15 @@ S: Supported
F: fs/iomap/ F: fs/iomap/
F: include/linux/iomap.h F: include/linux/iomap.h
FILESYSTEMS [STACKABLE]
M: Miklos Szeredi <miklos@szeredi.hu>
M: Amir Goldstein <amir73il@gmail.com>
L: linux-fsdevel@vger.kernel.org
L: linux-unionfs@vger.kernel.org
S: Maintained
F: fs/backing-file.c
F: include/linux/backing-file.h
FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER
M: Riku Voipio <riku.voipio@iki.fi> M: Riku Voipio <riku.voipio@iki.fi>
L: linux-hwmon@vger.kernel.org L: linux-hwmon@vger.kernel.org
......
...@@ -245,9 +245,7 @@ static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos) ...@@ -245,9 +245,7 @@ static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos)
iov_iter_bvec(&i, ITER_SOURCE, bvec, 1, bvec->bv_len); iov_iter_bvec(&i, ITER_SOURCE, bvec, 1, bvec->bv_len);
file_start_write(file);
bw = vfs_iter_write(file, &i, ppos, 0); bw = vfs_iter_write(file, &i, ppos, 0);
file_end_write(file);
if (likely(bw == bvec->bv_len)) if (likely(bw == bvec->bv_len))
return 0; return 0;
......
...@@ -18,6 +18,10 @@ config VALIDATE_FS_PARSER ...@@ -18,6 +18,10 @@ config VALIDATE_FS_PARSER
config FS_IOMAP config FS_IOMAP
bool bool
# Stackable filesystems
config FS_STACK
bool
config BUFFER_HEAD config BUFFER_HEAD
bool bool
......
...@@ -39,6 +39,7 @@ obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o ...@@ -39,6 +39,7 @@ obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o
obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o
obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o
obj-$(CONFIG_FS_STACK) += backing-file.o
obj-$(CONFIG_FS_MBCACHE) += mbcache.o obj-$(CONFIG_FS_MBCACHE) += mbcache.o
obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
obj-$(CONFIG_NFS_COMMON) += nfs_common/ obj-$(CONFIG_NFS_COMMON) += nfs_common/
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* Common helpers for stackable filesystems and backing files.
*
* Forked from fs/overlayfs/file.c.
*
* Copyright (C) 2017 Red Hat, Inc.
* Copyright (C) 2023 CTERA Networks.
*/
#include <linux/fs.h>
#include <linux/backing-file.h>
#include <linux/splice.h>
#include <linux/mm.h>
#include "internal.h"
/**
 * backing_file_open - open a backing file for kernel internal use
 * @user_path:	path that the user requested to open
 * @flags:	open flags
 * @real_path:	path of the backing file
 * @cred:	credentials for open
 *
 * Intended for stackable filesystems (e.g., overlayfs), where @user_path
 * may be on the stackable filesystem while @real_path is on the underlying
 * filesystem.  The returned file is embedded in a container structure that
 * also stores the stacked file's path, so the user-visible path can later
 * be retrieved with backing_file_user_path().
 *
 * Return: the opened file on success, or an ERR_PTR() on failure.
 */
struct file *backing_file_open(const struct path *user_path, int flags,
			       const struct path *real_path,
			       const struct cred *cred)
{
	struct file *backing;
	int err;

	backing = alloc_empty_backing_file(flags, cred);
	if (IS_ERR(backing))
		return backing;

	/* Take a reference on the user path before storing a copy of it. */
	path_get(user_path);
	*backing_file_user_path(backing) = *user_path;

	err = vfs_open(real_path, backing);
	if (!err)
		return backing;

	fput(backing);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(backing_file_open);
/*
 * State of one asynchronous read/write that is forwarded to a backing file.
 * Objects are allocated from backing_aio_cachep and freed by
 * backing_aio_put() when the reference count drops to zero.
 */
struct backing_aio {
/* clone of the caller's iocb that is submitted to the backing file */
struct kiocb iocb;
/* set to 2 at submission time; see backing_aio_put() */
refcount_t ref;
/* the caller's iocb: gets the final ki_pos and the ki_complete() call */
struct kiocb *orig_iocb;
/* used for aio completion */
void (*end_write)(struct file *);
/* work item and saved result used to defer write completions */
struct work_struct work;
long res;
};
/* slab cache for struct backing_aio, created in backing_aio_init() */
static struct kmem_cache *backing_aio_cachep;
#define BACKING_IOCB_MASK \
(IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND)
static rwf_t iocb_to_rw_flags(int flags)
{
return (__force rwf_t)(flags & BACKING_IOCB_MASK);
}
/*
 * Drop one reference on @aio.  When the last reference goes away, release
 * the file reference held in the cloned iocb and return @aio to its slab
 * cache.
 */
static void backing_aio_put(struct backing_aio *aio)
{
	if (!refcount_dec_and_test(&aio->ref))
		return;

	fput(aio->iocb.ki_filp);
	kmem_cache_free(backing_aio_cachep, aio);
}
/*
 * Finish a request: call the optional end_write callback with the original
 * file, copy the file position of the cloned iocb back into the original
 * iocb and drop one reference on @aio.  @res is not used here.
 */
static void backing_aio_cleanup(struct backing_aio *aio, long res)
{
	struct kiocb *orig = aio->orig_iocb;

	if (aio->end_write)
		aio->end_write(orig->ki_filp);

	orig->ki_pos = aio->iocb.ki_pos;
	backing_aio_put(aio);
}
/*
 * Completion handler for the cloned iocb: end the write on the backing file
 * if this was a write, clean up the request and then complete the caller's
 * original iocb with the same result.
 */
static void backing_aio_rw_complete(struct kiocb *iocb, long res)
{
struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);
/*
 * Fetch the original iocb now: backing_aio_cleanup() drops a reference
 * and may free aio, so aio must not be dereferenced after that call.
 */
struct kiocb *orig_iocb = aio->orig_iocb;
if (iocb->ki_flags & IOCB_WRITE)
kiocb_end_write(iocb);
backing_aio_cleanup(aio, res);
orig_iocb->ki_complete(orig_iocb, res);
}
/*
 * Work item body: run the regular completion handler with the result that
 * was saved in the request when the work was queued.
 */
static void backing_aio_complete_work(struct work_struct *work)
{
	struct backing_aio *req;

	req = container_of(work, struct backing_aio, work);
	backing_aio_rw_complete(&req->iocb, req->res);
}
static void backing_aio_queue_completion(struct kiocb *iocb, long res)
{
struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);
/*
* Punt to a work queue to serialize updates of mtime/size.
*/
aio->res = res;
INIT_WORK(&aio->work, backing_aio_complete_work);
queue_work(file_inode(aio->orig_iocb->ki_filp)->i_sb->s_dio_done_wq,
&aio->work);
}
/*
 * Make sure the superblock of @iocb's file has an s_dio_done_wq workqueue.
 *
 * Returns 0 when the workqueue already exists, otherwise whatever
 * sb_init_dio_done_wq() returns.
 */
static int backing_aio_init_wq(struct kiocb *iocb)
{
	struct super_block *sb = file_inode(iocb->ki_filp)->i_sb;

	return sb->s_dio_done_wq ? 0 : sb_init_dio_done_wq(sb);
}
/**
 * backing_file_read_iter - read from a backing file for a stacked file
 * @file: backing file to read from; must have FMODE_BACKING set
 * @iter: iterator that receives the data
 * @iocb: the caller's kiocb; supplies the position and decides between the
 *        synchronous and the asynchronous path
 * @flags: kiocb flags, converted to rwf_t flags on the synchronous path
 * @ctx: credentials to assume for the read, the user-visible file and an
 *       optional ->accessed() callback
 *
 * Return: 0 if @iter is empty, -EIO if @file is not a backing file, -EINVAL
 * if @iocb asks for direct I/O but @file lacks FMODE_CAN_ODIRECT, -ENOMEM if
 * the async request cannot be allocated, otherwise the value returned by
 * vfs_iter_read() or vfs_iocb_iter_read().
 */
ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
struct kiocb *iocb, int flags,
struct backing_file_ctx *ctx)
{
struct backing_aio *aio = NULL;
const struct cred *old_cred;
ssize_t ret;
if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
return -EIO;
if (!iov_iter_count(iter))
return 0;
if (iocb->ki_flags & IOCB_DIRECT &&
!(file->f_mode & FMODE_CAN_ODIRECT))
return -EINVAL;
/* everything up to revert_creds() below runs with ctx->cred */
old_cred = override_creds(ctx->cred);
if (is_sync_kiocb(iocb)) {
rwf_t rwf = iocb_to_rw_flags(flags);
ret = vfs_iter_read(file, iter, &iocb->ki_pos, rwf);
} else {
/*
 * NOTE(review): @flags is only consulted on the synchronous path; this
 * path clones @iocb instead - confirm callers pass iocb->ki_flags.
 */
ret = -ENOMEM;
aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
if (!aio)
goto out;
aio->orig_iocb = iocb;
/* the extra reference on @file is released by backing_aio_put() */
kiocb_clone(&aio->iocb, iocb, get_file(file));
aio->iocb.ki_complete = backing_aio_rw_complete;
/*
 * Two references: one is dropped right after submission below, the
 * other by backing_aio_cleanup().
 */
refcount_set(&aio->ref, 2);
ret = vfs_iocb_iter_read(file, &aio->iocb, iter);
backing_aio_put(aio);
/* any result other than -EIOCBQUEUED is cleaned up here */
if (ret != -EIOCBQUEUED)
backing_aio_cleanup(aio, ret);
}
out:
revert_creds(old_cred);
/* called on every path that reaches this point, including -ENOMEM */
if (ctx->accessed)
ctx->accessed(ctx->user_file);
return ret;
}
EXPORT_SYMBOL_GPL(backing_file_read_iter);
/**
 * backing_file_write_iter - write to a backing file for a stacked file
 * @file: backing file to write to; must have FMODE_BACKING set
 * @iter: iterator that supplies the data
 * @iocb: the caller's kiocb; supplies the position and decides between the
 *        synchronous and the asynchronous path
 * @flags: kiocb flags to use for the write; IOCB_DIO_CALLER_COMP is always
 *         cleared
 * @ctx: credentials to assume for the write, the user-visible file and an
 *       optional ->end_write() callback
 *
 * Return: 0 if @iter is empty, -EIO if @file is not a backing file, the
 * error from file_remove_privs() if that fails, -EINVAL if @iocb asks for
 * direct I/O but @file lacks FMODE_CAN_ODIRECT, the error from
 * backing_aio_init_wq() or -ENOMEM if the async request cannot be set up,
 * otherwise the value returned by vfs_iter_write() or vfs_iocb_iter_write().
 */
ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
struct kiocb *iocb, int flags,
struct backing_file_ctx *ctx)
{
const struct cred *old_cred;
ssize_t ret;
if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
return -EIO;
if (!iov_iter_count(iter))
return 0;
/* done on the user-visible file, before ctx->cred is assumed */
ret = file_remove_privs(ctx->user_file);
if (ret)
return ret;
if (iocb->ki_flags & IOCB_DIRECT &&
!(file->f_mode & FMODE_CAN_ODIRECT))
return -EINVAL;
/*
* Stacked filesystems don't support deferred completions, don't copy
* this property in case it is set by the issuer.
*/
flags &= ~IOCB_DIO_CALLER_COMP;
old_cred = override_creds(ctx->cred);
if (is_sync_kiocb(iocb)) {
rwf_t rwf = iocb_to_rw_flags(flags);
ret = vfs_iter_write(file, iter, &iocb->ki_pos, rwf);
/* called whether or not the write succeeded */
if (ctx->end_write)
ctx->end_write(ctx->user_file);
} else {
struct backing_aio *aio;
/* the completion is deferred to a workqueue, so it must exist */
ret = backing_aio_init_wq(iocb);
if (ret)
goto out;
ret = -ENOMEM;
aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
if (!aio)
goto out;
aio->orig_iocb = iocb;
aio->end_write = ctx->end_write;
/* the extra reference on @file is released by backing_aio_put() */
kiocb_clone(&aio->iocb, iocb, get_file(file));
/* replace the cloned flags with the caller-adjusted @flags */
aio->iocb.ki_flags = flags;
aio->iocb.ki_complete = backing_aio_queue_completion;
/*
 * Two references: one is dropped right after submission below, the
 * other by backing_aio_cleanup().
 */
refcount_set(&aio->ref, 2);
ret = vfs_iocb_iter_write(file, &aio->iocb, iter);
backing_aio_put(aio);
/* any result other than -EIOCBQUEUED is cleaned up here */
if (ret != -EIOCBQUEUED)
backing_aio_cleanup(aio, ret);
}
out:
revert_creds(old_cred);
return ret;
}
EXPORT_SYMBOL_GPL(backing_file_write_iter);
/**
 * backing_file_splice_read - splice from a backing file into a pipe
 * @in:		backing file to read from; must have FMODE_BACKING set
 * @ppos:	file position, passed through to vfs_splice_read()
 * @pipe:	destination pipe
 * @len:	number of bytes requested
 * @flags:	splice flags, passed through to vfs_splice_read()
 * @ctx:	credentials to assume, the user-visible file and an optional
 *		->accessed() callback
 *
 * Return: -EIO if @in is not a backing file, otherwise the value returned
 * by vfs_splice_read().
 */
ssize_t backing_file_splice_read(struct file *in, loff_t *ppos,
				 struct pipe_inode_info *pipe, size_t len,
				 unsigned int flags,
				 struct backing_file_ctx *ctx)
{
	const struct cred *saved_cred;
	ssize_t spliced;

	if (WARN_ON_ONCE(!(in->f_mode & FMODE_BACKING)))
		return -EIO;

	/* Only the splice itself runs with the credentials from @ctx. */
	saved_cred = override_creds(ctx->cred);
	spliced = vfs_splice_read(in, ppos, pipe, len, flags);
	revert_creds(saved_cred);

	if (ctx->accessed)
		ctx->accessed(ctx->user_file);

	return spliced;
}
EXPORT_SYMBOL_GPL(backing_file_splice_read);
/**
 * backing_file_splice_write - splice from a pipe into a backing file
 * @pipe:	source pipe
 * @out:	backing file to write to; must have FMODE_BACKING set
 * @ppos:	file position, passed through to iter_file_splice_write()
 * @len:	number of bytes requested
 * @flags:	splice flags, passed through to iter_file_splice_write()
 * @ctx:	credentials to assume, the user-visible file and an optional
 *		->end_write() callback
 *
 * Return: -EIO if @out is not a backing file, the error from
 * file_remove_privs() if that fails, otherwise the value returned by
 * iter_file_splice_write().
 */
ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
				  struct file *out, loff_t *ppos, size_t len,
				  unsigned int flags,
				  struct backing_file_ctx *ctx)
{
	const struct cred *saved_cred;
	ssize_t spliced;
	int err;

	if (WARN_ON_ONCE(!(out->f_mode & FMODE_BACKING)))
		return -EIO;

	/* Privileges are removed on the user-visible file, not on @out. */
	err = file_remove_privs(ctx->user_file);
	if (err)
		return err;

	saved_cred = override_creds(ctx->cred);
	file_start_write(out);
	spliced = iter_file_splice_write(pipe, out, ppos, len, flags);
	file_end_write(out);
	revert_creds(saved_cred);

	if (ctx->end_write)
		ctx->end_write(ctx->user_file);

	return spliced;
}
EXPORT_SYMBOL_GPL(backing_file_splice_write);
/**
 * backing_file_mmap - map a backing file in place of the user-visible file
 * @file:	backing file to map; must have FMODE_BACKING set
 * @vma:	the mapping; its vm_file must be @ctx->user_file on entry
 * @ctx:	credentials to assume, the user-visible file and an optional
 *		->accessed() callback
 *
 * Switches @vma over to @file with vma_set_file() and then calls the mmap
 * operation with the credentials from @ctx.
 *
 * Return: -EIO if @file is not a backing file or @vma does not map
 * @ctx->user_file, -ENODEV if @file has no mmap operation, otherwise the
 * value returned by call_mmap().
 */
int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
		      struct backing_file_ctx *ctx)
{
	const struct cred *saved_cred;
	int err;

	if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
		return -EIO;
	if (WARN_ON_ONCE(ctx->user_file != vma->vm_file))
		return -EIO;

	if (!file->f_op->mmap)
		return -ENODEV;

	vma_set_file(vma, file);

	saved_cred = override_creds(ctx->cred);
	err = call_mmap(vma->vm_file, vma);
	revert_creds(saved_cred);

	if (ctx->accessed)
		ctx->accessed(ctx->user_file);

	return err;
}
EXPORT_SYMBOL_GPL(backing_file_mmap);
/*
 * Create the slab cache for struct backing_aio.
 *
 * Returns 0 on success or -ENOMEM if the cache cannot be created.
 */
static int __init backing_aio_init(void)
{
	struct kmem_cache *cache;

	cache = kmem_cache_create("backing_aio", sizeof(struct backing_aio),
				  0, SLAB_HWCACHE_ALIGN, NULL);
	backing_aio_cachep = cache;

	return cache ? 0 : -ENOMEM;
}
fs_initcall(backing_aio_init);
...@@ -4533,29 +4533,29 @@ static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool ...@@ -4533,29 +4533,29 @@ static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool
if (ret < 0) if (ret < 0)
goto out_acct; goto out_acct;
file_start_write(file);
if (iov_iter_count(&iter) == 0) { if (iov_iter_count(&iter) == 0) {
ret = 0; ret = 0;
goto out_end_write; goto out_iov;
} }
pos = args.offset; pos = args.offset;
ret = rw_verify_area(WRITE, file, &pos, args.len); ret = rw_verify_area(WRITE, file, &pos, args.len);
if (ret < 0) if (ret < 0)
goto out_end_write; goto out_iov;
init_sync_kiocb(&kiocb, file); init_sync_kiocb(&kiocb, file);
ret = kiocb_set_rw_flags(&kiocb, 0); ret = kiocb_set_rw_flags(&kiocb, 0);
if (ret) if (ret)
goto out_end_write; goto out_iov;
kiocb.ki_pos = pos; kiocb.ki_pos = pos;
file_start_write(file);
ret = btrfs_do_write_iter(&kiocb, &iter, &args); ret = btrfs_do_write_iter(&kiocb, &iter, &args);
if (ret > 0) if (ret > 0)
fsnotify_modify(file); fsnotify_modify(file);
out_end_write:
file_end_write(file); file_end_write(file);
out_iov:
kfree(iov); kfree(iov);
out_acct: out_acct:
if (ret > 0) if (ret > 0)
......
...@@ -259,7 +259,8 @@ static void cachefiles_write_complete(struct kiocb *iocb, long ret) ...@@ -259,7 +259,8 @@ static void cachefiles_write_complete(struct kiocb *iocb, long ret)
_enter("%ld", ret); _enter("%ld", ret);
kiocb_end_write(iocb); if (ki->was_async)
kiocb_end_write(iocb);
if (ret < 0) if (ret < 0)
trace_cachefiles_io_error(object, inode, ret, trace_cachefiles_io_error(object, inode, ret,
...@@ -319,8 +320,6 @@ int __cachefiles_write(struct cachefiles_object *object, ...@@ -319,8 +320,6 @@ int __cachefiles_write(struct cachefiles_object *object,
ki->iocb.ki_complete = cachefiles_write_complete; ki->iocb.ki_complete = cachefiles_write_complete;
atomic_long_add(ki->b_writing, &cache->b_writing); atomic_long_add(ki->b_writing, &cache->b_writing);
kiocb_start_write(&ki->iocb);
get_file(ki->iocb.ki_filp); get_file(ki->iocb.ki_filp);
cachefiles_grab_object(object, cachefiles_obj_get_ioreq); cachefiles_grab_object(object, cachefiles_obj_get_ioreq);
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <linux/falloc.h> #include <linux/falloc.h>
#include <linux/iversion.h> #include <linux/iversion.h>
#include <linux/ktime.h> #include <linux/ktime.h>
#include <linux/splice.h>
#include "super.h" #include "super.h"
#include "mds_client.h" #include "mds_client.h"
...@@ -3010,8 +3011,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, ...@@ -3010,8 +3011,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
* {read,write}_iter, which will get caps again. * {read,write}_iter, which will get caps again.
*/ */
put_rd_wr_caps(src_ci, src_got, dst_ci, dst_got); put_rd_wr_caps(src_ci, src_got, dst_ci, dst_got);
ret = do_splice_direct(src_file, &src_off, dst_file, ret = splice_file_range(src_file, &src_off, dst_file, &dst_off,
&dst_off, src_objlen, flags); src_objlen);
/* Abort on short copies or on error */ /* Abort on short copies or on error */
if (ret < (long)src_objlen) { if (ret < (long)src_objlen) {
doutc(cl, "Failed partial copy (%zd)\n", ret); doutc(cl, "Failed partial copy (%zd)\n", ret);
...@@ -3065,8 +3066,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, ...@@ -3065,8 +3066,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
*/ */
if (len && (len < src_ci->i_layout.object_size)) { if (len && (len < src_ci->i_layout.object_size)) {
doutc(cl, "Final partial copy of %zu bytes\n", len); doutc(cl, "Final partial copy of %zu bytes\n", len);
bytes = do_splice_direct(src_file, &src_off, dst_file, bytes = splice_file_range(src_file, &src_off, dst_file,
&dst_off, len, flags); &dst_off, len);
if (bytes > 0) if (bytes > 0)
ret += bytes; ret += bytes;
else else
...@@ -3089,8 +3090,8 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off, ...@@ -3089,8 +3090,8 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off,
len, flags); len, flags);
if (ret == -EOPNOTSUPP || ret == -EXDEV) if (ret == -EOPNOTSUPP || ret == -EXDEV)
ret = generic_copy_file_range(src_file, src_off, dst_file, ret = splice_copy_file_range(src_file, src_off, dst_file,
dst_off, len, flags); dst_off, len);
return ret; return ret;
} }
......
...@@ -79,14 +79,12 @@ coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to) ...@@ -79,14 +79,12 @@ coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
if (ret) if (ret)
goto finish_write; goto finish_write;
file_start_write(host_file);
inode_lock(coda_inode); inode_lock(coda_inode);
ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos, 0); ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos, 0);
coda_inode->i_size = file_inode(host_file)->i_size; coda_inode->i_size = file_inode(host_file)->i_size;
coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9; coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
inode_set_mtime_to_ts(coda_inode, inode_set_ctime_current(coda_inode)); inode_set_mtime_to_ts(coda_inode, inode_set_ctime_current(coda_inode));
inode_unlock(coda_inode); inode_unlock(coda_inode);
file_end_write(host_file);
finish_write: finish_write:
venus_access_intent(coda_inode->i_sb, coda_i2f(coda_inode), venus_access_intent(coda_inode->i_sb, coda_i2f(coda_inode),
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <linux/uio.h> #include <linux/uio.h>
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/filelock.h> #include <linux/filelock.h>
#include <linux/splice.h>
static int fuse_send_open(struct fuse_mount *fm, u64 nodeid, static int fuse_send_open(struct fuse_mount *fm, u64 nodeid,
unsigned int open_flags, int opcode, unsigned int open_flags, int opcode,
...@@ -3195,8 +3196,8 @@ static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off, ...@@ -3195,8 +3196,8 @@ static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off,
len, flags); len, flags);
if (ret == -EOPNOTSUPP || ret == -EXDEV) if (ret == -EOPNOTSUPP || ret == -EXDEV)
ret = generic_copy_file_range(src_file, src_off, dst_file, ret = splice_copy_file_range(src_file, src_off, dst_file,
dst_off, len, flags); dst_off, len);
return ret; return ret;
} }
......
...@@ -244,10 +244,10 @@ int do_statx(int dfd, struct filename *filename, unsigned int flags, ...@@ -244,10 +244,10 @@ int do_statx(int dfd, struct filename *filename, unsigned int flags,
/* /*
* fs/splice.c: * fs/splice.c:
*/ */
long splice_file_to_pipe(struct file *in, ssize_t splice_file_to_pipe(struct file *in,
struct pipe_inode_info *opipe, struct pipe_inode_info *opipe,
loff_t *offset, loff_t *offset,
size_t len, unsigned int flags); size_t len, unsigned int flags);
/* /*
* fs/xattr.c: * fs/xattr.c:
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include <linux/mount.h> #include <linux/mount.h>
#include <linux/nfs_fs.h> #include <linux/nfs_fs.h>
#include <linux/nfs_ssc.h> #include <linux/nfs_ssc.h>
#include <linux/splice.h>
#include "delegation.h" #include "delegation.h"
#include "internal.h" #include "internal.h"
#include "iostat.h" #include "iostat.h"
...@@ -195,8 +196,8 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, ...@@ -195,8 +196,8 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
ret = __nfs4_copy_file_range(file_in, pos_in, file_out, pos_out, count, ret = __nfs4_copy_file_range(file_in, pos_in, file_out, pos_out, count,
flags); flags);
if (ret == -EOPNOTSUPP || ret == -EXDEV) if (ret == -EOPNOTSUPP || ret == -EXDEV)
ret = generic_copy_file_range(file_in, pos_in, file_out, ret = splice_copy_file_range(file_in, pos_in, file_out,
pos_out, count, flags); pos_out, count);
return ret; return ret;
} }
......
...@@ -1039,7 +1039,10 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, ...@@ -1039,7 +1039,10 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
ssize_t host_err; ssize_t host_err;
trace_nfsd_read_splice(rqstp, fhp, offset, *count); trace_nfsd_read_splice(rqstp, fhp, offset, *count);
host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); host_err = rw_verify_area(READ, file, &offset, *count);
if (!host_err)
host_err = splice_direct_to_actor(file, &sd,
nfsd_direct_splice_actor);
return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err); return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
} }
...@@ -1176,9 +1179,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, ...@@ -1176,9 +1179,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
since = READ_ONCE(file->f_wb_err); since = READ_ONCE(file->f_wb_err);
if (verf) if (verf)
nfsd_copy_write_verifier(verf, nn); nfsd_copy_write_verifier(verf, nn);
file_start_write(file);
host_err = vfs_iter_write(file, &iter, &pos, flags); host_err = vfs_iter_write(file, &iter, &pos, flags);
file_end_write(file);
if (host_err < 0) { if (host_err < 0) {
commit_reset_write_verifier(nn, rqstp, host_err); commit_reset_write_verifier(nn, rqstp, host_err);
goto out_nfserr; goto out_nfserr;
......
...@@ -304,6 +304,10 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) ...@@ -304,6 +304,10 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (ret) if (ret)
return ret; return ret;
ret = fsnotify_file_area_perm(file, MAY_WRITE, &offset, len);
if (ret)
return ret;
if (S_ISFIFO(inode->i_mode)) if (S_ISFIFO(inode->i_mode))
return -ESPIPE; return -ESPIPE;
...@@ -1178,44 +1182,6 @@ struct file *kernel_file_open(const struct path *path, int flags, ...@@ -1178,44 +1182,6 @@ struct file *kernel_file_open(const struct path *path, int flags,
} }
EXPORT_SYMBOL_GPL(kernel_file_open); EXPORT_SYMBOL_GPL(kernel_file_open);
/**
* backing_file_open - open a backing file for kernel internal use
* @user_path: path that the user reuqested to open
* @flags: open flags
* @real_path: path of the backing file
* @cred: credentials for open
*
* Open a backing file for a stackable filesystem (e.g., overlayfs).
* @user_path may be on the stackable filesystem and @real_path on the
* underlying filesystem. In this case, we want to be able to return the
* @user_path of the stackable filesystem. This is done by embedding the
* returned file into a container structure that also stores the stacked
* file's path, which can be retrieved using backing_file_user_path().
*/
struct file *backing_file_open(const struct path *user_path, int flags,
const struct path *real_path,
const struct cred *cred)
{
struct file *f;
int error;
f = alloc_empty_backing_file(flags, cred);
if (IS_ERR(f))
return f;
path_get(user_path);
*backing_file_user_path(f) = *user_path;
f->f_path = *real_path;
error = do_dentry_open(f, d_inode(real_path->dentry), NULL);
if (error) {
fput(f);
f = ERR_PTR(error);
}
return f;
}
EXPORT_SYMBOL_GPL(backing_file_open);
#define WILL_CREATE(flags) (flags & (O_CREAT | __O_TMPFILE)) #define WILL_CREATE(flags) (flags & (O_CREAT | __O_TMPFILE))
#define O_PATH_FLAGS (O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC) #define O_PATH_FLAGS (O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC)
......
# SPDX-License-Identifier: GPL-2.0-only # SPDX-License-Identifier: GPL-2.0-only
config OVERLAY_FS config OVERLAY_FS
tristate "Overlay filesystem support" tristate "Overlay filesystem support"
select FS_STACK
select EXPORTFS select EXPORTFS
help help
An overlay filesystem combines two filesystems - an 'upper' filesystem An overlay filesystem combines two filesystems - an 'upper' filesystem
......
...@@ -230,6 +230,19 @@ static int ovl_copy_fileattr(struct inode *inode, const struct path *old, ...@@ -230,6 +230,19 @@ static int ovl_copy_fileattr(struct inode *inode, const struct path *old,
return ovl_real_fileattr_set(new, &newfa); return ovl_real_fileattr_set(new, &newfa);
} }
/*
 * Sanity check the arguments of one copy step.
 *
 * Returns 0 when @pos equals @pos2, none of @pos, @len and @totlen is
 * negative and @pos + @len does not overflow.  Otherwise warns once and
 * returns -EIO.
 */
static int ovl_verify_area(loff_t pos, loff_t pos2, loff_t len, loff_t totlen)
{
	loff_t end;

	if (WARN_ON_ONCE(pos != pos2) ||
	    WARN_ON_ONCE(pos < 0 || len < 0 || totlen < 0) ||
	    WARN_ON_ONCE(check_add_overflow(pos, len, &end)))
		return -EIO;

	return 0;
}
static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
struct file *new_file, loff_t len) struct file *new_file, loff_t len)
{ {
...@@ -244,13 +257,20 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, ...@@ -244,13 +257,20 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
int error = 0; int error = 0;
ovl_path_lowerdata(dentry, &datapath); ovl_path_lowerdata(dentry, &datapath);
if (WARN_ON(datapath.dentry == NULL)) if (WARN_ON_ONCE(datapath.dentry == NULL) ||
WARN_ON_ONCE(len < 0))
return -EIO; return -EIO;
old_file = ovl_path_open(&datapath, O_LARGEFILE | O_RDONLY); old_file = ovl_path_open(&datapath, O_LARGEFILE | O_RDONLY);
if (IS_ERR(old_file)) if (IS_ERR(old_file))
return PTR_ERR(old_file); return PTR_ERR(old_file);
error = rw_verify_area(READ, old_file, &old_pos, len);
if (!error)
error = rw_verify_area(WRITE, new_file, &new_pos, len);
if (error)
goto out_fput;
/* Try to use clone_file_range to clone up within the same fs */ /* Try to use clone_file_range to clone up within the same fs */
ovl_start_write(dentry); ovl_start_write(dentry);
cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0); cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0);
...@@ -265,7 +285,7 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, ...@@ -265,7 +285,7 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
while (len) { while (len) {
size_t this_len = OVL_COPY_UP_CHUNK_SIZE; size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
long bytes; ssize_t bytes;
if (len < this_len) if (len < this_len)
this_len = len; this_len = len;
...@@ -309,11 +329,13 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, ...@@ -309,11 +329,13 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
} }
} }
ovl_start_write(dentry); error = ovl_verify_area(old_pos, new_pos, this_len, len);
if (error)
break;
bytes = do_splice_direct(old_file, &old_pos, bytes = do_splice_direct(old_file, &old_pos,
new_file, &new_pos, new_file, &new_pos,
this_len, SPLICE_F_MOVE); this_len, SPLICE_F_MOVE);
ovl_end_write(dentry);
if (bytes <= 0) { if (bytes <= 0) {
error = bytes; error = bytes;
break; break;
......
...@@ -9,25 +9,11 @@ ...@@ -9,25 +9,11 @@
#include <linux/xattr.h> #include <linux/xattr.h>
#include <linux/uio.h> #include <linux/uio.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/splice.h>
#include <linux/security.h> #include <linux/security.h>
#include <linux/mm.h>
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/backing-file.h>
#include "overlayfs.h" #include "overlayfs.h"
#include "../internal.h" /* for sb_init_dio_done_wq */
struct ovl_aio_req {
struct kiocb iocb;
refcount_t ref;
struct kiocb *orig_iocb;
/* used for aio completion */
struct work_struct work;
long res;
};
static struct kmem_cache *ovl_aio_request_cachep;
static char ovl_whatisit(struct inode *inode, struct inode *realinode) static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{ {
if (realinode != ovl_inode_upper(inode)) if (realinode != ovl_inode_upper(inode))
...@@ -274,83 +260,16 @@ static void ovl_file_accessed(struct file *file) ...@@ -274,83 +260,16 @@ static void ovl_file_accessed(struct file *file)
touch_atime(&file->f_path); touch_atime(&file->f_path);
} }
#define OVL_IOCB_MASK \
(IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND)
static rwf_t iocb_to_rw_flags(int flags)
{
return (__force rwf_t)(flags & OVL_IOCB_MASK);
}
static inline void ovl_aio_put(struct ovl_aio_req *aio_req)
{
if (refcount_dec_and_test(&aio_req->ref)) {
fput(aio_req->iocb.ki_filp);
kmem_cache_free(ovl_aio_request_cachep, aio_req);
}
}
static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
{
struct kiocb *iocb = &aio_req->iocb;
struct kiocb *orig_iocb = aio_req->orig_iocb;
if (iocb->ki_flags & IOCB_WRITE) {
kiocb_end_write(iocb);
ovl_file_modified(orig_iocb->ki_filp);
}
orig_iocb->ki_pos = iocb->ki_pos;
ovl_aio_put(aio_req);
}
static void ovl_aio_rw_complete(struct kiocb *iocb, long res)
{
struct ovl_aio_req *aio_req = container_of(iocb,
struct ovl_aio_req, iocb);
struct kiocb *orig_iocb = aio_req->orig_iocb;
ovl_aio_cleanup_handler(aio_req);
orig_iocb->ki_complete(orig_iocb, res);
}
static void ovl_aio_complete_work(struct work_struct *work)
{
struct ovl_aio_req *aio_req = container_of(work,
struct ovl_aio_req, work);
ovl_aio_rw_complete(&aio_req->iocb, aio_req->res);
}
static void ovl_aio_queue_completion(struct kiocb *iocb, long res)
{
struct ovl_aio_req *aio_req = container_of(iocb,
struct ovl_aio_req, iocb);
struct kiocb *orig_iocb = aio_req->orig_iocb;
/*
* Punt to a work queue to serialize updates of mtime/size.
*/
aio_req->res = res;
INIT_WORK(&aio_req->work, ovl_aio_complete_work);
queue_work(file_inode(orig_iocb->ki_filp)->i_sb->s_dio_done_wq,
&aio_req->work);
}
static int ovl_init_aio_done_wq(struct super_block *sb)
{
if (sb->s_dio_done_wq)
return 0;
return sb_init_dio_done_wq(sb);
}
static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{ {
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
struct fd real; struct fd real;
const struct cred *old_cred;
ssize_t ret; ssize_t ret;
struct backing_file_ctx ctx = {
.cred = ovl_creds(file_inode(file)->i_sb),
.user_file = file,
.accessed = ovl_file_accessed,
};
if (!iov_iter_count(iter)) if (!iov_iter_count(iter))
return 0; return 0;
...@@ -359,37 +278,8 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) ...@@ -359,37 +278,8 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
if (ret) if (ret)
return ret; return ret;
ret = -EINVAL; ret = backing_file_read_iter(real.file, iter, iocb, iocb->ki_flags,
if (iocb->ki_flags & IOCB_DIRECT && &ctx);
!(real.file->f_mode & FMODE_CAN_ODIRECT))
goto out_fdput;
old_cred = ovl_override_creds(file_inode(file)->i_sb);
if (is_sync_kiocb(iocb)) {
rwf_t rwf = iocb_to_rw_flags(iocb->ki_flags);
ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, rwf);
} else {
struct ovl_aio_req *aio_req;
ret = -ENOMEM;
aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
if (!aio_req)
goto out;
aio_req->orig_iocb = iocb;
kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
aio_req->iocb.ki_complete = ovl_aio_rw_complete;
refcount_set(&aio_req->ref, 2);
ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
ovl_aio_put(aio_req);
if (ret != -EIOCBQUEUED)
ovl_aio_cleanup_handler(aio_req);
}
out:
revert_creds(old_cred);
ovl_file_accessed(file);
out_fdput:
fdput(real); fdput(real);
return ret; return ret;
...@@ -400,9 +290,13 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) ...@@ -400,9 +290,13 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
struct fd real; struct fd real;
const struct cred *old_cred;
ssize_t ret; ssize_t ret;
int ifl = iocb->ki_flags; int ifl = iocb->ki_flags;
struct backing_file_ctx ctx = {
.cred = ovl_creds(inode->i_sb),
.user_file = file,
.end_write = ovl_file_modified,
};
if (!iov_iter_count(iter)) if (!iov_iter_count(iter))
return 0; return 0;
...@@ -410,19 +304,11 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) ...@@ -410,19 +304,11 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
inode_lock(inode); inode_lock(inode);
/* Update mode */ /* Update mode */
ovl_copyattr(inode); ovl_copyattr(inode);
ret = file_remove_privs(file);
if (ret)
goto out_unlock;
ret = ovl_real_fdget(file, &real); ret = ovl_real_fdget(file, &real);
if (ret) if (ret)
goto out_unlock; goto out_unlock;
ret = -EINVAL;
if (iocb->ki_flags & IOCB_DIRECT &&
!(real.file->f_mode & FMODE_CAN_ODIRECT))
goto out_fdput;
if (!ovl_should_sync(OVL_FS(inode->i_sb))) if (!ovl_should_sync(OVL_FS(inode->i_sb)))
ifl &= ~(IOCB_DSYNC | IOCB_SYNC); ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
...@@ -431,42 +317,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) ...@@ -431,42 +317,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
* this property in case it is set by the issuer. * this property in case it is set by the issuer.
*/ */
ifl &= ~IOCB_DIO_CALLER_COMP; ifl &= ~IOCB_DIO_CALLER_COMP;
ret = backing_file_write_iter(real.file, iter, iocb, ifl, &ctx);
old_cred = ovl_override_creds(file_inode(file)->i_sb);
if (is_sync_kiocb(iocb)) {
rwf_t rwf = iocb_to_rw_flags(ifl);
file_start_write(real.file);
ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, rwf);
file_end_write(real.file);
/* Update size */
ovl_file_modified(file);
} else {
struct ovl_aio_req *aio_req;
ret = ovl_init_aio_done_wq(inode->i_sb);
if (ret)
goto out;
ret = -ENOMEM;
aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
if (!aio_req)
goto out;
aio_req->orig_iocb = iocb;
kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
aio_req->iocb.ki_flags = ifl;
aio_req->iocb.ki_complete = ovl_aio_queue_completion;
refcount_set(&aio_req->ref, 2);
kiocb_start_write(&aio_req->iocb);
ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
ovl_aio_put(aio_req);
if (ret != -EIOCBQUEUED)
ovl_aio_cleanup_handler(aio_req);
}
out:
revert_creds(old_cred);
out_fdput:
fdput(real); fdput(real);
out_unlock: out_unlock:
...@@ -479,20 +330,21 @@ static ssize_t ovl_splice_read(struct file *in, loff_t *ppos, ...@@ -479,20 +330,21 @@ static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len, struct pipe_inode_info *pipe, size_t len,
unsigned int flags) unsigned int flags)
{ {
const struct cred *old_cred;
struct fd real; struct fd real;
ssize_t ret; ssize_t ret;
struct backing_file_ctx ctx = {
.cred = ovl_creds(file_inode(in)->i_sb),
.user_file = in,
.accessed = ovl_file_accessed,
};
ret = ovl_real_fdget(in, &real); ret = ovl_real_fdget(in, &real);
if (ret) if (ret)
return ret; return ret;
old_cred = ovl_override_creds(file_inode(in)->i_sb); ret = backing_file_splice_read(real.file, ppos, pipe, len, flags, &ctx);
ret = vfs_splice_read(real.file, ppos, pipe, len, flags);
revert_creds(old_cred);
ovl_file_accessed(in);
fdput(real); fdput(real);
return ret; return ret;
} }
...@@ -508,30 +360,23 @@ static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out, ...@@ -508,30 +360,23 @@ static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
loff_t *ppos, size_t len, unsigned int flags) loff_t *ppos, size_t len, unsigned int flags)
{ {
struct fd real; struct fd real;
const struct cred *old_cred;
struct inode *inode = file_inode(out); struct inode *inode = file_inode(out);
ssize_t ret; ssize_t ret;
struct backing_file_ctx ctx = {
.cred = ovl_creds(inode->i_sb),
.user_file = out,
.end_write = ovl_file_modified,
};
inode_lock(inode); inode_lock(inode);
/* Update mode */ /* Update mode */
ovl_copyattr(inode); ovl_copyattr(inode);
ret = file_remove_privs(out);
if (ret)
goto out_unlock;
ret = ovl_real_fdget(out, &real); ret = ovl_real_fdget(out, &real);
if (ret) if (ret)
goto out_unlock; goto out_unlock;
old_cred = ovl_override_creds(inode->i_sb); ret = backing_file_splice_write(pipe, real.file, ppos, len, flags, &ctx);
file_start_write(real.file);
ret = iter_file_splice_write(pipe, real.file, ppos, len, flags);
file_end_write(real.file);
/* Update size */
ovl_file_modified(out);
revert_creds(old_cred);
fdput(real); fdput(real);
out_unlock: out_unlock:
...@@ -569,23 +414,13 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync) ...@@ -569,23 +414,13 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
static int ovl_mmap(struct file *file, struct vm_area_struct *vma) static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
{ {
struct file *realfile = file->private_data; struct file *realfile = file->private_data;
const struct cred *old_cred; struct backing_file_ctx ctx = {
int ret; .cred = ovl_creds(file_inode(file)->i_sb),
.user_file = file,
if (!realfile->f_op->mmap) .accessed = ovl_file_accessed,
return -ENODEV; };
if (WARN_ON(file != vma->vm_file)) return backing_file_mmap(realfile, vma, &ctx);
return -EIO;
vma_set_file(vma, realfile);
old_cred = ovl_override_creds(file_inode(file)->i_sb);
ret = call_mmap(vma->vm_file, vma);
revert_creds(old_cred);
ovl_file_accessed(file);
return ret;
} }
static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len) static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
...@@ -778,19 +613,3 @@ const struct file_operations ovl_file_operations = { ...@@ -778,19 +613,3 @@ const struct file_operations ovl_file_operations = {
.copy_file_range = ovl_copy_file_range, .copy_file_range = ovl_copy_file_range,
.remap_file_range = ovl_remap_file_range, .remap_file_range = ovl_remap_file_range,
}; };
/*
 * Create the "ovl_aio_req" slab cache, sized for struct ovl_aio_req and
 * hardware-cacheline aligned, and store it in ovl_aio_request_cachep.
 *
 * Returns 0 on success or -ENOMEM if the cache could not be created.
 */
int __init ovl_aio_request_cache_init(void)
{
	ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req",
						   sizeof(struct ovl_aio_req),
						   0, SLAB_HWCACHE_ALIGN, NULL);

	return ovl_aio_request_cachep ? 0 : -ENOMEM;
}
/*
 * Tear down the slab cache referenced by ovl_aio_request_cachep by handing
 * it to kmem_cache_destroy(). Takes no arguments and returns nothing.
 */
void ovl_aio_request_cache_destroy(void)
{
kmem_cache_destroy(ovl_aio_request_cachep);
}
...@@ -425,6 +425,12 @@ int ovl_want_write(struct dentry *dentry); ...@@ -425,6 +425,12 @@ int ovl_want_write(struct dentry *dentry);
void ovl_drop_write(struct dentry *dentry); void ovl_drop_write(struct dentry *dentry);
struct dentry *ovl_workdir(struct dentry *dentry); struct dentry *ovl_workdir(struct dentry *dentry);
const struct cred *ovl_override_creds(struct super_block *sb); const struct cred *ovl_override_creds(struct super_block *sb);
/*
 * ovl_creds - fetch the creator credentials recorded for an overlay sb
 * @sb: overlay super block
 *
 * Returns the creator_cred pointer held in OVL_FS(sb). This is a plain
 * accessor: it only reads the pointer and does nothing else with it.
 */
static inline const struct cred *ovl_creds(struct super_block *sb)
{
return OVL_FS(sb)->creator_cred;
}
int ovl_can_decode_fh(struct super_block *sb); int ovl_can_decode_fh(struct super_block *sb);
struct dentry *ovl_indexdir(struct super_block *sb); struct dentry *ovl_indexdir(struct super_block *sb);
bool ovl_index_all(struct super_block *sb); bool ovl_index_all(struct super_block *sb);
...@@ -837,8 +843,6 @@ struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir, ...@@ -837,8 +843,6 @@ struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir,
/* file.c */ /* file.c */
extern const struct file_operations ovl_file_operations; extern const struct file_operations ovl_file_operations;
int __init ovl_aio_request_cache_init(void);
void ovl_aio_request_cache_destroy(void);
int ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa); int ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa);
int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa); int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa);
int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa); int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa);
......
...@@ -1501,14 +1501,10 @@ static int __init ovl_init(void) ...@@ -1501,14 +1501,10 @@ static int __init ovl_init(void)
if (ovl_inode_cachep == NULL) if (ovl_inode_cachep == NULL)
return -ENOMEM; return -ENOMEM;
err = ovl_aio_request_cache_init(); err = register_filesystem(&ovl_fs_type);
if (!err) { if (!err)
err = register_filesystem(&ovl_fs_type); return 0;
if (!err)
return 0;
ovl_aio_request_cache_destroy();
}
kmem_cache_destroy(ovl_inode_cachep); kmem_cache_destroy(ovl_inode_cachep);
return err; return err;
...@@ -1524,7 +1520,6 @@ static void __exit ovl_exit(void) ...@@ -1524,7 +1520,6 @@ static void __exit ovl_exit(void)
*/ */
rcu_barrier(); rcu_barrier();
kmem_cache_destroy(ovl_inode_cachep); kmem_cache_destroy(ovl_inode_cachep);
ovl_aio_request_cache_destroy();
} }
module_init(ovl_init); module_init(ovl_init);
......
This diff is collapsed.
...@@ -96,6 +96,10 @@ int iterate_dir(struct file *file, struct dir_context *ctx) ...@@ -96,6 +96,10 @@ int iterate_dir(struct file *file, struct dir_context *ctx)
if (res) if (res)
goto out; goto out;
res = fsnotify_file_perm(file, MAY_READ);
if (res)
goto out;
res = down_read_killable(&inode->i_rwsem); res = down_read_killable(&inode->i_rwsem);
if (res) if (res)
goto out; goto out;
......
...@@ -102,7 +102,9 @@ static int generic_remap_checks(struct file *file_in, loff_t pos_in, ...@@ -102,7 +102,9 @@ static int generic_remap_checks(struct file *file_in, loff_t pos_in,
static int remap_verify_area(struct file *file, loff_t pos, loff_t len, static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
bool write) bool write)
{ {
int mask = write ? MAY_WRITE : MAY_READ;
loff_t tmp; loff_t tmp;
int ret;
if (unlikely(pos < 0 || len < 0)) if (unlikely(pos < 0 || len < 0))
return -EINVAL; return -EINVAL;
...@@ -110,7 +112,11 @@ static int remap_verify_area(struct file *file, loff_t pos, loff_t len, ...@@ -110,7 +112,11 @@ static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
if (unlikely(check_add_overflow(pos, len, &tmp))) if (unlikely(check_add_overflow(pos, len, &tmp)))
return -EINVAL; return -EINVAL;
return security_file_permission(file, write ? MAY_WRITE : MAY_READ); ret = security_file_permission(file, mask);
if (ret)
return ret;
return fsnotify_file_area_perm(file, mask, &pos, len);
} }
/* /*
...@@ -385,14 +391,6 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, ...@@ -385,14 +391,6 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
if (!file_in->f_op->remap_file_range) if (!file_in->f_op->remap_file_range)
return -EOPNOTSUPP; return -EOPNOTSUPP;
ret = remap_verify_area(file_in, pos_in, len, false);
if (ret)
return ret;
ret = remap_verify_area(file_out, pos_out, len, true);
if (ret)
return ret;
ret = file_in->f_op->remap_file_range(file_in, pos_in, ret = file_in->f_op->remap_file_range(file_in, pos_in,
file_out, pos_out, len, remap_flags); file_out, pos_out, len, remap_flags);
if (ret < 0) if (ret < 0)
...@@ -410,6 +408,14 @@ loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, ...@@ -410,6 +408,14 @@ loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
{ {
loff_t ret; loff_t ret;
ret = remap_verify_area(file_in, pos_in, len, false);
if (ret)
return ret;
ret = remap_verify_area(file_out, pos_out, len, true);
if (ret)
return ret;
file_start_write(file_out); file_start_write(file_out);
ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len, ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len,
remap_flags); remap_flags);
...@@ -420,7 +426,7 @@ loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, ...@@ -420,7 +426,7 @@ loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
EXPORT_SYMBOL(vfs_clone_file_range); EXPORT_SYMBOL(vfs_clone_file_range);
/* Check whether we are allowed to dedupe the destination file */ /* Check whether we are allowed to dedupe the destination file */
static bool allow_file_dedupe(struct file *file) static bool may_dedupe_file(struct file *file)
{ {
struct mnt_idmap *idmap = file_mnt_idmap(file); struct mnt_idmap *idmap = file_mnt_idmap(file);
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
...@@ -445,24 +451,29 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, ...@@ -445,24 +451,29 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP | WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP |
REMAP_FILE_CAN_SHORTEN)); REMAP_FILE_CAN_SHORTEN));
ret = mnt_want_write_file(dst_file);
if (ret)
return ret;
/* /*
* This is redundant if called from vfs_dedupe_file_range(), but other * This is redundant if called from vfs_dedupe_file_range(), but other
* callers need it and it's not performance sensitive... * callers need it and it's not performance sensitive...
*/ */
ret = remap_verify_area(src_file, src_pos, len, false); ret = remap_verify_area(src_file, src_pos, len, false);
if (ret) if (ret)
goto out_drop_write; return ret;
ret = remap_verify_area(dst_file, dst_pos, len, true); ret = remap_verify_area(dst_file, dst_pos, len, true);
if (ret) if (ret)
goto out_drop_write; return ret;
/*
* This needs to be called after remap_verify_area() because of
* sb_start_write() and before may_dedupe_file() because the mount's
* MAY_WRITE needs to be checked with mnt_get_write_access_file() held.
*/
ret = mnt_want_write_file(dst_file);
if (ret)
return ret;
ret = -EPERM; ret = -EPERM;
if (!allow_file_dedupe(dst_file)) if (!may_dedupe_file(dst_file))
goto out_drop_write; goto out_drop_write;
ret = -EXDEV; ret = -EXDEV;
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include <linux/freezer.h> #include <linux/freezer.h>
#include <linux/namei.h> #include <linux/namei.h>
#include <linux/random.h> #include <linux/random.h>
#include <linux/splice.h>
#include <linux/uuid.h> #include <linux/uuid.h>
#include <linux/xattr.h> #include <linux/xattr.h>
#include <uapi/linux/magic.h> #include <uapi/linux/magic.h>
...@@ -1506,8 +1507,8 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off, ...@@ -1506,8 +1507,8 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off,
free_xid(xid); free_xid(xid);
if (rc == -EOPNOTSUPP || rc == -EXDEV) if (rc == -EOPNOTSUPP || rc == -EXDEV)
rc = generic_copy_file_range(src_file, off, dst_file, rc = splice_copy_file_range(src_file, off, dst_file,
destoff, len, flags); destoff, len);
return rc; return rc;
} }
......
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Common helpers for stackable filesystems and backing files.
*
* Copyright (C) 2023 CTERA Networks.
*/
#ifndef _LINUX_BACKING_FILE_H
#define _LINUX_BACKING_FILE_H
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/fs.h>
/*
 * Per-call context handed to the backing_file_*() helpers declared below.
 *
 * NOTE(review): the helper implementations are not visible here; the field
 * descriptions are inferred from how callers fill the struct and should be
 * confirmed against the helpers themselves.
 */
struct backing_file_ctx {
/* presumably the credentials the helper assumes while accessing the backing file */
const struct cred *cred;
/* the stacking filesystem's own file, as opposed to the backing file */
struct file *user_file;
/* optional hook; presumably called on user_file after a read-type access */
void (*accessed)(struct file *);
/* optional hook; presumably called on user_file once a write has finished */
void (*end_write)(struct file *);
};
struct file *backing_file_open(const struct path *user_path, int flags,
const struct path *real_path,
const struct cred *cred);
ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
struct kiocb *iocb, int flags,
struct backing_file_ctx *ctx);
ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
struct kiocb *iocb, int flags,
struct backing_file_ctx *ctx);
ssize_t backing_file_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags,
struct backing_file_ctx *ctx);
ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
struct file *out, loff_t *ppos, size_t len,
unsigned int flags,
struct backing_file_ctx *ctx);
int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
struct backing_file_ctx *ctx);
#endif /* _LINUX_BACKING_FILE_H */
...@@ -1648,9 +1648,70 @@ static inline bool __sb_start_write_trylock(struct super_block *sb, int level) ...@@ -1648,9 +1648,70 @@ static inline bool __sb_start_write_trylock(struct super_block *sb, int level)
#define __sb_writers_release(sb, lev) \ #define __sb_writers_release(sb, lev) \
percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
/**
 * __sb_write_started - report whether a given sb freeze level is held
 * @sb: the super block being written to
 * @level: the freeze level to test
 *
 * The result comes straight from lockdep_is_held_type() on the rw_sem
 * that belongs to @level:
 *
 * * > 0 - sb freeze level is held
 * * 0 - sb freeze level is not held
 * * < 0 - !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN
 */
static inline int __sb_write_started(const struct super_block *sb, int level)
{
	/* freeze levels are 1-based, the rw_sem array is 0-based */
	return lockdep_is_held_type(&sb->s_writers.rw_sem[level - 1], 1);
}
/**
* sb_write_started - check if SB_FREEZE_WRITE is held
* @sb: the super we write to
*
* May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
*/
static inline bool sb_write_started(const struct super_block *sb) static inline bool sb_write_started(const struct super_block *sb)
{ {
return lockdep_is_held_type(sb->s_writers.rw_sem + SB_FREEZE_WRITE - 1, 1); return __sb_write_started(sb, SB_FREEZE_WRITE);
}
/**
 * sb_write_not_started - check that SB_FREEZE_WRITE is not held
 * @sb: the super block being written to
 *
 * An "unknown" answer from __sb_write_started() (negative value, i.e.
 * !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN) is reported as true, so this may
 * give a false positive in that configuration.
 */
static inline bool sb_write_not_started(const struct super_block *sb)
{
	int held = __sb_write_started(sb, SB_FREEZE_WRITE);

	/* 0 means "not held", negative means "cannot tell" */
	return held <= 0;
}
/**
 * file_write_started - check that SB_FREEZE_WRITE is held for a file
 * @file: the file being written to
 *
 * Non-regular files always report true, because file_start_write() has no
 * effect on !S_ISREG; this is a possible false positive. For regular files
 * the answer is sb_write_started() on the file's super block, which may
 * itself be a false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
 */
static inline bool file_write_started(const struct file *file)
{
	return !S_ISREG(file_inode(file)->i_mode) ||
	       sb_write_started(file_inode(file)->i_sb);
}
/**
* file_write_not_started - check if SB_FREEZE_WRITE is not held
* @file: the file we write to
*
* May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
* May be false positive with !S_ISREG, because file_start_write() has
* no effect on !S_ISREG.
*/
static inline bool file_write_not_started(const struct file *file)
{
if (!S_ISREG(file_inode(file)->i_mode))
return true;
return sb_write_not_started(file_inode(file)->i_sb);
} }
/** /**
...@@ -2032,9 +2093,6 @@ extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); ...@@ -2032,9 +2093,6 @@ extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
loff_t, size_t, unsigned int); loff_t, size_t, unsigned int);
extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t len, unsigned int flags);
int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out, struct file *file_out, loff_t pos_out,
loff_t *len, unsigned int remap_flags, loff_t *len, unsigned int remap_flags,
...@@ -2535,9 +2593,6 @@ struct file *dentry_open(const struct path *path, int flags, ...@@ -2535,9 +2593,6 @@ struct file *dentry_open(const struct path *path, int flags,
const struct cred *creds); const struct cred *creds);
struct file *dentry_create(const struct path *path, int flags, umode_t mode, struct file *dentry_create(const struct path *path, int flags, umode_t mode,
const struct cred *cred); const struct cred *cred);
struct file *backing_file_open(const struct path *user_path, int flags,
const struct path *real_path,
const struct cred *cred);
struct path *backing_file_user_path(struct file *f); struct path *backing_file_user_path(struct file *f);
/* /*
...@@ -3017,8 +3072,6 @@ ssize_t copy_splice_read(struct file *in, loff_t *ppos, ...@@ -3017,8 +3072,6 @@ ssize_t copy_splice_read(struct file *in, loff_t *ppos,
size_t len, unsigned int flags); size_t len, unsigned int flags);
extern ssize_t iter_file_splice_write(struct pipe_inode_info *, extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
struct file *, loff_t *, size_t, unsigned int); struct file *, loff_t *, size_t, unsigned int);
extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
loff_t *opos, size_t len, unsigned int flags);
extern void extern void
......
...@@ -100,29 +100,49 @@ static inline int fsnotify_file(struct file *file, __u32 mask) ...@@ -100,29 +100,49 @@ static inline int fsnotify_file(struct file *file, __u32 mask)
return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH);
} }
/* Simple call site for access decisions */ /*
static inline int fsnotify_perm(struct file *file, int mask) * fsnotify_file_area_perm - permission hook before access to file range
*/
static inline int fsnotify_file_area_perm(struct file *file, int perm_mask,
const loff_t *ppos, size_t count)
{ {
int ret; __u32 fsnotify_mask = FS_ACCESS_PERM;
__u32 fsnotify_mask = 0;
/*
* filesystem may be modified in the context of permission events
* (e.g. by HSM filling a file on access), so sb freeze protection
* must not be held.
*/
lockdep_assert_once(file_write_not_started(file));
if (!(mask & (MAY_READ | MAY_OPEN))) if (!(perm_mask & MAY_READ))
return 0; return 0;
if (mask & MAY_OPEN) { return fsnotify_file(file, fsnotify_mask);
fsnotify_mask = FS_OPEN_PERM; }
/*
* fsnotify_file_perm - permission hook before file access
*/
static inline int fsnotify_file_perm(struct file *file, int perm_mask)
{
return fsnotify_file_area_perm(file, perm_mask, NULL, 0);
}
if (file->f_flags & __FMODE_EXEC) { /*
ret = fsnotify_file(file, FS_OPEN_EXEC_PERM); * fsnotify_open_perm - permission hook before file open
*/
static inline int fsnotify_open_perm(struct file *file)
{
int ret;
if (ret) if (file->f_flags & __FMODE_EXEC) {
return ret; ret = fsnotify_file(file, FS_OPEN_EXEC_PERM);
} if (ret)
} else if (mask & MAY_READ) { return ret;
fsnotify_mask = FS_ACCESS_PERM;
} }
return fsnotify_file(file, fsnotify_mask); return fsnotify_file(file, FS_OPEN_PERM);
} }
/* /*
......
...@@ -68,28 +68,37 @@ typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, ...@@ -68,28 +68,37 @@ typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
typedef int (splice_direct_actor)(struct pipe_inode_info *, typedef int (splice_direct_actor)(struct pipe_inode_info *,
struct splice_desc *); struct splice_desc *);
extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *, ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
loff_t *, size_t, unsigned int, loff_t *ppos, size_t len, unsigned int flags,
splice_actor *); splice_actor *actor);
extern ssize_t __splice_from_pipe(struct pipe_inode_info *, ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
struct splice_desc *, splice_actor *); struct splice_desc *sd, splice_actor *actor);
extern ssize_t splice_to_pipe(struct pipe_inode_info *, ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
struct splice_pipe_desc *); struct splice_pipe_desc *spd);
extern ssize_t add_to_pipe(struct pipe_inode_info *, ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf);
struct pipe_buffer *); ssize_t vfs_splice_read(struct file *in, loff_t *ppos,
long vfs_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len,
struct pipe_inode_info *pipe, size_t len, unsigned int flags);
unsigned int flags); ssize_t splice_direct_to_actor(struct file *file, struct splice_desc *sd,
extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, splice_direct_actor *actor);
splice_direct_actor *); ssize_t do_splice(struct file *in, loff_t *off_in, struct file *out,
extern long do_splice(struct file *in, loff_t *off_in, loff_t *off_out, size_t len, unsigned int flags);
struct file *out, loff_t *off_out, ssize_t do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
size_t len, unsigned int flags); loff_t *opos, size_t len, unsigned int flags);
ssize_t splice_file_range(struct file *in, loff_t *ppos, struct file *out,
loff_t *opos, size_t len);
extern long do_tee(struct file *in, struct file *out, size_t len, static inline long splice_copy_file_range(struct file *in, loff_t pos_in,
unsigned int flags); struct file *out, loff_t pos_out,
extern ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out, size_t len)
loff_t *ppos, size_t len, unsigned int flags); {
return splice_file_range(in, &pos_in, out, &pos_out, len);
}
ssize_t do_tee(struct file *in, struct file *out, size_t len,
unsigned int flags);
ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out,
loff_t *ppos, size_t len, unsigned int flags);
/* /*
* for dynamic pipe sizing * for dynamic pipe sizing
......
...@@ -51,7 +51,7 @@ int io_tee(struct io_kiocb *req, unsigned int issue_flags) ...@@ -51,7 +51,7 @@ int io_tee(struct io_kiocb *req, unsigned int issue_flags)
struct file *out = sp->file_out; struct file *out = sp->file_out;
unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
struct file *in; struct file *in;
long ret = 0; ssize_t ret = 0;
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
...@@ -92,7 +92,7 @@ int io_splice(struct io_kiocb *req, unsigned int issue_flags) ...@@ -92,7 +92,7 @@ int io_splice(struct io_kiocb *req, unsigned int issue_flags)
unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
loff_t *poff_in, *poff_out; loff_t *poff_in, *poff_out;
struct file *in; struct file *in;
long ret = 0; ssize_t ret = 0;
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
......
...@@ -2580,13 +2580,7 @@ int security_kernfs_init_security(struct kernfs_node *kn_dir, ...@@ -2580,13 +2580,7 @@ int security_kernfs_init_security(struct kernfs_node *kn_dir,
*/ */
int security_file_permission(struct file *file, int mask) int security_file_permission(struct file *file, int mask)
{ {
int ret; return call_int_hook(file_permission, 0, file, mask);
ret = call_int_hook(file_permission, 0, file, mask);
if (ret)
return ret;
return fsnotify_perm(file, mask);
} }
/** /**
...@@ -2837,7 +2831,7 @@ int security_file_open(struct file *file) ...@@ -2837,7 +2831,7 @@ int security_file_open(struct file *file)
if (ret) if (ret)
return ret; return ret;
return fsnotify_perm(file, MAY_OPEN); return fsnotify_open_perm(file);
} }
/** /**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment