Commit 9ec1efbf authored by Linus Torvalds

Merge tag 'fuse-update-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse

Pull fuse updates from Miklos Szeredi:

 - Fix a page locking bug in write (introduced in 2.6.26)

 - Allow sgid bit to be killed in setacl()

 - Miscellaneous fixes and cleanups

* tag 'fuse-update-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse:
  cuse: simplify refcount
  cuse: prevent clone
  virtiofs: fix userns
  virtiofs: remove useless function
  virtiofs: split requests that exceed virtqueue size
  virtiofs: fix memory leak in virtio_fs_probe()
  fuse: invalidate attrs when page writeback completes
  fuse: add a flag FUSE_SETXATTR_ACL_KILL_SGID to kill SGID
  fuse: extend FUSE_SETXATTR request
  fuse: fix matching of FUSE_DEV_IOC_CLONE command
  fuse: fix a typo
  fuse: don't zero pages twice
  fuse: fix typo for fuse_conn.max_pages comment
  fuse: fix write deadlock
parents d652502e 3c9c1433
......@@ -71,6 +71,7 @@ int fuse_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
return -EINVAL;
if (acl) {
unsigned int extra_flags = 0;
/*
* Fuse userspace is responsible for updating access
* permissions in the inode, if needed. fuse_setxattr
......@@ -94,7 +95,11 @@ int fuse_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
return ret;
}
ret = fuse_setxattr(inode, name, value, size, 0);
if (!in_group_p(i_gid_into_mnt(&init_user_ns, inode)) &&
!capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID))
extra_flags |= FUSE_SETXATTR_ACL_KILL_SGID;
ret = fuse_setxattr(inode, name, value, size, 0, extra_flags);
kfree(value);
} else {
ret = fuse_removexattr(inode, name);
......
......@@ -511,20 +511,18 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
fuse_conn_init(&cc->fc, &cc->fm, file->f_cred->user_ns,
&fuse_dev_fiq_ops, NULL);
cc->fc.release = cuse_fc_release;
fud = fuse_dev_alloc_install(&cc->fc);
if (!fud) {
kfree(cc);
fuse_conn_put(&cc->fc);
if (!fud)
return -ENOMEM;
}
INIT_LIST_HEAD(&cc->list);
cc->fc.release = cuse_fc_release;
cc->fc.initialized = 1;
rc = cuse_send_init(cc);
if (rc) {
fuse_dev_free(fud);
fuse_conn_put(&cc->fc);
return rc;
}
file->private_data = fud;
......@@ -561,8 +559,6 @@ static int cuse_channel_release(struct inode *inode, struct file *file)
unregister_chrdev_region(cc->cdev->dev, 1);
cdev_del(cc->cdev);
}
/* Base reference is now owned by "fud" */
fuse_conn_put(&cc->fc);
rc = fuse_dev_release(inode, file); /* puts the base reference */
......@@ -627,6 +623,8 @@ static int __init cuse_init(void)
cuse_channel_fops.owner = THIS_MODULE;
cuse_channel_fops.open = cuse_channel_open;
cuse_channel_fops.release = cuse_channel_release;
/* CUSE is not prepared for FUSE_DEV_IOC_CLONE */
cuse_channel_fops.unlocked_ioctl = NULL;
cuse_class = class_create(THIS_MODULE, "cuse");
if (IS_ERR(cuse_class))
......
......@@ -2233,11 +2233,8 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
int oldfd;
struct fuse_dev *fud = NULL;
if (_IOC_TYPE(cmd) != FUSE_DEV_IOC_MAGIC)
return -ENOTTY;
switch (_IOC_NR(cmd)) {
case _IOC_NR(FUSE_DEV_IOC_CLONE):
switch (cmd) {
case FUSE_DEV_IOC_CLONE:
res = -EFAULT;
if (!get_user(oldfd, (__u32 __user *)arg)) {
struct file *old = fget(oldfd);
......
......@@ -802,21 +802,12 @@ static void fuse_short_read(struct inode *inode, u64 attr_ver, size_t num_read,
{
struct fuse_conn *fc = get_fuse_conn(inode);
if (fc->writeback_cache) {
/*
* A hole in a file. Some data after the hole are in page cache,
* but have not reached the client fs yet. So, the hole is not
* present there.
*/
int i;
int start_idx = num_read >> PAGE_SHIFT;
size_t off = num_read & (PAGE_SIZE - 1);
for (i = start_idx; i < ap->num_pages; i++) {
zero_user_segment(ap->pages[i], off, PAGE_SIZE);
off = 0;
}
} else {
/*
* If writeback_cache is enabled, a short read means there's a hole in
* the file. Some data after the hole is in page cache, but has not
* reached the client fs yet. So the hole is not present there.
*/
if (!fc->writeback_cache) {
loff_t pos = page_offset(ap->pages[0]) + num_read;
fuse_read_update_size(inode, pos, attr_ver);
}
......@@ -1103,6 +1094,7 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
struct fuse_file *ff = file->private_data;
struct fuse_mount *fm = ff->fm;
unsigned int offset, i;
bool short_write;
int err;
for (i = 0; i < ap->num_pages; i++)
......@@ -1117,32 +1109,38 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
if (!err && ia->write.out.size > count)
err = -EIO;
short_write = ia->write.out.size < count;
offset = ap->descs[0].offset;
count = ia->write.out.size;
for (i = 0; i < ap->num_pages; i++) {
struct page *page = ap->pages[i];
if (!err && !offset && count >= PAGE_SIZE)
SetPageUptodate(page);
if (count > PAGE_SIZE - offset)
count -= PAGE_SIZE - offset;
else
count = 0;
offset = 0;
unlock_page(page);
if (err) {
ClearPageUptodate(page);
} else {
if (count >= PAGE_SIZE - offset)
count -= PAGE_SIZE - offset;
else {
if (short_write)
ClearPageUptodate(page);
count = 0;
}
offset = 0;
}
if (ia->write.page_locked && (i == ap->num_pages - 1))
unlock_page(page);
put_page(page);
}
return err;
}
static ssize_t fuse_fill_write_pages(struct fuse_args_pages *ap,
static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
struct address_space *mapping,
struct iov_iter *ii, loff_t pos,
unsigned int max_pages)
{
struct fuse_args_pages *ap = &ia->ap;
struct fuse_conn *fc = get_fuse_conn(mapping->host);
unsigned offset = pos & (PAGE_SIZE - 1);
size_t count = 0;
......@@ -1195,6 +1193,16 @@ static ssize_t fuse_fill_write_pages(struct fuse_args_pages *ap,
if (offset == PAGE_SIZE)
offset = 0;
/* If we copied full page, mark it uptodate */
if (tmp == PAGE_SIZE)
SetPageUptodate(page);
if (PageUptodate(page)) {
unlock_page(page);
} else {
ia->write.page_locked = true;
break;
}
if (!fc->big_writes)
break;
} while (iov_iter_count(ii) && count < fc->max_write &&
......@@ -1238,7 +1246,7 @@ static ssize_t fuse_perform_write(struct kiocb *iocb,
break;
}
count = fuse_fill_write_pages(ap, mapping, ii, pos, nr_pages);
count = fuse_fill_write_pages(&ia, mapping, ii, pos, nr_pages);
if (count <= 0) {
err = count;
} else {
......@@ -1753,8 +1761,17 @@ static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args,
container_of(args, typeof(*wpa), ia.ap.args);
struct inode *inode = wpa->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_conn *fc = get_fuse_conn(inode);
mapping_set_error(inode->i_mapping, error);
/*
* A writeback finished and this might have updated mtime/ctime on
* server making local mtime/ctime stale. Hence invalidate attrs.
* Do this only if writeback_cache is not enabled. If writeback_cache
* is enabled, we trust local ctime/mtime.
*/
if (!fc->writeback_cache)
fuse_invalidate_attr(inode);
spin_lock(&fi->lock);
rb_erase(&wpa->writepages_entry, &fi->writepages);
while (wpa->next) {
......
......@@ -552,9 +552,12 @@ struct fuse_conn {
/** Maximum write size */
unsigned max_write;
/** Maxmum number of pages that can be used in a single request */
/** Maximum number of pages that can be used in a single request */
unsigned int max_pages;
/** Constrain ->max_pages to this value during feature negotiation */
unsigned int max_pages_limit;
/** Input queue */
struct fuse_iqueue iq;
......@@ -668,6 +671,9 @@ struct fuse_conn {
/** Is setxattr not implemented by fs? */
unsigned no_setxattr:1;
/** Does file server support extended setxattr */
unsigned setxattr_ext:1;
/** Is getxattr not implemented by fs? */
unsigned no_getxattr:1;
......@@ -713,7 +719,7 @@ struct fuse_conn {
/** Use enhanced/automatic page cache invalidation. */
unsigned auto_inval_data:1;
/** Filesystem is fully reponsible for page cache invalidation. */
/** Filesystem is fully responsible for page cache invalidation. */
unsigned explicit_inval_data:1;
/** Does the filesystem support readdirplus? */
......@@ -934,6 +940,7 @@ struct fuse_io_args {
struct {
struct fuse_write_in in;
struct fuse_write_out out;
bool page_locked;
} write;
};
struct fuse_args_pages ap;
......@@ -1193,7 +1200,7 @@ void fuse_unlock_inode(struct inode *inode, bool locked);
bool fuse_lock_inode(struct inode *inode);
int fuse_setxattr(struct inode *inode, const char *name, const void *value,
size_t size, int flags);
size_t size, int flags, unsigned int extra_flags);
ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
size_t size);
ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size);
......
......@@ -712,6 +712,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
fc->user_ns = get_user_ns(user_ns);
fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
fc->max_pages_limit = FUSE_MAX_MAX_PAGES;
INIT_LIST_HEAD(&fc->mounts);
list_add(&fm->fc_entry, &fc->mounts);
......@@ -1040,7 +1041,7 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
fc->abort_err = 1;
if (arg->flags & FUSE_MAX_PAGES) {
fc->max_pages =
min_t(unsigned int, FUSE_MAX_MAX_PAGES,
min_t(unsigned int, fc->max_pages_limit,
max_t(unsigned int, arg->max_pages, 1));
}
if (IS_ENABLED(CONFIG_FUSE_DAX) &&
......@@ -1052,6 +1053,8 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
fc->handle_killpriv_v2 = 1;
fm->sb->s_flags |= SB_NOSEC;
}
if (arg->flags & FUSE_SETXATTR_EXT)
fc->setxattr_ext = 1;
} else {
ra_pages = fc->max_read / PAGE_SIZE;
fc->no_lock = 1;
......@@ -1095,7 +1098,7 @@ void fuse_send_init(struct fuse_mount *fm)
FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
FUSE_HANDLE_KILLPRIV_V2;
FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT;
#ifdef CONFIG_FUSE_DAX
if (fm->fc->dax)
ia->in.flags |= FUSE_MAP_ALIGNMENT;
......
......@@ -18,6 +18,12 @@
#include <linux/uio.h>
#include "fuse_i.h"
/* Used to help calculate the FUSE connection's max_pages limit for a request's
* size. Parts of the struct fuse_req are sliced into scattergather lists in
* addition to the pages used, so this can help account for that overhead.
*/
#define FUSE_HEADER_OVERHEAD 4
/* List of virtio-fs device instances and a lock for the list. Also provides
* mutual exclusion in device removal and mounting path
*/
......@@ -127,11 +133,6 @@ static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
return &fs->vqs[vq->index];
}
static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq)
{
return &vq_to_fsvq(vq)->fud->pq;
}
/* Should be called with fsvq->lock held. */
static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
{
......@@ -896,6 +897,7 @@ static int virtio_fs_probe(struct virtio_device *vdev)
out_vqs:
vdev->config->reset(vdev);
virtio_fs_cleanup_vqs(vdev, fs);
kfree(fs->vqs);
out:
vdev->priv = NULL;
......@@ -1413,9 +1415,10 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
{
struct virtio_fs *fs;
struct super_block *sb;
struct fuse_conn *fc;
struct fuse_conn *fc = NULL;
struct fuse_mount *fm;
int err;
unsigned int virtqueue_size;
int err = -EIO;
/* This gets a reference on virtio_fs object. This ptr gets installed
* in fc->iq->priv. Once fuse_conn is going away, it calls ->put()
......@@ -1427,6 +1430,10 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
return -EINVAL;
}
virtqueue_size = virtqueue_get_vring_size(fs->vqs[VQ_REQUEST].vq);
if (WARN_ON(virtqueue_size <= FUSE_HEADER_OVERHEAD))
goto out_err;
err = -ENOMEM;
fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
if (!fc)
......@@ -1436,12 +1443,15 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
if (!fm)
goto out_err;
fuse_conn_init(fc, fm, get_user_ns(current_user_ns()),
&virtio_fs_fiq_ops, fs);
fuse_conn_init(fc, fm, fsc->user_ns, &virtio_fs_fiq_ops, fs);
fc->release = fuse_free_conn;
fc->delete_stale = true;
fc->auto_submounts = true;
/* Tell FUSE to split requests that exceed the virtqueue's size */
fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit,
virtqueue_size - FUSE_HEADER_OVERHEAD);
fsc->s_fs_info = fm;
sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc);
if (fsc->s_fs_info) {
......
......@@ -12,7 +12,7 @@
#include <linux/posix_acl_xattr.h>
int fuse_setxattr(struct inode *inode, const char *name, const void *value,
size_t size, int flags)
size_t size, int flags, unsigned int extra_flags)
{
struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
......@@ -25,10 +25,13 @@ int fuse_setxattr(struct inode *inode, const char *name, const void *value,
memset(&inarg, 0, sizeof(inarg));
inarg.size = size;
inarg.flags = flags;
inarg.setxattr_flags = extra_flags;
args.opcode = FUSE_SETXATTR;
args.nodeid = get_node_id(inode);
args.in_numargs = 3;
args.in_args[0].size = sizeof(inarg);
args.in_args[0].size = fm->fc->setxattr_ext ?
sizeof(inarg) : FUSE_COMPAT_SETXATTR_IN_SIZE;
args.in_args[0].value = &inarg;
args.in_args[1].size = strlen(name) + 1;
args.in_args[1].value = name;
......@@ -199,7 +202,7 @@ static int fuse_xattr_set(const struct xattr_handler *handler,
if (!value)
return fuse_removexattr(inode, name);
return fuse_setxattr(inode, name, value, size, flags);
return fuse_setxattr(inode, name, value, size, flags, 0);
}
static bool no_xattr_list(struct dentry *dentry)
......
......@@ -179,6 +179,8 @@
* 7.33
* - add FUSE_HANDLE_KILLPRIV_V2, FUSE_WRITE_KILL_SUIDGID, FATTR_KILL_SUIDGID
* - add FUSE_OPEN_KILL_SUIDGID
* - extend fuse_setxattr_in, add FUSE_SETXATTR_EXT
* - add FUSE_SETXATTR_ACL_KILL_SGID
*/
#ifndef _LINUX_FUSE_H
......@@ -330,6 +332,7 @@ struct fuse_file_lock {
* does not have CAP_FSETID. Additionally upon
* write/truncate sgid is killed only if file has group
* execute permission. (Same as Linux VFS behavior).
* FUSE_SETXATTR_EXT: Server supports extended struct fuse_setxattr_in
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
......@@ -360,6 +363,7 @@ struct fuse_file_lock {
#define FUSE_MAP_ALIGNMENT (1 << 26)
#define FUSE_SUBMOUNTS (1 << 27)
#define FUSE_HANDLE_KILLPRIV_V2 (1 << 28)
#define FUSE_SETXATTR_EXT (1 << 29)
/**
* CUSE INIT request/reply flags
......@@ -451,6 +455,12 @@ struct fuse_file_lock {
*/
#define FUSE_OPEN_KILL_SUIDGID (1 << 0)
/**
* setxattr flags
* FUSE_SETXATTR_ACL_KILL_SGID: Clear SGID when system.posix_acl_access is set
*/
#define FUSE_SETXATTR_ACL_KILL_SGID (1 << 0)
enum fuse_opcode {
FUSE_LOOKUP = 1,
FUSE_FORGET = 2, /* no reply */
......@@ -681,9 +691,13 @@ struct fuse_fsync_in {
uint32_t padding;
};
#define FUSE_COMPAT_SETXATTR_IN_SIZE 8
/*
 * Fixed-size header argument of a FUSE_SETXATTR request (in_args[0] in
 * fuse_setxattr()). fuse_setxattr() zeroes the whole struct and then fills
 * size, flags and setxattr_flags. When the connection's setxattr_ext bit is
 * not set (it is set when the INIT reply carries FUSE_SETXATTR_EXT),
 * fuse_setxattr() passes FUSE_COMPAT_SETXATTR_IN_SIZE as this argument's
 * size instead of sizeof(struct fuse_setxattr_in).
 */
struct fuse_setxattr_in {
uint32_t size; /* the size argument given to fuse_setxattr() */
uint32_t flags; /* the flags argument given to fuse_setxattr() */
uint32_t setxattr_flags; /* extra_flags from the caller, e.g. FUSE_SETXATTR_ACL_KILL_SGID */
uint32_t padding; /* not assigned by fuse_setxattr(); stays zero from its memset */
};
struct fuse_getxattr_in {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment