Commit 7b1373dd authored by Linus Torvalds

Merge tag 'fuse-update-5.4' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse

Pull fuse updates from Miklos Szeredi:

 - Continue separating the transport (user/kernel communication) and the
   filesystem layers of fuse. Getting rid of most layering violations
   will allow for easier cleanup and optimization later on.

 - Prepare for the addition of the virtio-fs filesystem. The actual
   filesystem will be introduced by a separate pull request.

 - Convert to new mount API.

 - Various fixes, optimizations and cleanups.

* tag 'fuse-update-5.4' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: (55 commits)
  fuse: Make fuse_args_to_req static
  fuse: fix memleak in cuse_channel_open
  fuse: fix beyond-end-of-page access in fuse_parse_cache()
  fuse: unexport fuse_put_request
  fuse: kmemcg account fs data
  fuse: on 64-bit store time in d_fsdata directly
  fuse: fix missing unlock_page in fuse_writepage()
  fuse: reserve byteswapped init opcodes
  fuse: allow skipping control interface and forced unmount
  fuse: dissociate DESTROY from fuseblk
  fuse: delete dentry if timeout is zero
  fuse: separate fuse device allocation and installation in fuse_conn
  fuse: add fuse_iqueue_ops callbacks
  fuse: extract fuse_fill_super_common()
  fuse: export fuse_dequeue_forget() function
  fuse: export fuse_get_unique()
  fuse: export fuse_send_init_request()
  fuse: export fuse_len_args()
  fuse: export fuse_end_request()
  fuse: fix request limit
  ...
parents 301310c6 5addcd5d
......@@ -504,7 +504,6 @@ void put_fs_context(struct fs_context *fc)
put_net(fc->net_ns);
put_user_ns(fc->user_ns);
put_cred(fc->cred);
kfree(fc->subtype);
put_fc_log(fc);
put_filesystem(fc->fs_type);
kfree(fc->source);
......@@ -571,17 +570,6 @@ static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param)
return 0;
}
if ((fc->fs_type->fs_flags & FS_HAS_SUBTYPE) &&
strcmp(param->key, "subtype") == 0) {
if (param->type != fs_value_is_string)
return invalf(fc, "VFS: Legacy: Non-string subtype");
if (fc->subtype)
return invalf(fc, "VFS: Legacy: Multiple subtype");
fc->subtype = param->string;
param->string = NULL;
return 0;
}
if (ctx->param_type == LEGACY_FS_MONOLITHIC_PARAMS)
return invalf(fc, "VFS: Legacy: Can't mix monolithic and individual options");
......@@ -738,8 +726,6 @@ void vfs_clean_context(struct fs_context *fc)
fc->s_fs_info = NULL;
fc->sb_flags = 0;
security_free_mnt_opts(&fc->security);
kfree(fc->subtype);
fc->subtype = NULL;
kfree(fc->source);
fc->source = NULL;
......
......@@ -142,11 +142,10 @@ static int cuse_open(struct inode *inode, struct file *file)
static int cuse_release(struct inode *inode, struct file *file)
{
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fc;
fuse_sync_release(fi, ff, file->f_flags);
fuse_sync_release(NULL, ff, file->f_flags);
fuse_conn_put(fc);
return 0;
......@@ -299,6 +298,14 @@ static void cuse_gendev_release(struct device *dev)
kfree(dev);
}
/*
 * State for one CUSE_INIT request, kept in a single allocation.
 *
 * cuse_send_init() allocates it with kzalloc() and points the request
 * arguments at the embedded members; cuse_process_init_reply() recovers it
 * from the fuse_args pointer via container_of(..., ap.args) and frees it.
 */
struct cuse_init_args {
/* Request arguments; &ap.args is what is handed to fuse_simple_background() */
struct fuse_args_pages ap;
/* Input payload, sent as in_args[0] */
struct cuse_init_in in;
/* Fixed-size reply payload, received as out_args[0] */
struct cuse_init_out out;
/* Page receiving the variable-sized out_args[1]; ap.pages points here */
struct page *page;
/* Descriptor for @page; ap.descs points here */
struct fuse_page_desc desc;
};
/**
* cuse_process_init_reply - finish initializing CUSE channel
*
......@@ -306,21 +313,22 @@ static void cuse_gendev_release(struct device *dev)
* required data structures for it. Please read the comment at the
* top of this file for high level overview.
*/
static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
static void cuse_process_init_reply(struct fuse_conn *fc,
struct fuse_args *args, int error)
{
struct cuse_init_args *ia = container_of(args, typeof(*ia), ap.args);
struct fuse_args_pages *ap = &ia->ap;
struct cuse_conn *cc = fc_to_cc(fc), *pos;
struct cuse_init_out *arg = req->out.args[0].value;
struct page *page = req->pages[0];
struct cuse_init_out *arg = &ia->out;
struct page *page = ap->pages[0];
struct cuse_devinfo devinfo = { };
struct device *dev;
struct cdev *cdev;
dev_t devt;
int rc, i;
if (req->out.h.error ||
arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) {
if (error || arg->major != FUSE_KERNEL_VERSION || arg->minor < 11)
goto err;
}
fc->minor = arg->minor;
fc->max_read = max_t(unsigned, arg->max_read, 4096);
......@@ -329,7 +337,7 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
/* parse init reply */
cc->unrestricted_ioctl = arg->flags & CUSE_UNRESTRICTED_IOCTL;
rc = cuse_parse_devinfo(page_address(page), req->out.args[1].size,
rc = cuse_parse_devinfo(page_address(page), ap->args.out_args[1].size,
&devinfo);
if (rc)
goto err;
......@@ -396,7 +404,7 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
dev_set_uevent_suppress(dev, 0);
kobject_uevent(&dev->kobj, KOBJ_ADD);
out:
kfree(arg);
kfree(ia);
__free_page(page);
return;
......@@ -415,55 +423,49 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
static int cuse_send_init(struct cuse_conn *cc)
{
int rc;
struct fuse_req *req;
struct page *page;
struct fuse_conn *fc = &cc->fc;
struct cuse_init_in *arg;
void *outarg;
struct cuse_init_args *ia;
struct fuse_args_pages *ap;
BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
req = fuse_get_req_for_background(fc, 1);
if (IS_ERR(req)) {
rc = PTR_ERR(req);
goto err;
}
rc = -ENOMEM;
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page)
goto err_put_req;
goto err;
outarg = kzalloc(sizeof(struct cuse_init_out), GFP_KERNEL);
if (!outarg)
ia = kzalloc(sizeof(*ia), GFP_KERNEL);
if (!ia)
goto err_free_page;
arg = &req->misc.cuse_init_in;
arg->major = FUSE_KERNEL_VERSION;
arg->minor = FUSE_KERNEL_MINOR_VERSION;
arg->flags |= CUSE_UNRESTRICTED_IOCTL;
req->in.h.opcode = CUSE_INIT;
req->in.numargs = 1;
req->in.args[0].size = sizeof(struct cuse_init_in);
req->in.args[0].value = arg;
req->out.numargs = 2;
req->out.args[0].size = sizeof(struct cuse_init_out);
req->out.args[0].value = outarg;
req->out.args[1].size = CUSE_INIT_INFO_MAX;
req->out.argvar = 1;
req->out.argpages = 1;
req->pages[0] = page;
req->page_descs[0].length = req->out.args[1].size;
req->num_pages = 1;
req->end = cuse_process_init_reply;
fuse_request_send_background(fc, req);
return 0;
ap = &ia->ap;
ia->in.major = FUSE_KERNEL_VERSION;
ia->in.minor = FUSE_KERNEL_MINOR_VERSION;
ia->in.flags |= CUSE_UNRESTRICTED_IOCTL;
ap->args.opcode = CUSE_INIT;
ap->args.in_numargs = 1;
ap->args.in_args[0].size = sizeof(ia->in);
ap->args.in_args[0].value = &ia->in;
ap->args.out_numargs = 2;
ap->args.out_args[0].size = sizeof(ia->out);
ap->args.out_args[0].value = &ia->out;
ap->args.out_args[1].size = CUSE_INIT_INFO_MAX;
ap->args.out_argvar = 1;
ap->args.out_pages = 1;
ap->num_pages = 1;
ap->pages = &ia->page;
ap->descs = &ia->desc;
ia->page = page;
ia->desc.length = ap->args.out_args[1].size;
ap->args.end = cuse_process_init_reply;
rc = fuse_simple_background(fc, &ap->args, GFP_KERNEL);
if (rc) {
kfree(ia);
err_free_page:
__free_page(page);
err_put_req:
fuse_put_request(fc, req);
}
err:
return rc;
}
......@@ -504,9 +506,9 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
* Limit the cuse channel to requests that can
* be represented in file->f_cred->user_ns.
*/
fuse_conn_init(&cc->fc, file->f_cred->user_ns);
fuse_conn_init(&cc->fc, file->f_cred->user_ns, &fuse_dev_fiq_ops, NULL);
fud = fuse_dev_alloc(&cc->fc);
fud = fuse_dev_alloc_install(&cc->fc);
if (!fud) {
kfree(cc);
return -ENOMEM;
......@@ -519,6 +521,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
rc = cuse_send_init(cc);
if (rc) {
fuse_dev_free(fud);
fuse_conn_put(&cc->fc);
return rc;
}
file->private_data = fud;
......
......@@ -40,107 +40,30 @@ static struct fuse_dev *fuse_get_dev(struct file *file)
return READ_ONCE(file->private_data);
}
static void fuse_request_init(struct fuse_req *req, struct page **pages,
struct fuse_page_desc *page_descs,
unsigned npages)
static void fuse_request_init(struct fuse_req *req)
{
INIT_LIST_HEAD(&req->list);
INIT_LIST_HEAD(&req->intr_entry);
init_waitqueue_head(&req->waitq);
refcount_set(&req->count, 1);
req->pages = pages;
req->page_descs = page_descs;
req->max_pages = npages;
__set_bit(FR_PENDING, &req->flags);
}
/*
 * Allocate, as one zeroed chunk, an array of @npages page pointers followed
 * directly by an array of @npages page descriptors.
 *
 * Returns the page-pointer array, or NULL if the allocation failed.  The
 * address of the descriptor array is stored in *@desc; it is written even on
 * failure, so callers must test the return value rather than *@desc.
 */
static struct page **fuse_req_pages_alloc(unsigned int npages, gfp_t flags,
					  struct fuse_page_desc **desc)
{
	struct page **page_array;

	page_array = kzalloc(npages * (sizeof(*page_array) + sizeof(**desc)),
			     flags);
	/* The descriptors start right after the last page pointer */
	*desc = (struct fuse_page_desc *)(page_array + npages);

	return page_array;
}
static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
static struct fuse_req *fuse_request_alloc(gfp_t flags)
{
struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags);
if (req) {
struct page **pages = NULL;
struct fuse_page_desc *page_descs = NULL;
WARN_ON(npages > FUSE_MAX_MAX_PAGES);
if (npages > FUSE_REQ_INLINE_PAGES) {
pages = fuse_req_pages_alloc(npages, flags,
&page_descs);
if (!pages) {
kmem_cache_free(fuse_req_cachep, req);
return NULL;
}
} else if (npages) {
pages = req->inline_pages;
page_descs = req->inline_page_descs;
}
if (req)
fuse_request_init(req);
fuse_request_init(req, pages, page_descs, npages);
}
return req;
}
/*
 * Allocate a request with room for @npages pages using GFP_KERNEL.
 * Returns whatever __fuse_request_alloc() returns (NULL on failure).
 */
struct fuse_req *fuse_request_alloc(unsigned npages)
{
return __fuse_request_alloc(npages, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(fuse_request_alloc);
/*
 * Same as fuse_request_alloc(), but allocates with GFP_NOFS instead of
 * GFP_KERNEL.
 */
struct fuse_req *fuse_request_alloc_nofs(unsigned npages)
{
return __fuse_request_alloc(npages, GFP_NOFS);
}
/*
 * Release the page-pointer array of @req, unless the request is using the
 * array embedded in the request itself (req->inline_pages).
 */
static void fuse_req_pages_free(struct fuse_req *req)
{
	/* The inline array is part of the request and must not be kfree()d */
	if (req->pages == req->inline_pages)
		return;

	kfree(req->pages);
}
/*
 * Grow the page and page-descriptor arrays of @req.
 *
 * The new capacity is twice the current one, but at least
 * FUSE_DEFAULT_MAX_PAGES_PER_REQ and at most fc->max_pages.  The existing
 * entries are copied over and the old array is released.
 *
 * Returns true on success, false if the new arrays could not be allocated
 * (in which case @req is left untouched).
 */
bool fuse_req_realloc_pages(struct fuse_conn *fc, struct fuse_req *req,
			    gfp_t flags)
{
	struct fuse_page_desc *new_descs;
	struct page **new_pages;
	unsigned int new_max;

	new_max = max_t(unsigned int, req->max_pages * 2,
			FUSE_DEFAULT_MAX_PAGES_PER_REQ);
	new_max = min_t(unsigned int, new_max, fc->max_pages);
	WARN_ON(new_max <= req->max_pages);

	new_pages = fuse_req_pages_alloc(new_max, flags, &new_descs);
	if (!new_pages)
		return false;

	/* Carry the currently used slots over into the larger arrays */
	memcpy(new_pages, req->pages, req->max_pages * sizeof(*new_pages));
	memcpy(new_descs, req->page_descs,
	       req->max_pages * sizeof(*new_descs));

	fuse_req_pages_free(req);

	req->max_pages = new_max;
	req->page_descs = new_descs;
	req->pages = new_pages;

	return true;
}
void fuse_request_free(struct fuse_req *req)
static void fuse_request_free(struct fuse_req *req)
{
fuse_req_pages_free(req);
kmem_cache_free(fuse_req_cachep, req);
}
void __fuse_get_request(struct fuse_req *req)
static void __fuse_get_request(struct fuse_req *req)
{
refcount_inc(&req->count);
}
......@@ -177,8 +100,9 @@ static void fuse_drop_waiting(struct fuse_conn *fc)
}
}
static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
bool for_background)
static void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
static struct fuse_req *fuse_get_req(struct fuse_conn *fc, bool for_background)
{
struct fuse_req *req;
int err;
......@@ -201,7 +125,7 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
if (fc->conn_error)
goto out;
req = fuse_request_alloc(npages);
req = fuse_request_alloc(GFP_KERNEL);
err = -ENOMEM;
if (!req) {
if (for_background)
......@@ -229,101 +153,7 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
return ERR_PTR(err);
}
/*
 * Get a request with room for @npages pages for a non-background operation
 * (for_background == false).  Returns the result of __fuse_get_req(), which
 * is an ERR_PTR() on failure.
 */
struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages)
{
return __fuse_get_req(fc, npages, false);
}
EXPORT_SYMBOL_GPL(fuse_get_req);
/*
 * Get a request with room for @npages pages for a background operation
 * (for_background == true).  Returns the result of __fuse_get_req(), which
 * is an ERR_PTR() on failure.
 */
struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc,
unsigned npages)
{
return __fuse_get_req(fc, npages, true);
}
EXPORT_SYMBOL_GPL(fuse_get_req_for_background);
/*
 * Take the request stored in fuse_file->reserved_req.
 *
 * The reserved request may currently be in use by somebody else; in that
 * case sleep on fc->reserved_req_waitq until it has been put back.  The
 * request keeps a reference to @file in req->stolen_file so that it can be
 * returned later.
 */
static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
					 struct file *file)
{
	struct fuse_file *ff = file->private_data;
	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
	struct fuse_req *req;

	for (;;) {
		wait_event(fc->reserved_req_waitq, ff->reserved_req);

		/* Re-check under the lock: another waiter may have won */
		spin_lock(&fi->lock);
		req = ff->reserved_req;
		if (req) {
			ff->reserved_req = NULL;
			req->stolen_file = get_file(file);
		}
		spin_unlock(&fi->lock);

		if (req)
			return req;
	}
}
/*
 * Put a stolen request back into fuse_file->reserved_req.
 *
 * The request is wiped and re-initialised, stored back in the fuse_file it
 * was taken from, and all waiters on fc->reserved_req_waitq are woken.  The
 * file reference held in req->stolen_file is dropped last.
 */
static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
{
/* Read the file now: the memset() below clears req->stolen_file */
struct file *file = req->stolen_file;
struct fuse_inode *fi = get_fuse_inode(file_inode(file));
struct fuse_file *ff = file->private_data;
WARN_ON(req->max_pages);
spin_lock(&fi->lock);
memset(req, 0, sizeof(*req));
fuse_request_init(req, NULL, NULL, 0);
/* The slot must be empty while its request is stolen */
BUG_ON(ff->reserved_req);
ff->reserved_req = req;
wake_up_all(&fc->reserved_req_waitq);
spin_unlock(&fi->lock);
fput(file);
}
/*
 * Get a request for a file operation; always succeeds.
 *
 * This is used for sending the FLUSH request, which must get to
 * userspace, due to POSIX locks which may need to be unlocked.
 *
 * If allocation fails due to OOM, use the reserved request in
 * fuse_file.
 *
 * This is very unlikely to deadlock accidentally, since the
 * filesystem should not have its own file open.  If deadlock is
 * intentional, it can still be broken by "aborting" the filesystem.
 */
struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
struct file *file)
{
struct fuse_req *req;
atomic_inc(&fc->num_waiting);
wait_event(fc->blocked_waitq, fc->initialized);
/* Matches smp_wmb() in fuse_set_initialized() */
smp_rmb();
req = fuse_request_alloc(0);
/* Out of memory: fall back to the request reserved in the fuse_file */
if (!req)
req = get_reserved_req(fc, file);
/* Fill in the caller's credentials, mapped into the connection's namespaces */
req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid());
req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid());
req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
/* Corresponds to the num_waiting increment at the top */
__set_bit(FR_WAITING, &req->flags);
__clear_bit(FR_BACKGROUND, &req->flags);
return req;
}
void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
static void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
{
if (refcount_dec_and_test(&req->count)) {
if (test_bit(FR_BACKGROUND, &req->flags)) {
......@@ -342,15 +172,11 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
fuse_drop_waiting(fc);
}
if (req->stolen_file)
put_reserved_req(fc, req);
else
fuse_request_free(req);
}
}
EXPORT_SYMBOL_GPL(fuse_put_request);
static unsigned len_args(unsigned numargs, struct fuse_arg *args)
unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args)
{
unsigned nbytes = 0;
unsigned i;
......@@ -360,25 +186,47 @@ static unsigned len_args(unsigned numargs, struct fuse_arg *args)
return nbytes;
}
EXPORT_SYMBOL_GPL(fuse_len_args);
static u64 fuse_get_unique(struct fuse_iqueue *fiq)
u64 fuse_get_unique(struct fuse_iqueue *fiq)
{
fiq->reqctr += FUSE_REQ_ID_STEP;
return fiq->reqctr;
}
EXPORT_SYMBOL_GPL(fuse_get_unique);
static unsigned int fuse_req_hash(u64 unique)
{
return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS);
}
static void queue_request(struct fuse_iqueue *fiq, struct fuse_req *req)
/*
 * Notify readers of the fuse device that new work is queued on @fiq.
 *
 * Wakes waiters on fiq->waitq, sends SIGIO (POLL_IN) to fasync subscribers
 * and then drops fiq->lock.  Must be called with fiq->lock held; the lock
 * is released on return.
 */
static void fuse_dev_wake_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
wake_up(&fiq->waitq);
kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
spin_unlock(&fiq->lock);
}
/*
 * Input-queue callbacks for the fuse device.
 *
 * Forgets, interrupts and pending requests are all announced in the same
 * way, so all three callbacks share fuse_dev_wake_and_unlock().  Each
 * callback is invoked with fiq->lock held and releases it.
 */
const struct fuse_iqueue_ops fuse_dev_fiq_ops = {
.wake_forget_and_unlock = fuse_dev_wake_and_unlock,
.wake_interrupt_and_unlock = fuse_dev_wake_and_unlock,
.wake_pending_and_unlock = fuse_dev_wake_and_unlock,
};
EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops);
static void queue_request_and_unlock(struct fuse_iqueue *fiq,
struct fuse_req *req)
__releases(fiq->lock)
{
req->in.h.len = sizeof(struct fuse_in_header) +
len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
fuse_len_args(req->args->in_numargs,
(struct fuse_arg *) req->args->in_args);
list_add_tail(&req->list, &fiq->pending);
wake_up_locked(&fiq->waitq);
kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
fiq->ops->wake_pending_and_unlock(fiq);
}
void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
......@@ -389,16 +237,15 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
forget->forget_one.nodeid = nodeid;
forget->forget_one.nlookup = nlookup;
spin_lock(&fiq->waitq.lock);
spin_lock(&fiq->lock);
if (fiq->connected) {
fiq->forget_list_tail->next = forget;
fiq->forget_list_tail = forget;
wake_up_locked(&fiq->waitq);
kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
fiq->ops->wake_forget_and_unlock(fiq);
} else {
kfree(forget);
spin_unlock(&fiq->lock);
}
spin_unlock(&fiq->waitq.lock);
}
static void flush_bg_queue(struct fuse_conn *fc)
......@@ -412,10 +259,9 @@ static void flush_bg_queue(struct fuse_conn *fc)
req = list_first_entry(&fc->bg_queue, struct fuse_req, list);
list_del(&req->list);
fc->active_background++;
spin_lock(&fiq->waitq.lock);
spin_lock(&fiq->lock);
req->in.h.unique = fuse_get_unique(fiq);
queue_request(fiq, req);
spin_unlock(&fiq->waitq.lock);
queue_request_and_unlock(fiq, req);
}
}
......@@ -427,9 +273,10 @@ static void flush_bg_queue(struct fuse_conn *fc)
* the 'end' callback is called if given, else the reference to the
* request is released
*/
static void request_end(struct fuse_conn *fc, struct fuse_req *req)
void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req)
{
struct fuse_iqueue *fiq = &fc->iq;
bool async = req->args->end;
if (test_and_set_bit(FR_FINISHED, &req->flags))
goto put_request;
......@@ -439,9 +286,9 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
* smp_mb() from queue_interrupt().
*/
if (!list_empty(&req->intr_entry)) {
spin_lock(&fiq->waitq.lock);
spin_lock(&fiq->lock);
list_del_init(&req->intr_entry);
spin_unlock(&fiq->waitq.lock);
spin_unlock(&fiq->lock);
}
WARN_ON(test_bit(FR_PENDING, &req->flags));
WARN_ON(test_bit(FR_SENT, &req->flags));
......@@ -475,18 +322,19 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
wake_up(&req->waitq);
}
if (req->end)
req->end(fc, req);
if (async)
req->args->end(fc, req->args, req->out.h.error);
put_request:
fuse_put_request(fc, req);
}
EXPORT_SYMBOL_GPL(fuse_request_end);
static int queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
{
spin_lock(&fiq->waitq.lock);
spin_lock(&fiq->lock);
/* Check for we've sent request to interrupt this req */
if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) {
spin_unlock(&fiq->waitq.lock);
spin_unlock(&fiq->lock);
return -EINVAL;
}
......@@ -499,13 +347,13 @@ static int queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
smp_mb();
if (test_bit(FR_FINISHED, &req->flags)) {
list_del_init(&req->intr_entry);
spin_unlock(&fiq->waitq.lock);
spin_unlock(&fiq->lock);
return 0;
}
wake_up_locked(&fiq->waitq);
kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
fiq->ops->wake_interrupt_and_unlock(fiq);
} else {
spin_unlock(&fiq->lock);
}
spin_unlock(&fiq->waitq.lock);
return 0;
}
......@@ -535,16 +383,16 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
if (!err)
return;
spin_lock(&fiq->waitq.lock);
spin_lock(&fiq->lock);
/* Request is not yet in userspace, bail out */
if (test_bit(FR_PENDING, &req->flags)) {
list_del(&req->list);
spin_unlock(&fiq->waitq.lock);
spin_unlock(&fiq->lock);
__fuse_put_request(req);
req->out.h.error = -EINTR;
return;
}
spin_unlock(&fiq->waitq.lock);
spin_unlock(&fiq->lock);
}
/*
......@@ -559,101 +407,110 @@ static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
struct fuse_iqueue *fiq = &fc->iq;
BUG_ON(test_bit(FR_BACKGROUND, &req->flags));
spin_lock(&fiq->waitq.lock);
spin_lock(&fiq->lock);
if (!fiq->connected) {
spin_unlock(&fiq->waitq.lock);
spin_unlock(&fiq->lock);
req->out.h.error = -ENOTCONN;
} else {
req->in.h.unique = fuse_get_unique(fiq);
queue_request(fiq, req);
/* acquire extra reference, since request is still needed
after request_end() */
after fuse_request_end() */
__fuse_get_request(req);
spin_unlock(&fiq->waitq.lock);
queue_request_and_unlock(fiq, req);
request_wait_answer(fc, req);
/* Pairs with smp_wmb() in request_end() */
/* Pairs with smp_wmb() in fuse_request_end() */
smp_rmb();
}
}
void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
{
__set_bit(FR_ISREPLY, &req->flags);
if (!test_bit(FR_WAITING, &req->flags)) {
__set_bit(FR_WAITING, &req->flags);
atomic_inc(&fc->num_waiting);
}
__fuse_request_send(fc, req);
}
EXPORT_SYMBOL_GPL(fuse_request_send);
static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
{
if (fc->minor < 4 && args->in.h.opcode == FUSE_STATFS)
args->out.args[0].size = FUSE_COMPAT_STATFS_SIZE;
if (fc->minor < 4 && args->opcode == FUSE_STATFS)
args->out_args[0].size = FUSE_COMPAT_STATFS_SIZE;
if (fc->minor < 9) {
switch (args->in.h.opcode) {
switch (args->opcode) {
case FUSE_LOOKUP:
case FUSE_CREATE:
case FUSE_MKNOD:
case FUSE_MKDIR:
case FUSE_SYMLINK:
case FUSE_LINK:
args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
args->out_args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
break;
case FUSE_GETATTR:
case FUSE_SETATTR:
args->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
args->out_args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
break;
}
}
if (fc->minor < 12) {
switch (args->in.h.opcode) {
switch (args->opcode) {
case FUSE_CREATE:
args->in.args[0].size = sizeof(struct fuse_open_in);
args->in_args[0].size = sizeof(struct fuse_open_in);
break;
case FUSE_MKNOD:
args->in.args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE;
args->in_args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE;
break;
}
}
}
static void fuse_force_creds(struct fuse_conn *fc, struct fuse_req *req)
{
req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid());
req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid());
req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
}
/*
 * Bind @args to @req.
 *
 * Only the opcode and nodeid are copied into the request header; the
 * arguments themselves are not copied.  The request merely keeps a pointer
 * to @args, so @args has to stay valid for as long as the request uses it.
 */
static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args)
{
	req->args = args;
	req->in.h.nodeid = args->nodeid;
	req->in.h.opcode = args->opcode;
}
ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
{
struct fuse_req *req;
ssize_t ret;
req = fuse_get_req(fc, 0);
if (args->force) {
atomic_inc(&fc->num_waiting);
req = fuse_request_alloc(GFP_KERNEL | __GFP_NOFAIL);
if (!args->nocreds)
fuse_force_creds(fc, req);
__set_bit(FR_WAITING, &req->flags);
__set_bit(FR_FORCE, &req->flags);
} else {
WARN_ON(args->nocreds);
req = fuse_get_req(fc, false);
if (IS_ERR(req))
return PTR_ERR(req);
}
/* Needs to be done after fuse_get_req() so that fc->minor is valid */
fuse_adjust_compat(fc, args);
fuse_args_to_req(req, args);
req->in.h.opcode = args->in.h.opcode;
req->in.h.nodeid = args->in.h.nodeid;
req->in.numargs = args->in.numargs;
memcpy(req->in.args, args->in.args,
args->in.numargs * sizeof(struct fuse_in_arg));
req->out.argvar = args->out.argvar;
req->out.numargs = args->out.numargs;
memcpy(req->out.args, args->out.args,
args->out.numargs * sizeof(struct fuse_arg));
fuse_request_send(fc, req);
if (!args->noreply)
__set_bit(FR_ISREPLY, &req->flags);
__fuse_request_send(fc, req);
ret = req->out.h.error;
if (!ret && args->out.argvar) {
BUG_ON(args->out.numargs != 1);
ret = req->out.args[0].size;
if (!ret && args->out_argvar) {
BUG_ON(args->out_numargs == 0);
ret = args->out_args[args->out_numargs - 1].size;
}
fuse_put_request(fc, req);
return ret;
}
bool fuse_request_queue_background(struct fuse_conn *fc, struct fuse_req *req)
static bool fuse_request_queue_background(struct fuse_conn *fc,
struct fuse_req *req)
{
bool queued = false;
......@@ -681,56 +538,63 @@ bool fuse_request_queue_background(struct fuse_conn *fc, struct fuse_req *req)
return queued;
}
void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
int fuse_simple_background(struct fuse_conn *fc, struct fuse_args *args,
gfp_t gfp_flags)
{
WARN_ON(!req->end);
struct fuse_req *req;
if (args->force) {
WARN_ON(!args->nocreds);
req = fuse_request_alloc(gfp_flags);
if (!req)
return -ENOMEM;
__set_bit(FR_BACKGROUND, &req->flags);
} else {
WARN_ON(args->nocreds);
req = fuse_get_req(fc, true);
if (IS_ERR(req))
return PTR_ERR(req);
}
fuse_args_to_req(req, args);
if (!fuse_request_queue_background(fc, req)) {
req->out.h.error = -ENOTCONN;
req->end(fc, req);
fuse_put_request(fc, req);
return -ENOTCONN;
}
return 0;
}
EXPORT_SYMBOL_GPL(fuse_request_send_background);
EXPORT_SYMBOL_GPL(fuse_simple_background);
static int fuse_request_send_notify_reply(struct fuse_conn *fc,
struct fuse_req *req, u64 unique)
static int fuse_simple_notify_reply(struct fuse_conn *fc,
struct fuse_args *args, u64 unique)
{
int err = -ENODEV;
struct fuse_req *req;
struct fuse_iqueue *fiq = &fc->iq;
int err = 0;
req = fuse_get_req(fc, false);
if (IS_ERR(req))
return PTR_ERR(req);
__clear_bit(FR_ISREPLY, &req->flags);
req->in.h.unique = unique;
spin_lock(&fiq->waitq.lock);
fuse_args_to_req(req, args);
spin_lock(&fiq->lock);
if (fiq->connected) {
queue_request(fiq, req);
err = 0;
queue_request_and_unlock(fiq, req);
} else {
err = -ENODEV;
spin_unlock(&fiq->lock);
fuse_put_request(fc, req);
}
spin_unlock(&fiq->waitq.lock);
return err;
}
/*
 * Send a FUSE_FORGET for @nodeid with an nlookup count of one.
 *
 * The request is obtained with fuse_get_req_nofail_nopages(), so getting it
 * cannot fail.  Any error from sending it is ignored.
 */
void fuse_force_forget(struct file *file, u64 nodeid)
{
struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_req *req;
struct fuse_forget_in inarg;
/* Zero the whole structure before setting the one field that is used */
memset(&inarg, 0, sizeof(inarg));
inarg.nlookup = 1;
req = fuse_get_req_nofail_nopages(fc, file);
req->in.h.opcode = FUSE_FORGET;
req->in.h.nodeid = nodeid;
req->in.numargs = 1;
req->in.args[0].size = sizeof(inarg);
req->in.args[0].value = &inarg;
/* NOTE(review): presumably marks the request as not expecting a reply - confirm */
__clear_bit(FR_ISREPLY, &req->flags);
__fuse_request_send(fc, req);
/* ignore errors */
fuse_put_request(fc, req);
}
/*
* Lock the request. Up to the next unlock_request() there mustn't be
* anything that could cause a page-fault. If the request was already
......@@ -1084,14 +948,15 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
{
unsigned i;
struct fuse_req *req = cs->req;
struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
for (i = 0; i < ap->num_pages && (nbytes || zeroing); i++) {
int err;
unsigned offset = req->page_descs[i].offset;
unsigned count = min(nbytes, req->page_descs[i].length);
unsigned int offset = ap->descs[i].offset;
unsigned int count = min(nbytes, ap->descs[i].length);
err = fuse_copy_page(cs, &req->pages[i], offset, count,
zeroing);
err = fuse_copy_page(cs, &ap->pages[i], offset, count, zeroing);
if (err)
return err;
......@@ -1149,12 +1014,12 @@ static int request_pending(struct fuse_iqueue *fiq)
* Unlike other requests this is assembled on demand, without a need
* to allocate a separate fuse_req structure.
*
* Called with fiq->waitq.lock held, releases it
* Called with fiq->lock held, releases it
*/
static int fuse_read_interrupt(struct fuse_iqueue *fiq,
struct fuse_copy_state *cs,
size_t nbytes, struct fuse_req *req)
__releases(fiq->waitq.lock)
__releases(fiq->lock)
{
struct fuse_in_header ih;
struct fuse_interrupt_in arg;
......@@ -1169,7 +1034,7 @@ __releases(fiq->waitq.lock)
ih.unique = (req->in.h.unique | FUSE_INT_REQ_BIT);
arg.unique = req->in.h.unique;
spin_unlock(&fiq->waitq.lock);
spin_unlock(&fiq->lock);
if (nbytes < reqsize)
return -EINVAL;
......@@ -1181,9 +1046,9 @@ __releases(fiq->waitq.lock)
return err ? err : reqsize;
}
static struct fuse_forget_link *dequeue_forget(struct fuse_iqueue *fiq,
unsigned max,
unsigned *countp)
struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
unsigned int max,
unsigned int *countp)
{
struct fuse_forget_link *head = fiq->forget_list_head.next;
struct fuse_forget_link **newhead = &head;
......@@ -1202,14 +1067,15 @@ static struct fuse_forget_link *dequeue_forget(struct fuse_iqueue *fiq,
return head;
}
EXPORT_SYMBOL(fuse_dequeue_forget);
static int fuse_read_single_forget(struct fuse_iqueue *fiq,
struct fuse_copy_state *cs,
size_t nbytes)
__releases(fiq->waitq.lock)
__releases(fiq->lock)
{
int err;
struct fuse_forget_link *forget = dequeue_forget(fiq, 1, NULL);
struct fuse_forget_link *forget = fuse_dequeue_forget(fiq, 1, NULL);
struct fuse_forget_in arg = {
.nlookup = forget->forget_one.nlookup,
};
......@@ -1220,7 +1086,7 @@ __releases(fiq->waitq.lock)
.len = sizeof(ih) + sizeof(arg),
};
spin_unlock(&fiq->waitq.lock);
spin_unlock(&fiq->lock);
kfree(forget);
if (nbytes < ih.len)
return -EINVAL;
......@@ -1238,7 +1104,7 @@ __releases(fiq->waitq.lock)
static int fuse_read_batch_forget(struct fuse_iqueue *fiq,
struct fuse_copy_state *cs, size_t nbytes)
__releases(fiq->waitq.lock)
__releases(fiq->lock)
{
int err;
unsigned max_forgets;
......@@ -1252,13 +1118,13 @@ __releases(fiq->waitq.lock)
};
if (nbytes < ih.len) {
spin_unlock(&fiq->waitq.lock);
spin_unlock(&fiq->lock);
return -EINVAL;
}
max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
head = dequeue_forget(fiq, max_forgets, &count);
spin_unlock(&fiq->waitq.lock);
head = fuse_dequeue_forget(fiq, max_forgets, &count);
spin_unlock(&fiq->lock);
arg.count = count;
ih.len += count * sizeof(struct fuse_forget_one);
......@@ -1288,7 +1154,7 @@ __releases(fiq->waitq.lock)
static int fuse_read_forget(struct fuse_conn *fc, struct fuse_iqueue *fiq,
struct fuse_copy_state *cs,
size_t nbytes)
__releases(fiq->waitq.lock)
__releases(fiq->lock)
{
if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL)
return fuse_read_single_forget(fiq, cs, nbytes);
......@@ -1302,7 +1168,7 @@ __releases(fiq->waitq.lock)
* the pending list and copies request data to userspace buffer. If
* no reply is needed (FORGET) or request has been aborted or there
* was an error during the copying then it's finished by calling
* request_end(). Otherwise add it to the processing list, and set
* fuse_request_end(). Otherwise add it to the processing list, and set
* the 'sent' flag.
*/
static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
......@@ -1313,21 +1179,42 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
struct fuse_iqueue *fiq = &fc->iq;
struct fuse_pqueue *fpq = &fud->pq;
struct fuse_req *req;
struct fuse_in *in;
struct fuse_args *args;
unsigned reqsize;
unsigned int hash;
/*
* Require sane minimum read buffer - that has capacity for fixed part
* of any request header + negotiated max_write room for data.
*
* Historically libfuse reserves 4K for fixed header room, but e.g.
* GlusterFS reserves only 80 bytes
*
* = `sizeof(fuse_in_header) + sizeof(fuse_write_in)`
*
* which is the absolute minimum any sane filesystem should be using
* for header room.
*/
if (nbytes < max_t(size_t, FUSE_MIN_READ_BUFFER,
sizeof(struct fuse_in_header) +
sizeof(struct fuse_write_in) +
fc->max_write))
return -EINVAL;
restart:
spin_lock(&fiq->waitq.lock);
err = -EAGAIN;
if ((file->f_flags & O_NONBLOCK) && fiq->connected &&
!request_pending(fiq))
goto err_unlock;
for (;;) {
spin_lock(&fiq->lock);
if (!fiq->connected || request_pending(fiq))
break;
spin_unlock(&fiq->lock);
err = wait_event_interruptible_exclusive_locked(fiq->waitq,
if (file->f_flags & O_NONBLOCK)
return -EAGAIN;
err = wait_event_interruptible_exclusive(fiq->waitq,
!fiq->connected || request_pending(fiq));
if (err)
goto err_unlock;
return err;
}
if (!fiq->connected) {
err = fc->aborted ? -ECONNABORTED : -ENODEV;
......@@ -1351,28 +1238,28 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
req = list_entry(fiq->pending.next, struct fuse_req, list);
clear_bit(FR_PENDING, &req->flags);
list_del_init(&req->list);
spin_unlock(&fiq->waitq.lock);
spin_unlock(&fiq->lock);
in = &req->in;
reqsize = in->h.len;
args = req->args;
reqsize = req->in.h.len;
/* If request is too large, reply with an error and restart the read */
if (nbytes < reqsize) {
req->out.h.error = -EIO;
/* SETXATTR is special, since it may contain too large data */
if (in->h.opcode == FUSE_SETXATTR)
if (args->opcode == FUSE_SETXATTR)
req->out.h.error = -E2BIG;
request_end(fc, req);
fuse_request_end(fc, req);
goto restart;
}
spin_lock(&fpq->lock);
list_add(&req->list, &fpq->io);
spin_unlock(&fpq->lock);
cs->req = req;
err = fuse_copy_one(cs, &in->h, sizeof(in->h));
err = fuse_copy_one(cs, &req->in.h, sizeof(req->in.h));
if (!err)
err = fuse_copy_args(cs, in->numargs, in->argpages,
(struct fuse_arg *) in->args, 0);
err = fuse_copy_args(cs, args->in_numargs, args->in_pages,
(struct fuse_arg *) args->in_args, 0);
fuse_copy_finish(cs);
spin_lock(&fpq->lock);
clear_bit(FR_LOCKED, &req->flags);
......@@ -1405,11 +1292,11 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
if (!test_bit(FR_PRIVATE, &req->flags))
list_del_init(&req->list);
spin_unlock(&fpq->lock);
request_end(fc, req);
fuse_request_end(fc, req);
return err;
err_unlock:
spin_unlock(&fiq->waitq.lock);
spin_unlock(&fiq->lock);
return err;
}
......@@ -1728,9 +1615,19 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
return err;
}
static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
/*
 * Heap-allocated state for one FUSE_NOTIFY_REPLY request built by
 * fuse_retrieve().  The allocation is sized so that the page-pointer and
 * page-descriptor arrays follow directly after this struct, and the whole
 * buffer is released with a single kfree() in fuse_retrieve_end().
 */
struct fuse_retrieve_args {
/* page-carrying args; fuse_retrieve_end() gets back to this struct from ap.args via container_of() */
struct fuse_args_pages ap;
/* first input argument of the reply: offset and total length of the retrieved data */
struct fuse_notify_retrieve_in inarg;
};
static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_args *args,
int error)
{
release_pages(req->pages, req->num_pages);
struct fuse_retrieve_args *ra =
container_of(args, typeof(*ra), ap.args);
release_pages(ra->ap.pages, ra->ap.num_pages);
kfree(ra);
}
static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
......@@ -1738,13 +1635,16 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
{
int err;
struct address_space *mapping = inode->i_mapping;
struct fuse_req *req;
pgoff_t index;
loff_t file_size;
unsigned int num;
unsigned int offset;
size_t total_len = 0;
unsigned int num_pages;
struct fuse_retrieve_args *ra;
size_t args_size = sizeof(*ra);
struct fuse_args_pages *ap;
struct fuse_args *args;
offset = outarg->offset & ~PAGE_MASK;
file_size = i_size_read(inode);
......@@ -1758,19 +1658,26 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
num_pages = min(num_pages, fc->max_pages);
req = fuse_get_req(fc, num_pages);
if (IS_ERR(req))
return PTR_ERR(req);
args_size += num_pages * (sizeof(ap->pages[0]) + sizeof(ap->descs[0]));
req->in.h.opcode = FUSE_NOTIFY_REPLY;
req->in.h.nodeid = outarg->nodeid;
req->in.numargs = 2;
req->in.argpages = 1;
req->end = fuse_retrieve_end;
ra = kzalloc(args_size, GFP_KERNEL);
if (!ra)
return -ENOMEM;
ap = &ra->ap;
ap->pages = (void *) (ra + 1);
ap->descs = (void *) (ap->pages + num_pages);
args = &ap->args;
args->nodeid = outarg->nodeid;
args->opcode = FUSE_NOTIFY_REPLY;
args->in_numargs = 2;
args->in_pages = true;
args->end = fuse_retrieve_end;
index = outarg->offset >> PAGE_SHIFT;
while (num && req->num_pages < num_pages) {
while (num && ap->num_pages < num_pages) {
struct page *page;
unsigned int this_num;
......@@ -1779,27 +1686,25 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
break;
this_num = min_t(unsigned, num, PAGE_SIZE - offset);
req->pages[req->num_pages] = page;
req->page_descs[req->num_pages].offset = offset;
req->page_descs[req->num_pages].length = this_num;
req->num_pages++;
ap->pages[ap->num_pages] = page;
ap->descs[ap->num_pages].offset = offset;
ap->descs[ap->num_pages].length = this_num;
ap->num_pages++;
offset = 0;
num -= this_num;
total_len += this_num;
index++;
}
req->misc.retrieve_in.offset = outarg->offset;
req->misc.retrieve_in.size = total_len;
req->in.args[0].size = sizeof(req->misc.retrieve_in);
req->in.args[0].value = &req->misc.retrieve_in;
req->in.args[1].size = total_len;
ra->inarg.offset = outarg->offset;
ra->inarg.size = total_len;
args->in_args[0].size = sizeof(ra->inarg);
args->in_args[0].value = &ra->inarg;
args->in_args[1].size = total_len;
err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
if (err) {
fuse_retrieve_end(fc, req);
fuse_put_request(fc, req);
}
err = fuse_simple_notify_reply(fc, args, outarg->notify_unique);
if (err)
fuse_retrieve_end(fc, args, err);
return err;
}
......@@ -1885,27 +1790,25 @@ static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique)
return NULL;
}
static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
static int copy_out_args(struct fuse_copy_state *cs, struct fuse_args *args,
unsigned nbytes)
{
unsigned reqsize = sizeof(struct fuse_out_header);
if (out->h.error)
return nbytes != reqsize ? -EINVAL : 0;
reqsize += len_args(out->numargs, out->args);
reqsize += fuse_len_args(args->out_numargs, args->out_args);
if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
if (reqsize < nbytes || (reqsize > nbytes && !args->out_argvar))
return -EINVAL;
else if (reqsize > nbytes) {
struct fuse_arg *lastarg = &out->args[out->numargs-1];
struct fuse_arg *lastarg = &args->out_args[args->out_numargs-1];
unsigned diffsize = reqsize - nbytes;
if (diffsize > lastarg->size)
return -EINVAL;
lastarg->size -= diffsize;
}
return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
out->page_zeroing);
return fuse_copy_args(cs, args->out_numargs, args->out_pages,
args->out_args, args->page_zeroing);
}
/*
......@@ -1913,7 +1816,7 @@ static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
* the write buffer. The request is then searched on the processing
* list by the unique ID found in the header. If found, then remove
* it from the list and copy the rest of the buffer to the request.
* The request is finished by calling request_end()
* The request is finished by calling fuse_request_end().
*/
static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
struct fuse_copy_state *cs, size_t nbytes)
......@@ -1984,10 +1887,13 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
set_bit(FR_LOCKED, &req->flags);
spin_unlock(&fpq->lock);
cs->req = req;
if (!req->out.page_replace)
if (!req->args->page_replace)
cs->move_pages = 0;
err = copy_out_args(cs, &req->out, nbytes);
if (oh.error)
err = nbytes != sizeof(oh) ? -EINVAL : 0;
else
err = copy_out_args(cs, req->args, nbytes);
fuse_copy_finish(cs);
spin_lock(&fpq->lock);
......@@ -2000,7 +1906,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
list_del_init(&req->list);
spin_unlock(&fpq->lock);
request_end(fc, req);
fuse_request_end(fc, req);
out:
return err ? err : nbytes;
......@@ -2121,12 +2027,12 @@ static __poll_t fuse_dev_poll(struct file *file, poll_table *wait)
fiq = &fud->fc->iq;
poll_wait(file, &fiq->waitq, wait);
spin_lock(&fiq->waitq.lock);
spin_lock(&fiq->lock);
if (!fiq->connected)
mask = EPOLLERR;
else if (request_pending(fiq))
mask |= EPOLLIN | EPOLLRDNORM;
spin_unlock(&fiq->waitq.lock);
spin_unlock(&fiq->lock);
return mask;
}
......@@ -2140,7 +2046,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head)
req->out.h.error = -ECONNABORTED;
clear_bit(FR_SENT, &req->flags);
list_del_init(&req->list);
request_end(fc, req);
fuse_request_end(fc, req);
}
}
......@@ -2221,15 +2127,15 @@ void fuse_abort_conn(struct fuse_conn *fc)
flush_bg_queue(fc);
spin_unlock(&fc->bg_lock);
spin_lock(&fiq->waitq.lock);
spin_lock(&fiq->lock);
fiq->connected = 0;
list_for_each_entry(req, &fiq->pending, list)
clear_bit(FR_PENDING, &req->flags);
list_splice_tail_init(&fiq->pending, &to_end);
while (forget_pending(fiq))
kfree(dequeue_forget(fiq, 1, NULL));
wake_up_all_locked(&fiq->waitq);
spin_unlock(&fiq->waitq.lock);
kfree(fuse_dequeue_forget(fiq, 1, NULL));
wake_up_all(&fiq->waitq);
spin_unlock(&fiq->lock);
kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
end_polls(fc);
wake_up_all(&fc->blocked_waitq);
......@@ -2296,7 +2202,7 @@ static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
if (new->private_data)
return -EINVAL;
fud = fuse_dev_alloc(fc);
fud = fuse_dev_alloc_install(fc);
if (!fud)
return -ENOMEM;
......
......@@ -24,20 +24,54 @@ static void fuse_advise_use_readdirplus(struct inode *dir)
set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
}
#if BITS_PER_LONG >= 64
/*
 * Variant for BITS_PER_LONG >= 64: the 64-bit time value is stored directly
 * in the dentry's d_fsdata pointer field, so no separate allocation is used.
 */
static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
{
	void *encoded = (void *) time;

	entry->d_fsdata = encoded;
}
static inline u64 fuse_dentry_time(const struct dentry *entry)
{
return (u64)entry->d_fsdata;
}
#else
/*
 * Per-dentry data that d_fsdata points to on builds where BITS_PER_LONG < 64
 * (this is the #else branch); allocated in fuse_dentry_init().
 */
union fuse_dentry {
/* value written by __fuse_dentry_settime() and read by fuse_dentry_time() */
u64 time;
/* handed to kfree_rcu() in fuse_dentry_release(); shares storage with time */
struct rcu_head rcu;
};
static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
{
((union fuse_dentry *) entry->d_fsdata)->time = time;
((union fuse_dentry *) dentry->d_fsdata)->time = time;
}
static inline u64 fuse_dentry_time(struct dentry *entry)
static inline u64 fuse_dentry_time(const struct dentry *entry)
{
return ((union fuse_dentry *) entry->d_fsdata)->time;
}
#endif
/*
 * Record @time for @dentry and keep DCACHE_OP_DELETE in step with it.
 *
 * The flag is wanted only when @time is zero and the connection has
 * delete_stale set; in every other case it is cleared.
 */
static void fuse_dentry_settime(struct dentry *dentry, u64 time)
{
	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
	bool want_delete = !time && fc->delete_stale;
	bool has_delete = !!(dentry->d_flags & DCACHE_OP_DELETE);

	/*
	 * DCACHE_OP_DELETE is toggled because dput() is faster without it.
	 * The unlocked check above may race; that is acceptable since this
	 * is purely an optimization.
	 */
	if (want_delete != has_delete) {
		spin_lock(&dentry->d_lock);
		if (want_delete)
			dentry->d_flags |= DCACHE_OP_DELETE;
		else
			dentry->d_flags &= ~DCACHE_OP_DELETE;
		spin_unlock(&dentry->d_lock);
	}

	__fuse_dentry_settime(dentry, time);
}
/*
* FUSE caches dentries and attributes with separate timeout. The
......@@ -139,14 +173,14 @@ static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
struct fuse_entry_out *outarg)
{
memset(outarg, 0, sizeof(struct fuse_entry_out));
args->in.h.opcode = FUSE_LOOKUP;
args->in.h.nodeid = nodeid;
args->in.numargs = 1;
args->in.args[0].size = name->len + 1;
args->in.args[0].value = name->name;
args->out.numargs = 1;
args->out.args[0].size = sizeof(struct fuse_entry_out);
args->out.args[0].value = outarg;
args->opcode = FUSE_LOOKUP;
args->nodeid = nodeid;
args->in_numargs = 1;
args->in_args[0].size = name->len + 1;
args->in_args[0].value = name->name;
args->out_numargs = 1;
args->out_args[0].size = sizeof(struct fuse_entry_out);
args->out_args[0].value = outarg;
}
/*
......@@ -242,9 +276,11 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
goto out;
}
#if BITS_PER_LONG < 64
static int fuse_dentry_init(struct dentry *dentry)
{
dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry), GFP_KERNEL);
dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
return dentry->d_fsdata ? 0 : -ENOMEM;
}
......@@ -254,16 +290,27 @@ static void fuse_dentry_release(struct dentry *dentry)
kfree_rcu(fd, rcu);
}
#endif
static int fuse_dentry_delete(const struct dentry *dentry)
{
return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
}
/*
 * Dentry callbacks that include revalidation and the d_delete hook.
 */
const struct dentry_operations fuse_dentry_operations = {
.d_revalidate = fuse_dentry_revalidate,
/* reports whether the time stored for the dentry is already in the past */
.d_delete = fuse_dentry_delete,
/* d_fsdata is only a separate allocation when longs are narrower than 64 bits */
#if BITS_PER_LONG < 64
.d_init = fuse_dentry_init,
.d_release = fuse_dentry_release,
#endif
};
/*
 * Reduced callback table with no d_revalidate or d_delete: it only provides
 * the d_fsdata allocate/free hooks, and only when BITS_PER_LONG < 64.
 * NOTE(review): presumably installed on the filesystem root dentry, going by
 * the name; the user of this table is not visible here, so confirm.
 */
const struct dentry_operations fuse_root_dentry_operations = {
#if BITS_PER_LONG < 64
.d_init = fuse_dentry_init,
.d_release = fuse_dentry_release,
#endif
};
int fuse_valid_type(int m)
......@@ -410,18 +457,18 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
inarg.flags = flags;
inarg.mode = mode;
inarg.umask = current_umask();
args.in.h.opcode = FUSE_CREATE;
args.in.h.nodeid = get_node_id(dir);
args.in.numargs = 2;
args.in.args[0].size = sizeof(inarg);
args.in.args[0].value = &inarg;
args.in.args[1].size = entry->d_name.len + 1;
args.in.args[1].value = entry->d_name.name;
args.out.numargs = 2;
args.out.args[0].size = sizeof(outentry);
args.out.args[0].value = &outentry;
args.out.args[1].size = sizeof(outopen);
args.out.args[1].value = &outopen;
args.opcode = FUSE_CREATE;
args.nodeid = get_node_id(dir);
args.in_numargs = 2;
args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg;
args.in_args[1].size = entry->d_name.len + 1;
args.in_args[1].value = entry->d_name.name;
args.out_numargs = 2;
args.out_args[0].size = sizeof(outentry);
args.out_args[0].value = &outentry;
args.out_args[1].size = sizeof(outopen);
args.out_args[1].value = &outopen;
err = fuse_simple_request(fc, &args);
if (err)
goto out_free_ff;
......@@ -526,10 +573,10 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
return -ENOMEM;
memset(&outarg, 0, sizeof(outarg));
args->in.h.nodeid = get_node_id(dir);
args->out.numargs = 1;
args->out.args[0].size = sizeof(outarg);
args->out.args[0].value = &outarg;
args->nodeid = get_node_id(dir);
args->out_numargs = 1;
args->out_args[0].size = sizeof(outarg);
args->out_args[0].value = &outarg;
err = fuse_simple_request(fc, args);
if (err)
goto out_put_forget_req;
......@@ -582,12 +629,12 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
inarg.mode = mode;
inarg.rdev = new_encode_dev(rdev);
inarg.umask = current_umask();
args.in.h.opcode = FUSE_MKNOD;
args.in.numargs = 2;
args.in.args[0].size = sizeof(inarg);
args.in.args[0].value = &inarg;
args.in.args[1].size = entry->d_name.len + 1;
args.in.args[1].value = entry->d_name.name;
args.opcode = FUSE_MKNOD;
args.in_numargs = 2;
args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg;
args.in_args[1].size = entry->d_name.len + 1;
args.in_args[1].value = entry->d_name.name;
return create_new_entry(fc, &args, dir, entry, mode);
}
......@@ -609,12 +656,12 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
memset(&inarg, 0, sizeof(inarg));
inarg.mode = mode;
inarg.umask = current_umask();
args.in.h.opcode = FUSE_MKDIR;
args.in.numargs = 2;
args.in.args[0].size = sizeof(inarg);
args.in.args[0].value = &inarg;
args.in.args[1].size = entry->d_name.len + 1;
args.in.args[1].value = entry->d_name.name;
args.opcode = FUSE_MKDIR;
args.in_numargs = 2;
args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg;
args.in_args[1].size = entry->d_name.len + 1;
args.in_args[1].value = entry->d_name.name;
return create_new_entry(fc, &args, dir, entry, S_IFDIR);
}
......@@ -625,12 +672,12 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
unsigned len = strlen(link) + 1;
FUSE_ARGS(args);
args.in.h.opcode = FUSE_SYMLINK;
args.in.numargs = 2;
args.in.args[0].size = entry->d_name.len + 1;
args.in.args[0].value = entry->d_name.name;
args.in.args[1].size = len;
args.in.args[1].value = link;
args.opcode = FUSE_SYMLINK;
args.in_numargs = 2;
args.in_args[0].size = entry->d_name.len + 1;
args.in_args[0].value = entry->d_name.name;
args.in_args[1].size = len;
args.in_args[1].value = link;
return create_new_entry(fc, &args, dir, entry, S_IFLNK);
}
......@@ -648,11 +695,11 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
struct fuse_conn *fc = get_fuse_conn(dir);
FUSE_ARGS(args);
args.in.h.opcode = FUSE_UNLINK;
args.in.h.nodeid = get_node_id(dir);
args.in.numargs = 1;
args.in.args[0].size = entry->d_name.len + 1;
args.in.args[0].value = entry->d_name.name;
args.opcode = FUSE_UNLINK;
args.nodeid = get_node_id(dir);
args.in_numargs = 1;
args.in_args[0].size = entry->d_name.len + 1;
args.in_args[0].value = entry->d_name.name;
err = fuse_simple_request(fc, &args);
if (!err) {
struct inode *inode = d_inode(entry);
......@@ -684,11 +731,11 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
struct fuse_conn *fc = get_fuse_conn(dir);
FUSE_ARGS(args);
args.in.h.opcode = FUSE_RMDIR;
args.in.h.nodeid = get_node_id(dir);
args.in.numargs = 1;
args.in.args[0].size = entry->d_name.len + 1;
args.in.args[0].value = entry->d_name.name;
args.opcode = FUSE_RMDIR;
args.nodeid = get_node_id(dir);
args.in_numargs = 1;
args.in_args[0].size = entry->d_name.len + 1;
args.in_args[0].value = entry->d_name.name;
err = fuse_simple_request(fc, &args);
if (!err) {
clear_nlink(d_inode(entry));
......@@ -711,15 +758,15 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
memset(&inarg, 0, argsize);
inarg.newdir = get_node_id(newdir);
inarg.flags = flags;
args.in.h.opcode = opcode;
args.in.h.nodeid = get_node_id(olddir);
args.in.numargs = 3;
args.in.args[0].size = argsize;
args.in.args[0].value = &inarg;
args.in.args[1].size = oldent->d_name.len + 1;
args.in.args[1].value = oldent->d_name.name;
args.in.args[2].size = newent->d_name.len + 1;
args.in.args[2].value = newent->d_name.name;
args.opcode = opcode;
args.nodeid = get_node_id(olddir);
args.in_numargs = 3;
args.in_args[0].size = argsize;
args.in_args[0].value = &inarg;
args.in_args[1].size = oldent->d_name.len + 1;
args.in_args[1].value = oldent->d_name.name;
args.in_args[2].size = newent->d_name.len + 1;
args.in_args[2].value = newent->d_name.name;
err = fuse_simple_request(fc, &args);
if (!err) {
/* ctime changes */
......@@ -796,12 +843,12 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
memset(&inarg, 0, sizeof(inarg));
inarg.oldnodeid = get_node_id(inode);
args.in.h.opcode = FUSE_LINK;
args.in.numargs = 2;
args.in.args[0].size = sizeof(inarg);
args.in.args[0].value = &inarg;
args.in.args[1].size = newent->d_name.len + 1;
args.in.args[1].value = newent->d_name.name;
args.opcode = FUSE_LINK;
args.in_numargs = 2;
args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg;
args.in_args[1].size = newent->d_name.len + 1;
args.in_args[1].value = newent->d_name.name;
err = create_new_entry(fc, &args, newdir, newent, inode->i_mode);
/* Contrary to "normal" filesystems it can happen that link
makes two "logical" inodes point to the same "physical"
......@@ -884,14 +931,14 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
inarg.getattr_flags |= FUSE_GETATTR_FH;
inarg.fh = ff->fh;
}
args.in.h.opcode = FUSE_GETATTR;
args.in.h.nodeid = get_node_id(inode);
args.in.numargs = 1;
args.in.args[0].size = sizeof(inarg);
args.in.args[0].value = &inarg;
args.out.numargs = 1;
args.out.args[0].size = sizeof(outarg);
args.out.args[0].value = &outarg;
args.opcode = FUSE_GETATTR;
args.nodeid = get_node_id(inode);
args.in_numargs = 1;
args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg;
args.out_numargs = 1;
args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg;
err = fuse_simple_request(fc, &args);
if (!err) {
if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
......@@ -1056,11 +1103,11 @@ static int fuse_access(struct inode *inode, int mask)
memset(&inarg, 0, sizeof(inarg));
inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
args.in.h.opcode = FUSE_ACCESS;
args.in.h.nodeid = get_node_id(inode);
args.in.numargs = 1;
args.in.args[0].size = sizeof(inarg);
args.in.args[0].value = &inarg;
args.opcode = FUSE_ACCESS;
args.nodeid = get_node_id(inode);
args.in_numargs = 1;
args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg;
err = fuse_simple_request(fc, &args);
if (err == -ENOSYS) {
fc->no_access = 1;
......@@ -1152,38 +1199,36 @@ static int fuse_permission(struct inode *inode, int mask)
static int fuse_readlink_page(struct inode *inode, struct page *page)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_req *req;
int err;
struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 };
struct fuse_args_pages ap = {
.num_pages = 1,
.pages = &page,
.descs = &desc,
};
char *link;
ssize_t res;
ap.args.opcode = FUSE_READLINK;
ap.args.nodeid = get_node_id(inode);
ap.args.out_pages = true;
ap.args.out_argvar = true;
ap.args.page_zeroing = true;
ap.args.out_numargs = 1;
ap.args.out_args[0].size = desc.length;
res = fuse_simple_request(fc, &ap.args);
req = fuse_get_req(fc, 1);
if (IS_ERR(req))
return PTR_ERR(req);
req->out.page_zeroing = 1;
req->out.argpages = 1;
req->num_pages = 1;
req->pages[0] = page;
req->page_descs[0].length = PAGE_SIZE - 1;
req->in.h.opcode = FUSE_READLINK;
req->in.h.nodeid = get_node_id(inode);
req->out.argvar = 1;
req->out.numargs = 1;
req->out.args[0].size = PAGE_SIZE - 1;
fuse_request_send(fc, req);
err = req->out.h.error;
fuse_invalidate_atime(inode);
if (!err) {
char *link = page_address(page);
size_t len = req->out.args[0].size;
if (res < 0)
return res;
BUG_ON(len >= PAGE_SIZE);
link[len] = '\0';
}
if (WARN_ON(res >= PAGE_SIZE))
return -EIO;
fuse_put_request(fc, req);
fuse_invalidate_atime(inode);
link = page_address(page);
link[res] = '\0';
return err;
return 0;
}
static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
......@@ -1383,14 +1428,14 @@ static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
struct fuse_setattr_in *inarg_p,
struct fuse_attr_out *outarg_p)
{
args->in.h.opcode = FUSE_SETATTR;
args->in.h.nodeid = get_node_id(inode);
args->in.numargs = 1;
args->in.args[0].size = sizeof(*inarg_p);
args->in.args[0].value = inarg_p;
args->out.numargs = 1;
args->out.args[0].size = sizeof(*outarg_p);
args->out.args[0].value = outarg_p;
args->opcode = FUSE_SETATTR;
args->nodeid = get_node_id(inode);
args->in_numargs = 1;
args->in_args[0].size = sizeof(*inarg_p);
args->in_args[0].value = inarg_p;
args->out_numargs = 1;
args->out_args[0].size = sizeof(*outarg_p);
args->out_args[0].value = outarg_p;
}
/*
......
......@@ -19,6 +19,18 @@
#include <linux/falloc.h>
#include <linux/uio.h>
/*
 * Allocate one zeroed buffer holding @npages page pointers followed by
 * @npages page descriptors.
 *
 * @npages: number of pages the caller wants room for
 * @flags:  allocation flags passed straight to kzalloc()
 * @desc:   out parameter; receives the address of the descriptor array
 *
 * Returns the page-pointer array, or NULL if the allocation failed.  On
 * success *@desc points into the same buffer, so a single kfree() of the
 * returned pointer releases both arrays.  On failure *@desc is set to NULL
 * instead of an address computed from a null pointer, so a caller can never
 * mistake it for a usable descriptor array.
 */
static struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags,
				      struct fuse_page_desc **desc)
{
	struct page **pages;

	pages = kzalloc(npages * (sizeof(struct page *) +
				  sizeof(struct fuse_page_desc)), flags);
	if (!pages) {
		*desc = NULL;
		return NULL;
	}

	/* The descriptors start right after the last page pointer. */
	*desc = (void *) (pages + npages);

	return pages;
}
static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
int opcode, struct fuse_open_out *outargp)
{
......@@ -29,29 +41,36 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
if (!fc->atomic_o_trunc)
inarg.flags &= ~O_TRUNC;
args.in.h.opcode = opcode;
args.in.h.nodeid = nodeid;
args.in.numargs = 1;
args.in.args[0].size = sizeof(inarg);
args.in.args[0].value = &inarg;
args.out.numargs = 1;
args.out.args[0].size = sizeof(*outargp);
args.out.args[0].value = outargp;
args.opcode = opcode;
args.nodeid = nodeid;
args.in_numargs = 1;
args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg;
args.out_numargs = 1;
args.out_args[0].size = sizeof(*outargp);
args.out_args[0].value = outargp;
return fuse_simple_request(fc, &args);
}
/*
 * State for the release request of one open file.  Allocated in
 * fuse_file_alloc() and reached through ff->release_args.
 */
struct fuse_release_args {
/* request arguments; fuse_release_end() recovers this struct from it with container_of() */
struct fuse_args args;
/* input payload; fh and flags are filled in by fuse_prepare_release() */
struct fuse_release_in inarg;
/* result of igrab() in fuse_release_common(); dropped with iput() in fuse_release_end() */
struct inode *inode;
};
struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
{
struct fuse_file *ff;
ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL);
ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL_ACCOUNT);
if (unlikely(!ff))
return NULL;
ff->fc = fc;
ff->reserved_req = fuse_request_alloc(0);
if (unlikely(!ff->reserved_req)) {
ff->release_args = kzalloc(sizeof(*ff->release_args),
GFP_KERNEL_ACCOUNT);
if (!ff->release_args) {
kfree(ff);
return NULL;
}
......@@ -69,7 +88,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
void fuse_file_free(struct fuse_file *ff)
{
fuse_request_free(ff->reserved_req);
kfree(ff->release_args);
mutex_destroy(&ff->readdir.lock);
kfree(ff);
}
......@@ -80,34 +99,31 @@ static struct fuse_file *fuse_file_get(struct fuse_file *ff)
return ff;
}
static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
static void fuse_release_end(struct fuse_conn *fc, struct fuse_args *args,
int error)
{
iput(req->misc.release.inode);
struct fuse_release_args *ra = container_of(args, typeof(*ra), args);
iput(ra->inode);
kfree(ra);
}
static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir)
{
if (refcount_dec_and_test(&ff->count)) {
struct fuse_req *req = ff->reserved_req;
struct fuse_args *args = &ff->release_args->args;
if (isdir ? ff->fc->no_opendir : ff->fc->no_open) {
/*
* Drop the release request when client does not
* implement 'open'
*/
__clear_bit(FR_BACKGROUND, &req->flags);
iput(req->misc.release.inode);
fuse_put_request(ff->fc, req);
/* Do nothing when client does not implement 'open' */
fuse_release_end(ff->fc, args, 0);
} else if (sync) {
__set_bit(FR_FORCE, &req->flags);
__clear_bit(FR_BACKGROUND, &req->flags);
fuse_request_send(ff->fc, req);
iput(req->misc.release.inode);
fuse_put_request(ff->fc, req);
fuse_simple_request(ff->fc, args);
fuse_release_end(ff->fc, args, 0);
} else {
req->end = fuse_release_end;
__set_bit(FR_BACKGROUND, &req->flags);
fuse_request_send_background(ff->fc, req);
args->end = fuse_release_end;
if (fuse_simple_background(ff->fc, args,
GFP_KERNEL | __GFP_NOFAIL))
fuse_release_end(ff->fc, args, -ENOTCONN);
}
kfree(ff);
}
......@@ -227,8 +243,7 @@ static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
int flags, int opcode)
{
struct fuse_conn *fc = ff->fc;
struct fuse_req *req = ff->reserved_req;
struct fuse_release_in *inarg = &req->misc.release.in;
struct fuse_release_args *ra = ff->release_args;
/* Inode is NULL on error path of fuse_create_open() */
if (likely(fi)) {
......@@ -243,32 +258,33 @@ static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
wake_up_interruptible_all(&ff->poll_wait);
inarg->fh = ff->fh;
inarg->flags = flags;
req->in.h.opcode = opcode;
req->in.h.nodeid = ff->nodeid;
req->in.numargs = 1;
req->in.args[0].size = sizeof(struct fuse_release_in);
req->in.args[0].value = inarg;
ra->inarg.fh = ff->fh;
ra->inarg.flags = flags;
ra->args.in_numargs = 1;
ra->args.in_args[0].size = sizeof(struct fuse_release_in);
ra->args.in_args[0].value = &ra->inarg;
ra->args.opcode = opcode;
ra->args.nodeid = ff->nodeid;
ra->args.force = true;
ra->args.nocreds = true;
}
void fuse_release_common(struct file *file, bool isdir)
{
struct fuse_inode *fi = get_fuse_inode(file_inode(file));
struct fuse_file *ff = file->private_data;
struct fuse_req *req = ff->reserved_req;
struct fuse_release_args *ra = ff->release_args;
int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;
fuse_prepare_release(fi, ff, file->f_flags, opcode);
if (ff->flock) {
struct fuse_release_in *inarg = &req->misc.release.in;
inarg->release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
inarg->lock_owner = fuse_lock_owner_id(ff->fc,
ra->inarg.release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
ra->inarg.lock_owner = fuse_lock_owner_id(ff->fc,
(fl_owner_t) file);
}
/* Hold inode until release is finished */
req->misc.release.inode = igrab(file_inode(file));
ra->inode = igrab(file_inode(file));
/*
* Normally this will send the RELEASE request, however if
......@@ -279,7 +295,7 @@ void fuse_release_common(struct file *file, bool isdir)
* synchronous RELEASE is allowed (and desirable) in this case
* because the server can be trusted not to screw up.
*/
fuse_file_put(ff, ff->fc->destroy_req != NULL, isdir);
fuse_file_put(ff, ff->fc->destroy, isdir);
}
static int fuse_open(struct inode *inode, struct file *file)
......@@ -335,19 +351,27 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
return (u64) v0 + ((u64) v1 << 32);
}
static struct fuse_req *fuse_find_writeback(struct fuse_inode *fi,
/*
 * One writeback request as tracked on a fuse inode's writepages list.
 */
struct fuse_writepage_args {
/* I/O args; ia.write.in.offset and ia.ap.num_pages give the page range that fuse_find_writeback() tests */
struct fuse_io_args ia;
/* link in fi->writepages */
struct list_head writepages_entry;
/* NOTE(review): users of queue_entry are not visible here; presumably a second list link, confirm */
struct list_head queue_entry;
/* NOTE(review): users of next are not visible here; confirm its purpose */
struct fuse_writepage_args *next;
/* inode the request belongs to; fuse_find_writeback() warns if it does not map to the searched fi */
struct inode *inode;
};
static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
pgoff_t idx_from, pgoff_t idx_to)
{
struct fuse_req *req;
struct fuse_writepage_args *wpa;
list_for_each_entry(req, &fi->writepages, writepages_entry) {
list_for_each_entry(wpa, &fi->writepages, writepages_entry) {
pgoff_t curr_index;
WARN_ON(get_fuse_inode(req->inode) != fi);
curr_index = req->misc.write.in.offset >> PAGE_SHIFT;
if (idx_from < curr_index + req->num_pages &&
WARN_ON(get_fuse_inode(wpa->inode) != fi);
curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT;
if (idx_from < curr_index + wpa->ia.ap.num_pages &&
curr_index <= idx_to) {
return req;
return wpa;
}
}
return NULL;
......@@ -383,12 +407,11 @@ static inline bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
* Since fuse doesn't rely on the VM writeback tracking, this has to
* use some other means.
*/
static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
static void fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
{
struct fuse_inode *fi = get_fuse_inode(inode);
wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
return 0;
}
/*
......@@ -411,8 +434,8 @@ static int fuse_flush(struct file *file, fl_owner_t id)
struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_file *ff = file->private_data;
struct fuse_req *req;
struct fuse_flush_in inarg;
FUSE_ARGS(args);
int err;
if (is_bad_inode(inode))
......@@ -433,19 +456,17 @@ static int fuse_flush(struct file *file, fl_owner_t id)
if (err)
return err;
req = fuse_get_req_nofail_nopages(fc, file);
memset(&inarg, 0, sizeof(inarg));
inarg.fh = ff->fh;
inarg.lock_owner = fuse_lock_owner_id(fc, id);
req->in.h.opcode = FUSE_FLUSH;
req->in.h.nodeid = get_node_id(inode);
req->in.numargs = 1;
req->in.args[0].size = sizeof(inarg);
req->in.args[0].value = &inarg;
__set_bit(FR_FORCE, &req->flags);
fuse_request_send(fc, req);
err = req->out.h.error;
fuse_put_request(fc, req);
args.opcode = FUSE_FLUSH;
args.nodeid = get_node_id(inode);
args.in_numargs = 1;
args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg;
args.force = true;
err = fuse_simple_request(fc, &args);
if (err == -ENOSYS) {
fc->no_flush = 1;
err = 0;
......@@ -465,11 +486,11 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
memset(&inarg, 0, sizeof(inarg));
inarg.fh = ff->fh;
inarg.fsync_flags = datasync ? FUSE_FSYNC_FDATASYNC : 0;
args.in.h.opcode = opcode;
args.in.h.nodeid = get_node_id(inode);
args.in.numargs = 1;
args.in.args[0].size = sizeof(inarg);
args.in.args[0].value = &inarg;
args.opcode = opcode;
args.nodeid = get_node_id(inode);
args.in_numargs = 1;
args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg;
return fuse_simple_request(fc, &args);
}
......@@ -523,35 +544,35 @@ static int fuse_fsync(struct file *file, loff_t start, loff_t end,
return err;
}
void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
size_t count, int opcode)
{
struct fuse_read_in *inarg = &req->misc.read.in;
struct fuse_file *ff = file->private_data;
struct fuse_args *args = &ia->ap.args;
inarg->fh = ff->fh;
inarg->offset = pos;
inarg->size = count;
inarg->flags = file->f_flags;
req->in.h.opcode = opcode;
req->in.h.nodeid = ff->nodeid;
req->in.numargs = 1;
req->in.args[0].size = sizeof(struct fuse_read_in);
req->in.args[0].value = inarg;
req->out.argvar = 1;
req->out.numargs = 1;
req->out.args[0].size = count;
ia->read.in.fh = ff->fh;
ia->read.in.offset = pos;
ia->read.in.size = count;
ia->read.in.flags = file->f_flags;
args->opcode = opcode;
args->nodeid = ff->nodeid;
args->in_numargs = 1;
args->in_args[0].size = sizeof(ia->read.in);
args->in_args[0].value = &ia->read.in;
args->out_argvar = true;
args->out_numargs = 1;
args->out_args[0].size = count;
}
static void fuse_release_user_pages(struct fuse_req *req, bool should_dirty)
static void fuse_release_user_pages(struct fuse_args_pages *ap,
bool should_dirty)
{
unsigned i;
unsigned int i;
for (i = 0; i < req->num_pages; i++) {
struct page *page = req->pages[i];
for (i = 0; i < ap->num_pages; i++) {
if (should_dirty)
set_page_dirty_lock(page);
put_page(page);
set_page_dirty_lock(ap->pages[i]);
put_page(ap->pages[i]);
}
}
......@@ -621,64 +642,94 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
kref_put(&io->refcnt, fuse_io_release);
}
static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req)
static struct fuse_io_args *fuse_io_alloc(struct fuse_io_priv *io,
unsigned int npages)
{
struct fuse_io_priv *io = req->io;
struct fuse_io_args *ia;
ia = kzalloc(sizeof(*ia), GFP_KERNEL);
if (ia) {
ia->io = io;
ia->ap.pages = fuse_pages_alloc(npages, GFP_KERNEL,
&ia->ap.descs);
if (!ia->ap.pages) {
kfree(ia);
ia = NULL;
}
}
return ia;
}
/*
 * Release a fuse_io_args obtained from fuse_io_alloc(): first the page array
 * it points to, then the structure itself.
 */
static void fuse_io_free(struct fuse_io_args *ia)
{
	struct page **pages = ia->ap.pages;

	kfree(pages);
	kfree(ia);
}
static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_args *args,
int err)
{
struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args);
struct fuse_io_priv *io = ia->io;
ssize_t pos = -1;
fuse_release_user_pages(req, io->should_dirty);
fuse_release_user_pages(&ia->ap, io->should_dirty);
if (io->write) {
if (req->misc.write.in.size != req->misc.write.out.size)
pos = req->misc.write.in.offset - io->offset +
req->misc.write.out.size;
if (err) {
/* Nothing */
} else if (io->write) {
if (ia->write.out.size > ia->write.in.size) {
err = -EIO;
} else if (ia->write.in.size != ia->write.out.size) {
pos = ia->write.in.offset - io->offset +
ia->write.out.size;
}
} else {
if (req->misc.read.in.size != req->out.args[0].size)
pos = req->misc.read.in.offset - io->offset +
req->out.args[0].size;
u32 outsize = args->out_args[0].size;
if (ia->read.in.size != outsize)
pos = ia->read.in.offset - io->offset + outsize;
}
fuse_aio_complete(io, req->out.h.error, pos);
fuse_aio_complete(io, err, pos);
fuse_io_free(ia);
}
static size_t fuse_async_req_send(struct fuse_conn *fc, struct fuse_req *req,
size_t num_bytes, struct fuse_io_priv *io)
static ssize_t fuse_async_req_send(struct fuse_conn *fc,
struct fuse_io_args *ia, size_t num_bytes)
{
ssize_t err;
struct fuse_io_priv *io = ia->io;
spin_lock(&io->lock);
kref_get(&io->refcnt);
io->size += num_bytes;
io->reqs++;
spin_unlock(&io->lock);
req->io = io;
req->end = fuse_aio_complete_req;
__fuse_get_request(req);
fuse_request_send_background(fc, req);
ia->ap.args.end = fuse_aio_complete_req;
err = fuse_simple_background(fc, &ia->ap.args, GFP_KERNEL);
return num_bytes;
return err ?: num_bytes;
}
static size_t fuse_send_read(struct fuse_req *req, struct fuse_io_priv *io,
loff_t pos, size_t count, fl_owner_t owner)
static ssize_t fuse_send_read(struct fuse_io_args *ia, loff_t pos, size_t count,
fl_owner_t owner)
{
struct file *file = io->iocb->ki_filp;
struct file *file = ia->io->iocb->ki_filp;
struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fc;
fuse_read_fill(req, file, pos, count, FUSE_READ);
fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
if (owner != NULL) {
struct fuse_read_in *inarg = &req->misc.read.in;
inarg->read_flags |= FUSE_READ_LOCKOWNER;
inarg->lock_owner = fuse_lock_owner_id(fc, owner);
ia->read.in.read_flags |= FUSE_READ_LOCKOWNER;
ia->read.in.lock_owner = fuse_lock_owner_id(fc, owner);
}
if (io->async)
return fuse_async_req_send(fc, req, count, io);
if (ia->io->async)
return fuse_async_req_send(fc, ia, count);
fuse_request_send(fc, req);
return req->out.args[0].size;
return fuse_simple_request(fc, &ia->ap.args);
}
static void fuse_read_update_size(struct inode *inode, loff_t size,
......@@ -696,10 +747,9 @@ static void fuse_read_update_size(struct inode *inode, loff_t size,
spin_unlock(&fi->lock);
}
static void fuse_short_read(struct fuse_req *req, struct inode *inode,
u64 attr_ver)
static void fuse_short_read(struct inode *inode, u64 attr_ver, size_t num_read,
struct fuse_args_pages *ap)
{
size_t num_read = req->out.args[0].size;
struct fuse_conn *fc = get_fuse_conn(inode);
if (fc->writeback_cache) {
......@@ -712,28 +762,31 @@ static void fuse_short_read(struct fuse_req *req, struct inode *inode,
int start_idx = num_read >> PAGE_SHIFT;
size_t off = num_read & (PAGE_SIZE - 1);
for (i = start_idx; i < req->num_pages; i++) {
zero_user_segment(req->pages[i], off, PAGE_SIZE);
for (i = start_idx; i < ap->num_pages; i++) {
zero_user_segment(ap->pages[i], off, PAGE_SIZE);
off = 0;
}
} else {
loff_t pos = page_offset(req->pages[0]) + num_read;
loff_t pos = page_offset(ap->pages[0]) + num_read;
fuse_read_update_size(inode, pos, attr_ver);
}
}
static int fuse_do_readpage(struct file *file, struct page *page)
{
struct kiocb iocb;
struct fuse_io_priv io;
struct inode *inode = page->mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_req *req;
size_t num_read;
loff_t pos = page_offset(page);
size_t count = PAGE_SIZE;
struct fuse_page_desc desc = { .length = PAGE_SIZE };
struct fuse_io_args ia = {
.ap.args.page_zeroing = true,
.ap.args.out_pages = true,
.ap.num_pages = 1,
.ap.pages = &page,
.ap.descs = &desc,
};
ssize_t res;
u64 attr_ver;
int err;
/*
* Page writeback can extend beyond the lifetime of the
......@@ -742,35 +795,21 @@ static int fuse_do_readpage(struct file *file, struct page *page)
*/
fuse_wait_on_page_writeback(inode, page->index);
req = fuse_get_req(fc, 1);
if (IS_ERR(req))
return PTR_ERR(req);
attr_ver = fuse_get_attr_version(fc);
req->out.page_zeroing = 1;
req->out.argpages = 1;
req->num_pages = 1;
req->pages[0] = page;
req->page_descs[0].length = count;
init_sync_kiocb(&iocb, file);
io = (struct fuse_io_priv) FUSE_IO_PRIV_SYNC(&iocb);
num_read = fuse_send_read(req, &io, pos, count, NULL);
err = req->out.h.error;
if (!err) {
fuse_read_args_fill(&ia, file, pos, desc.length, FUSE_READ);
res = fuse_simple_request(fc, &ia.ap.args);
if (res < 0)
return res;
/*
* Short read means EOF. If file size is larger, truncate it
*/
if (num_read < count)
fuse_short_read(req, inode, attr_ver);
if (res < desc.length)
fuse_short_read(inode, attr_ver, res, &ia.ap);
SetPageUptodate(page);
}
fuse_put_request(fc, req);
return err;
return 0;
}
static int fuse_readpage(struct file *file, struct page *page)
......@@ -789,15 +828,18 @@ static int fuse_readpage(struct file *file, struct page *page)
return err;
}
static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_args *args,
int err)
{
int i;
size_t count = req->misc.read.in.size;
size_t num_read = req->out.args[0].size;
struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args);
struct fuse_args_pages *ap = &ia->ap;
size_t count = ia->read.in.size;
size_t num_read = args->out_args[0].size;
struct address_space *mapping = NULL;
for (i = 0; mapping == NULL && i < req->num_pages; i++)
mapping = req->pages[i]->mapping;
for (i = 0; mapping == NULL && i < ap->num_pages; i++)
mapping = ap->pages[i]->mapping;
if (mapping) {
struct inode *inode = mapping->host;
......@@ -805,93 +847,97 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
/*
* Short read means EOF. If file size is larger, truncate it
*/
if (!req->out.h.error && num_read < count)
fuse_short_read(req, inode, req->misc.read.attr_ver);
if (!err && num_read < count)
fuse_short_read(inode, ia->read.attr_ver, num_read, ap);
fuse_invalidate_atime(inode);
}
for (i = 0; i < req->num_pages; i++) {
struct page *page = req->pages[i];
if (!req->out.h.error)
for (i = 0; i < ap->num_pages; i++) {
struct page *page = ap->pages[i];
if (!err)
SetPageUptodate(page);
else
SetPageError(page);
unlock_page(page);
put_page(page);
}
if (req->ff)
fuse_file_put(req->ff, false, false);
if (ia->ff)
fuse_file_put(ia->ff, false, false);
fuse_io_free(ia);
}
static void fuse_send_readpages(struct fuse_req *req, struct file *file)
static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
{
struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fc;
loff_t pos = page_offset(req->pages[0]);
size_t count = req->num_pages << PAGE_SHIFT;
req->out.argpages = 1;
req->out.page_zeroing = 1;
req->out.page_replace = 1;
fuse_read_fill(req, file, pos, count, FUSE_READ);
req->misc.read.attr_ver = fuse_get_attr_version(fc);
struct fuse_args_pages *ap = &ia->ap;
loff_t pos = page_offset(ap->pages[0]);
size_t count = ap->num_pages << PAGE_SHIFT;
int err;
ap->args.out_pages = true;
ap->args.page_zeroing = true;
ap->args.page_replace = true;
fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
ia->read.attr_ver = fuse_get_attr_version(fc);
if (fc->async_read) {
req->ff = fuse_file_get(ff);
req->end = fuse_readpages_end;
fuse_request_send_background(fc, req);
ia->ff = fuse_file_get(ff);
ap->args.end = fuse_readpages_end;
err = fuse_simple_background(fc, &ap->args, GFP_KERNEL);
if (!err)
return;
} else {
fuse_request_send(fc, req);
fuse_readpages_end(fc, req);
fuse_put_request(fc, req);
err = fuse_simple_request(fc, &ap->args);
}
fuse_readpages_end(fc, &ap->args, err);
}
struct fuse_fill_data {
struct fuse_req *req;
struct fuse_io_args *ia;
struct file *file;
struct inode *inode;
unsigned nr_pages;
unsigned int nr_pages;
unsigned int max_pages;
};
static int fuse_readpages_fill(void *_data, struct page *page)
{
struct fuse_fill_data *data = _data;
struct fuse_req *req = data->req;
struct fuse_io_args *ia = data->ia;
struct fuse_args_pages *ap = &ia->ap;
struct inode *inode = data->inode;
struct fuse_conn *fc = get_fuse_conn(inode);
fuse_wait_on_page_writeback(inode, page->index);
if (req->num_pages &&
(req->num_pages == fc->max_pages ||
(req->num_pages + 1) * PAGE_SIZE > fc->max_read ||
req->pages[req->num_pages - 1]->index + 1 != page->index)) {
unsigned int nr_alloc = min_t(unsigned int, data->nr_pages,
if (ap->num_pages &&
(ap->num_pages == fc->max_pages ||
(ap->num_pages + 1) * PAGE_SIZE > fc->max_read ||
ap->pages[ap->num_pages - 1]->index + 1 != page->index)) {
data->max_pages = min_t(unsigned int, data->nr_pages,
fc->max_pages);
fuse_send_readpages(req, data->file);
if (fc->async_read)
req = fuse_get_req_for_background(fc, nr_alloc);
else
req = fuse_get_req(fc, nr_alloc);
data->req = req;
if (IS_ERR(req)) {
fuse_send_readpages(ia, data->file);
data->ia = ia = fuse_io_alloc(NULL, data->max_pages);
if (!ia) {
unlock_page(page);
return PTR_ERR(req);
return -ENOMEM;
}
ap = &ia->ap;
}
if (WARN_ON(req->num_pages >= req->max_pages)) {
if (WARN_ON(ap->num_pages >= data->max_pages)) {
unlock_page(page);
fuse_put_request(fc, req);
fuse_io_free(ia);
return -EIO;
}
get_page(page);
req->pages[req->num_pages] = page;
req->page_descs[req->num_pages].length = PAGE_SIZE;
req->num_pages++;
ap->pages[ap->num_pages] = page;
ap->descs[ap->num_pages].length = PAGE_SIZE;
ap->num_pages++;
data->nr_pages--;
return 0;
}
......@@ -903,7 +949,6 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_fill_data data;
int err;
unsigned int nr_alloc = min_t(unsigned int, nr_pages, fc->max_pages);
err = -EIO;
if (is_bad_inode(inode))
......@@ -911,21 +956,20 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
data.file = file;
data.inode = inode;
if (fc->async_read)
data.req = fuse_get_req_for_background(fc, nr_alloc);
else
data.req = fuse_get_req(fc, nr_alloc);
data.nr_pages = nr_pages;
err = PTR_ERR(data.req);
if (IS_ERR(data.req))
data.max_pages = min_t(unsigned int, nr_pages, fc->max_pages);
;
data.ia = fuse_io_alloc(NULL, data.max_pages);
err = -ENOMEM;
if (!data.ia)
goto out;
err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
if (!err) {
if (data.req->num_pages)
fuse_send_readpages(data.req, file);
if (data.ia->ap.num_pages)
fuse_send_readpages(data.ia, file);
else
fuse_put_request(fc, data.req);
fuse_io_free(data.ia);
}
out:
return err;
......@@ -952,54 +996,65 @@ static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to)
return generic_file_read_iter(iocb, to);
}
static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
static void fuse_write_args_fill(struct fuse_io_args *ia, struct fuse_file *ff,
loff_t pos, size_t count)
{
struct fuse_write_in *inarg = &req->misc.write.in;
struct fuse_write_out *outarg = &req->misc.write.out;
struct fuse_args *args = &ia->ap.args;
inarg->fh = ff->fh;
inarg->offset = pos;
inarg->size = count;
req->in.h.opcode = FUSE_WRITE;
req->in.h.nodeid = ff->nodeid;
req->in.numargs = 2;
ia->write.in.fh = ff->fh;
ia->write.in.offset = pos;
ia->write.in.size = count;
args->opcode = FUSE_WRITE;
args->nodeid = ff->nodeid;
args->in_numargs = 2;
if (ff->fc->minor < 9)
req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
args->in_args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
else
req->in.args[0].size = sizeof(struct fuse_write_in);
req->in.args[0].value = inarg;
req->in.args[1].size = count;
req->out.numargs = 1;
req->out.args[0].size = sizeof(struct fuse_write_out);
req->out.args[0].value = outarg;
args->in_args[0].size = sizeof(ia->write.in);
args->in_args[0].value = &ia->write.in;
args->in_args[1].size = count;
args->out_numargs = 1;
args->out_args[0].size = sizeof(ia->write.out);
args->out_args[0].value = &ia->write.out;
}
static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io,
loff_t pos, size_t count, fl_owner_t owner)
static unsigned int fuse_write_flags(struct kiocb *iocb)
{
struct kiocb *iocb = io->iocb;
unsigned int flags = iocb->ki_filp->f_flags;
if (iocb->ki_flags & IOCB_DSYNC)
flags |= O_DSYNC;
if (iocb->ki_flags & IOCB_SYNC)
flags |= O_SYNC;
return flags;
}
static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos,
size_t count, fl_owner_t owner)
{
struct kiocb *iocb = ia->io->iocb;
struct file *file = iocb->ki_filp;
struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fc;
struct fuse_write_in *inarg = &req->misc.write.in;
struct fuse_write_in *inarg = &ia->write.in;
ssize_t err;
fuse_write_fill(req, ff, pos, count);
inarg->flags = file->f_flags;
if (iocb->ki_flags & IOCB_DSYNC)
inarg->flags |= O_DSYNC;
if (iocb->ki_flags & IOCB_SYNC)
inarg->flags |= O_SYNC;
fuse_write_args_fill(ia, ff, pos, count);
inarg->flags = fuse_write_flags(iocb);
if (owner != NULL) {
inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
inarg->lock_owner = fuse_lock_owner_id(fc, owner);
}
if (io->async)
return fuse_async_req_send(fc, req, count, io);
if (ia->io->async)
return fuse_async_req_send(fc, ia, count);
err = fuse_simple_request(fc, &ia->ap.args);
if (!err && ia->write.out.size > count)
err = -EIO;
fuse_request_send(fc, req);
return req->misc.write.out.size;
return err ?: ia->write.out.size;
}
bool fuse_write_update_size(struct inode *inode, loff_t pos)
......@@ -1019,26 +1074,31 @@ bool fuse_write_update_size(struct inode *inode, loff_t pos)
return ret;
}
static size_t fuse_send_write_pages(struct fuse_req *req, struct kiocb *iocb,
struct inode *inode, loff_t pos,
size_t count)
static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
struct kiocb *iocb, struct inode *inode,
loff_t pos, size_t count)
{
size_t res;
unsigned offset;
unsigned i;
struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
struct fuse_args_pages *ap = &ia->ap;
struct file *file = iocb->ki_filp;
struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fc;
unsigned int offset, i;
int err;
for (i = 0; i < req->num_pages; i++)
fuse_wait_on_page_writeback(inode, req->pages[i]->index);
for (i = 0; i < ap->num_pages; i++)
fuse_wait_on_page_writeback(inode, ap->pages[i]->index);
res = fuse_send_write(req, &io, pos, count, NULL);
fuse_write_args_fill(ia, ff, pos, count);
ia->write.in.flags = fuse_write_flags(iocb);
offset = req->page_descs[0].offset;
count = res;
for (i = 0; i < req->num_pages; i++) {
struct page *page = req->pages[i];
err = fuse_simple_request(fc, &ap->args);
if (!req->out.h.error && !offset && count >= PAGE_SIZE)
offset = ap->descs[0].offset;
count = ia->write.out.size;
for (i = 0; i < ap->num_pages; i++) {
struct page *page = ap->pages[i];
if (!err && !offset && count >= PAGE_SIZE)
SetPageUptodate(page);
if (count > PAGE_SIZE - offset)
......@@ -1051,20 +1111,21 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct kiocb *iocb,
put_page(page);
}
return res;
return err;
}
static ssize_t fuse_fill_write_pages(struct fuse_req *req,
static ssize_t fuse_fill_write_pages(struct fuse_args_pages *ap,
struct address_space *mapping,
struct iov_iter *ii, loff_t pos)
struct iov_iter *ii, loff_t pos,
unsigned int max_pages)
{
struct fuse_conn *fc = get_fuse_conn(mapping->host);
unsigned offset = pos & (PAGE_SIZE - 1);
size_t count = 0;
int err;
req->in.argpages = 1;
req->page_descs[0].offset = offset;
ap->args.in_pages = true;
ap->descs[0].offset = offset;
do {
size_t tmp;
......@@ -1100,9 +1161,9 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
}
err = 0;
req->pages[req->num_pages] = page;
req->page_descs[req->num_pages].length = tmp;
req->num_pages++;
ap->pages[ap->num_pages] = page;
ap->descs[ap->num_pages].length = tmp;
ap->num_pages++;
count += tmp;
pos += tmp;
......@@ -1113,7 +1174,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
if (!fc->big_writes)
break;
} while (iov_iter_count(ii) && count < fc->max_write &&
req->num_pages < req->max_pages && offset == 0);
ap->num_pages < max_pages && offset == 0);
return count > 0 ? count : err;
}
......@@ -1141,27 +1202,27 @@ static ssize_t fuse_perform_write(struct kiocb *iocb,
set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
do {
struct fuse_req *req;
ssize_t count;
struct fuse_io_args ia = {};
struct fuse_args_pages *ap = &ia.ap;
unsigned int nr_pages = fuse_wr_pages(pos, iov_iter_count(ii),
fc->max_pages);
req = fuse_get_req(fc, nr_pages);
if (IS_ERR(req)) {
err = PTR_ERR(req);
ap->pages = fuse_pages_alloc(nr_pages, GFP_KERNEL, &ap->descs);
if (!ap->pages) {
err = -ENOMEM;
break;
}
count = fuse_fill_write_pages(req, mapping, ii, pos);
count = fuse_fill_write_pages(ap, mapping, ii, pos, nr_pages);
if (count <= 0) {
err = count;
} else {
size_t num_written;
num_written = fuse_send_write_pages(req, iocb, inode,
err = fuse_send_write_pages(&ia, iocb, inode,
pos, count);
err = req->out.h.error;
if (!err) {
size_t num_written = ia.write.out.size;
res += num_written;
pos += num_written;
......@@ -1170,7 +1231,7 @@ static ssize_t fuse_perform_write(struct kiocb *iocb,
err = -EIO;
}
}
fuse_put_request(fc, req);
kfree(ap->pages);
} while (!err && iov_iter_count(ii));
if (res > 0)
......@@ -1258,14 +1319,14 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
return written ? written : err;
}
static inline void fuse_page_descs_length_init(struct fuse_req *req,
unsigned index, unsigned nr_pages)
static inline void fuse_page_descs_length_init(struct fuse_page_desc *descs,
unsigned int index,
unsigned int nr_pages)
{
int i;
for (i = index; i < index + nr_pages; i++)
req->page_descs[i].length = PAGE_SIZE -
req->page_descs[i].offset;
descs[i].length = PAGE_SIZE - descs[i].offset;
}
static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii)
......@@ -1279,8 +1340,9 @@ static inline size_t fuse_get_frag_size(const struct iov_iter *ii,
return min(iov_iter_single_seg_count(ii), max_size);
}
static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
size_t *nbytesp, int write)
static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
size_t *nbytesp, int write,
unsigned int max_pages)
{
size_t nbytes = 0; /* # bytes already packed in req */
ssize_t ret = 0;
......@@ -1291,21 +1353,21 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
if (write)
req->in.args[1].value = (void *) user_addr;
ap->args.in_args[1].value = (void *) user_addr;
else
req->out.args[0].value = (void *) user_addr;
ap->args.out_args[0].value = (void *) user_addr;
iov_iter_advance(ii, frag_size);
*nbytesp = frag_size;
return 0;
}
while (nbytes < *nbytesp && req->num_pages < req->max_pages) {
while (nbytes < *nbytesp && ap->num_pages < max_pages) {
unsigned npages;
size_t start;
ret = iov_iter_get_pages(ii, &req->pages[req->num_pages],
ret = iov_iter_get_pages(ii, &ap->pages[ap->num_pages],
*nbytesp - nbytes,
req->max_pages - req->num_pages,
max_pages - ap->num_pages,
&start);
if (ret < 0)
break;
......@@ -1316,18 +1378,18 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
ret += start;
npages = (ret + PAGE_SIZE - 1) / PAGE_SIZE;
req->page_descs[req->num_pages].offset = start;
fuse_page_descs_length_init(req, req->num_pages, npages);
ap->descs[ap->num_pages].offset = start;
fuse_page_descs_length_init(ap->descs, ap->num_pages, npages);
req->num_pages += npages;
req->page_descs[req->num_pages - 1].length -=
ap->num_pages += npages;
ap->descs[ap->num_pages - 1].length -=
(PAGE_SIZE - ret) & (PAGE_SIZE - 1);
}
if (write)
req->in.argpages = 1;
ap->args.in_pages = 1;
else
req->out.argpages = 1;
ap->args.out_pages = 1;
*nbytesp = nbytes;
......@@ -1349,17 +1411,16 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
pgoff_t idx_from = pos >> PAGE_SHIFT;
pgoff_t idx_to = (pos + count - 1) >> PAGE_SHIFT;
ssize_t res = 0;
struct fuse_req *req;
int err = 0;
struct fuse_io_args *ia;
unsigned int max_pages;
if (io->async)
req = fuse_get_req_for_background(fc, iov_iter_npages(iter,
fc->max_pages));
else
req = fuse_get_req(fc, iov_iter_npages(iter, fc->max_pages));
if (IS_ERR(req))
return PTR_ERR(req);
max_pages = iov_iter_npages(iter, fc->max_pages);
ia = fuse_io_alloc(io, max_pages);
if (!ia)
return -ENOMEM;
ia->io = io;
if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
if (!write)
inode_lock(inode);
......@@ -1370,54 +1431,49 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
io->should_dirty = !write && iter_is_iovec(iter);
while (count) {
size_t nres;
ssize_t nres;
fl_owner_t owner = current->files;
size_t nbytes = min(count, nmax);
err = fuse_get_user_pages(req, iter, &nbytes, write);
err = fuse_get_user_pages(&ia->ap, iter, &nbytes, write,
max_pages);
if (err && !nbytes)
break;
if (write) {
if (!capable(CAP_FSETID)) {
struct fuse_write_in *inarg;
if (!capable(CAP_FSETID))
ia->write.in.write_flags |= FUSE_WRITE_KILL_PRIV;
inarg = &req->misc.write.in;
inarg->write_flags |= FUSE_WRITE_KILL_PRIV;
}
nres = fuse_send_write(req, io, pos, nbytes, owner);
nres = fuse_send_write(ia, pos, nbytes, owner);
} else {
nres = fuse_send_read(req, io, pos, nbytes, owner);
nres = fuse_send_read(ia, pos, nbytes, owner);
}
if (!io->async)
fuse_release_user_pages(req, io->should_dirty);
if (req->out.h.error) {
err = req->out.h.error;
break;
} else if (nres > nbytes) {
res = 0;
err = -EIO;
if (!io->async || nres < 0) {
fuse_release_user_pages(&ia->ap, io->should_dirty);
fuse_io_free(ia);
}
ia = NULL;
if (nres < 0) {
err = nres;
break;
}
WARN_ON(nres > nbytes);
count -= nres;
res += nres;
pos += nres;
if (nres != nbytes)
break;
if (count) {
fuse_put_request(fc, req);
if (io->async)
req = fuse_get_req_for_background(fc,
iov_iter_npages(iter, fc->max_pages));
else
req = fuse_get_req(fc, iov_iter_npages(iter,
fc->max_pages));
if (IS_ERR(req))
max_pages = iov_iter_npages(iter, fc->max_pages);
ia = fuse_io_alloc(io, max_pages);
if (!ia)
break;
}
}
if (!IS_ERR(req))
fuse_put_request(fc, req);
if (ia)
fuse_io_free(ia);
if (res > 0)
*ppos = pos;
......@@ -1509,45 +1565,53 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
return fuse_direct_write_iter(iocb, from);
}
static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
static void fuse_writepage_free(struct fuse_writepage_args *wpa)
{
struct fuse_args_pages *ap = &wpa->ia.ap;
int i;
for (i = 0; i < req->num_pages; i++)
__free_page(req->pages[i]);
for (i = 0; i < ap->num_pages; i++)
__free_page(ap->pages[i]);
if (wpa->ia.ff)
fuse_file_put(wpa->ia.ff, false, false);
if (req->ff)
fuse_file_put(req->ff, false, false);
kfree(ap->pages);
kfree(wpa);
}
static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
static void fuse_writepage_finish(struct fuse_conn *fc,
struct fuse_writepage_args *wpa)
{
struct inode *inode = req->inode;
struct fuse_args_pages *ap = &wpa->ia.ap;
struct inode *inode = wpa->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
struct backing_dev_info *bdi = inode_to_bdi(inode);
int i;
list_del(&req->writepages_entry);
for (i = 0; i < req->num_pages; i++) {
list_del(&wpa->writepages_entry);
for (i = 0; i < ap->num_pages; i++) {
dec_wb_stat(&bdi->wb, WB_WRITEBACK);
dec_node_page_state(req->pages[i], NR_WRITEBACK_TEMP);
dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP);
wb_writeout_inc(&bdi->wb);
}
wake_up(&fi->page_waitq);
}
/* Called under fi->lock, may release and reacquire it */
static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req,
loff_t size)
static void fuse_send_writepage(struct fuse_conn *fc,
struct fuse_writepage_args *wpa, loff_t size)
__releases(fi->lock)
__acquires(fi->lock)
{
struct fuse_req *aux, *next;
struct fuse_inode *fi = get_fuse_inode(req->inode);
struct fuse_write_in *inarg = &req->misc.write.in;
__u64 data_size = req->num_pages * PAGE_SIZE;
bool queued;
struct fuse_writepage_args *aux, *next;
struct fuse_inode *fi = get_fuse_inode(wpa->inode);
struct fuse_write_in *inarg = &wpa->ia.write.in;
struct fuse_args *args = &wpa->ia.ap.args;
__u64 data_size = wpa->ia.ap.num_pages * PAGE_SIZE;
int err;
fi->writectr++;
if (inarg->offset + data_size <= size) {
inarg->size = data_size;
} else if (inarg->offset < size) {
......@@ -1557,29 +1621,36 @@ __acquires(fi->lock)
goto out_free;
}
req->in.args[1].size = inarg->size;
queued = fuse_request_queue_background(fc, req);
args->in_args[1].size = inarg->size;
args->force = true;
args->nocreds = true;
err = fuse_simple_background(fc, args, GFP_ATOMIC);
if (err == -ENOMEM) {
spin_unlock(&fi->lock);
err = fuse_simple_background(fc, args, GFP_NOFS | __GFP_NOFAIL);
spin_lock(&fi->lock);
}
/* Fails on broken connection only */
if (unlikely(!queued))
if (unlikely(err))
goto out_free;
fi->writectr++;
return;
out_free:
fuse_writepage_finish(fc, req);
fi->writectr--;
fuse_writepage_finish(fc, wpa);
spin_unlock(&fi->lock);
/* After fuse_writepage_finish() aux request list is private */
for (aux = req->misc.write.next; aux; aux = next) {
next = aux->misc.write.next;
aux->misc.write.next = NULL;
fuse_writepage_free(fc, aux);
fuse_put_request(fc, aux);
for (aux = wpa->next; aux; aux = next) {
next = aux->next;
aux->next = NULL;
fuse_writepage_free(aux);
}
fuse_writepage_free(fc, req);
fuse_put_request(fc, req);
fuse_writepage_free(wpa);
spin_lock(&fi->lock);
}
......@@ -1596,29 +1667,34 @@ __acquires(fi->lock)
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
loff_t crop = i_size_read(inode);
struct fuse_req *req;
struct fuse_writepage_args *wpa;
while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
req = list_entry(fi->queued_writes.next, struct fuse_req, list);
list_del_init(&req->list);
fuse_send_writepage(fc, req, crop);
wpa = list_entry(fi->queued_writes.next,
struct fuse_writepage_args, queue_entry);
list_del_init(&wpa->queue_entry);
fuse_send_writepage(fc, wpa, crop);
}
}
static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args,
int error)
{
struct inode *inode = req->inode;
struct fuse_writepage_args *wpa =
container_of(args, typeof(*wpa), ia.ap.args);
struct inode *inode = wpa->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
mapping_set_error(inode->i_mapping, req->out.h.error);
mapping_set_error(inode->i_mapping, error);
spin_lock(&fi->lock);
while (req->misc.write.next) {
while (wpa->next) {
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_write_in *inarg = &req->misc.write.in;
struct fuse_req *next = req->misc.write.next;
req->misc.write.next = next->misc.write.next;
next->misc.write.next = NULL;
next->ff = fuse_file_get(req->ff);
struct fuse_write_in *inarg = &wpa->ia.write.in;
struct fuse_writepage_args *next = wpa->next;
wpa->next = next->next;
next->next = NULL;
next->ia.ff = fuse_file_get(wpa->ia.ff);
list_add(&next->writepages_entry, &fi->writepages);
/*
......@@ -1647,9 +1723,9 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
fuse_send_writepage(fc, next, inarg->offset + inarg->size);
}
fi->writectr--;
fuse_writepage_finish(fc, req);
fuse_writepage_finish(fc, wpa);
spin_unlock(&fi->lock);
fuse_writepage_free(fc, req);
fuse_writepage_free(wpa);
}
static struct fuse_file *__fuse_write_file_get(struct fuse_conn *fc,
......@@ -1691,52 +1767,71 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc)
return err;
}
/*
 * Allocate a zeroed fuse_writepage_args with room for a single page.
 *
 * Both the container and the page array are allocated with GFP_NOFS.
 * fuse_pages_alloc() also hands back the descriptor array through
 * ap->descs.
 *
 * Returns the new object, or NULL if either allocation fails.
 */
static struct fuse_writepage_args *fuse_writepage_args_alloc(void)
{
	struct fuse_writepage_args *wpa = kzalloc(sizeof(*wpa), GFP_NOFS);
	struct fuse_args_pages *ap;

	if (!wpa)
		return NULL;

	ap = &wpa->ia.ap;
	ap->num_pages = 0;
	ap->pages = fuse_pages_alloc(1, GFP_NOFS, &ap->descs);
	if (!ap->pages) {
		/* No page array: undo the container allocation as well. */
		kfree(wpa);
		return NULL;
	}

	return wpa;
}
static int fuse_writepage_locked(struct page *page)
{
struct address_space *mapping = page->mapping;
struct inode *inode = mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_req *req;
struct fuse_writepage_args *wpa;
struct fuse_args_pages *ap;
struct page *tmp_page;
int error = -ENOMEM;
set_page_writeback(page);
req = fuse_request_alloc_nofs(1);
if (!req)
wpa = fuse_writepage_args_alloc();
if (!wpa)
goto err;
ap = &wpa->ia.ap;
/* writeback always goes to bg_queue */
__set_bit(FR_BACKGROUND, &req->flags);
tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
if (!tmp_page)
goto err_free;
error = -EIO;
req->ff = fuse_write_file_get(fc, fi);
if (!req->ff)
wpa->ia.ff = fuse_write_file_get(fc, fi);
if (!wpa->ia.ff)
goto err_nofile;
fuse_write_fill(req, req->ff, page_offset(page), 0);
fuse_write_args_fill(&wpa->ia, wpa->ia.ff, page_offset(page), 0);
copy_highpage(tmp_page, page);
req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
req->misc.write.next = NULL;
req->in.argpages = 1;
req->num_pages = 1;
req->pages[0] = tmp_page;
req->page_descs[0].offset = 0;
req->page_descs[0].length = PAGE_SIZE;
req->end = fuse_writepage_end;
req->inode = inode;
wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
wpa->next = NULL;
ap->args.in_pages = true;
ap->num_pages = 1;
ap->pages[0] = tmp_page;
ap->descs[0].offset = 0;
ap->descs[0].length = PAGE_SIZE;
ap->args.end = fuse_writepage_end;
wpa->inode = inode;
inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
spin_lock(&fi->lock);
list_add(&req->writepages_entry, &fi->writepages);
list_add_tail(&req->list, &fi->queued_writes);
list_add(&wpa->writepages_entry, &fi->writepages);
list_add_tail(&wpa->queue_entry, &fi->queued_writes);
fuse_flush_writepages(inode);
spin_unlock(&fi->lock);
......@@ -1747,7 +1842,7 @@ static int fuse_writepage_locked(struct page *page)
err_nofile:
__free_page(tmp_page);
err_free:
fuse_request_free(req);
kfree(wpa);
err:
mapping_set_error(page->mapping, error);
end_page_writeback(page);
......@@ -1767,6 +1862,7 @@ static int fuse_writepage(struct page *page, struct writeback_control *wbc)
WARN_ON(wbc->sync_mode == WB_SYNC_ALL);
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return 0;
}
......@@ -1777,23 +1873,50 @@ static int fuse_writepage(struct page *page, struct writeback_control *wbc)
}
struct fuse_fill_wb_data {
struct fuse_req *req;
struct fuse_writepage_args *wpa;
struct fuse_file *ff;
struct inode *inode;
struct page **orig_pages;
unsigned int max_pages;
};
/*
 * Grow the page and descriptor arrays of the writepage request being
 * filled in @data.
 *
 * The new capacity is twice the current one, raised to at least
 * FUSE_DEFAULT_MAX_PAGES_PER_REQ and capped at the connection's max_pages.
 * Entries already in use are copied over and the old page array is freed.
 *
 * Returns true on success with data->max_pages updated, or false if the
 * allocation fails, in which case the request is left untouched.
 */
static bool fuse_pages_realloc(struct fuse_fill_wb_data *data)
{
	struct fuse_args_pages *ap = &data->wpa->ia.ap;
	struct fuse_conn *fc = get_fuse_conn(data->inode);
	struct fuse_page_desc *new_descs;
	struct page **new_pages;
	unsigned int wanted;
	unsigned int npages;

	wanted = max_t(unsigned int, data->max_pages * 2,
		       FUSE_DEFAULT_MAX_PAGES_PER_REQ);
	npages = min_t(unsigned int, wanted, fc->max_pages);

	/* Only warns; the reallocation below still goes ahead. */
	WARN_ON(npages <= data->max_pages);

	new_pages = fuse_pages_alloc(npages, GFP_NOFS, &new_descs);
	if (!new_pages)
		return false;

	/* Carry over the entries that are already populated. */
	memcpy(new_pages, ap->pages, sizeof(*new_pages) * ap->num_pages);
	memcpy(new_descs, ap->descs, sizeof(*new_descs) * ap->num_pages);

	kfree(ap->pages);
	ap->pages = new_pages;
	ap->descs = new_descs;
	data->max_pages = npages;

	return true;
}
static void fuse_writepages_send(struct fuse_fill_wb_data *data)
{
struct fuse_req *req = data->req;
struct fuse_writepage_args *wpa = data->wpa;
struct inode *inode = data->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
int num_pages = req->num_pages;
int num_pages = wpa->ia.ap.num_pages;
int i;
req->ff = fuse_file_get(data->ff);
wpa->ia.ff = fuse_file_get(data->ff);
spin_lock(&fi->lock);
list_add_tail(&req->list, &fi->queued_writes);
list_add_tail(&wpa->queue_entry, &fi->queued_writes);
fuse_flush_writepages(inode);
spin_unlock(&fi->lock);
......@@ -1808,54 +1931,52 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data)
* this new request onto the auxiliary list, otherwise reuse the existing one by
* copying the new page contents over to the old temporary page.
*/
static bool fuse_writepage_in_flight(struct fuse_req *new_req,
static bool fuse_writepage_in_flight(struct fuse_writepage_args *new_wpa,
struct page *page)
{
struct fuse_conn *fc = get_fuse_conn(new_req->inode);
struct fuse_inode *fi = get_fuse_inode(new_req->inode);
struct fuse_req *tmp;
struct fuse_req *old_req;
struct fuse_inode *fi = get_fuse_inode(new_wpa->inode);
struct fuse_writepage_args *tmp;
struct fuse_writepage_args *old_wpa;
struct fuse_args_pages *new_ap = &new_wpa->ia.ap;
WARN_ON(new_req->num_pages != 0);
WARN_ON(new_ap->num_pages != 0);
spin_lock(&fi->lock);
list_del(&new_req->writepages_entry);
old_req = fuse_find_writeback(fi, page->index, page->index);
if (!old_req) {
list_add(&new_req->writepages_entry, &fi->writepages);
list_del(&new_wpa->writepages_entry);
old_wpa = fuse_find_writeback(fi, page->index, page->index);
if (!old_wpa) {
list_add(&new_wpa->writepages_entry, &fi->writepages);
spin_unlock(&fi->lock);
return false;
}
new_req->num_pages = 1;
for (tmp = old_req->misc.write.next; tmp; tmp = tmp->misc.write.next) {
new_ap->num_pages = 1;
for (tmp = old_wpa->next; tmp; tmp = tmp->next) {
pgoff_t curr_index;
WARN_ON(tmp->inode != new_req->inode);
curr_index = tmp->misc.write.in.offset >> PAGE_SHIFT;
WARN_ON(tmp->inode != new_wpa->inode);
curr_index = tmp->ia.write.in.offset >> PAGE_SHIFT;
if (curr_index == page->index) {
WARN_ON(tmp->num_pages != 1);
WARN_ON(!test_bit(FR_PENDING, &tmp->flags));
swap(tmp->pages[0], new_req->pages[0]);
WARN_ON(tmp->ia.ap.num_pages != 1);
swap(tmp->ia.ap.pages[0], new_ap->pages[0]);
break;
}
}
if (!tmp) {
new_req->misc.write.next = old_req->misc.write.next;
old_req->misc.write.next = new_req;
new_wpa->next = old_wpa->next;
old_wpa->next = new_wpa;
}
spin_unlock(&fi->lock);
if (tmp) {
struct backing_dev_info *bdi = inode_to_bdi(new_req->inode);
struct backing_dev_info *bdi = inode_to_bdi(new_wpa->inode);
dec_wb_stat(&bdi->wb, WB_WRITEBACK);
dec_node_page_state(new_req->pages[0], NR_WRITEBACK_TEMP);
dec_node_page_state(new_ap->pages[0], NR_WRITEBACK_TEMP);
wb_writeout_inc(&bdi->wb);
fuse_writepage_free(fc, new_req);
fuse_request_free(new_req);
fuse_writepage_free(new_wpa);
}
return true;
......@@ -1865,7 +1986,8 @@ static int fuse_writepages_fill(struct page *page,
struct writeback_control *wbc, void *_data)
{
struct fuse_fill_wb_data *data = _data;
struct fuse_req *req = data->req;
struct fuse_writepage_args *wpa = data->wpa;
struct fuse_args_pages *ap = &wpa->ia.ap;
struct inode *inode = data->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_conn *fc = get_fuse_conn(inode);
......@@ -1888,16 +2010,16 @@ static int fuse_writepages_fill(struct page *page,
*/
is_writeback = fuse_page_is_writeback(inode, page->index);
if (req && req->num_pages &&
(is_writeback || req->num_pages == fc->max_pages ||
(req->num_pages + 1) * PAGE_SIZE > fc->max_write ||
data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) {
if (wpa && ap->num_pages &&
(is_writeback || ap->num_pages == fc->max_pages ||
(ap->num_pages + 1) * PAGE_SIZE > fc->max_write ||
data->orig_pages[ap->num_pages - 1]->index + 1 != page->index)) {
fuse_writepages_send(data);
data->req = NULL;
} else if (req && req->num_pages == req->max_pages) {
if (!fuse_req_realloc_pages(fc, req, GFP_NOFS)) {
data->wpa = NULL;
} else if (wpa && ap->num_pages == data->max_pages) {
if (!fuse_pages_realloc(data)) {
fuse_writepages_send(data);
req = data->req = NULL;
data->wpa = NULL;
}
}
......@@ -1915,59 +2037,60 @@ static int fuse_writepages_fill(struct page *page,
* This is ensured by holding the page lock in page_mkwrite() while
* checking fuse_page_is_writeback(). We already hold the page lock
* since clear_page_dirty_for_io() and keep it held until we add the
* request to the fi->writepages list and increment req->num_pages.
* request to the fi->writepages list and increment ap->num_pages.
* After this fuse_page_is_writeback() will indicate that the page is
* under writeback, so we can release the page lock.
*/
if (data->req == NULL) {
if (data->wpa == NULL) {
struct fuse_inode *fi = get_fuse_inode(inode);
err = -ENOMEM;
req = fuse_request_alloc_nofs(FUSE_REQ_INLINE_PAGES);
if (!req) {
wpa = fuse_writepage_args_alloc();
if (!wpa) {
__free_page(tmp_page);
goto out_unlock;
}
data->max_pages = 1;
fuse_write_fill(req, data->ff, page_offset(page), 0);
req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
req->misc.write.next = NULL;
req->in.argpages = 1;
__set_bit(FR_BACKGROUND, &req->flags);
req->num_pages = 0;
req->end = fuse_writepage_end;
req->inode = inode;
ap = &wpa->ia.ap;
fuse_write_args_fill(&wpa->ia, data->ff, page_offset(page), 0);
wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
wpa->next = NULL;
ap->args.in_pages = true;
ap->args.end = fuse_writepage_end;
ap->num_pages = 0;
wpa->inode = inode;
spin_lock(&fi->lock);
list_add(&req->writepages_entry, &fi->writepages);
list_add(&wpa->writepages_entry, &fi->writepages);
spin_unlock(&fi->lock);
data->req = req;
data->wpa = wpa;
}
set_page_writeback(page);
copy_highpage(tmp_page, page);
req->pages[req->num_pages] = tmp_page;
req->page_descs[req->num_pages].offset = 0;
req->page_descs[req->num_pages].length = PAGE_SIZE;
ap->pages[ap->num_pages] = tmp_page;
ap->descs[ap->num_pages].offset = 0;
ap->descs[ap->num_pages].length = PAGE_SIZE;
inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
err = 0;
if (is_writeback && fuse_writepage_in_flight(req, page)) {
if (is_writeback && fuse_writepage_in_flight(wpa, page)) {
end_page_writeback(page);
data->req = NULL;
data->wpa = NULL;
goto out_unlock;
}
data->orig_pages[req->num_pages] = page;
data->orig_pages[ap->num_pages] = page;
/*
* Protected by fi->lock against concurrent access by
* fuse_page_is_writeback().
*/
spin_lock(&fi->lock);
req->num_pages++;
ap->num_pages++;
spin_unlock(&fi->lock);
out_unlock:
......@@ -1989,7 +2112,7 @@ static int fuse_writepages(struct address_space *mapping,
goto out;
data.inode = inode;
data.req = NULL;
data.wpa = NULL;
data.ff = NULL;
err = -ENOMEM;
......@@ -2000,9 +2123,9 @@ static int fuse_writepages(struct address_space *mapping,
goto out;
err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
if (data.req) {
if (data.wpa) {
/* Ignore errors if we can write at least one page */
BUG_ON(!data.req->num_pages);
WARN_ON(!data.wpa->ia.ap.num_pages);
fuse_writepages_send(&data);
err = 0;
}
......@@ -2222,11 +2345,11 @@ static void fuse_lk_fill(struct fuse_args *args, struct file *file,
inarg->lk.pid = pid;
if (flock)
inarg->lk_flags |= FUSE_LK_FLOCK;
args->in.h.opcode = opcode;
args->in.h.nodeid = get_node_id(inode);
args->in.numargs = 1;
args->in.args[0].size = sizeof(*inarg);
args->in.args[0].value = inarg;
args->opcode = opcode;
args->nodeid = get_node_id(inode);
args->in_numargs = 1;
args->in_args[0].size = sizeof(*inarg);
args->in_args[0].value = inarg;
}
static int fuse_getlk(struct file *file, struct file_lock *fl)
......@@ -2239,9 +2362,9 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
int err;
fuse_lk_fill(&args, file, fl, FUSE_GETLK, 0, 0, &inarg);
args.out.numargs = 1;
args.out.args[0].size = sizeof(outarg);
args.out.args[0].value = &outarg;
args.out_numargs = 1;
args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg;
err = fuse_simple_request(fc, &args);
if (!err)
err = convert_fuse_file_lock(fc, &outarg.lk, fl);
......@@ -2336,14 +2459,14 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
memset(&inarg, 0, sizeof(inarg));
inarg.block = block;
inarg.blocksize = inode->i_sb->s_blocksize;
args.in.h.opcode = FUSE_BMAP;
args.in.h.nodeid = get_node_id(inode);
args.in.numargs = 1;
args.in.args[0].size = sizeof(inarg);
args.in.args[0].value = &inarg;
args.out.numargs = 1;
args.out.args[0].size = sizeof(outarg);
args.out.args[0].value = &outarg;
args.opcode = FUSE_BMAP;
args.nodeid = get_node_id(inode);
args.in_numargs = 1;
args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg;
args.out_numargs = 1;
args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg;
err = fuse_simple_request(fc, &args);
if (err == -ENOSYS)
fc->no_bmap = 1;
......@@ -2368,14 +2491,14 @@ static loff_t fuse_lseek(struct file *file, loff_t offset, int whence)
if (fc->no_lseek)
goto fallback;
args.in.h.opcode = FUSE_LSEEK;
args.in.h.nodeid = ff->nodeid;
args.in.numargs = 1;
args.in.args[0].size = sizeof(inarg);
args.in.args[0].value = &inarg;
args.out.numargs = 1;
args.out.args[0].size = sizeof(outarg);
args.out.args[0].value = &outarg;
args.opcode = FUSE_LSEEK;
args.nodeid = ff->nodeid;
args.in_numargs = 1;
args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg;
args.out_numargs = 1;
args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg;
err = fuse_simple_request(fc, &args);
if (err) {
if (err == -ENOSYS) {
......@@ -2573,14 +2696,14 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
.flags = flags
};
struct fuse_ioctl_out outarg;
struct fuse_req *req = NULL;
struct page **pages = NULL;
struct iovec *iov_page = NULL;
struct iovec *in_iov = NULL, *out_iov = NULL;
unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages;
size_t in_size, out_size, transferred, c;
unsigned int in_iovs = 0, out_iovs = 0, max_pages;
size_t in_size, out_size, c;
ssize_t transferred;
int err, i;
struct iov_iter ii;
struct fuse_args_pages ap = {};
#if BITS_PER_LONG == 32
inarg.flags |= FUSE_IOCTL_32BIT;
......@@ -2598,11 +2721,13 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
err = -ENOMEM;
pages = kcalloc(fc->max_pages, sizeof(pages[0]), GFP_KERNEL);
ap.pages = fuse_pages_alloc(fc->max_pages, GFP_KERNEL, &ap.descs);
iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
if (!pages || !iov_page)
if (!ap.pages || !iov_page)
goto out;
fuse_page_descs_length_init(ap.descs, 0, fc->max_pages);
/*
* If restricted, initialize IO parameters as encoded in @cmd.
* RETRY from server is not allowed.
......@@ -2639,56 +2764,44 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
err = -ENOMEM;
if (max_pages > fc->max_pages)
goto out;
while (num_pages < max_pages) {
pages[num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
if (!pages[num_pages])
while (ap.num_pages < max_pages) {
ap.pages[ap.num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
if (!ap.pages[ap.num_pages])
goto out;
num_pages++;
ap.num_pages++;
}
req = fuse_get_req(fc, num_pages);
if (IS_ERR(req)) {
err = PTR_ERR(req);
req = NULL;
goto out;
}
memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages);
req->num_pages = num_pages;
fuse_page_descs_length_init(req, 0, req->num_pages);
/* okay, let's send it to the client */
req->in.h.opcode = FUSE_IOCTL;
req->in.h.nodeid = ff->nodeid;
req->in.numargs = 1;
req->in.args[0].size = sizeof(inarg);
req->in.args[0].value = &inarg;
ap.args.opcode = FUSE_IOCTL;
ap.args.nodeid = ff->nodeid;
ap.args.in_numargs = 1;
ap.args.in_args[0].size = sizeof(inarg);
ap.args.in_args[0].value = &inarg;
if (in_size) {
req->in.numargs++;
req->in.args[1].size = in_size;
req->in.argpages = 1;
ap.args.in_numargs++;
ap.args.in_args[1].size = in_size;
ap.args.in_pages = true;
err = -EFAULT;
iov_iter_init(&ii, WRITE, in_iov, in_iovs, in_size);
for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= num_pages); i++) {
c = copy_page_from_iter(pages[i], 0, PAGE_SIZE, &ii);
for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) {
c = copy_page_from_iter(ap.pages[i], 0, PAGE_SIZE, &ii);
if (c != PAGE_SIZE && iov_iter_count(&ii))
goto out;
}
}
req->out.numargs = 2;
req->out.args[0].size = sizeof(outarg);
req->out.args[0].value = &outarg;
req->out.args[1].size = out_size;
req->out.argpages = 1;
req->out.argvar = 1;
ap.args.out_numargs = 2;
ap.args.out_args[0].size = sizeof(outarg);
ap.args.out_args[0].value = &outarg;
ap.args.out_args[1].size = out_size;
ap.args.out_pages = true;
ap.args.out_argvar = true;
fuse_request_send(fc, req);
err = req->out.h.error;
transferred = req->out.args[1].size;
fuse_put_request(fc, req);
req = NULL;
if (err)
transferred = fuse_simple_request(fc, &ap.args);
err = transferred;
if (transferred < 0)
goto out;
/* did it ask for retry? */
......@@ -2713,7 +2826,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
goto out;
vaddr = kmap_atomic(pages[0]);
vaddr = kmap_atomic(ap.pages[0]);
err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr,
transferred, in_iovs + out_iovs,
(flags & FUSE_IOCTL_COMPAT) != 0);
......@@ -2741,19 +2854,17 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
err = -EFAULT;
iov_iter_init(&ii, READ, out_iov, out_iovs, transferred);
for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= num_pages); i++) {
c = copy_page_to_iter(pages[i], 0, PAGE_SIZE, &ii);
for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) {
c = copy_page_to_iter(ap.pages[i], 0, PAGE_SIZE, &ii);
if (c != PAGE_SIZE && iov_iter_count(&ii))
goto out;
}
err = 0;
out:
if (req)
fuse_put_request(fc, req);
free_page((unsigned long) iov_page);
while (num_pages)
__free_page(pages[--num_pages]);
kfree(pages);
while (ap.num_pages)
__free_page(ap.pages[--ap.num_pages]);
kfree(ap.pages);
return err ? err : outarg.result;
}
......@@ -2861,14 +2972,14 @@ __poll_t fuse_file_poll(struct file *file, poll_table *wait)
fuse_register_polled_file(fc, ff);
}
args.in.h.opcode = FUSE_POLL;
args.in.h.nodeid = ff->nodeid;
args.in.numargs = 1;
args.in.args[0].size = sizeof(inarg);
args.in.args[0].value = &inarg;
args.out.numargs = 1;
args.out.args[0].size = sizeof(outarg);
args.out.args[0].value = &outarg;
args.opcode = FUSE_POLL;
args.nodeid = ff->nodeid;
args.in_numargs = 1;
args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg;
args.out_numargs = 1;
args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg;
err = fuse_simple_request(fc, &args);
if (!err)
......@@ -3076,11 +3187,11 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
if (!(mode & FALLOC_FL_KEEP_SIZE))
set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
args.in.h.opcode = FUSE_FALLOCATE;
args.in.h.nodeid = ff->nodeid;
args.in.numargs = 1;
args.in.args[0].size = sizeof(inarg);
args.in.args[0].value = &inarg;
args.opcode = FUSE_FALLOCATE;
args.nodeid = ff->nodeid;
args.in_numargs = 1;
args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg;
err = fuse_simple_request(fc, &args);
if (err == -ENOSYS) {
fc->no_fallocate = 1;
......@@ -3168,14 +3279,14 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
if (is_unstable)
set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
args.in.h.opcode = FUSE_COPY_FILE_RANGE;
args.in.h.nodeid = ff_in->nodeid;
args.in.numargs = 1;
args.in.args[0].size = sizeof(inarg);
args.in.args[0].value = &inarg;
args.out.numargs = 1;
args.out.args[0].size = sizeof(outarg);
args.out.args[0].value = &outarg;
args.opcode = FUSE_COPY_FILE_RANGE;
args.nodeid = ff_in->nodeid;
args.in_numargs = 1;
args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg;
args.out_numargs = 1;
args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg;
err = fuse_simple_request(fc, &args);
if (err == -ENOSYS) {
fc->no_copy_file_range = 1;
......
......@@ -47,9 +47,6 @@
/** Number of dentries for each connection in the control filesystem */
#define FUSE_CTL_NUM_DENTRIES 5
/** Number of page pointers embedded in fuse_req */
#define FUSE_REQ_INLINE_PAGES 1
/** List of active connections */
extern struct list_head fuse_conn_list;
......@@ -164,17 +161,15 @@ enum {
};
struct fuse_conn;
struct fuse_release_args;
/** FUSE specific file data */
struct fuse_file {
/** Fuse connection for this file */
struct fuse_conn *fc;
/*
* Request reserved for flush and release.
* Modified under relative fuse_inode::lock.
*/
struct fuse_req *reserved_req;
/* Argument space reserved for release */
struct fuse_release_args *release_args;
/** Kernel file handle guaranteed to be unique */
u64 kh;
......@@ -229,57 +224,12 @@ struct fuse_in_arg {
const void *value;
};
/** The request input */
struct fuse_in {
/** The request header */
struct fuse_in_header h;
/** True if the data for the last argument is in req->pages */
unsigned argpages:1;
/** Number of arguments */
unsigned numargs;
/** Array of arguments */
struct fuse_in_arg args[3];
};
/** One output argument of a request */
struct fuse_arg {
	/** Size in bytes of the buffer that @value points to */
	unsigned int size;

	/** Buffer the argument data is stored in */
	void *value;
};
/** The request output */
struct fuse_out {
/** Header returned from userspace */
struct fuse_out_header h;
/*
* The following bitfields are not changed during the request
* processing
*/
/** Last argument is variable length (can be shorter than
arg->size) */
unsigned argvar:1;
/** Last argument is a list of pages to copy data to */
unsigned argpages:1;
/** Zero partially or not copied pages */
unsigned page_zeroing:1;
/** Pages may be replaced with new ones */
unsigned page_replace:1;
/** Number of arguments */
unsigned numargs;
/** Array of arguments */
struct fuse_arg args[2];
};
/** FUSE page descriptor */
struct fuse_page_desc {
unsigned int length;
......@@ -287,20 +237,28 @@ struct fuse_page_desc {
};
struct fuse_args {
struct {
struct {
uint32_t opcode;
uint64_t nodeid;
} h;
unsigned numargs;
struct fuse_in_arg args[3];
uint32_t opcode;
unsigned short in_numargs;
unsigned short out_numargs;
bool force:1;
bool noreply:1;
bool nocreds:1;
bool in_pages:1;
bool out_pages:1;
bool out_argvar:1;
bool page_zeroing:1;
bool page_replace:1;
struct fuse_in_arg in_args[3];
struct fuse_arg out_args[2];
void (*end)(struct fuse_conn *fc, struct fuse_args *args, int error);
};
} in;
struct {
unsigned argvar:1;
unsigned numargs;
struct fuse_arg args[2];
} out;
/** Request arguments bundled with the page vector used by the request */
struct fuse_args_pages {
/** Generic request arguments */
struct fuse_args args;
/** Page vector */
struct page **pages;
/** Page-descriptor vector; descs[i] describes pages[i] */
struct fuse_page_desc *descs;
/** Number of entries currently in use in pages[] and descs[] */
unsigned int num_pages;
};
#define FUSE_ARGS(args) struct fuse_args args = {}
......@@ -373,83 +331,70 @@ struct fuse_req {
/** Entry on the interrupts list */
struct list_head intr_entry;
/* Input/output arguments */
struct fuse_args *args;
/** refcount */
refcount_t count;
/* Request flags, updated with test/set/clear_bit() */
unsigned long flags;
/** The request input */
struct fuse_in in;
/* The request input header */
struct {
struct fuse_in_header h;
} in;
/** The request output */
struct fuse_out out;
/* The request output header */
struct {
struct fuse_out_header h;
} out;
/** Used to wake up the task waiting for completion of request*/
wait_queue_head_t waitq;
/** Data for asynchronous requests */
union {
struct {
struct fuse_release_in in;
struct inode *inode;
} release;
struct fuse_init_in init_in;
struct fuse_init_out init_out;
struct cuse_init_in cuse_init_in;
struct {
struct fuse_read_in in;
u64 attr_ver;
} read;
struct {
struct fuse_write_in in;
struct fuse_write_out out;
struct fuse_req *next;
} write;
struct fuse_notify_retrieve_in retrieve_in;
} misc;
/** page vector */
struct page **pages;
/** page-descriptor vector */
struct fuse_page_desc *page_descs;
/** size of the 'pages' array */
unsigned max_pages;
/** inline page vector */
struct page *inline_pages[FUSE_REQ_INLINE_PAGES];
/** inline page-descriptor vector */
struct fuse_page_desc inline_page_descs[FUSE_REQ_INLINE_PAGES];
/** number of pages in vector */
unsigned num_pages;
/** File used in the request (or NULL) */
struct fuse_file *ff;
/** Inode used in the request or NULL */
struct inode *inode;
};
/** AIO control block */
struct fuse_io_priv *io;
struct fuse_iqueue;
/** Link on fi->writepages */
struct list_head writepages_entry;
/**
* Input queue callbacks
*
* Input queue signalling is device-specific. For example, the /dev/fuse file
* uses fiq->waitq and fasync to wake processes that are waiting on queue
* readiness. These callbacks allow other device types to respond to input
* queue activity.
*/
struct fuse_iqueue_ops {
/**
* Signal that a forget has been queued
*/
void (*wake_forget_and_unlock)(struct fuse_iqueue *fiq)
__releases(fiq->lock);
/** Request completion callback */
void (*end)(struct fuse_conn *, struct fuse_req *);
/**
* Signal that an INTERRUPT request has been queued
*/
void (*wake_interrupt_and_unlock)(struct fuse_iqueue *fiq)
__releases(fiq->lock);
/** Request is stolen from fuse_file->reserved_req */
struct file *stolen_file;
/**
* Signal that a request has been queued
*/
void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq)
__releases(fiq->lock);
};
/** /dev/fuse input queue operations */
extern const struct fuse_iqueue_ops fuse_dev_fiq_ops;
struct fuse_iqueue {
/** Connection established */
unsigned connected;
/** Lock protecting accesses to members of this structure */
spinlock_t lock;
/** Readers of the connection are waiting on this */
wait_queue_head_t waitq;
......@@ -471,6 +416,12 @@ struct fuse_iqueue {
/** O_ASYNC requests */
struct fasync_struct *fasync;
/** Device-specific callbacks */
const struct fuse_iqueue_ops *ops;
/** Device-specific state */
void *priv;
};
#define FUSE_PQ_HASH_BITS 8
......@@ -504,6 +455,29 @@ struct fuse_dev {
struct list_head entry;
};
/**
 * Mount parameters gathered by fuse_parse_param() and handed to
 * fuse_fill_super_common().
 */
struct fuse_fs_context {
/* Value of the "fd" mount option */
int fd;
/* Value of the "rootmode" mount option (parsed as octal) */
unsigned int rootmode;
/* "user_id" mount option, mapped through the mounter's user namespace */
kuid_t user_id;
/* "group_id" mount option, mapped through the mounter's user namespace */
kgid_t group_id;
/* Set for fuseblk mounts; "blksize" is rejected when this is clear */
bool is_bdev:1;
/* The *_present bits record that the corresponding option was given */
bool fd_present:1;
bool rootmode_present:1;
bool user_id_present:1;
bool group_id_present:1;
/* "default_permissions" flag option was given */
bool default_permissions:1;
/* "allow_other" flag option was given */
bool allow_other:1;
/*
 * NOTE(review): the next three bits are not set by fuse_parse_param();
 * presumably they seed the fuse_conn flags of the same names - confirm
 * against fuse_fill_super_common().
 */
bool destroy:1;
bool no_control:1;
bool no_force_umount:1;
/* Value of the "max_read" mount option */
unsigned int max_read;
/* Value of the "blksize" mount option (fuseblk only) */
unsigned int blksize;
/* String from the "subtype" mount option; freed by fuse_free_fc() */
const char *subtype;
/* fuse_dev pointer to fill in, should contain NULL on entry */
void **fudptr;
};
/**
* A Fuse connection.
*
......@@ -584,9 +558,6 @@ struct fuse_conn {
/** waitq for blocked connection */
wait_queue_head_t blocked_waitq;
/** waitq for reserved requests */
wait_queue_head_t reserved_req_waitq;
/** Connection established, cleared on umount, connection
abort and device release */
unsigned connected;
......@@ -721,6 +692,18 @@ struct fuse_conn {
/** Does the filesystem support copy_file_range? */
unsigned no_copy_file_range:1;
/* Send DESTROY request */
unsigned int destroy:1;
/* Delete dentries that have gone stale */
unsigned int delete_stale:1;
/** Do not create entry in fusectl fs */
unsigned int no_control:1;
/** Do not allow MNT_FORCE umount */
unsigned int no_force_umount:1;
/** The number of requests waiting for completion */
atomic_t num_waiting;
......@@ -742,9 +725,6 @@ struct fuse_conn {
/** Key for lock owner ID scrambling */
u32 scramble_key[4];
/** Reserved request for the DESTROY message */
struct fuse_req *destroy_req;
/** Version counter for attribute changes */
atomic64_t attr_version;
......@@ -820,14 +800,32 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
struct fuse_forget_link *fuse_alloc_forget(void);
/* Used by READDIRPLUS */
void fuse_force_forget(struct file *file, u64 nodeid);
struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
unsigned int max,
unsigned int *countp);
/**
/*
* Initialize READ or READDIR request
*/
void fuse_read_fill(struct fuse_req *req, struct file *file,
loff_t pos, size_t count, int opcode);
/** Arguments of a READ/WRITE style request, including its page vector */
struct fuse_io_args {
/* Opcode-specific wire structures; only one member is used per request */
union {
struct {
struct fuse_read_in in;
u64 attr_ver;
} read;
struct {
struct fuse_write_in in;
struct fuse_write_out out;
} write;
};
/* Generic arguments plus the page vector */
struct fuse_args_pages ap;
/* NOTE(review): presumably the AIO control block, as in the old fuse_req::io - confirm */
struct fuse_io_priv *io;
/* File used in the request; the writepages path stores a fuse_file_get() reference here */
struct fuse_file *ff;
};
void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
size_t count, int opcode);
/**
* Send OPEN or OPENDIR request
......@@ -899,62 +897,17 @@ void fuse_dev_cleanup(void);
int fuse_ctl_init(void);
void __exit fuse_ctl_cleanup(void);
/**
* Allocate a request
*/
struct fuse_req *fuse_request_alloc(unsigned npages);
struct fuse_req *fuse_request_alloc_nofs(unsigned npages);
bool fuse_req_realloc_pages(struct fuse_conn *fc, struct fuse_req *req,
gfp_t flags);
/**
* Free a request
*/
void fuse_request_free(struct fuse_req *req);
/**
* Get a request, may fail with -ENOMEM,
* caller should specify # elements in req->pages[] explicitly
*/
struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages);
struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc,
unsigned npages);
/*
* Increment reference count on request
*/
void __fuse_get_request(struct fuse_req *req);
/**
* Gets a request for a file operation, always succeeds
*/
struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
struct file *file);
/**
* Decrement reference count of a request. If count goes to zero free
* the request.
*/
void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
/**
* Send a request (synchronous)
*/
void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req);
/**
* Simple request sending that does request allocation and freeing
*/
ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args);
int fuse_simple_background(struct fuse_conn *fc, struct fuse_args *args,
gfp_t gfp_flags);
/**
* Send a request in the background
* End a finished request
*/
void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req);
bool fuse_request_queue_background(struct fuse_conn *fc, struct fuse_req *req);
void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req);
/* Abort all requests */
void fuse_abort_conn(struct fuse_conn *fc);
......@@ -980,15 +933,33 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
/**
* Initialize fuse_conn
*/
void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns);
void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns,
const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv);
/**
* Release reference to fuse_conn
*/
void fuse_conn_put(struct fuse_conn *fc);
struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc);
struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc);
struct fuse_dev *fuse_dev_alloc(void);
void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc);
void fuse_dev_free(struct fuse_dev *fud);
void fuse_send_init(struct fuse_conn *fc);
/**
* Fill in superblock and initialize fuse connection
* @sb: partially-initialized superblock to fill in
* @ctx: mount context
*/
int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx);
/**
* Disassociate fuse connection from superblock and kill the superblock
*
* Calls kill_anon_super(), do not use with bdev mounts.
*/
void fuse_kill_sb_anon(struct super_block *sb);
/**
* Add connection to control filesystem
......@@ -1093,4 +1064,15 @@ int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type);
/* readdir.c */
int fuse_readdir(struct file *file, struct dir_context *ctx);
/**
* Return the number of bytes in an arguments list
*/
unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args);
/**
* Get the next unique ID for a request
*/
u64 fuse_get_unique(struct fuse_iqueue *fiq);
void fuse_free_conn(struct fuse_conn *fc);
#endif /* _FS_FUSE_I_H */
......@@ -15,7 +15,8 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/parser.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/statfs.h>
#include <linux/random.h>
#include <linux/sched.h>
......@@ -59,24 +60,13 @@ MODULE_PARM_DESC(max_user_congthresh,
/** Congestion starts at 75% of maximum */
#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4)
struct fuse_mount_data {
int fd;
unsigned rootmode;
kuid_t user_id;
kgid_t group_id;
unsigned fd_present:1;
unsigned rootmode_present:1;
unsigned user_id_present:1;
unsigned group_id_present:1;
unsigned default_permissions:1;
unsigned allow_other:1;
unsigned max_read;
unsigned blksize;
};
#ifdef CONFIG_BLOCK
static struct file_system_type fuseblk_fs_type;
#endif
struct fuse_forget_link *fuse_alloc_forget(void)
{
return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL);
return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT);
}
static struct inode *fuse_alloc_inode(struct super_block *sb)
......@@ -374,19 +364,21 @@ void fuse_unlock_inode(struct inode *inode, bool locked)
static void fuse_umount_begin(struct super_block *sb)
{
fuse_abort_conn(get_fuse_conn_super(sb));
struct fuse_conn *fc = get_fuse_conn_super(sb);
if (!fc->no_force_umount)
fuse_abort_conn(fc);
}
static void fuse_send_destroy(struct fuse_conn *fc)
{
struct fuse_req *req = fc->destroy_req;
if (req && fc->conn_init) {
fc->destroy_req = NULL;
req->in.h.opcode = FUSE_DESTROY;
__set_bit(FR_FORCE, &req->flags);
__clear_bit(FR_BACKGROUND, &req->flags);
fuse_request_send(fc, req);
fuse_put_request(fc, req);
if (fc->conn_init) {
FUSE_ARGS(args);
args.opcode = FUSE_DESTROY;
args.force = true;
args.nocreds = true;
fuse_simple_request(fc, &args);
}
}
......@@ -430,12 +422,12 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
}
memset(&outarg, 0, sizeof(outarg));
args.in.numargs = 0;
args.in.h.opcode = FUSE_STATFS;
args.in.h.nodeid = get_node_id(d_inode(dentry));
args.out.numargs = 1;
args.out.args[0].size = sizeof(outarg);
args.out.args[0].value = &outarg;
args.in_numargs = 0;
args.opcode = FUSE_STATFS;
args.nodeid = get_node_id(d_inode(dentry));
args.out_numargs = 1;
args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg;
err = fuse_simple_request(fc, &args);
if (!err)
convert_fuse_statfs(buf, &outarg.st);
......@@ -443,6 +435,8 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
}
enum {
OPT_SOURCE,
OPT_SUBTYPE,
OPT_FD,
OPT_ROOTMODE,
OPT_USER_ID,
......@@ -454,111 +448,109 @@ enum {
OPT_ERR
};
static const match_table_t tokens = {
{OPT_FD, "fd=%u"},
{OPT_ROOTMODE, "rootmode=%o"},
{OPT_USER_ID, "user_id=%u"},
{OPT_GROUP_ID, "group_id=%u"},
{OPT_DEFAULT_PERMISSIONS, "default_permissions"},
{OPT_ALLOW_OTHER, "allow_other"},
{OPT_MAX_READ, "max_read=%u"},
{OPT_BLKSIZE, "blksize=%u"},
{OPT_ERR, NULL}
/*
 * Mount option table: each entry gives an option name, the kind of value it
 * takes and the OPT_* id reported for it.  Referenced by fuse_fs_parameters,
 * which fuse_parse_param() passes to fs_parse().
 */
static const struct fs_parameter_spec fuse_param_specs[] = {
fsparam_string ("source", OPT_SOURCE),
fsparam_u32 ("fd", OPT_FD),
fsparam_u32oct ("rootmode", OPT_ROOTMODE),
fsparam_u32 ("user_id", OPT_USER_ID),
fsparam_u32 ("group_id", OPT_GROUP_ID),
fsparam_flag ("default_permissions", OPT_DEFAULT_PERMISSIONS),
fsparam_flag ("allow_other", OPT_ALLOW_OTHER),
fsparam_u32 ("max_read", OPT_MAX_READ),
fsparam_u32 ("blksize", OPT_BLKSIZE),
fsparam_string ("subtype", OPT_SUBTYPE),
{}
};
/*
 * Parse the matched substring @s as a base-10 unsigned int and store the
 * value in @res.
 *
 * Returns the result of kstrtouint(), or -ENOMEM if the temporary copy of
 * the substring could not be allocated.
 */
static int fuse_match_uint(substring_t *s, unsigned int *res)
{
	char *str = match_strdup(s);
	int ret;

	if (!str)
		return -ENOMEM;

	ret = kstrtouint(str, 10, res);
	kfree(str);

	return ret;
}
/* Parameter description handed to fs_parse() by fuse_parse_param() */
static const struct fs_parameter_description fuse_fs_parameters = {
.name = "fuse",
.specs = fuse_param_specs,
};
static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev,
struct user_namespace *user_ns)
static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
char *p;
memset(d, 0, sizeof(struct fuse_mount_data));
d->max_read = ~0;
d->blksize = FUSE_DEFAULT_BLKSIZE;
while ((p = strsep(&opt, ",")) != NULL) {
int token;
int value;
unsigned uv;
substring_t args[MAX_OPT_ARGS];
if (!*p)
continue;
token = match_token(p, tokens, args);
switch (token) {
case OPT_FD:
if (match_int(&args[0], &value))
struct fs_parse_result result;
struct fuse_fs_context *ctx = fc->fs_private;
int opt;
opt = fs_parse(fc, &fuse_fs_parameters, param, &result);
if (opt < 0)
return opt;
switch (opt) {
case OPT_SOURCE:
if (fc->source)
return invalf(fc, "fuse: Multiple sources specified");
fc->source = param->string;
param->string = NULL;
break;
case OPT_SUBTYPE:
if (ctx->subtype)
return invalf(fc, "fuse: Multiple subtypes specified");
ctx->subtype = param->string;
param->string = NULL;
return 0;
d->fd = value;
d->fd_present = 1;
case OPT_FD:
ctx->fd = result.uint_32;
ctx->fd_present = 1;
break;
case OPT_ROOTMODE:
if (match_octal(&args[0], &value))
return 0;
if (!fuse_valid_type(value))
return 0;
d->rootmode = value;
d->rootmode_present = 1;
if (!fuse_valid_type(result.uint_32))
return invalf(fc, "fuse: Invalid rootmode");
ctx->rootmode = result.uint_32;
ctx->rootmode_present = 1;
break;
case OPT_USER_ID:
if (fuse_match_uint(&args[0], &uv))
return 0;
d->user_id = make_kuid(user_ns, uv);
if (!uid_valid(d->user_id))
return 0;
d->user_id_present = 1;
ctx->user_id = make_kuid(fc->user_ns, result.uint_32);
if (!uid_valid(ctx->user_id))
return invalf(fc, "fuse: Invalid user_id");
ctx->user_id_present = 1;
break;
case OPT_GROUP_ID:
if (fuse_match_uint(&args[0], &uv))
return 0;
d->group_id = make_kgid(user_ns, uv);
if (!gid_valid(d->group_id))
return 0;
d->group_id_present = 1;
ctx->group_id = make_kgid(fc->user_ns, result.uint_32);
if (!gid_valid(ctx->group_id))
return invalf(fc, "fuse: Invalid group_id");
ctx->group_id_present = 1;
break;
case OPT_DEFAULT_PERMISSIONS:
d->default_permissions = 1;
ctx->default_permissions = 1;
break;
case OPT_ALLOW_OTHER:
d->allow_other = 1;
ctx->allow_other = 1;
break;
case OPT_MAX_READ:
if (match_int(&args[0], &value))
return 0;
d->max_read = value;
ctx->max_read = result.uint_32;
break;
case OPT_BLKSIZE:
if (!is_bdev || match_int(&args[0], &value))
return 0;
d->blksize = value;
if (!ctx->is_bdev)
return invalf(fc, "fuse: blksize only supported for fuseblk");
ctx->blksize = result.uint_32;
break;
default:
return 0;
}
return -EINVAL;
}
if (!d->fd_present || !d->rootmode_present ||
!d->user_id_present || !d->group_id_present)
return 0;
}
return 1;
/*
 * fs_context ->free callback: release the fuse-private mount context.
 *
 * A context that carries no private data has nothing to release.
 * Otherwise the subtype string still held by the private context is freed
 * together with the private context itself.
 */
static void fuse_free_fc(struct fs_context *fc)
{
	struct fuse_fs_context *fsctx = fc->fs_private;

	if (!fsctx)
		return;

	kfree(fsctx->subtype);
	kfree(fsctx);
}
static int fuse_show_options(struct seq_file *m, struct dentry *root)
......@@ -579,14 +571,19 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
return 0;
}
static void fuse_iqueue_init(struct fuse_iqueue *fiq)
static void fuse_iqueue_init(struct fuse_iqueue *fiq,
const struct fuse_iqueue_ops *ops,
void *priv)
{
memset(fiq, 0, sizeof(struct fuse_iqueue));
spin_lock_init(&fiq->lock);
init_waitqueue_head(&fiq->waitq);
INIT_LIST_HEAD(&fiq->pending);
INIT_LIST_HEAD(&fiq->interrupts);
fiq->forget_list_tail = &fiq->forget_list_head;
fiq->connected = 1;
fiq->ops = ops;
fiq->priv = priv;
}
static void fuse_pqueue_init(struct fuse_pqueue *fpq)
......@@ -600,7 +597,8 @@ static void fuse_pqueue_init(struct fuse_pqueue *fpq)
fpq->connected = 1;
}
void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns)
void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns,
const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv)
{
memset(fc, 0, sizeof(*fc));
spin_lock_init(&fc->lock);
......@@ -609,8 +607,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns)
refcount_set(&fc->count, 1);
atomic_set(&fc->dev_count, 1);
init_waitqueue_head(&fc->blocked_waitq);
init_waitqueue_head(&fc->reserved_req_waitq);
fuse_iqueue_init(&fc->iq);
fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv);
INIT_LIST_HEAD(&fc->bg_queue);
INIT_LIST_HEAD(&fc->entry);
INIT_LIST_HEAD(&fc->devices);
......@@ -633,8 +630,6 @@ EXPORT_SYMBOL_GPL(fuse_conn_init);
void fuse_conn_put(struct fuse_conn *fc)
{
if (refcount_dec_and_test(&fc->count)) {
if (fc->destroy_req)
fuse_request_free(fc->destroy_req);
put_pid_ns(fc->pid_ns);
put_user_ns(fc->user_ns);
fc->release(fc);
......@@ -822,9 +817,12 @@ static const struct super_operations fuse_super_operations = {
static void sanitize_global_limit(unsigned *limit)
{
/*
* The default maximum number of async requests is calculated to consume
* 1/2^13 of the total memory, assuming 392 bytes per request.
*/
if (*limit == 0)
*limit = ((totalram_pages() << PAGE_SHIFT) >> 13) /
sizeof(struct fuse_req);
*limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / 392;
if (*limit >= 1 << 16)
*limit = (1 << 16) - 1;
......@@ -870,11 +868,19 @@ static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
spin_unlock(&fc->bg_lock);
}
static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
/*
 * Everything needed for one FUSE_INIT exchange, kept in a single
 * allocation: the generic argument descriptor plus the request and reply
 * payloads it points at.  The completion handler receives only the
 * embedded @args and recovers the enclosing structure with container_of(),
 * then releases the whole bundle with one kfree().
 */
struct fuse_init_args {
/* generic descriptor passed to fuse_simple_background() */
struct fuse_args args;
/* request payload, referenced by args.in_args[0] */
struct fuse_init_in in;
/* reply buffer, referenced by args.out_args[0] */
struct fuse_init_out out;
};
static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args,
int error)
{
struct fuse_init_out *arg = &req->misc.init_out;
struct fuse_init_args *ia = container_of(args, typeof(*ia), args);
struct fuse_init_out *arg = &ia->out;
if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION)
if (error || arg->major != FUSE_KERNEL_VERSION)
fc->conn_error = 1;
else {
unsigned long ra_pages;
......@@ -951,18 +957,23 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->max_write = max_t(unsigned, 4096, fc->max_write);
fc->conn_init = 1;
}
kfree(ia);
fuse_set_initialized(fc);
wake_up_all(&fc->blocked_waitq);
}
static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
void fuse_send_init(struct fuse_conn *fc)
{
struct fuse_init_in *arg = &req->misc.init_in;
struct fuse_init_args *ia;
ia = kzalloc(sizeof(*ia), GFP_KERNEL | __GFP_NOFAIL);
arg->major = FUSE_KERNEL_VERSION;
arg->minor = FUSE_KERNEL_MINOR_VERSION;
arg->max_readahead = fc->sb->s_bdi->ra_pages * PAGE_SIZE;
arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
ia->in.major = FUSE_KERNEL_VERSION;
ia->in.minor = FUSE_KERNEL_MINOR_VERSION;
ia->in.max_readahead = fc->sb->s_bdi->ra_pages * PAGE_SIZE;
ia->in.flags |=
FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
......@@ -971,26 +982,32 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA;
req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg);
req->in.args[0].value = arg;
req->out.numargs = 1;
ia->args.opcode = FUSE_INIT;
ia->args.in_numargs = 1;
ia->args.in_args[0].size = sizeof(ia->in);
ia->args.in_args[0].value = &ia->in;
ia->args.out_numargs = 1;
/* Variable length argument used for backward compatibility
with interface version < 7.5. Rest of init_out is zeroed
by do_get_request(), so a short reply is not a problem */
req->out.argvar = 1;
req->out.args[0].size = sizeof(struct fuse_init_out);
req->out.args[0].value = &req->misc.init_out;
req->end = process_init_reply;
fuse_request_send_background(fc, req);
ia->args.out_argvar = 1;
ia->args.out_args[0].size = sizeof(ia->out);
ia->args.out_args[0].value = &ia->out;
ia->args.force = true;
ia->args.nocreds = true;
ia->args.end = process_init_reply;
if (fuse_simple_background(fc, &ia->args, GFP_KERNEL) != 0)
process_init_reply(fc, &ia->args, -ENOTCONN);
}
EXPORT_SYMBOL_GPL(fuse_send_init);
static void fuse_free_conn(struct fuse_conn *fc)
void fuse_free_conn(struct fuse_conn *fc)
{
WARN_ON(!list_empty(&fc->devices));
kfree_rcu(fc, rcu);
}
EXPORT_SYMBOL_GPL(fuse_free_conn);
static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
{
......@@ -1032,7 +1049,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
return 0;
}
struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc)
struct fuse_dev *fuse_dev_alloc(void)
{
struct fuse_dev *fud;
struct list_head *pq;
......@@ -1048,16 +1065,33 @@ struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc)
}
fud->pq.processing = pq;
fud->fc = fuse_conn_get(fc);
fuse_pqueue_init(&fud->pq);
return fud;
}
EXPORT_SYMBOL_GPL(fuse_dev_alloc);
/*
 * Attach an already allocated fuse_dev to a connection.
 *
 * @fud: device to attach
 * @fc:  connection the device will belong to
 *
 * Stores the pointer returned by fuse_conn_get(@fc) in @fud (presumably a
 * counted reference, paired with fuse_conn_put() -- confirm against
 * fuse_dev_free()), then appends @fud to the connection's device list
 * while holding fc->lock.
 */
void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc)
{
/* Set the back-pointer before the device becomes reachable via the list. */
fud->fc = fuse_conn_get(fc);
spin_lock(&fc->lock);
list_add_tail(&fud->entry, &fc->devices);
spin_unlock(&fc->lock);
}
EXPORT_SYMBOL_GPL(fuse_dev_install);
/*
 * Convenience wrapper: allocate a fuse_dev and, if that succeeded, attach
 * it to @fc in one step.
 *
 * Returns the new device, or NULL when fuse_dev_alloc() failed (in which
 * case nothing was attached to @fc).
 */
struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc)
{
	struct fuse_dev *dev = fuse_dev_alloc();

	if (dev)
		fuse_dev_install(dev, fc);

	return dev;
}
EXPORT_SYMBOL_GPL(fuse_dev_alloc);
EXPORT_SYMBOL_GPL(fuse_dev_alloc_install);
void fuse_dev_free(struct fuse_dev *fud)
{
......@@ -1075,17 +1109,13 @@ void fuse_dev_free(struct fuse_dev *fud)
}
EXPORT_SYMBOL_GPL(fuse_dev_free);
static int fuse_fill_super(struct super_block *sb, void *data, int silent)
int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
{
struct fuse_dev *fud;
struct fuse_conn *fc;
struct fuse_conn *fc = get_fuse_conn_super(sb);
struct inode *root;
struct fuse_mount_data d;
struct file *file;
struct dentry *root_dentry;
struct fuse_req *init_req;
int err;
int is_bdev = sb->s_bdev != NULL;
err = -EINVAL;
if (sb->s_flags & SB_MANDLOCK)
......@@ -1093,19 +1123,19 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
if (!parse_fuse_opt(data, &d, is_bdev, sb->s_user_ns))
goto err;
if (is_bdev) {
if (ctx->is_bdev) {
#ifdef CONFIG_BLOCK
err = -EINVAL;
if (!sb_set_blocksize(sb, d.blksize))
if (!sb_set_blocksize(sb, ctx->blksize))
goto err;
#endif
} else {
sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT;
}
sb->s_subtype = ctx->subtype;
ctx->subtype = NULL;
sb->s_magic = FUSE_SUPER_MAGIC;
sb->s_op = &fuse_super_operations;
sb->s_xattr = fuse_xattr_handlers;
......@@ -1116,19 +1146,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
if (sb->s_user_ns != &init_user_ns)
sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
file = fget(d.fd);
err = -EINVAL;
if (!file)
goto err;
/*
* Require mount to happen from the same user namespace which
* opened /dev/fuse to prevent potential attacks.
*/
if (file->f_op != &fuse_dev_operations ||
file->f_cred->user_ns != sb->s_user_ns)
goto err_fput;
/*
* If we are not in the initial user namespace posix
* acls must be translated.
......@@ -1136,17 +1153,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
if (sb->s_user_ns != &init_user_ns)
sb->s_xattr = fuse_no_acl_xattr_handlers;
fc = kmalloc(sizeof(*fc), GFP_KERNEL);
err = -ENOMEM;
if (!fc)
goto err_fput;
fuse_conn_init(fc, sb->s_user_ns);
fc->release = fuse_free_conn;
fud = fuse_dev_alloc(fc);
fud = fuse_dev_alloc_install(fc);
if (!fud)
goto err_put_conn;
goto err;
fc->dev = sb->s_dev;
fc->sb = sb;
......@@ -1159,17 +1168,17 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
fc->dont_mask = 1;
sb->s_flags |= SB_POSIXACL;
fc->default_permissions = d.default_permissions;
fc->allow_other = d.allow_other;
fc->user_id = d.user_id;
fc->group_id = d.group_id;
fc->max_read = max_t(unsigned, 4096, d.max_read);
/* Used by get_root_inode() */
sb->s_fs_info = fc;
fc->default_permissions = ctx->default_permissions;
fc->allow_other = ctx->allow_other;
fc->user_id = ctx->user_id;
fc->group_id = ctx->group_id;
fc->max_read = max_t(unsigned, 4096, ctx->max_read);
fc->destroy = ctx->destroy;
fc->no_control = ctx->no_control;
fc->no_force_umount = ctx->no_force_umount;
err = -ENOMEM;
root = fuse_get_root_inode(sb, d.rootmode);
root = fuse_get_root_inode(sb, ctx->rootmode);
sb->s_d_op = &fuse_root_dentry_operations;
root_dentry = d_make_root(root);
if (!root_dentry)
......@@ -1177,20 +1186,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
/* Root dentry doesn't have .d_revalidate */
sb->s_d_op = &fuse_dentry_operations;
init_req = fuse_request_alloc(0);
if (!init_req)
goto err_put_root;
__set_bit(FR_BACKGROUND, &init_req->flags);
if (is_bdev) {
fc->destroy_req = fuse_request_alloc(0);
if (!fc->destroy_req)
goto err_free_init_req;
}
mutex_lock(&fuse_mutex);
err = -EINVAL;
if (file->private_data)
if (*ctx->fudptr)
goto err_unlock;
err = fuse_ctl_add_conn(fc);
......@@ -1199,27 +1197,62 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
list_add_tail(&fc->entry, &fuse_conn_list);
sb->s_root = root_dentry;
file->private_data = fud;
*ctx->fudptr = fud;
mutex_unlock(&fuse_mutex);
return 0;
err_unlock:
mutex_unlock(&fuse_mutex);
dput(root_dentry);
err_dev_free:
fuse_dev_free(fud);
err:
return err;
}
EXPORT_SYMBOL_GPL(fuse_fill_super_common);
static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
{
struct fuse_fs_context *ctx = fsc->fs_private;
struct file *file;
int err;
struct fuse_conn *fc;
err = -EINVAL;
file = fget(ctx->fd);
if (!file)
goto err;
/*
* Require mount to happen from the same user namespace which
* opened /dev/fuse to prevent potential attacks.
*/
if ((file->f_op != &fuse_dev_operations) ||
(file->f_cred->user_ns != sb->s_user_ns))
goto err_fput;
ctx->fudptr = &file->private_data;
fc = kmalloc(sizeof(*fc), GFP_KERNEL);
err = -ENOMEM;
if (!fc)
goto err_fput;
fuse_conn_init(fc, sb->s_user_ns, &fuse_dev_fiq_ops, NULL);
fc->release = fuse_free_conn;
sb->s_fs_info = fc;
err = fuse_fill_super_common(sb, ctx);
if (err)
goto err_put_conn;
/*
* atomic_dec_and_test() in fput() provides the necessary
* memory barrier for file->private_data to be visible on all
* CPUs after this
*/
fput(file);
fuse_send_init(fc, init_req);
fuse_send_init(get_fuse_conn_super(sb));
return 0;
err_unlock:
mutex_unlock(&fuse_mutex);
err_free_init_req:
fuse_request_free(init_req);
err_put_root:
dput(root_dentry);
err_dev_free:
fuse_dev_free(fud);
err_put_conn:
fuse_conn_put(fc);
sb->s_fs_info = NULL;
......@@ -1229,11 +1262,52 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
return err;
}
static struct dentry *fuse_mount(struct file_system_type *fs_type,
int flags, const char *dev_name,
void *raw_data)
/*
 * fs_context ->get_tree callback.
 *
 * The fd, rootmode, user_id and group_id options are all mandatory; the
 * mount is rejected with -EINVAL unless every one of them was seen.  A
 * block-device backed context is then handed to get_tree_bdev(), any other
 * context to get_tree_nodev(), with fuse_fill_super() filling the
 * superblock in both cases.
 */
static int fuse_get_tree(struct fs_context *fc)
{
	struct fuse_fs_context *opts = fc->fs_private;

	if (!(opts->fd_present && opts->rootmode_present &&
	      opts->user_id_present && opts->group_id_present))
		return -EINVAL;

#ifdef CONFIG_BLOCK
	/* Only reachable when block-device support is compiled in. */
	if (opts->is_bdev)
		return get_tree_bdev(fc, fuse_fill_super);
#endif

	return get_tree_nodev(fc, fuse_fill_super);
}
/*
 * fs_context callbacks installed by fuse_init_fs_context(), and therefore
 * shared by every filesystem type that uses it as .init_fs_context.
 */
static const struct fs_context_operations fuse_context_ops = {
/* releases the fuse_fs_context hanging off fc->fs_private */
.free = fuse_free_fc,
/* handles one mount parameter at a time */
.parse_param = fuse_parse_param,
/* validates mandatory options and creates the superblock */
.get_tree = fuse_get_tree,
};
/*
* Set up the filesystem mount context.
*/
static int fuse_init_fs_context(struct fs_context *fc)
{
return mount_nodev(fs_type, flags, raw_data, fuse_fill_super);
struct fuse_fs_context *ctx;
ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
ctx->max_read = ~0;
ctx->blksize = FUSE_DEFAULT_BLKSIZE;
#ifdef CONFIG_BLOCK
if (fc->fs_type == &fuseblk_fs_type) {
ctx->is_bdev = true;
ctx->destroy = true;
}
#endif
fc->fs_private = ctx;
fc->ops = &fuse_context_ops;
return 0;
}
static void fuse_sb_destroy(struct super_block *sb)
......@@ -1241,6 +1315,7 @@ static void fuse_sb_destroy(struct super_block *sb)
struct fuse_conn *fc = get_fuse_conn_super(sb);
if (fc) {
if (fc->destroy)
fuse_send_destroy(fc);
fuse_abort_conn(fc);
......@@ -1252,29 +1327,24 @@ static void fuse_sb_destroy(struct super_block *sb)
}
}
static void fuse_kill_sb_anon(struct super_block *sb)
void fuse_kill_sb_anon(struct super_block *sb)
{
fuse_sb_destroy(sb);
kill_anon_super(sb);
}
EXPORT_SYMBOL_GPL(fuse_kill_sb_anon);
static struct file_system_type fuse_fs_type = {
.owner = THIS_MODULE,
.name = "fuse",
.fs_flags = FS_HAS_SUBTYPE | FS_USERNS_MOUNT,
.mount = fuse_mount,
.init_fs_context = fuse_init_fs_context,
.parameters = &fuse_fs_parameters,
.kill_sb = fuse_kill_sb_anon,
};
MODULE_ALIAS_FS("fuse");
#ifdef CONFIG_BLOCK
static struct dentry *fuse_mount_blk(struct file_system_type *fs_type,
int flags, const char *dev_name,
void *raw_data)
{
return mount_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super);
}
static void fuse_kill_sb_blk(struct super_block *sb)
{
fuse_sb_destroy(sb);
......@@ -1284,7 +1354,8 @@ static void fuse_kill_sb_blk(struct super_block *sb)
static struct file_system_type fuseblk_fs_type = {
.owner = THIS_MODULE,
.name = "fuseblk",
.mount = fuse_mount_blk,
.init_fs_context = fuse_init_fs_context,
.parameters = &fuse_fs_parameters,
.kill_sb = fuse_kill_sb_blk,
.fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
};
......
......@@ -249,6 +249,27 @@ static int fuse_direntplus_link(struct file *file,
return 0;
}
/*
 * Send a FUSE_FORGET with a lookup count of one for @nodeid on the
 * connection that @file's inode belongs to.
 *
 * The request is marked as forced and as not expecting a reply; whatever
 * fuse_simple_request() returns is intentionally ignored.
 */
static void fuse_force_forget(struct file *file, u64 nodeid)
{
	struct fuse_conn *conn = get_fuse_conn(file_inode(file));
	struct fuse_forget_in forget;
	FUSE_ARGS(args);

	/* Zero the whole payload first, then fill in the one field we use. */
	memset(&forget, 0, sizeof(forget));
	forget.nlookup = 1;

	args.opcode = FUSE_FORGET;
	args.nodeid = nodeid;
	args.force = true;
	args.noreply = true;

	args.in_numargs = 1;
	args.in_args[0].value = &forget;
	args.in_args[0].size = sizeof(forget);

	/* Best effort only: errors are deliberately not propagated. */
	fuse_simple_request(conn, &args);
}
static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
struct dir_context *ctx, u64 attr_version)
{
......@@ -295,62 +316,55 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
{
int plus, err;
size_t nbytes;
int plus;
ssize_t res;
struct page *page;
struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_req *req;
struct fuse_io_args ia = {};
struct fuse_args_pages *ap = &ia.ap;
struct fuse_page_desc desc = { .length = PAGE_SIZE };
u64 attr_version = 0;
bool locked;
req = fuse_get_req(fc, 1);
if (IS_ERR(req))
return PTR_ERR(req);
page = alloc_page(GFP_KERNEL);
if (!page) {
fuse_put_request(fc, req);
if (!page)
return -ENOMEM;
}
plus = fuse_use_readdirplus(inode, ctx);
req->out.argpages = 1;
req->num_pages = 1;
req->pages[0] = page;
req->page_descs[0].length = PAGE_SIZE;
ap->args.out_pages = 1;
ap->num_pages = 1;
ap->pages = &page;
ap->descs = &desc;
if (plus) {
attr_version = fuse_get_attr_version(fc);
fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
FUSE_READDIRPLUS);
} else {
fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
FUSE_READDIR);
}
locked = fuse_lock_inode(inode);
fuse_request_send(fc, req);
res = fuse_simple_request(fc, &ap->args);
fuse_unlock_inode(inode, locked);
nbytes = req->out.args[0].size;
err = req->out.h.error;
fuse_put_request(fc, req);
if (!err) {
if (!nbytes) {
if (res >= 0) {
if (!res) {
struct fuse_file *ff = file->private_data;
if (ff->open_flags & FOPEN_CACHE_DIR)
fuse_readdir_cache_end(file, ctx->pos);
} else if (plus) {
err = parse_dirplusfile(page_address(page), nbytes,
res = parse_dirplusfile(page_address(page), res,
file, ctx, attr_version);
} else {
err = parse_dirfile(page_address(page), nbytes, file,
res = parse_dirfile(page_address(page), res, file,
ctx);
}
}
__free_page(page);
fuse_invalidate_atime(inode);
return err;
return res;
}
enum fuse_parse_result {
......@@ -372,11 +386,13 @@ static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
for (;;) {
struct fuse_dirent *dirent = addr + offset;
unsigned int nbytes = size - offset;
size_t reclen = FUSE_DIRENT_SIZE(dirent);
size_t reclen;
if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
break;
reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */
if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
return FOUND_ERR;
if (WARN_ON(reclen > nbytes))
......
......@@ -25,15 +25,15 @@ int fuse_setxattr(struct inode *inode, const char *name, const void *value,
memset(&inarg, 0, sizeof(inarg));
inarg.size = size;
inarg.flags = flags;
args.in.h.opcode = FUSE_SETXATTR;
args.in.h.nodeid = get_node_id(inode);
args.in.numargs = 3;
args.in.args[0].size = sizeof(inarg);
args.in.args[0].value = &inarg;
args.in.args[1].size = strlen(name) + 1;
args.in.args[1].value = name;
args.in.args[2].size = size;
args.in.args[2].value = value;
args.opcode = FUSE_SETXATTR;
args.nodeid = get_node_id(inode);
args.in_numargs = 3;
args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg;
args.in_args[1].size = strlen(name) + 1;
args.in_args[1].value = name;
args.in_args[2].size = size;
args.in_args[2].value = value;
err = fuse_simple_request(fc, &args);
if (err == -ENOSYS) {
fc->no_setxattr = 1;
......@@ -60,22 +60,22 @@ ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
memset(&inarg, 0, sizeof(inarg));
inarg.size = size;
args.in.h.opcode = FUSE_GETXATTR;
args.in.h.nodeid = get_node_id(inode);
args.in.numargs = 2;
args.in.args[0].size = sizeof(inarg);
args.in.args[0].value = &inarg;
args.in.args[1].size = strlen(name) + 1;
args.in.args[1].value = name;
args.opcode = FUSE_GETXATTR;
args.nodeid = get_node_id(inode);
args.in_numargs = 2;
args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg;
args.in_args[1].size = strlen(name) + 1;
args.in_args[1].value = name;
/* This is really two different operations rolled into one */
args.out.numargs = 1;
args.out_numargs = 1;
if (size) {
args.out.argvar = 1;
args.out.args[0].size = size;
args.out.args[0].value = value;
args.out_argvar = true;
args.out_args[0].size = size;
args.out_args[0].value = value;
} else {
args.out.args[0].size = sizeof(outarg);
args.out.args[0].value = &outarg;
args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg;
}
ret = fuse_simple_request(fc, &args);
if (!ret && !size)
......@@ -121,20 +121,20 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
memset(&inarg, 0, sizeof(inarg));
inarg.size = size;
args.in.h.opcode = FUSE_LISTXATTR;
args.in.h.nodeid = get_node_id(inode);
args.in.numargs = 1;
args.in.args[0].size = sizeof(inarg);
args.in.args[0].value = &inarg;
args.opcode = FUSE_LISTXATTR;
args.nodeid = get_node_id(inode);
args.in_numargs = 1;
args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg;
/* This is really two different operations rolled into one */
args.out.numargs = 1;
args.out_numargs = 1;
if (size) {
args.out.argvar = 1;
args.out.args[0].size = size;
args.out.args[0].value = list;
args.out_argvar = true;
args.out_args[0].size = size;
args.out_args[0].value = list;
} else {
args.out.args[0].size = sizeof(outarg);
args.out.args[0].value = &outarg;
args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg;
}
ret = fuse_simple_request(fc, &args);
if (!ret && !size)
......@@ -157,11 +157,11 @@ int fuse_removexattr(struct inode *inode, const char *name)
if (fc->no_removexattr)
return -EOPNOTSUPP;
args.in.h.opcode = FUSE_REMOVEXATTR;
args.in.h.nodeid = get_node_id(inode);
args.in.numargs = 1;
args.in.args[0].size = strlen(name) + 1;
args.in.args[0].value = name;
args.opcode = FUSE_REMOVEXATTR;
args.nodeid = get_node_id(inode);
args.in_numargs = 1;
args.in_args[0].size = strlen(name) + 1;
args.in_args[0].value = name;
err = fuse_simple_request(fc, &args);
if (err == -ENOSYS) {
fc->no_removexattr = 1;
......
......@@ -2802,8 +2802,6 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
put_filesystem(type);
return -EINVAL;
}
} else {
subtype = "";
}
}
......
......@@ -88,7 +88,7 @@ static inline void mangle(struct seq_file *m, const char *s)
static void show_type(struct seq_file *m, struct super_block *sb)
{
mangle(m, sb->s_type->name);
if (sb->s_subtype && sb->s_subtype[0]) {
if (sb->s_subtype) {
seq_putc(m, '.');
mangle(m, sb->s_subtype);
}
......
......@@ -1555,11 +1555,6 @@ int vfs_get_tree(struct fs_context *fc)
sb = fc->root->d_sb;
WARN_ON(!sb->s_bdi);
if (fc->subtype && !sb->s_subtype) {
sb->s_subtype = fc->subtype;
fc->subtype = NULL;
}
/*
* Write barrier is for super_cache_count(). We place it before setting
* SB_BORN as the data dependency between the two functions is the
......
......@@ -95,7 +95,6 @@ struct fs_context {
const struct cred *cred; /* The mounter's credentials */
struct fc_log *log; /* Logging buffer */
const char *source; /* The source name (eg. dev path) */
const char *subtype; /* The subtype to set on the superblock */
void *security; /* Linux S&M options */
void *s_fs_info; /* Proposed s_fs_info */
unsigned int sb_flags; /* Proposed superblock flags (SB_*) */
......
......@@ -425,6 +425,10 @@ enum fuse_opcode {
/* CUSE specific operations */
CUSE_INIT = 4096,
/* Reserved opcodes: helpful to detect structure endian-ness */
CUSE_INIT_BSWAP_RESERVED = 1048576, /* CUSE_INIT << 8 */
FUSE_INIT_BSWAP_RESERVED = 436207616, /* FUSE_INIT << 24 */
};
enum fuse_notify_code {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment