Commit 9b5cf826 authored by Linus Torvalds

Merge tag 'fuse-update-4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse

Pull fuse updates from Miklos Szeredi:
 "As well as the usual bug fixes, this adds the following new features:

   - cached readdir and readlink

   - max I/O size increased from 128k to 1M

   - improved performance and scalability of request queues

   - copy_file_range support

  The only non-fuse bits are trivial cleanups of macros in
  <linux/bitops.h>"

* tag 'fuse-update-4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: (31 commits)
  fuse: enable caching of symlinks
  fuse: only invalidate atime in direct read
  fuse: don't need GETATTR after every READ
  fuse: allow fine grained attr cache invaldation
  bitops: protect variables in bit_clear_unless() macro
  bitops: protect variables in set_mask_bits() macro
  fuse: realloc page array
  fuse: add max_pages to init_out
  fuse: allocate page array more efficiently
  fuse: reduce size of struct fuse_inode
  fuse: use iversion for readdir cache verification
  fuse: use mtime for readdir cache verification
  fuse: add readdir cache version
  fuse: allow using readdir cache
  fuse: allow caching readdir
  fuse: extract fuse_emit() helper
  fuse: add FOPEN_CACHE_DIR
  fuse: split out readdir.c
  fuse: Use hash table to link processing request
  fuse: kill req->intr_unique
  ...
parents 31990f0f 5571f1e6
...@@ -5,4 +5,4 @@ ...@@ -5,4 +5,4 @@
obj-$(CONFIG_FUSE_FS) += fuse.o obj-$(CONFIG_FUSE_FS) += fuse.o
obj-$(CONFIG_CUSE) += cuse.o obj-$(CONFIG_CUSE) += cuse.o
fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o
...@@ -107,7 +107,7 @@ static ssize_t fuse_conn_max_background_read(struct file *file, ...@@ -107,7 +107,7 @@ static ssize_t fuse_conn_max_background_read(struct file *file,
if (!fc) if (!fc)
return 0; return 0;
val = fc->max_background; val = READ_ONCE(fc->max_background);
fuse_conn_put(fc); fuse_conn_put(fc);
return fuse_conn_limit_read(file, buf, len, ppos, val); return fuse_conn_limit_read(file, buf, len, ppos, val);
...@@ -125,7 +125,12 @@ static ssize_t fuse_conn_max_background_write(struct file *file, ...@@ -125,7 +125,12 @@ static ssize_t fuse_conn_max_background_write(struct file *file,
if (ret > 0) { if (ret > 0) {
struct fuse_conn *fc = fuse_ctl_file_conn_get(file); struct fuse_conn *fc = fuse_ctl_file_conn_get(file);
if (fc) { if (fc) {
spin_lock(&fc->bg_lock);
fc->max_background = val; fc->max_background = val;
fc->blocked = fc->num_background >= fc->max_background;
if (!fc->blocked)
wake_up(&fc->blocked_waitq);
spin_unlock(&fc->bg_lock);
fuse_conn_put(fc); fuse_conn_put(fc);
} }
} }
...@@ -144,7 +149,7 @@ static ssize_t fuse_conn_congestion_threshold_read(struct file *file, ...@@ -144,7 +149,7 @@ static ssize_t fuse_conn_congestion_threshold_read(struct file *file,
if (!fc) if (!fc)
return 0; return 0;
val = fc->congestion_threshold; val = READ_ONCE(fc->congestion_threshold);
fuse_conn_put(fc); fuse_conn_put(fc);
return fuse_conn_limit_read(file, buf, len, ppos, val); return fuse_conn_limit_read(file, buf, len, ppos, val);
...@@ -155,18 +160,31 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file, ...@@ -155,18 +160,31 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
size_t count, loff_t *ppos) size_t count, loff_t *ppos)
{ {
unsigned uninitialized_var(val); unsigned uninitialized_var(val);
struct fuse_conn *fc;
ssize_t ret; ssize_t ret;
ret = fuse_conn_limit_write(file, buf, count, ppos, &val, ret = fuse_conn_limit_write(file, buf, count, ppos, &val,
max_user_congthresh); max_user_congthresh);
if (ret > 0) { if (ret <= 0)
struct fuse_conn *fc = fuse_ctl_file_conn_get(file); goto out;
if (fc) { fc = fuse_ctl_file_conn_get(file);
if (!fc)
goto out;
spin_lock(&fc->bg_lock);
fc->congestion_threshold = val; fc->congestion_threshold = val;
fuse_conn_put(fc); if (fc->sb) {
if (fc->num_background < fc->congestion_threshold) {
clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
clear_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
} else {
set_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
set_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
} }
} }
spin_unlock(&fc->bg_lock);
fuse_conn_put(fc);
out:
return ret; return ret;
} }
......
...@@ -25,6 +25,10 @@ ...@@ -25,6 +25,10 @@
MODULE_ALIAS_MISCDEV(FUSE_MINOR); MODULE_ALIAS_MISCDEV(FUSE_MINOR);
MODULE_ALIAS("devname:fuse"); MODULE_ALIAS("devname:fuse");
/* Ordinary requests have even IDs, while interrupt IDs are odd */
#define FUSE_INT_REQ_BIT (1ULL << 0)
#define FUSE_REQ_ID_STEP (1ULL << 1)
static struct kmem_cache *fuse_req_cachep; static struct kmem_cache *fuse_req_cachep;
static struct fuse_dev *fuse_get_dev(struct file *file) static struct fuse_dev *fuse_get_dev(struct file *file)
...@@ -40,9 +44,6 @@ static void fuse_request_init(struct fuse_req *req, struct page **pages, ...@@ -40,9 +44,6 @@ static void fuse_request_init(struct fuse_req *req, struct page **pages,
struct fuse_page_desc *page_descs, struct fuse_page_desc *page_descs,
unsigned npages) unsigned npages)
{ {
memset(req, 0, sizeof(*req));
memset(pages, 0, sizeof(*pages) * npages);
memset(page_descs, 0, sizeof(*page_descs) * npages);
INIT_LIST_HEAD(&req->list); INIT_LIST_HEAD(&req->list);
INIT_LIST_HEAD(&req->intr_entry); INIT_LIST_HEAD(&req->intr_entry);
init_waitqueue_head(&req->waitq); init_waitqueue_head(&req->waitq);
...@@ -53,31 +54,37 @@ static void fuse_request_init(struct fuse_req *req, struct page **pages, ...@@ -53,31 +54,37 @@ static void fuse_request_init(struct fuse_req *req, struct page **pages,
__set_bit(FR_PENDING, &req->flags); __set_bit(FR_PENDING, &req->flags);
} }
static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags) static struct page **fuse_req_pages_alloc(unsigned int npages, gfp_t flags,
struct fuse_page_desc **desc)
{ {
struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, flags);
if (req) {
struct page **pages; struct page **pages;
struct fuse_page_desc *page_descs;
if (npages <= FUSE_REQ_INLINE_PAGES) { pages = kzalloc(npages * (sizeof(struct page *) +
pages = req->inline_pages; sizeof(struct fuse_page_desc)), flags);
page_descs = req->inline_page_descs; *desc = (void *) pages + npages * sizeof(struct page *);
} else {
pages = kmalloc_array(npages, sizeof(struct page *),
flags);
page_descs =
kmalloc_array(npages,
sizeof(struct fuse_page_desc),
flags);
}
if (!pages || !page_descs) { return pages;
kfree(pages); }
kfree(page_descs);
static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
{
struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags);
if (req) {
struct page **pages = NULL;
struct fuse_page_desc *page_descs = NULL;
WARN_ON(npages > FUSE_MAX_MAX_PAGES);
if (npages > FUSE_REQ_INLINE_PAGES) {
pages = fuse_req_pages_alloc(npages, flags,
&page_descs);
if (!pages) {
kmem_cache_free(fuse_req_cachep, req); kmem_cache_free(fuse_req_cachep, req);
return NULL; return NULL;
} }
} else if (npages) {
pages = req->inline_pages;
page_descs = req->inline_page_descs;
}
fuse_request_init(req, pages, page_descs, npages); fuse_request_init(req, pages, page_descs, npages);
} }
...@@ -95,12 +102,41 @@ struct fuse_req *fuse_request_alloc_nofs(unsigned npages) ...@@ -95,12 +102,41 @@ struct fuse_req *fuse_request_alloc_nofs(unsigned npages)
return __fuse_request_alloc(npages, GFP_NOFS); return __fuse_request_alloc(npages, GFP_NOFS);
} }
void fuse_request_free(struct fuse_req *req) static void fuse_req_pages_free(struct fuse_req *req)
{ {
if (req->pages != req->inline_pages) { if (req->pages != req->inline_pages)
kfree(req->pages); kfree(req->pages);
kfree(req->page_descs); }
}
bool fuse_req_realloc_pages(struct fuse_conn *fc, struct fuse_req *req,
gfp_t flags)
{
struct page **pages;
struct fuse_page_desc *page_descs;
unsigned int npages = min_t(unsigned int,
max_t(unsigned int, req->max_pages * 2,
FUSE_DEFAULT_MAX_PAGES_PER_REQ),
fc->max_pages);
WARN_ON(npages <= req->max_pages);
pages = fuse_req_pages_alloc(npages, flags, &page_descs);
if (!pages)
return false;
memcpy(pages, req->pages, sizeof(struct page *) * req->max_pages);
memcpy(page_descs, req->page_descs,
sizeof(struct fuse_page_desc) * req->max_pages);
fuse_req_pages_free(req);
req->pages = pages;
req->page_descs = page_descs;
req->max_pages = npages;
return true;
}
void fuse_request_free(struct fuse_req *req)
{
fuse_req_pages_free(req);
kmem_cache_free(fuse_req_cachep, req); kmem_cache_free(fuse_req_cachep, req);
} }
...@@ -235,8 +271,10 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req) ...@@ -235,8 +271,10 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
struct file *file = req->stolen_file; struct file *file = req->stolen_file;
struct fuse_file *ff = file->private_data; struct fuse_file *ff = file->private_data;
WARN_ON(req->max_pages);
spin_lock(&fc->lock); spin_lock(&fc->lock);
fuse_request_init(req, req->pages, req->page_descs, req->max_pages); memset(req, 0, sizeof(*req));
fuse_request_init(req, NULL, NULL, 0);
BUG_ON(ff->reserved_req); BUG_ON(ff->reserved_req);
ff->reserved_req = req; ff->reserved_req = req;
wake_up_all(&fc->reserved_req_waitq); wake_up_all(&fc->reserved_req_waitq);
...@@ -287,10 +325,10 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) ...@@ -287,10 +325,10 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
* We get here in the unlikely case that a background * We get here in the unlikely case that a background
* request was allocated but not sent * request was allocated but not sent
*/ */
spin_lock(&fc->lock); spin_lock(&fc->bg_lock);
if (!fc->blocked) if (!fc->blocked)
wake_up(&fc->blocked_waitq); wake_up(&fc->blocked_waitq);
spin_unlock(&fc->lock); spin_unlock(&fc->bg_lock);
} }
if (test_bit(FR_WAITING, &req->flags)) { if (test_bit(FR_WAITING, &req->flags)) {
...@@ -319,7 +357,13 @@ static unsigned len_args(unsigned numargs, struct fuse_arg *args) ...@@ -319,7 +357,13 @@ static unsigned len_args(unsigned numargs, struct fuse_arg *args)
static u64 fuse_get_unique(struct fuse_iqueue *fiq) static u64 fuse_get_unique(struct fuse_iqueue *fiq)
{ {
return ++fiq->reqctr; fiq->reqctr += FUSE_REQ_ID_STEP;
return fiq->reqctr;
}
static unsigned int fuse_req_hash(u64 unique)
{
return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS);
} }
static void queue_request(struct fuse_iqueue *fiq, struct fuse_req *req) static void queue_request(struct fuse_iqueue *fiq, struct fuse_req *req)
...@@ -353,12 +397,13 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, ...@@ -353,12 +397,13 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
static void flush_bg_queue(struct fuse_conn *fc) static void flush_bg_queue(struct fuse_conn *fc)
{ {
struct fuse_iqueue *fiq = &fc->iq;
while (fc->active_background < fc->max_background && while (fc->active_background < fc->max_background &&
!list_empty(&fc->bg_queue)) { !list_empty(&fc->bg_queue)) {
struct fuse_req *req; struct fuse_req *req;
struct fuse_iqueue *fiq = &fc->iq;
req = list_entry(fc->bg_queue.next, struct fuse_req, list); req = list_first_entry(&fc->bg_queue, struct fuse_req, list);
list_del(&req->list); list_del(&req->list);
fc->active_background++; fc->active_background++;
spin_lock(&fiq->waitq.lock); spin_lock(&fiq->waitq.lock);
...@@ -389,14 +434,21 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) ...@@ -389,14 +434,21 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
WARN_ON(test_bit(FR_PENDING, &req->flags)); WARN_ON(test_bit(FR_PENDING, &req->flags));
WARN_ON(test_bit(FR_SENT, &req->flags)); WARN_ON(test_bit(FR_SENT, &req->flags));
if (test_bit(FR_BACKGROUND, &req->flags)) { if (test_bit(FR_BACKGROUND, &req->flags)) {
spin_lock(&fc->lock); spin_lock(&fc->bg_lock);
clear_bit(FR_BACKGROUND, &req->flags); clear_bit(FR_BACKGROUND, &req->flags);
if (fc->num_background == fc->max_background) if (fc->num_background == fc->max_background) {
fc->blocked = 0; fc->blocked = 0;
/* Wake up next waiter, if any */
if (!fc->blocked && waitqueue_active(&fc->blocked_waitq))
wake_up(&fc->blocked_waitq); wake_up(&fc->blocked_waitq);
} else if (!fc->blocked) {
/*
* Wake up next waiter, if any. It's okay to use
* waitqueue_active(), as we've already synced up
* fc->blocked with waiters with the wake_up() call
* above.
*/
if (waitqueue_active(&fc->blocked_waitq))
wake_up(&fc->blocked_waitq);
}
if (fc->num_background == fc->congestion_threshold && fc->sb) { if (fc->num_background == fc->congestion_threshold && fc->sb) {
clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC); clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
...@@ -405,7 +457,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) ...@@ -405,7 +457,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
fc->num_background--; fc->num_background--;
fc->active_background--; fc->active_background--;
flush_bg_queue(fc); flush_bg_queue(fc);
spin_unlock(&fc->lock); spin_unlock(&fc->bg_lock);
} }
wake_up(&req->waitq); wake_up(&req->waitq);
if (req->end) if (req->end)
...@@ -573,20 +625,18 @@ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args) ...@@ -573,20 +625,18 @@ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
return ret; return ret;
} }
/* bool fuse_request_queue_background(struct fuse_conn *fc, struct fuse_req *req)
* Called under fc->lock
*
* fc->connected must have been checked previously
*/
void fuse_request_send_background_locked(struct fuse_conn *fc,
struct fuse_req *req)
{ {
BUG_ON(!test_bit(FR_BACKGROUND, &req->flags)); bool queued = false;
WARN_ON(!test_bit(FR_BACKGROUND, &req->flags));
if (!test_bit(FR_WAITING, &req->flags)) { if (!test_bit(FR_WAITING, &req->flags)) {
__set_bit(FR_WAITING, &req->flags); __set_bit(FR_WAITING, &req->flags);
atomic_inc(&fc->num_waiting); atomic_inc(&fc->num_waiting);
} }
__set_bit(FR_ISREPLY, &req->flags); __set_bit(FR_ISREPLY, &req->flags);
spin_lock(&fc->bg_lock);
if (likely(fc->connected)) {
fc->num_background++; fc->num_background++;
if (fc->num_background == fc->max_background) if (fc->num_background == fc->max_background)
fc->blocked = 1; fc->blocked = 1;
...@@ -596,17 +646,17 @@ void fuse_request_send_background_locked(struct fuse_conn *fc, ...@@ -596,17 +646,17 @@ void fuse_request_send_background_locked(struct fuse_conn *fc,
} }
list_add_tail(&req->list, &fc->bg_queue); list_add_tail(&req->list, &fc->bg_queue);
flush_bg_queue(fc); flush_bg_queue(fc);
queued = true;
}
spin_unlock(&fc->bg_lock);
return queued;
} }
void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req) void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
{ {
BUG_ON(!req->end); WARN_ON(!req->end);
spin_lock(&fc->lock); if (!fuse_request_queue_background(fc, req)) {
if (fc->connected) {
fuse_request_send_background_locked(fc, req);
spin_unlock(&fc->lock);
} else {
spin_unlock(&fc->lock);
req->out.h.error = -ENOTCONN; req->out.h.error = -ENOTCONN;
req->end(fc, req); req->end(fc, req);
fuse_put_request(fc, req); fuse_put_request(fc, req);
...@@ -1084,12 +1134,11 @@ __releases(fiq->waitq.lock) ...@@ -1084,12 +1134,11 @@ __releases(fiq->waitq.lock)
int err; int err;
list_del_init(&req->intr_entry); list_del_init(&req->intr_entry);
req->intr_unique = fuse_get_unique(fiq);
memset(&ih, 0, sizeof(ih)); memset(&ih, 0, sizeof(ih));
memset(&arg, 0, sizeof(arg)); memset(&arg, 0, sizeof(arg));
ih.len = reqsize; ih.len = reqsize;
ih.opcode = FUSE_INTERRUPT; ih.opcode = FUSE_INTERRUPT;
ih.unique = req->intr_unique; ih.unique = (req->in.h.unique | FUSE_INT_REQ_BIT);
arg.unique = req->in.h.unique; arg.unique = req->in.h.unique;
spin_unlock(&fiq->waitq.lock); spin_unlock(&fiq->waitq.lock);
...@@ -1238,6 +1287,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, ...@@ -1238,6 +1287,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
struct fuse_req *req; struct fuse_req *req;
struct fuse_in *in; struct fuse_in *in;
unsigned reqsize; unsigned reqsize;
unsigned int hash;
restart: restart:
spin_lock(&fiq->waitq.lock); spin_lock(&fiq->waitq.lock);
...@@ -1310,13 +1360,16 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, ...@@ -1310,13 +1360,16 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
err = reqsize; err = reqsize;
goto out_end; goto out_end;
} }
list_move_tail(&req->list, &fpq->processing); hash = fuse_req_hash(req->in.h.unique);
spin_unlock(&fpq->lock); list_move_tail(&req->list, &fpq->processing[hash]);
__fuse_get_request(req);
set_bit(FR_SENT, &req->flags); set_bit(FR_SENT, &req->flags);
spin_unlock(&fpq->lock);
/* matches barrier in request_wait_answer() */ /* matches barrier in request_wait_answer() */
smp_mb__after_atomic(); smp_mb__after_atomic();
if (test_bit(FR_INTERRUPTED, &req->flags)) if (test_bit(FR_INTERRUPTED, &req->flags))
queue_interrupt(fiq, req); queue_interrupt(fiq, req);
fuse_put_request(fc, req);
return reqsize; return reqsize;
...@@ -1663,7 +1716,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, ...@@ -1663,7 +1716,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
unsigned int num; unsigned int num;
unsigned int offset; unsigned int offset;
size_t total_len = 0; size_t total_len = 0;
int num_pages; unsigned int num_pages;
offset = outarg->offset & ~PAGE_MASK; offset = outarg->offset & ~PAGE_MASK;
file_size = i_size_read(inode); file_size = i_size_read(inode);
...@@ -1675,7 +1728,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, ...@@ -1675,7 +1728,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
num = file_size - outarg->offset; num = file_size - outarg->offset;
num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ); num_pages = min(num_pages, fc->max_pages);
req = fuse_get_req(fc, num_pages); req = fuse_get_req(fc, num_pages);
if (IS_ERR(req)) if (IS_ERR(req))
...@@ -1792,10 +1845,11 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, ...@@ -1792,10 +1845,11 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
/* Look up request on processing list by unique ID */ /* Look up request on processing list by unique ID */
static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique) static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique)
{ {
unsigned int hash = fuse_req_hash(unique);
struct fuse_req *req; struct fuse_req *req;
list_for_each_entry(req, &fpq->processing, list) { list_for_each_entry(req, &fpq->processing[hash], list) {
if (req->in.h.unique == unique || req->intr_unique == unique) if (req->in.h.unique == unique)
return req; return req;
} }
return NULL; return NULL;
...@@ -1869,22 +1923,26 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, ...@@ -1869,22 +1923,26 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
if (!fpq->connected) if (!fpq->connected)
goto err_unlock_pq; goto err_unlock_pq;
req = request_find(fpq, oh.unique); req = request_find(fpq, oh.unique & ~FUSE_INT_REQ_BIT);
if (!req) if (!req)
goto err_unlock_pq; goto err_unlock_pq;
/* Is it an interrupt reply? */ /* Is it an interrupt reply ID? */
if (req->intr_unique == oh.unique) { if (oh.unique & FUSE_INT_REQ_BIT) {
__fuse_get_request(req);
spin_unlock(&fpq->lock); spin_unlock(&fpq->lock);
err = -EINVAL; err = -EINVAL;
if (nbytes != sizeof(struct fuse_out_header)) if (nbytes != sizeof(struct fuse_out_header)) {
fuse_put_request(fc, req);
goto err_finish; goto err_finish;
}
if (oh.error == -ENOSYS) if (oh.error == -ENOSYS)
fc->no_interrupt = 1; fc->no_interrupt = 1;
else if (oh.error == -EAGAIN) else if (oh.error == -EAGAIN)
queue_interrupt(&fc->iq, req); queue_interrupt(&fc->iq, req);
fuse_put_request(fc, req);
fuse_copy_finish(cs); fuse_copy_finish(cs);
return nbytes; return nbytes;
...@@ -2102,9 +2160,13 @@ void fuse_abort_conn(struct fuse_conn *fc, bool is_abort) ...@@ -2102,9 +2160,13 @@ void fuse_abort_conn(struct fuse_conn *fc, bool is_abort)
struct fuse_dev *fud; struct fuse_dev *fud;
struct fuse_req *req, *next; struct fuse_req *req, *next;
LIST_HEAD(to_end); LIST_HEAD(to_end);
unsigned int i;
/* Background queuing checks fc->connected under bg_lock */
spin_lock(&fc->bg_lock);
fc->connected = 0; fc->connected = 0;
fc->blocked = 0; spin_unlock(&fc->bg_lock);
fc->aborted = is_abort; fc->aborted = is_abort;
fuse_set_initialized(fc); fuse_set_initialized(fc);
list_for_each_entry(fud, &fc->devices, entry) { list_for_each_entry(fud, &fc->devices, entry) {
...@@ -2123,11 +2185,16 @@ void fuse_abort_conn(struct fuse_conn *fc, bool is_abort) ...@@ -2123,11 +2185,16 @@ void fuse_abort_conn(struct fuse_conn *fc, bool is_abort)
} }
spin_unlock(&req->waitq.lock); spin_unlock(&req->waitq.lock);
} }
list_splice_tail_init(&fpq->processing, &to_end); for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
list_splice_tail_init(&fpq->processing[i],
&to_end);
spin_unlock(&fpq->lock); spin_unlock(&fpq->lock);
} }
spin_lock(&fc->bg_lock);
fc->blocked = 0;
fc->max_background = UINT_MAX; fc->max_background = UINT_MAX;
flush_bg_queue(fc); flush_bg_queue(fc);
spin_unlock(&fc->bg_lock);
spin_lock(&fiq->waitq.lock); spin_lock(&fiq->waitq.lock);
fiq->connected = 0; fiq->connected = 0;
...@@ -2163,10 +2230,12 @@ int fuse_dev_release(struct inode *inode, struct file *file) ...@@ -2163,10 +2230,12 @@ int fuse_dev_release(struct inode *inode, struct file *file)
struct fuse_conn *fc = fud->fc; struct fuse_conn *fc = fud->fc;
struct fuse_pqueue *fpq = &fud->pq; struct fuse_pqueue *fpq = &fud->pq;
LIST_HEAD(to_end); LIST_HEAD(to_end);
unsigned int i;
spin_lock(&fpq->lock); spin_lock(&fpq->lock);
WARN_ON(!list_empty(&fpq->io)); WARN_ON(!list_empty(&fpq->io));
list_splice_init(&fpq->processing, &to_end); for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
list_splice_init(&fpq->processing[i], &to_end);
spin_unlock(&fpq->lock); spin_unlock(&fpq->lock);
end_requests(fc, &to_end); end_requests(fc, &to_end);
......
...@@ -14,24 +14,9 @@ ...@@ -14,24 +14,9 @@
#include <linux/namei.h> #include <linux/namei.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/xattr.h> #include <linux/xattr.h>
#include <linux/iversion.h>
#include <linux/posix_acl.h> #include <linux/posix_acl.h>
static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
{
struct fuse_conn *fc = get_fuse_conn(dir);
struct fuse_inode *fi = get_fuse_inode(dir);
if (!fc->do_readdirplus)
return false;
if (!fc->readdirplus_auto)
return true;
if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
return true;
if (ctx->pos == 0)
return true;
return false;
}
static void fuse_advise_use_readdirplus(struct inode *dir) static void fuse_advise_use_readdirplus(struct inode *dir)
{ {
struct fuse_inode *fi = get_fuse_inode(dir); struct fuse_inode *fi = get_fuse_inode(dir);
...@@ -80,8 +65,7 @@ static u64 time_to_jiffies(u64 sec, u32 nsec) ...@@ -80,8 +65,7 @@ static u64 time_to_jiffies(u64 sec, u32 nsec)
* Set dentry and possibly attribute timeouts from the lookup/mk* * Set dentry and possibly attribute timeouts from the lookup/mk*
* replies * replies
*/ */
static void fuse_change_entry_timeout(struct dentry *entry, void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
struct fuse_entry_out *o)
{ {
fuse_dentry_settime(entry, fuse_dentry_settime(entry,
time_to_jiffies(o->entry_valid, o->entry_valid_nsec)); time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
...@@ -92,18 +76,29 @@ static u64 attr_timeout(struct fuse_attr_out *o) ...@@ -92,18 +76,29 @@ static u64 attr_timeout(struct fuse_attr_out *o)
return time_to_jiffies(o->attr_valid, o->attr_valid_nsec); return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
} }
static u64 entry_attr_timeout(struct fuse_entry_out *o) u64 entry_attr_timeout(struct fuse_entry_out *o)
{ {
return time_to_jiffies(o->attr_valid, o->attr_valid_nsec); return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
} }
static void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
{
set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask);
}
/* /*
* Mark the attributes as stale, so that at the next call to * Mark the attributes as stale, so that at the next call to
* ->getattr() they will be fetched from userspace * ->getattr() they will be fetched from userspace
*/ */
void fuse_invalidate_attr(struct inode *inode) void fuse_invalidate_attr(struct inode *inode)
{ {
get_fuse_inode(inode)->i_time = 0; fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS);
}
static void fuse_dir_changed(struct inode *dir)
{
fuse_invalidate_attr(dir);
inode_maybe_inc_iversion(dir, false);
} }
/** /**
...@@ -113,7 +108,7 @@ void fuse_invalidate_attr(struct inode *inode) ...@@ -113,7 +108,7 @@ void fuse_invalidate_attr(struct inode *inode)
void fuse_invalidate_atime(struct inode *inode) void fuse_invalidate_atime(struct inode *inode)
{ {
if (!IS_RDONLY(inode)) if (!IS_RDONLY(inode))
fuse_invalidate_attr(inode); fuse_invalidate_attr_mask(inode, STATX_ATIME);
} }
/* /*
...@@ -262,11 +257,6 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) ...@@ -262,11 +257,6 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
goto out; goto out;
} }
static int invalid_nodeid(u64 nodeid)
{
return !nodeid || nodeid == FUSE_ROOT_ID;
}
static int fuse_dentry_init(struct dentry *dentry) static int fuse_dentry_init(struct dentry *dentry)
{ {
dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry), GFP_KERNEL); dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry), GFP_KERNEL);
...@@ -469,7 +459,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, ...@@ -469,7 +459,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
kfree(forget); kfree(forget);
d_instantiate(entry, inode); d_instantiate(entry, inode);
fuse_change_entry_timeout(entry, &outentry); fuse_change_entry_timeout(entry, &outentry);
fuse_invalidate_attr(dir); fuse_dir_changed(dir);
err = finish_open(file, entry, generic_file_open); err = finish_open(file, entry, generic_file_open);
if (err) { if (err) {
fuse_sync_release(ff, flags); fuse_sync_release(ff, flags);
...@@ -583,7 +573,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args, ...@@ -583,7 +573,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
} else { } else {
fuse_change_entry_timeout(entry, &outarg); fuse_change_entry_timeout(entry, &outarg);
} }
fuse_invalidate_attr(dir); fuse_dir_changed(dir);
return 0; return 0;
out_put_forget_req: out_put_forget_req:
...@@ -693,7 +683,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) ...@@ -693,7 +683,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
drop_nlink(inode); drop_nlink(inode);
spin_unlock(&fc->lock); spin_unlock(&fc->lock);
fuse_invalidate_attr(inode); fuse_invalidate_attr(inode);
fuse_invalidate_attr(dir); fuse_dir_changed(dir);
fuse_invalidate_entry_cache(entry); fuse_invalidate_entry_cache(entry);
fuse_update_ctime(inode); fuse_update_ctime(inode);
} else if (err == -EINTR) } else if (err == -EINTR)
...@@ -715,7 +705,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) ...@@ -715,7 +705,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
err = fuse_simple_request(fc, &args); err = fuse_simple_request(fc, &args);
if (!err) { if (!err) {
clear_nlink(d_inode(entry)); clear_nlink(d_inode(entry));
fuse_invalidate_attr(dir); fuse_dir_changed(dir);
fuse_invalidate_entry_cache(entry); fuse_invalidate_entry_cache(entry);
} else if (err == -EINTR) } else if (err == -EINTR)
fuse_invalidate_entry(entry); fuse_invalidate_entry(entry);
...@@ -754,9 +744,9 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent, ...@@ -754,9 +744,9 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
fuse_update_ctime(d_inode(newent)); fuse_update_ctime(d_inode(newent));
} }
fuse_invalidate_attr(olddir); fuse_dir_changed(olddir);
if (olddir != newdir) if (olddir != newdir)
fuse_invalidate_attr(newdir); fuse_dir_changed(newdir);
/* newent will end up negative */ /* newent will end up negative */
if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) { if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) {
...@@ -932,7 +922,8 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, ...@@ -932,7 +922,8 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
} }
static int fuse_update_get_attr(struct inode *inode, struct file *file, static int fuse_update_get_attr(struct inode *inode, struct file *file,
struct kstat *stat, unsigned int flags) struct kstat *stat, u32 request_mask,
unsigned int flags)
{ {
struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_inode *fi = get_fuse_inode(inode);
int err = 0; int err = 0;
...@@ -942,6 +933,8 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file, ...@@ -942,6 +933,8 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file,
sync = true; sync = true;
else if (flags & AT_STATX_DONT_SYNC) else if (flags & AT_STATX_DONT_SYNC)
sync = false; sync = false;
else if (request_mask & READ_ONCE(fi->inval_mask))
sync = true;
else else
sync = time_before64(fi->i_time, get_jiffies_64()); sync = time_before64(fi->i_time, get_jiffies_64());
...@@ -959,7 +952,9 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file, ...@@ -959,7 +952,9 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file,
int fuse_update_attributes(struct inode *inode, struct file *file) int fuse_update_attributes(struct inode *inode, struct file *file)
{ {
return fuse_update_get_attr(inode, file, NULL, 0); /* Do *not* need to get atime for internal purposes */
return fuse_update_get_attr(inode, file, NULL,
STATX_BASIC_STATS & ~STATX_ATIME, 0);
} }
int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
...@@ -989,7 +984,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, ...@@ -989,7 +984,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
if (!entry) if (!entry)
goto unlock; goto unlock;
fuse_invalidate_attr(parent); fuse_dir_changed(parent);
fuse_invalidate_entry(entry); fuse_invalidate_entry(entry);
if (child_nodeid != 0 && d_really_is_positive(entry)) { if (child_nodeid != 0 && d_really_is_positive(entry)) {
...@@ -1165,271 +1160,78 @@ static int fuse_permission(struct inode *inode, int mask) ...@@ -1165,271 +1160,78 @@ static int fuse_permission(struct inode *inode, int mask)
return err; return err;
} }
static int parse_dirfile(char *buf, size_t nbytes, struct file *file, static int fuse_readlink_page(struct inode *inode, struct page *page)
struct dir_context *ctx)
{
while (nbytes >= FUSE_NAME_OFFSET) {
struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
size_t reclen = FUSE_DIRENT_SIZE(dirent);
if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
return -EIO;
if (reclen > nbytes)
break;
if (memchr(dirent->name, '/', dirent->namelen) != NULL)
return -EIO;
if (!dir_emit(ctx, dirent->name, dirent->namelen,
dirent->ino, dirent->type))
break;
buf += reclen;
nbytes -= reclen;
ctx->pos = dirent->off;
}
return 0;
}
/*
 * Link one READDIRPLUS entry into the dcache.
 *
 * Finds or allocates the child dentry named by the entry under the directory
 * open in @file.  If the dentry already has a matching inode, its attributes
 * are refreshed from the reply; otherwise an inode is obtained through
 * fuse_iget() and spliced onto the dentry.  The entry timeout is then updated
 * from the reply and the dentry reference is dropped.
 *
 * @file:         open directory the entry was read from
 * @direntplus:   the entry (dirent plus fuse_entry_out) to link
 * @attr_version: attribute version passed on to fuse_change_attributes() /
 *                fuse_iget()
 *
 * Returns 0 on success or when the entry is deliberately ignored (zero
 * nodeid, "." or ".."), -EIO for an invalid nodeid, an invalid file type or
 * a bad inode, or the error carried by the pointer that d_alloc_parallel()
 * or d_splice_alias() returned.
 */
static int fuse_direntplus_link(struct file *file,
struct fuse_direntplus *direntplus,
u64 attr_version)
{
struct fuse_entry_out *o = &direntplus->entry_out;
struct fuse_dirent *dirent = &direntplus->dirent;
struct dentry *parent = file->f_path.dentry;
struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
struct dentry *dentry;
struct dentry *alias;
struct inode *dir = d_inode(parent);
struct fuse_conn *fc;
struct inode *inode;
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
if (!o->nodeid) {
/*
 * Unlike in the case of fuse_lookup, zero nodeid does not mean
 * ENOENT. Instead, it only means the userspace filesystem did
 * not want to return attributes/handle for this entry.
 *
 * So do nothing.
 */
return 0;
}
if (name.name[0] == '.') {
/*
 * We could potentially refresh the attributes of the directory
 * and its parent?
 */
/* "." is skipped */
if (name.len == 1)
return 0;
/* ".." is skipped */
if (name.name[1] == '.' && name.len == 2)
return 0;
}
/* Reject replies with a nodeid or file type that cannot be valid. */
if (invalid_nodeid(o->nodeid))
return -EIO;
if (!fuse_valid_type(o->attr.mode))
return -EIO;
fc = get_fuse_conn(dir);
/* Hash the name, then look for an existing dentry under the parent. */
name.hash = full_name_hash(parent, name.name, name.len);
dentry = d_lookup(parent, &name);
if (!dentry) {
/* Also reached by goto after a mismatching dentry was invalidated. */
retry:
dentry = d_alloc_parallel(parent, &name, &wq);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
}
if (!d_in_lookup(dentry)) {
struct fuse_inode *fi;
inode = d_inode(dentry);
/*
 * No inode, a different nodeid, or a changed file type: the dentry
 * does not describe this entry, so invalidate it and start over.
 */
if (!inode ||
get_node_id(inode) != o->nodeid ||
((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
d_invalidate(dentry);
dput(dentry);
goto retry;
}
if (is_bad_inode(inode)) {
dput(dentry);
return -EIO;
}
fi = get_fuse_inode(inode);
/* nlookup is bumped under fc->lock */
spin_lock(&fc->lock);
fi->nlookup++;
spin_unlock(&fc->lock);
forget_all_cached_acls(inode);
fuse_change_attributes(inode, &o->attr,
entry_attr_timeout(o),
attr_version);
/*
 * The other branch comes via fuse_iget()
 * which bumps nlookup inside
 */
} else {
/* Dentry is still in lookup: get an inode for the reply and attach it. */
inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
&o->attr, entry_attr_timeout(o),
attr_version);
/* A NULL inode is handed to d_splice_alias() as an -ENOMEM error pointer. */
if (!inode)
inode = ERR_PTR(-ENOMEM);
alias = d_splice_alias(inode, dentry);
d_lookup_done(dentry);
/* A non-NULL result (dentry or error pointer) replaces our dentry. */
if (alias) {
dput(dentry);
dentry = alias;
}
if (IS_ERR(dentry))
return PTR_ERR(dentry);
}
/* With readdirplus_auto set, flag the inode with FUSE_I_INIT_RDPLUS. */
if (fc->readdirplus_auto)
set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
fuse_change_entry_timeout(dentry, o);
dput(dentry);
return 0;
}
static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
struct dir_context *ctx, u64 attr_version)
{ {
struct fuse_direntplus *direntplus;
struct fuse_dirent *dirent;
size_t reclen;
int over = 0;
int ret;
while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
direntplus = (struct fuse_direntplus *) buf;
dirent = &direntplus->dirent;
reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
return -EIO;
if (reclen > nbytes)
break;
if (memchr(dirent->name, '/', dirent->namelen) != NULL)
return -EIO;
if (!over) {
/* We fill entries into dstbuf only as much as
it can hold. But we still continue iterating
over remaining entries to link them. If not,
we need to send a FORGET for each of those
which we did not link.
*/
over = !dir_emit(ctx, dirent->name, dirent->namelen,
dirent->ino, dirent->type);
if (!over)
ctx->pos = dirent->off;
}
buf += reclen;
nbytes -= reclen;
ret = fuse_direntplus_link(file, direntplus, attr_version);
if (ret)
fuse_force_forget(file, direntplus->entry_out.nodeid);
}
return 0;
}
static int fuse_readdir(struct file *file, struct dir_context *ctx)
{
int plus, err;
size_t nbytes;
struct page *page;
struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_req *req; struct fuse_req *req;
u64 attr_version = 0; int err;
bool locked;
if (is_bad_inode(inode))
return -EIO;
req = fuse_get_req(fc, 1); req = fuse_get_req(fc, 1);
if (IS_ERR(req)) if (IS_ERR(req))
return PTR_ERR(req); return PTR_ERR(req);
page = alloc_page(GFP_KERNEL); req->out.page_zeroing = 1;
if (!page) {
fuse_put_request(fc, req);
return -ENOMEM;
}
plus = fuse_use_readdirplus(inode, ctx);
req->out.argpages = 1; req->out.argpages = 1;
req->num_pages = 1; req->num_pages = 1;
req->pages[0] = page; req->pages[0] = page;
req->page_descs[0].length = PAGE_SIZE; req->page_descs[0].length = PAGE_SIZE - 1;
if (plus) { req->in.h.opcode = FUSE_READLINK;
attr_version = fuse_get_attr_version(fc); req->in.h.nodeid = get_node_id(inode);
fuse_read_fill(req, file, ctx->pos, PAGE_SIZE, req->out.argvar = 1;
FUSE_READDIRPLUS); req->out.numargs = 1;
} else { req->out.args[0].size = PAGE_SIZE - 1;
fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
FUSE_READDIR);
}
locked = fuse_lock_inode(inode);
fuse_request_send(fc, req); fuse_request_send(fc, req);
fuse_unlock_inode(inode, locked);
nbytes = req->out.args[0].size;
err = req->out.h.error; err = req->out.h.error;
fuse_put_request(fc, req);
if (!err) { if (!err) {
if (plus) { char *link = page_address(page);
err = parse_dirplusfile(page_address(page), nbytes, size_t len = req->out.args[0].size;
file, ctx,
attr_version); BUG_ON(len >= PAGE_SIZE);
} else { link[len] = '\0';
err = parse_dirfile(page_address(page), nbytes, file,
ctx);
}
} }
__free_page(page); fuse_put_request(fc, req);
fuse_invalidate_atime(inode); fuse_invalidate_atime(inode);
return err; return err;
} }
static const char *fuse_get_link(struct dentry *dentry, static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
struct inode *inode, struct delayed_call *callback)
struct delayed_call *done)
{ {
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_conn *fc = get_fuse_conn(inode);
FUSE_ARGS(args); struct page *page;
char *link; int err;
ssize_t ret;
err = -EIO;
if (is_bad_inode(inode))
goto out_err;
if (fc->cache_symlinks)
return page_get_link(dentry, inode, callback);
err = -ECHILD;
if (!dentry) if (!dentry)
return ERR_PTR(-ECHILD); goto out_err;
link = kmalloc(PAGE_SIZE, GFP_KERNEL); page = alloc_page(GFP_KERNEL);
if (!link) err = -ENOMEM;
return ERR_PTR(-ENOMEM); if (!page)
goto out_err;
args.in.h.opcode = FUSE_READLINK; err = fuse_readlink_page(inode, page);
args.in.h.nodeid = get_node_id(inode); if (err) {
args.out.argvar = 1; __free_page(page);
args.out.numargs = 1; goto out_err;
args.out.args[0].size = PAGE_SIZE - 1;
args.out.args[0].value = link;
ret = fuse_simple_request(fc, &args);
if (ret < 0) {
kfree(link);
link = ERR_PTR(ret);
} else {
link[ret] = '\0';
set_delayed_call(done, kfree_link, link);
} }
fuse_invalidate_atime(inode);
return link; set_delayed_call(callback, page_put_link, page);
return page_address(page);
out_err:
return ERR_PTR(err);
} }
static int fuse_dir_open(struct inode *inode, struct file *file) static int fuse_dir_open(struct inode *inode, struct file *file)
...@@ -1662,8 +1464,11 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, ...@@ -1662,8 +1464,11 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
file = NULL; file = NULL;
} }
if (attr->ia_valid & ATTR_SIZE) if (attr->ia_valid & ATTR_SIZE) {
if (WARN_ON(!S_ISREG(inode->i_mode)))
return -EIO;
is_truncate = true; is_truncate = true;
}
if (is_truncate) { if (is_truncate) {
fuse_set_nowrite(inode); fuse_set_nowrite(inode);
...@@ -1811,7 +1616,7 @@ static int fuse_getattr(const struct path *path, struct kstat *stat, ...@@ -1811,7 +1616,7 @@ static int fuse_getattr(const struct path *path, struct kstat *stat,
if (!fuse_allow_current_process(fc)) if (!fuse_allow_current_process(fc))
return -EACCES; return -EACCES;
return fuse_update_get_attr(inode, NULL, stat, flags); return fuse_update_get_attr(inode, NULL, stat, request_mask, flags);
} }
static const struct inode_operations fuse_dir_inode_operations = { static const struct inode_operations fuse_dir_inode_operations = {
...@@ -1867,11 +1672,37 @@ void fuse_init_common(struct inode *inode) ...@@ -1867,11 +1672,37 @@ void fuse_init_common(struct inode *inode)
void fuse_init_dir(struct inode *inode) void fuse_init_dir(struct inode *inode)
{ {
struct fuse_inode *fi = get_fuse_inode(inode);
inode->i_op = &fuse_dir_inode_operations; inode->i_op = &fuse_dir_inode_operations;
inode->i_fop = &fuse_dir_operations; inode->i_fop = &fuse_dir_operations;
spin_lock_init(&fi->rdc.lock);
fi->rdc.cached = false;
fi->rdc.size = 0;
fi->rdc.pos = 0;
fi->rdc.version = 0;
} }
/*
 * ->readpage for symlink inodes: fill @page through fuse_readlink_page()
 * using the inode that owns the page's mapping.  The page is marked
 * up to date only when that call succeeds, and is unlocked in either case.
 * The file argument is not used.
 *
 * Returns the result of fuse_readlink_page().
 */
static int fuse_symlink_readpage(struct file *null, struct page *page)
{
	struct inode *inode = page->mapping->host;
	int err;

	err = fuse_readlink_page(inode, page);
	if (err == 0)
		SetPageUptodate(page);

	unlock_page(page);

	return err;
}
/*
 * Address space operations installed on symlink inodes by
 * fuse_init_symlink(); only ->readpage is provided.
 */
static const struct address_space_operations fuse_symlink_aops = {
.readpage = fuse_symlink_readpage,
};
void fuse_init_symlink(struct inode *inode) void fuse_init_symlink(struct inode *inode)
{ {
inode->i_op = &fuse_symlink_inode_operations; inode->i_op = &fuse_symlink_inode_operations;
inode->i_data.a_ops = &fuse_symlink_aops;
inode_nohighmem(inode);
} }
...@@ -59,6 +59,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) ...@@ -59,6 +59,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
} }
INIT_LIST_HEAD(&ff->write_entry); INIT_LIST_HEAD(&ff->write_entry);
mutex_init(&ff->readdir.lock);
refcount_set(&ff->count, 1); refcount_set(&ff->count, 1);
RB_CLEAR_NODE(&ff->polled_node); RB_CLEAR_NODE(&ff->polled_node);
init_waitqueue_head(&ff->poll_wait); init_waitqueue_head(&ff->poll_wait);
...@@ -73,6 +74,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) ...@@ -73,6 +74,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
void fuse_file_free(struct fuse_file *ff) void fuse_file_free(struct fuse_file *ff)
{ {
fuse_request_free(ff->reserved_req); fuse_request_free(ff->reserved_req);
mutex_destroy(&ff->readdir.lock);
kfree(ff); kfree(ff);
} }
...@@ -848,11 +850,11 @@ static int fuse_readpages_fill(void *_data, struct page *page) ...@@ -848,11 +850,11 @@ static int fuse_readpages_fill(void *_data, struct page *page)
fuse_wait_on_page_writeback(inode, page->index); fuse_wait_on_page_writeback(inode, page->index);
if (req->num_pages && if (req->num_pages &&
(req->num_pages == FUSE_MAX_PAGES_PER_REQ || (req->num_pages == fc->max_pages ||
(req->num_pages + 1) * PAGE_SIZE > fc->max_read || (req->num_pages + 1) * PAGE_SIZE > fc->max_read ||
req->pages[req->num_pages - 1]->index + 1 != page->index)) { req->pages[req->num_pages - 1]->index + 1 != page->index)) {
int nr_alloc = min_t(unsigned, data->nr_pages, unsigned int nr_alloc = min_t(unsigned int, data->nr_pages,
FUSE_MAX_PAGES_PER_REQ); fc->max_pages);
fuse_send_readpages(req, data->file); fuse_send_readpages(req, data->file);
if (fc->async_read) if (fc->async_read)
req = fuse_get_req_for_background(fc, nr_alloc); req = fuse_get_req_for_background(fc, nr_alloc);
...@@ -887,7 +889,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, ...@@ -887,7 +889,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_fill_data data; struct fuse_fill_data data;
int err; int err;
int nr_alloc = min_t(unsigned, nr_pages, FUSE_MAX_PAGES_PER_REQ); unsigned int nr_alloc = min_t(unsigned int, nr_pages, fc->max_pages);
err = -EIO; err = -EIO;
if (is_bad_inode(inode)) if (is_bad_inode(inode))
...@@ -1102,12 +1104,13 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, ...@@ -1102,12 +1104,13 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
return count > 0 ? count : err; return count > 0 ? count : err;
} }
static inline unsigned fuse_wr_pages(loff_t pos, size_t len) static inline unsigned int fuse_wr_pages(loff_t pos, size_t len,
unsigned int max_pages)
{ {
return min_t(unsigned, return min_t(unsigned int,
((pos + len - 1) >> PAGE_SHIFT) - ((pos + len - 1) >> PAGE_SHIFT) -
(pos >> PAGE_SHIFT) + 1, (pos >> PAGE_SHIFT) + 1,
FUSE_MAX_PAGES_PER_REQ); max_pages);
} }
static ssize_t fuse_perform_write(struct kiocb *iocb, static ssize_t fuse_perform_write(struct kiocb *iocb,
...@@ -1129,7 +1132,8 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, ...@@ -1129,7 +1132,8 @@ static ssize_t fuse_perform_write(struct kiocb *iocb,
do { do {
struct fuse_req *req; struct fuse_req *req;
ssize_t count; ssize_t count;
unsigned nr_pages = fuse_wr_pages(pos, iov_iter_count(ii)); unsigned int nr_pages = fuse_wr_pages(pos, iov_iter_count(ii),
fc->max_pages);
req = fuse_get_req(fc, nr_pages); req = fuse_get_req(fc, nr_pages);
if (IS_ERR(req)) { if (IS_ERR(req)) {
...@@ -1319,11 +1323,6 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, ...@@ -1319,11 +1323,6 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
return ret < 0 ? ret : 0; return ret < 0 ? ret : 0;
} }
/*
 * Wrapper around iov_iter_npages() that passes FUSE_MAX_PAGES_PER_REQ as
 * the page limit.
 */
static inline int fuse_iter_npages(const struct iov_iter *ii_p)
{
return iov_iter_npages(ii_p, FUSE_MAX_PAGES_PER_REQ);
}
ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
loff_t *ppos, int flags) loff_t *ppos, int flags)
{ {
...@@ -1343,9 +1342,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, ...@@ -1343,9 +1342,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
int err = 0; int err = 0;
if (io->async) if (io->async)
req = fuse_get_req_for_background(fc, fuse_iter_npages(iter)); req = fuse_get_req_for_background(fc, iov_iter_npages(iter,
fc->max_pages));
else else
req = fuse_get_req(fc, fuse_iter_npages(iter)); req = fuse_get_req(fc, iov_iter_npages(iter, fc->max_pages));
if (IS_ERR(req)) if (IS_ERR(req))
return PTR_ERR(req); return PTR_ERR(req);
...@@ -1390,9 +1390,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, ...@@ -1390,9 +1390,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
fuse_put_request(fc, req); fuse_put_request(fc, req);
if (io->async) if (io->async)
req = fuse_get_req_for_background(fc, req = fuse_get_req_for_background(fc,
fuse_iter_npages(iter)); iov_iter_npages(iter, fc->max_pages));
else else
req = fuse_get_req(fc, fuse_iter_npages(iter)); req = fuse_get_req(fc, iov_iter_npages(iter,
fc->max_pages));
if (IS_ERR(req)) if (IS_ERR(req))
break; break;
} }
...@@ -1418,7 +1419,7 @@ static ssize_t __fuse_direct_read(struct fuse_io_priv *io, ...@@ -1418,7 +1419,7 @@ static ssize_t __fuse_direct_read(struct fuse_io_priv *io,
res = fuse_direct_io(io, iter, ppos, 0); res = fuse_direct_io(io, iter, ppos, 0);
fuse_invalidate_attr(inode); fuse_invalidate_atime(inode);
return res; return res;
} }
...@@ -1487,6 +1488,7 @@ __acquires(fc->lock) ...@@ -1487,6 +1488,7 @@ __acquires(fc->lock)
struct fuse_inode *fi = get_fuse_inode(req->inode); struct fuse_inode *fi = get_fuse_inode(req->inode);
struct fuse_write_in *inarg = &req->misc.write.in; struct fuse_write_in *inarg = &req->misc.write.in;
__u64 data_size = req->num_pages * PAGE_SIZE; __u64 data_size = req->num_pages * PAGE_SIZE;
bool queued;
if (!fc->connected) if (!fc->connected)
goto out_free; goto out_free;
...@@ -1502,7 +1504,8 @@ __acquires(fc->lock) ...@@ -1502,7 +1504,8 @@ __acquires(fc->lock)
req->in.args[1].size = inarg->size; req->in.args[1].size = inarg->size;
fi->writectr++; fi->writectr++;
fuse_request_send_background_locked(fc, req); queued = fuse_request_queue_background(fc, req);
WARN_ON(!queued);
return; return;
out_free: out_free:
...@@ -1819,12 +1822,18 @@ static int fuse_writepages_fill(struct page *page, ...@@ -1819,12 +1822,18 @@ static int fuse_writepages_fill(struct page *page,
is_writeback = fuse_page_is_writeback(inode, page->index); is_writeback = fuse_page_is_writeback(inode, page->index);
if (req && req->num_pages && if (req && req->num_pages &&
(is_writeback || req->num_pages == FUSE_MAX_PAGES_PER_REQ || (is_writeback || req->num_pages == fc->max_pages ||
(req->num_pages + 1) * PAGE_SIZE > fc->max_write || (req->num_pages + 1) * PAGE_SIZE > fc->max_write ||
data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) { data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) {
fuse_writepages_send(data); fuse_writepages_send(data);
data->req = NULL; data->req = NULL;
} else if (req && req->num_pages == req->max_pages) {
if (!fuse_req_realloc_pages(fc, req, GFP_NOFS)) {
fuse_writepages_send(data);
req = data->req = NULL;
}
} }
err = -ENOMEM; err = -ENOMEM;
tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
if (!tmp_page) if (!tmp_page)
...@@ -1847,7 +1856,7 @@ static int fuse_writepages_fill(struct page *page, ...@@ -1847,7 +1856,7 @@ static int fuse_writepages_fill(struct page *page,
struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_inode *fi = get_fuse_inode(inode);
err = -ENOMEM; err = -ENOMEM;
req = fuse_request_alloc_nofs(FUSE_MAX_PAGES_PER_REQ); req = fuse_request_alloc_nofs(FUSE_REQ_INLINE_PAGES);
if (!req) { if (!req) {
__free_page(tmp_page); __free_page(tmp_page);
goto out_unlock; goto out_unlock;
...@@ -1904,6 +1913,7 @@ static int fuse_writepages(struct address_space *mapping, ...@@ -1904,6 +1913,7 @@ static int fuse_writepages(struct address_space *mapping,
struct writeback_control *wbc) struct writeback_control *wbc)
{ {
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_fill_wb_data data; struct fuse_fill_wb_data data;
int err; int err;
...@@ -1916,7 +1926,7 @@ static int fuse_writepages(struct address_space *mapping, ...@@ -1916,7 +1926,7 @@ static int fuse_writepages(struct address_space *mapping,
data.ff = NULL; data.ff = NULL;
err = -ENOMEM; err = -ENOMEM;
data.orig_pages = kcalloc(FUSE_MAX_PAGES_PER_REQ, data.orig_pages = kcalloc(fc->max_pages,
sizeof(struct page *), sizeof(struct page *),
GFP_NOFS); GFP_NOFS);
if (!data.orig_pages) if (!data.orig_pages)
...@@ -2387,10 +2397,11 @@ static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src, ...@@ -2387,10 +2397,11 @@ static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src,
} }
/* Make sure iov_length() won't overflow */ /* Make sure iov_length() won't overflow */
static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count) static int fuse_verify_ioctl_iov(struct fuse_conn *fc, struct iovec *iov,
size_t count)
{ {
size_t n; size_t n;
u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT; u32 max = fc->max_pages << PAGE_SHIFT;
for (n = 0; n < count; n++, iov++) { for (n = 0; n < count; n++, iov++) {
if (iov->iov_len > (size_t) max) if (iov->iov_len > (size_t) max)
...@@ -2514,7 +2525,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, ...@@ -2514,7 +2525,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
err = -ENOMEM; err = -ENOMEM;
pages = kcalloc(FUSE_MAX_PAGES_PER_REQ, sizeof(pages[0]), GFP_KERNEL); pages = kcalloc(fc->max_pages, sizeof(pages[0]), GFP_KERNEL);
iov_page = (struct iovec *) __get_free_page(GFP_KERNEL); iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
if (!pages || !iov_page) if (!pages || !iov_page)
goto out; goto out;
...@@ -2553,7 +2564,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, ...@@ -2553,7 +2564,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
/* make sure there are enough buffer pages and init request with them */ /* make sure there are enough buffer pages and init request with them */
err = -ENOMEM; err = -ENOMEM;
if (max_pages > FUSE_MAX_PAGES_PER_REQ) if (max_pages > fc->max_pages)
goto out; goto out;
while (num_pages < max_pages) { while (num_pages < max_pages) {
pages[num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); pages[num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
...@@ -2640,11 +2651,11 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, ...@@ -2640,11 +2651,11 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
in_iov = iov_page; in_iov = iov_page;
out_iov = in_iov + in_iovs; out_iov = in_iov + in_iovs;
err = fuse_verify_ioctl_iov(in_iov, in_iovs); err = fuse_verify_ioctl_iov(fc, in_iov, in_iovs);
if (err) if (err)
goto out; goto out;
err = fuse_verify_ioctl_iov(out_iov, out_iovs); err = fuse_verify_ioctl_iov(fc, out_iov, out_iovs);
if (err) if (err)
goto out; goto out;
...@@ -2835,9 +2846,9 @@ static void fuse_do_truncate(struct file *file) ...@@ -2835,9 +2846,9 @@ static void fuse_do_truncate(struct file *file)
fuse_do_setattr(file_dentry(file), &attr, file); fuse_do_setattr(file_dentry(file), &attr, file);
} }
static inline loff_t fuse_round_up(loff_t off) static inline loff_t fuse_round_up(struct fuse_conn *fc, loff_t off)
{ {
return round_up(off, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); return round_up(off, fc->max_pages << PAGE_SHIFT);
} }
static ssize_t static ssize_t
...@@ -2866,7 +2877,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) ...@@ -2866,7 +2877,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
if (async_dio && iov_iter_rw(iter) != WRITE && offset + count > i_size) { if (async_dio && iov_iter_rw(iter) != WRITE && offset + count > i_size) {
if (offset >= i_size) if (offset >= i_size)
return 0; return 0;
iov_iter_truncate(iter, fuse_round_up(i_size - offset)); iov_iter_truncate(iter, fuse_round_up(ff->fc, i_size - offset));
count = iov_iter_count(iter); count = iov_iter_count(iter);
} }
...@@ -3011,6 +3022,82 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, ...@@ -3011,6 +3022,82 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
return err; return err;
} }
/*
 * Copy a byte range between two open FUSE files with a single
 * FUSE_COPY_FILE_RANGE request.
 *
 * @file_in/@pos_in:   source file and starting offset
 * @file_out/@pos_out: destination file and starting offset
 * @len:               number of bytes requested
 * @flags:             forwarded to the server unchanged
 *
 * The destination inode is locked for the duration of the call.  With the
 * writeback cache enabled, dirty pages in the destination range are flushed
 * before the request is sent, and the cached size and timestamps are updated
 * afterwards.
 *
 * Returns the number of bytes the server reports as copied; -EOPNOTSUPP when
 * the server does not implement the operation (recorded in
 * fc->no_copy_file_range so later calls return immediately); -EIO when the
 * server reports more bytes than were requested; or another negative errno
 * from the flush or from the request itself.
 */
static ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in,
				    struct file *file_out, loff_t pos_out,
				    size_t len, unsigned int flags)
{
	struct fuse_file *ff_in = file_in->private_data;
	struct fuse_file *ff_out = file_out->private_data;
	struct inode *inode_out = file_inode(file_out);
	struct fuse_inode *fi_out = get_fuse_inode(inode_out);
	struct fuse_conn *fc = ff_in->fc;
	FUSE_ARGS(args);
	struct fuse_copy_file_range_in inarg = {
		.fh_in = ff_in->fh,
		.off_in = pos_in,
		.nodeid_out = ff_out->nodeid,
		.fh_out = ff_out->fh,
		.off_out = pos_out,
		.len = len,
		.flags = flags
	};
	struct fuse_write_out outarg;
	ssize_t err;
	/* mark unstable when write-back is not used, and file_out gets
	 * extended */
	bool is_unstable = (!fc->writeback_cache) &&
			   ((pos_out + len) > inode_out->i_size);

	if (fc->no_copy_file_range)
		return -EOPNOTSUPP;

	inode_lock(inode_out);

	if (fc->writeback_cache) {
		/*
		 * filemap_write_and_wait_range() takes an inclusive end
		 * offset, so the last byte of the range is
		 * pos_out + len - 1, not pos_out + len.
		 * NOTE(review): presumes len != 0 on entry - confirm that
		 * the VFS caller filters out zero-length copies.
		 */
		err = filemap_write_and_wait_range(inode_out->i_mapping,
						   pos_out, pos_out + len - 1);
		if (err)
			goto out;

		fuse_sync_writes(inode_out);
	}

	if (is_unstable)
		set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);

	args.in.h.opcode = FUSE_COPY_FILE_RANGE;
	args.in.h.nodeid = ff_in->nodeid;
	args.in.numargs = 1;
	args.in.args[0].size = sizeof(inarg);
	args.in.args[0].value = &inarg;
	args.out.numargs = 1;
	args.out.args[0].size = sizeof(outarg);
	args.out.args[0].value = &outarg;
	err = fuse_simple_request(fc, &args);
	if (err == -ENOSYS) {
		/* Remember that the server lacks the operation. */
		fc->no_copy_file_range = 1;
		err = -EOPNOTSUPP;
	}
	if (err)
		goto out;

	/*
	 * outarg.size comes from the userspace server.  Never let it move
	 * the cached file size or the return value past what was requested.
	 */
	if (outarg.size > len) {
		err = -EIO;
		goto out;
	}

	if (fc->writeback_cache) {
		fuse_write_update_size(inode_out, pos_out + outarg.size);
		file_update_time(file_out);
	}

	fuse_invalidate_attr(inode_out);

	err = outarg.size;
out:
	if (is_unstable)
		clear_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);

	inode_unlock(inode_out);

	return err;
}
static const struct file_operations fuse_file_operations = { static const struct file_operations fuse_file_operations = {
.llseek = fuse_file_llseek, .llseek = fuse_file_llseek,
.read_iter = fuse_file_read_iter, .read_iter = fuse_file_read_iter,
...@@ -3027,6 +3114,7 @@ static const struct file_operations fuse_file_operations = { ...@@ -3027,6 +3114,7 @@ static const struct file_operations fuse_file_operations = {
.compat_ioctl = fuse_file_compat_ioctl, .compat_ioctl = fuse_file_compat_ioctl,
.poll = fuse_file_poll, .poll = fuse_file_poll,
.fallocate = fuse_file_fallocate, .fallocate = fuse_file_fallocate,
.copy_file_range = fuse_copy_file_range,
}; };
static const struct file_operations fuse_direct_io_file_operations = { static const struct file_operations fuse_direct_io_file_operations = {
...@@ -3062,6 +3150,14 @@ static const struct address_space_operations fuse_file_aops = { ...@@ -3062,6 +3150,14 @@ static const struct address_space_operations fuse_file_aops = {
void fuse_init_file_inode(struct inode *inode) void fuse_init_file_inode(struct inode *inode)
{ {
struct fuse_inode *fi = get_fuse_inode(inode);
inode->i_fop = &fuse_file_operations; inode->i_fop = &fuse_file_operations;
inode->i_data.a_ops = &fuse_file_aops; inode->i_data.a_ops = &fuse_file_aops;
INIT_LIST_HEAD(&fi->write_files);
INIT_LIST_HEAD(&fi->queued_writes);
fi->writectr = 0;
init_waitqueue_head(&fi->page_waitq);
INIT_LIST_HEAD(&fi->writepages);
} }
...@@ -28,8 +28,11 @@ ...@@ -28,8 +28,11 @@
#include <linux/refcount.h> #include <linux/refcount.h>
#include <linux/user_namespace.h> #include <linux/user_namespace.h>
/** Max number of pages that can be used in a single read request */ /** Default max number of pages that can be used in a single read request */
#define FUSE_MAX_PAGES_PER_REQ 32 #define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32
/** Maximum of max_pages received in init_out */
#define FUSE_MAX_MAX_PAGES 256
/** Bias for fi->writectr, meaning new writepages must not be sent */ /** Bias for fi->writectr, meaning new writepages must not be sent */
#define FUSE_NOWRITE INT_MIN #define FUSE_NOWRITE INT_MIN
...@@ -77,6 +80,9 @@ struct fuse_inode { ...@@ -77,6 +80,9 @@ struct fuse_inode {
/** Time in jiffies until the file attributes are valid */ /** Time in jiffies until the file attributes are valid */
u64 i_time; u64 i_time;
/* Which attributes are invalid */
u32 inval_mask;
/** The sticky bit in inode->i_mode may have been removed, so /** The sticky bit in inode->i_mode may have been removed, so
preserve the original mode */ preserve the original mode */
umode_t orig_i_mode; umode_t orig_i_mode;
...@@ -87,21 +93,51 @@ struct fuse_inode { ...@@ -87,21 +93,51 @@ struct fuse_inode {
/** Version of last attribute change */ /** Version of last attribute change */
u64 attr_version; u64 attr_version;
/** Files usable in writepage. Protected by fc->lock */ union {
/* Write related fields (regular file only) */
struct {
/* Files usable in writepage. Protected by fc->lock */
struct list_head write_files; struct list_head write_files;
/** Writepages pending on truncate or fsync */ /* Writepages pending on truncate or fsync */
struct list_head queued_writes; struct list_head queued_writes;
/** Number of sent writes, a negative bias (FUSE_NOWRITE) /* Number of sent writes, a negative bias
* means more writes are blocked */ * (FUSE_NOWRITE) means more writes are blocked */
int writectr; int writectr;
/** Waitq for writepage completion */ /* Waitq for writepage completion */
wait_queue_head_t page_waitq; wait_queue_head_t page_waitq;
/** List of writepage requests (pending or sent) */ /* List of writepage requests (pending or sent) */
struct list_head writepages; struct list_head writepages;
};
/* readdir cache (directory only) */
struct {
/* true if fully cached */
bool cached;
/* size of cache */
loff_t size;
/* position at end of cache (position of next entry) */
loff_t pos;
/* version of the cache */
u64 version;
/* modification time of directory when cache was
* started */
struct timespec64 mtime;
/* iversion of directory when cache was started */
u64 iversion;
/* protects above fields */
spinlock_t lock;
} rdc;
};
/** Miscellaneous bits describing inode state */ /** Miscellaneous bits describing inode state */
unsigned long state; unsigned long state;
...@@ -148,6 +184,25 @@ struct fuse_file { ...@@ -148,6 +184,25 @@ struct fuse_file {
/** Entry on inode's write_files list */ /** Entry on inode's write_files list */
struct list_head write_entry; struct list_head write_entry;
/* Readdir related */
struct {
/*
* Protects below fields against (crazy) parallel readdir on
* same open file. Uncontended in the normal case.
*/
struct mutex lock;
/* Dir stream position */
loff_t pos;
/* Offset in cache */
loff_t cache_off;
/* Version of cache we are reading */
u64 version;
} readdir;
/** RB node to be linked on fuse_conn->polled_files */ /** RB node to be linked on fuse_conn->polled_files */
struct rb_node polled_node; struct rb_node polled_node;
...@@ -311,9 +366,6 @@ struct fuse_req { ...@@ -311,9 +366,6 @@ struct fuse_req {
/** refcount */ /** refcount */
refcount_t count; refcount_t count;
/** Unique ID for the interrupt request */
u64 intr_unique;
/* Request flags, updated with test/set/clear_bit() */ /* Request flags, updated with test/set/clear_bit() */
unsigned long flags; unsigned long flags;
...@@ -411,6 +463,9 @@ struct fuse_iqueue { ...@@ -411,6 +463,9 @@ struct fuse_iqueue {
struct fasync_struct *fasync; struct fasync_struct *fasync;
}; };
#define FUSE_PQ_HASH_BITS 8
#define FUSE_PQ_HASH_SIZE (1 << FUSE_PQ_HASH_BITS)
struct fuse_pqueue { struct fuse_pqueue {
/** Connection established */ /** Connection established */
unsigned connected; unsigned connected;
...@@ -418,8 +473,8 @@ struct fuse_pqueue { ...@@ -418,8 +473,8 @@ struct fuse_pqueue {
/** Lock protecting accesses to members of this structure */ /** Lock protecting accesses to members of this structure */
spinlock_t lock; spinlock_t lock;
/** The list of requests being processed */ /** Hash table of requests being processed */
struct list_head processing; struct list_head *processing;
/** The list of requests under I/O */ /** The list of requests under I/O */
struct list_head io; struct list_head io;
...@@ -476,6 +531,9 @@ struct fuse_conn { ...@@ -476,6 +531,9 @@ struct fuse_conn {
/** Maximum write size */ /** Maximum write size */
unsigned max_write; unsigned max_write;
/** Maximum number of pages that can be used in a single request */
unsigned int max_pages;
/** Input queue */ /** Input queue */
struct fuse_iqueue iq; struct fuse_iqueue iq;
...@@ -500,6 +558,10 @@ struct fuse_conn { ...@@ -500,6 +558,10 @@ struct fuse_conn {
/** The list of background requests set aside for later queuing */ /** The list of background requests set aside for later queuing */
struct list_head bg_queue; struct list_head bg_queue;
/** Protects: max_background, congestion_threshold, num_background,
* active_background, bg_queue, blocked */
spinlock_t bg_lock;
/** Flag indicating that INIT reply has been received. Allocating /** Flag indicating that INIT reply has been received. Allocating
* any fuse request will be suspended until the flag is set */ * any fuse request will be suspended until the flag is set */
int initialized; int initialized;
...@@ -551,6 +613,9 @@ struct fuse_conn { ...@@ -551,6 +613,9 @@ struct fuse_conn {
/** handle fs handles killing suid/sgid/cap on write/chown/trunc */ /** handle fs handles killing suid/sgid/cap on write/chown/trunc */
unsigned handle_killpriv:1; unsigned handle_killpriv:1;
/** cache READLINK responses in page cache */
unsigned cache_symlinks:1;
/* /*
* The following bitfields are only for optimization purposes * The following bitfields are only for optimization purposes
* and hence races in setting them will not cause malfunction * and hence races in setting them will not cause malfunction
...@@ -637,6 +702,9 @@ struct fuse_conn { ...@@ -637,6 +702,9 @@ struct fuse_conn {
/** Allow other than the mounter user to access the filesystem ? */ /** Allow other than the mounter user to access the filesystem ? */
unsigned allow_other:1; unsigned allow_other:1;
/** Does the filesystem support copy_file_range? */
unsigned no_copy_file_range:1;
/** The number of requests waiting for completion */ /** The number of requests waiting for completion */
atomic_t num_waiting; atomic_t num_waiting;
...@@ -697,6 +765,11 @@ static inline u64 get_node_id(struct inode *inode) ...@@ -697,6 +765,11 @@ static inline u64 get_node_id(struct inode *inode)
return get_fuse_inode(inode)->nodeid; return get_fuse_inode(inode)->nodeid;
} }
/*
 * Return 1 when @nodeid is zero or equal to FUSE_ROOT_ID, and 0 for any
 * other value.
 */
static inline int invalid_nodeid(u64 nodeid)
{
	if (nodeid == 0)
		return 1;

	return nodeid == FUSE_ROOT_ID;
}
/** Device operations */ /** Device operations */
extern const struct file_operations fuse_dev_operations; extern const struct file_operations fuse_dev_operations;
...@@ -812,6 +885,10 @@ struct fuse_req *fuse_request_alloc(unsigned npages); ...@@ -812,6 +885,10 @@ struct fuse_req *fuse_request_alloc(unsigned npages);
struct fuse_req *fuse_request_alloc_nofs(unsigned npages); struct fuse_req *fuse_request_alloc_nofs(unsigned npages);
bool fuse_req_realloc_pages(struct fuse_conn *fc, struct fuse_req *req,
gfp_t flags);
/** /**
* Free a request * Free a request
*/ */
...@@ -856,9 +933,7 @@ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args); ...@@ -856,9 +933,7 @@ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args);
* Send a request in the background * Send a request in the background
*/ */
void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req); void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req);
bool fuse_request_queue_background(struct fuse_conn *fc, struct fuse_req *req);
void fuse_request_send_background_locked(struct fuse_conn *fc,
struct fuse_req *req);
/* Abort all requests */ /* Abort all requests */
void fuse_abort_conn(struct fuse_conn *fc, bool is_abort); void fuse_abort_conn(struct fuse_conn *fc, bool is_abort);
...@@ -873,6 +948,9 @@ void fuse_invalidate_entry_cache(struct dentry *entry); ...@@ -873,6 +948,9 @@ void fuse_invalidate_entry_cache(struct dentry *entry);
void fuse_invalidate_atime(struct inode *inode); void fuse_invalidate_atime(struct inode *inode);
u64 entry_attr_timeout(struct fuse_entry_out *o);
void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o);
/** /**
* Acquire reference to fuse_conn * Acquire reference to fuse_conn
*/ */
...@@ -992,4 +1070,8 @@ struct posix_acl; ...@@ -992,4 +1070,8 @@ struct posix_acl;
struct posix_acl *fuse_get_acl(struct inode *inode, int type); struct posix_acl *fuse_get_acl(struct inode *inode, int type);
int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type); int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type);
/* readdir.c */
int fuse_readdir(struct file *file, struct dir_context *ctx);
#endif /* _FS_FUSE_I_H */ #endif /* _FS_FUSE_I_H */
...@@ -90,16 +90,12 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) ...@@ -90,16 +90,12 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
fi = get_fuse_inode(inode); fi = get_fuse_inode(inode);
fi->i_time = 0; fi->i_time = 0;
fi->inval_mask = 0;
fi->nodeid = 0; fi->nodeid = 0;
fi->nlookup = 0; fi->nlookup = 0;
fi->attr_version = 0; fi->attr_version = 0;
fi->writectr = 0;
fi->orig_ino = 0; fi->orig_ino = 0;
fi->state = 0; fi->state = 0;
INIT_LIST_HEAD(&fi->write_files);
INIT_LIST_HEAD(&fi->queued_writes);
INIT_LIST_HEAD(&fi->writepages);
init_waitqueue_head(&fi->page_waitq);
mutex_init(&fi->mutex); mutex_init(&fi->mutex);
fi->forget = fuse_alloc_forget(); fi->forget = fuse_alloc_forget();
if (!fi->forget) { if (!fi->forget) {
...@@ -119,8 +115,10 @@ static void fuse_i_callback(struct rcu_head *head) ...@@ -119,8 +115,10 @@ static void fuse_i_callback(struct rcu_head *head)
static void fuse_destroy_inode(struct inode *inode) static void fuse_destroy_inode(struct inode *inode)
{ {
struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_inode *fi = get_fuse_inode(inode);
BUG_ON(!list_empty(&fi->write_files)); if (S_ISREG(inode->i_mode)) {
BUG_ON(!list_empty(&fi->queued_writes)); WARN_ON(!list_empty(&fi->write_files));
WARN_ON(!list_empty(&fi->queued_writes));
}
mutex_destroy(&fi->mutex); mutex_destroy(&fi->mutex);
kfree(fi->forget); kfree(fi->forget);
call_rcu(&inode->i_rcu, fuse_i_callback); call_rcu(&inode->i_rcu, fuse_i_callback);
...@@ -167,6 +165,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, ...@@ -167,6 +165,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
fi->attr_version = ++fc->attr_version; fi->attr_version = ++fc->attr_version;
fi->i_time = attr_valid; fi->i_time = attr_valid;
WRITE_ONCE(fi->inval_mask, 0);
inode->i_ino = fuse_squash_ino(attr->ino); inode->i_ino = fuse_squash_ino(attr->ino);
inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
...@@ -594,9 +593,11 @@ static void fuse_iqueue_init(struct fuse_iqueue *fiq) ...@@ -594,9 +593,11 @@ static void fuse_iqueue_init(struct fuse_iqueue *fiq)
static void fuse_pqueue_init(struct fuse_pqueue *fpq) static void fuse_pqueue_init(struct fuse_pqueue *fpq)
{ {
memset(fpq, 0, sizeof(struct fuse_pqueue)); unsigned int i;
spin_lock_init(&fpq->lock); spin_lock_init(&fpq->lock);
INIT_LIST_HEAD(&fpq->processing); for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
INIT_LIST_HEAD(&fpq->processing[i]);
INIT_LIST_HEAD(&fpq->io); INIT_LIST_HEAD(&fpq->io);
fpq->connected = 1; fpq->connected = 1;
} }
...@@ -605,6 +606,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns) ...@@ -605,6 +606,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns)
{ {
memset(fc, 0, sizeof(*fc)); memset(fc, 0, sizeof(*fc));
spin_lock_init(&fc->lock); spin_lock_init(&fc->lock);
spin_lock_init(&fc->bg_lock);
init_rwsem(&fc->killsb); init_rwsem(&fc->killsb);
refcount_set(&fc->count, 1); refcount_set(&fc->count, 1);
atomic_set(&fc->dev_count, 1); atomic_set(&fc->dev_count, 1);
...@@ -852,6 +854,7 @@ static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg) ...@@ -852,6 +854,7 @@ static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
sanitize_global_limit(&max_user_bgreq); sanitize_global_limit(&max_user_bgreq);
sanitize_global_limit(&max_user_congthresh); sanitize_global_limit(&max_user_congthresh);
spin_lock(&fc->bg_lock);
if (arg->max_background) { if (arg->max_background) {
fc->max_background = arg->max_background; fc->max_background = arg->max_background;
...@@ -865,6 +868,7 @@ static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg) ...@@ -865,6 +868,7 @@ static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
fc->congestion_threshold > max_user_congthresh) fc->congestion_threshold > max_user_congthresh)
fc->congestion_threshold = max_user_congthresh; fc->congestion_threshold = max_user_congthresh;
} }
spin_unlock(&fc->bg_lock);
} }
static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
...@@ -924,8 +928,15 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) ...@@ -924,8 +928,15 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->posix_acl = 1; fc->posix_acl = 1;
fc->sb->s_xattr = fuse_acl_xattr_handlers; fc->sb->s_xattr = fuse_acl_xattr_handlers;
} }
if (arg->flags & FUSE_CACHE_SYMLINKS)
fc->cache_symlinks = 1;
if (arg->flags & FUSE_ABORT_ERROR) if (arg->flags & FUSE_ABORT_ERROR)
fc->abort_err = 1; fc->abort_err = 1;
if (arg->flags & FUSE_MAX_PAGES) {
fc->max_pages =
min_t(unsigned int, FUSE_MAX_MAX_PAGES,
max_t(unsigned int, arg->max_pages, 1));
}
} else { } else {
ra_pages = fc->max_read / PAGE_SIZE; ra_pages = fc->max_read / PAGE_SIZE;
fc->no_lock = 1; fc->no_lock = 1;
...@@ -957,7 +968,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) ...@@ -957,7 +968,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO | FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT | FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL | FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
FUSE_ABORT_ERROR; FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS;
req->in.h.opcode = FUSE_INIT; req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1; req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg); req->in.args[0].size = sizeof(*arg);
...@@ -1022,16 +1033,25 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) ...@@ -1022,16 +1033,25 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc) struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc)
{ {
struct fuse_dev *fud; struct fuse_dev *fud;
struct list_head *pq;
fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL); fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL);
if (fud) { if (!fud)
return NULL;
pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL);
if (!pq) {
kfree(fud);
return NULL;
}
fud->pq.processing = pq;
fud->fc = fuse_conn_get(fc); fud->fc = fuse_conn_get(fc);
fuse_pqueue_init(&fud->pq); fuse_pqueue_init(&fud->pq);
spin_lock(&fc->lock); spin_lock(&fc->lock);
list_add_tail(&fud->entry, &fc->devices); list_add_tail(&fud->entry, &fc->devices);
spin_unlock(&fc->lock); spin_unlock(&fc->lock);
}
return fud; return fud;
} }
...@@ -1141,6 +1161,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) ...@@ -1141,6 +1161,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
fc->user_id = d.user_id; fc->user_id = d.user_id;
fc->group_id = d.group_id; fc->group_id = d.group_id;
fc->max_read = max_t(unsigned, 4096, d.max_read); fc->max_read = max_t(unsigned, 4096, d.max_read);
fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
/* Used by get_root_inode() */ /* Used by get_root_inode() */
sb->s_fs_info = fc; sb->s_fs_info = fc;
......
/*
FUSE: Filesystem in Userspace
Copyright (C) 2001-2018 Miklos Szeredi <miklos@szeredi.hu>
This program can be distributed under the terms of the GNU GPL.
See the file COPYING.
*/
#include "fuse_i.h"
#include <linux/iversion.h>
#include <linux/posix_acl.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
{
struct fuse_conn *fc = get_fuse_conn(dir);
struct fuse_inode *fi = get_fuse_inode(dir);
if (!fc->do_readdirplus)
return false;
if (!fc->readdirplus_auto)
return true;
if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
return true;
if (ctx->pos == 0)
return true;
return false;
}
/*
 * Append @dirent to the readdir cache stored in the directory's page cache.
 *
 * @pos is the directory position the entry was read at.  The entry is only
 * stored while the cache is still being filled and @pos equals the position
 * the cache currently ends at (fi->rdc.pos).  In every other case (cache
 * already complete, position mismatch, page not available, cache state
 * changed while the lock was dropped) the cache is left untouched and no
 * error is reported.
 */
static void fuse_add_dirent_to_cache(struct file *file,
struct fuse_dirent *dirent, loff_t pos)
{
struct fuse_inode *fi = get_fuse_inode(file_inode(file));
size_t reclen = FUSE_DIRENT_SIZE(dirent);
pgoff_t index;
struct page *page;
loff_t size;
u64 version;
unsigned int offset;
void *addr;
spin_lock(&fi->rdc.lock);
/*
 * Is cache already completed? Or this entry does not go at the end of
 * cache?
 */
if (fi->rdc.cached || pos != fi->rdc.pos) {
spin_unlock(&fi->rdc.lock);
return;
}
/*
 * Snapshot version and size: the lock is dropped below for the page
 * lookup, and these are compared again afterwards to detect a change.
 */
version = fi->rdc.version;
size = fi->rdc.size;
/* Split the current cache size into page index and offset within page. */
offset = size & ~PAGE_MASK;
index = size >> PAGE_SHIFT;
/* Dirent doesn't fit in current page? Jump to next page. */
if (offset + reclen > PAGE_SIZE) {
index++;
offset = 0;
}
spin_unlock(&fi->rdc.lock);
/*
 * A non-zero offset means earlier entries were already written to this
 * page, so only look it up; a zero offset starts a page, which is created
 * if it does not exist.  Either call returns the page locked.
 */
if (offset) {
page = find_lock_page(file->f_mapping, index);
} else {
page = find_or_create_page(file->f_mapping, index,
mapping_gfp_mask(file->f_mapping));
}
/* No page: give up on caching this entry. */
if (!page)
return;
spin_lock(&fi->rdc.lock);
/* Raced with another readdir */
if (fi->rdc.version != version || fi->rdc.size != size ||
WARN_ON(fi->rdc.pos != pos))
goto unlock;
addr = kmap_atomic(page);
/* First entry in this page: zero it before copying the record in. */
if (!offset)
clear_page(addr);
memcpy(addr + offset, dirent, reclen);
kunmap_atomic(addr);
/* The cache now ends right after this record, at the entry's ->off. */
fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
fi->rdc.pos = dirent->off;
unlock:
spin_unlock(&fi->rdc.lock);
unlock_page(page);
put_page(page);
}
/*
 * Mark the directory's readdir cache as complete once end-of-directory has
 * been reached at position @pos.
 *
 * Nothing happens unless @pos is exactly where the cache currently ends.
 * When it is, the cache is flagged as cached and page-cache pages beyond
 * the page-aligned end of the cached data are truncated away.
 */
static void fuse_readdir_cache_end(struct file *file, loff_t pos)
{
	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
	loff_t tail_start = 0;
	bool at_end;

	spin_lock(&fi->rdc.lock);
	/* Does the cache end position match the current position? */
	at_end = (fi->rdc.pos == pos);
	if (at_end) {
		fi->rdc.cached = true;
		tail_start = ALIGN(fi->rdc.size, PAGE_SIZE);
	}
	spin_unlock(&fi->rdc.lock);

	if (!at_end)
		return;

	/* Drop the unused tail of the cache. */
	truncate_inode_pages(file->f_mapping, tail_start);
}
/*
 * Hand one directory entry to the VFS via dir_emit().  If the file was
 * opened with FOPEN_CACHE_DIR, the entry is first offered to the readdir
 * cache at the current ctx->pos.
 *
 * Returns the result of dir_emit().
 */
static bool fuse_emit(struct file *file, struct dir_context *ctx,
		      struct fuse_dirent *dirent)
{
	struct fuse_file *ff = file->private_data;
	bool want_cache = (ff->open_flags & FOPEN_CACHE_DIR) != 0;

	if (want_cache)
		fuse_add_dirent_to_cache(file, dirent, ctx->pos);

	return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
			dirent->type);
}
/*
 * Walk a READDIR reply of @nbytes bytes in @buf and emit each entry.
 *
 * Parsing stops quietly at a truncated trailing record or when fuse_emit()
 * refuses an entry.  ctx->pos is advanced to each emitted entry's ->off.
 *
 * Returns 0, or -EIO when a record has an empty or over-long name or a name
 * containing '/'.
 */
static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
			 struct dir_context *ctx)
{
	struct fuse_dirent *ent;
	size_t entlen;

	for (; nbytes >= FUSE_NAME_OFFSET; buf += entlen, nbytes -= entlen) {
		ent = (struct fuse_dirent *) buf;
		entlen = FUSE_DIRENT_SIZE(ent);

		if (ent->namelen == 0 || ent->namelen > FUSE_NAME_MAX)
			return -EIO;
		/* Record extends past the reply: ignore the partial tail. */
		if (entlen > nbytes)
			break;
		if (memchr(ent->name, '/', ent->namelen))
			return -EIO;
		if (!fuse_emit(file, ctx, ent))
			break;

		ctx->pos = ent->off;
	}

	return 0;
}
/*
 * Instantiate or refresh the dentry and inode for one READDIRPLUS entry.
 *
 * @attr_version is passed through to fuse_change_attributes()/fuse_iget().
 *
 * Returns 0 on success or when the entry is deliberately skipped (zero
 * nodeid, "." or ".."), -EIO for an invalid nodeid, invalid file type or a
 * bad inode, or the error encoded by d_alloc_parallel()/d_splice_alias().
 */
static int fuse_direntplus_link(struct file *file,
struct fuse_direntplus *direntplus,
u64 attr_version)
{
struct fuse_entry_out *o = &direntplus->entry_out;
struct fuse_dirent *dirent = &direntplus->dirent;
struct dentry *parent = file->f_path.dentry;
struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
struct dentry *dentry;
struct dentry *alias;
struct inode *dir = d_inode(parent);
struct fuse_conn *fc;
struct inode *inode;
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
if (!o->nodeid) {
/*
 * Unlike in the case of fuse_lookup, zero nodeid does not mean
 * ENOENT. Instead, it only means the userspace filesystem did
 * not want to return attributes/handle for this entry.
 *
 * So do nothing.
 */
return 0;
}
if (name.name[0] == '.') {
/*
 * We could potentially refresh the attributes of the directory
 * and its parent?
 */
if (name.len == 1)
return 0;
if (name.name[1] == '.' && name.len == 2)
return 0;
}
/* Zero was handled above, so this rejects FUSE_ROOT_ID. */
if (invalid_nodeid(o->nodeid))
return -EIO;
if (!fuse_valid_type(o->attr.mode))
return -EIO;
fc = get_fuse_conn(dir);
name.hash = full_name_hash(parent, name.name, name.len);
/* Use an existing dentry if there is one, otherwise allocate a new one. */
dentry = d_lookup(parent, &name);
if (!dentry) {
retry:
dentry = d_alloc_parallel(parent, &name, &wq);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
}
if (!d_in_lookup(dentry)) {
/* Dentry already existed: check that it still matches the reply. */
struct fuse_inode *fi;
inode = d_inode(dentry);
/*
 * Negative dentry, different node ID or different file type:
 * invalidate the stale dentry and start over with a fresh one.
 */
if (!inode ||
get_node_id(inode) != o->nodeid ||
((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
d_invalidate(dentry);
dput(dentry);
goto retry;
}
if (is_bad_inode(inode)) {
dput(dentry);
return -EIO;
}
fi = get_fuse_inode(inode);
/* Count this reply as one more lookup of the inode. */
spin_lock(&fc->lock);
fi->nlookup++;
spin_unlock(&fc->lock);
forget_all_cached_acls(inode);
fuse_change_attributes(inode, &o->attr,
entry_attr_timeout(o),
attr_version);
/*
 * The other branch comes via fuse_iget()
 * which bumps nlookup inside
 */
} else {
/* New in-lookup dentry: obtain the inode and attach it. */
inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
&o->attr, entry_attr_timeout(o),
attr_version);
if (!inode)
inode = ERR_PTR(-ENOMEM);
alias = d_splice_alias(inode, dentry);
d_lookup_done(dentry);
/*
 * A non-NULL result (another dentry or an error pointer) replaces
 * the dentry allocated above.
 */
if (alias) {
dput(dentry);
dentry = alias;
}
if (IS_ERR(dentry))
return PTR_ERR(dentry);
}
/* In auto mode, flag the inode as having been set up by READDIRPLUS. */
if (fc->readdirplus_auto)
set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
fuse_change_entry_timeout(dentry, o);
dput(dentry);
return 0;
}
/*
 * Walk a READDIRPLUS reply of @nbytes bytes in @buf.
 *
 * Entries are emitted until fuse_emit() refuses one; after that the
 * remaining records are still passed to fuse_direntplus_link(), and any
 * record that fails to link gets a forced FORGET for its nodeid.
 *
 * Returns 0, or -EIO when a record has an empty or over-long name or a name
 * containing '/'.
 */
static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
			     struct dir_context *ctx, u64 attr_version)
{
	bool buffer_full = false;
	size_t entlen;

	for (; nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS;
	     buf += entlen, nbytes -= entlen) {
		struct fuse_direntplus *plus = (struct fuse_direntplus *) buf;
		struct fuse_dirent *ent = &plus->dirent;

		entlen = FUSE_DIRENTPLUS_SIZE(plus);

		if (ent->namelen == 0 || ent->namelen > FUSE_NAME_MAX)
			return -EIO;
		/* Record extends past the reply: ignore the partial tail. */
		if (entlen > nbytes)
			break;
		if (memchr(ent->name, '/', ent->namelen))
			return -EIO;

		/*
		 * Fill the destination only as far as it can hold, but keep
		 * iterating so every remaining entry is linked; otherwise a
		 * FORGET would be needed for each one that was not linked.
		 */
		if (!buffer_full) {
			if (fuse_emit(file, ctx, ent))
				ctx->pos = ent->off;
			else
				buffer_full = true;
		}

		if (fuse_direntplus_link(file, plus, attr_version) != 0)
			fuse_force_forget(file, plus->entry_out.nodeid);
	}

	return 0;
}
/*
 * Read one page worth of directory entries from the filesystem daemon,
 * starting at ctx->pos, and feed them to the VFS.
 *
 * READDIRPLUS is used when fuse_use_readdirplus() says so, plain READDIR
 * otherwise.  An empty successful reply ends the readdir cache when the
 * file was opened with FOPEN_CACHE_DIR.
 *
 * Returns 0 on success, -ENOMEM if the reply page cannot be allocated, the
 * error from fuse_get_req(), the error reported in the reply header, or the
 * error returned by the reply parser.
 */
static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
{
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_req *req;
	struct page *page;
	u64 attr_version = 0;
	size_t nbytes;
	bool locked;
	int plus;
	int err;

	req = fuse_get_req(fc, 1);
	if (IS_ERR(req))
		return PTR_ERR(req);

	page = alloc_page(GFP_KERNEL);
	if (!page) {
		fuse_put_request(fc, req);
		return -ENOMEM;
	}

	plus = fuse_use_readdirplus(inode, ctx);

	/* The reply is received directly into the single page. */
	req->out.argpages = 1;
	req->num_pages = 1;
	req->pages[0] = page;
	req->page_descs[0].length = PAGE_SIZE;

	if (plus)
		attr_version = fuse_get_attr_version(fc);
	fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
		       plus ? FUSE_READDIRPLUS : FUSE_READDIR);

	locked = fuse_lock_inode(inode);
	fuse_request_send(fc, req);
	fuse_unlock_inode(inode, locked);

	/* Pull out what is needed before the request is released. */
	nbytes = req->out.args[0].size;
	err = req->out.h.error;
	fuse_put_request(fc, req);

	if (err)
		goto out;

	if (nbytes == 0) {
		struct fuse_file *ff = file->private_data;

		if (ff->open_flags & FOPEN_CACHE_DIR)
			fuse_readdir_cache_end(file, ctx->pos);
	} else if (plus) {
		err = parse_dirplusfile(page_address(page), nbytes, file, ctx,
					attr_version);
	} else {
		err = parse_dirfile(page_address(page), nbytes, file, ctx);
	}

out:
	__free_page(page);
	fuse_invalidate_atime(inode);
	return err;
}
/* Outcome of scanning one cached page in fuse_parse_cache(). */
enum fuse_parse_result {
/* A cached record failed validation. */
FOUND_ERR = -1,
/* The wanted position was not reached in this page. */
FOUND_NONE = 0,
/* The wanted position was reached; parsing ran to the end of the data. */
FOUND_SOME,
/* dir_emit() refused an entry, so parsing stopped early. */
FOUND_ALL,
};
static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
void *addr, unsigned int size,
struct dir_context *ctx)
{
unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
enum fuse_parse_result res = FOUND_NONE;
WARN_ON(offset >= size);
for (;;) {
struct fuse_dirent *dirent = addr + offset;
unsigned int nbytes = size - offset;
size_t reclen = FUSE_DIRENT_SIZE(dirent);
if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
break;
if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
return FOUND_ERR;
if (WARN_ON(reclen > nbytes))
return FOUND_ERR;
if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
return FOUND_ERR;
if (ff->readdir.pos == ctx->pos) {
res = FOUND_SOME;
if (!dir_emit(ctx, dirent->name, dirent->namelen,
dirent->ino, dirent->type))
return FOUND_ALL;
ctx->pos = dirent->off;
}
ff->readdir.pos = dirent->off;
ff->readdir.cache_off += reclen;
offset += reclen;
}
return res;
}
/*
 * Throw away the directory's readdir cache state: clear the "cached" flag,
 * rewind size and position to zero, and bump the version so that open files
 * comparing their saved version notice the change.
 */
static void fuse_rdc_reset(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	fi->rdc.cached = false;
	fi->rdc.pos = 0;
	fi->rdc.size = 0;
	fi->rdc.version += 1;
}
/*
 * Positive return value of fuse_readdir_cached(): the request could not be
 * served from the cache and the caller must use fuse_readdir_uncached().
 */
#define UNCACHED 1
/*
 * Try to serve a readdir request from the directory's readdir cache.
 *
 * Returns 0 when entries were emitted or the end of the cache was reached,
 * UNCACHED when the caller has to read from the filesystem instead, -EIO
 * when the cached data is malformed, or the error returned by
 * fuse_update_attributes().
 */
static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
{
struct fuse_file *ff = file->private_data;
struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
enum fuse_parse_result res;
pgoff_t index;
unsigned int size;
struct page *page;
void *addr;
/* Seeked? If so, reset the cache stream */
if (ff->readdir.pos != ctx->pos) {
ff->readdir.pos = 0;
ff->readdir.cache_off = 0;
}
/*
 * We're just about to start reading into the cache or reading the
 * cache; both cases require an up-to-date mtime value.
 */
if (!ctx->pos && fc->auto_inval_data) {
int err = fuse_update_attributes(inode, file);
if (err)
return err;
}
retry:
spin_lock(&fi->rdc.lock);
/* Entered with fi->rdc.lock held. */
retry_locked:
if (!fi->rdc.cached) {
/* Starting cache? Set cache mtime. */
if (!ctx->pos && !fi->rdc.size) {
fi->rdc.mtime = inode->i_mtime;
fi->rdc.iversion = inode_query_iversion(inode);
}
spin_unlock(&fi->rdc.lock);
return UNCACHED;
}
/*
 * When at the beginning of the directory (i.e. just after opendir(3) or
 * rewinddir(3)), then need to check whether directory contents have
 * changed, and reset the cache if so.
 */
if (!ctx->pos) {
if (inode_peek_iversion(inode) != fi->rdc.iversion ||
!timespec64_equal(&fi->rdc.mtime, &inode->i_mtime)) {
fuse_rdc_reset(inode);
goto retry_locked;
}
}
/*
 * If cache version changed since the last getdents() call, then reset
 * the cache stream.
 */
if (ff->readdir.version != fi->rdc.version) {
ff->readdir.pos = 0;
ff->readdir.cache_off = 0;
}
/*
 * If at the beginning of the cache, then reset version to
 * current.
 */
if (ff->readdir.pos == 0)
ff->readdir.version = fi->rdc.version;
WARN_ON(fi->rdc.size < ff->readdir.cache_off);
/*
 * Work out which page holds the stream offset and how many bytes of it
 * are valid: the last page of the cache is only partially filled.
 */
index = ff->readdir.cache_off >> PAGE_SHIFT;
if (index == (fi->rdc.size >> PAGE_SHIFT))
size = fi->rdc.size & ~PAGE_MASK;
else
size = PAGE_SIZE;
spin_unlock(&fi->rdc.lock);
/* EOF? */
if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
return 0;
page = find_get_page_flags(file->f_mapping, index,
FGP_ACCESSED | FGP_LOCK);
spin_lock(&fi->rdc.lock);
if (!page) {
/*
 * Uh-oh: page gone missing, cache is useless
 */
/* Only reset if the cache is still the version this stream reads. */
if (fi->rdc.version == ff->readdir.version)
fuse_rdc_reset(inode);
goto retry_locked;
}
/* Make sure it's still the same version after getting the page. */
if (ff->readdir.version != fi->rdc.version) {
spin_unlock(&fi->rdc.lock);
unlock_page(page);
put_page(page);
goto retry;
}
spin_unlock(&fi->rdc.lock);
/*
 * Contents of the page are now protected against changing by holding
 * the page lock.
 */
addr = kmap(page);
res = fuse_parse_cache(ff, addr, size, ctx);
kunmap(page);
unlock_page(page);
put_page(page);
if (res == FOUND_ERR)
return -EIO;
if (res == FOUND_ALL)
return 0;
if (size == PAGE_SIZE) {
/* We hit end of page: skip to next page. */
ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
goto retry;
}
/*
 * End of cache reached. If found position, then we are done, otherwise
 * need to fall back to uncached, since the position we were looking for
 * wasn't in the cache.
 */
return res == FOUND_SOME ? 0 : UNCACHED;
}
/*
 * Read directory entries for @file into @ctx.
 *
 * With FOPEN_CACHE_DIR set on the open file the readdir cache is tried
 * first; if it answers UNCACHED, or caching is not enabled, the entries are
 * read from the filesystem.  The work is serialised per open file by
 * ff->readdir.lock.
 *
 * Returns 0 on success, -EIO for a bad inode, or the error from the cached
 * or uncached path.
 */
int fuse_readdir(struct file *file, struct dir_context *ctx)
{
	struct inode *inode = file_inode(file);
	struct fuse_file *ff;
	int ret = UNCACHED;

	if (is_bad_inode(inode))
		return -EIO;

	ff = file->private_data;

	mutex_lock(&ff->readdir.lock);
	if (ff->open_flags & FOPEN_CACHE_DIR)
		ret = fuse_readdir_cached(file, ctx);
	if (ret == UNCACHED)
		ret = fuse_readdir_uncached(file, ctx);
	mutex_unlock(&ff->readdir.lock);

	return ret;
}
...@@ -236,33 +236,33 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr, ...@@ -236,33 +236,33 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr,
#ifdef __KERNEL__ #ifdef __KERNEL__
#ifndef set_mask_bits #ifndef set_mask_bits
#define set_mask_bits(ptr, _mask, _bits) \ #define set_mask_bits(ptr, mask, bits) \
({ \ ({ \
const typeof(*ptr) mask = (_mask), bits = (_bits); \ const typeof(*(ptr)) mask__ = (mask), bits__ = (bits); \
typeof(*ptr) old, new; \ typeof(*(ptr)) old__, new__; \
\ \
do { \ do { \
old = READ_ONCE(*ptr); \ old__ = READ_ONCE(*(ptr)); \
new = (old & ~mask) | bits; \ new__ = (old__ & ~mask__) | bits__; \
} while (cmpxchg(ptr, old, new) != old); \ } while (cmpxchg(ptr, old__, new__) != old__); \
\ \
new; \ new__; \
}) })
#endif #endif
#ifndef bit_clear_unless #ifndef bit_clear_unless
#define bit_clear_unless(ptr, _clear, _test) \ #define bit_clear_unless(ptr, clear, test) \
({ \ ({ \
const typeof(*ptr) clear = (_clear), test = (_test); \ const typeof(*(ptr)) clear__ = (clear), test__ = (test);\
typeof(*ptr) old, new; \ typeof(*(ptr)) old__, new__; \
\ \
do { \ do { \
old = READ_ONCE(*ptr); \ old__ = READ_ONCE(*(ptr)); \
new = old & ~clear; \ new__ = old__ & ~clear__; \
} while (!(old & test) && \ } while (!(old__ & test__) && \
cmpxchg(ptr, old, new) != old); \ cmpxchg(ptr, old__, new__) != old__); \
\ \
!(old & test); \ !(old__ & test__); \
}) })
#endif #endif
......
...@@ -116,6 +116,12 @@ ...@@ -116,6 +116,12 @@
* *
* 7.27 * 7.27
* - add FUSE_ABORT_ERROR * - add FUSE_ABORT_ERROR
*
* 7.28
* - add FUSE_COPY_FILE_RANGE
* - add FOPEN_CACHE_DIR
* - add FUSE_MAX_PAGES, add max_pages to init_out
* - add FUSE_CACHE_SYMLINKS
*/ */
#ifndef _LINUX_FUSE_H #ifndef _LINUX_FUSE_H
...@@ -151,7 +157,7 @@ ...@@ -151,7 +157,7 @@
#define FUSE_KERNEL_VERSION 7 #define FUSE_KERNEL_VERSION 7
/** Minor version number of this interface */ /** Minor version number of this interface */
#define FUSE_KERNEL_MINOR_VERSION 27 #define FUSE_KERNEL_MINOR_VERSION 28
/** The node ID of the root inode */ /** The node ID of the root inode */
#define FUSE_ROOT_ID 1 #define FUSE_ROOT_ID 1
...@@ -219,10 +225,12 @@ struct fuse_file_lock { ...@@ -219,10 +225,12 @@ struct fuse_file_lock {
* FOPEN_DIRECT_IO: bypass page cache for this open file * FOPEN_DIRECT_IO: bypass page cache for this open file
* FOPEN_KEEP_CACHE: don't invalidate the data cache on open * FOPEN_KEEP_CACHE: don't invalidate the data cache on open
* FOPEN_NONSEEKABLE: the file is not seekable * FOPEN_NONSEEKABLE: the file is not seekable
* FOPEN_CACHE_DIR: allow caching this directory
*/ */
#define FOPEN_DIRECT_IO (1 << 0) #define FOPEN_DIRECT_IO (1 << 0)
#define FOPEN_KEEP_CACHE (1 << 1) #define FOPEN_KEEP_CACHE (1 << 1)
#define FOPEN_NONSEEKABLE (1 << 2) #define FOPEN_NONSEEKABLE (1 << 2)
#define FOPEN_CACHE_DIR (1 << 3)
/** /**
* INIT request/reply flags * INIT request/reply flags
...@@ -249,6 +257,8 @@ struct fuse_file_lock { ...@@ -249,6 +257,8 @@ struct fuse_file_lock {
* FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc * FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc
* FUSE_POSIX_ACL: filesystem supports posix acls * FUSE_POSIX_ACL: filesystem supports posix acls
* FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED
* FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages
* FUSE_CACHE_SYMLINKS: cache READLINK responses
*/ */
#define FUSE_ASYNC_READ (1 << 0) #define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1) #define FUSE_POSIX_LOCKS (1 << 1)
...@@ -272,6 +282,8 @@ struct fuse_file_lock { ...@@ -272,6 +282,8 @@ struct fuse_file_lock {
#define FUSE_HANDLE_KILLPRIV (1 << 19) #define FUSE_HANDLE_KILLPRIV (1 << 19)
#define FUSE_POSIX_ACL (1 << 20) #define FUSE_POSIX_ACL (1 << 20)
#define FUSE_ABORT_ERROR (1 << 21) #define FUSE_ABORT_ERROR (1 << 21)
#define FUSE_MAX_PAGES (1 << 22)
#define FUSE_CACHE_SYMLINKS (1 << 23)
/** /**
* CUSE INIT request/reply flags * CUSE INIT request/reply flags
...@@ -381,6 +393,7 @@ enum fuse_opcode { ...@@ -381,6 +393,7 @@ enum fuse_opcode {
FUSE_READDIRPLUS = 44, FUSE_READDIRPLUS = 44,
FUSE_RENAME2 = 45, FUSE_RENAME2 = 45,
FUSE_LSEEK = 46, FUSE_LSEEK = 46,
FUSE_COPY_FILE_RANGE = 47,
/* CUSE specific operations */ /* CUSE specific operations */
CUSE_INIT = 4096, CUSE_INIT = 4096,
...@@ -610,7 +623,9 @@ struct fuse_init_out { ...@@ -610,7 +623,9 @@ struct fuse_init_out {
uint16_t congestion_threshold; uint16_t congestion_threshold;
uint32_t max_write; uint32_t max_write;
uint32_t time_gran; uint32_t time_gran;
uint32_t unused[9]; uint16_t max_pages;
uint16_t padding;
uint32_t unused[8];
}; };
#define CUSE_INIT_INFO_MAX 4096 #define CUSE_INIT_INFO_MAX 4096
...@@ -792,4 +807,14 @@ struct fuse_lseek_out { ...@@ -792,4 +807,14 @@ struct fuse_lseek_out {
uint64_t offset; uint64_t offset;
}; };
/*
 * Presumably the request payload for the FUSE_COPY_FILE_RANGE opcode (the
 * names match; the code that sends it is not visible here - confirm).
 * The per-field notes are inferred from the field names only - TODO confirm
 * against the copy_file_range implementation.
 */
struct fuse_copy_file_range_in {
uint64_t fh_in; /* presumably the file handle of the source file */
uint64_t off_in; /* presumably the byte offset in the source file */
uint64_t nodeid_out; /* presumably the node ID of the destination file */
uint64_t fh_out; /* presumably the file handle of the destination file */
uint64_t off_out; /* presumably the byte offset in the destination file */
uint64_t len; /* presumably the number of bytes to copy */
uint64_t flags; /* presumably the flags argument of copy_file_range(2) */
};
#endif /* _LINUX_FUSE_H */ #endif /* _LINUX_FUSE_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment