Commit 5da784cc authored by Constantine Shulyupin, committed by Miklos Szeredi

fuse: add max_pages to init_out

Replace FUSE_MAX_PAGES_PER_REQ with the configurable parameter max_pages to
improve performance.

Old RFC with detailed description of the problem and many fixes by Mitsuo
Hayasaka (mitsuo.hayasaka.hu@hitachi.com):
 - https://lkml.org/lkml/2012/7/5/136

We've encountered performance degradation and fixed it on a big and complex
virtual environment.

Environment to reproduce degradation and improvement:

1. Add lag to user mode FUSE
Add nanosleep(&(struct timespec){ 0, 1000 }, NULL); to xmp_write_buf in
passthrough_fh.c

2. Patch UM fuse with a configurable max_pages parameter. The patch will be
provided later.

3. run test script and perform test on tmpfs
fuse_test()
{

       cd /tmp
       mkdir -p fusemnt
       passthrough_fh -o max_pages=$1 /tmp/fusemnt
       grep fuse /proc/self/mounts
       dd conv=fdatasync oflag=dsync if=/dev/zero of=fusemnt/tmp/tmp \
		count=1K bs=1M 2>&1 | grep -v records
       rm fusemnt/tmp/tmp
       killall passthrough_fh
}

Test results:

passthrough_fh /tmp/fusemnt fuse.passthrough_fh \
	rw,nosuid,nodev,relatime,user_id=0,group_id=0 0 0
1073741824 bytes (1.1 GB) copied, 1.73867 s, 618 MB/s

passthrough_fh /tmp/fusemnt fuse.passthrough_fh \
	rw,nosuid,nodev,relatime,user_id=0,group_id=0,max_pages=256 0 0
1073741824 bytes (1.1 GB) copied, 1.15643 s, 928 MB/s

Obviously with bigger lag the difference between 'before' and 'after'
will be more significant.

Mitsuo Hayasaka, in 2012 (https://lkml.org/lkml/2012/7/5/136),
observed improvement from 400-550 to 520-740.
Signed-off-by: Constantine Shulyupin <const@MakeLinux.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
parent 8a7aa286
...@@ -61,6 +61,7 @@ static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags) ...@@ -61,6 +61,7 @@ static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
struct page **pages = NULL; struct page **pages = NULL;
struct fuse_page_desc *page_descs = NULL; struct fuse_page_desc *page_descs = NULL;
WARN_ON(npages > FUSE_MAX_MAX_PAGES);
if (npages > FUSE_REQ_INLINE_PAGES) { if (npages > FUSE_REQ_INLINE_PAGES) {
pages = kzalloc(npages * (sizeof(*pages) + pages = kzalloc(npages * (sizeof(*pages) +
sizeof(*page_descs)), flags); sizeof(*page_descs)), flags);
...@@ -1674,7 +1675,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, ...@@ -1674,7 +1675,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
unsigned int num; unsigned int num;
unsigned int offset; unsigned int offset;
size_t total_len = 0; size_t total_len = 0;
int num_pages; unsigned int num_pages;
offset = outarg->offset & ~PAGE_MASK; offset = outarg->offset & ~PAGE_MASK;
file_size = i_size_read(inode); file_size = i_size_read(inode);
...@@ -1686,7 +1687,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, ...@@ -1686,7 +1687,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
num = file_size - outarg->offset; num = file_size - outarg->offset;
num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ); num_pages = min(num_pages, fc->max_pages);
req = fuse_get_req(fc, num_pages); req = fuse_get_req(fc, num_pages);
if (IS_ERR(req)) if (IS_ERR(req))
......
...@@ -850,11 +850,11 @@ static int fuse_readpages_fill(void *_data, struct page *page) ...@@ -850,11 +850,11 @@ static int fuse_readpages_fill(void *_data, struct page *page)
fuse_wait_on_page_writeback(inode, page->index); fuse_wait_on_page_writeback(inode, page->index);
if (req->num_pages && if (req->num_pages &&
(req->num_pages == FUSE_MAX_PAGES_PER_REQ || (req->num_pages == fc->max_pages ||
(req->num_pages + 1) * PAGE_SIZE > fc->max_read || (req->num_pages + 1) * PAGE_SIZE > fc->max_read ||
req->pages[req->num_pages - 1]->index + 1 != page->index)) { req->pages[req->num_pages - 1]->index + 1 != page->index)) {
int nr_alloc = min_t(unsigned, data->nr_pages, unsigned int nr_alloc = min_t(unsigned int, data->nr_pages,
FUSE_MAX_PAGES_PER_REQ); fc->max_pages);
fuse_send_readpages(req, data->file); fuse_send_readpages(req, data->file);
if (fc->async_read) if (fc->async_read)
req = fuse_get_req_for_background(fc, nr_alloc); req = fuse_get_req_for_background(fc, nr_alloc);
...@@ -889,7 +889,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, ...@@ -889,7 +889,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_fill_data data; struct fuse_fill_data data;
int err; int err;
int nr_alloc = min_t(unsigned, nr_pages, FUSE_MAX_PAGES_PER_REQ); unsigned int nr_alloc = min_t(unsigned int, nr_pages, fc->max_pages);
err = -EIO; err = -EIO;
if (is_bad_inode(inode)) if (is_bad_inode(inode))
...@@ -1104,12 +1104,13 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, ...@@ -1104,12 +1104,13 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
return count > 0 ? count : err; return count > 0 ? count : err;
} }
static inline unsigned fuse_wr_pages(loff_t pos, size_t len) static inline unsigned int fuse_wr_pages(loff_t pos, size_t len,
unsigned int max_pages)
{ {
return min_t(unsigned, return min_t(unsigned int,
((pos + len - 1) >> PAGE_SHIFT) - ((pos + len - 1) >> PAGE_SHIFT) -
(pos >> PAGE_SHIFT) + 1, (pos >> PAGE_SHIFT) + 1,
FUSE_MAX_PAGES_PER_REQ); max_pages);
} }
static ssize_t fuse_perform_write(struct kiocb *iocb, static ssize_t fuse_perform_write(struct kiocb *iocb,
...@@ -1131,7 +1132,8 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, ...@@ -1131,7 +1132,8 @@ static ssize_t fuse_perform_write(struct kiocb *iocb,
do { do {
struct fuse_req *req; struct fuse_req *req;
ssize_t count; ssize_t count;
unsigned nr_pages = fuse_wr_pages(pos, iov_iter_count(ii)); unsigned int nr_pages = fuse_wr_pages(pos, iov_iter_count(ii),
fc->max_pages);
req = fuse_get_req(fc, nr_pages); req = fuse_get_req(fc, nr_pages);
if (IS_ERR(req)) { if (IS_ERR(req)) {
...@@ -1321,11 +1323,6 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, ...@@ -1321,11 +1323,6 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
return ret < 0 ? ret : 0; return ret < 0 ? ret : 0;
} }
static inline int fuse_iter_npages(const struct iov_iter *ii_p)
{
return iov_iter_npages(ii_p, FUSE_MAX_PAGES_PER_REQ);
}
ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
loff_t *ppos, int flags) loff_t *ppos, int flags)
{ {
...@@ -1345,9 +1342,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, ...@@ -1345,9 +1342,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
int err = 0; int err = 0;
if (io->async) if (io->async)
req = fuse_get_req_for_background(fc, fuse_iter_npages(iter)); req = fuse_get_req_for_background(fc, iov_iter_npages(iter,
fc->max_pages));
else else
req = fuse_get_req(fc, fuse_iter_npages(iter)); req = fuse_get_req(fc, iov_iter_npages(iter, fc->max_pages));
if (IS_ERR(req)) if (IS_ERR(req))
return PTR_ERR(req); return PTR_ERR(req);
...@@ -1392,9 +1390,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, ...@@ -1392,9 +1390,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
fuse_put_request(fc, req); fuse_put_request(fc, req);
if (io->async) if (io->async)
req = fuse_get_req_for_background(fc, req = fuse_get_req_for_background(fc,
fuse_iter_npages(iter)); iov_iter_npages(iter, fc->max_pages));
else else
req = fuse_get_req(fc, fuse_iter_npages(iter)); req = fuse_get_req(fc, iov_iter_npages(iter,
fc->max_pages));
if (IS_ERR(req)) if (IS_ERR(req))
break; break;
} }
...@@ -1823,7 +1822,7 @@ static int fuse_writepages_fill(struct page *page, ...@@ -1823,7 +1822,7 @@ static int fuse_writepages_fill(struct page *page,
is_writeback = fuse_page_is_writeback(inode, page->index); is_writeback = fuse_page_is_writeback(inode, page->index);
if (req && req->num_pages && if (req && req->num_pages &&
(is_writeback || req->num_pages == FUSE_MAX_PAGES_PER_REQ || (is_writeback || req->num_pages == fc->max_pages ||
(req->num_pages + 1) * PAGE_SIZE > fc->max_write || (req->num_pages + 1) * PAGE_SIZE > fc->max_write ||
data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) { data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) {
fuse_writepages_send(data); fuse_writepages_send(data);
...@@ -1851,7 +1850,7 @@ static int fuse_writepages_fill(struct page *page, ...@@ -1851,7 +1850,7 @@ static int fuse_writepages_fill(struct page *page,
struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_inode *fi = get_fuse_inode(inode);
err = -ENOMEM; err = -ENOMEM;
req = fuse_request_alloc_nofs(FUSE_MAX_PAGES_PER_REQ); req = fuse_request_alloc_nofs(fc->max_pages);
if (!req) { if (!req) {
__free_page(tmp_page); __free_page(tmp_page);
goto out_unlock; goto out_unlock;
...@@ -1908,6 +1907,7 @@ static int fuse_writepages(struct address_space *mapping, ...@@ -1908,6 +1907,7 @@ static int fuse_writepages(struct address_space *mapping,
struct writeback_control *wbc) struct writeback_control *wbc)
{ {
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_fill_wb_data data; struct fuse_fill_wb_data data;
int err; int err;
...@@ -1920,7 +1920,7 @@ static int fuse_writepages(struct address_space *mapping, ...@@ -1920,7 +1920,7 @@ static int fuse_writepages(struct address_space *mapping,
data.ff = NULL; data.ff = NULL;
err = -ENOMEM; err = -ENOMEM;
data.orig_pages = kcalloc(FUSE_MAX_PAGES_PER_REQ, data.orig_pages = kcalloc(fc->max_pages,
sizeof(struct page *), sizeof(struct page *),
GFP_NOFS); GFP_NOFS);
if (!data.orig_pages) if (!data.orig_pages)
...@@ -2391,10 +2391,11 @@ static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src, ...@@ -2391,10 +2391,11 @@ static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src,
} }
/* Make sure iov_length() won't overflow */ /* Make sure iov_length() won't overflow */
static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count) static int fuse_verify_ioctl_iov(struct fuse_conn *fc, struct iovec *iov,
size_t count)
{ {
size_t n; size_t n;
u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT; u32 max = fc->max_pages << PAGE_SHIFT;
for (n = 0; n < count; n++, iov++) { for (n = 0; n < count; n++, iov++) {
if (iov->iov_len > (size_t) max) if (iov->iov_len > (size_t) max)
...@@ -2518,7 +2519,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, ...@@ -2518,7 +2519,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
err = -ENOMEM; err = -ENOMEM;
pages = kcalloc(FUSE_MAX_PAGES_PER_REQ, sizeof(pages[0]), GFP_KERNEL); pages = kcalloc(fc->max_pages, sizeof(pages[0]), GFP_KERNEL);
iov_page = (struct iovec *) __get_free_page(GFP_KERNEL); iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
if (!pages || !iov_page) if (!pages || !iov_page)
goto out; goto out;
...@@ -2557,7 +2558,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, ...@@ -2557,7 +2558,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
/* make sure there are enough buffer pages and init request with them */ /* make sure there are enough buffer pages and init request with them */
err = -ENOMEM; err = -ENOMEM;
if (max_pages > FUSE_MAX_PAGES_PER_REQ) if (max_pages > fc->max_pages)
goto out; goto out;
while (num_pages < max_pages) { while (num_pages < max_pages) {
pages[num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); pages[num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
...@@ -2644,11 +2645,11 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, ...@@ -2644,11 +2645,11 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
in_iov = iov_page; in_iov = iov_page;
out_iov = in_iov + in_iovs; out_iov = in_iov + in_iovs;
err = fuse_verify_ioctl_iov(in_iov, in_iovs); err = fuse_verify_ioctl_iov(fc, in_iov, in_iovs);
if (err) if (err)
goto out; goto out;
err = fuse_verify_ioctl_iov(out_iov, out_iovs); err = fuse_verify_ioctl_iov(fc, out_iov, out_iovs);
if (err) if (err)
goto out; goto out;
...@@ -2839,9 +2840,9 @@ static void fuse_do_truncate(struct file *file) ...@@ -2839,9 +2840,9 @@ static void fuse_do_truncate(struct file *file)
fuse_do_setattr(file_dentry(file), &attr, file); fuse_do_setattr(file_dentry(file), &attr, file);
} }
static inline loff_t fuse_round_up(loff_t off) static inline loff_t fuse_round_up(struct fuse_conn *fc, loff_t off)
{ {
return round_up(off, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); return round_up(off, fc->max_pages << PAGE_SHIFT);
} }
static ssize_t static ssize_t
...@@ -2870,7 +2871,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) ...@@ -2870,7 +2871,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
if (async_dio && iov_iter_rw(iter) != WRITE && offset + count > i_size) { if (async_dio && iov_iter_rw(iter) != WRITE && offset + count > i_size) {
if (offset >= i_size) if (offset >= i_size)
return 0; return 0;
iov_iter_truncate(iter, fuse_round_up(i_size - offset)); iov_iter_truncate(iter, fuse_round_up(ff->fc, i_size - offset));
count = iov_iter_count(iter); count = iov_iter_count(iter);
} }
......
...@@ -28,8 +28,11 @@ ...@@ -28,8 +28,11 @@
#include <linux/refcount.h> #include <linux/refcount.h>
#include <linux/user_namespace.h> #include <linux/user_namespace.h>
/** Max number of pages that can be used in a single read request */ /** Default max number of pages that can be used in a single read request */
#define FUSE_MAX_PAGES_PER_REQ 32 #define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32
/** Maximum of max_pages received in init_out */
#define FUSE_MAX_MAX_PAGES 256
/** Bias for fi->writectr, meaning new writepages must not be sent */ /** Bias for fi->writectr, meaning new writepages must not be sent */
#define FUSE_NOWRITE INT_MIN #define FUSE_NOWRITE INT_MIN
...@@ -525,6 +528,9 @@ struct fuse_conn { ...@@ -525,6 +528,9 @@ struct fuse_conn {
/** Maximum write size */ /** Maximum write size */
unsigned max_write; unsigned max_write;
/** Maximum number of pages that can be used in a single request */
unsigned int max_pages;
/** Input queue */ /** Input queue */
struct fuse_iqueue iq; struct fuse_iqueue iq;
......
...@@ -928,6 +928,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) ...@@ -928,6 +928,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
} }
if (arg->flags & FUSE_ABORT_ERROR) if (arg->flags & FUSE_ABORT_ERROR)
fc->abort_err = 1; fc->abort_err = 1;
if (arg->flags & FUSE_MAX_PAGES) {
fc->max_pages =
min_t(unsigned int, FUSE_MAX_MAX_PAGES,
max_t(unsigned int, arg->max_pages, 1));
}
} else { } else {
ra_pages = fc->max_read / PAGE_SIZE; ra_pages = fc->max_read / PAGE_SIZE;
fc->no_lock = 1; fc->no_lock = 1;
...@@ -959,7 +964,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) ...@@ -959,7 +964,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO | FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT | FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL | FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
FUSE_ABORT_ERROR; FUSE_ABORT_ERROR | FUSE_MAX_PAGES;
req->in.h.opcode = FUSE_INIT; req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1; req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg); req->in.args[0].size = sizeof(*arg);
...@@ -1152,6 +1157,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) ...@@ -1152,6 +1157,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
fc->user_id = d.user_id; fc->user_id = d.user_id;
fc->group_id = d.group_id; fc->group_id = d.group_id;
fc->max_read = max_t(unsigned, 4096, d.max_read); fc->max_read = max_t(unsigned, 4096, d.max_read);
fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
/* Used by get_root_inode() */ /* Used by get_root_inode() */
sb->s_fs_info = fc; sb->s_fs_info = fc;
......
...@@ -120,6 +120,7 @@ ...@@ -120,6 +120,7 @@
* 7.28 * 7.28
* - add FUSE_COPY_FILE_RANGE * - add FUSE_COPY_FILE_RANGE
* - add FOPEN_CACHE_DIR * - add FOPEN_CACHE_DIR
* - add FUSE_MAX_PAGES, add max_pages to init_out
*/ */
#ifndef _LINUX_FUSE_H #ifndef _LINUX_FUSE_H
...@@ -255,6 +256,7 @@ struct fuse_file_lock { ...@@ -255,6 +256,7 @@ struct fuse_file_lock {
* FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc * FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc
* FUSE_POSIX_ACL: filesystem supports posix acls * FUSE_POSIX_ACL: filesystem supports posix acls
* FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED
* FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages
*/ */
#define FUSE_ASYNC_READ (1 << 0) #define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1) #define FUSE_POSIX_LOCKS (1 << 1)
...@@ -278,6 +280,7 @@ struct fuse_file_lock { ...@@ -278,6 +280,7 @@ struct fuse_file_lock {
#define FUSE_HANDLE_KILLPRIV (1 << 19) #define FUSE_HANDLE_KILLPRIV (1 << 19)
#define FUSE_POSIX_ACL (1 << 20) #define FUSE_POSIX_ACL (1 << 20)
#define FUSE_ABORT_ERROR (1 << 21) #define FUSE_ABORT_ERROR (1 << 21)
#define FUSE_MAX_PAGES (1 << 22)
/** /**
* CUSE INIT request/reply flags * CUSE INIT request/reply flags
...@@ -617,7 +620,9 @@ struct fuse_init_out { ...@@ -617,7 +620,9 @@ struct fuse_init_out {
uint16_t congestion_threshold; uint16_t congestion_threshold;
uint32_t max_write; uint32_t max_write;
uint32_t time_gran; uint32_t time_gran;
uint32_t unused[9]; uint16_t max_pages;
uint16_t padding;
uint32_t unused[8];
}; };
#define CUSE_INIT_INFO_MAX 4096 #define CUSE_INIT_INFO_MAX 4096
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment