Commit d15fee81 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse

Pull fuse update from Miklos Szeredi:
 "This series adds cached writeback support to fuse, improving write
  throughput"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse:
  fuse: fix "uninitialized variable" warning
  fuse: Turn writeback cache on
  fuse: Fix O_DIRECT operations vs cached writeback misorder
  fuse: fuse_flush() should wait on writeback
  fuse: Implement write_begin/write_end callbacks
  fuse: restructure fuse_readpage()
  fuse: Flush files on wb close
  fuse: Trust kernel i_mtime only
  fuse: Trust kernel i_size only
  fuse: Connection bit for enabling writeback
  fuse: Prepare to handle short reads
  fuse: Linking file to inode helper
parents 56c225fe f3846266
......@@ -95,7 +95,7 @@ static ssize_t cuse_read(struct file *file, char __user *buf, size_t count,
struct iovec iov = { .iov_base = buf, .iov_len = count };
struct fuse_io_priv io = { .async = 0, .file = file };
return fuse_direct_io(&io, &iov, 1, count, &pos, 0);
return fuse_direct_io(&io, &iov, 1, count, &pos, FUSE_DIO_CUSE);
}
static ssize_t cuse_write(struct file *file, const char __user *buf,
......@@ -109,7 +109,8 @@ static ssize_t cuse_write(struct file *file, const char __user *buf,
* No locking or generic_write_checks(), the server is
* responsible for locking and sanity checks.
*/
return fuse_direct_io(&io, &iov, 1, count, &pos, 1);
return fuse_direct_io(&io, &iov, 1, count, &pos,
FUSE_DIO_WRITE | FUSE_DIO_CUSE);
}
static int cuse_open(struct inode *inode, struct file *file)
......
......@@ -839,6 +839,14 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
struct kstat *stat)
{
unsigned int blkbits;
struct fuse_conn *fc = get_fuse_conn(inode);
/* see the comment in fuse_change_attributes() */
if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
attr->size = i_size_read(inode);
attr->mtime = inode->i_mtime.tv_sec;
attr->mtimensec = inode->i_mtime.tv_nsec;
}
stat->dev = inode->i_sb->s_dev;
stat->ino = attr->ino;
......@@ -1477,12 +1485,16 @@ static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
}
static bool update_mtime(unsigned ivalid)
static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
{
/* Always update if mtime is explicitly set */
if (ivalid & ATTR_MTIME_SET)
return true;
/* Or if kernel i_mtime is the official one */
if (trust_local_mtime)
return true;
/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
return false;
......@@ -1491,7 +1503,8 @@ static bool update_mtime(unsigned ivalid)
return true;
}
static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg,
bool trust_local_mtime)
{
unsigned ivalid = iattr->ia_valid;
......@@ -1510,11 +1523,11 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
if (!(ivalid & ATTR_ATIME_SET))
arg->valid |= FATTR_ATIME_NOW;
}
if ((ivalid & ATTR_MTIME) && update_mtime(ivalid)) {
if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_mtime)) {
arg->valid |= FATTR_MTIME;
arg->mtime = iattr->ia_mtime.tv_sec;
arg->mtimensec = iattr->ia_mtime.tv_nsec;
if (!(ivalid & ATTR_MTIME_SET))
if (!(ivalid & ATTR_MTIME_SET) && !trust_local_mtime)
arg->valid |= FATTR_MTIME_NOW;
}
}
......@@ -1563,6 +1576,63 @@ void fuse_release_nowrite(struct inode *inode)
spin_unlock(&fc->lock);
}
/*
 * Set up @req as a FUSE_SETATTR request addressed to @inode.
 *
 * The request gets exactly one input argument (@inarg_p) and one output
 * argument (@outarg_p).  Only the pointers are stored in the request;
 * the structures themselves are not copied here.
 */
static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req,
			      struct inode *inode,
			      struct fuse_setattr_in *inarg_p,
			      struct fuse_attr_out *outarg_p)
{
	/* Request header: operation and target node */
	req->in.h.opcode = FUSE_SETATTR;
	req->in.h.nodeid = get_node_id(inode);

	/* Single input argument: the setattr payload */
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(*inarg_p);
	req->in.args[0].value = inarg_p;

	/*
	 * Single output argument: the attribute reply.  Connections with
	 * a protocol minor below 9 use the compat reply size.
	 */
	req->out.numargs = 1;
	req->out.args[0].size = (fc->minor < 9) ? FUSE_COMPAT_ATTR_OUT_SIZE
						: sizeof(*outarg_p);
	req->out.args[0].value = outarg_p;
}
/*
 * Flush inode->i_mtime to the server.
 *
 * Sends a FUSE_SETATTR request whose only valid field is FATTR_MTIME,
 * carrying the current i_mtime of the inode behind @file's mapping.
 *
 * @file:   open file; file->f_mapping->host is the inode being flushed
 * @nofail: when true the request is obtained with
 *          fuse_get_req_nofail_nopages(); when false it is obtained with
 *          fuse_get_req_nopages() and an allocation error is returned
 *          to the caller
 *
 * Returns the error from request allocation, otherwise the error code
 * found in the reply header.  FUSE_I_MTIME_DIRTY is cleared only when
 * that code is zero.
 */
int fuse_flush_mtime(struct file *file, bool nofail)
{
	struct inode *inode = file->f_mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_setattr_in inarg;
	struct fuse_attr_out outarg;
	struct fuse_req *req = NULL;
	int err;

	if (!nofail) {
		req = fuse_get_req_nopages(fc);
		if (IS_ERR(req))
			return PTR_ERR(req);
	} else {
		req = fuse_get_req_nofail_nopages(fc, file);
	}

	/* Start from fully zeroed argument structures */
	memset(&inarg, 0, sizeof(inarg));
	memset(&outarg, 0, sizeof(outarg));

	/* Only the modification time is marked valid in this request */
	inarg.valid |= FATTR_MTIME;
	inarg.mtime = inode->i_mtime.tv_sec;
	inarg.mtimensec = inode->i_mtime.tv_nsec;

	fuse_setattr_fill(fc, req, inode, &inarg, &outarg);
	fuse_request_send(fc, req);
	/* Read the result before the request is released */
	err = req->out.h.error;
	fuse_put_request(fc, req);

	if (err)
		return err;

	clear_bit(FUSE_I_MTIME_DIRTY, &fi->state);
	return 0;
}
/*
* Set attributes, and at the same time refresh them.
*
......@@ -1580,8 +1650,10 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
struct fuse_setattr_in inarg;
struct fuse_attr_out outarg;
bool is_truncate = false;
bool is_wb = fc->writeback_cache;
loff_t oldsize;
int err;
bool trust_local_mtime = is_wb && S_ISREG(inode->i_mode);
if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
attr->ia_valid |= ATTR_FORCE;
......@@ -1610,7 +1682,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
memset(&inarg, 0, sizeof(inarg));
memset(&outarg, 0, sizeof(outarg));
iattr_to_fattr(attr, &inarg);
iattr_to_fattr(attr, &inarg, trust_local_mtime);
if (file) {
struct fuse_file *ff = file->private_data;
inarg.valid |= FATTR_FH;
......@@ -1621,17 +1693,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
inarg.valid |= FATTR_LOCKOWNER;
inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
}
req->in.h.opcode = FUSE_SETATTR;
req->in.h.nodeid = get_node_id(inode);
req->in.numargs = 1;
req->in.args[0].size = sizeof(inarg);
req->in.args[0].value = &inarg;
req->out.numargs = 1;
if (fc->minor < 9)
req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
else
req->out.args[0].size = sizeof(outarg);
req->out.args[0].value = &outarg;
fuse_setattr_fill(fc, req, inode, &inarg, &outarg);
fuse_request_send(fc, req);
err = req->out.h.error;
fuse_put_request(fc, req);
......@@ -1648,10 +1710,18 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
}
spin_lock(&fc->lock);
/* the kernel maintains i_mtime locally */
if (trust_local_mtime && (attr->ia_valid & ATTR_MTIME)) {
inode->i_mtime = attr->ia_mtime;
clear_bit(FUSE_I_MTIME_DIRTY, &fi->state);
}
fuse_change_attributes_common(inode, &outarg.attr,
attr_timeout(&outarg));
oldsize = inode->i_size;
i_size_write(inode, outarg.attr.size);
/* see the comment in fuse_change_attributes() */
if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
i_size_write(inode, outarg.attr.size);
if (is_truncate) {
/* NOTE: this may release/reacquire fc->lock */
......@@ -1663,7 +1733,8 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
* Only call invalidate_inode_pages2() after removing
* FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
*/
if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
if ((is_truncate || !is_wb) &&
S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
truncate_pagecache(inode, outarg.attr.size);
invalidate_inode_pages2(inode->i_mapping);
}
......@@ -1875,6 +1946,17 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
return err;
}
/*
 * Record a new modification time on the inode without contacting the
 * server.
 *
 * Only the S_MTIME bit of @flags is acted upon: i_mtime is set to *@now
 * and FUSE_I_MTIME_DIRTY is set in the fuse inode state.  Any other bits
 * in @flags are ignored.  Always returns 0.
 */
static int fuse_update_time(struct inode *inode, struct timespec *now,
			    int flags)
{
	if (!(flags & S_MTIME))
		return 0;

	inode->i_mtime = *now;
	set_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state);
	/* An mtime update on anything but a regular file is treated as a bug */
	BUG_ON(!S_ISREG(inode->i_mode));

	return 0;
}
static const struct inode_operations fuse_dir_inode_operations = {
.lookup = fuse_lookup,
.mkdir = fuse_mkdir,
......@@ -1914,6 +1996,7 @@ static const struct inode_operations fuse_common_inode_operations = {
.getxattr = fuse_getxattr,
.listxattr = fuse_listxattr,
.removexattr = fuse_removexattr,
.update_time = fuse_update_time,
};
static const struct inode_operations fuse_symlink_inode_operations = {
......
This diff is collapsed.
......@@ -119,6 +119,8 @@ enum {
FUSE_I_INIT_RDPLUS,
/** An operation changing file size is in progress */
FUSE_I_SIZE_UNSTABLE,
/** i_mtime has been updated locally; a flush to userspace needed */
FUSE_I_MTIME_DIRTY,
};
struct fuse_conn;
......@@ -480,6 +482,9 @@ struct fuse_conn {
/** Set if bdi is valid */
unsigned bdi_initialized:1;
/** write-back cache policy (default is write-through) */
unsigned writeback_cache:1;
/*
* The following bitfields are only for optimization purposes
* and hence races in setting them will not cause malfunction
......@@ -863,9 +868,20 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
bool isdir);
/**
* fuse_direct_io() flags
*/
/** If set, it is WRITE; otherwise - READ */
#define FUSE_DIO_WRITE (1 << 0)
/** CUSE passes fuse_direct_io() a file whose f_mapping->host is not from FUSE */
#define FUSE_DIO_CUSE (1 << 1)
ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
unsigned long nr_segs, size_t count, loff_t *ppos,
int write);
int flags);
long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
unsigned int flags);
long fuse_ioctl_common(struct file *file, unsigned int cmd,
......@@ -873,7 +889,9 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd,
unsigned fuse_file_poll(struct file *file, poll_table *wait);
int fuse_dev_release(struct inode *inode, struct file *file);
void fuse_write_update_size(struct inode *inode, loff_t pos);
bool fuse_write_update_size(struct inode *inode, loff_t pos);
int fuse_flush_mtime(struct file *file, bool nofail);
int fuse_do_setattr(struct inode *inode, struct iattr *attr,
struct file *file);
......
......@@ -170,8 +170,11 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
inode->i_blocks = attr->blocks;
inode->i_atime.tv_sec = attr->atime;
inode->i_atime.tv_nsec = attr->atimensec;
inode->i_mtime.tv_sec = attr->mtime;
inode->i_mtime.tv_nsec = attr->mtimensec;
/* mtime from server may be stale due to local buffered write */
if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) {
inode->i_mtime.tv_sec = attr->mtime;
inode->i_mtime.tv_nsec = attr->mtimensec;
}
inode->i_ctime.tv_sec = attr->ctime;
inode->i_ctime.tv_nsec = attr->ctimensec;
......@@ -197,6 +200,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
bool is_wb = fc->writeback_cache;
loff_t oldsize;
struct timespec old_mtime;
......@@ -211,10 +215,16 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
fuse_change_attributes_common(inode, attr, attr_valid);
oldsize = inode->i_size;
i_size_write(inode, attr->size);
/*
* In case of writeback_cache enabled, the cached writes beyond EOF
* extend local i_size without keeping userspace server in sync. So,
* attr->size coming from server can be stale. We cannot trust it.
*/
if (!is_wb || !S_ISREG(inode->i_mode))
i_size_write(inode, attr->size);
spin_unlock(&fc->lock);
if (S_ISREG(inode->i_mode)) {
if (!is_wb && S_ISREG(inode->i_mode)) {
bool inval = false;
if (oldsize != attr->size) {
......@@ -243,6 +253,8 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
{
inode->i_mode = attr->mode & S_IFMT;
inode->i_size = attr->size;
inode->i_mtime.tv_sec = attr->mtime;
inode->i_mtime.tv_nsec = attr->mtimensec;
if (S_ISREG(inode->i_mode)) {
fuse_init_common(inode);
fuse_init_file_inode(inode);
......@@ -289,7 +301,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
return NULL;
if ((inode->i_state & I_NEW)) {
inode->i_flags |= S_NOATIME|S_NOCMTIME;
inode->i_flags |= S_NOATIME;
if (!fc->writeback_cache || !S_ISREG(inode->i_mode))
inode->i_flags |= S_NOCMTIME;
inode->i_generation = generation;
inode->i_data.backing_dev_info = &fc->bdi;
fuse_init_inode(inode, attr);
......@@ -873,6 +887,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
}
if (arg->flags & FUSE_ASYNC_DIO)
fc->async_dio = 1;
if (arg->flags & FUSE_WRITEBACK_CACHE)
fc->writeback_cache = 1;
} else {
ra_pages = fc->max_read / PAGE_CACHE_SIZE;
fc->no_lock = 1;
......@@ -900,7 +916,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO;
FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
FUSE_WRITEBACK_CACHE;
req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg);
......
......@@ -93,6 +93,9 @@
*
* 7.22
* - add FUSE_ASYNC_DIO
*
* 7.23
* - add FUSE_WRITEBACK_CACHE
*/
#ifndef _LINUX_FUSE_H
......@@ -128,7 +131,7 @@
#define FUSE_KERNEL_VERSION 7
/** Minor version number of this interface */
#define FUSE_KERNEL_MINOR_VERSION 22
#define FUSE_KERNEL_MINOR_VERSION 23
/** The node ID of the root inode */
#define FUSE_ROOT_ID 1
......@@ -219,6 +222,7 @@ struct fuse_file_lock {
* FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one)
* FUSE_READDIRPLUS_AUTO: adaptive readdirplus
* FUSE_ASYNC_DIO: asynchronous direct I/O submission
* FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
......@@ -236,6 +240,7 @@ struct fuse_file_lock {
#define FUSE_DO_READDIRPLUS (1 << 13)
#define FUSE_READDIRPLUS_AUTO (1 << 14)
#define FUSE_ASYNC_DIO (1 << 15)
#define FUSE_WRITEBACK_CACHE (1 << 16)
/**
* CUSE INIT request/reply flags
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment