Commit 4273b793, authored by Anand Avati, committed by Miklos Szeredi

fuse: O_DIRECT support for files

Implement ->direct_IO() method in aops. The ->direct_IO() method combines
the existing fuse_direct_read/fuse_direct_write methods to implement
O_DIRECT functionality.

Reaching ->direct_IO() in the read path via generic_file_aio_read() ensures
proper synchronization with the page cache through its existing framework.

Reaching ->direct_IO() in the write path via fuse_file_aio_write is made
to come via generic_file_direct_write() which makes it play nice with
the page cache w.r.t other mmap pages etc.

On files marked 'direct_io' by the filesystem server, IO always follows
the fuse_direct_read/write path. fcntl(O_DIRECT) has no effect on such
files and always succeeds.

On files not marked 'direct_io' by the filesystem server, the IO path
depends on whether the application has set the O_DIRECT flag. The flag
can be passed at open() time as well as set later via fcntl().

Note that asynchronous O_DIRECT iocb jobs are always completed
synchronously (this has been the case with FUSE even before this patch).
Signed-off-by: Anand Avati <avati@redhat.com>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
parent ac45d613
...@@ -387,9 +387,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, ...@@ -387,9 +387,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
if (fc->no_create) if (fc->no_create)
return -ENOSYS; return -ENOSYS;
if (flags & O_DIRECT)
return -EINVAL;
forget = fuse_alloc_forget(); forget = fuse_alloc_forget();
if (!forget) if (!forget)
return -ENOMEM; return -ENOMEM;
......
...@@ -194,10 +194,6 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) ...@@ -194,10 +194,6 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_conn *fc = get_fuse_conn(inode);
int err; int err;
/* VFS checks this, but only _after_ ->open() */
if (file->f_flags & O_DIRECT)
return -EINVAL;
err = generic_file_open(inode, file); err = generic_file_open(inode, file);
if (err) if (err)
return err; return err;
...@@ -932,17 +928,23 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, ...@@ -932,17 +928,23 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping; struct address_space *mapping = file->f_mapping;
size_t count = 0; size_t count = 0;
size_t ocount = 0;
ssize_t written = 0; ssize_t written = 0;
ssize_t written_buffered = 0;
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
ssize_t err; ssize_t err;
struct iov_iter i; struct iov_iter i;
loff_t endbyte = 0;
WARN_ON(iocb->ki_pos != pos); WARN_ON(iocb->ki_pos != pos);
err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); ocount = 0;
err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
if (err) if (err)
return err; return err;
count = ocount;
mutex_lock(&inode->i_mutex); mutex_lock(&inode->i_mutex);
vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
...@@ -962,11 +964,41 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, ...@@ -962,11 +964,41 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
file_update_time(file); file_update_time(file);
if (file->f_flags & O_DIRECT) {
written = generic_file_direct_write(iocb, iov, &nr_segs,
pos, &iocb->ki_pos,
count, ocount);
if (written < 0 || written == count)
goto out;
pos += written;
count -= written;
iov_iter_init(&i, iov, nr_segs, count, written);
written_buffered = fuse_perform_write(file, mapping, &i, pos);
if (written_buffered < 0) {
err = written_buffered;
goto out;
}
endbyte = pos + written_buffered - 1;
err = filemap_write_and_wait_range(file->f_mapping, pos,
endbyte);
if (err)
goto out;
invalidate_mapping_pages(file->f_mapping,
pos >> PAGE_CACHE_SHIFT,
endbyte >> PAGE_CACHE_SHIFT);
written += written_buffered;
iocb->ki_pos = pos + written_buffered;
} else {
iov_iter_init(&i, iov, nr_segs, count, 0); iov_iter_init(&i, iov, nr_segs, count, 0);
written = fuse_perform_write(file, mapping, &i, pos); written = fuse_perform_write(file, mapping, &i, pos);
if (written >= 0) if (written >= 0)
iocb->ki_pos = pos + written; iocb->ki_pos = pos + written;
}
out: out:
current->backing_dev_info = NULL; current->backing_dev_info = NULL;
mutex_unlock(&inode->i_mutex); mutex_unlock(&inode->i_mutex);
...@@ -1101,30 +1133,41 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf, ...@@ -1101,30 +1133,41 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf,
return res; return res;
} }
static ssize_t fuse_direct_write(struct file *file, const char __user *buf, static ssize_t __fuse_direct_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos) size_t count, loff_t *ppos)
{ {
struct inode *inode = file->f_path.dentry->d_inode; struct inode *inode = file->f_path.dentry->d_inode;
ssize_t res; ssize_t res;
if (is_bad_inode(inode))
return -EIO;
/* Don't allow parallel writes to the same file */
mutex_lock(&inode->i_mutex);
res = generic_write_checks(file, ppos, &count, 0); res = generic_write_checks(file, ppos, &count, 0);
if (!res) { if (!res) {
res = fuse_direct_io(file, buf, count, ppos, 1); res = fuse_direct_io(file, buf, count, ppos, 1);
if (res > 0) if (res > 0)
fuse_write_update_size(inode, *ppos); fuse_write_update_size(inode, *ppos);
} }
mutex_unlock(&inode->i_mutex);
fuse_invalidate_attr(inode); fuse_invalidate_attr(inode);
return res; return res;
} }
/*
 * Locked entry point for a direct write.
 *
 * Fails with -EIO on a bad inode; otherwise takes inode->i_mutex around
 * __fuse_direct_write() and returns whatever that helper returns.
 */
static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
				 size_t count, loff_t *ppos)
{
	struct inode *host = file->f_path.dentry->d_inode;
	ssize_t nwritten;

	if (is_bad_inode(host))
		return -EIO;

	/* Serialize writers: no parallel writes to the same file */
	mutex_lock(&host->i_mutex);
	nwritten = __fuse_direct_write(file, buf, count, ppos);
	mutex_unlock(&host->i_mutex);

	return nwritten;
}
static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
{ {
__free_page(req->pages[0]); __free_page(req->pages[0]);
...@@ -2077,6 +2120,57 @@ int fuse_notify_poll_wakeup(struct fuse_conn *fc, ...@@ -2077,6 +2120,57 @@ int fuse_notify_poll_wakeup(struct fuse_conn *fc,
return 0; return 0;
} }
/*
 * Perform direct IO over an iovec array, one segment at a time.
 *
 * Each segment is handed to __fuse_direct_write() when rw == WRITE and to
 * fuse_direct_read() otherwise, with *ppos passed through to the helper.
 * Processing stops at the first error or the first short transfer.
 *
 * Returns the total number of bytes transferred.  An error code is
 * returned only when nothing was transferred before the failure.
 *
 * NOTE(review): the write side uses the variant that does not take
 * i_mutex itself; presumably the caller already holds it - confirm.
 */
static ssize_t fuse_loop_dio(struct file *filp, const struct iovec *iov,
			     unsigned long nr_segs, loff_t *ppos, int rw)
{
	ssize_t total = 0;
	unsigned long seg;

	for (seg = 0; seg < nr_segs; seg++) {
		void __user *buf = iov[seg].iov_base;
		size_t want = iov[seg].iov_len;
		ssize_t done;

		if (rw == WRITE)
			done = __fuse_direct_write(filp, buf, want, ppos);
		else
			done = fuse_direct_read(filp, buf, want, ppos);

		/* Report the error only if no earlier segment made progress */
		if (done < 0)
			return total ? total : done;

		total += done;

		/* Short transfer: do not continue with later segments */
		if (done != want)
			break;
	}

	return total;
}
/*
 * ->direct_IO() address space operation.
 *
 * Forwards the request for iocb->ki_filp to fuse_loop_dio(), starting at
 * 'offset'.  The position is kept in a local copy that is passed by
 * address; the updated value is not handed back to the caller.
 */
static ssize_t
fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
	       loff_t offset, unsigned long nr_segs)
{
	loff_t pos = offset;

	return fuse_loop_dio(iocb->ki_filp, iov, nr_segs, &pos, rw);
}
static const struct file_operations fuse_file_operations = { static const struct file_operations fuse_file_operations = {
.llseek = fuse_file_llseek, .llseek = fuse_file_llseek,
.read = do_sync_read, .read = do_sync_read,
...@@ -2120,6 +2214,7 @@ static const struct address_space_operations fuse_file_aops = { ...@@ -2120,6 +2214,7 @@ static const struct address_space_operations fuse_file_aops = {
.readpages = fuse_readpages, .readpages = fuse_readpages,
.set_page_dirty = __set_page_dirty_nobuffers, .set_page_dirty = __set_page_dirty_nobuffers,
.bmap = fuse_bmap, .bmap = fuse_bmap,
.direct_IO = fuse_direct_IO,
}; };
void fuse_init_file_inode(struct inode *inode) void fuse_init_file_inode(struct inode *inode)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment