Commit c4c2416a authored by Gang He, committed by Linus Torvalds

ocfs2: nowait aio support

Return EAGAIN if any of the following checks fail for direct I/O:

 - Cannot get the related locks immediately

 - Blocks are not allocated at the write location, so the write would
   trigger block allocation and block I/O operations.

[ghe@suse.com: v4]
  Link: http://lkml.kernel.org/r/1516007283-29932-4-git-send-email-ghe@suse.com
[ghe@suse.com: v2]
  Link: http://lkml.kernel.org/r/1511944612-9629-4-git-send-email-ghe@suse.com
Link: http://lkml.kernel.org/r/1511775987-841-4-git-send-email-ghe@suse.com
Signed-off-by: Gang He <ghe@suse.com>
Reviewed-by: Alex Chen <alex.chen@huawei.com>
Cc: Mark Fasheh <mfasheh@versity.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <jiangqi903@gmail.com>
Cc: Changwei Ge <ge.changwei@h3c.com>
Cc: Jun Piao <piaojun@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent ac604d3c
...@@ -1958,7 +1958,7 @@ int ocfs2_readdir(struct file *file, struct dir_context *ctx) ...@@ -1958,7 +1958,7 @@ int ocfs2_readdir(struct file *file, struct dir_context *ctx)
trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno); trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno);
error = ocfs2_inode_lock_atime(inode, file->f_path.mnt, &lock_level); error = ocfs2_inode_lock_atime(inode, file->f_path.mnt, &lock_level, 1);
if (lock_level && error >= 0) { if (lock_level && error >= 0) {
/* We release EX lock which used to update atime /* We release EX lock which used to update atime
* and get PR lock again to reduce contention * and get PR lock again to reduce contention
......
...@@ -2546,12 +2546,17 @@ int ocfs2_inode_lock_with_page(struct inode *inode, ...@@ -2546,12 +2546,17 @@ int ocfs2_inode_lock_with_page(struct inode *inode,
int ocfs2_inode_lock_atime(struct inode *inode, int ocfs2_inode_lock_atime(struct inode *inode,
struct vfsmount *vfsmnt, struct vfsmount *vfsmnt,
int *level) int *level, int wait)
{ {
int ret; int ret;
if (wait)
ret = ocfs2_inode_lock(inode, NULL, 0); ret = ocfs2_inode_lock(inode, NULL, 0);
else
ret = ocfs2_try_inode_lock(inode, NULL, 0);
if (ret < 0) { if (ret < 0) {
if (ret != -EAGAIN)
mlog_errno(ret); mlog_errno(ret);
return ret; return ret;
} }
...@@ -2564,8 +2569,13 @@ int ocfs2_inode_lock_atime(struct inode *inode, ...@@ -2564,8 +2569,13 @@ int ocfs2_inode_lock_atime(struct inode *inode,
struct buffer_head *bh = NULL; struct buffer_head *bh = NULL;
ocfs2_inode_unlock(inode, 0); ocfs2_inode_unlock(inode, 0);
if (wait)
ret = ocfs2_inode_lock(inode, &bh, 1); ret = ocfs2_inode_lock(inode, &bh, 1);
else
ret = ocfs2_try_inode_lock(inode, &bh, 1);
if (ret < 0) { if (ret < 0) {
if (ret != -EAGAIN)
mlog_errno(ret); mlog_errno(ret);
return ret; return ret;
} }
......
...@@ -146,7 +146,7 @@ int ocfs2_try_open_lock(struct inode *inode, int write); ...@@ -146,7 +146,7 @@ int ocfs2_try_open_lock(struct inode *inode, int write);
void ocfs2_open_unlock(struct inode *inode); void ocfs2_open_unlock(struct inode *inode);
int ocfs2_inode_lock_atime(struct inode *inode, int ocfs2_inode_lock_atime(struct inode *inode,
struct vfsmount *vfsmnt, struct vfsmount *vfsmnt,
int *level); int *level, int wait);
int ocfs2_inode_lock_full_nested(struct inode *inode, int ocfs2_inode_lock_full_nested(struct inode *inode,
struct buffer_head **ret_bh, struct buffer_head **ret_bh,
int ex, int ex,
......
...@@ -140,6 +140,8 @@ static int ocfs2_file_open(struct inode *inode, struct file *file) ...@@ -140,6 +140,8 @@ static int ocfs2_file_open(struct inode *inode, struct file *file)
spin_unlock(&oi->ip_lock); spin_unlock(&oi->ip_lock);
} }
file->f_mode |= FMODE_NOWAIT;
leave: leave:
return status; return status;
} }
...@@ -2132,12 +2134,12 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode, ...@@ -2132,12 +2134,12 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
} }
static int ocfs2_prepare_inode_for_write(struct file *file, static int ocfs2_prepare_inode_for_write(struct file *file,
loff_t pos, loff_t pos, size_t count, int wait)
size_t count)
{ {
int ret = 0, meta_level = 0; int ret = 0, meta_level = 0, overwrite_io = 0;
struct dentry *dentry = file->f_path.dentry; struct dentry *dentry = file->f_path.dentry;
struct inode *inode = d_inode(dentry); struct inode *inode = d_inode(dentry);
struct buffer_head *di_bh = NULL;
loff_t end; loff_t end;
/* /*
...@@ -2145,13 +2147,40 @@ static int ocfs2_prepare_inode_for_write(struct file *file, ...@@ -2145,13 +2147,40 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
* if we need to make modifications here. * if we need to make modifications here.
*/ */
for(;;) { for(;;) {
if (wait)
ret = ocfs2_inode_lock(inode, NULL, meta_level); ret = ocfs2_inode_lock(inode, NULL, meta_level);
else
ret = ocfs2_try_inode_lock(inode,
overwrite_io ? NULL : &di_bh, meta_level);
if (ret < 0) { if (ret < 0) {
meta_level = -1; meta_level = -1;
if (ret != -EAGAIN)
mlog_errno(ret); mlog_errno(ret);
goto out; goto out;
} }
/*
* Check if IO will overwrite allocated blocks in case
* IOCB_NOWAIT flag is set.
*/
if (!wait && !overwrite_io) {
overwrite_io = 1;
if (!down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem)) {
ret = -EAGAIN;
goto out_unlock;
}
ret = ocfs2_overwrite_io(inode, di_bh, pos, count);
brelse(di_bh);
di_bh = NULL;
up_read(&OCFS2_I(inode)->ip_alloc_sem);
if (ret < 0) {
if (ret != -EAGAIN)
mlog_errno(ret);
goto out_unlock;
}
}
/* Clear suid / sgid if necessary. We do this here /* Clear suid / sgid if necessary. We do this here
* instead of later in the write path because * instead of later in the write path because
* remove_suid() calls ->setattr without any hint that * remove_suid() calls ->setattr without any hint that
...@@ -2199,7 +2228,9 @@ static int ocfs2_prepare_inode_for_write(struct file *file, ...@@ -2199,7 +2228,9 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
out_unlock: out_unlock:
trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno, trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno,
pos, count); pos, count, wait);
brelse(di_bh);
if (meta_level >= 0) if (meta_level >= 0)
ocfs2_inode_unlock(inode, meta_level); ocfs2_inode_unlock(inode, meta_level);
...@@ -2211,7 +2242,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file, ...@@ -2211,7 +2242,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
struct iov_iter *from) struct iov_iter *from)
{ {
int direct_io, rw_level; int rw_level;
ssize_t written = 0; ssize_t written = 0;
ssize_t ret; ssize_t ret;
size_t count = iov_iter_count(from); size_t count = iov_iter_count(from);
...@@ -2223,6 +2254,8 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, ...@@ -2223,6 +2254,8 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
void *saved_ki_complete = NULL; void *saved_ki_complete = NULL;
int append_write = ((iocb->ki_pos + count) >= int append_write = ((iocb->ki_pos + count) >=
i_size_read(inode) ? 1 : 0); i_size_read(inode) ? 1 : 0);
int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;
trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry, trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry,
(unsigned long long)OCFS2_I(inode)->ip_blkno, (unsigned long long)OCFS2_I(inode)->ip_blkno,
...@@ -2230,11 +2263,16 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, ...@@ -2230,11 +2263,16 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
file->f_path.dentry->d_name.name, file->f_path.dentry->d_name.name,
(unsigned int)from->nr_segs); /* GRRRRR */ (unsigned int)from->nr_segs); /* GRRRRR */
if (!direct_io && nowait)
return -EOPNOTSUPP;
if (count == 0) if (count == 0)
return 0; return 0;
direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0; if (nowait) {
if (!inode_trylock(inode))
return -EAGAIN;
} else
inode_lock(inode); inode_lock(inode);
/* /*
...@@ -2244,8 +2282,12 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, ...@@ -2244,8 +2282,12 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
*/ */
rw_level = (!direct_io || full_coherency || append_write); rw_level = (!direct_io || full_coherency || append_write);
if (nowait)
ret = ocfs2_try_rw_lock(inode, rw_level);
else
ret = ocfs2_rw_lock(inode, rw_level); ret = ocfs2_rw_lock(inode, rw_level);
if (ret < 0) { if (ret < 0) {
if (ret != -EAGAIN)
mlog_errno(ret); mlog_errno(ret);
goto out_mutex; goto out_mutex;
} }
...@@ -2260,8 +2302,12 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, ...@@ -2260,8 +2302,12 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
* other nodes to drop their caches. Buffered I/O * other nodes to drop their caches. Buffered I/O
* already does this in write_begin(). * already does this in write_begin().
*/ */
if (nowait)
ret = ocfs2_try_inode_lock(inode, NULL, 1);
else
ret = ocfs2_inode_lock(inode, NULL, 1); ret = ocfs2_inode_lock(inode, NULL, 1);
if (ret < 0) { if (ret < 0) {
if (ret != -EAGAIN)
mlog_errno(ret); mlog_errno(ret);
goto out; goto out;
} }
...@@ -2277,8 +2323,9 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, ...@@ -2277,8 +2323,9 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
} }
count = ret; count = ret;
ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count); ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, !nowait);
if (ret < 0) { if (ret < 0) {
if (ret != -EAGAIN)
mlog_errno(ret); mlog_errno(ret);
goto out; goto out;
} }
...@@ -2355,6 +2402,8 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, ...@@ -2355,6 +2402,8 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
int ret = 0, rw_level = -1, lock_level = 0; int ret = 0, rw_level = -1, lock_level = 0;
struct file *filp = iocb->ki_filp; struct file *filp = iocb->ki_filp;
struct inode *inode = file_inode(filp); struct inode *inode = file_inode(filp);
int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;
trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry, trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry,
(unsigned long long)OCFS2_I(inode)->ip_blkno, (unsigned long long)OCFS2_I(inode)->ip_blkno,
...@@ -2369,13 +2418,21 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, ...@@ -2369,13 +2418,21 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
goto bail; goto bail;
} }
if (!direct_io && nowait)
return -EOPNOTSUPP;
/* /*
* buffered reads protect themselves in ->readpage(). O_DIRECT reads * buffered reads protect themselves in ->readpage(). O_DIRECT reads
* need locks to protect pending reads from racing with truncate. * need locks to protect pending reads from racing with truncate.
*/ */
if (iocb->ki_flags & IOCB_DIRECT) { if (direct_io) {
if (nowait)
ret = ocfs2_try_rw_lock(inode, 0);
else
ret = ocfs2_rw_lock(inode, 0); ret = ocfs2_rw_lock(inode, 0);
if (ret < 0) { if (ret < 0) {
if (ret != -EAGAIN)
mlog_errno(ret); mlog_errno(ret);
goto bail; goto bail;
} }
...@@ -2393,8 +2450,10 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, ...@@ -2393,8 +2450,10 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
* like i_size. This allows the checks down below * like i_size. This allows the checks down below
* generic_file_aio_read() a chance of actually working. * generic_file_aio_read() a chance of actually working.
*/ */
ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level); ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level,
!nowait);
if (ret < 0) { if (ret < 0) {
if (ret != -EAGAIN)
mlog_errno(ret); mlog_errno(ret);
goto bail; goto bail;
} }
......
...@@ -184,7 +184,7 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) ...@@ -184,7 +184,7 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
int ret = 0, lock_level = 0; int ret = 0, lock_level = 0;
ret = ocfs2_inode_lock_atime(file_inode(file), ret = ocfs2_inode_lock_atime(file_inode(file),
file->f_path.mnt, &lock_level); file->f_path.mnt, &lock_level, 1);
if (ret < 0) { if (ret < 0) {
mlog_errno(ret); mlog_errno(ret);
goto out; goto out;
......
...@@ -1449,20 +1449,22 @@ DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_remove_inode_range); ...@@ -1449,20 +1449,22 @@ DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_remove_inode_range);
TRACE_EVENT(ocfs2_prepare_inode_for_write, TRACE_EVENT(ocfs2_prepare_inode_for_write,
TP_PROTO(unsigned long long ino, unsigned long long saved_pos, TP_PROTO(unsigned long long ino, unsigned long long saved_pos,
unsigned long count), unsigned long count, int wait),
TP_ARGS(ino, saved_pos, count), TP_ARGS(ino, saved_pos, count, wait),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(unsigned long long, ino) __field(unsigned long long, ino)
__field(unsigned long long, saved_pos) __field(unsigned long long, saved_pos)
__field(unsigned long, count) __field(unsigned long, count)
__field(int, wait)
), ),
TP_fast_assign( TP_fast_assign(
__entry->ino = ino; __entry->ino = ino;
__entry->saved_pos = saved_pos; __entry->saved_pos = saved_pos;
__entry->count = count; __entry->count = count;
__entry->wait = wait;
), ),
TP_printk("%llu %llu %lu", __entry->ino, TP_printk("%llu %llu %lu %d", __entry->ino,
__entry->saved_pos, __entry->count) __entry->saved_pos, __entry->count, __entry->wait)
); );
DEFINE_OCFS2_INT_EVENT(generic_file_aio_read_ret); DEFINE_OCFS2_INT_EVENT(generic_file_aio_read_ret);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment