Commit 512f62ac, authored by Joseph Qi, committed by Linus Torvalds

ocfs2: fix race between dio and recover orphan

During direct io the inode is first added to the orphan dir and then
deleted from it.  There is a race window in which the orphan entry can
be deleted twice, which triggers the BUG when validating
OCFS2_DIO_ORPHANED_FL in ocfs2_del_inode_from_orphan.

ocfs2_direct_IO_write
    ...
    ocfs2_add_inode_to_orphan
    >>>>>>>> race window.
             1) another node may rm the file and then go down; this node
             then takes care of orphan recovery and clears the flag
             OCFS2_DIO_ORPHANED_FL.
             2) since the rw lock has been unlocked, it may race with
             another orphan recovery and append dio.
    ocfs2_del_inode_from_orphan

So take the inode mutex when recovering orphans, and defer the rw unlock
to the end of the aio write in the case of append dio.
Signed-off-by: Joseph Qi <joseph.qi@huawei.com>
Reported-by: Yiwen Jiang <jiangyiwen@huawei.com>
Cc: Weiwei Wang <wangww631@huawei.com>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 81cf09ed
...@@ -627,10 +627,13 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, ...@@ -627,10 +627,13 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio); mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio);
} }
ocfs2_iocb_clear_rw_locked(iocb); /* Let rw unlock to be done later to protect append direct io write */
if (offset + bytes <= i_size_read(inode)) {
ocfs2_iocb_clear_rw_locked(iocb);
level = ocfs2_iocb_rw_locked_level(iocb); level = ocfs2_iocb_rw_locked_level(iocb);
ocfs2_rw_unlock(inode, level); ocfs2_rw_unlock(inode, level);
}
} }
static int ocfs2_releasepage(struct page *page, gfp_t wait) static int ocfs2_releasepage(struct page *page, gfp_t wait)
......
...@@ -2416,7 +2416,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, ...@@ -2416,7 +2416,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
} }
no_sync: no_sync:
if (unaligned_dio) { if (unaligned_dio && ocfs2_iocb_is_unaligned_aio(iocb)) {
ocfs2_iocb_clear_unaligned_aio(iocb); ocfs2_iocb_clear_unaligned_aio(iocb);
mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio); mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio);
} }
......
...@@ -81,8 +81,6 @@ struct ocfs2_inode_info ...@@ -81,8 +81,6 @@ struct ocfs2_inode_info
tid_t i_sync_tid; tid_t i_sync_tid;
tid_t i_datasync_tid; tid_t i_datasync_tid;
wait_queue_head_t append_dio_wq;
struct dquot *i_dquot[MAXQUOTAS]; struct dquot *i_dquot[MAXQUOTAS];
}; };
......
...@@ -2170,6 +2170,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, ...@@ -2170,6 +2170,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
iter = oi->ip_next_orphan; iter = oi->ip_next_orphan;
oi->ip_next_orphan = NULL; oi->ip_next_orphan = NULL;
mutex_lock(&inode->i_mutex);
ret = ocfs2_rw_lock(inode, 1); ret = ocfs2_rw_lock(inode, 1);
if (ret < 0) { if (ret < 0) {
mlog_errno(ret); mlog_errno(ret);
...@@ -2206,17 +2207,16 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, ...@@ -2206,17 +2207,16 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh, 0, 0); ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh, 0, 0);
if (ret) if (ret)
mlog_errno(ret); mlog_errno(ret);
wake_up(&OCFS2_I(inode)->append_dio_wq);
} /* else if ORPHAN_NO_NEED_TRUNCATE, do nothing */ } /* else if ORPHAN_NO_NEED_TRUNCATE, do nothing */
unlock_inode: unlock_inode:
ocfs2_inode_unlock(inode, 1); ocfs2_inode_unlock(inode, 1);
brelse(di_bh);
di_bh = NULL;
unlock_rw: unlock_rw:
ocfs2_rw_unlock(inode, 1); ocfs2_rw_unlock(inode, 1);
next: next:
mutex_unlock(&inode->i_mutex);
iput(inode); iput(inode);
brelse(di_bh);
di_bh = NULL;
inode = iter; inode = iter;
} }
......
...@@ -2601,27 +2601,6 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, ...@@ -2601,27 +2601,6 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
return status; return status;
} }
/*
 * Report whether OCFS2_DIO_ORPHANED_FL has been cleared on @inode.
 *
 * Takes the inode lock via ocfs2_inode_lock(inode, &di_bh, 1), tests the
 * flag in the ocfs2_dinode found in di_bh->b_data, then drops the lock and
 * releases the buffer head.
 *
 * Used as the wake-up condition of wait_event_interruptible_timeout() in
 * ocfs2_add_inode_to_orphan().
 *
 * Returns non-zero when the flag is NOT set, and 0 when it is still set.
 * Also returns 0 if taking the inode lock fails (the error is logged with
 * mlog_errno), so a lock failure is reported the same way as "flag still
 * set".
 */
static int ocfs2_dio_orphan_recovered(struct inode *inode)
{
int ret;
struct buffer_head *di_bh = NULL;
struct ocfs2_dinode *di = NULL;
ret = ocfs2_inode_lock(inode, &di_bh, 1);
if (ret < 0) {
mlog_errno(ret);
/* Lock failure: report 0, the same value as "flag still set". */
return 0;
}
di = (struct ocfs2_dinode *) di_bh->b_data;
/* The flag constant is converted with cpu_to_le32() before testing i_flags. */
ret = !(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL));
ocfs2_inode_unlock(inode, 1);
brelse(di_bh);
return ret;
}
#define OCFS2_DIO_ORPHANED_FL_CHECK_INTERVAL 10000
int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb, int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
struct inode *inode) struct inode *inode)
{ {
...@@ -2633,7 +2612,6 @@ int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb, ...@@ -2633,7 +2612,6 @@ int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
handle_t *handle = NULL; handle_t *handle = NULL;
struct ocfs2_dinode *di = NULL; struct ocfs2_dinode *di = NULL;
restart:
status = ocfs2_inode_lock(inode, &di_bh, 1); status = ocfs2_inode_lock(inode, &di_bh, 1);
if (status < 0) { if (status < 0) {
mlog_errno(status); mlog_errno(status);
...@@ -2643,15 +2621,21 @@ int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb, ...@@ -2643,15 +2621,21 @@ int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
di = (struct ocfs2_dinode *) di_bh->b_data; di = (struct ocfs2_dinode *) di_bh->b_data;
/* /*
* Another append dio crashed? * Another append dio crashed?
* If so, wait for recovery first. * If so, manually recover it first.
*/ */
if (unlikely(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL))) { if (unlikely(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL))) {
ocfs2_inode_unlock(inode, 1); status = ocfs2_truncate_file(inode, di_bh, i_size_read(inode));
brelse(di_bh); if (status < 0) {
wait_event_interruptible_timeout(OCFS2_I(inode)->append_dio_wq, if (status != -ENOSPC)
ocfs2_dio_orphan_recovered(inode), mlog_errno(status);
msecs_to_jiffies(OCFS2_DIO_ORPHANED_FL_CHECK_INTERVAL)); goto bail_unlock_inode;
goto restart; }
status = ocfs2_del_inode_from_orphan(osb, inode, di_bh, 0, 0);
if (status < 0) {
mlog_errno(status);
goto bail_unlock_inode;
}
} }
status = ocfs2_prepare_orphan_dir(osb, &orphan_dir_inode, status = ocfs2_prepare_orphan_dir(osb, &orphan_dir_inode,
......
...@@ -1746,8 +1746,6 @@ static void ocfs2_inode_init_once(void *data) ...@@ -1746,8 +1746,6 @@ static void ocfs2_inode_init_once(void *data)
ocfs2_lock_res_init_once(&oi->ip_inode_lockres); ocfs2_lock_res_init_once(&oi->ip_inode_lockres);
ocfs2_lock_res_init_once(&oi->ip_open_lockres); ocfs2_lock_res_init_once(&oi->ip_open_lockres);
init_waitqueue_head(&oi->append_dio_wq);
ocfs2_metadata_cache_init(INODE_CACHE(&oi->vfs_inode), ocfs2_metadata_cache_init(INODE_CACHE(&oi->vfs_inode),
&ocfs2_inode_caching_ops); &ocfs2_inode_caching_ops);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment