Commit 71244d9b authored by Trond Myklebust's avatar Trond Myklebust

NFSv4.1: Fix a race in nfs4_write_inode

nfs4_write_inode() must not be allowed to exit until the layoutcommit
is done. That means that both NFS_INO_LAYOUTCOMMIT and
NFS_INO_LAYOUTCOMMITTING have to be cleared.
Signed-off-by: default avatarTrond Myklebust <trond.myklebust@primarydata.com>
parent d8c951c3
......@@ -77,17 +77,9 @@ static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc)
{
int ret = nfs_write_inode(inode, wbc);
if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) {
int status;
bool sync = true;
if (wbc->sync_mode == WB_SYNC_NONE)
sync = false;
status = pnfs_layoutcommit_inode(inode, sync);
if (status < 0)
return status;
}
if (ret == 0)
ret = pnfs_layoutcommit_inode(inode,
wbc->sync_mode == WB_SYNC_ALL);
return ret;
}
......
......@@ -1790,6 +1790,15 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
static void pnfs_clear_layoutcommitting(struct inode *inode)
{
unsigned long *bitlock = &NFS_I(inode)->flags;
clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
smp_mb__after_clear_bit();
wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
}
/*
* There can be multiple RW segments.
*/
......@@ -1807,7 +1816,6 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
{
struct pnfs_layout_segment *lseg, *tmp;
unsigned long *bitlock = &NFS_I(inode)->flags;
/* Matched by references in pnfs_set_layoutcommit */
list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
......@@ -1815,9 +1823,7 @@ static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *lis
pnfs_put_lseg(lseg);
}
clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
smp_mb__after_clear_bit();
wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
pnfs_clear_layoutcommitting(inode);
}
void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
......@@ -1881,43 +1887,37 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
struct nfs4_layoutcommit_data *data;
struct nfs_inode *nfsi = NFS_I(inode);
loff_t end_pos;
int status = 0;
dprintk("--> %s inode %lu\n", __func__, inode->i_ino);
int status;
if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
if (!pnfs_layoutcommit_outstanding(inode))
return 0;
/* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
data = kzalloc(sizeof(*data), GFP_NOFS);
if (!data) {
status = -ENOMEM;
goto out;
}
if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
goto out_free;
dprintk("--> %s inode %lu\n", __func__, inode->i_ino);
if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
if (!sync) {
status = -EAGAIN;
goto out_free;
}
status = wait_on_bit_lock(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING,
nfs_wait_bit_killable, TASK_KILLABLE);
if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
if (!sync)
goto out;
status = wait_on_bit_lock(&nfsi->flags,
NFS_INO_LAYOUTCOMMITTING,
nfs_wait_bit_killable,
TASK_KILLABLE);
if (status)
goto out_free;
goto out;
}
INIT_LIST_HEAD(&data->lseg_list);
status = -ENOMEM;
/* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
data = kzalloc(sizeof(*data), GFP_NOFS);
if (!data)
goto clear_layoutcommitting;
status = 0;
spin_lock(&inode->i_lock);
if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
clear_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags);
spin_unlock(&inode->i_lock);
wake_up_bit(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING);
goto out_free;
}
if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
goto out_unlock;
INIT_LIST_HEAD(&data->lseg_list);
pnfs_list_write_lseg(inode, &data->lseg_list);
end_pos = nfsi->layout->plh_lwb;
......@@ -1940,8 +1940,11 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
mark_inode_dirty_sync(inode);
dprintk("<-- %s status %d\n", __func__, status);
return status;
out_free:
out_unlock:
spin_unlock(&inode->i_lock);
kfree(data);
clear_layoutcommitting:
pnfs_clear_layoutcommitting(inode);
goto out;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment