Commit 2105c282 authored by David Howells's avatar David Howells

afs: Fix race between post-modification dir edit and readdir/d_revalidate

AFS directories are retained locally as a structured file, with lookup
being effected by a local search of the file contents.  When a modification
(such as mkdir) happens, the dir file content is modified locally rather
than redownloading the directory.

The directory contents are accessed in a number of ways, with a number of
different locks schemes:

 (1) Download of contents - dvnode->validate_lock/write in afs_read_dir().

 (2) Lookup and readdir - dvnode->validate_lock/read in afs_dir_iterate(),
     downgrading from (1) if necessary.

 (3) d_revalidate of child dentry - dvnode->validate_lock/read in
     afs_do_lookup_one() downgrading from (1) if necessary.

 (4) Edit of dir after modification - page locks on individual dir pages.

Unfortunately, because (4) uses different locking scheme to (1) - (3),
nothing protects against the page being scanned whilst the edit is
underway.  Even download is not safe as it doesn't lock the pages - relying
instead on the validate_lock to serialise as a whole (the theory being that
directory contents are treated as a block and always downloaded as a
block).

Fix this by write-locking dvnode->validate_lock around the edits.  Care
must be taken in the rename case as there may be two different dirs - but
they need not be locked at the same time.  In any case, once the lock is
taken, the directory version must be rechecked, and the edit skipped if a
later version has been downloaded by revalidation (there can't have been
any local changes because the VFS holds the inode lock, but there can have
been remote changes).

Fixes: 63a4681f ("afs: Locally edit directory data for mkdir/create/unlink/...")
Signed-off-by: default avatarDavid Howells <dhowells@redhat.com>
parent 3efe55b0
......@@ -1275,6 +1275,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
struct afs_fs_cursor fc;
struct afs_vnode *dvnode = AFS_FS_I(dir);
struct key *key;
afs_dataversion_t data_version;
int ret;
mode |= S_IFDIR;
......@@ -1295,7 +1296,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
afs_dataversion_t data_version = dvnode->status.data_version + 1;
data_version = dvnode->status.data_version + 1;
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
......@@ -1316,10 +1317,14 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
goto error_key;
}
if (ret == 0 &&
test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
if (ret == 0) {
down_write(&dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
dvnode->status.data_version == data_version)
afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid,
afs_edit_dir_for_create);
up_write(&dvnode->validate_lock);
}
key_put(key);
kfree(scb);
......@@ -1360,6 +1365,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
struct afs_fs_cursor fc;
struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
struct key *key;
afs_dataversion_t data_version;
int ret;
_enter("{%llx:%llu},{%pd}",
......@@ -1391,7 +1397,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
afs_dataversion_t data_version = dvnode->status.data_version + 1;
data_version = dvnode->status.data_version + 1;
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
......@@ -1404,9 +1410,12 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
ret = afs_end_vnode_operation(&fc);
if (ret == 0) {
afs_dir_remove_subdir(dentry);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
down_write(&dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
dvnode->status.data_version == data_version)
afs_edit_dir_remove(dvnode, &dentry->d_name,
afs_edit_dir_for_rmdir);
up_write(&dvnode->validate_lock);
}
}
......@@ -1544,10 +1553,15 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
ret = afs_end_vnode_operation(&fc);
if (ret == 0 && !(scb[1].have_status || scb[1].have_error))
ret = afs_dir_remove_link(dvnode, dentry, key);
if (ret == 0 &&
test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
if (ret == 0) {
down_write(&dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
dvnode->status.data_version == data_version)
afs_edit_dir_remove(dvnode, &dentry->d_name,
afs_edit_dir_for_unlink);
up_write(&dvnode->validate_lock);
}
}
if (need_rehash && ret < 0 && ret != -ENOENT)
......@@ -1573,6 +1587,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
struct afs_status_cb *scb;
struct afs_vnode *dvnode = AFS_FS_I(dir);
struct key *key;
afs_dataversion_t data_version;
int ret;
mode |= S_IFREG;
......@@ -1597,7 +1612,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
afs_dataversion_t data_version = dvnode->status.data_version + 1;
data_version = dvnode->status.data_version + 1;
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
......@@ -1618,9 +1633,12 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
goto error_key;
}
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
down_write(&dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
dvnode->status.data_version == data_version)
afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid,
afs_edit_dir_for_create);
up_write(&dvnode->validate_lock);
kfree(scb);
key_put(key);
......@@ -1648,6 +1666,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
struct afs_vnode *dvnode = AFS_FS_I(dir);
struct afs_vnode *vnode = AFS_FS_I(d_inode(from));
struct key *key;
afs_dataversion_t data_version;
int ret;
_enter("{%llx:%llu},{%llx:%llu},{%pd}",
......@@ -1672,7 +1691,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
afs_dataversion_t data_version = dvnode->status.data_version + 1;
data_version = dvnode->status.data_version + 1;
if (mutex_lock_interruptible_nested(&vnode->io_lock, 1) < 0) {
afs_end_vnode_operation(&fc);
......@@ -1702,9 +1721,12 @@ static int afs_link(struct dentry *from, struct inode *dir,
goto error_key;
}
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
down_write(&dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
dvnode->status.data_version == data_version)
afs_edit_dir_add(dvnode, &dentry->d_name, &vnode->fid,
afs_edit_dir_for_link);
up_write(&dvnode->validate_lock);
key_put(key);
kfree(scb);
......@@ -1732,6 +1754,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
struct afs_status_cb *scb;
struct afs_vnode *dvnode = AFS_FS_I(dir);
struct key *key;
afs_dataversion_t data_version;
int ret;
_enter("{%llx:%llu},{%pd},%s",
......@@ -1759,7 +1782,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
afs_dataversion_t data_version = dvnode->status.data_version + 1;
data_version = dvnode->status.data_version + 1;
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
......@@ -1780,9 +1803,12 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
goto error_key;
}
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
down_write(&dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
dvnode->status.data_version == data_version)
afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid,
afs_edit_dir_for_symlink);
up_write(&dvnode->validate_lock);
key_put(key);
kfree(scb);
......@@ -1812,6 +1838,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct dentry *tmp = NULL, *rehash = NULL;
struct inode *new_inode;
struct key *key;
afs_dataversion_t orig_data_version;
afs_dataversion_t new_data_version;
bool new_negative = d_is_negative(new_dentry);
int ret;
......@@ -1890,9 +1918,6 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, orig_dvnode, key, true)) {
afs_dataversion_t orig_data_version;
afs_dataversion_t new_data_version;
orig_data_version = orig_dvnode->status.data_version + 1;
if (orig_dvnode != new_dvnode) {
......@@ -1928,18 +1953,25 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (ret == 0) {
if (rehash)
d_rehash(rehash);
if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags))
down_write(&orig_dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags) &&
orig_dvnode->status.data_version == orig_data_version)
afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name,
afs_edit_dir_for_rename_0);
if (orig_dvnode != new_dvnode) {
up_write(&orig_dvnode->validate_lock);
if (!new_negative &&
test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags))
down_write(&new_dvnode->validate_lock);
}
if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags) &&
orig_dvnode->status.data_version == new_data_version) {
if (!new_negative)
afs_edit_dir_remove(new_dvnode, &new_dentry->d_name,
afs_edit_dir_for_rename_1);
if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags))
afs_edit_dir_add(new_dvnode, &new_dentry->d_name,
&vnode->fid, afs_edit_dir_for_rename_2);
}
new_inode = d_inode(new_dentry);
if (new_inode) {
......@@ -1958,6 +1990,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
afs_update_dentry_version(&fc, old_dentry, &scb[1]);
afs_update_dentry_version(&fc, new_dentry, &scb[1]);
d_move(old_dentry, new_dentry);
up_write(&new_dvnode->validate_lock);
goto error_tmp;
}
......
......@@ -21,6 +21,7 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode
{
struct afs_fs_cursor fc;
struct afs_status_cb *scb;
afs_dataversion_t dir_data_version;
int ret = -ERESTARTSYS;
_enter("%pd,%pd", old, new);
......@@ -31,7 +32,7 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode
trace_afs_silly_rename(vnode, false);
if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
afs_dataversion_t dir_data_version = dvnode->status.data_version + 1;
dir_data_version = dvnode->status.data_version + 1;
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
......@@ -54,13 +55,16 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode
dvnode->silly_key = key_get(key);
}
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
down_write(&dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
dvnode->status.data_version == dir_data_version) {
afs_edit_dir_remove(dvnode, &old->d_name,
afs_edit_dir_for_silly_0);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
afs_edit_dir_add(dvnode, &new->d_name,
&vnode->fid, afs_edit_dir_for_silly_1);
}
up_write(&dvnode->validate_lock);
}
kfree(scb);
_leave(" = %d", ret);
......@@ -181,10 +185,14 @@ static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode
clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
}
}
if (ret == 0 &&
test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
if (ret == 0) {
down_write(&dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
dvnode->status.data_version == dir_data_version)
afs_edit_dir_remove(dvnode, &dentry->d_name,
afs_edit_dir_for_unlink);
up_write(&dvnode->validate_lock);
}
}
kfree(scb);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment