Commit a8b00268 authored by Al Viro's avatar Al Viro

rename(): avoid a deadlock in the case of parents having no common ancestor

... and fix the directory locking documentation and proof of correctness.
Holding ->s_vfs_rename_mutex *almost* prevents ->d_parent changes; the
case where we really don't want it is splicing the root of disconnected
tree to somewhere.

In other words, ->s_vfs_rename_mutex is sufficient to stabilize "X is an
ancestor of Y" only if X and Y are already in the same tree.  Otherwise
it can go from false to true, and one can construct a deadlock on that.

Make lock_two_directories() report an error in such case and update the
callers of lock_rename()/lock_rename_child() to handle such errors.

And yes, such conditions are not impossible to create ;-/
Reviewed-by: default avatarJan Kara <jack@suse.cz>
Signed-off-by: default avatarAl Viro <viro@zeniv.linux.org.uk>
parent dbd4540d
...@@ -1079,3 +1079,12 @@ On same-directory ->rename() the (tautological) update of .. is not protected ...@@ -1079,3 +1079,12 @@ On same-directory ->rename() the (tautological) update of .. is not protected
by any locks; just don't do it if the old parent is the same as the new one. by any locks; just don't do it if the old parent is the same as the new one.
We really can't lock two subdirectories in same-directory rename - not without We really can't lock two subdirectories in same-directory rename - not without
deadlocks. deadlocks.
---
**mandatory**
lock_rename() and lock_rename_child() may fail in cross-directory case, if
their arguments do not have a common ancestor. In that case ERR_PTR(-EXDEV)
is returned, with no locks taken. In-tree users updated; out-of-tree ones
would need to do so.
...@@ -305,6 +305,8 @@ int cachefiles_bury_object(struct cachefiles_cache *cache, ...@@ -305,6 +305,8 @@ int cachefiles_bury_object(struct cachefiles_cache *cache,
/* do the multiway lock magic */ /* do the multiway lock magic */
trap = lock_rename(cache->graveyard, dir); trap = lock_rename(cache->graveyard, dir);
if (IS_ERR(trap))
return PTR_ERR(trap);
/* do some checks before getting the grave dentry */ /* do some checks before getting the grave dentry */
if (rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) { if (rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) {
......
...@@ -599,6 +599,8 @@ ecryptfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, ...@@ -599,6 +599,8 @@ ecryptfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
target_inode = d_inode(new_dentry); target_inode = d_inode(new_dentry);
trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
if (IS_ERR(trap))
return PTR_ERR(trap);
dget(lower_new_dentry); dget(lower_new_dentry);
rc = -EINVAL; rc = -EINVAL;
if (lower_old_dentry->d_parent != lower_old_dir_dentry) if (lower_old_dentry->d_parent != lower_old_dir_dentry)
......
...@@ -3014,21 +3014,37 @@ static inline int may_create(struct mnt_idmap *idmap, ...@@ -3014,21 +3014,37 @@ static inline int may_create(struct mnt_idmap *idmap,
return inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC); return inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC);
} }
// p1 != p2, both are on the same filesystem, ->s_vfs_rename_mutex is held
static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2) static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2)
{ {
struct dentry *p; struct dentry *p = p1, *q = p2, *r;
p = d_ancestor(p2, p1); while ((r = p->d_parent) != p2 && r != p)
if (p) { p = r;
if (r == p2) {
// p is a child of p2 and an ancestor of p1 or p1 itself
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT); inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
inode_lock_nested(p1->d_inode, I_MUTEX_PARENT2); inode_lock_nested(p1->d_inode, I_MUTEX_PARENT2);
return p; return p;
} }
// p is the root of connected component that contains p1
p = d_ancestor(p1, p2); // p2 does not occur on the path from p to p1
inode_lock_nested(p1->d_inode, I_MUTEX_PARENT); while ((r = q->d_parent) != p1 && r != p && r != q)
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2); q = r;
return p; if (r == p1) {
// q is a child of p1 and an ancestor of p2 or p2 itself
inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
return q;
} else if (likely(r == p)) {
// both p2 and p1 are descendents of p
inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
return NULL;
} else { // no common ancestor at the time we'd been called
mutex_unlock(&p1->d_sb->s_vfs_rename_mutex);
return ERR_PTR(-EXDEV);
}
} }
/* /*
...@@ -4947,6 +4963,10 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd, ...@@ -4947,6 +4963,10 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
retry_deleg: retry_deleg:
trap = lock_rename(new_path.dentry, old_path.dentry); trap = lock_rename(new_path.dentry, old_path.dentry);
if (IS_ERR(trap)) {
error = PTR_ERR(trap);
goto exit_lock_rename;
}
old_dentry = lookup_one_qstr_excl(&old_last, old_path.dentry, old_dentry = lookup_one_qstr_excl(&old_last, old_path.dentry,
lookup_flags); lookup_flags);
...@@ -5014,6 +5034,7 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd, ...@@ -5014,6 +5034,7 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
dput(old_dentry); dput(old_dentry);
exit3: exit3:
unlock_rename(new_path.dentry, old_path.dentry); unlock_rename(new_path.dentry, old_path.dentry);
exit_lock_rename:
if (delegated_inode) { if (delegated_inode) {
error = break_deleg_wait(&delegated_inode); error = break_deleg_wait(&delegated_inode);
if (!error) if (!error)
......
...@@ -1813,6 +1813,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, ...@@ -1813,6 +1813,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
} }
trap = lock_rename(tdentry, fdentry); trap = lock_rename(tdentry, fdentry);
if (IS_ERR(trap)) {
err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
goto out;
}
err = fh_fill_pre_attrs(ffhp); err = fh_fill_pre_attrs(ffhp);
if (err != nfs_ok) if (err != nfs_ok)
goto out_unlock; goto out_unlock;
......
...@@ -722,7 +722,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) ...@@ -722,7 +722,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
struct inode *inode; struct inode *inode;
struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir); struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir);
struct path path = { .mnt = ovl_upper_mnt(ofs) }; struct path path = { .mnt = ovl_upper_mnt(ofs) };
struct dentry *temp, *upper; struct dentry *temp, *upper, *trap;
struct ovl_cu_creds cc; struct ovl_cu_creds cc;
int err; int err;
struct ovl_cattr cattr = { struct ovl_cattr cattr = {
...@@ -760,9 +760,11 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) ...@@ -760,9 +760,11 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
* If temp was moved, abort without the cleanup. * If temp was moved, abort without the cleanup.
*/ */
ovl_start_write(c->dentry); ovl_start_write(c->dentry);
if (lock_rename(c->workdir, c->destdir) != NULL || trap = lock_rename(c->workdir, c->destdir);
temp->d_parent != c->workdir) { if (trap || temp->d_parent != c->workdir) {
err = -EIO; err = -EIO;
if (IS_ERR(trap))
goto out;
goto unlock; goto unlock;
} else if (err) { } else if (err) {
goto cleanup; goto cleanup;
...@@ -803,6 +805,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) ...@@ -803,6 +805,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
ovl_set_flag(OVL_WHITEOUTS, inode); ovl_set_flag(OVL_WHITEOUTS, inode);
unlock: unlock:
unlock_rename(c->workdir, c->destdir); unlock_rename(c->workdir, c->destdir);
out:
ovl_end_write(c->dentry); ovl_end_write(c->dentry);
return err; return err;
......
...@@ -1180,6 +1180,10 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir, ...@@ -1180,6 +1180,10 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
} }
trap = lock_rename(new_upperdir, old_upperdir); trap = lock_rename(new_upperdir, old_upperdir);
if (IS_ERR(trap)) {
err = PTR_ERR(trap);
goto out_revert_creds;
}
olddentry = ovl_lookup_upper(ofs, old->d_name.name, old_upperdir, olddentry = ovl_lookup_upper(ofs, old->d_name.name, old_upperdir,
old->d_name.len); old->d_name.len);
......
...@@ -439,8 +439,10 @@ static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) ...@@ -439,8 +439,10 @@ static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
bool ok = false; bool ok = false;
if (workdir != upperdir) { if (workdir != upperdir) {
ok = (lock_rename(workdir, upperdir) == NULL); struct dentry *trap = lock_rename(workdir, upperdir);
unlock_rename(workdir, upperdir); if (!IS_ERR(trap))
unlock_rename(workdir, upperdir);
ok = (trap == NULL);
} }
return ok; return ok;
} }
......
...@@ -1198,12 +1198,17 @@ void ovl_nlink_end(struct dentry *dentry) ...@@ -1198,12 +1198,17 @@ void ovl_nlink_end(struct dentry *dentry)
int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir) int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
{ {
struct dentry *trap;
/* Workdir should not be the same as upperdir */ /* Workdir should not be the same as upperdir */
if (workdir == upperdir) if (workdir == upperdir)
goto err; goto err;
/* Workdir should not be subdir of upperdir and vice versa */ /* Workdir should not be subdir of upperdir and vice versa */
if (lock_rename(workdir, upperdir) != NULL) trap = lock_rename(workdir, upperdir);
if (IS_ERR(trap))
goto err;
if (trap)
goto err_unlock; goto err_unlock;
return 0; return 0;
......
...@@ -708,6 +708,10 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path, ...@@ -708,6 +708,10 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
goto out2; goto out2;
trap = lock_rename_child(old_child, new_path.dentry); trap = lock_rename_child(old_child, new_path.dentry);
if (IS_ERR(trap)) {
err = PTR_ERR(trap);
goto out_drop_write;
}
old_parent = dget(old_child->d_parent); old_parent = dget(old_child->d_parent);
if (d_unhashed(old_child)) { if (d_unhashed(old_child)) {
...@@ -770,6 +774,7 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path, ...@@ -770,6 +774,7 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
out3: out3:
dput(old_parent); dput(old_parent);
unlock_rename(old_parent, new_path.dentry); unlock_rename(old_parent, new_path.dentry);
out_drop_write:
mnt_drop_write(old_path->mnt); mnt_drop_write(old_path->mnt);
out2: out2:
path_put(&new_path); path_put(&new_path);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment