Commit 1b8910cf authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] real_lookup race fix

From: Maneesh Soni <maneesh@in.ibm.com>

Here is a patch to use seqlock for real_lookup race with d_lookup as suggested
by Linus. The race condition can result in duplicate dentry when d_lookup
fails due concurrent d_move in some unrelated directory.

Apart from real_lookup, lookup_hash()->cached_lookup() can also fail due
to same reason. So, for that I am doing the d_lookup again.

Now we have __d_lookup (called from do_lookup() during pathwalk) and
d_lookup which uses seqlock to protect againt rename race.

dcachebench numbers (lower is better) don't have much difference on a 4-way
PIII xeon SMP box.

base-2565
Average usec/iteration  19059.4
Standard Deviation      503.07

base-2565 + seq_lock
Average usec/iteration  18843.2
Standard Deviation      450.57
parent ec1d26ec
...@@ -27,12 +27,14 @@ ...@@ -27,12 +27,14 @@
#include <linux/file.h> #include <linux/file.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <linux/security.h> #include <linux/security.h>
#include <linux/seqlock.h>
#define DCACHE_PARANOIA 1 #define DCACHE_PARANOIA 1
/* #define DCACHE_DEBUG 1 */ /* #define DCACHE_DEBUG 1 */
spinlock_t dcache_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; spinlock_t dcache_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
rwlock_t dparent_lock __cacheline_aligned_in_smp = RW_LOCK_UNLOCKED; rwlock_t dparent_lock __cacheline_aligned_in_smp = RW_LOCK_UNLOCKED;
seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
static kmem_cache_t *dentry_cache; static kmem_cache_t *dentry_cache;
...@@ -926,7 +928,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) ...@@ -926,7 +928,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
* is returned. The caller must use d_put to free the entry when it has * is returned. The caller must use d_put to free the entry when it has
* finished using it. %NULL is returned on failure. * finished using it. %NULL is returned on failure.
* *
* d_lookup is now, dcache_lock free. The hash list is protected using RCU. * __d_lookup is dcache_lock free. The hash list is protected using RCU.
* Memory barriers are used while updating and doing lockless traversal. * Memory barriers are used while updating and doing lockless traversal.
* To avoid races with d_move while rename is happening, d_move_count is * To avoid races with d_move while rename is happening, d_move_count is
* used. * used.
...@@ -939,9 +941,26 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) ...@@ -939,9 +941,26 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
* *
* d_lru list is not updated, which can leave non-zero d_count dentries * d_lru list is not updated, which can leave non-zero d_count dentries
* around in d_lru list. * around in d_lru list.
*
* d_lookup() is protected against the concurrent renames in some unrelated
* directory using the seqlockt_t rename_lock.
*/ */
struct dentry * d_lookup(struct dentry * parent, struct qstr * name) struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
{
struct dentry * dentry = NULL;
unsigned long seq;
do {
seq = read_seqbegin(&rename_lock);
dentry = __d_lookup(parent, name);
if (dentry)
break;
} while (read_seqretry(&rename_lock, seq));
return dentry;
}
struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
{ {
unsigned int len = name->len; unsigned int len = name->len;
unsigned int hash = name->hash; unsigned int hash = name->hash;
...@@ -1185,6 +1204,7 @@ void d_move(struct dentry * dentry, struct dentry * target) ...@@ -1185,6 +1204,7 @@ void d_move(struct dentry * dentry, struct dentry * target)
printk(KERN_WARNING "VFS: moving negative dcache entry\n"); printk(KERN_WARNING "VFS: moving negative dcache entry\n");
spin_lock(&dcache_lock); spin_lock(&dcache_lock);
write_seqlock(&rename_lock);
spin_lock(&dentry->d_lock); spin_lock(&dentry->d_lock);
/* Move the dentry to the target hash queue, if on different bucket */ /* Move the dentry to the target hash queue, if on different bucket */
...@@ -1222,6 +1242,7 @@ void d_move(struct dentry * dentry, struct dentry * target) ...@@ -1222,6 +1242,7 @@ void d_move(struct dentry * dentry, struct dentry * target)
list_add(&dentry->d_child, &dentry->d_parent->d_subdirs); list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
dentry->d_move_count++; dentry->d_move_count++;
spin_unlock(&dentry->d_lock); spin_unlock(&dentry->d_lock);
write_sequnlock(&rename_lock);
spin_unlock(&dcache_lock); spin_unlock(&dcache_lock);
} }
......
...@@ -275,8 +275,14 @@ void path_release(struct nameidata *nd) ...@@ -275,8 +275,14 @@ void path_release(struct nameidata *nd)
*/ */
static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags) static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
{ {
struct dentry * dentry = d_lookup(parent, name); struct dentry * dentry = __d_lookup(parent, name);
/* lockess __d_lookup may fail due to concurrent d_move()
* in some unrelated directory, so try with d_lookup
*/
if (!dentry)
dentry = d_lookup(parent, name);
if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) { if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
dput(dentry); dput(dentry);
...@@ -348,12 +354,9 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, i ...@@ -348,12 +354,9 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, i
* negatives from the RCU list walk here, unlike the optimistic * negatives from the RCU list walk here, unlike the optimistic
* fast walk). * fast walk).
* *
* We really should do a sequence number thing to avoid this * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
* all.
*/ */
spin_lock(&dcache_lock);
result = d_lookup(parent, name); result = d_lookup(parent, name);
spin_unlock(&dcache_lock);
if (!result) { if (!result) {
struct dentry * dentry = d_alloc(parent, name); struct dentry * dentry = d_alloc(parent, name);
result = ERR_PTR(-ENOMEM); result = ERR_PTR(-ENOMEM);
...@@ -524,7 +527,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, ...@@ -524,7 +527,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
struct path *path, int flags) struct path *path, int flags)
{ {
struct vfsmount *mnt = nd->mnt; struct vfsmount *mnt = nd->mnt;
struct dentry *dentry = d_lookup(nd->dentry, name); struct dentry *dentry = __d_lookup(nd->dentry, name);
if (!dentry) if (!dentry)
goto need_lookup; goto need_lookup;
......
...@@ -238,6 +238,7 @@ extern void d_move(struct dentry *, struct dentry *); ...@@ -238,6 +238,7 @@ extern void d_move(struct dentry *, struct dentry *);
/* appendix may either be NULL or be used for transname suffixes */ /* appendix may either be NULL or be used for transname suffixes */
extern struct dentry * d_lookup(struct dentry *, struct qstr *); extern struct dentry * d_lookup(struct dentry *, struct qstr *);
extern struct dentry * __d_lookup(struct dentry *, struct qstr *);
/* validate "insecure" dentry pointer */ /* validate "insecure" dentry pointer */
extern int d_validate(struct dentry *, struct dentry *); extern int d_validate(struct dentry *, struct dentry *);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment