Commit d8a55dda authored by Andrew Morton's avatar Andrew Morton Committed by Jens Axboe

[PATCH] dcache_rcu

Patch from Maneesh Soni <maneesh@in.ibm.com>, Dipankar Sarma
<dipankar@in.ibm.com> and probably others.


This patch provides a dcache_lock-free d_lookup() using RCU. Al pointed
out races between d_move and the lock-free d_lookup() while a concurrent
rename is going on. We tested this with a test doing a million renames
in each of 50 threads on 50 different ramfs filesystems, while simultaneously
running millions of "ls". The tests were done on a 4-way SMP box.

1. Lookup can wander into a different bucket when the current dentry is
   moved to a different bucket due to a rename. This is solved by
   having a list_head pointer in the dentry structure which points
   to the bucket head it belongs to. The bucket pointer is updated when the
   dentry is added to the hash chain. Lookup checks whether the current
   dentry belongs to a different bucket; if so, the cached lookup is
   failed and a real lookup will be done. This condition occurred
   about 100 times during the heavy_rename test.

2. Lookup has found the dentry it is looking for and is comparing
   various keys when, meanwhile, a rename operation moves the dentry.
   This is solved by using a per-dentry counter (d_move_count) which
   is updated at the end of d_move. Lookup takes a snapshot of the
   d_move_count before comparing the keys and, once the comparison
   succeeds, it takes the per-dentry lock to check the d_move_count
   again. If the move count differs, then the dentry was moved (or renamed)
   and the lookup is failed.

3. There can be a theoretical race when a dentry keeps coming back
   to its original bucket due to double moves. Because of this, lookup may
   conclude that it has never moved and can end up in an infinite loop.
   This is solved by using a loop_counter which is compared with an
   approximate maximum number of dentries per bucket. This never got
   hit during the heavy_rename test.

4. There is one more change regarding the loop termination condition
   in d_lookup: the next hash pointer is now compared with the current
   dentry's bucket pointer (is_bucket()).

5. memcmp() in d_lookup() can go out of bounds if the name pointer and length
   fields are not consistent. To avoid this we use a pointer to a qstr to keep
   the length and name pointer in one structure.

We also tried solving these by using a rwlock, but it could not compete
with the lockless solution.
parent 7ac75979
......@@ -21,7 +21,7 @@ locking rules:
dcache_lock may block
d_revalidate: no yes
d_hash no yes
d_compare: yes no
d_compare: no no
d_delete: yes no
d_release: no yes
d_iput: no yes
......
This diff is collapsed.
......@@ -7,6 +7,7 @@
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/cache.h>
#include <linux/rcupdate.h>
#include <asm/bug.h>
struct vfsmount;
......@@ -30,6 +31,7 @@ struct qstr {
const unsigned char * name;
unsigned int len;
unsigned int hash;
char name_str[0];
};
struct dentry_stat_t {
......@@ -72,21 +74,26 @@ struct dcookie_struct;
struct dentry {
atomic_t d_count;
unsigned long d_vfs_flags; /* moved here to be on same cacheline */
spinlock_t d_lock; /* per dentry lock */
unsigned int d_flags;
unsigned long d_move_count; /* to indicated moved dentry while lockless lookup */
struct inode * d_inode; /* Where the name belongs to - NULL is negative */
struct dentry * d_parent; /* parent directory */
struct list_head * d_bucket; /* lookup hash bucket */
struct list_head d_hash; /* lookup hash list */
struct list_head d_lru; /* d_count = 0 LRU list */
struct list_head d_lru; /* LRU list */
struct list_head d_child; /* child of parent list */
struct list_head d_subdirs; /* our children */
struct list_head d_alias; /* inode alias list */
int d_mounted;
struct qstr d_name;
struct qstr * d_qstr; /* quick str ptr used in lockless lookup and concurrent d_move */
unsigned long d_time; /* used by d_revalidate */
struct dentry_operations *d_op;
struct super_block * d_sb; /* The root of the dentry tree */
unsigned long d_vfs_flags;
void * d_fsdata; /* fs-specific data */
struct rcu_head d_rcu;
struct dcookie_struct * d_cookie; /* cookie, if any */
unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */
} ____cacheline_aligned;
......@@ -139,6 +146,7 @@ d_iput: no no yes
*/
#define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
#define DCACHE_UNHASHED 0x0010
extern spinlock_t dcache_lock;
extern rwlock_t dparent_lock;
......@@ -162,7 +170,8 @@ extern rwlock_t dparent_lock;
static __inline__ void __d_drop(struct dentry * dentry)
{
list_del_init(&dentry->d_hash);
dentry->d_vfs_flags |= DCACHE_UNHASHED;
list_del_rcu(&dentry->d_hash);
}
static __inline__ void d_drop(struct dentry * dentry)
......@@ -229,7 +238,6 @@ extern void d_move(struct dentry *, struct dentry *);
/* appendix may either be NULL or be used for transname suffixes */
extern struct dentry * d_lookup(struct dentry *, struct qstr *);
extern struct dentry * __d_lookup(struct dentry *, struct qstr *);
/* validate "insecure" dentry pointer */
extern int d_validate(struct dentry *, struct dentry *);
......@@ -254,9 +262,8 @@ extern char * d_path(struct dentry *, struct vfsmount *, char *, int);
static __inline__ struct dentry * dget(struct dentry *dentry)
{
if (dentry) {
if (!atomic_read(&dentry->d_count))
BUG();
atomic_inc(&dentry->d_count);
dentry->d_vfs_flags |= DCACHE_REFERENCED;
}
return dentry;
}
......@@ -272,7 +279,7 @@ extern struct dentry * dget_locked(struct dentry *);
static __inline__ int d_unhashed(struct dentry *dentry)
{
return list_empty(&dentry->d_hash);
return (dentry->d_vfs_flags & DCACHE_UNHASHED);
}
extern void dput(struct dentry *);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment