Commit 31ed8e6f authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'dentry-cleanups' (dcache access cleanups and optimizations)

This branch simplifies and clarifies the dcache lookup, and allows us to
do certain nice optimizations when comparing dentries.  It also cleans
up the interface to __d_lookup_rcu(), especially around passing the
inode information around.

* dentry-cleanups:
  vfs: make it possible to access the dentry hash/len as one 64-bit entry
  vfs: move dentry name length comparison from dentry_cmp() into callers
  vfs: do the careful dentry name access for all dentry_cmp cases
  vfs: remove unnecessary d_unhashed() check from __d_lookup_rcu
  vfs: clean up __d_lookup_rcu() and dentry_cmp() interfaces
parents 7e5cb5e1 26fe5750
......@@ -153,16 +153,12 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
* In contrast, 'ct' and 'tcount' can be from a pathname, and do
* need the careful unaligned handling.
*/
static inline int dentry_cmp(const unsigned char *cs, size_t scount,
const unsigned char *ct, size_t tcount)
static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
{
unsigned long a,b,mask;
if (unlikely(scount != tcount))
return 1;
for (;;) {
a = load_unaligned_zeropad(cs);
a = *(unsigned long *)cs;
b = load_unaligned_zeropad(ct);
if (tcount < sizeof(unsigned long))
break;
......@@ -180,12 +176,8 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,
#else
static inline int dentry_cmp(const unsigned char *cs, size_t scount,
const unsigned char *ct, size_t tcount)
static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
{
if (scount != tcount)
return 1;
do {
if (*cs != *ct)
return 1;
......@@ -198,6 +190,27 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,
#endif
static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
{
/*
* Be careful about RCU walk racing with rename:
* use ACCESS_ONCE to fetch the name pointer.
*
* NOTE! Even if a rename will mean that the length
* was not loaded atomically, we don't care. The
* RCU walk will check the sequence count eventually,
* and catch it. And we won't overrun the buffer,
* because we're reading the name pointer atomically,
* and a dentry name is guaranteed to be properly
* terminated with a NUL byte.
*
* End result: even if 'len' is wrong, we'll exit
* early because the data cannot match (there can
* be no NUL in the ct/tcount data)
*/
return dentry_string_cmp(ACCESS_ONCE(dentry->d_name.name), ct, tcount);
}
static void __d_free(struct rcu_head *head)
{
struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
......@@ -1439,18 +1452,18 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
}
list_for_each_entry(alias, &inode->i_dentry, d_alias) {
struct qstr *qstr = &alias->d_name;
/*
* Don't need alias->d_lock here, because aliases with
* d_parent == entry->d_parent are not subject to name or
* parent changes, because the parent inode i_mutex is held.
*/
if (qstr->hash != hash)
if (alias->d_name.hash != hash)
continue;
if (alias->d_parent != entry->d_parent)
continue;
if (dentry_cmp(qstr->name, qstr->len, name, len))
if (alias->d_name.len != len)
continue;
if (dentry_cmp(alias, name, len))
continue;
__dget(alias);
return alias;
......@@ -1489,7 +1502,7 @@ struct dentry *d_make_root(struct inode *root_inode)
struct dentry *res = NULL;
if (root_inode) {
static const struct qstr name = { .name = "/", .len = 1 };
static const struct qstr name = QSTR_INIT("/", 1);
res = __d_alloc(root_inode->i_sb, &name);
if (res)
......@@ -1727,6 +1740,48 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
}
EXPORT_SYMBOL(d_add_ci);
/*
* Do the slow-case of the dentry name compare.
*
* Unlike the dentry_cmp() function, we need to atomically
* load the name, length and inode information, so that the
* filesystem can rely on them, and can use the 'name' and
* 'len' information without worrying about walking off the
* end of memory etc.
*
* Thus the read_seqcount_retry() and the "duplicate" info
* in arguments (the low-level filesystem should not look
* at the dentry inode or name contents directly, since
* rename can change them while we're in RCU mode).
*/
enum slow_d_compare {
D_COMP_OK,
D_COMP_NOMATCH,
D_COMP_SEQRETRY,
};
static noinline enum slow_d_compare slow_dentry_cmp(
const struct dentry *parent,
struct inode *inode,
struct dentry *dentry,
unsigned int seq,
const struct qstr *name)
{
int tlen = dentry->d_name.len;
const char *tname = dentry->d_name.name;
struct inode *i = dentry->d_inode;
if (read_seqcount_retry(&dentry->d_seq, seq)) {
cpu_relax();
return D_COMP_SEQRETRY;
}
if (parent->d_op->d_compare(parent, inode,
dentry, i,
tlen, tname, name))
return D_COMP_NOMATCH;
return D_COMP_OK;
}
/**
* __d_lookup_rcu - search for a dentry (racy, store-free)
* @parent: parent dentry
......@@ -1753,15 +1808,17 @@ EXPORT_SYMBOL(d_add_ci);
* the returned dentry, so long as its parent's seqlock is checked after the
* child is looked up. Thus, an interlocking stepping of sequence lock checks
* is formed, giving integrity down the path walk.
*
* NOTE! The caller *has* to check the resulting dentry against the sequence
* number we've returned before using any of the resulting dentry state!
*/
struct dentry *__d_lookup_rcu(const struct dentry *parent,
const struct qstr *name,
unsigned *seqp, struct inode **inode)
unsigned *seqp, struct inode *inode)
{
unsigned int len = name->len;
unsigned int hash = name->hash;
u64 hashlen = name->hash_len;
const unsigned char *str = name->name;
struct hlist_bl_head *b = d_hash(parent, hash);
struct hlist_bl_head *b = d_hash(parent, hashlen_hash(hashlen));
struct hlist_bl_node *node;
struct dentry *dentry;
......@@ -1787,49 +1844,45 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent,
*/
hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
unsigned seq;
struct inode *i;
const char *tname;
int tlen;
if (dentry->d_name.hash != hash)
continue;
seqretry:
seq = read_seqcount_begin(&dentry->d_seq);
if (dentry->d_parent != parent)
continue;
if (d_unhashed(dentry))
continue;
tlen = dentry->d_name.len;
tname = dentry->d_name.name;
i = dentry->d_inode;
prefetch(tname);
/*
* This seqcount check is required to ensure name and
* len are loaded atomically, so as not to walk off the
* edge of memory when walking. If we could load this
* atomically some other way, we could drop this check.
* The dentry sequence count protects us from concurrent
* renames, and thus protects inode, parent and name fields.
*
* The caller must perform a seqcount check in order
* to do anything useful with the returned dentry,
* including using the 'd_inode' pointer.
*
* NOTE! We do a "raw" seqcount_begin here. That means that
* we don't wait for the sequence count to stabilize if it
* is in the middle of a sequence change. If we do the slow
* dentry compare, we will do seqretries until it is stable,
* and if we end up with a successful lookup, we actually
* want to exit RCU lookup anyway.
*/
if (read_seqcount_retry(&dentry->d_seq, seq))
goto seqretry;
seq = raw_seqcount_begin(&dentry->d_seq);
if (dentry->d_parent != parent)
continue;
*seqp = seq;
if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
if (parent->d_op->d_compare(parent, *inode,
dentry, i,
tlen, tname, name))
if (dentry->d_name.hash != hashlen_hash(hashlen))
continue;
} else {
if (dentry_cmp(tname, tlen, str, len))
switch (slow_dentry_cmp(parent, inode, dentry, seq, name)) {
case D_COMP_OK:
return dentry;
case D_COMP_NOMATCH:
continue;
default:
goto seqretry;
}
}
/*
* No extra seqcount check is required after the name
* compare. The caller must perform a seqcount check in
* order to do anything useful with the returned dentry
* anyway.
*/
*seqp = seq;
*inode = i;
return dentry;
if (dentry->d_name.hash_len != hashlen)
continue;
if (!dentry_cmp(dentry, str, hashlen_len(hashlen)))
return dentry;
}
return NULL;
}
......@@ -1908,8 +1961,6 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
rcu_read_lock();
hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
const char *tname;
int tlen;
if (dentry->d_name.hash != hash)
continue;
......@@ -1924,15 +1975,17 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
* It is safe to compare names since d_move() cannot
* change the qstr (protected by d_lock).
*/
tlen = dentry->d_name.len;
tname = dentry->d_name.name;
if (parent->d_flags & DCACHE_OP_COMPARE) {
int tlen = dentry->d_name.len;
const char *tname = dentry->d_name.name;
if (parent->d_op->d_compare(parent, parent->d_inode,
dentry, dentry->d_inode,
tlen, tname, name))
goto next;
} else {
if (dentry_cmp(tname, tlen, str, len))
if (dentry->d_name.len != len)
goto next;
if (dentry_cmp(dentry, str, len))
goto next;
}
......
......@@ -79,7 +79,7 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str
struct dentry *ext2_get_parent(struct dentry *child)
{
struct qstr dotdot = {.name = "..", .len = 2};
struct qstr dotdot = QSTR_INIT("..", 2);
unsigned long ino = ext2_inode_by_name(child->d_inode, &dotdot);
if (!ino)
return ERR_PTR(-ENOENT);
......
......@@ -1045,7 +1045,7 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
struct dentry *ext3_get_parent(struct dentry *child)
{
unsigned long ino;
struct qstr dotdot = {.name = "..", .len = 2};
struct qstr dotdot = QSTR_INIT("..", 2);
struct ext3_dir_entry_2 * de;
struct buffer_head *bh;
......
......@@ -1052,10 +1052,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
struct dentry *ext4_get_parent(struct dentry *child)
{
__u32 ino;
static const struct qstr dotdot = {
.name = "..",
.len = 2,
};
static const struct qstr dotdot = QSTR_INIT("..", 2);
struct ext4_dir_entry_2 * de;
struct buffer_head *bh;
......
......@@ -821,7 +821,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh,
struct buffer_head *bh;
struct gfs2_leaf *leaf;
struct gfs2_dirent *dent;
struct qstr name = { .name = "", .len = 0, .hash = 0 };
struct qstr name = { .name = "" };
error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
if (error)
......
......@@ -68,7 +68,7 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct na
int dcache_dir_open(struct inode *inode, struct file *file)
{
static struct qstr cursor_name = {.len = 1, .name = "."};
static struct qstr cursor_name = QSTR_INIT(".", 1);
file->private_data = d_alloc(file->f_path.dentry, &cursor_name);
......@@ -225,7 +225,7 @@ struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name,
struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
struct dentry *dentry;
struct inode *root;
struct qstr d_name = {.name = name, .len = strlen(name)};
struct qstr d_name = QSTR_INIT(name, strlen(name));
if (IS_ERR(s))
return ERR_CAST(s);
......
......@@ -1144,12 +1144,25 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
*/
if (nd->flags & LOOKUP_RCU) {
unsigned seq;
*inode = nd->inode;
dentry = __d_lookup_rcu(parent, name, &seq, inode);
dentry = __d_lookup_rcu(parent, name, &seq, nd->inode);
if (!dentry)
goto unlazy;
/* Memory barrier in read_seqcount_begin of child is enough */
/*
* This sequence count validates that the inode matches
* the dentry name information from lookup.
*/
*inode = dentry->d_inode;
if (read_seqcount_retry(&dentry->d_seq, seq))
return -ECHILD;
/*
* This sequence count validates that the parent had no
* changes while we did the lookup of the dentry above.
*
* The memory barrier in read_seqcount_begin of child is
* enough, we can use __read_seqcount_retry here.
*/
if (__read_seqcount_retry(&parent->d_seq, nd->seq))
return -ECHILD;
nd->seq = seq;
......
......@@ -477,10 +477,7 @@ int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
static
void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
{
struct qstr filename = {
.len = entry->len,
.name = entry->name,
};
struct qstr filename = QSTR_INIT(entry->name, entry->len);
struct dentry *dentry;
struct dentry *alias;
struct inode *dir = parent->d_inode;
......
......@@ -398,8 +398,7 @@ nfs3_proc_remove(struct inode *dir, struct qstr *name)
{
struct nfs_removeargs arg = {
.fh = NFS_FH(dir),
.name.len = name->len,
.name.name = name->name,
.name = *name,
};
struct nfs_removeres res;
struct rpc_message msg = {
......
......@@ -2782,8 +2782,7 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
struct nfs_server *server = NFS_SERVER(dir);
struct nfs_removeargs args = {
.fh = NFS_FH(dir),
.name.len = name->len,
.name.name = name->name,
.name = *name,
.bitmask = server->attr_bitmask,
};
struct nfs_removeres res = {
......
......@@ -335,8 +335,7 @@ nfs_proc_remove(struct inode *dir, struct qstr *name)
{
struct nfs_removeargs arg = {
.fh = NFS_FH(dir),
.name.len = name->len,
.name.name = name->name,
.name = *name,
};
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_REMOVE],
......
......@@ -441,7 +441,7 @@ static struct dentry *nilfs_get_parent(struct dentry *child)
{
unsigned long ino;
struct inode *inode;
struct qstr dotdot = {.name = "..", .len = 2};
struct qstr dotdot = QSTR_INIT("..", 2);
struct nilfs_root *root;
ino = nilfs_inode_by_name(child->d_inode, &dotdot);
......
......@@ -2361,7 +2361,7 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
* by passing 'ubifs_tnc_remove_nm()' the same key but
* an unmatchable name.
*/
struct qstr noname = { .len = 0, .name = "" };
struct qstr noname = { .name = "" };
err = dbg_check_tnc(c, 0);
mutex_unlock(&c->tnc_mutex);
......
......@@ -298,7 +298,7 @@ int ubifs_setxattr(struct dentry *dentry, const char *name,
{
struct inode *inode, *host = dentry->d_inode;
struct ubifs_info *c = host->i_sb->s_fs_info;
struct qstr nm = { .name = name, .len = strlen(name) };
struct qstr nm = QSTR_INIT(name, strlen(name));
struct ubifs_dent_node *xent;
union ubifs_key key;
int err, type;
......@@ -361,7 +361,7 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
{
struct inode *inode, *host = dentry->d_inode;
struct ubifs_info *c = host->i_sb->s_fs_info;
struct qstr nm = { .name = name, .len = strlen(name) };
struct qstr nm = QSTR_INIT(name, strlen(name));
struct ubifs_inode *ui;
struct ubifs_dent_node *xent;
union ubifs_key key;
......@@ -524,7 +524,7 @@ int ubifs_removexattr(struct dentry *dentry, const char *name)
{
struct inode *inode, *host = dentry->d_inode;
struct ubifs_info *c = host->i_sb->s_fs_info;
struct qstr nm = { .name = name, .len = strlen(name) };
struct qstr nm = QSTR_INIT(name, strlen(name));
struct ubifs_dent_node *xent;
union ubifs_key key;
int err;
......
......@@ -1193,7 +1193,7 @@ static struct dentry *udf_get_parent(struct dentry *child)
{
struct kernel_lb_addr tloc;
struct inode *inode = NULL;
struct qstr dotdot = {.name = "..", .len = 2};
struct qstr dotdot = QSTR_INIT("..", 2);
struct fileIdentDesc cfi;
struct udf_fileident_bh fibh;
......
......@@ -146,10 +146,7 @@ static struct dentry *ufs_fh_to_parent(struct super_block *sb, struct fid *fid,
static struct dentry *ufs_get_parent(struct dentry *child)
{
struct qstr dot_dot = {
.name = "..",
.len = 2,
};
struct qstr dot_dot = QSTR_INIT("..", 2);
ino_t ino;
ino = ufs_inode_by_name(child->d_inode, &dot_dot);
......
......@@ -25,6 +25,13 @@ struct vfsmount;
#define IS_ROOT(x) ((x) == (x)->d_parent)
/* The hash is always the low bits of hash_len */
#ifdef __LITTLE_ENDIAN
#define HASH_LEN_DECLARE u32 hash; u32 len;
#else
#define HASH_LEN_DECLARE u32 len; u32 hash;
#endif
/*
* "quick string" -- eases parameter passing, but more importantly
* saves "metadata" about the string (ie length and the hash).
......@@ -33,11 +40,19 @@ struct vfsmount;
* dentry.
*/
struct qstr {
unsigned int hash;
unsigned int len;
union {
struct {
HASH_LEN_DECLARE;
};
u64 hash_len;
};
const unsigned char *name;
};
#define QSTR_INIT(n,l) { { { .len = l } }, .name = n }
#define hashlen_hash(hashlen) ((u32) (hashlen))
#define hashlen_len(hashlen) ((u32)((hashlen) >> 32))
struct dentry_stat_t {
int nr_dentry;
int nr_unused;
......@@ -282,7 +297,7 @@ extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *);
extern struct dentry *__d_lookup(struct dentry *, struct qstr *);
extern struct dentry *__d_lookup_rcu(const struct dentry *parent,
const struct qstr *name,
unsigned *seq, struct inode **inode);
unsigned *seq, struct inode *inode);
/**
* __d_rcu_to_refcount - take a refcount on dentry if sequence check is ok
......
......@@ -127,9 +127,7 @@ static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb,
{
static uint32_t clntid;
char name[15];
struct qstr q = {
.name = name,
};
struct qstr q = { .name = name };
struct dentry *dir, *dentry;
int error;
......
......@@ -1059,12 +1059,9 @@ static const struct rpc_filelist files[] = {
struct dentry *rpc_d_lookup_sb(const struct super_block *sb,
const unsigned char *dir_name)
{
struct qstr dir = {
.name = dir_name,
.len = strlen(dir_name),
.hash = full_name_hash(dir_name, strlen(dir_name)),
};
struct qstr dir = QSTR_INIT(dir_name, strlen(dir_name));
dir.hash = full_name_hash(dir.name, dir.len);
return d_lookup(sb->s_root, &dir);
}
EXPORT_SYMBOL_GPL(rpc_d_lookup_sb);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment