Commit 3f19b2ab authored by David Howells's avatar David Howells

vfs, afs, ext4: Make the inode hash table RCU searchable

Make the inode hash table RCU searchable so that searches that want to
access or modify an inode without taking a ref on that inode can do so
without taking the inode hash table lock.

The main thing this requires is some RCU annotation on the list
manipulation operations.  Inodes are already freed by RCU in most cases.

Users of this interface must take care as the inode may be still under
construction or may be being torn down around them.

There are at least three instances where this can be of use:

 (1) Testing whether the inode number iunique() is going to return is
     currently unique (the iunique_lock is still held).

 (2) Ext4 date stamp updating.

 (3) AFS callback breaking.
Signed-off-by: default avatarDavid Howells <dhowells@redhat.com>
Acked-by: default avatarKonstantin Khlebnikov <khlebnikov@yandex-team.ru>
cc: linux-ext4@vger.kernel.org
cc: linux-afs@lists.infradead.org
parent 9cb1fd0e
...@@ -252,6 +252,7 @@ static void afs_break_one_callback(struct afs_server *server, ...@@ -252,6 +252,7 @@ static void afs_break_one_callback(struct afs_server *server,
struct afs_vnode *vnode; struct afs_vnode *vnode;
struct inode *inode; struct inode *inode;
rcu_read_lock();
read_lock(&server->cb_break_lock); read_lock(&server->cb_break_lock);
hlist_for_each_entry(vi, &server->cb_volumes, srv_link) { hlist_for_each_entry(vi, &server->cb_volumes, srv_link) {
if (vi->vid < fid->vid) if (vi->vid < fid->vid)
...@@ -287,12 +288,16 @@ static void afs_break_one_callback(struct afs_server *server, ...@@ -287,12 +288,16 @@ static void afs_break_one_callback(struct afs_server *server,
} else { } else {
data.volume = NULL; data.volume = NULL;
data.fid = *fid; data.fid = *fid;
inode = ilookup5_nowait(cbi->sb, fid->vnode,
/* See if we can find a matching inode - even an I_NEW
* inode needs to be marked as it can have its callback
* broken before we finish setting up the local inode.
*/
inode = find_inode_rcu(cbi->sb, fid->vnode,
afs_iget5_test, &data); afs_iget5_test, &data);
if (inode) { if (inode) {
vnode = AFS_FS_I(inode); vnode = AFS_FS_I(inode);
afs_break_callback(vnode, afs_cb_break_for_callback); afs_break_callback(vnode, afs_cb_break_for_callback);
iput(inode);
} else { } else {
trace_afs_cb_miss(fid, afs_cb_break_for_callback); trace_afs_cb_miss(fid, afs_cb_break_for_callback);
} }
...@@ -301,6 +306,7 @@ static void afs_break_one_callback(struct afs_server *server, ...@@ -301,6 +306,7 @@ static void afs_break_one_callback(struct afs_server *server,
out: out:
read_unlock(&server->cb_break_lock); read_unlock(&server->cb_break_lock);
rcu_read_unlock();
} }
/* /*
......
...@@ -4860,21 +4860,22 @@ static int ext4_inode_blocks_set(handle_t *handle, ...@@ -4860,21 +4860,22 @@ static int ext4_inode_blocks_set(handle_t *handle,
return 0; return 0;
} }
struct other_inode { static void __ext4_update_other_inode_time(struct super_block *sb,
unsigned long orig_ino; unsigned long orig_ino,
struct ext4_inode *raw_inode; unsigned long ino,
}; struct ext4_inode *raw_inode)
static int other_inode_match(struct inode * inode, unsigned long ino,
void *data)
{ {
struct other_inode *oi = (struct other_inode *) data; struct inode *inode;
if ((inode->i_ino != ino) || inode = find_inode_by_ino_rcu(sb, ino);
(inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW | if (!inode)
return;
if ((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
I_DIRTY_INODE)) || I_DIRTY_INODE)) ||
((inode->i_state & I_DIRTY_TIME) == 0)) ((inode->i_state & I_DIRTY_TIME) == 0))
return 0; return;
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW | if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
I_DIRTY_INODE)) == 0) && I_DIRTY_INODE)) == 0) &&
...@@ -4885,16 +4886,15 @@ static int other_inode_match(struct inode * inode, unsigned long ino, ...@@ -4885,16 +4886,15 @@ static int other_inode_match(struct inode * inode, unsigned long ino,
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
spin_lock(&ei->i_raw_lock); spin_lock(&ei->i_raw_lock);
EXT4_INODE_SET_XTIME(i_ctime, inode, oi->raw_inode); EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
EXT4_INODE_SET_XTIME(i_mtime, inode, oi->raw_inode); EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
EXT4_INODE_SET_XTIME(i_atime, inode, oi->raw_inode); EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
ext4_inode_csum_set(inode, oi->raw_inode, ei); ext4_inode_csum_set(inode, raw_inode, ei);
spin_unlock(&ei->i_raw_lock); spin_unlock(&ei->i_raw_lock);
trace_ext4_other_inode_update_time(inode, oi->orig_ino); trace_ext4_other_inode_update_time(inode, orig_ino);
return -1; return;
} }
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
return -1;
} }
/* /*
...@@ -4904,24 +4904,24 @@ static int other_inode_match(struct inode * inode, unsigned long ino, ...@@ -4904,24 +4904,24 @@ static int other_inode_match(struct inode * inode, unsigned long ino,
static void ext4_update_other_inodes_time(struct super_block *sb, static void ext4_update_other_inodes_time(struct super_block *sb,
unsigned long orig_ino, char *buf) unsigned long orig_ino, char *buf)
{ {
struct other_inode oi;
unsigned long ino; unsigned long ino;
int i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block; int i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
int inode_size = EXT4_INODE_SIZE(sb); int inode_size = EXT4_INODE_SIZE(sb);
oi.orig_ino = orig_ino;
/* /*
* Calculate the first inode in the inode table block. Inode * Calculate the first inode in the inode table block. Inode
* numbers are one-based. That is, the first inode in a block * numbers are one-based. That is, the first inode in a block
* (assuming 4k blocks and 256 byte inodes) is (n*16 + 1). * (assuming 4k blocks and 256 byte inodes) is (n*16 + 1).
*/ */
ino = ((orig_ino - 1) & ~(inodes_per_block - 1)) + 1; ino = ((orig_ino - 1) & ~(inodes_per_block - 1)) + 1;
rcu_read_lock();
for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) { for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
if (ino == orig_ino) if (ino == orig_ino)
continue; continue;
oi.raw_inode = (struct ext4_inode *) buf; __ext4_update_other_inode_time(sb, orig_ino, ino,
(void) find_inode_nowait(sb, ino, other_inode_match, &oi); (struct ext4_inode *)buf);
} }
rcu_read_unlock();
} }
/* /*
......
...@@ -497,7 +497,7 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval) ...@@ -497,7 +497,7 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval)
spin_lock(&inode_hash_lock); spin_lock(&inode_hash_lock);
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
hlist_add_head(&inode->i_hash, b); hlist_add_head_rcu(&inode->i_hash, b);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
spin_unlock(&inode_hash_lock); spin_unlock(&inode_hash_lock);
} }
...@@ -513,7 +513,7 @@ void __remove_inode_hash(struct inode *inode) ...@@ -513,7 +513,7 @@ void __remove_inode_hash(struct inode *inode)
{ {
spin_lock(&inode_hash_lock); spin_lock(&inode_hash_lock);
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
hlist_del_init(&inode->i_hash); hlist_del_init_rcu(&inode->i_hash);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
spin_unlock(&inode_hash_lock); spin_unlock(&inode_hash_lock);
} }
...@@ -1107,7 +1107,7 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval, ...@@ -1107,7 +1107,7 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
*/ */
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
inode->i_state |= I_NEW; inode->i_state |= I_NEW;
hlist_add_head(&inode->i_hash, head); hlist_add_head_rcu(&inode->i_hash, head);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
if (!creating) if (!creating)
inode_sb_list_add(inode); inode_sb_list_add(inode);
...@@ -1201,7 +1201,7 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino) ...@@ -1201,7 +1201,7 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
inode->i_ino = ino; inode->i_ino = ino;
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
inode->i_state = I_NEW; inode->i_state = I_NEW;
hlist_add_head(&inode->i_hash, head); hlist_add_head_rcu(&inode->i_hash, head);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
inode_sb_list_add(inode); inode_sb_list_add(inode);
spin_unlock(&inode_hash_lock); spin_unlock(&inode_hash_lock);
...@@ -1244,15 +1244,10 @@ static int test_inode_iunique(struct super_block *sb, unsigned long ino) ...@@ -1244,15 +1244,10 @@ static int test_inode_iunique(struct super_block *sb, unsigned long ino)
struct hlist_head *b = inode_hashtable + hash(sb, ino); struct hlist_head *b = inode_hashtable + hash(sb, ino);
struct inode *inode; struct inode *inode;
spin_lock(&inode_hash_lock); hlist_for_each_entry_rcu(inode, b, i_hash) {
hlist_for_each_entry(inode, b, i_hash) { if (inode->i_ino == ino && inode->i_sb == sb)
if (inode->i_ino == ino && inode->i_sb == sb) {
spin_unlock(&inode_hash_lock);
return 0; return 0;
} }
}
spin_unlock(&inode_hash_lock);
return 1; return 1;
} }
...@@ -1281,6 +1276,7 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved) ...@@ -1281,6 +1276,7 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved)
static unsigned int counter; static unsigned int counter;
ino_t res; ino_t res;
rcu_read_lock();
spin_lock(&iunique_lock); spin_lock(&iunique_lock);
do { do {
if (counter <= max_reserved) if (counter <= max_reserved)
...@@ -1288,6 +1284,7 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved) ...@@ -1288,6 +1284,7 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved)
res = counter++; res = counter++;
} while (!test_inode_iunique(sb, res)); } while (!test_inode_iunique(sb, res));
spin_unlock(&iunique_lock); spin_unlock(&iunique_lock);
rcu_read_unlock();
return res; return res;
} }
...@@ -1456,6 +1453,84 @@ struct inode *find_inode_nowait(struct super_block *sb, ...@@ -1456,6 +1453,84 @@ struct inode *find_inode_nowait(struct super_block *sb,
} }
EXPORT_SYMBOL(find_inode_nowait); EXPORT_SYMBOL(find_inode_nowait);
/**
* find_inode_rcu - find an inode in the inode cache
* @sb: Super block of file system to search
* @hashval: Key to hash
* @test: Function to test match on an inode
* @data: Data for test function
*
* Search for the inode specified by @hashval and @data in the inode cache,
* where the helper function @test will return 0 if the inode does not match
* and 1 if it does. The @test function must be responsible for taking the
* i_lock spin_lock and checking i_state for an inode being freed or being
* initialized.
*
* If successful, this will return the inode for which the @test function
* returned 1 and NULL otherwise.
*
* The @test function is not permitted to take a ref on any inode presented.
* It is also not permitted to sleep.
*
* The caller must hold the RCU read lock.
*/
struct inode *find_inode_rcu(struct super_block *sb, unsigned long hashval,
int (*test)(struct inode *, void *), void *data)
{
struct hlist_head *head = inode_hashtable + hash(sb, hashval);
struct inode *inode;
RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
"suspicious find_inode_rcu() usage");
hlist_for_each_entry_rcu(inode, head, i_hash) {
if (inode->i_sb == sb &&
!(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)) &&
test(inode, data))
return inode;
}
return NULL;
}
EXPORT_SYMBOL(find_inode_rcu);
/**
* find_inode_by_rcu - Find an inode in the inode cache
* @sb: Super block of file system to search
* @ino: The inode number to match
*
* Search for the inode specified by @hashval and @data in the inode cache,
* where the helper function @test will return 0 if the inode does not match
* and 1 if it does. The @test function must be responsible for taking the
* i_lock spin_lock and checking i_state for an inode being freed or being
* initialized.
*
* If successful, this will return the inode for which the @test function
* returned 1 and NULL otherwise.
*
* The @test function is not permitted to take a ref on any inode presented.
* It is also not permitted to sleep.
*
* The caller must hold the RCU read lock.
*/
struct inode *find_inode_by_ino_rcu(struct super_block *sb,
unsigned long ino)
{
struct hlist_head *head = inode_hashtable + hash(sb, ino);
struct inode *inode;
RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
"suspicious find_inode_by_ino_rcu() usage");
hlist_for_each_entry_rcu(inode, head, i_hash) {
if (inode->i_ino == ino &&
inode->i_sb == sb &&
!(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)))
return inode;
}
return NULL;
}
EXPORT_SYMBOL(find_inode_by_ino_rcu);
int insert_inode_locked(struct inode *inode) int insert_inode_locked(struct inode *inode)
{ {
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
...@@ -1480,7 +1555,7 @@ int insert_inode_locked(struct inode *inode) ...@@ -1480,7 +1555,7 @@ int insert_inode_locked(struct inode *inode)
if (likely(!old)) { if (likely(!old)) {
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
inode->i_state |= I_NEW | I_CREATING; inode->i_state |= I_NEW | I_CREATING;
hlist_add_head(&inode->i_hash, head); hlist_add_head_rcu(&inode->i_hash, head);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
spin_unlock(&inode_hash_lock); spin_unlock(&inode_hash_lock);
return 0; return 0;
...@@ -1540,6 +1615,7 @@ static void iput_final(struct inode *inode) ...@@ -1540,6 +1615,7 @@ static void iput_final(struct inode *inode)
{ {
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
const struct super_operations *op = inode->i_sb->s_op; const struct super_operations *op = inode->i_sb->s_op;
unsigned long state;
int drop; int drop;
WARN_ON(inode->i_state & I_NEW); WARN_ON(inode->i_state & I_NEW);
...@@ -1555,16 +1631,20 @@ static void iput_final(struct inode *inode) ...@@ -1555,16 +1631,20 @@ static void iput_final(struct inode *inode)
return; return;
} }
state = inode->i_state;
if (!drop) { if (!drop) {
inode->i_state |= I_WILL_FREE; WRITE_ONCE(inode->i_state, state | I_WILL_FREE);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
write_inode_now(inode, 1); write_inode_now(inode, 1);
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
WARN_ON(inode->i_state & I_NEW); state = inode->i_state;
inode->i_state &= ~I_WILL_FREE; WARN_ON(state & I_NEW);
state &= ~I_WILL_FREE;
} }
inode->i_state |= I_FREEING; WRITE_ONCE(inode->i_state, state | I_FREEING);
if (!list_empty(&inode->i_lru)) if (!list_empty(&inode->i_lru))
inode_lru_list_del(inode); inode_lru_list_del(inode);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
......
...@@ -3070,6 +3070,9 @@ extern struct inode *find_inode_nowait(struct super_block *, ...@@ -3070,6 +3070,9 @@ extern struct inode *find_inode_nowait(struct super_block *,
int (*match)(struct inode *, int (*match)(struct inode *,
unsigned long, void *), unsigned long, void *),
void *data); void *data);
extern struct inode *find_inode_rcu(struct super_block *, unsigned long,
int (*)(struct inode *, void *), void *);
extern struct inode *find_inode_by_ino_rcu(struct super_block *, unsigned long);
extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
extern int insert_inode_locked(struct inode *); extern int insert_inode_locked(struct inode *);
#ifdef CONFIG_DEBUG_LOCK_ALLOC #ifdef CONFIG_DEBUG_LOCK_ALLOC
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment