Commit 41b73435 authored by Christian Brauner

Merge patch series "fs: add i_state helpers"

Christian Brauner <brauner@kernel.org> says:

I've recently looked for some free space in struct inode again because
of some exec kerfuffle we had and while my idea didn't turn into
anything I noticed that we often waste bytes when using wait bit
operations. So I set out to switch that to another mechanism that would
allow us to free up bytes. So this is an attempt to turn i_state from an
unsigned long into an u32 using the individual bytes of i_state as
addresses for the wait var event mechanism (Thanks to Linus for that idea.).

This survives LTP, xfstests on various filesystems, and will-it-scale.

* patches from https://lore.kernel.org/r/20240823-work-i_state-v3-1-5cd5fd207a57@kernel.org:
  inode: make i_state a u32
  inode: port __I_LRU_ISOLATING to var event
  inode: port __I_NEW to var event
  inode: port __I_SYNC to var event
  fs: reorder i_state bits
  fs: add i_state helpers

Link: https://lore.kernel.org/r/20240823-work-i_state-v3-1-5cd5fd207a57@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
parents 88b1afbf 2b111edb
...@@ -1644,14 +1644,16 @@ void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s) ...@@ -1644,14 +1644,16 @@ void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s)
break; break;
} }
} else if (clean_pass && this_pass_clean) { } else if (clean_pass && this_pass_clean) {
wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW); struct wait_bit_queue_entry wqe;
DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW); struct wait_queue_head *wq_head;
prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); wq_head = inode_bit_waitqueue(&wqe, &inode->v, __I_NEW);
prepare_to_wait_event(wq_head, &wqe.wq_entry,
TASK_UNINTERRUPTIBLE);
mutex_unlock(&c->vfs_inodes_lock); mutex_unlock(&c->vfs_inodes_lock);
schedule(); schedule();
finish_wait(wq, &wait.wq_entry); finish_wait(wq_head, &wqe.wq_entry);
goto again; goto again;
} }
} }
......
...@@ -1908,8 +1908,13 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode) ...@@ -1908,8 +1908,13 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
__d_instantiate(entry, inode); __d_instantiate(entry, inode);
WARN_ON(!(inode->i_state & I_NEW)); WARN_ON(!(inode->i_state & I_NEW));
inode->i_state &= ~I_NEW & ~I_CREATING; inode->i_state &= ~I_NEW & ~I_CREATING;
/*
* Pairs with the barrier in prepare_to_wait_event() to make sure
* ___wait_var_event() either sees the bit cleared or
* waitqueue_active() check in wake_up_var() sees the waiter.
*/
smp_mb(); smp_mb();
wake_up_bit(&inode->i_state, __I_NEW); inode_wake_up_bit(inode, __I_NEW);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
} }
EXPORT_SYMBOL(d_instantiate_new); EXPORT_SYMBOL(d_instantiate_new);
......
...@@ -1386,12 +1386,13 @@ static void requeue_io(struct inode *inode, struct bdi_writeback *wb) ...@@ -1386,12 +1386,13 @@ static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
static void inode_sync_complete(struct inode *inode) static void inode_sync_complete(struct inode *inode)
{ {
assert_spin_locked(&inode->i_lock);
inode->i_state &= ~I_SYNC; inode->i_state &= ~I_SYNC;
/* If inode is clean and unused, put it into LRU now... */ /* If inode is clean and unused, put it into LRU now... */
inode_add_lru(inode); inode_add_lru(inode);
/* Waiters must see I_SYNC cleared before being woken up */ /* Called with inode->i_lock which ensures memory ordering. */
smp_mb(); inode_wake_up_bit(inode, __I_SYNC);
wake_up_bit(&inode->i_state, __I_SYNC);
} }
static bool inode_dirtied_after(struct inode *inode, unsigned long t) static bool inode_dirtied_after(struct inode *inode, unsigned long t)
...@@ -1512,17 +1513,25 @@ static int write_inode(struct inode *inode, struct writeback_control *wbc) ...@@ -1512,17 +1513,25 @@ static int write_inode(struct inode *inode, struct writeback_control *wbc)
*/ */
void inode_wait_for_writeback(struct inode *inode) void inode_wait_for_writeback(struct inode *inode)
{ {
DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); struct wait_bit_queue_entry wqe;
wait_queue_head_t *wqh; struct wait_queue_head *wq_head;
assert_spin_locked(&inode->i_lock);
if (!(inode->i_state & I_SYNC))
return;
lockdep_assert_held(&inode->i_lock); wq_head = inode_bit_waitqueue(&wqe, inode, __I_SYNC);
wqh = bit_waitqueue(&inode->i_state, __I_SYNC); for (;;) {
while (inode->i_state & I_SYNC) { prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
/* Checking I_SYNC with inode->i_lock guarantees memory ordering. */
if (!(inode->i_state & I_SYNC))
break;
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
__wait_on_bit(wqh, &wq, bit_wait, schedule();
TASK_UNINTERRUPTIBLE);
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
} }
finish_wait(wq_head, &wqe.wq_entry);
} }
/* /*
...@@ -1533,16 +1542,20 @@ void inode_wait_for_writeback(struct inode *inode) ...@@ -1533,16 +1542,20 @@ void inode_wait_for_writeback(struct inode *inode)
static void inode_sleep_on_writeback(struct inode *inode) static void inode_sleep_on_writeback(struct inode *inode)
__releases(inode->i_lock) __releases(inode->i_lock)
{ {
DEFINE_WAIT(wait); struct wait_bit_queue_entry wqe;
wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC); struct wait_queue_head *wq_head;
int sleep; bool sleep;
assert_spin_locked(&inode->i_lock);
prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); wq_head = inode_bit_waitqueue(&wqe, inode, __I_SYNC);
sleep = inode->i_state & I_SYNC; prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
/* Checking I_SYNC with inode->i_lock guarantees memory ordering. */
sleep = !!(inode->i_state & I_SYNC);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
if (sleep) if (sleep)
schedule(); schedule();
finish_wait(wqh, &wait); finish_wait(wq_head, &wqe.wq_entry);
} }
/* /*
......
...@@ -472,6 +472,17 @@ static void __inode_add_lru(struct inode *inode, bool rotate) ...@@ -472,6 +472,17 @@ static void __inode_add_lru(struct inode *inode, bool rotate)
inode->i_state |= I_REFERENCED; inode->i_state |= I_REFERENCED;
} }
/*
 * Prepare @wqe for waiting on @bit of @inode->i_state and return the
 * wait-var waitqueue head that wakeups for that bit are delivered to.
 *
 * The per-bit wait address is derived from the individual bytes of
 * i_state (see inode_state_wait_address()), so each supported bit gets
 * a distinct address for the wait_var_event() machinery.
 */
struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
					    struct inode *inode, u32 bit)
{
	void *addr = inode_state_wait_address(inode, bit);

	init_wait_var_entry(wqe, addr, 0);
	return __var_waitqueue(addr);
}
EXPORT_SYMBOL(inode_bit_waitqueue);
/* /*
* Add inode to LRU if needed (inode is unused and clean). * Add inode to LRU if needed (inode is unused and clean).
* *
...@@ -500,24 +511,35 @@ static void inode_unpin_lru_isolating(struct inode *inode) ...@@ -500,24 +511,35 @@ static void inode_unpin_lru_isolating(struct inode *inode)
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
WARN_ON(!(inode->i_state & I_LRU_ISOLATING)); WARN_ON(!(inode->i_state & I_LRU_ISOLATING));
inode->i_state &= ~I_LRU_ISOLATING; inode->i_state &= ~I_LRU_ISOLATING;
smp_mb(); /* Called with inode->i_lock which ensures memory ordering. */
wake_up_bit(&inode->i_state, __I_LRU_ISOLATING); inode_wake_up_bit(inode, __I_LRU_ISOLATING);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
} }
static void inode_wait_for_lru_isolating(struct inode *inode) static void inode_wait_for_lru_isolating(struct inode *inode)
{ {
struct wait_bit_queue_entry wqe;
struct wait_queue_head *wq_head;
lockdep_assert_held(&inode->i_lock); lockdep_assert_held(&inode->i_lock);
if (inode->i_state & I_LRU_ISOLATING) { if (!(inode->i_state & I_LRU_ISOLATING))
DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING); return;
wait_queue_head_t *wqh;
wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING); wq_head = inode_bit_waitqueue(&wqe, inode, __I_LRU_ISOLATING);
for (;;) {
prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
/*
* Checking I_LRU_ISOLATING with inode->i_lock guarantees
* memory ordering.
*/
if (!(inode->i_state & I_LRU_ISOLATING))
break;
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
__wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE); schedule();
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
WARN_ON(inode->i_state & I_LRU_ISOLATING);
} }
finish_wait(wq_head, &wqe.wq_entry);
WARN_ON(inode->i_state & I_LRU_ISOLATING);
} }
/** /**
...@@ -723,7 +745,13 @@ static void evict(struct inode *inode) ...@@ -723,7 +745,13 @@ static void evict(struct inode *inode)
* used as an indicator whether blocking on it is safe. * used as an indicator whether blocking on it is safe.
*/ */
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
wake_up_bit(&inode->i_state, __I_NEW); /*
* Pairs with the barrier in prepare_to_wait_event() to make sure
* ___wait_var_event() either sees the bit cleared or
* waitqueue_active() check in wake_up_var() sees the waiter.
*/
smp_mb();
inode_wake_up_bit(inode, __I_NEW);
BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
...@@ -1135,8 +1163,13 @@ void unlock_new_inode(struct inode *inode) ...@@ -1135,8 +1163,13 @@ void unlock_new_inode(struct inode *inode)
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
WARN_ON(!(inode->i_state & I_NEW)); WARN_ON(!(inode->i_state & I_NEW));
inode->i_state &= ~I_NEW & ~I_CREATING; inode->i_state &= ~I_NEW & ~I_CREATING;
/*
* Pairs with the barrier in prepare_to_wait_event() to make sure
* ___wait_var_event() either sees the bit cleared or
* waitqueue_active() check in wake_up_var() sees the waiter.
*/
smp_mb(); smp_mb();
wake_up_bit(&inode->i_state, __I_NEW); inode_wake_up_bit(inode, __I_NEW);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
} }
EXPORT_SYMBOL(unlock_new_inode); EXPORT_SYMBOL(unlock_new_inode);
...@@ -1147,8 +1180,13 @@ void discard_new_inode(struct inode *inode) ...@@ -1147,8 +1180,13 @@ void discard_new_inode(struct inode *inode)
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
WARN_ON(!(inode->i_state & I_NEW)); WARN_ON(!(inode->i_state & I_NEW));
inode->i_state &= ~I_NEW; inode->i_state &= ~I_NEW;
/*
* Pairs with the barrier in prepare_to_wait_event() to make sure
* ___wait_var_event() either sees the bit cleared or
* waitqueue_active() check in wake_up_var() sees the waiter.
*/
smp_mb(); smp_mb();
wake_up_bit(&inode->i_state, __I_NEW); inode_wake_up_bit(inode, __I_NEW);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
iput(inode); iput(inode);
} }
...@@ -2337,8 +2375,8 @@ EXPORT_SYMBOL(inode_needs_sync); ...@@ -2337,8 +2375,8 @@ EXPORT_SYMBOL(inode_needs_sync);
*/ */
static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked) static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked)
{ {
wait_queue_head_t *wq; struct wait_bit_queue_entry wqe;
DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); struct wait_queue_head *wq_head;
/* /*
* Handle racing against evict(), see that routine for more details. * Handle racing against evict(), see that routine for more details.
...@@ -2349,14 +2387,14 @@ static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_lock ...@@ -2349,14 +2387,14 @@ static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_lock
return; return;
} }
wq = bit_waitqueue(&inode->i_state, __I_NEW); wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW);
prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
rcu_read_unlock(); rcu_read_unlock();
if (is_inode_hash_locked) if (is_inode_hash_locked)
spin_unlock(&inode_hash_lock); spin_unlock(&inode_hash_lock);
schedule(); schedule();
finish_wait(wq, &wait.wq_entry); finish_wait(wq_head, &wqe.wq_entry);
if (is_inode_hash_locked) if (is_inode_hash_locked)
spin_lock(&inode_hash_lock); spin_lock(&inode_hash_lock);
rcu_read_lock(); rcu_read_lock();
......
...@@ -681,7 +681,8 @@ struct inode { ...@@ -681,7 +681,8 @@ struct inode {
#endif #endif
/* Misc */ /* Misc */
unsigned long i_state; u32 i_state;
/* 32-bit hole */
struct rw_semaphore i_rwsem; struct rw_semaphore i_rwsem;
unsigned long dirtied_when; /* jiffies of first dirtying */ unsigned long dirtied_when; /* jiffies of first dirtying */
...@@ -744,6 +745,21 @@ struct inode { ...@@ -744,6 +745,21 @@ struct inode {
void *i_private; /* fs or device private pointer */ void *i_private; /* fs or device private pointer */
} __randomize_layout; } __randomize_layout;
/*
* Get bit address from inode->i_state to use with wait_var_event()
 * infrastructure.
*/
#define inode_state_wait_address(inode, bit) ((char *)&(inode)->i_state + (bit))
struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
struct inode *inode, u32 bit);
/*
 * Wake waiters blocked on @bit of @inode->i_state via the wait-var
 * mechanism. Any memory barrier needed to order the i_state update
 * against the wakeup must be supplied by the caller.
 */
static inline void inode_wake_up_bit(struct inode *inode, u32 bit)
{
	void *waddr = inode_state_wait_address(inode, bit);

	wake_up_var(waddr);
}
struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode); struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode);
static inline unsigned int i_blocksize(const struct inode *node) static inline unsigned int i_blocksize(const struct inode *node)
...@@ -2395,28 +2411,32 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, ...@@ -2395,28 +2411,32 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
* i_count. * i_count.
* *
* Q: What is the difference between I_WILL_FREE and I_FREEING? * Q: What is the difference between I_WILL_FREE and I_FREEING?
*
* __I_{SYNC,NEW,LRU_ISOLATING} are used to derive unique addresses to wait
* upon. There's one free address left.
*/ */
#define I_DIRTY_SYNC (1 << 0) #define __I_NEW 0
#define I_DIRTY_DATASYNC (1 << 1)
#define I_DIRTY_PAGES (1 << 2)
#define __I_NEW 3
#define I_NEW (1 << __I_NEW) #define I_NEW (1 << __I_NEW)
#define I_WILL_FREE (1 << 4) #define __I_SYNC 1
#define I_FREEING (1 << 5)
#define I_CLEAR (1 << 6)
#define __I_SYNC 7
#define I_SYNC (1 << __I_SYNC) #define I_SYNC (1 << __I_SYNC)
#define I_REFERENCED (1 << 8) #define __I_LRU_ISOLATING 2
#define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING)
#define I_DIRTY_SYNC (1 << 3)
#define I_DIRTY_DATASYNC (1 << 4)
#define I_DIRTY_PAGES (1 << 5)
#define I_WILL_FREE (1 << 6)
#define I_FREEING (1 << 7)
#define I_CLEAR (1 << 8)
#define I_REFERENCED (1 << 9)
#define I_LINKABLE (1 << 10) #define I_LINKABLE (1 << 10)
#define I_DIRTY_TIME (1 << 11) #define I_DIRTY_TIME (1 << 11)
#define I_WB_SWITCH (1 << 13) #define I_WB_SWITCH (1 << 12)
#define I_OVL_INUSE (1 << 14) #define I_OVL_INUSE (1 << 13)
#define I_CREATING (1 << 15) #define I_CREATING (1 << 14)
#define I_DONTCACHE (1 << 16) #define I_DONTCACHE (1 << 15)
#define I_SYNC_QUEUED (1 << 17) #define I_SYNC_QUEUED (1 << 16)
#define I_PINNING_NETFS_WB (1 << 18) #define I_PINNING_NETFS_WB (1 << 17)
#define __I_LRU_ISOLATING 19
#define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING)
#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
......
...@@ -200,7 +200,8 @@ void inode_io_list_del(struct inode *inode); ...@@ -200,7 +200,8 @@ void inode_io_list_del(struct inode *inode);
/* writeback.h requires fs.h; it, too, is not included from here. */ /* writeback.h requires fs.h; it, too, is not included from here. */
static inline void wait_on_inode(struct inode *inode) static inline void wait_on_inode(struct inode *inode)
{ {
wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE); wait_var_event(inode_state_wait_address(inode, __I_NEW),
!(READ_ONCE(inode->i_state) & I_NEW));
} }
#ifdef CONFIG_CGROUP_WRITEBACK #ifdef CONFIG_CGROUP_WRITEBACK
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment