Commit 46417064 authored by Thomas Gleixner, committed by Theodore Ts'o

jbd2: Make state lock a spinlock

Bit-spinlocks are problematic on PREEMPT_RT if functions which might sleep
on RT, e.g. spin_lock() or alloc/free(), are invoked inside the lock-held
region, because bit-spinlocks disable preemption even on RT.
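
For illustration, a simplified sketch of the bit-spinlock acquire path
(modeled on include/linux/bit_spinlock.h, not the verbatim kernel source)
shows where that preemption disabling comes from:

	static inline void bit_spin_lock(int bitnum, unsigned long *addr)
	{
		/*
		 * A bit lock has no owner field, so there is no task to
		 * priority-boost or to sleep on; the only safe option is
		 * to keep preemption off for the whole lock-held region.
		 */
		preempt_disable();
		while (test_and_set_bit_lock(bitnum, addr)) {
			preempt_enable();
			do {
				cpu_relax();
			} while (test_bit(bitnum, addr));
			preempt_disable();
		}
	}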

A first attempt was to replace the state lock with a spinlock placed in
struct buffer_head and to make the locking conditional on PREEMPT_RT and
DEBUG_BIT_SPINLOCKS.

Jan pointed out that there is a 4 byte hole in struct journal_head into
which a regular spinlock fits, and that he would not object to converting
the state lock to a spinlock unconditionally.
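
A rough sketch of that layout argument (field order from the pre-patch
include/linux/journal-head.h; offsets assume a 64-bit kernel without
spinlock debugging, where sizeof(spinlock_t) == 4, and are illustrative
rather than pahole output):

	struct journal_head {
		struct buffer_head *b_bh;	/* offset  0, size 8 */
		int b_jcount;			/* offset  8, size 4 */
		unsigned b_jlist;		/* offset 12, size 4 */
		unsigned b_modified;		/* offset 16, size 4 */
		/* 4 byte hole: b_frozen_data needs 8 byte alignment */
		char *b_frozen_data;		/* offset 24, size 8 */
		/* ... */
	};

Inserting the 4-byte spinlock_t (the patch places it right after b_bh)
shifts the following members into that padding, so the structure does not
grow.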

Aside from solving the RT problem, this also gains lockdep coverage for the
journal head state lock (bit-spinlocks are not covered by lockdep, as it's
hard to fit a lockdep map into a single bit).
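
As a hypothetical illustration of what that coverage buys (this snippet is
not part of the patch): per the lock ranking documented in jbd2.h below,
b_state_lock nests under j_state_lock and j_list_lock nests under
b_state_lock, so code that did

	spin_lock(&journal->j_list_lock);
	spin_lock(&jh->b_state_lock);	/* inverted: b_state_lock ranks above j_list_lock */

would now produce a "possible circular locking dependency" report, whereas
the same inversion on the old BH_State bit went unnoticed.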

The trivial change would have been to convert the jbd_*lock_bh_state()
inlines, but that comes with the downside that these functions take a
buffer head pointer which would need to be converted to a journal head
pointer, adding another level of indirection.

As almost all functions which use this lock have a journal head pointer
readily available, it makes more sense to remove the lock helper inlines
and write out spin_*lock() at all call sites.
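
The shape of the conversion at a typical call site, condensed from the
commit path hunks below (surrounding code elided):

	/* before: the lock is addressed through the buffer head */
	struct buffer_head *bh = jh2bh(jh);

	jbd_lock_bh_state(bh);
	jbd2_free(jh->b_committed_data, bh->b_size);
	jh->b_committed_data = NULL;
	jbd_unlock_bh_state(bh);

	/* after: the lock is embedded in the journal head itself */
	spin_lock(&jh->b_state_lock);
	jbd2_free(jh->b_committed_data, bh->b_size);
	jh->b_committed_data = NULL;
	spin_unlock(&jh->b_state_lock);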

Fix up all locking comments as well.
Suggested-by: Jan Kara <jack@suse.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Jan Kara <jack@suse.com>
Cc: linux-ext4@vger.kernel.org
Link: https://lore.kernel.org/r/20190809124233.13277-7-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
parent 2e710ff0
@@ -482,10 +482,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 		if (jh->b_committed_data) {
 			struct buffer_head *bh = jh2bh(jh);
 
-			jbd_lock_bh_state(bh);
+			spin_lock(&jh->b_state_lock);
 			jbd2_free(jh->b_committed_data, bh->b_size);
 			jh->b_committed_data = NULL;
-			jbd_unlock_bh_state(bh);
+			spin_unlock(&jh->b_state_lock);
 		}
 		jbd2_journal_refile_buffer(journal, jh);
 	}
@@ -928,7 +928,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 		 * done with it.
 		 */
 		get_bh(bh);
-		jbd_lock_bh_state(bh);
+		spin_lock(&jh->b_state_lock);
 		J_ASSERT_JH(jh, jh->b_transaction == commit_transaction);
 
 		/*
@@ -1024,7 +1024,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 		}
 		JBUFFER_TRACE(jh, "refile or unfile buffer");
 		drop_ref = __jbd2_journal_refile_buffer(jh);
-		jbd_unlock_bh_state(bh);
+		spin_unlock(&jh->b_state_lock);
 		if (drop_ref)
 			jbd2_journal_put_journal_head(jh);
 		if (try_to_free)
......
@@ -363,7 +363,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
 	/* keep subsequent assertions sane */
 	atomic_set(&new_bh->b_count, 1);
 
-	jbd_lock_bh_state(bh_in);
+	spin_lock(&jh_in->b_state_lock);
 repeat:
 	/*
 	 * If a new transaction has already done a buffer copy-out, then
@@ -405,13 +405,13 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
 	if (need_copy_out && !done_copy_out) {
 		char *tmp;
 
-		jbd_unlock_bh_state(bh_in);
+		spin_unlock(&jh_in->b_state_lock);
 		tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
 		if (!tmp) {
 			brelse(new_bh);
 			return -ENOMEM;
 		}
-		jbd_lock_bh_state(bh_in);
+		spin_lock(&jh_in->b_state_lock);
 		if (jh_in->b_frozen_data) {
 			jbd2_free(tmp, bh_in->b_size);
 			goto repeat;
@@ -464,7 +464,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
 	__jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
 	spin_unlock(&journal->j_list_lock);
 	set_buffer_shadow(bh_in);
-	jbd_unlock_bh_state(bh_in);
+	spin_unlock(&jh_in->b_state_lock);
 
 	return do_escape | (done_copy_out << 1);
 }
@@ -2410,6 +2410,8 @@ static struct journal_head *journal_alloc_journal_head(void)
 		ret = kmem_cache_zalloc(jbd2_journal_head_cache,
 				GFP_NOFS | __GFP_NOFAIL);
 	}
+	if (ret)
+		spin_lock_init(&ret->b_state_lock);
 	return ret;
 }
......
@@ -1252,6 +1252,7 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
 					 int nr)
 {
 	struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
+	struct journal_head *jh;
 	int ret;
 
 	if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
@@ -1260,13 +1261,14 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
 	if (!buffer_jbd(bg_bh))
 		return 1;
 
-	jbd_lock_bh_state(bg_bh);
-	bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data;
+	jh = bh2jh(bg_bh);
+	spin_lock(&jh->b_state_lock);
+	bg = (struct ocfs2_group_desc *) jh->b_committed_data;
 	if (bg)
 		ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
 	else
 		ret = 1;
-	jbd_unlock_bh_state(bg_bh);
+	spin_unlock(&jh->b_state_lock);
 
 	return ret;
 }
@@ -2387,6 +2389,7 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
 	int status;
 	unsigned int tmp;
 	struct ocfs2_group_desc *undo_bg = NULL;
+	struct journal_head *jh;
 
 	/* The caller got this descriptor from
 	 * ocfs2_read_group_descriptor().  Any corruption is a code bug. */
@@ -2405,10 +2408,10 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
 		goto bail;
 	}
 
+	jh = bh2jh(group_bh);
 	if (undo_fn) {
-		jbd_lock_bh_state(group_bh);
-		undo_bg = (struct ocfs2_group_desc *)
-					bh2jh(group_bh)->b_committed_data;
+		spin_lock(&jh->b_state_lock);
+		undo_bg = (struct ocfs2_group_desc *) jh->b_committed_data;
 		BUG_ON(!undo_bg);
 	}
@@ -2423,7 +2426,7 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
 	le16_add_cpu(&bg->bg_free_bits_count, num_bits);
 	if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) {
 		if (undo_fn)
-			jbd_unlock_bh_state(group_bh);
+			spin_unlock(&jh->b_state_lock);
 		return ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit count %u but claims %u are freed. num_bits %d\n",
 				   (unsigned long long)le64_to_cpu(bg->bg_blkno),
 				   le16_to_cpu(bg->bg_bits),
}
if (undo_fn)
jbd_unlock_bh_state(group_bh);
spin_unlock(&jh->b_state_lock);
ocfs2_journal_dirty(handle, group_bh);
bail:
......
@@ -313,7 +313,6 @@ enum jbd_state_bits {
 	BH_Revoked,		/* Has been revoked from the log */
 	BH_RevokeValid,		/* Revoked flag is valid */
 	BH_JBDDirty,		/* Is dirty but journaled */
-	BH_State,		/* Pins most journal_head state */
 	BH_JournalHead,		/* Pins bh->b_private and jh->b_bh */
 	BH_Shadow,		/* IO on shadow buffer is running */
 	BH_Verified,		/* Metadata block has been verified ok */
@@ -342,21 +341,6 @@ static inline struct journal_head *bh2jh(struct buffer_head *bh)
 	return bh->b_private;
 }
 
-static inline void jbd_lock_bh_state(struct buffer_head *bh)
-{
-	bit_spin_lock(BH_State, &bh->b_state);
-}
-
-static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
-{
-	return bit_spin_is_locked(BH_State, &bh->b_state);
-}
-
-static inline void jbd_unlock_bh_state(struct buffer_head *bh)
-{
-	bit_spin_unlock(BH_State, &bh->b_state);
-}
-
 static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
 {
 	bit_spin_lock(BH_JournalHead, &bh->b_state);
@@ -551,9 +535,9 @@ struct transaction_chp_stats_s {
  *    ->jbd_lock_bh_journal_head()	(This is "innermost")
  *
  *    j_state_lock
- *    ->jbd_lock_bh_state()
+ *    ->b_state_lock
  *
- *    jbd_lock_bh_state()
+ *    b_state_lock
  *    ->j_list_lock
  *
  *    j_state_lock
......
@@ -11,6 +11,8 @@
 #ifndef JOURNAL_HEAD_H_INCLUDED
 #define JOURNAL_HEAD_H_INCLUDED
 
+#include <linux/spinlock.h>
+
 typedef unsigned int		tid_t;		/* Unique transaction ID */
 typedef struct transaction_s	transaction_t;	/* Compound transaction type */
@@ -23,6 +25,11 @@ struct journal_head {
 	 */
 	struct buffer_head *b_bh;
 
+	/*
+	 * Protect the buffer head state
+	 */
+	spinlock_t b_state_lock;
+
 	/*
 	 * Reference count - see description in journal.c
 	 * [jbd_lock_bh_journal_head()]
@@ -30,7 +37,7 @@ struct journal_head {
 	int b_jcount;
 
 	/*
-	 * Journalling list for this buffer [jbd_lock_bh_state()]
+	 * Journalling list for this buffer [b_state_lock]
 	 * NOTE: We *cannot* combine this with b_modified into a bitfield
 	 * as gcc would then (which the C standard allows but which is
 	 * very unuseful) make 64-bit accesses to the bitfield and clobber
@@ -41,20 +48,20 @@ struct journal_head {
 	/*
 	 * This flag signals the buffer has been modified by
 	 * the currently running transaction
-	 * [jbd_lock_bh_state()]
+	 * [b_state_lock]
 	 */
 	unsigned b_modified;
 
 	/*
 	 * Copy of the buffer data frozen for writing to the log.
-	 * [jbd_lock_bh_state()]
+	 * [b_state_lock]
 	 */
 	char *b_frozen_data;
 
 	/*
 	 * Pointer to a saved copy of the buffer containing no uncommitted
 	 * deallocation references, so that allocations can avoid overwriting
-	 * uncommitted deletes. [jbd_lock_bh_state()]
+	 * uncommitted deletes. [b_state_lock]
 	 */
 	char *b_committed_data;
@@ -63,7 +70,7 @@ struct journal_head {
 	 * metadata: either the running transaction or the committing
 	 * transaction (if there is one).  Only applies to buffers on a
 	 * transaction's data or metadata journaling list.
-	 * [j_list_lock] [jbd_lock_bh_state()]
+	 * [j_list_lock] [b_state_lock]
 	 * Either of these locks is enough for reading, both are needed for
 	 * changes.
 	 */
@@ -73,13 +80,13 @@ struct journal_head {
 	 * Pointer to the running compound transaction which is currently
 	 * modifying the buffer's metadata, if there was already a transaction
 	 * committing it when the new transaction touched it.
-	 * [t_list_lock] [jbd_lock_bh_state()]
+	 * [t_list_lock] [b_state_lock]
 	 */
 	transaction_t *b_next_transaction;
 
 	/*
 	 * Doubly-linked list of buffers on a transaction's data, metadata or
-	 * forget queue. [t_list_lock] [jbd_lock_bh_state()]
+	 * forget queue. [t_list_lock] [b_state_lock]
 	 */
 	struct journal_head *b_tnext, *b_tprev;
......