Commit 0a63cac6 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] JBD: remove journal_datalist_lock

This was a system-wide spinlock.

Simple transformation: make it a filesystem-wide spinlock, in the JBD
journal.

That's a bit lame, and later it might be nice to make it per-transaction_t.
But there are interesting ranking and ordering problems with that, especially
around __journal_refile_buffer().
parent 1fe87216
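In outline, the transformation looks like this (a sketch of the shape of the change, not a hunk from the patch; the j_list_lock field itself lands in struct journal_s):

/* Before: one spinlock shared by every journal in the system. */
extern spinlock_t journal_datalist_lock;

spin_lock(&journal_datalist_lock);
__journal_clean_checkpoint_list(journal);
spin_unlock(&journal_datalist_lock);

/* After: the lock lives in the journal_t, so journals on different
 * filesystems no longer contend with each other. */
spin_lock(&journal->j_list_lock);
__journal_clean_checkpoint_list(journal);
spin_unlock(&journal->j_list_lock);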
@@ -23,12 +23,10 @@
#include <linux/errno.h>
#include <linux/slab.h>
extern spinlock_t journal_datalist_lock;
/*
* Unlink a buffer from a transaction.
*
* Called with journal_datalist_lock held.
* Called with j_list_lock held.
*/
static inline void __buffer_unlink(struct journal_head *jh)
@@ -49,7 +47,7 @@ static inline void __buffer_unlink(struct journal_head *jh)
/*
* Try to release a checkpointed buffer from its transaction.
* Returns 1 if we released it.
* Requires journal_datalist_lock
* Requires j_list_lock
* Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
*/
static int __try_to_free_cp_buf(struct journal_head *jh)
@@ -97,14 +95,14 @@ void log_wait_for_space(journal_t *journal, int nblocks)
}
/*
* We were unable to perform jbd_trylock_bh_state() inside
* journal_datalist_lock. The caller must restart a list walk. Wait for
* someone else to run jbd_unlock_bh_state().
* We were unable to perform jbd_trylock_bh_state() inside j_list_lock.
* The caller must restart a list walk. Wait for someone else to run
* jbd_unlock_bh_state().
*/
static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
{
get_bh(bh);
spin_unlock(&journal_datalist_lock);
spin_unlock(&journal->j_list_lock);
jbd_lock_bh_state(bh);
jbd_unlock_bh_state(bh);
put_bh(bh);
@@ -121,7 +119,7 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
* the last checkpoint buffer is cleansed)
*
* Called with the journal locked.
* Called with journal_datalist_lock held.
* Called with j_list_lock held.
*/
static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
{
@@ -129,7 +127,7 @@ static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
struct buffer_head *bh;
int ret = 0;
assert_spin_locked(&journal_datalist_lock);
assert_spin_locked(&journal->j_list_lock);
jh = transaction->t_checkpoint_list;
if (!jh)
return 0;
@@ -141,7 +139,7 @@ static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
bh = jh2bh(jh);
if (buffer_locked(bh)) {
atomic_inc(&bh->b_count);
spin_unlock(&journal_datalist_lock);
spin_unlock(&journal->j_list_lock);
unlock_journal(journal);
wait_on_buffer(bh);
/* the journal_head may have gone by now */
@@ -162,7 +160,7 @@ static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
transaction_t *transaction = jh->b_transaction;
tid_t tid = transaction->t_tid;
spin_unlock(&journal_datalist_lock);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
log_start_commit(journal, transaction);
unlock_journal(journal);
@@ -196,20 +194,20 @@ static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
return ret;
out_return_1:
lock_journal(journal);
spin_lock(&journal_datalist_lock);
spin_lock(&journal->j_list_lock);
return 1;
}
#define NR_BATCH 64
static void __flush_batch(struct buffer_head **bhs, int *batch_count)
static void
__flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
{
int i;
spin_unlock(&journal_datalist_lock);
spin_unlock(&journal->j_list_lock);
ll_rw_block(WRITE, *batch_count, bhs);
blk_run_queues();
spin_lock(&journal_datalist_lock);
spin_lock(&journal->j_list_lock);
for (i = 0; i < *batch_count; i++) {
struct buffer_head *bh = bhs[i];
clear_bit(BH_JWrite, &bh->b_state);
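The function above encodes a rule worth noting: j_list_lock is a spinlock, so it cannot be held across ll_rw_block(), which may sleep. __flush_batch() therefore drops the lock around the I/O submission, and every caller must assume the checkpoint list changed while it was unlocked. A distilled sketch of the calling discipline (control flow simplified, not a hunk from the patch):

struct buffer_head *bhs[NR_BATCH];
int batch_count = 0;

spin_lock(&journal->j_list_lock);
repeat:
	/* ... walk the checkpoint list, collecting dirty buffers in bhs[] ... */
	if (batch_count == NR_BATCH) {
		/* Drops and retakes j_list_lock around the write-out. */
		__flush_batch(journal, bhs, &batch_count);
		goto repeat;	/* list may have changed while unlocked */
	}
spin_unlock(&journal->j_list_lock);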
@@ -225,7 +223,7 @@ static void __flush_batch(struct buffer_head **bhs, int *batch_count)
* Return 1 if something happened which requires us to abort the current
* scan of the checkpoint list.
*
* Called with journal_datalist_lock held.
* Called with j_list_lock held.
* Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
*/
static int __flush_buffer(journal_t *journal, struct journal_head *jh,
@@ -253,7 +251,7 @@ static int __flush_buffer(journal_t *journal, struct journal_head *jh,
jbd_unlock_bh_state(bh);
(*batch_count)++;
if (*batch_count == NR_BATCH) {
__flush_batch(bhs, batch_count);
__flush_batch(journal, bhs, batch_count);
ret = 1;
}
} else {
@@ -287,7 +285,7 @@ static int __flush_buffer(journal_t *journal, struct journal_head *jh,
*/
/* @@@ `nblocks' is unused. Should it be used? */
int log_do_checkpoint (journal_t *journal, int nblocks)
int log_do_checkpoint(journal_t *journal, int nblocks)
{
transaction_t *transaction, *last_transaction, *next_transaction;
int result;
@@ -314,7 +312,7 @@ int log_do_checkpoint (journal_t *journal, int nblocks)
* AKPM: check this code. I had a feeling a while back that it
* degenerates into a busy loop at unmount time.
*/
spin_lock(&journal_datalist_lock);
spin_lock(&journal->j_list_lock);
repeat:
transaction = journal->j_checkpoint_transactions;
if (transaction == NULL)
@@ -340,14 +338,14 @@ int log_do_checkpoint (journal_t *journal, int nblocks)
bh = jh2bh(jh);
if (!jbd_trylock_bh_state(bh)) {
jbd_sync_bh(journal, bh);
spin_lock(&journal_datalist_lock);
spin_lock(&journal->j_list_lock);
break;
}
retry = __flush_buffer(journal, jh, bhs, &batch_count,
&drop_count);
} while (jh != last_jh && !retry);
if (batch_count) {
__flush_batch(bhs, &batch_count);
__flush_batch(journal, bhs, &batch_count);
goto repeat;
}
if (retry)
@@ -363,7 +361,7 @@ int log_do_checkpoint (journal_t *journal, int nblocks)
} while (transaction != last_transaction);
done:
spin_unlock(&journal_datalist_lock);
spin_unlock(&journal->j_list_lock);
result = cleanup_journal_tail(journal);
if (result < 0)
return result;
@@ -402,7 +400,7 @@ int cleanup_journal_tail(journal_t *journal)
* start. */
/* j_checkpoint_transactions needs locking */
spin_lock(&journal_datalist_lock);
spin_lock(&journal->j_list_lock);
transaction = journal->j_checkpoint_transactions;
if (transaction) {
first_tid = transaction->t_tid;
@@ -417,7 +415,7 @@ int cleanup_journal_tail(journal_t *journal)
first_tid = journal->j_transaction_sequence;
blocknr = journal->j_head;
}
spin_unlock(&journal_datalist_lock);
spin_unlock(&journal->j_list_lock);
J_ASSERT (blocknr != 0);
/* If the oldest pinned transaction is at the tail of the log
@@ -454,7 +452,7 @@ int cleanup_journal_tail(journal_t *journal)
* Find all the written-back checkpoint buffers in the journal and release them.
*
* Called with the journal locked.
* Called with journal_datalist_lock held.
* Called with j_list_lock held.
* Returns number of buffers reaped (for debug)
*/
@@ -506,7 +504,7 @@ int __journal_clean_checkpoint_list(journal_t *journal)
* checkpoint list.
*
* This function is called with the journal locked.
* This function is called with journal_datalist_lock held.
* This function is called with j_list_lock held.
*/
void __journal_remove_checkpoint(struct journal_head *jh)
@@ -551,20 +549,13 @@ void __journal_remove_checkpoint(struct journal_head *jh)
JBUFFER_TRACE(jh, "exit");
}
void journal_remove_checkpoint(struct journal_head *jh)
{
spin_lock(&journal_datalist_lock);
__journal_remove_checkpoint(jh);
spin_unlock(&journal_datalist_lock);
}
/*
* journal_insert_checkpoint: put a committed buffer onto a checkpoint
* list so that we know when it is safe to clean the transaction out of
* the log.
*
* Called with the journal locked.
* Called with journal_datalist_lock held.
* Called with j_list_lock held.
*/
void __journal_insert_checkpoint(struct journal_head *jh,
transaction_t *transaction)
@@ -573,7 +564,6 @@ void __journal_insert_checkpoint(struct journal_head *jh,
J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
assert_spin_locked(&journal_datalist_lock);
jh->b_cp_transaction = transaction;
if (!transaction->t_checkpoint_list) {
@@ -587,14 +577,6 @@ void __journal_insert_checkpoint(struct journal_head *jh,
transaction->t_checkpoint_list = jh;
}
void journal_insert_checkpoint(struct journal_head *jh,
transaction_t *transaction)
{
spin_lock(&journal_datalist_lock);
__journal_insert_checkpoint(jh, transaction);
spin_unlock(&journal_datalist_lock);
}
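Note that the unlocked journal_insert_checkpoint()/journal_remove_checkpoint() wrappers are deleted outright rather than converted: the remaining callers hold j_list_lock already and use the __-prefixed variants directly. If an unlocked call site were needed, it would presumably look like this (illustrative only):

spin_lock(&journal->j_list_lock);
__journal_insert_checkpoint(jh, transaction);
spin_unlock(&journal->j_list_lock);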
/*
* We've finished with this transaction structure: adios...
*
@@ -602,12 +584,12 @@ void journal_insert_checkpoint(struct journal_head *jh,
* point.
*
* Called with the journal locked.
* Called with journal_datalist_lock held.
* Called with j_list_lock held.
*/
void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
{
assert_spin_locked(&journal_datalist_lock);
assert_spin_locked(&journal->j_list_lock);
if (transaction->t_cpnext) {
transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
transaction->t_cpprev->t_cpnext = transaction->t_cpnext;
@@ -20,8 +20,6 @@
#include <linux/slab.h>
#include <linux/smp_lock.h>
extern spinlock_t journal_datalist_lock;
/*
* Default IO end handler for temporary BJ_IO buffer_heads.
*/
@@ -67,9 +65,9 @@ void journal_commit_transaction(journal_t *journal)
lock_journal(journal); /* Protect journal->j_running_transaction */
#ifdef COMMIT_STATS
spin_lock(&journal_datalist_lock);
spin_lock(&journal->j_list_lock);
summarise_journal_usage(journal);
spin_unlock(&journal_datalist_lock);
spin_unlock(&journal->j_list_lock);
#endif
lock_kernel();
@@ -121,7 +119,7 @@ void journal_commit_transaction(journal_t *journal)
while (commit_transaction->t_reserved_list) {
jh = commit_transaction->t_reserved_list;
JBUFFER_TRACE(jh, "reserved, unused: refile");
journal_refile_buffer(jh);
journal_refile_buffer(journal, jh);
}
/*
@@ -129,9 +127,9 @@ void journal_commit_transaction(journal_t *journal)
* checkpoint lists. We do this *before* commit because it potentially
* frees some memory
*/
spin_lock(&journal_datalist_lock);
spin_lock(&journal->j_list_lock);
__journal_clean_checkpoint_list(journal);
spin_unlock(&journal_datalist_lock);
spin_unlock(&journal->j_list_lock);
/* First part of the commit: force the revoke list out to disk.
* The revoke code generates its own metadata blocks on disk for this.
@@ -185,10 +183,10 @@ void journal_commit_transaction(journal_t *journal)
* Cleanup any flushed data buffers from the data list. Even in
* abort mode, we want to flush this out as soon as possible.
*
* We take journal_datalist_lock to protect the lists from
* We take j_list_lock to protect the lists from
* journal_try_to_free_buffers().
*/
spin_lock(&journal_datalist_lock);
spin_lock(&journal->j_list_lock);
write_out_data_locked:
bufs = 0;
@@ -214,9 +212,9 @@ void journal_commit_transaction(journal_t *journal)
* We have a lock ranking problem..
*/
if (!jbd_trylock_bh_state(bh)) {
spin_unlock(&journal_datalist_lock);
spin_unlock(&journal->j_list_lock);
schedule();
spin_lock(&journal_datalist_lock);
spin_lock(&journal->j_list_lock);
break;
}
__journal_unfile_buffer(jh);
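The "lock ranking problem" above: elsewhere jbd_lock_bh_state() is taken before j_list_lock, so a thread already holding j_list_lock must not block on the bh_state lock, or it can deadlock against a thread holding bh_state and waiting for j_list_lock. Hence the trylock with a back-off, whose shape is roughly (restart label illustrative; the hunk above uses break and an outer loop):

if (!jbd_trylock_bh_state(bh)) {
	/* Lock order is bh_state -> j_list_lock; don't block here. */
	spin_unlock(&journal->j_list_lock);
	schedule();		/* let the bh_state holder make progress */
	spin_lock(&journal->j_list_lock);
	goto restart;		/* re-walk the list from a known point */
}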
@@ -238,14 +236,14 @@ void journal_commit_transaction(journal_t *journal)
if (bufs || need_resched()) {
jbd_debug(2, "submit %d writes\n", bufs);
spin_unlock(&journal_datalist_lock);
spin_unlock(&journal->j_list_lock);
unlock_journal(journal);
if (bufs)
ll_rw_block(WRITE, bufs, wbuf);
cond_resched();
journal_brelse_array(wbuf, bufs);
lock_journal(journal);
spin_lock(&journal_datalist_lock);
spin_lock(&journal->j_list_lock);
if (bufs)
goto write_out_data_locked;
}
@@ -263,7 +261,7 @@ void journal_commit_transaction(journal_t *journal)
bh = jh2bh(jh);
if (buffer_locked(bh)) {
get_bh(bh);
spin_unlock(&journal_datalist_lock);
spin_unlock(&journal->j_list_lock);
unlock_journal(journal);
wait_on_buffer(bh);
if (unlikely(!buffer_uptodate(bh)))
@@ -279,7 +277,7 @@ void journal_commit_transaction(journal_t *journal)
goto write_out_data_locked;
sync_datalist_empty:
spin_unlock(&journal_datalist_lock);
spin_unlock(&journal->j_list_lock);
/*
* If we found any dirty or locked buffers, then we should have
@@ -311,7 +309,7 @@ void journal_commit_transaction(journal_t *journal)
if (is_journal_aborted(journal)) {
JBUFFER_TRACE(jh, "journal is aborting: refile");
journal_refile_buffer(jh);
journal_refile_buffer(journal, jh);
/* If that was the last one, we need to clean up
* any descriptor buffers which may have been
* already allocated, even if we are now
@@ -355,7 +353,7 @@ void journal_commit_transaction(journal_t *journal)
completion later */
BUFFER_TRACE(bh, "ph3: file as descriptor");
journal_file_buffer(descriptor, commit_transaction,
BJ_LogCtl);
BJ_LogCtl);
}
/* Where is the buffer to be written? */
@@ -462,7 +460,7 @@ void journal_commit_transaction(journal_t *journal)
jbd_debug(3, "JBD: commit phase 4\n");
/*
* akpm: these are BJ_IO, and journal_datalist_lock is not needed.
* akpm: these are BJ_IO, and j_list_lock is not needed.
* See __journal_try_to_free_buffer.
*/
wait_for_iobuf:
@@ -483,7 +481,7 @@ void journal_commit_transaction(journal_t *journal)
clear_buffer_jwrite(bh);
JBUFFER_TRACE(jh, "ph4: unfile after journal write");
journal_unfile_buffer(jh);
journal_unfile_buffer(journal, jh);
/*
* akpm: don't put back a buffer_head with stale pointers
@@ -543,8 +541,8 @@ void journal_commit_transaction(journal_t *journal)
}
BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
clear_bit(BH_JWrite, &bh->b_state);
journal_unfile_buffer(jh);
clear_buffer_jwrite(bh);
journal_unfile_buffer(journal, jh);
jh->b_transaction = NULL;
journal_put_journal_head(jh);
__brelse(bh); /* One for getblk */
@@ -664,7 +662,7 @@ void journal_commit_transaction(journal_t *journal)
jh->b_frozen_data = NULL;
}
spin_lock(&journal_datalist_lock);
spin_lock(&journal->j_list_lock);
cp_transaction = jh->b_cp_transaction;
if (cp_transaction) {
JBUFFER_TRACE(jh, "remove from old cp transaction");
@@ -706,7 +704,7 @@ void journal_commit_transaction(journal_t *journal)
journal_remove_journal_head(bh);
__brelse(bh);
}
spin_unlock(&journal_datalist_lock);
spin_unlock(&journal->j_list_lock);
}
/* Done with this transaction! */
@@ -720,7 +718,7 @@ void journal_commit_transaction(journal_t *journal)
journal->j_commit_sequence = commit_transaction->t_tid;
journal->j_committing_transaction = NULL;
spin_lock(&journal_datalist_lock);
spin_lock(&journal->j_list_lock);
if (commit_transaction->t_checkpoint_list == NULL) {
__journal_drop_transaction(journal, commit_transaction);
} else {
@@ -739,7 +737,7 @@ void journal_commit_transaction(journal_t *journal)
commit_transaction;
}
}
spin_unlock(&journal_datalist_lock);
spin_unlock(&journal->j_list_lock);
jbd_debug(1, "JBD: commit %d complete, head %d\n",
journal->j_commit_sequence, journal->j_tail_sequence);
@@ -83,39 +83,6 @@ EXPORT_SYMBOL(journal_force_commit);
static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
/*
* journal_datalist_lock is used to protect data buffers:
*
* bh->b_transaction
* bh->b_tprev
* bh->b_tnext
*
* journal_free_buffer() is called from journal_try_to_free_buffer(), and is
* async wrt everything else.
*
* It is also used for checkpoint data, also to protect against
* journal_try_to_free_buffer():
*
* bh->b_cp_transaction
* bh->b_cpnext
* bh->b_cpprev
* transaction->t_checkpoint_list
* transaction->t_cpnext
* transaction->t_cpprev
* journal->j_checkpoint_transactions
*
* It is global at this time rather than per-journal because it's
* impossible for __journal_free_buffer to go from a buffer_head
* back to a journal_t unracily (well, not true. Fix later)
*
*
* The `datalist' and `checkpoint list' functions are quite
* separate and we could use two spinlocks here.
*
* lru_list_lock nests inside journal_datalist_lock.
*/
spinlock_t journal_datalist_lock = SPIN_LOCK_UNLOCKED;
/*
* List of all journals in the system. Protected by the BKL.
*/
@@ -1515,26 +1482,6 @@ int journal_blocks_per_page(struct inode *inode)
return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
}
/*
* shrink_journal_memory().
* Called when we're under memory pressure. Free up all the written-back
* checkpointed metadata buffers.
*/
void shrink_journal_memory(void)
{
struct list_head *list;
lock_kernel();
list_for_each(list, &all_journals) {
journal_t *journal =
list_entry(list, journal_t, j_all_journals);
spin_lock(&journal_datalist_lock);
__journal_clean_checkpoint_list(journal);
spin_unlock(&journal_datalist_lock);
}
unlock_kernel();
}
/*
* Simple support for retrying memory allocations. Introduced to help to
* debug different VM deadlock avoidance strategies.
@@ -1660,7 +1607,6 @@ static void journal_free_journal_head(struct journal_head *jh)
*
* Doesn't need the journal lock.
* May sleep.
* Cannot be called with journal_datalist_lock held.
*/
struct journal_head *journal_add_journal_head(struct buffer_head *bh)
{
This diff is collapsed.
@@ -836,10 +836,10 @@ struct journal_s
*/
/* Filing buffers */
extern void journal_unfile_buffer(journal_t *, struct journal_head *);
extern void __journal_unfile_buffer(struct journal_head *);
extern void journal_unfile_buffer(struct journal_head *);
extern void __journal_refile_buffer(struct journal_head *);
extern void journal_refile_buffer(struct journal_head *);
extern void journal_refile_buffer(journal_t *, struct journal_head *);
extern void __journal_file_buffer(struct journal_head *, transaction_t *, int);
extern void __journal_free_buffer(struct journal_head *bh);
extern void journal_file_buffer(struct journal_head *, transaction_t *, int);
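The reason journal_unfile_buffer() and journal_refile_buffer() grow a journal_t * argument while the __-prefixed forms do not: the locked wrappers must find j_list_lock, and, as the deleted journal.c comment conceded, there is no race-free way to get from a journal_head back to its journal_t, so the caller passes it in. A sketch of the resulting convention (illustrative; the real bodies are in the collapsed part of this diff):

/*
 * __journal_refile_buffer(): caller already holds j_list_lock.
 * journal_refile_buffer(): takes the locks itself, hence needs @journal.
 */
void journal_refile_buffer(journal_t *journal, struct journal_head *jh)
{
	struct buffer_head *bh = jh2bh(jh);

	jbd_lock_bh_state(bh);
	spin_lock(&journal->j_list_lock);
	__journal_refile_buffer(jh);
	spin_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(bh);
}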
@@ -854,10 +854,8 @@ extern void journal_commit_transaction(journal_t *);
/* Checkpoint list management */
int __journal_clean_checkpoint_list(journal_t *journal);
extern void journal_remove_checkpoint(struct journal_head *);
extern void __journal_remove_checkpoint(struct journal_head *);
extern void journal_insert_checkpoint(struct journal_head *, transaction_t *);
extern void __journal_insert_checkpoint(struct journal_head *,transaction_t *);
void __journal_remove_checkpoint(struct journal_head *);
void __journal_insert_checkpoint(struct journal_head *, transaction_t *);
/* Buffer IO */
extern int
@@ -1017,9 +1015,6 @@ extern void log_wait_for_space(journal_t *, int nblocks);
extern void __journal_drop_transaction(journal_t *, transaction_t *);
extern int cleanup_journal_tail(journal_t *);
/* Reduce journal memory usage by flushing */
extern void shrink_journal_memory(void);
/* Debugging code only: */
#define jbd_ENOSYS() \