Commit e821ceb2 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] JBD: implement b_transaction locking rules

Go through all uses of b_transaction and implement the locking rules.

Fairly straightforward.
parent b07da5e5
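In brief, the rule this patch enforces is that jh->b_transaction may only be read or written while holding the buffer's bh_state lock. A minimal sketch of the access pattern, using the patch's own primitives; the helper name jh_get_transaction is hypothetical and not part of the patch:

/*
 * Illustrative sketch only: b_transaction is protected by the
 * buffer's bh_state lock, so sample it under jbd_lock_bh_state().
 * The helper name is hypothetical.
 */
static transaction_t *jh_get_transaction(struct journal_head *jh)
{
	struct buffer_head *bh = jh2bh(jh);
	transaction_t *transaction;

	jbd_lock_bh_state(bh);
	transaction = jh->b_transaction;
	jbd_unlock_bh_state(bh);
	return transaction;
}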
@@ -50,6 +50,7 @@ static inline void __buffer_unlink(struct journal_head *jh)
* Try to release a checkpointed buffer from its transaction.
* Returns 1 if we released it.
* Requires journal_datalist_lock
* Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
*/
static int __try_to_free_cp_buf(struct journal_head *jh)
{
@@ -59,10 +60,13 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
JBUFFER_TRACE(jh, "remove from checkpoint list");
__journal_remove_checkpoint(jh);
jbd_unlock_bh_state(bh);
journal_remove_journal_head(bh);
BUFFER_TRACE(bh, "release");
__brelse(bh);
ret = 1;
} else {
jbd_unlock_bh_state(bh);
}
return ret;
}
@@ -92,6 +96,20 @@ void log_wait_for_space(journal_t *journal, int nblocks)
}
}
/*
* We failed jbd_trylock_bh_state() while holding journal_datalist_lock,
* so the caller must restart its list walk.  Wait here for whoever
* holds the bh_state lock to run jbd_unlock_bh_state().
*/
static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
{
get_bh(bh);
spin_unlock(&journal_datalist_lock);
jbd_lock_bh_state(bh);
jbd_unlock_bh_state(bh);
put_bh(bh);
}
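Callers use it as sketched below, a pattern assembled from the two call sites in this patch; walk_one_buffer and process_jh are hypothetical stand-ins. Because journal_datalist_lock ranks below the bh_state lock, blocking in jbd_lock_bh_state() here could deadlock, so only a trylock is permitted:

/*
 * Sketch of the calling convention, assembled from the call sites
 * below; process_jh() stands in for the real per-buffer work.
 */
static int walk_one_buffer(journal_t *journal, struct journal_head *jh)
{
	struct buffer_head *bh = jh2bh(jh);

	if (!jbd_trylock_bh_state(bh)) {
		/* Ranking forbids blocking: journal_datalist_lock is held */
		jbd_sync_bh(journal, bh);	/* drops journal_datalist_lock */
		return 1;			/* caller must restart the walk */
	}
	process_jh(jh);				/* responsible for dropping the bh_state lock */
	return 0;
}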
/*
* Clean up a transaction's checkpoint list.
*
@@ -131,12 +149,21 @@ static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
__brelse(bh);
goto out_return_1;
}
/*
* This is foul: lock ranking forbids blocking on the bh_state lock
* while journal_datalist_lock is held, so a trylock is all we can do.
*/
if (!jbd_trylock_bh_state(bh)) {
jbd_sync_bh(journal, bh);
goto out_return_1;
}
if (jh->b_transaction != NULL) {
transaction_t *transaction = jh->b_transaction;
tid_t tid = transaction->t_tid;
spin_unlock(&journal_datalist_lock);
jbd_unlock_bh_state(bh);
log_start_commit(journal, transaction);
unlock_journal(journal);
log_wait_commit(journal, tid);
@@ -156,11 +183,13 @@ static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) {
BUFFER_TRACE(bh, "remove from checkpoint");
__journal_remove_checkpoint(jh);
jbd_unlock_bh_state(bh);
journal_remove_journal_head(bh);
__brelse(bh);
ret = 1;
} else {
jbd_unlock_bh_state(bh);
}
jh = next_jh;
} while (jh != last_jh);
@@ -197,6 +226,7 @@ static void __flush_batch(struct buffer_head **bhs, int *batch_count)
* scan of the checkpoint list.
*
* Called with journal_datalist_lock held.
* Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
*/
static int __flush_buffer(journal_t *journal, struct journal_head *jh,
struct buffer_head **bhs, int *batch_count,
@@ -216,10 +246,11 @@ static int __flush_buffer(journal_t *journal, struct journal_head *jh,
* disk, as that would break recoverability.
*/
BUFFER_TRACE(bh, "queue");
-	atomic_inc(&bh->b_count);
-	J_ASSERT_BH(bh, !test_bit(BH_JWrite, &bh->b_state));
-	set_bit(BH_JWrite, &bh->b_state);
+	get_bh(bh);
+	J_ASSERT_BH(bh, !buffer_jwrite(bh));
+	set_buffer_jwrite(bh);
bhs[*batch_count] = bh;
jbd_unlock_bh_state(bh);
(*batch_count)++;
if (*batch_count == NR_BATCH) {
__flush_batch(bhs, batch_count);
@@ -302,8 +333,16 @@ int log_do_checkpoint (journal_t *journal, int nblocks)
last_jh = jh->b_cpprev;
next_jh = jh;
do {
struct buffer_head *bh;
jh = next_jh;
next_jh = jh->b_cpnext;
bh = jh2bh(jh);
if (!jbd_trylock_bh_state(bh)) {
jbd_sync_bh(journal, bh);
spin_lock(&journal_datalist_lock);
break;
}
retry = __flush_buffer(journal, jh, bhs, &batch_count,
&drop_count);
} while (jh != last_jh && !retry);
@@ -439,10 +478,13 @@ int __journal_clean_checkpoint_list(journal_t *journal)
if (jh) {
struct journal_head *last_jh = jh->b_cpprev;
struct journal_head *next_jh = jh;
do {
jh = next_jh;
next_jh = jh->b_cpnext;
-			ret += __try_to_free_cp_buf(jh);
+			/* Use trylock because of the ranking */
+			if (jbd_trylock_bh_state(jh2bh(jh)))
+				ret += __try_to_free_cp_buf(jh);
} while (jh != last_jh);
}
} while (transaction != last_transaction);
@@ -465,9 +465,10 @@ void journal_commit_transaction(journal_t *journal)
* akpm: these are BJ_IO, and journal_datalist_lock is not needed.
* See __journal_try_to_free_buffer.
*/
wait_for_iobuf:
while (commit_transaction->t_iobuf_list != NULL) {
struct buffer_head *bh;
jh = commit_transaction->t_iobuf_list->b_tprev;
bh = jh2bh(jh);
if (buffer_locked(bh)) {
@@ -479,7 +480,7 @@ void journal_commit_transaction(journal_t *journal)
goto wait_for_iobuf;
}
-	clear_bit(BH_JWrite, &jh2bh(jh)->b_state);
+	clear_buffer_jwrite(bh);
JBUFFER_TRACE(jh, "ph4: unfile after journal write");
journal_unfile_buffer(jh);
@@ -495,7 +496,6 @@ void journal_commit_transaction(journal_t *journal)
* ->t_iobuf_list should contain only dummy buffer_heads
* which were created by journal_write_metadata_buffer().
*/
-	bh = jh2bh(jh);
BUFFER_TRACE(bh, "dumping temporary bh");
journal_put_journal_head(jh);
__brelse(bh);
@@ -637,6 +637,8 @@ void journal_commit_transaction(journal_t *journal)
struct buffer_head *bh;
jh = commit_transaction->t_forget;
+	bh = jh2bh(jh);
+	jbd_lock_bh_state(bh);
J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
jh->b_transaction == journal->j_running_transaction);
@@ -650,7 +652,6 @@ void journal_commit_transaction(journal_t *journal)
*
* Otherwise, we can just throw away the frozen data now.
*/
-	jbd_lock_bh_state(jh2bh(jh));
if (jh->b_committed_data) {
kfree(jh->b_committed_data);
jh->b_committed_data = NULL;
@@ -676,7 +677,6 @@ void journal_commit_transaction(journal_t *journal)
* by journal_forget, it may no longer be dirty and
* there's no point in keeping a checkpoint record for
* it. */
-	bh = jh2bh(jh);
/* A buffer which has been freed while still being
* journaled by a previous transaction may end up still
@@ -259,56 +259,6 @@ static void journal_kill_thread(journal_t *journal)
}
}
#if 0
This is no longer needed - we do it in commit quite efficiently.
Note that if this function is resurrected, the loop needs to
be reorganised into the next_jh/last_jh algorithm (sketched after this block).
/*
* journal_clean_data_list: cleanup after data IO.
*
* Once the IO system has finished writing the buffers on the transaction's
* data list, we can remove those buffers from the list. This function
* scans the list for such buffers and removes them cleanly.
*
* We assume that the journal is already locked.
* We are called with journal_datalist_lock held.
*
* AKPM: This function looks inefficient. Approximately O(n^2)
* for potentially thousands of buffers. It no longer shows on profiles
* because these buffers are mainly dropped in journal_commit_transaction().
*/
void __journal_clean_data_list(transaction_t *transaction)
{
struct journal_head *jh, *next;
assert_spin_locked(&journal_datalist_lock);
restart:
jh = transaction->t_sync_datalist;
if (!jh)
goto out;
do {
next = jh->b_tnext;
if (!buffer_locked(jh2bh(jh)) && !buffer_dirty(jh2bh(jh))) {
struct buffer_head *bh = jh2bh(jh);
BUFFER_TRACE(bh, "data writeout complete: unfile");
__journal_unfile_buffer(jh);
jh->b_transaction = NULL;
journal_remove_journal_head(bh);
__brelse(bh);
goto restart;
}
jh = next;
} while (transaction->t_sync_datalist &&
jh != transaction->t_sync_datalist);
out:
return;
}
#endif
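For reference, the next_jh/last_jh algorithm mentioned above is the walk this patch already uses in log_do_checkpoint() and __journal_clean_checkpoint_list(); a minimal sketch over the circular b_cpnext list, where walk_cp_list and visit() are placeholders:

/*
 * Sketch of the next_jh/last_jh walk over a circular b_cpnext list.
 * next_jh is fetched before visiting so visit() may free jh; last_jh
 * bounds the walk so removals cannot make it loop forever.
 */
static void walk_cp_list(struct journal_head *first,
			 void (*visit)(struct journal_head *))
{
	struct journal_head *last_jh = first->b_cpprev;
	struct journal_head *next_jh = first;
	struct journal_head *jh;

	do {
		jh = next_jh;
		next_jh = jh->b_cpnext;
		visit(jh);
	} while (jh != last_jh);
}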
/*
* journal_write_metadata_buffer: write a metadata buffer to the journal.
*