Commit a2df663d authored by Andrew Morton, committed by Linus Torvalds

[PATCH] JBD: checkpointing optimisations

From: Alex Tomas <bzzz@tmi.comex.ru>

Some transaction checkpointing improvements for the JBD commit phase.  Decent
speedups:

creation of 500K files in single dir (with htree, of course):
 before: 4m16.094s, 4m12.035s, 4m11.911s
 after:  1m41.364s, 1m43.461s, 1m45.189s

removal of 500K files in single dir:
 before: 43m50.161s
 after:  38m45.510s


- Make __log_wait_for_space() recalculate the needed blocks because journal
  free space changes during commit

- Make log_do_checkpoint() start scanning from the oldest transaction

- Make log_do_checkpoint() stop scanning if a transaction gets dropped.
  The caller will reevaluate the transaction state and decide whether more
  space needs to be generated in the log.

  The effect of this is to smooth out the I/O patterns, avoiding the huge
  stop-and-go which currently happens when forced checkpointing writes out
  and waits upon 3/4 of the journal's size worth of data.
parent 20c52ab8
...@@ -75,11 +75,12 @@ static int __try_to_free_cp_buf(struct journal_head *jh) ...@@ -75,11 +75,12 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
* Called under j-state_lock *only*. It will be unlocked if we have to wait * Called under j-state_lock *only*. It will be unlocked if we have to wait
* for a checkpoint to free up some space in the log. * for a checkpoint to free up some space in the log.
*/ */
void __log_wait_for_space(journal_t *journal)
void __log_wait_for_space(journal_t *journal, int nblocks)
{ {
int nblocks;
assert_spin_locked(&journal->j_state_lock); assert_spin_locked(&journal->j_state_lock);
nblocks = jbd_space_needed(journal);
while (__log_space_left(journal) < nblocks) { while (__log_space_left(journal) < nblocks) {
if (journal->j_flags & JFS_ABORT) if (journal->j_flags & JFS_ABORT)
return; return;
...@@ -91,9 +92,10 @@ void __log_wait_for_space(journal_t *journal, int nblocks) ...@@ -91,9 +92,10 @@ void __log_wait_for_space(journal_t *journal, int nblocks)
* were waiting for the checkpoint lock * were waiting for the checkpoint lock
*/ */
spin_lock(&journal->j_state_lock); spin_lock(&journal->j_state_lock);
nblocks = jbd_space_needed(journal);
if (__log_space_left(journal) < nblocks) { if (__log_space_left(journal) < nblocks) {
spin_unlock(&journal->j_state_lock); spin_unlock(&journal->j_state_lock);
log_do_checkpoint(journal, nblocks); log_do_checkpoint(journal);
spin_lock(&journal->j_state_lock); spin_lock(&journal->j_state_lock);
} }
up(&journal->j_checkpoint_sem); up(&journal->j_checkpoint_sem);
...@@ -279,9 +281,7 @@ static int __flush_buffer(journal_t *journal, struct journal_head *jh, ...@@ -279,9 +281,7 @@ static int __flush_buffer(journal_t *journal, struct journal_head *jh,
* *
* The journal should be locked before calling this function. * The journal should be locked before calling this function.
*/ */
int log_do_checkpoint(journal_t *journal)
/* @@@ `nblocks' is unused. Should it be used? */
int log_do_checkpoint(journal_t *journal, int nblocks)
{ {
int result; int result;
int batch_count = 0; int batch_count = 0;
...@@ -315,7 +315,7 @@ int log_do_checkpoint(journal_t *journal, int nblocks) ...@@ -315,7 +315,7 @@ int log_do_checkpoint(journal_t *journal, int nblocks)
int cleanup_ret, retry = 0; int cleanup_ret, retry = 0;
tid_t this_tid; tid_t this_tid;
transaction = journal->j_checkpoint_transactions->t_cpnext; transaction = journal->j_checkpoint_transactions;
this_tid = transaction->t_tid; this_tid = transaction->t_tid;
jh = transaction->t_checkpoint_list; jh = transaction->t_checkpoint_list;
last_jh = jh->b_cpprev; last_jh = jh->b_cpprev;
...@@ -332,27 +332,19 @@ int log_do_checkpoint(journal_t *journal, int nblocks) ...@@ -332,27 +332,19 @@ int log_do_checkpoint(journal_t *journal, int nblocks)
retry = 1; retry = 1;
break; break;
} }
retry = __flush_buffer(journal, jh, bhs, &batch_count, retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count);
&drop_count);
} while (jh != last_jh && !retry); } while (jh != last_jh && !retry);
if (batch_count) {
if (batch_count)
__flush_batch(journal, bhs, &batch_count); __flush_batch(journal, bhs, &batch_count);
continue;
}
if (retry)
continue;
/*
* If someone emptied the checkpoint list while we slept, we're
* done.
*/
if (!journal->j_checkpoint_transactions)
break;
/* /*
* If someone cleaned up this transaction while we slept, we're * If someone cleaned up this transaction while we slept, we're
* done * done
*/ */
if (journal->j_checkpoint_transactions->t_cpnext != transaction) if (journal->j_checkpoint_transactions != transaction)
break;
if (retry)
continue; continue;
/* /*
* Maybe it's a new transaction, but it fell at the same * Maybe it's a new transaction, but it fell at the same
...@@ -367,6 +359,8 @@ int log_do_checkpoint(journal_t *journal, int nblocks) ...@@ -367,6 +359,8 @@ int log_do_checkpoint(journal_t *journal, int nblocks)
*/ */
cleanup_ret = __cleanup_transaction(journal, transaction); cleanup_ret = __cleanup_transaction(journal, transaction);
J_ASSERT(drop_count != 0 || cleanup_ret != 0); J_ASSERT(drop_count != 0 || cleanup_ret != 0);
if (journal->j_checkpoint_transactions != transaction)
break;
} }
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
result = cleanup_journal_tail(journal); result = cleanup_journal_tail(journal);
......
...@@ -1076,7 +1076,7 @@ void journal_destroy(journal_t *journal) ...@@ -1076,7 +1076,7 @@ void journal_destroy(journal_t *journal)
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
while (journal->j_checkpoint_transactions != NULL) { while (journal->j_checkpoint_transactions != NULL) {
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
log_do_checkpoint(journal, 1); log_do_checkpoint(journal);
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
} }
...@@ -1284,7 +1284,7 @@ int journal_flush(journal_t *journal) ...@@ -1284,7 +1284,7 @@ int journal_flush(journal_t *journal)
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
while (!err && journal->j_checkpoint_transactions != NULL) { while (!err && journal->j_checkpoint_transactions != NULL) {
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
err = log_do_checkpoint(journal, journal->j_maxlen); err = log_do_checkpoint(journal);
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
} }
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
......
...@@ -206,15 +206,10 @@ static int start_this_handle(journal_t *journal, handle_t *handle) ...@@ -206,15 +206,10 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
* Also, this test is inconsistent with the matching one in * Also, this test is inconsistent with the matching one in
* journal_extend(). * journal_extend().
*/ */
needed = journal->j_max_transaction_buffers; if (__log_space_left(journal) < jbd_space_needed(journal)) {
if (journal->j_committing_transaction)
needed += journal->j_committing_transaction->
t_outstanding_credits;
if (__log_space_left(journal) < needed) {
jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
spin_unlock(&transaction->t_handle_lock); spin_unlock(&transaction->t_handle_lock);
__log_wait_for_space(journal, needed); __log_wait_for_space(journal);
goto repeat_locked; goto repeat_locked;
} }
......
...@@ -992,9 +992,9 @@ int log_start_commit(journal_t *journal, tid_t tid); ...@@ -992,9 +992,9 @@ int log_start_commit(journal_t *journal, tid_t tid);
int __log_start_commit(journal_t *journal, tid_t tid); int __log_start_commit(journal_t *journal, tid_t tid);
int journal_start_commit(journal_t *journal, tid_t *tid); int journal_start_commit(journal_t *journal, tid_t *tid);
int log_wait_commit(journal_t *journal, tid_t tid); int log_wait_commit(journal_t *journal, tid_t tid);
int log_do_checkpoint(journal_t *journal, int nblocks); int log_do_checkpoint(journal_t *journal);
void __log_wait_for_space(journal_t *journal, int nblocks); void __log_wait_for_space(journal_t *journal);
extern void __journal_drop_transaction(journal_t *, transaction_t *); extern void __journal_drop_transaction(journal_t *, transaction_t *);
extern int cleanup_journal_tail(journal_t *); extern int cleanup_journal_tail(journal_t *);
...@@ -1053,6 +1053,19 @@ static inline int tid_geq(tid_t x, tid_t y) ...@@ -1053,6 +1053,19 @@ static inline int tid_geq(tid_t x, tid_t y)
extern int journal_blocks_per_page(struct inode *inode); extern int journal_blocks_per_page(struct inode *inode);
/*
 * jbd_space_needed - free-space threshold for starting a new transaction.
 *
 * Yields the journal's j_max_transaction_buffers, plus the
 * t_outstanding_credits of the committing transaction when one exists.
 * This is the minimum number of blocks which must be free in the journal
 * before a new transaction may be started.
 *
 * Must be called under j_state_lock.
 */
static inline int jbd_space_needed(journal_t *journal)
{
	/* Credits still held by an in-flight commit; zero when none is running. */
	int committing_credits = 0;

	if (journal->j_committing_transaction)
		committing_credits =
			journal->j_committing_transaction->t_outstanding_credits;

	return journal->j_max_transaction_buffers + committing_credits;
}
/* /*
* Definitions which augment the buffer_head layer * Definitions which augment the buffer_head layer
*/ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment