Commit 11059034 authored by Dave Kleikamp

JFS: Fix hang while flushing outstanding transactions under heavy load

During syncbarrier recovery, when JFS is waiting for all outstanding
transactions to complete before allowing new ones to start, there was a
window in which a new transaction could start but not yet be committed at
the moment JFS decided it could stop flushing the journal to disk.  As a
result, the log_SYNCBARRIER flag was never reset and no new transactions
were allowed to start.

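This is fixed by using the log->active field to determine when to reset the
log_FLUSH flag: the flag is now cleared in txEnd() once log->active drops to
zero, instead of in lmPostGC() when group-commit pageout finishes.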
parent 236b5445
......@@ -674,28 +674,29 @@ int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
}
jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);
if (tblk->xflag & COMMIT_LAZY) {
/*
* Lazy transactions can leave now
*/
if (tblk->xflag & COMMIT_LAZY)
tblk->flag |= tblkGC_LAZY;
LOGGC_UNLOCK(log);
return 0;
}
/*
* group commit pageout in progress
*/
if ((!(log->cflag & logGC_PAGEOUT)) && log->cqueue.head) {
if ((!(log->cflag & logGC_PAGEOUT)) && log->cqueue.head &&
(!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag))) {
/*
* only transaction in the commit queue:
* No pageout in progress
*
* start one-transaction group commit as
* its group leader.
* start group commit as its group leader.
*/
log->cflag |= logGC_PAGEOUT;
lmGCwrite(log, 0);
}
if (tblk->xflag & COMMIT_LAZY) {
/*
* Lazy transactions can leave now
*/
LOGGC_UNLOCK(log);
return 0;
}
/* lmGCwrite gives up LOGGC_LOCK, check again */
if (tblk->flag & tblkGC_COMMITTED) {
......@@ -894,11 +895,8 @@ void lmPostGC(struct lbuf * bp)
* the first transaction entering group commit
* will elect herself as new group leader.
*/
else {
else
log->cflag &= ~logGC_PAGEOUT;
clear_bit(log_FLUSH, &log->flag);
WARN_ON(log->flush_tblk);
}
//LOGGC_UNLOCK(log);
spin_unlock_irqrestore(&log->gclock, flags);
......
......@@ -518,22 +518,24 @@ void txEnd(tid_t tid)
/*
* mark the tblock not active
*/
--log->active;
if (--log->active == 0) {
clear_bit(log_FLUSH, &log->flag);
/*
* synchronize with logsync barrier
*/
if (test_bit(log_SYNCBARRIER, &log->flag) && log->active == 0) {
/* forward log syncpt */
/* lmSync(log); */
/*
* synchronize with logsync barrier
*/
if (test_bit(log_SYNCBARRIER, &log->flag)) {
/* forward log syncpt */
/* lmSync(log); */
jfs_info(" log barrier off: 0x%x", log->lsn);
jfs_info("log barrier off: 0x%x", log->lsn);
/* enable new transactions start */
clear_bit(log_SYNCBARRIER, &log->flag);
/* enable new transactions start */
clear_bit(log_SYNCBARRIER, &log->flag);
/* wakeup all waitors for logsync barrier */
TXN_WAKEUP(&log->syncwait);
/* wakeup all waitors for logsync barrier */
TXN_WAKEUP(&log->syncwait);
}
}
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment