Commit 990aef1a authored by Andrew Morton, committed by Linus Torvalds

[PATCH] JBD: Finish protection of journal_head.b_frozen_data

We now start to move across the JBD data structures' fields, working from the
"innermost" fields outwards.

Start with journal_head.b_frozen_data, because the locking for this field was
partially implemented in jbd-010-b_committed_data-race-fix.patch.

It is protected by jbd_lock_bh_state().  We keep the lock_journal() and
spin_lock(&journal_datalist_lock) calls in place for now.  Later in this
patch series, spin_lock(&journal_datalist_lock) is replaced by
spin_lock(&journal->j_list_lock).

Of course, completing the locking around b_frozen_data also puts much of the
locking for other fields in place.
parent eacf9510
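A side note on the first hunk below: journal_commit_transaction() already holds
journal_datalist_lock, which ranks below the per-buffer state lock, so it may
only *try* to take jbd_lock_bh_state() and must back off and retry on failure.
A minimal userspace sketch of that trylock-and-back-off idiom, using pthreads
(list_lock, state_lock and process_under_both_locks() are illustrative names,
not from the kernel source):

	#include <pthread.h>
	#include <sched.h>

	/* Illustrative stand-ins for journal_datalist_lock and the
	 * per-buffer state lock. */
	static pthread_mutex_t list_lock  = PTHREAD_MUTEX_INITIALIZER;
	static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;

	/*
	 * Lock ranking: state_lock is taken before list_lock elsewhere,
	 * so while holding list_lock we may only trylock state_lock.
	 * On failure we drop list_lock and yield, letting the caller
	 * retry -- mirroring the jbd_trylock_bh_state()/schedule()
	 * dance in the patch.
	 */
	static int process_under_both_locks(void)
	{
		pthread_mutex_lock(&list_lock);
		if (pthread_mutex_trylock(&state_lock) != 0) {
			pthread_mutex_unlock(&list_lock);
			sched_yield();		/* analogous to schedule() */
			return 0;		/* caller retries */
		}
		/* ... safe to touch fields guarded by both locks ... */
		pthread_mutex_unlock(&state_lock);
		pthread_mutex_unlock(&list_lock);
		return 1;
	}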
@@ -210,8 +210,18 @@ void journal_commit_transaction(journal_t *journal)
 				wbuf[bufs++] = bh;
 			} else {
 				BUFFER_TRACE(bh, "writeout complete: unfile");
+				/*
+				 * We have a lock ranking problem..
+				 */
+				if (!jbd_trylock_bh_state(bh)) {
+					spin_unlock(&journal_datalist_lock);
+					schedule();
+					spin_lock(&journal_datalist_lock);
+					break;
+				}
 				__journal_unfile_buffer(jh);
 				jh->b_transaction = NULL;
+				jbd_unlock_bh_state(bh);
 				journal_remove_journal_head(bh);
 				__brelse(bh);
 			}
@@ -652,7 +662,6 @@ void journal_commit_transaction(journal_t *journal)
 			kfree(jh->b_frozen_data);
 			jh->b_frozen_data = NULL;
 		}
-		jbd_unlock_bh_state(jh2bh(jh));
 		spin_lock(&journal_datalist_lock);
 		cp_transaction = jh->b_cp_transaction;
@@ -687,11 +696,13 @@ void journal_commit_transaction(journal_t *journal)
 			__journal_insert_checkpoint(jh, commit_transaction);
 			JBUFFER_TRACE(jh, "refile for checkpoint writeback");
 			__journal_refile_buffer(jh);
+			jbd_unlock_bh_state(bh);
 		} else {
 			J_ASSERT_BH(bh, !buffer_dirty(bh));
 			J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
 			__journal_unfile_buffer(jh);
 			jh->b_transaction = 0;
+			jbd_unlock_bh_state(bh);
 			journal_remove_journal_head(bh);
 			__brelse(bh);
 		}
...
@@ -359,9 +359,10 @@ int journal_write_metadata_buffer(transaction_t *transaction,
 	int do_escape = 0;
 	char *mapped_data;
 	struct buffer_head *new_bh;
-	struct journal_head * new_jh;
+	struct journal_head *new_jh;
 	struct page *new_page;
 	unsigned int new_offset;
+	struct buffer_head *bh_in = jh2bh(jh_in);
 
 	/*
 	 * The buffer really shouldn't be locked: only the current committing
@@ -372,13 +373,16 @@ int journal_write_metadata_buffer(transaction_t *transaction,
 	 * also part of a shared mapping, and another thread has
 	 * decided to launch a writepage() against this buffer.
 	 */
-	J_ASSERT_JH(jh_in, buffer_jbddirty(jh2bh(jh_in)));
+	J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
+
+	new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
 
 	/*
	 * If a new transaction has already done a buffer copy-out, then
 	 * we use that version of the data for the commit.
 	 */
+	jbd_lock_bh_state(bh_in);
+repeat:
 	if (jh_in->b_frozen_data) {
 		done_copy_out = 1;
 		new_page = virt_to_page(jh_in->b_frozen_data);
@@ -388,12 +392,13 @@ int journal_write_metadata_buffer(transaction_t *transaction,
 		new_offset = virt_to_offset(jh2bh(jh_in)->b_data);
 	}
 
-	mapped_data = ((char *) kmap(new_page)) + new_offset;
+	mapped_data = kmap_atomic(new_page, KM_USER0);
 	/*
 	 * Check for escaping
 	 */
-	if (* ((unsigned int *) mapped_data) == htonl(JFS_MAGIC_NUMBER)) {
+	if (*((unsigned int *)(mapped_data + new_offset)) ==
+				htonl(JFS_MAGIC_NUMBER)) {
 		need_copy_out = 1;
 		do_escape = 1;
 	}
@@ -401,38 +406,47 @@ int journal_write_metadata_buffer(transaction_t *transaction,
 	/*
 	 * Do we need to do a data copy?
 	 */
 	if (need_copy_out && !done_copy_out) {
 		char *tmp;
-		tmp = jbd_rep_kmalloc(jh2bh(jh_in)->b_size, GFP_NOFS);
+		kunmap_atomic(mapped_data, KM_USER0);
+		jbd_unlock_bh_state(bh_in);
+		tmp = jbd_rep_kmalloc(bh_in->b_size, GFP_NOFS);
+		jbd_lock_bh_state(bh_in);
+		if (jh_in->b_frozen_data) {
+			kfree(new_page);
+			goto repeat;
+		}
 
 		jh_in->b_frozen_data = tmp;
-		memcpy (tmp, mapped_data, jh2bh(jh_in)->b_size);
+		mapped_data = kmap_atomic(new_page, KM_USER0);
+		memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size);
 
 		/* If we get to this path, we'll always need the new
 		   address kmapped so that we can clear the escaped
 		   magic number below. */
-		kunmap(new_page);
 		new_page = virt_to_page(tmp);
 		new_offset = virt_to_offset(tmp);
-		mapped_data = ((char *) kmap(new_page)) + new_offset;
 		done_copy_out = 1;
 	}
 
 	/*
-	 * Right, time to make up the new buffer_head.
+	 * Did we need to do an escaping?  Now we've done all the
+	 * copying, we can finally do so.
 	 */
-	new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
+	if (do_escape)
+		*((unsigned int *)(mapped_data + new_offset)) = 0;
+	kunmap_atomic(mapped_data, KM_USER0);
 
 	/* keep subsequent assertions sane */
 	new_bh->b_state = 0;
 	init_buffer(new_bh, NULL, NULL);
 	atomic_set(&new_bh->b_count, 1);
-	new_jh = journal_add_journal_head(new_bh);
-	set_bh_page(new_bh, new_page, new_offset);
+	jbd_unlock_bh_state(bh_in);
+
+	new_jh = journal_add_journal_head(new_bh);	/* This sleeps */
+	set_bh_page(new_bh, new_page, new_offset);
 	new_jh->b_transaction = NULL;
 	new_bh->b_size = jh2bh(jh_in)->b_size;
 	new_bh->b_bdev = transaction->t_journal->j_dev;
@@ -442,15 +456,6 @@ int journal_write_metadata_buffer(transaction_t *transaction,
 	*jh_out = new_jh;
 
-	/*
-	 * Did we need to do an escaping?  Now we've done all the
-	 * copying, we can finally do so.
-	 */
-	if (do_escape)
-		* ((unsigned int *) mapped_data) = 0;
-	kunmap(new_page);
-
 	/*
 	 * The to-be-written buffer needs to get moved to the io queue,
 	 * and the original buffer whose contents we are shadowing or
...
@@ -502,7 +502,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, int force_copy)
 	int error;
 	char *frozen_buffer = NULL;
 	int need_copy = 0;
-	int locked;
 
 	jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy);
@@ -512,16 +511,9 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, int force_copy)
 	/* @@@ Need to check for errors here at some point. */
 
-	locked = test_set_buffer_locked(bh);
-	if (locked) {
-		/* We can't reliably test the buffer state if we found
-		 * it already locked, so just wait for the lock and
-		 * retry. */
-		unlock_journal(journal);
-		wait_on_buffer(bh);
-		lock_journal(journal);
-		goto repeat;
-	}
+	lock_buffer(bh);
+	jbd_lock_bh_state(bh);
+	spin_lock(&journal_datalist_lock);
 
 	/* We now hold the buffer lock so it is safe to query the buffer
 	 * state.  Is the buffer dirty?
@@ -537,7 +529,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, int force_copy)
 	 * the buffer dirtied, ugh.)  */
 
 	if (buffer_dirty(bh)) {
-		spin_lock(&journal_datalist_lock);
 		/* First question: is this buffer already part of the
 		 * current transaction or the existing committing
 		 * transaction? */
@@ -552,18 +543,18 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, int force_copy)
 			JBUFFER_TRACE(jh, "Unexpected dirty buffer");
 			jbd_unexpected_dirty_buffer(jh);
 		}
-		spin_unlock(&journal_datalist_lock);
 	}
 
 	unlock_buffer(bh);
 
 	error = -EROFS;
-	if (is_handle_aborted(handle))
+	if (is_handle_aborted(handle)) {
+		spin_unlock(&journal_datalist_lock);
+		jbd_unlock_bh_state(bh);
 		goto out_unlocked;
+	}
 	error = 0;
 
-	spin_lock(&journal_datalist_lock);
-
 	/* The buffer is already part of this transaction if
 	 * b_transaction or b_next_transaction points to it. */
@@ -606,6 +597,7 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, int force_copy)
 			JBUFFER_TRACE(jh, "on shadow: sleep");
 			spin_unlock(&journal_datalist_lock);
+			jbd_unlock_bh_state(bh);
 			unlock_journal(journal);
 			/* commit wakes up all shadow buffers after IO */
 			wqh = bh_waitq_head(jh2bh(jh));
@@ -633,16 +625,16 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, int force_copy)
 		if (!frozen_buffer) {
 			JBUFFER_TRACE(jh, "allocate memory for buffer");
 			spin_unlock(&journal_datalist_lock);
-			unlock_journal(journal);
+			jbd_unlock_bh_state(bh);
 			frozen_buffer = jbd_kmalloc(jh2bh(jh)->b_size,
 						    GFP_NOFS);
-			lock_journal(journal);
 			if (!frozen_buffer) {
 				printk(KERN_EMERG
 				       "%s: OOM for frozen_buffer\n",
 				       __FUNCTION__);
 				JBUFFER_TRACE(jh, "oom!");
 				error = -ENOMEM;
+				jbd_lock_bh_state(bh);
 				spin_lock(&journal_datalist_lock);
 				goto done_locked;
 			}
@@ -672,7 +664,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, int force_copy)
 	}
 
 done_locked:
-	spin_unlock(&journal_datalist_lock);
 	if (need_copy) {
 		struct page *page;
 		int offset;
@@ -682,17 +673,16 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, int force_copy)
 		       "Possible IO failure.\n");
 		page = jh2bh(jh)->b_page;
 		offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
-		source = kmap(page);
+		source = kmap_atomic(page, KM_USER0);
 		memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
-		kunmap(page);
+		kunmap_atomic(source, KM_USER0);
 	}
+	spin_unlock(&journal_datalist_lock);
+	jbd_unlock_bh_state(bh);
 
 	/* If we are about to journal a buffer, then any revoke pending
 	   on it is no longer valid. */
-	lock_kernel();
 	journal_cancel_revoke(handle, jh);
-	unlock_kernel();
 
 out_unlocked:
 	if (frozen_buffer)
@@ -1070,8 +1060,9 @@ int journal_dirty_metadata (handle_t *handle, struct buffer_head *bh)
 	if (is_handle_aborted(handle))
 		goto out_unlock;
 
+	jbd_lock_bh_state(bh);
 	spin_lock(&journal_datalist_lock);
-	set_bit(BH_JBDDirty, &bh->b_state);
+	set_buffer_jbddirty(bh);
 
 	J_ASSERT_JH(jh, jh->b_transaction != NULL);
@@ -1102,6 +1093,7 @@ int journal_dirty_metadata (handle_t *handle, struct buffer_head *bh)
 done_locked:
 	spin_unlock(&journal_datalist_lock);
+	jbd_unlock_bh_state(bh);
 	JBUFFER_TRACE(jh, "exit");
 out_unlock:
 	unlock_journal(journal);
@@ -1168,6 +1160,7 @@ void journal_forget (handle_t *handle, struct buffer_head *bh)
 	BUFFER_TRACE(bh, "entry");
 
 	lock_journal(journal);
+	jbd_lock_bh_state(bh);
 	spin_lock(&journal_datalist_lock);
 
 	if (!buffer_jbd(bh))
@@ -1181,7 +1174,7 @@ void journal_forget (handle_t *handle, struct buffer_head *bh)
 		 * of this transaction, then we can just drop it from
 		 * the transaction immediately. */
 		clear_buffer_dirty(bh);
-		clear_bit(BH_JBDDirty, &bh->b_state);
+		clear_buffer_jbddirty(bh);
 
 		JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
 		J_ASSERT_JH(jh, !jh->b_committed_data);
@@ -1208,6 +1201,7 @@ void journal_forget (handle_t *handle, struct buffer_head *bh)
 			__brelse(bh);
 			if (!buffer_jbd(bh)) {
 				spin_unlock(&journal_datalist_lock);
+				jbd_unlock_bh_state(bh);
 				unlock_journal(journal);
 				__bforget(bh);
 				return;
@@ -1231,6 +1225,7 @@ void journal_forget (handle_t *handle, struct buffer_head *bh)
 not_jbd:
 	spin_unlock(&journal_datalist_lock);
+	jbd_unlock_bh_state(bh);
 	unlock_journal(journal);
 	__brelse(bh);
 	return;
@@ -1924,7 +1919,7 @@ void __journal_file_buffer(struct journal_head *jh,
 	struct buffer_head *bh = jh2bh(jh);
 
 	assert_spin_locked(&journal_datalist_lock);
 #ifdef __SMP__
 	J_ASSERT (current->lock_depth >= 0);
 #endif
@@ -1990,9 +1985,11 @@ void __journal_file_buffer(struct journal_head *jh,
 void journal_file_buffer(struct journal_head *jh,
 			transaction_t *transaction, int jlist)
 {
+	jbd_lock_bh_state(jh2bh(jh));
 	spin_lock(&journal_datalist_lock);
 	__journal_file_buffer(jh, transaction, jlist);
 	spin_unlock(&journal_datalist_lock);
+	jbd_unlock_bh_state(jh2bh(jh));
 }
 
 /*
@@ -2045,12 +2042,13 @@ void __journal_refile_buffer(struct journal_head *jh)
  */
 void journal_refile_buffer(struct journal_head *jh)
 {
-	struct buffer_head *bh;
+	struct buffer_head *bh = jh2bh(jh);
 
+	jbd_lock_bh_state(bh);
 	spin_lock(&journal_datalist_lock);
-	bh = jh2bh(jh);
-
 	__journal_refile_buffer(jh);
+	jbd_unlock_bh_state(bh);
 	journal_remove_journal_head(bh);
 	spin_unlock(&journal_datalist_lock);
...
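One more idiom from journal_write_metadata_buffer() above: since a GFP_NOFS
allocation may sleep, the buffer state lock is dropped around jbd_rep_kmalloc()
and, after relocking, b_frozen_data is rechecked in case another thread
completed the copy-out meanwhile (hence the new "repeat:" label).  A hedged
userspace sketch of that unlock-allocate-relock-recheck pattern; struct jhead
and get_frozen_data() are invented for illustration, not kernel API:

	#include <pthread.h>
	#include <stdlib.h>

	struct jhead {
		pthread_mutex_t state_lock;	/* stand-in for jbd_lock_bh_state() */
		void *frozen_data;		/* stand-in for b_frozen_data */
		size_t size;
	};

	static void *get_frozen_data(struct jhead *jh)
	{
		for (;;) {
			void *tmp;

			pthread_mutex_lock(&jh->state_lock);
			if (jh->frozen_data) {	/* already copied out: use it */
				void *ret = jh->frozen_data;
				pthread_mutex_unlock(&jh->state_lock);
				return ret;
			}
			pthread_mutex_unlock(&jh->state_lock);

			/* Drop the lock across the allocation, which may sleep. */
			tmp = malloc(jh->size);
			if (!tmp)
				return NULL;

			pthread_mutex_lock(&jh->state_lock);
			if (jh->frozen_data) {
				/* Another thread won the race while we slept:
				 * throw ours away and go round again. */
				pthread_mutex_unlock(&jh->state_lock);
				free(tmp);
				continue;
			}
			jh->frozen_data = tmp;
			pthread_mutex_unlock(&jh->state_lock);
			return tmp;
		}
	}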