Commit f86f1bbd, authored by Chris Mason, committed by Ben Hutchings

btrfs: fix races on root_log_ctx lists

commit 570dd450 upstream.

btrfs_remove_all_log_ctxs takes a shortcut where it avoids walking the
list because it knows all of the waiters are patiently waiting for the
commit to finish.

But, there's a small race where btrfs_sync_log can remove itself from
the list if it finds a log commit is already done.  Also, it uses
list_del_init() to remove itself from the list, but there's no way to
know if btrfs_remove_all_log_ctxs has already run, so we don't know for
sure if it is safe to call list_del_init().

This gets rid of all the shortcuts for btrfs_remove_all_log_ctxs(), and
just calls it with the proper locking.

This is part two of the corruption fixed by cbd60aa7.  I should have
done this in the first place, but convinced myself the optimizations were
safe.  A 12 hour run of dbench 2048 will eventually trigger a list debug
WARN_ON for the list_del_init() in btrfs_sync_log().

Fixes: d1433deb
Reported-by: Dave Jones <davej@codemonkey.org.uk>
Signed-off-by: Chris Mason <clm@fb.com>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
parent 768071f4
...@@ -2449,14 +2449,12 @@ static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root, ...@@ -2449,14 +2449,12 @@ static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root,
int index, int error) int index, int error)
{ {
struct btrfs_log_ctx *ctx; struct btrfs_log_ctx *ctx;
struct btrfs_log_ctx *safe;
if (!error) { list_for_each_entry_safe(ctx, safe, &root->log_ctxs[index], list) {
INIT_LIST_HEAD(&root->log_ctxs[index]); list_del_init(&ctx->list);
return;
}
list_for_each_entry(ctx, &root->log_ctxs[index], list)
ctx->log_ret = error; ctx->log_ret = error;
}
INIT_LIST_HEAD(&root->log_ctxs[index]); INIT_LIST_HEAD(&root->log_ctxs[index]);
} }
...@@ -2686,13 +2684,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, ...@@ -2686,13 +2684,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
mutex_unlock(&root->log_mutex); mutex_unlock(&root->log_mutex);
out_wake_log_root: out_wake_log_root:
/* mutex_lock(&log_root_tree->log_mutex);
* We needn't get log_mutex here because we are sure all
* the other tasks are blocked.
*/
btrfs_remove_all_log_ctxs(log_root_tree, index2, ret); btrfs_remove_all_log_ctxs(log_root_tree, index2, ret);
mutex_lock(&log_root_tree->log_mutex);
log_root_tree->log_transid_committed++; log_root_tree->log_transid_committed++;
atomic_set(&log_root_tree->log_commit[index2], 0); atomic_set(&log_root_tree->log_commit[index2], 0);
mutex_unlock(&log_root_tree->log_mutex); mutex_unlock(&log_root_tree->log_mutex);
...@@ -2700,10 +2694,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, ...@@ -2700,10 +2694,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
wake_up(&log_root_tree->log_commit_wait[index2]); wake_up(&log_root_tree->log_commit_wait[index2]);
out: out:
/* See above. */
btrfs_remove_all_log_ctxs(root, index1, ret);
mutex_lock(&root->log_mutex); mutex_lock(&root->log_mutex);
btrfs_remove_all_log_ctxs(root, index1, ret);
root->log_transid_committed++; root->log_transid_committed++;
atomic_set(&root->log_commit[index1], 0); atomic_set(&root->log_commit[index1], 0);
mutex_unlock(&root->log_mutex); mutex_unlock(&root->log_mutex);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment