Commit 99d16cbc authored by Sage Weil's avatar Sage Weil Committed by Chris Mason

Btrfs: fix deadlock in btrfs_commit_transaction

We calculate timeout (either 1 or MAX_SCHEDULE_TIMEOUT) based on whether
num_writers > 1 or should_grow at the top of the loop.  Then, much much
later, we wait for that timeout if either num_writers or should_grow is
true.  However, it's possible for a racing process (calling
btrfs_end_transaction()) to decrement num_writers such that we wait
forever instead of for 1.

Fix this by deciding how long to wait when we wait.  Include a smp_mb()
before checking if the waitqueue is active to ensure the num_writers
is visible.
Signed-off-by: default avatarSage Weil <sage@newdream.net>
Signed-off-by: default avatarChris Mason <chris.mason@oracle.com>
parent fccdae43
...@@ -402,6 +402,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, ...@@ -402,6 +402,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
WARN_ON(cur_trans->num_writers < 1); WARN_ON(cur_trans->num_writers < 1);
cur_trans->num_writers--; cur_trans->num_writers--;
smp_mb();
if (waitqueue_active(&cur_trans->writer_wait)) if (waitqueue_active(&cur_trans->writer_wait))
wake_up(&cur_trans->writer_wait); wake_up(&cur_trans->writer_wait);
put_transaction(cur_trans); put_transaction(cur_trans);
...@@ -1010,7 +1011,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ...@@ -1010,7 +1011,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root) struct btrfs_root *root)
{ {
unsigned long joined = 0; unsigned long joined = 0;
unsigned long timeout = 1;
struct btrfs_transaction *cur_trans; struct btrfs_transaction *cur_trans;
struct btrfs_transaction *prev_trans = NULL; struct btrfs_transaction *prev_trans = NULL;
DEFINE_WAIT(wait); DEFINE_WAIT(wait);
...@@ -1081,11 +1081,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ...@@ -1081,11 +1081,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
snap_pending = 1; snap_pending = 1;
WARN_ON(cur_trans != trans->transaction); WARN_ON(cur_trans != trans->transaction);
if (cur_trans->num_writers > 1)
timeout = MAX_SCHEDULE_TIMEOUT;
else if (should_grow)
timeout = 1;
mutex_unlock(&root->fs_info->trans_mutex); mutex_unlock(&root->fs_info->trans_mutex);
if (flush_on_commit || snap_pending) { if (flush_on_commit || snap_pending) {
...@@ -1107,8 +1102,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ...@@ -1107,8 +1102,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
TASK_UNINTERRUPTIBLE); TASK_UNINTERRUPTIBLE);
smp_mb(); smp_mb();
if (cur_trans->num_writers > 1 || should_grow) if (cur_trans->num_writers > 1)
schedule_timeout(timeout); schedule_timeout(MAX_SCHEDULE_TIMEOUT);
else if (should_grow)
schedule_timeout(1);
mutex_lock(&root->fs_info->trans_mutex); mutex_lock(&root->fs_info->trans_mutex);
finish_wait(&cur_trans->writer_wait, &wait); finish_wait(&cur_trans->writer_wait, &wait);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment