Commit fee13fe9 authored by Dennis Zhou's avatar Dennis Zhou Committed by David Sterba

btrfs: correct zstd workspace manager lock to use spin_lock_bh()

The btrfs zstd workspace manager uses a background timer to reclaim not
recently used workspaces. I used spin_lock() from this context which
should have been caught with lockdep, but was not. This deadlock was
reported in bugzilla. The fix is to switch the zstd wsm lock to use
spin_lock_bh() from the softirq context.

This happened quite relibably on ppc64, unlike on other architectures.

  [  313.402874] ================================
  [  313.402875] WARNING: inconsistent lock state
  [  313.402879] 5.1.0-rc7 #1 Not tainted
  [  313.402880] --------------------------------
  [  313.402882] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
  [  313.402885] swapper/5/0 [HC0[0]:SC1[1]:HE1:SE0] takes:
  [  313.402888] 0000000080d1120c (&(&wsm.lock)->rlock){+.?.}, at: .zstd_reclaim_timer_fn+0x40/0x230
  [  313.402895] {SOFTIRQ-ON-W} state was registered at:
  [  313.402899]   .lock_acquire+0xd0/0x240
  [  313.402903]   ._raw_spin_lock+0x34/0x60
  [  313.402906]   .zstd_get_workspace+0xd0/0x360
  [  313.402908]   .end_compressed_bio_read+0x3b8/0x540
  [  313.402911]   .bio_endio+0x174/0x2c0
  [  313.402914]   .end_workqueue_fn+0x4c/0x70
  [  313.402917]   .normal_work_helper+0x138/0x7e0
  [  313.402920]   .process_one_work+0x324/0x790
  [  313.402922]   .worker_thread+0x68/0x570
  [  313.402925]   .kthread+0x19c/0x1b0
  [  313.402928]   .ret_from_kernel_thread+0x58/0x78
  [  313.402930] irq event stamp: 2629216
  [  313.402933] hardirqs last  enabled at (2629216): [<c0000000009da738>] ._raw_spin_unlock_irq+0x38/0x60
  [  313.402936] hardirqs last disabled at (2629215): [<c0000000009da4c4>] ._raw_spin_lock_irq+0x24/0x70
  [  313.402939] softirqs last  enabled at (2629212): [<c0000000000af9fc>] .irq_enter+0x8c/0xd0
  [  313.402942] softirqs last disabled at (2629213): [<c0000000000afb58>] .irq_exit+0x118/0x170
  [  313.402944]
		 other info that might help us debug this:
  [  313.402945]  Possible unsafe locking scenario:

  [  313.402947]        CPU0
  [  313.402948]        ----
  [  313.402949]   lock(&(&wsm.lock)->rlock);
  [  313.402951]   <Interrupt>
  [  313.402952]     lock(&(&wsm.lock)->rlock);
  [  313.402954]
		  *** DEADLOCK ***

  [  313.402957] 1 lock held by swapper/5/0:
  [  313.402958]  #0: 000000004b612042 ((&wsm.timer)){+.-.}, at: .call_timer_fn+0x0/0x3c0
  [  313.402963]
		 stack backtrace:
  [  313.402967] CPU: 5 PID: 0 Comm: swapper/5 Not tainted 5.1.0-rc7 #1
  [  313.402968] Call Trace:
  [  313.402972] [c0000007fa262e70] [c0000000009b3294] .dump_stack+0xe0/0x15c (unreliable)
  [  313.402975] [c0000007fa262f10] [c000000000125548] .print_usage_bug+0x348/0x390
  [  313.402978] [c0000007fa262fd0] [c000000000125cb4] .mark_lock+0x724/0x930
  [  313.402981] [c0000007fa263080] [c000000000126c20] .__lock_acquire+0xc90/0x16a0
  [  313.402984] [c0000007fa2631b0] [c000000000128040] .lock_acquire+0xd0/0x240
  [  313.402987] [c0000007fa263280] [c0000000009da2b4] ._raw_spin_lock+0x34/0x60
  [  313.402990] [c0000007fa263300] [c00000000054b0b0] .zstd_reclaim_timer_fn+0x40/0x230
  [  313.402993] [c0000007fa2633d0] [c000000000158b38] .call_timer_fn+0xc8/0x3c0
  [  313.402996] [c0000007fa2634a0] [c000000000158f74] .expire_timers+0x144/0x260
  [  313.402999] [c0000007fa263550] [c000000000159178] .run_timer_softirq+0xe8/0x230
  [  313.403002] [c0000007fa263680] [c0000000009db288] .__do_softirq+0x188/0x5d4
  [  313.403004] [c0000007fa263790] [c0000000000afb58] .irq_exit+0x118/0x170
  [  313.403008] [c0000007fa263800] [c000000000028d88] .timer_interrupt+0x158/0x430
  [  313.403012] [c0000007fa2638b0] [c0000000000091d4] decrementer_common+0x134/0x140
  [  313.403017] --- interrupt: 901 at replay_interrupt_return+0x0/0x4
		     LR = .arch_local_irq_restore.part.0+0x68/0x80
  [  313.403020] [c0000007fa263bb0] [c00000000001a3ac] .arch_local_irq_restore.part.0+0x2c/0x80 (unreliable)
  [  313.403024] [c0000007fa263c30] [c0000000007bbbcc] .cpuidle_enter_state+0xec/0x670
  [  313.403027] [c0000007fa263d00] [c0000000000f5130] .call_cpuidle+0x40/0x90
  [  313.403031] [c0000007fa263d70] [c0000000000f554c] .do_idle+0x2dc/0x3a0
  [  313.403034] [c0000007fa263e30] [c0000000000f59ac] .cpu_startup_entry+0x2c/0x30
  [  313.403037] [c0000007fa263ea0] [c000000000045674] .start_secondary+0x644/0x650
  [  313.403041] [c0000007fa263f90] [c00000000000ad5c] start_secondary_prolog+0x10/0x14

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=203517
Fixes: 3f93aef5 ("btrfs: add zstd compression level support")
CC: stable@vger.kernel.org # 5.1+
Signed-off-by: default avatarDennis Zhou <dennis@kernel.org>
Reviewed-by: default avatarDavid Sterba <dsterba@suse.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent debd1c06
...@@ -105,10 +105,10 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer) ...@@ -105,10 +105,10 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer)
unsigned long reclaim_threshold = jiffies - ZSTD_BTRFS_RECLAIM_JIFFIES; unsigned long reclaim_threshold = jiffies - ZSTD_BTRFS_RECLAIM_JIFFIES;
struct list_head *pos, *next; struct list_head *pos, *next;
spin_lock(&wsm.lock); spin_lock_bh(&wsm.lock);
if (list_empty(&wsm.lru_list)) { if (list_empty(&wsm.lru_list)) {
spin_unlock(&wsm.lock); spin_unlock_bh(&wsm.lock);
return; return;
} }
...@@ -137,7 +137,7 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer) ...@@ -137,7 +137,7 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer)
if (!list_empty(&wsm.lru_list)) if (!list_empty(&wsm.lru_list))
mod_timer(&wsm.timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES); mod_timer(&wsm.timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
spin_unlock(&wsm.lock); spin_unlock_bh(&wsm.lock);
} }
/* /*
...@@ -198,7 +198,7 @@ static void zstd_cleanup_workspace_manager(void) ...@@ -198,7 +198,7 @@ static void zstd_cleanup_workspace_manager(void)
struct workspace *workspace; struct workspace *workspace;
int i; int i;
spin_lock(&wsm.lock); spin_lock_bh(&wsm.lock);
for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) { for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) {
while (!list_empty(&wsm.idle_ws[i])) { while (!list_empty(&wsm.idle_ws[i])) {
workspace = container_of(wsm.idle_ws[i].next, workspace = container_of(wsm.idle_ws[i].next,
...@@ -208,7 +208,7 @@ static void zstd_cleanup_workspace_manager(void) ...@@ -208,7 +208,7 @@ static void zstd_cleanup_workspace_manager(void)
zstd_free_workspace(&workspace->list); zstd_free_workspace(&workspace->list);
} }
} }
spin_unlock(&wsm.lock); spin_unlock_bh(&wsm.lock);
del_timer_sync(&wsm.timer); del_timer_sync(&wsm.timer);
} }
...@@ -230,7 +230,7 @@ static struct list_head *zstd_find_workspace(unsigned int level) ...@@ -230,7 +230,7 @@ static struct list_head *zstd_find_workspace(unsigned int level)
struct workspace *workspace; struct workspace *workspace;
int i = level - 1; int i = level - 1;
spin_lock(&wsm.lock); spin_lock_bh(&wsm.lock);
for_each_set_bit_from(i, &wsm.active_map, ZSTD_BTRFS_MAX_LEVEL) { for_each_set_bit_from(i, &wsm.active_map, ZSTD_BTRFS_MAX_LEVEL) {
if (!list_empty(&wsm.idle_ws[i])) { if (!list_empty(&wsm.idle_ws[i])) {
ws = wsm.idle_ws[i].next; ws = wsm.idle_ws[i].next;
...@@ -242,11 +242,11 @@ static struct list_head *zstd_find_workspace(unsigned int level) ...@@ -242,11 +242,11 @@ static struct list_head *zstd_find_workspace(unsigned int level)
list_del(&workspace->lru_list); list_del(&workspace->lru_list);
if (list_empty(&wsm.idle_ws[i])) if (list_empty(&wsm.idle_ws[i]))
clear_bit(i, &wsm.active_map); clear_bit(i, &wsm.active_map);
spin_unlock(&wsm.lock); spin_unlock_bh(&wsm.lock);
return ws; return ws;
} }
} }
spin_unlock(&wsm.lock); spin_unlock_bh(&wsm.lock);
return NULL; return NULL;
} }
...@@ -305,7 +305,7 @@ static void zstd_put_workspace(struct list_head *ws) ...@@ -305,7 +305,7 @@ static void zstd_put_workspace(struct list_head *ws)
{ {
struct workspace *workspace = list_to_workspace(ws); struct workspace *workspace = list_to_workspace(ws);
spin_lock(&wsm.lock); spin_lock_bh(&wsm.lock);
/* A node is only taken off the lru if we are the corresponding level */ /* A node is only taken off the lru if we are the corresponding level */
if (workspace->req_level == workspace->level) { if (workspace->req_level == workspace->level) {
...@@ -325,7 +325,7 @@ static void zstd_put_workspace(struct list_head *ws) ...@@ -325,7 +325,7 @@ static void zstd_put_workspace(struct list_head *ws)
list_add(&workspace->list, &wsm.idle_ws[workspace->level - 1]); list_add(&workspace->list, &wsm.idle_ws[workspace->level - 1]);
workspace->req_level = 0; workspace->req_level = 0;
spin_unlock(&wsm.lock); spin_unlock_bh(&wsm.lock);
if (workspace->level == ZSTD_BTRFS_MAX_LEVEL) if (workspace->level == ZSTD_BTRFS_MAX_LEVEL)
cond_wake_up(&wsm.wait); cond_wake_up(&wsm.wait);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment