    btrfs: use nofs allocations for running delayed items · 351cbf6e
    Zygo reported the following lockdep splat while testing the balance
    patches:
    
    ======================================================
    WARNING: possible circular locking dependency detected
    5.6.0-c6f0579d496a+ #53 Not tainted
    ------------------------------------------------------
    kswapd0/1133 is trying to acquire lock:
    ffff888092f622c0 (&delayed_node->mutex){+.+.}, at: __btrfs_release_delayed_node+0x7c/0x5b0
    
    but task is already holding lock:
    ffffffff8fc5f860 (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x5/0x30
    
    which lock already depends on the new lock.
    
    the existing dependency chain (in reverse order) is:
    
    -> #1 (fs_reclaim){+.+.}:
           fs_reclaim_acquire.part.91+0x29/0x30
           fs_reclaim_acquire+0x19/0x20
           kmem_cache_alloc_trace+0x32/0x740
           add_block_entry+0x45/0x260
           btrfs_ref_tree_mod+0x6e2/0x8b0
           btrfs_alloc_tree_block+0x789/0x880
           alloc_tree_block_no_bg_flush+0xc6/0xf0
           __btrfs_cow_block+0x270/0x940
           btrfs_cow_block+0x1ba/0x3a0
           btrfs_search_slot+0x999/0x1030
           btrfs_insert_empty_items+0x81/0xe0
           btrfs_insert_delayed_items+0x128/0x7d0
           __btrfs_run_delayed_items+0xf4/0x2a0
           btrfs_run_delayed_items+0x13/0x20
           btrfs_commit_transaction+0x5cc/0x1390
           insert_balance_item.isra.39+0x6b2/0x6e0
           btrfs_balance+0x72d/0x18d0
           btrfs_ioctl_balance+0x3de/0x4c0
           btrfs_ioctl+0x30ab/0x44a0
           ksys_ioctl+0xa1/0xe0
           __x64_sys_ioctl+0x43/0x50
           do_syscall_64+0x77/0x2c0
           entry_SYSCALL_64_after_hwframe+0x49/0xbe
    
    -> #0 (&delayed_node->mutex){+.+.}:
           __lock_acquire+0x197e/0x2550
           lock_acquire+0x103/0x220
           __mutex_lock+0x13d/0xce0
           mutex_lock_nested+0x1b/0x20
           __btrfs_release_delayed_node+0x7c/0x5b0
           btrfs_remove_delayed_node+0x49/0x50
           btrfs_evict_inode+0x6fc/0x900
           evict+0x19a/0x2c0
           dispose_list+0xa0/0xe0
           prune_icache_sb+0xbd/0xf0
           super_cache_scan+0x1b5/0x250
           do_shrink_slab+0x1f6/0x530
           shrink_slab+0x32e/0x410
           shrink_node+0x2a5/0xba0
           balance_pgdat+0x4bd/0x8a0
           kswapd+0x35a/0x800
           kthread+0x1e9/0x210
           ret_from_fork+0x3a/0x50
    
    other info that might help us debug this:
    
     Possible unsafe locking scenario:
    
           CPU0                    CPU1
           ----                    ----
      lock(fs_reclaim);
                                   lock(&delayed_node->mutex);
                                   lock(fs_reclaim);
      lock(&delayed_node->mutex);
    
     *** DEADLOCK ***
    
    3 locks held by kswapd0/1133:
     #0: ffffffff8fc5f860 (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x5/0x30
     #1: ffffffff8fc380d8 (shrinker_rwsem){++++}, at: shrink_slab+0x1e8/0x410
     #2: ffff8881e0e6c0e8 (&type->s_umount_key#42){++++}, at: trylock_super+0x1b/0x70
    
    stack backtrace:
    CPU: 2 PID: 1133 Comm: kswapd0 Not tainted 5.6.0-c6f0579d496a+ #53
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014
    Call Trace:
     dump_stack+0xc1/0x11a
     print_circular_bug.isra.38.cold.57+0x145/0x14a
     check_noncircular+0x2a9/0x2f0
     ? print_circular_bug.isra.38+0x130/0x130
     ? stack_trace_consume_entry+0x90/0x90
     ? save_trace+0x3cc/0x420
     __lock_acquire+0x197e/0x2550
     ? btrfs_inode_clear_file_extent_range+0x9b/0xb0
     ? register_lock_class+0x960/0x960
     lock_acquire+0x103/0x220
     ? __btrfs_release_delayed_node+0x7c/0x5b0
     __mutex_lock+0x13d/0xce0
     ? __btrfs_release_delayed_node+0x7c/0x5b0
     ? __asan_loadN+0xf/0x20
     ? pvclock_clocksource_read+0xeb/0x190
     ? __btrfs_release_delayed_node+0x7c/0x5b0
     ? mutex_lock_io_nested+0xc20/0xc20
     ? __kasan_check_read+0x11/0x20
     ? check_chain_key+0x1e6/0x2e0
     mutex_lock_nested+0x1b/0x20
     ? mutex_lock_nested+0x1b/0x20
     __btrfs_release_delayed_node+0x7c/0x5b0
     btrfs_remove_delayed_node+0x49/0x50
     btrfs_evict_inode+0x6fc/0x900
     ? btrfs_setattr+0x840/0x840
     ? do_raw_spin_unlock+0xa8/0x140
     evict+0x19a/0x2c0
     dispose_list+0xa0/0xe0
     prune_icache_sb+0xbd/0xf0
     ? invalidate_inodes+0x310/0x310
     super_cache_scan+0x1b5/0x250
     do_shrink_slab+0x1f6/0x530
     shrink_slab+0x32e/0x410
     ? do_shrink_slab+0x530/0x530
     ? do_shrink_slab+0x530/0x530
     ? __kasan_check_read+0x11/0x20
     ? mem_cgroup_protected+0x13d/0x260
     shrink_node+0x2a5/0xba0
     balance_pgdat+0x4bd/0x8a0
     ? mem_cgroup_shrink_node+0x490/0x490
     ? _raw_spin_unlock_irq+0x27/0x40
     ? finish_task_switch+0xce/0x390
     ? rcu_read_lock_bh_held+0xb0/0xb0
     kswapd+0x35a/0x800
     ? _raw_spin_unlock_irqrestore+0x4c/0x60
     ? balance_pgdat+0x8a0/0x8a0
     ? finish_wait+0x110/0x110
     ? __kasan_check_read+0x11/0x20
     ? __kthread_parkme+0xc6/0xe0
     ? balance_pgdat+0x8a0/0x8a0
     kthread+0x1e9/0x210
     ? kthread_create_worker_on_cpu+0xc0/0xc0
     ret_from_fork+0x3a/0x50
    
    This is because we hold that delayed node's mutex while doing tree
    operations, and the allocations those operations make can enter direct
    reclaim.  Reclaim can in turn evict inodes, and evicting a btrfs inode
    takes its delayed node's mutex, completing the cycle lockdep is
    complaining about.  Fix this by wrapping the searches in a NOFS
    allocation scope (memalloc_nofs_save()/memalloc_nofs_restore()).
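
    A minimal sketch of the pattern, assuming the scoped NOFS API from
    linux/sched/mm.h; the helper name delayed_item_search_nofs() is
    hypothetical, but memalloc_nofs_save()/memalloc_nofs_restore() and
    btrfs_search_slot() are the real interfaces involved:

      #include <linux/sched/mm.h>   /* memalloc_nofs_save/restore */
      #include "ctree.h"            /* btrfs types, as if in fs/btrfs/ */

      /*
       * Illustrative only: scope a tree search so that every allocation
       * made inside it is implicitly GFP_NOFS.  Direct reclaim then
       * cannot re-enter the filesystem, so it cannot evict inodes and
       * try to take a delayed node's mutex while we already hold one.
       */
      static int delayed_item_search_nofs(struct btrfs_trans_handle *trans,
                                          struct btrfs_root *root,
                                          struct btrfs_path *path,
                                          const struct btrfs_key *key)
      {
              unsigned int nofs_flag;
              int ret;

              nofs_flag = memalloc_nofs_save();
              ret = btrfs_search_slot(trans, root, key, path, 1, 1);
              memalloc_nofs_restore(nofs_flag);

              return ret;
      }

    The scoped API is preferable to passing GFP_NOFS at individual call
    sites because it covers every allocation made below the save/restore
    pair, including ones buried deep inside btrfs_search_slot().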
    
    CC: stable@vger.kernel.org # 4.4+
    Signed-off-by: Josef Bacik <josef@toxicpanda.com>
    Reviewed-by: David Sterba <dsterba@suse.com>
    Signed-off-by: David Sterba <dsterba@suse.com>