Commit 9ea9cb00 authored by Johannes Weiner, committed by Andrew Morton

mm: memcontrol: fix GFP_NOFS recursion in memory.high enforcement

Breno and Josef report a deadlock scenario from cgroup reclaim
re-entering the filesystem:

[  361.546690] ======================================================
[  361.559210] WARNING: possible circular locking dependency detected
[  361.571703] 6.5.0-0_fbk700_debug_rc0_kbuilder_13159_gbf787a128001 #1 Tainted: G S          E
[  361.589704] ------------------------------------------------------
[  361.602277] find/9315 is trying to acquire lock:
[  361.611625] ffff88837ba140c0 (&delayed_node->mutex){+.+.}-{4:4}, at: __btrfs_release_delayed_node+0x68/0x4f0
[  361.631437]
[  361.631437] but task is already holding lock:
[  361.643243] ffff8881765b8678 (btrfs-tree-01){++++}-{4:4}, at: btrfs_tree_read_lock+0x1e/0x40

[  362.904457]  mutex_lock_nested+0x1c/0x30
[  362.912414]  __btrfs_release_delayed_node+0x68/0x4f0
[  362.922460]  btrfs_evict_inode+0x301/0x770
[  362.982726]  evict+0x17c/0x380
[  362.988944]  prune_icache_sb+0x100/0x1d0
[  363.005559]  super_cache_scan+0x1f8/0x260
[  363.013695]  do_shrink_slab+0x2a2/0x540
[  363.021489]  shrink_slab_memcg+0x237/0x3d0
[  363.050606]  shrink_slab+0xa7/0x240
[  363.083382]  shrink_node_memcgs+0x262/0x3b0
[  363.091870]  shrink_node+0x1a4/0x720
[  363.099150]  shrink_zones+0x1f6/0x5d0
[  363.148798]  do_try_to_free_pages+0x19b/0x5e0
[  363.157633]  try_to_free_mem_cgroup_pages+0x266/0x370
[  363.190575]  reclaim_high+0x16f/0x1f0
[  363.208409]  mem_cgroup_handle_over_high+0x10b/0x270
[  363.246678]  try_charge_memcg+0xaf2/0xc70
[  363.304151]  charge_memcg+0xf0/0x350
[  363.320070]  __mem_cgroup_charge+0x28/0x40
[  363.328371]  __filemap_add_folio+0x870/0xd50
[  363.371303]  filemap_add_folio+0xdd/0x310
[  363.399696]  __filemap_get_folio+0x2fc/0x7d0
[  363.419086]  pagecache_get_page+0xe/0x30
[  363.427048]  alloc_extent_buffer+0x1cd/0x6a0
[  363.435704]  read_tree_block+0x43/0xc0
[  363.443316]  read_block_for_search+0x361/0x510
[  363.466690]  btrfs_search_slot+0xc8c/0x1520

This is caused by mem_cgroup_handle_over_high() not respecting the
gfp_mask of the allocation context.  We used to only call this function on
resume to userspace, where no locks were held.  But c9afe31e ("memcg:
synchronously enforce memory.high for large overcharges") added a call
from the allocation context without considering the gfp mask.

Fix this by passing the caller's gfp_mask through to reclaim_high(); the
resume-to-userspace path keeps using GFP_KERNEL.

Link: https://lkml.kernel.org/r/20230914152139.100822-1-hannes@cmpxchg.org
Fixes: c9afe31e ("memcg: synchronously enforce memory.high for large overcharges")
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reported-by: Breno Leitao <leitao@debian.org>
Reported-by: Josef Bacik <josef@toxicpanda.com>
Acked-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: <stable@vger.kernel.org>	[5.17+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 0c7752d5
...@@ -920,7 +920,7 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec, ...@@ -920,7 +920,7 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
return READ_ONCE(mz->lru_zone_size[zone_idx][lru]); return READ_ONCE(mz->lru_zone_size[zone_idx][lru]);
} }
void mem_cgroup_handle_over_high(void); void mem_cgroup_handle_over_high(gfp_t gfp_mask);
unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg); unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg);
...@@ -1458,7 +1458,7 @@ static inline void mem_cgroup_unlock_pages(void) ...@@ -1458,7 +1458,7 @@ static inline void mem_cgroup_unlock_pages(void)
rcu_read_unlock(); rcu_read_unlock();
} }
static inline void mem_cgroup_handle_over_high(void) static inline void mem_cgroup_handle_over_high(gfp_t gfp_mask)
{ {
} }
......
...@@ -55,7 +55,7 @@ static inline void resume_user_mode_work(struct pt_regs *regs) ...@@ -55,7 +55,7 @@ static inline void resume_user_mode_work(struct pt_regs *regs)
} }
#endif #endif
mem_cgroup_handle_over_high(); mem_cgroup_handle_over_high(GFP_KERNEL);
blkcg_maybe_throttle_current(); blkcg_maybe_throttle_current();
rseq_handle_notify_resume(NULL, regs); rseq_handle_notify_resume(NULL, regs);
......
...@@ -2555,7 +2555,7 @@ static unsigned long calculate_high_delay(struct mem_cgroup *memcg, ...@@ -2555,7 +2555,7 @@ static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
* Scheduled by try_charge() to be executed from the userland return path * Scheduled by try_charge() to be executed from the userland return path
* and reclaims memory over the high limit. * and reclaims memory over the high limit.
*/ */
void mem_cgroup_handle_over_high(void) void mem_cgroup_handle_over_high(gfp_t gfp_mask)
{ {
unsigned long penalty_jiffies; unsigned long penalty_jiffies;
unsigned long pflags; unsigned long pflags;
...@@ -2583,7 +2583,7 @@ void mem_cgroup_handle_over_high(void) ...@@ -2583,7 +2583,7 @@ void mem_cgroup_handle_over_high(void)
*/ */
nr_reclaimed = reclaim_high(memcg, nr_reclaimed = reclaim_high(memcg,
in_retry ? SWAP_CLUSTER_MAX : nr_pages, in_retry ? SWAP_CLUSTER_MAX : nr_pages,
GFP_KERNEL); gfp_mask);
/* /*
* memory.high is breached and reclaim is unable to keep up. Throttle * memory.high is breached and reclaim is unable to keep up. Throttle
...@@ -2819,7 +2819,7 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask, ...@@ -2819,7 +2819,7 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
if (current->memcg_nr_pages_over_high > MEMCG_CHARGE_BATCH && if (current->memcg_nr_pages_over_high > MEMCG_CHARGE_BATCH &&
!(current->flags & PF_MEMALLOC) && !(current->flags & PF_MEMALLOC) &&
gfpflags_allow_blocking(gfp_mask)) { gfpflags_allow_blocking(gfp_mask)) {
mem_cgroup_handle_over_high(); mem_cgroup_handle_over_high(gfp_mask);
} }
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment