Commit d818fca1 authored by Mel Gorman, committed by Linus Torvalds

mm/vmscan: throttle reclaim and compaction when too many pages are isolated

Page reclaim throttles on congestion if too many parallel reclaim
instances have isolated too many pages.  This makes no sense: excessive
parallelisation has nothing to do with writeback or congestion.

This patch creates an additional waitqueue to sleep on when too many
pages are isolated.  The throttled tasks are woken when the number of
isolated pages is reduced or a timeout occurs.  There may be some false
positive wakeups for GFP_NOIO/GFP_NOFS callers but the tasks will
throttle again if necessary.

[shy828301@gmail.com: Wake up from compaction context]
[vbabka@suse.cz: Account number of throttled tasks only for writeback]

Link: https://lkml.kernel.org/r/20211022144651.19914-3-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andreas Dilger <adilger.kernel@dilger.ca>
Cc: "Darrick J . Wong" <djwong@kernel.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: NeilBrown <neilb@suse.de>
Cc: Rik van Riel <riel@surriel.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 8cd7c588
...@@ -275,6 +275,7 @@ enum lru_list { ...@@ -275,6 +275,7 @@ enum lru_list {
enum vmscan_throttle_state { enum vmscan_throttle_state {
VMSCAN_THROTTLE_WRITEBACK, VMSCAN_THROTTLE_WRITEBACK,
VMSCAN_THROTTLE_ISOLATED,
NR_VMSCAN_THROTTLE, NR_VMSCAN_THROTTLE,
}; };
......
...@@ -28,10 +28,12 @@ ...@@ -28,10 +28,12 @@
) : "RECLAIM_WB_NONE" ) : "RECLAIM_WB_NONE"
#define _VMSCAN_THROTTLE_WRITEBACK (1 << VMSCAN_THROTTLE_WRITEBACK) #define _VMSCAN_THROTTLE_WRITEBACK (1 << VMSCAN_THROTTLE_WRITEBACK)
#define _VMSCAN_THROTTLE_ISOLATED (1 << VMSCAN_THROTTLE_ISOLATED)
#define show_throttle_flags(flags) \ #define show_throttle_flags(flags) \
(flags) ? __print_flags(flags, "|", \ (flags) ? __print_flags(flags, "|", \
{_VMSCAN_THROTTLE_WRITEBACK, "VMSCAN_THROTTLE_WRITEBACK"} \ {_VMSCAN_THROTTLE_WRITEBACK, "VMSCAN_THROTTLE_WRITEBACK"}, \
{_VMSCAN_THROTTLE_ISOLATED, "VMSCAN_THROTTLE_ISOLATED"} \
) : "VMSCAN_THROTTLE_NONE" ) : "VMSCAN_THROTTLE_NONE"
......
...@@ -761,6 +761,8 @@ isolate_freepages_range(struct compact_control *cc, ...@@ -761,6 +761,8 @@ isolate_freepages_range(struct compact_control *cc,
/* Similar to reclaim, but different enough that they don't share logic */ /* Similar to reclaim, but different enough that they don't share logic */
static bool too_many_isolated(pg_data_t *pgdat) static bool too_many_isolated(pg_data_t *pgdat)
{ {
bool too_many;
unsigned long active, inactive, isolated; unsigned long active, inactive, isolated;
inactive = node_page_state(pgdat, NR_INACTIVE_FILE) + inactive = node_page_state(pgdat, NR_INACTIVE_FILE) +
...@@ -770,7 +772,11 @@ static bool too_many_isolated(pg_data_t *pgdat) ...@@ -770,7 +772,11 @@ static bool too_many_isolated(pg_data_t *pgdat)
isolated = node_page_state(pgdat, NR_ISOLATED_FILE) + isolated = node_page_state(pgdat, NR_ISOLATED_FILE) +
node_page_state(pgdat, NR_ISOLATED_ANON); node_page_state(pgdat, NR_ISOLATED_ANON);
return isolated > (inactive + active) / 2; too_many = isolated > (inactive + active) / 2;
if (!too_many)
wake_throttle_isolated(pgdat);
return too_many;
} }
/** /**
...@@ -822,7 +828,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, ...@@ -822,7 +828,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
if (cc->mode == MIGRATE_ASYNC) if (cc->mode == MIGRATE_ASYNC)
return -EAGAIN; return -EAGAIN;
congestion_wait(BLK_RW_ASYNC, HZ/10); reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED, HZ/10);
if (fatal_signal_pending(current)) if (fatal_signal_pending(current))
return -EINTR; return -EINTR;
......
...@@ -45,6 +45,15 @@ static inline void acct_reclaim_writeback(struct page *page) ...@@ -45,6 +45,15 @@ static inline void acct_reclaim_writeback(struct page *page)
__acct_reclaim_writeback(pgdat, page, nr_throttled); __acct_reclaim_writeback(pgdat, page, nr_throttled);
} }
/*
 * Wake any tasks sleeping on the node's VMSCAN_THROTTLE_ISOLATED wait
 * queue. Callers in this change invoke it once too_many_isolated()
 * reports that the isolated-page count is no longer excessive.
 *
 * @pgdat: node whose reclaim_wait[VMSCAN_THROTTLE_ISOLATED] queue is woken.
 *
 * NOTE(review): waitqueue_active() is checked without taking the queue
 * lock; a sleeper that races with this check is presumably still released
 * by the timeout given to reclaim_throttle() -- confirm.
 */
static inline void wake_throttle_isolated(pg_data_t *pgdat)
{
	wait_queue_head_t *isolated_wq =
		&pgdat->reclaim_wait[VMSCAN_THROTTLE_ISOLATED];

	/* Nobody is queued: skip the wake_up() call entirely. */
	if (!waitqueue_active(isolated_wq))
		return;

	wake_up(isolated_wq);
}
vm_fault_t do_swap_page(struct vm_fault *vmf); vm_fault_t do_swap_page(struct vm_fault *vmf);
void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
...@@ -121,6 +130,8 @@ extern unsigned long highest_memmap_pfn; ...@@ -121,6 +130,8 @@ extern unsigned long highest_memmap_pfn;
*/ */
extern int isolate_lru_page(struct page *page); extern int isolate_lru_page(struct page *page);
extern void putback_lru_page(struct page *page); extern void putback_lru_page(struct page *page);
extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason,
long timeout);
/* /*
* in mm/rmap.c: * in mm/rmap.c:
......
...@@ -1006,12 +1006,12 @@ static void handle_write_error(struct address_space *mapping, ...@@ -1006,12 +1006,12 @@ static void handle_write_error(struct address_space *mapping,
unlock_page(page); unlock_page(page);
} }
static void void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason,
reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason,
long timeout) long timeout)
{ {
wait_queue_head_t *wqh = &pgdat->reclaim_wait[reason]; wait_queue_head_t *wqh = &pgdat->reclaim_wait[reason];
long ret; long ret;
bool acct_writeback = (reason == VMSCAN_THROTTLE_WRITEBACK);
DEFINE_WAIT(wait); DEFINE_WAIT(wait);
/* /*
...@@ -1023,7 +1023,8 @@ reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason, ...@@ -1023,7 +1023,8 @@ reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason,
current->flags & (PF_IO_WORKER|PF_KTHREAD)) current->flags & (PF_IO_WORKER|PF_KTHREAD))
return; return;
if (atomic_inc_return(&pgdat->nr_writeback_throttled) == 1) { if (acct_writeback &&
atomic_inc_return(&pgdat->nr_writeback_throttled) == 1) {
WRITE_ONCE(pgdat->nr_reclaim_start, WRITE_ONCE(pgdat->nr_reclaim_start,
node_page_state(pgdat, NR_THROTTLED_WRITTEN)); node_page_state(pgdat, NR_THROTTLED_WRITTEN));
} }
...@@ -1031,6 +1032,8 @@ reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason, ...@@ -1031,6 +1032,8 @@ reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason,
prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
ret = schedule_timeout(timeout); ret = schedule_timeout(timeout);
finish_wait(wqh, &wait); finish_wait(wqh, &wait);
if (acct_writeback)
atomic_dec(&pgdat->nr_writeback_throttled); atomic_dec(&pgdat->nr_writeback_throttled);
trace_mm_vmscan_throttled(pgdat->node_id, jiffies_to_usecs(timeout), trace_mm_vmscan_throttled(pgdat->node_id, jiffies_to_usecs(timeout),
...@@ -2175,6 +2178,7 @@ static int too_many_isolated(struct pglist_data *pgdat, int file, ...@@ -2175,6 +2178,7 @@ static int too_many_isolated(struct pglist_data *pgdat, int file,
struct scan_control *sc) struct scan_control *sc)
{ {
unsigned long inactive, isolated; unsigned long inactive, isolated;
bool too_many;
if (current_is_kswapd()) if (current_is_kswapd())
return 0; return 0;
...@@ -2198,7 +2202,13 @@ static int too_many_isolated(struct pglist_data *pgdat, int file, ...@@ -2198,7 +2202,13 @@ static int too_many_isolated(struct pglist_data *pgdat, int file,
if ((sc->gfp_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) if ((sc->gfp_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS))
inactive >>= 3; inactive >>= 3;
return isolated > inactive; too_many = isolated > inactive;
/* Wake up tasks throttled due to too_many_isolated. */
if (!too_many)
wake_throttle_isolated(pgdat);
return too_many;
} }
/* /*
...@@ -2307,8 +2317,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, ...@@ -2307,8 +2317,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
return 0; return 0;
/* wait a bit for the reclaimer. */ /* wait a bit for the reclaimer. */
msleep(100);
stalled = true; stalled = true;
reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED, HZ/10);
/* We are about to die and free our memory. Return now. */ /* We are about to die and free our memory. Return now. */
if (fatal_signal_pending(current)) if (fatal_signal_pending(current))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment