Commit c3f4a9a2 authored by Mel Gorman, committed by Linus Torvalds

mm/vmscan: centralise timeout values for reclaim_throttle

Neil Brown raised concerns about callers of reclaim_throttle specifying
a timeout value.  The original timeout values to congestion_wait() were
probably pulled out of thin air or copy&pasted from somewhere else.
This patch centralises the timeout values and selects a timeout based on
the reason for reclaim throttling.  These figures are also pulled out of
the same thin air, but better values may be derived later.

Running a workload that is throttling for inappropriate periods and
tracing mm_vmscan_throttled can be used to pick a more appropriate
value.  Excessive throttling would pick a lower timeout, whereas
excessive CPU usage in reclaim context would select a larger timeout.
Ideally a large value would always be used and the wakeups would occur
before a timeout, but that requires careful testing.

Link: https://lkml.kernel.org/r/20211022144651.19914-7-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andreas Dilger <adilger.kernel@dilger.ca>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: NeilBrown <neilb@suse.de>
Cc: Rik van Riel <riel@surriel.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 132b0d21
...@@ -828,7 +828,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, ...@@ -828,7 +828,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
if (cc->mode == MIGRATE_ASYNC) if (cc->mode == MIGRATE_ASYNC)
return -EAGAIN; return -EAGAIN;
reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED, HZ/10); reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED);
if (fatal_signal_pending(current)) if (fatal_signal_pending(current))
return -EINTR; return -EINTR;
......
...@@ -130,8 +130,7 @@ extern unsigned long highest_memmap_pfn; ...@@ -130,8 +130,7 @@ extern unsigned long highest_memmap_pfn;
*/ */
extern int isolate_lru_page(struct page *page); extern int isolate_lru_page(struct page *page);
extern void putback_lru_page(struct page *page); extern void putback_lru_page(struct page *page);
extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason, extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason);
long timeout);
/* /*
* in mm/rmap.c: * in mm/rmap.c:
......
...@@ -2374,7 +2374,7 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc) ...@@ -2374,7 +2374,7 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
* guess as any. * guess as any.
*/ */
reclaim_throttle(NODE_DATA(numa_node_id()), reclaim_throttle(NODE_DATA(numa_node_id()),
VMSCAN_THROTTLE_WRITEBACK, HZ/50); VMSCAN_THROTTLE_WRITEBACK);
} }
/* /*
* Usually few pages are written by now from those we've just submitted * Usually few pages are written by now from those we've just submitted
......
...@@ -1006,12 +1006,10 @@ static void handle_write_error(struct address_space *mapping, ...@@ -1006,12 +1006,10 @@ static void handle_write_error(struct address_space *mapping,
unlock_page(page); unlock_page(page);
} }
void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason, void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason)
long timeout)
{ {
wait_queue_head_t *wqh = &pgdat->reclaim_wait[reason]; wait_queue_head_t *wqh = &pgdat->reclaim_wait[reason];
long ret; long timeout, ret;
bool acct_writeback = (reason == VMSCAN_THROTTLE_WRITEBACK);
DEFINE_WAIT(wait); DEFINE_WAIT(wait);
/* /*
...@@ -1023,17 +1021,43 @@ void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason, ...@@ -1023,17 +1021,43 @@ void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason,
current->flags & (PF_IO_WORKER|PF_KTHREAD)) current->flags & (PF_IO_WORKER|PF_KTHREAD))
return; return;
if (acct_writeback && /*
atomic_inc_return(&pgdat->nr_writeback_throttled) == 1) { * These figures are pulled out of thin air.
WRITE_ONCE(pgdat->nr_reclaim_start, * VMSCAN_THROTTLE_ISOLATED is a transient condition based on too many
node_page_state(pgdat, NR_THROTTLED_WRITTEN)); * parallel reclaimers which is a short-lived event so the timeout is
* short. Failing to make progress or waiting on writeback are
* potentially long-lived events so use a longer timeout. This is shaky
* logic as a failure to make progress could be due to anything from
* writeback to a slow device to excessive references pages at the tail
* of the inactive LRU.
*/
switch(reason) {
case VMSCAN_THROTTLE_WRITEBACK:
timeout = HZ/10;
if (atomic_inc_return(&pgdat->nr_writeback_throttled) == 1) {
WRITE_ONCE(pgdat->nr_reclaim_start,
node_page_state(pgdat, NR_THROTTLED_WRITTEN));
}
break;
case VMSCAN_THROTTLE_NOPROGRESS:
timeout = HZ/10;
break;
case VMSCAN_THROTTLE_ISOLATED:
timeout = HZ/50;
break;
default:
WARN_ON_ONCE(1);
timeout = HZ;
break;
} }
prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
ret = schedule_timeout(timeout); ret = schedule_timeout(timeout);
finish_wait(wqh, &wait); finish_wait(wqh, &wait);
if (acct_writeback) if (reason == VMSCAN_THROTTLE_WRITEBACK)
atomic_dec(&pgdat->nr_writeback_throttled); atomic_dec(&pgdat->nr_writeback_throttled);
trace_mm_vmscan_throttled(pgdat->node_id, jiffies_to_usecs(timeout), trace_mm_vmscan_throttled(pgdat->node_id, jiffies_to_usecs(timeout),
...@@ -2318,7 +2342,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, ...@@ -2318,7 +2342,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
/* wait a bit for the reclaimer. */ /* wait a bit for the reclaimer. */
stalled = true; stalled = true;
reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED, HZ/10); reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED);
/* We are about to die and free our memory. Return now. */ /* We are about to die and free our memory. Return now. */
if (fatal_signal_pending(current)) if (fatal_signal_pending(current))
...@@ -3250,7 +3274,7 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) ...@@ -3250,7 +3274,7 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
* until some pages complete writeback. * until some pages complete writeback.
*/ */
if (sc->nr.immediate) if (sc->nr.immediate)
reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK, HZ/10); reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK);
} }
/* /*
...@@ -3274,7 +3298,7 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) ...@@ -3274,7 +3298,7 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
if (!current_is_kswapd() && current_may_throttle() && if (!current_is_kswapd() && current_may_throttle() &&
!sc->hibernation_mode && !sc->hibernation_mode &&
test_bit(LRUVEC_CONGESTED, &target_lruvec->flags)) test_bit(LRUVEC_CONGESTED, &target_lruvec->flags))
reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK, HZ/10); reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK);
if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed, if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
sc)) sc))
...@@ -3346,7 +3370,7 @@ static void consider_reclaim_throttle(pg_data_t *pgdat, struct scan_control *sc) ...@@ -3346,7 +3370,7 @@ static void consider_reclaim_throttle(pg_data_t *pgdat, struct scan_control *sc)
/* Throttle if making no progress at high prioities. */ /* Throttle if making no progress at high prioities. */
if (sc->priority < DEF_PRIORITY - 2) if (sc->priority < DEF_PRIORITY - 2)
reclaim_throttle(pgdat, VMSCAN_THROTTLE_NOPROGRESS, HZ/10); reclaim_throttle(pgdat, VMSCAN_THROTTLE_NOPROGRESS);
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment