Commit 0eb77e98, authored by Christoph Lameter, committed by Linus Torvalds

vmstat: make vmstat_updater deferrable again and shut down on idle

Currently the vmstat updater is not deferrable as a result of commit
ba4877b9 ("vmstat: do not use deferrable delayed work for
vmstat_update").  This in turn can cause multiple interruptions of the
applications because the vmstat updater may run at different times than
tick processing.

Make vmstat_update deferrable again and provide a function that folds
the differentials when the processor is going to idle mode thus
addressing the issue of the above commit in a clean way.

Note that the shepherd thread will continue scanning the differentials
from another processor and will reenable the vmstat workers if it
detects any changes.

Fixes: ba4877b9 ("vmstat: do not use deferrable delayed work for vmstat_update")
Signed-off-by: Christoph Lameter <cl@linux.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 686739f6
...@@ -189,6 +189,7 @@ extern void __inc_zone_state(struct zone *, enum zone_stat_item); ...@@ -189,6 +189,7 @@ extern void __inc_zone_state(struct zone *, enum zone_stat_item);
extern void dec_zone_state(struct zone *, enum zone_stat_item); extern void dec_zone_state(struct zone *, enum zone_stat_item);
extern void __dec_zone_state(struct zone *, enum zone_stat_item); extern void __dec_zone_state(struct zone *, enum zone_stat_item);
void quiet_vmstat(void);
void cpu_vm_stats_fold(int cpu); void cpu_vm_stats_fold(int cpu);
void refresh_zone_stat_thresholds(void); void refresh_zone_stat_thresholds(void);
...@@ -249,6 +250,7 @@ static inline void __dec_zone_page_state(struct page *page, ...@@ -249,6 +250,7 @@ static inline void __dec_zone_page_state(struct page *page,
static inline void refresh_zone_stat_thresholds(void) { } static inline void refresh_zone_stat_thresholds(void) { }
static inline void cpu_vm_stats_fold(int cpu) { } static inline void cpu_vm_stats_fold(int cpu) { }
static inline void quiet_vmstat(void) { }
static inline void drain_zonestat(struct zone *zone, static inline void drain_zonestat(struct zone *zone,
struct per_cpu_pageset *pset) { } struct per_cpu_pageset *pset) { }
......
...@@ -219,6 +219,7 @@ static void cpu_idle_loop(void) ...@@ -219,6 +219,7 @@ static void cpu_idle_loop(void)
*/ */
__current_set_polling(); __current_set_polling();
quiet_vmstat();
tick_nohz_idle_enter(); tick_nohz_idle_enter();
while (!need_resched()) { while (!need_resched()) {
......
...@@ -460,7 +460,7 @@ static int fold_diff(int *diff) ...@@ -460,7 +460,7 @@ static int fold_diff(int *diff)
* *
* The function returns the number of global counters updated. * The function returns the number of global counters updated.
*/ */
static int refresh_cpu_vm_stats(void) static int refresh_cpu_vm_stats(bool do_pagesets)
{ {
struct zone *zone; struct zone *zone;
int i; int i;
...@@ -484,33 +484,35 @@ static int refresh_cpu_vm_stats(void) ...@@ -484,33 +484,35 @@ static int refresh_cpu_vm_stats(void)
#endif #endif
} }
} }
cond_resched();
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
/* if (do_pagesets) {
* Deal with draining the remote pageset of this cond_resched();
* processor /*
* * Deal with draining the remote pageset of this
* Check if there are pages remaining in this pageset * processor
* if not then there is nothing to expire. *
*/ * Check if there are pages remaining in this pageset
if (!__this_cpu_read(p->expire) || * if not then there is nothing to expire.
*/
if (!__this_cpu_read(p->expire) ||
!__this_cpu_read(p->pcp.count)) !__this_cpu_read(p->pcp.count))
continue; continue;
/* /*
* We never drain zones local to this processor. * We never drain zones local to this processor.
*/ */
if (zone_to_nid(zone) == numa_node_id()) { if (zone_to_nid(zone) == numa_node_id()) {
__this_cpu_write(p->expire, 0); __this_cpu_write(p->expire, 0);
continue; continue;
} }
if (__this_cpu_dec_return(p->expire)) if (__this_cpu_dec_return(p->expire))
continue; continue;
if (__this_cpu_read(p->pcp.count)) { if (__this_cpu_read(p->pcp.count)) {
drain_zone_pages(zone, this_cpu_ptr(&p->pcp)); drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
changes++; changes++;
}
} }
#endif #endif
} }
...@@ -1386,7 +1388,7 @@ static cpumask_var_t cpu_stat_off; ...@@ -1386,7 +1388,7 @@ static cpumask_var_t cpu_stat_off;
static void vmstat_update(struct work_struct *w) static void vmstat_update(struct work_struct *w)
{ {
if (refresh_cpu_vm_stats()) { if (refresh_cpu_vm_stats(true)) {
/* /*
* Counters were updated so we expect more updates * Counters were updated so we expect more updates
* to occur in the future. Keep on running the * to occur in the future. Keep on running the
...@@ -1417,6 +1419,23 @@ static void vmstat_update(struct work_struct *w) ...@@ -1417,6 +1419,23 @@ static void vmstat_update(struct work_struct *w)
} }
} }
/*
* Switch off vmstat processing and then fold all the remaining differentials
* until the diffs stay at zero. The function is used by NOHZ and can only be
* invoked when tick processing is not active.
*
* This commit calls it from cpu_idle_loop() immediately before
* tick_nohz_idle_enter(), so the CPU's pending counter deltas are folded
* before it goes idle.
*
* Takes no arguments and returns nothing.
*/
void quiet_vmstat(void)
{
/* Do nothing unless the system is in the SYSTEM_RUNNING state. */
if (system_state != SYSTEM_RUNNING)
return;
do {
/*
* Set this CPU's bit in cpu_stat_off. Only if the bit was previously
* clear is this CPU's vmstat_work delayed work cancelled.
* NOTE(review): presumably a set bit means the per-cpu worker is
* already switched off, so there is nothing to cancel -- confirm
* against the shepherd code.
*/
if (!cpumask_test_and_set_cpu(smp_processor_id(), cpu_stat_off))
cancel_delayed_work(this_cpu_ptr(&vmstat_work));
/*
* Fold the differentials with do_pagesets == false, which skips the
* remote pageset draining path. Repeat for as long as the fold
* reports that counters were updated.
*/
} while (refresh_cpu_vm_stats(false));
}
/* /*
* Check if the diffs for a certain cpu indicate that * Check if the diffs for a certain cpu indicate that
* an update is needed. * an update is needed.
...@@ -1449,7 +1468,7 @@ static bool need_update(int cpu) ...@@ -1449,7 +1468,7 @@ static bool need_update(int cpu)
*/ */
static void vmstat_shepherd(struct work_struct *w); static void vmstat_shepherd(struct work_struct *w);
static DECLARE_DELAYED_WORK(shepherd, vmstat_shepherd); static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);
static void vmstat_shepherd(struct work_struct *w) static void vmstat_shepherd(struct work_struct *w)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment