Commit 04f8cfea authored by Mel Gorman's avatar Mel Gorman Committed by Linus Torvalds

mm/page_alloc: adjust pcp->high after CPU hotplug events

The PCP high watermark is based on the number of online CPUs so the
watermarks must be adjusted during CPU hotplug.  At the time of
hot-remove, the number of online CPUs is already adjusted but during
hot-add, a delta needs to be applied to update PCP to the correct value.
After this patch is applied, the high watermarks are adjusted correctly.

  # grep high: /proc/zoneinfo  | tail -1
              high:  649
  # echo 0 > /sys/devices/system/cpu/cpu4/online
  # grep high: /proc/zoneinfo  | tail -1
              high:  664
  # echo 1 > /sys/devices/system/cpu/cpu4/online
  # grep high: /proc/zoneinfo  | tail -1
              high:  649

Link: https://lkml.kernel.org/r/20210525080119.5455-4-mgorman@techsingularity.netSigned-off-by: default avatarMel Gorman <mgorman@techsingularity.net>
Acked-by: default avatarVlastimil Babka <vbabka@suse.cz>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent b92ca18e
......@@ -54,7 +54,7 @@ enum cpuhp_state {
CPUHP_MM_MEMCQ_DEAD,
CPUHP_PERCPU_CNT_DEAD,
CPUHP_RADIX_DEAD,
CPUHP_PAGE_ALLOC_DEAD,
CPUHP_PAGE_ALLOC,
CPUHP_NET_DEV_DEAD,
CPUHP_PCI_XGENE_DEAD,
CPUHP_IOMMU_IOVA_DEAD,
......
......@@ -206,7 +206,7 @@ extern int user_min_free_kbytes;
extern void free_unref_page(struct page *page);
extern void free_unref_page_list(struct list_head *list);
extern void zone_pcp_update(struct zone *zone);
extern void zone_pcp_update(struct zone *zone, int cpu_online);
extern void zone_pcp_reset(struct zone *zone);
extern void zone_pcp_disable(struct zone *zone);
extern void zone_pcp_enable(struct zone *zone);
......
......@@ -6667,7 +6667,7 @@ static int zone_batchsize(struct zone *zone)
#endif
}
static int zone_highsize(struct zone *zone, int batch)
static int zone_highsize(struct zone *zone, int batch, int cpu_online)
{
#ifdef CONFIG_MMU
int high;
......@@ -6678,9 +6678,10 @@ static int zone_highsize(struct zone *zone, int batch)
* so that if they are full then background reclaim will not be
* started prematurely. The value is split across all online CPUs
* local to the zone. Note that early in boot that CPUs may not be
* online yet.
* online yet and that during CPU hotplug that the cpumask is not
* yet updated when a CPU is being onlined.
*/
nr_local_cpus = max(1U, cpumask_weight(cpumask_of_node(zone_to_nid(zone))));
nr_local_cpus = max(1U, cpumask_weight(cpumask_of_node(zone_to_nid(zone)))) + cpu_online;
high = low_wmark_pages(zone) / nr_local_cpus;
/*
......@@ -6754,12 +6755,12 @@ static void __zone_set_pageset_high_and_batch(struct zone *zone, unsigned long h
* Calculate and set new high and batch values for all per-cpu pagesets of a
* zone based on the zone's size.
*/
static void zone_set_pageset_high_and_batch(struct zone *zone)
static void zone_set_pageset_high_and_batch(struct zone *zone, int cpu_online)
{
int new_high, new_batch;
new_batch = max(1, zone_batchsize(zone));
new_high = zone_highsize(zone, new_batch);
new_high = zone_highsize(zone, new_batch, cpu_online);
if (zone->pageset_high == new_high &&
zone->pageset_batch == new_batch)
......@@ -6789,7 +6790,7 @@ void __meminit setup_zone_pageset(struct zone *zone)
per_cpu_pages_init(pcp, pzstats);
}
zone_set_pageset_high_and_batch(zone);
zone_set_pageset_high_and_batch(zone, 0);
}
/*
......@@ -8044,6 +8045,7 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
static int page_alloc_cpu_dead(unsigned int cpu)
{
struct zone *zone;
lru_add_drain_cpu(cpu);
drain_pages(cpu);
......@@ -8064,6 +8066,19 @@ static int page_alloc_cpu_dead(unsigned int cpu)
* race with what we are doing.
*/
cpu_vm_stats_fold(cpu);
for_each_populated_zone(zone)
zone_pcp_update(zone, 0);
return 0;
}
static int page_alloc_cpu_online(unsigned int cpu)
{
struct zone *zone;
for_each_populated_zone(zone)
zone_pcp_update(zone, 1);
return 0;
}
......@@ -8089,8 +8104,9 @@ void __init page_alloc_init(void)
hashdist = 0;
#endif
ret = cpuhp_setup_state_nocalls(CPUHP_PAGE_ALLOC_DEAD,
"mm/page_alloc:dead", NULL,
ret = cpuhp_setup_state_nocalls(CPUHP_PAGE_ALLOC,
"mm/page_alloc:pcp",
page_alloc_cpu_online,
page_alloc_cpu_dead);
WARN_ON(ret < 0);
}
......@@ -8252,7 +8268,7 @@ void setup_per_zone_wmarks(void)
* and high limits or the limits may be inappropriate.
*/
for_each_zone(zone)
zone_pcp_update(zone);
zone_pcp_update(zone, 0);
}
/*
......@@ -9053,10 +9069,10 @@ EXPORT_SYMBOL(free_contig_range);
* The zone indicated has a new number of managed_pages; batch sizes and percpu
* page high values need to be recalculated.
*/
void __meminit zone_pcp_update(struct zone *zone)
void zone_pcp_update(struct zone *zone, int cpu_online)
{
mutex_lock(&pcp_batch_high_lock);
zone_set_pageset_high_and_batch(zone);
zone_set_pageset_high_and_batch(zone, cpu_online);
mutex_unlock(&pcp_batch_high_lock);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment