Commit 70a64b79 authored by Shakeel Butt, committed by Andrew Morton

memcg: dynamically allocate lruvec_stats

To decouple the dependency of lruvec_stats on NR_VM_NODE_STAT_ITEMS, we
need to dynamically allocate lruvec_stats in the mem_cgroup_per_node
structure.  Also move the definitions of lruvec_stats_percpu and
lruvec_stats and the related functions to memcontrol.c to facilitate later
patches.  No functional changes in the patch.

Link: https://lkml.kernel.org/r/20240501172617.678560-3-shakeel.butt@linux.dev
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
Reviewed-by: Yosry Ahmed <yosryahmed@google.com>
Reviewed-by: T.J. Mercier <tjmercier@google.com>
Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 59142d87
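
The idea in miniature: a minimal sketch (ordinary userspace C, hypothetical names such as node_stats and init_per_node_info; not kernel code) of the same decoupling move the diff below makes. The header only forward-declares the stats struct; the .c file that defines it owns its size, so the array bound never leaks into the header:

/* --- fake_header.h (hypothetical) --- */
struct node_stats;			/* opaque: size unknown to users */

struct per_node_info {
	struct node_stats *stats;	/* pointer, not an embedded struct */
};

/* --- fake_impl.c (hypothetical) --- */
#include <stdlib.h>

#define NR_STAT_ITEMS 64		/* stand-in for NR_VM_NODE_STAT_ITEMS */

struct node_stats {
	long state[NR_STAT_ITEMS];	/* size is private to this file */
};

/* mirrors alloc_mem_cgroup_per_node_info(): allocate zeroed, fail cleanly */
static int init_per_node_info(struct per_node_info *pn)
{
	pn->stats = calloc(1, sizeof(struct node_stats));
	if (!pn->stats)
		return 1;
	return 0;
}

static void free_per_node_info(struct per_node_info *pn)
{
	free(pn->stats);
	pn->stats = NULL;
}

Users of fake_header.h never see NR_STAT_ITEMS, so changing the item count only recompiles the one .c file, which is the point of the patch.
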
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -83,6 +83,8 @@ enum mem_cgroup_events_target {
 
 struct memcg_vmstats_percpu;
 struct memcg_vmstats;
+struct lruvec_stats_percpu;
+struct lruvec_stats;
 
 struct mem_cgroup_reclaim_iter {
 	struct mem_cgroup *position;
@@ -90,25 +92,6 @@ struct mem_cgroup_reclaim_iter {
 	unsigned int generation;
 };
 
-struct lruvec_stats_percpu {
-	/* Local (CPU and cgroup) state */
-	long state[NR_VM_NODE_STAT_ITEMS];
-
-	/* Delta calculation for lockless upward propagation */
-	long state_prev[NR_VM_NODE_STAT_ITEMS];
-};
-
-struct lruvec_stats {
-	/* Aggregated (CPU and subtree) state */
-	long state[NR_VM_NODE_STAT_ITEMS];
-
-	/* Non-hierarchical (CPU aggregated) state */
-	long state_local[NR_VM_NODE_STAT_ITEMS];
-
-	/* Pending child counts during tree propagation */
-	long state_pending[NR_VM_NODE_STAT_ITEMS];
-};
-
 /*
  * per-node information in memory controller.
  */
@@ -116,7 +99,7 @@ struct mem_cgroup_per_node {
 	struct lruvec		lruvec;
 
 	struct lruvec_stats_percpu __percpu	*lruvec_stats_percpu;
-	struct lruvec_stats			lruvec_stats;
+	struct lruvec_stats			*lruvec_stats;
 	unsigned long		lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
@@ -1037,42 +1020,9 @@ static inline void mod_memcg_page_state(struct page *page,
 }
 
 unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx);
-
-static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
-					      enum node_stat_item idx)
-{
-	struct mem_cgroup_per_node *pn;
-	long x;
-
-	if (mem_cgroup_disabled())
-		return node_page_state(lruvec_pgdat(lruvec), idx);
-
-	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-	x = READ_ONCE(pn->lruvec_stats.state[idx]);
-#ifdef CONFIG_SMP
-	if (x < 0)
-		x = 0;
-#endif
-	return x;
-}
-
-static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
-						    enum node_stat_item idx)
-{
-	struct mem_cgroup_per_node *pn;
-	long x = 0;
-
-	if (mem_cgroup_disabled())
-		return node_page_state(lruvec_pgdat(lruvec), idx);
-
-	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-	x = READ_ONCE(pn->lruvec_stats.state_local[idx]);
-#ifdef CONFIG_SMP
-	if (x < 0)
-		x = 0;
-#endif
-	return x;
-}
+unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx);
+unsigned long lruvec_page_state_local(struct lruvec *lruvec,
+				      enum node_stat_item idx);
 
 void mem_cgroup_flush_stats(struct mem_cgroup *memcg);
 void mem_cgroup_flush_stats_ratelimited(struct mem_cgroup *memcg);
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -575,6 +575,60 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
 	return mz;
 }
 
+struct lruvec_stats_percpu {
+	/* Local (CPU and cgroup) state */
+	long state[NR_VM_NODE_STAT_ITEMS];
+
+	/* Delta calculation for lockless upward propagation */
+	long state_prev[NR_VM_NODE_STAT_ITEMS];
+};
+
+struct lruvec_stats {
+	/* Aggregated (CPU and subtree) state */
+	long state[NR_VM_NODE_STAT_ITEMS];
+
+	/* Non-hierarchical (CPU aggregated) state */
+	long state_local[NR_VM_NODE_STAT_ITEMS];
+
+	/* Pending child counts during tree propagation */
+	long state_pending[NR_VM_NODE_STAT_ITEMS];
+};
+
+unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx)
+{
+	struct mem_cgroup_per_node *pn;
+	long x;
+
+	if (mem_cgroup_disabled())
+		return node_page_state(lruvec_pgdat(lruvec), idx);
+
+	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+	x = READ_ONCE(pn->lruvec_stats->state[idx]);
+#ifdef CONFIG_SMP
+	if (x < 0)
+		x = 0;
+#endif
+	return x;
+}
+
+unsigned long lruvec_page_state_local(struct lruvec *lruvec,
+				      enum node_stat_item idx)
+{
+	struct mem_cgroup_per_node *pn;
+	long x = 0;
+
+	if (mem_cgroup_disabled())
+		return node_page_state(lruvec_pgdat(lruvec), idx);
+
+	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+	x = READ_ONCE(pn->lruvec_stats->state_local[idx]);
+#ifdef CONFIG_SMP
+	if (x < 0)
+		x = 0;
+#endif
+	return x;
+}
+
 /* Subset of vm_event_item to report for memcg event stats */
 static const unsigned int memcg_vm_event_stat[] = {
 	PGPGIN,
@@ -5486,18 +5540,25 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 	if (!pn)
 		return 1;
 
+	pn->lruvec_stats = kzalloc_node(sizeof(struct lruvec_stats), GFP_KERNEL,
+					node);
+	if (!pn->lruvec_stats)
+		goto fail;
+
 	pn->lruvec_stats_percpu = alloc_percpu_gfp(struct lruvec_stats_percpu,
 						   GFP_KERNEL_ACCOUNT);
-	if (!pn->lruvec_stats_percpu) {
-		kfree(pn);
-		return 1;
-	}
+	if (!pn->lruvec_stats_percpu)
+		goto fail;
 
 	lruvec_init(&pn->lruvec);
 
 	pn->memcg = memcg;
 	memcg->nodeinfo[node] = pn;
 	return 0;
+fail:
+	kfree(pn->lruvec_stats);
+	kfree(pn);
+	return 1;
 }
 
 static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
@@ -5508,6 +5569,7 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 		return;
 
 	free_percpu(pn->lruvec_stats_percpu);
+	kfree(pn->lruvec_stats);
 	kfree(pn);
 }
@@ -5860,18 +5922,19 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 
 	for_each_node_state(nid, N_MEMORY) {
 		struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
-		struct mem_cgroup_per_node *ppn = NULL;
+		struct lruvec_stats *lstats = pn->lruvec_stats;
+		struct lruvec_stats *plstats = NULL;
 		struct lruvec_stats_percpu *lstatc;
 
 		if (parent)
-			ppn = parent->nodeinfo[nid];
+			plstats = parent->nodeinfo[nid]->lruvec_stats;
 
 		lstatc = per_cpu_ptr(pn->lruvec_stats_percpu, cpu);
 
 		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
-			delta = pn->lruvec_stats.state_pending[i];
+			delta = lstats->state_pending[i];
 			if (delta)
-				pn->lruvec_stats.state_pending[i] = 0;
+				lstats->state_pending[i] = 0;
 
 			delta_cpu = 0;
 			v = READ_ONCE(lstatc->state[i]);
@@ -5882,12 +5945,12 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 			}
 
 			if (delta_cpu)
-				pn->lruvec_stats.state_local[i] += delta_cpu;
+				lstats->state_local[i] += delta_cpu;
 
 			if (delta) {
-				pn->lruvec_stats.state[i] += delta;
-				if (ppn)
-					ppn->lruvec_stats.state_pending[i] += delta;
+				lstats->state[i] += delta;
+				if (plstats)
+					plstats->state_pending[i] += delta;
 			}
 		}
 	}
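
As a reading aid for the flush hunk above, a self-contained model (plain C with ordinary arrays instead of per-CPU data; all names hypothetical) of the three-tier bookkeeping mem_cgroup_css_rstat_flush() performs: each flush diffs a CPU's counters against its state_prev snapshot, folds the delta into this node's state and state_local, and queues the same delta in the parent's state_pending to be picked up when the parent is flushed:

/* propagation_model.c - hypothetical, illustrative only */
#include <stdio.h>

#define NR_ITEMS 2

struct pcpu {
	long state[NR_ITEMS];		/* live per-cpu counters */
	long state_prev[NR_ITEMS];	/* snapshot from the last flush */
};

struct stats {
	long state[NR_ITEMS];		/* aggregated (subtree) state */
	long state_local[NR_ITEMS];	/* non-hierarchical state */
	long state_pending[NR_ITEMS];	/* deltas queued by children */
};

/* one flush step for one cpu, mirroring the loop in the hunk above */
static void flush_cpu(struct stats *s, struct stats *parent, struct pcpu *c)
{
	for (int i = 0; i < NR_ITEMS; i++) {
		/* take over whatever children queued since the last flush */
		long delta = s->state_pending[i];
		s->state_pending[i] = 0;

		/* lockless delta against the previous per-cpu snapshot */
		long delta_cpu = c->state[i] - c->state_prev[i];
		c->state_prev[i] = c->state[i];

		s->state_local[i] += delta_cpu;	/* this node's own counts */
		delta += delta_cpu;

		s->state[i] += delta;		/* hierarchical total */
		if (parent)			/* defer upward propagation */
			parent->state_pending[i] += delta;
	}
}

int main(void)
{
	struct stats parent = {{0}}, child = {{0}};
	struct pcpu cpu0 = { .state = {5, 3} };
	struct pcpu idle = {{0}};

	flush_cpu(&child, &parent, &cpu0);	/* child folds in cpu0 */
	flush_cpu(&parent, NULL, &idle);	/* parent drains pending */

	printf("parent.state[0]=%ld child.state_local[0]=%ld\n",
	       parent.state[0], child.state_local[0]);	/* prints 5 and 5 */
	return 0;
}

The model compresses the kernel's if (delta) / if (delta_cpu) short-circuits, which only avoid touching cachelines when there is nothing to add; the arithmetic is identical.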