Commit 6b3ae58e authored by Johannes Weiner, committed by Linus Torvalds

memcg: remove direct page_cgroup-to-page pointer

In struct page_cgroup, we have a full word for flags, but only a few bits
of it are used.  Use the remaining upper bits to encode, depending on
configuration, either the node ID or the section number, which enables
page_cgroup-to-page lookups without a direct pointer.

This saves a full word for every page in a system with memory cgroups
enabled.
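
The reverse mapping works because every page_cgroup lives in one per-node
(flatmem) or per-section (sparsemem) array: the ID stored in the upper
flag bits selects the array, and the pc's offset within that array
recovers the pfn.  A minimal user-space sketch of the bit packing follows,
with an assumed 10-bit ID width (in the kernel the width is NODES_SHIFT or
SECTIONS_SHIFT, and the low bits stay reserved for the PCG_* flags):

/*
 * Illustrative sketch only, not kernel code: pack an array ID into
 * the top bits of a flags word and recover it again.
 */
#include <assert.h>
#include <stdio.h>

#define BITS_PER_LONG	(8 * sizeof(unsigned long))
#define ARRAYID_WIDTH	10	/* assumed; really SECTIONS_SHIFT or NODES_SHIFT */
#define ARRAYID_SHIFT	(BITS_PER_LONG - ARRAYID_WIDTH)
#define ARRAYID_MASK	((1UL << ARRAYID_WIDTH) - 1)

/* Store the array ID in the upper bits, leaving the flag bits alone. */
static unsigned long set_array_id(unsigned long flags, unsigned long id)
{
	flags &= ~(ARRAYID_MASK << ARRAYID_SHIFT);
	return flags | ((id & ARRAYID_MASK) << ARRAYID_SHIFT);
}

/* Recover the array ID from the upper bits. */
static unsigned long get_array_id(unsigned long flags)
{
	return (flags >> ARRAYID_SHIFT) & ARRAYID_MASK;
}

int main(void)
{
	unsigned long flags = 0x5;		/* some low PCG_* bits set */

	flags = set_array_id(flags, 42);	/* node or section 42 */
	assert(get_array_id(flags) == 42);
	assert((flags & 0x7f) == 0x5);		/* low flag bits untouched */
	printf("flags word: %#lx\n", flags);
	return 0;
}

Keeping the ID at the top of the word means the bit_spinlock and
test/set operations on the low flag bits are unaffected.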
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 5564e88b
include/linux/page_cgroup.h
 #ifndef __LINUX_PAGE_CGROUP_H
 #define __LINUX_PAGE_CGROUP_H
+
+enum {
+	/* flags for mem_cgroup */
+	PCG_LOCK,  /* Lock for pc->mem_cgroup and following bits. */
+	PCG_CACHE, /* charged as cache */
+	PCG_USED, /* this object is in use. */
+	PCG_MIGRATION, /* under page migration */
+	/* flags for mem_cgroup and file and I/O status */
+	PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
+	PCG_FILE_MAPPED, /* page is accounted as "mapped" */
+	/* No lock in page_cgroup */
+	PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
+	__NR_PCG_FLAGS,
+};
+
+#ifndef __GENERATING_BOUNDS_H
+#include <generated/bounds.h>
+
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 #include <linux/bit_spinlock.h>
 /*
  * Page Cgroup can be considered as an extended mem_map.
  * A page_cgroup page is associated with every page descriptor. The
@@ -13,7 +31,6 @@
 struct page_cgroup {
 	unsigned long flags;
 	struct mem_cgroup *mem_cgroup;
-	struct page *page;
 	struct list_head lru;		/* per cgroup LRU list */
 };
@@ -32,19 +49,7 @@ static inline void __init page_cgroup_init(void)
 #endif

 struct page_cgroup *lookup_page_cgroup(struct page *page);
-
-enum {
-	/* flags for mem_cgroup */
-	PCG_LOCK,  /* Lock for pc->mem_cgroup and following bits. */
-	PCG_CACHE, /* charged as cache */
-	PCG_USED, /* this object is in use. */
-	PCG_MIGRATION, /* under page migration */
-	/* flags for mem_cgroup and file and I/O status */
-	PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
-	PCG_FILE_MAPPED, /* page is accounted as "mapped" */
-	/* No lock in page_cgroup */
-	PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
-};
+struct page *lookup_cgroup_page(struct page_cgroup *pc);

 #define TESTPCGFLAG(uname, lname)			\
 static inline int PageCgroup##uname(struct page_cgroup *pc)	\
@@ -117,6 +122,39 @@ static inline void move_unlock_page_cgroup(struct page_cgroup *pc,
 	local_irq_restore(*flags);
 }

+#ifdef CONFIG_SPARSEMEM
+#define PCG_ARRAYID_WIDTH	SECTIONS_SHIFT
+#else
+#define PCG_ARRAYID_WIDTH	NODES_SHIFT
+#endif
+
+#if (PCG_ARRAYID_WIDTH > BITS_PER_LONG - NR_PCG_FLAGS)
+#error Not enough space left in pc->flags to store page_cgroup array IDs
+#endif
+
+/* pc->flags: ARRAY-ID | FLAGS */
+
+#define PCG_ARRAYID_MASK	((1UL << PCG_ARRAYID_WIDTH) - 1)
+
+#define PCG_ARRAYID_OFFSET	(BITS_PER_LONG - PCG_ARRAYID_WIDTH)
+/*
+ * Zero the shift count for non-existent fields, to prevent compiler
+ * warnings and ensure references are optimized away.
+ */
+#define PCG_ARRAYID_SHIFT	(PCG_ARRAYID_OFFSET * (PCG_ARRAYID_WIDTH != 0))
+
+static inline void set_page_cgroup_array_id(struct page_cgroup *pc,
+					    unsigned long id)
+{
+	pc->flags &= ~(PCG_ARRAYID_MASK << PCG_ARRAYID_SHIFT);
+	pc->flags |= (id & PCG_ARRAYID_MASK) << PCG_ARRAYID_SHIFT;
+}
+
+static inline unsigned long page_cgroup_array_id(struct page_cgroup *pc)
+{
+	return (pc->flags >> PCG_ARRAYID_SHIFT) & PCG_ARRAYID_MASK;
+}
+
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 struct page_cgroup;
@@ -137,7 +175,7 @@ static inline void __init page_cgroup_init_flatmem(void)
 {
 }

-#endif
+#endif /* CONFIG_CGROUP_MEM_RES_CTLR */

 #include <linux/swap.h>
@@ -173,5 +211,8 @@ static inline void swap_cgroup_swapoff(int type)
 	return;
 }

-#endif
-#endif
+#endif /* CONFIG_CGROUP_MEM_RES_CTLR_SWAP */
+
+#endif /* !__GENERATING_BOUNDS_H */
+
+#endif /* __LINUX_PAGE_CGROUP_H */
kernel/bounds.c
@@ -9,11 +9,13 @@
 #include <linux/page-flags.h>
 #include <linux/mmzone.h>
 #include <linux/kbuild.h>
+#include <linux/page_cgroup.h>

 void foo(void)
 {
 	/* The enum constants to put into include/generated/bounds.h */
 	DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
 	DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
+	DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS);
 	/* End of constants */
 }
mm/memcontrol.c
@@ -1080,7 +1080,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 		if (unlikely(!PageCgroupUsed(pc)))
 			continue;

-		page = pc->page;
+		page = lookup_cgroup_page(pc);

 		if (unlikely(!PageLRU(page)))
 			continue;
@@ -3344,7 +3344,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 	}
 	spin_unlock_irqrestore(&zone->lru_lock, flags);

-	page = pc->page;
+	page = lookup_cgroup_page(pc);

 	ret = mem_cgroup_move_parent(page, pc, mem, GFP_KERNEL);
 	if (ret == -ENOMEM)
...
mm/page_cgroup.c
@@ -11,12 +11,11 @@
 #include <linux/swapops.h>
 #include <linux/kmemleak.h>

-static void __meminit
-__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
+static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id)
 {
 	pc->flags = 0;
+	set_page_cgroup_array_id(pc, id);
 	pc->mem_cgroup = NULL;
-	pc->page = pfn_to_page(pfn);
 	INIT_LIST_HEAD(&pc->lru);
 }

 static unsigned long total_usage;
@@ -43,6 +42,19 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
 	return base + offset;
 }

+struct page *lookup_cgroup_page(struct page_cgroup *pc)
+{
+	unsigned long pfn;
+	struct page *page;
+	pg_data_t *pgdat;
+
+	pgdat = NODE_DATA(page_cgroup_array_id(pc));
+	pfn = pc - pgdat->node_page_cgroup + pgdat->node_start_pfn;
+	page = pfn_to_page(pfn);
+	VM_BUG_ON(pc != lookup_page_cgroup(page));
+	return page;
+}
+
 static int __init alloc_node_page_cgroup(int nid)
 {
 	struct page_cgroup *base, *pc;
@@ -63,7 +75,7 @@ static int __init alloc_node_page_cgroup(int nid)
 		return -ENOMEM;
 	for (index = 0; index < nr_pages; index++) {
 		pc = base + index;
-		__init_page_cgroup(pc, start_pfn + index);
+		init_page_cgroup(pc, nid);
 	}
 	NODE_DATA(nid)->node_page_cgroup = base;
 	total_usage += table_size;
@@ -105,46 +117,53 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
 	return section->page_cgroup + pfn;
 }

+struct page *lookup_cgroup_page(struct page_cgroup *pc)
+{
+	struct mem_section *section;
+	struct page *page;
+	unsigned long nr;
+
+	nr = page_cgroup_array_id(pc);
+	section = __nr_to_section(nr);
+	page = pfn_to_page(pc - section->page_cgroup);
+	VM_BUG_ON(pc != lookup_page_cgroup(page));
+	return page;
+}
+
 /* __alloc_bootmem...() is protected by !slab_available() */
 static int __init_refok init_section_page_cgroup(unsigned long pfn)
 {
-	struct mem_section *section = __pfn_to_section(pfn);
 	struct page_cgroup *base, *pc;
+	struct mem_section *section;
 	unsigned long table_size;
+	unsigned long nr;
 	int nid, index;

-	if (!section->page_cgroup) {
-		nid = page_to_nid(pfn_to_page(pfn));
-		table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
-		VM_BUG_ON(!slab_is_available());
-		if (node_state(nid, N_HIGH_MEMORY)) {
-			base = kmalloc_node(table_size,
-					GFP_KERNEL | __GFP_NOWARN, nid);
-			if (!base)
-				base = vmalloc_node(table_size, nid);
-		} else {
-			base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN);
-			if (!base)
-				base = vmalloc(table_size);
-		}
-		/*
-		 * The value stored in section->page_cgroup is (base - pfn)
-		 * and it does not point to the memory block allocated above,
-		 * causing kmemleak false positives.
-		 */
-		kmemleak_not_leak(base);
-	} else {
-		/*
-		 * We don't have to allocate page_cgroup again, but
-		 * address of memmap may be changed. So, we have to initialize
-		 * again.
-		 */
-		base = section->page_cgroup + pfn;
-		table_size = 0;
-		/* check address of memmap is changed or not. */
-		if (base->page == pfn_to_page(pfn))
-			return 0;
-	}
+	nr = pfn_to_section_nr(pfn);
+	section = __nr_to_section(nr);
+
+	if (section->page_cgroup)
+		return 0;
+
+	nid = page_to_nid(pfn_to_page(pfn));
+	table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
+	VM_BUG_ON(!slab_is_available());
+	if (node_state(nid, N_HIGH_MEMORY)) {
+		base = kmalloc_node(table_size,
+				    GFP_KERNEL | __GFP_NOWARN, nid);
+		if (!base)
+			base = vmalloc_node(table_size, nid);
+	} else {
+		base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN);
+		if (!base)
+			base = vmalloc(table_size);
+	}
+
+	/*
+	 * The value stored in section->page_cgroup is (base - pfn)
+	 * and it does not point to the memory block allocated above,
+	 * causing kmemleak false positives.
+	 */
+	kmemleak_not_leak(base);

 	if (!base) {
 		printk(KERN_ERR "page cgroup allocation failure\n");
@@ -153,7 +172,7 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn)
 	for (index = 0; index < PAGES_PER_SECTION; index++) {
 		pc = base + index;
-		__init_page_cgroup(pc, pfn + index);
+		init_page_cgroup(pc, nr);
 	}
 	section->page_cgroup = base - pfn;
...