Commit 075db150 authored by Konstantin Khlebnikov, committed by Linus Torvalds

tools/vm/page-types.c: add memory cgroup dumping and filtering

This adds two command line keys:

 -c|--cgroup path|@inode	Walk only pages owned by this memory cgroup
 -C|--list-cgroup		Show memory cgroup inodes

[vdavydov@virtuozzo.com: opt_cgroup should be uint64_t.  Fix conflicts with "tools/vm/page-types.c: support swap entry"]
Signed-off-by: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent accf6242
...@@ -75,6 +75,7 @@ ...@@ -75,6 +75,7 @@
#define KPF_BYTES 8 #define KPF_BYTES 8
#define PROC_KPAGEFLAGS "/proc/kpageflags" #define PROC_KPAGEFLAGS "/proc/kpageflags"
#define PROC_KPAGECGROUP "/proc/kpagecgroup"
/* [32-] kernel hacking assistances */ /* [32-] kernel hacking assistances */
#define KPF_RESERVED 32 #define KPF_RESERVED 32
...@@ -168,7 +169,9 @@ static int opt_raw; /* for kernel developers */ ...@@ -168,7 +169,9 @@ static int opt_raw; /* for kernel developers */
static int opt_list; /* list pages (in ranges) */ static int opt_list; /* list pages (in ranges) */
static int opt_no_summary; /* don't show summary */ static int opt_no_summary; /* don't show summary */
static pid_t opt_pid; /* process to walk */ static pid_t opt_pid; /* process to walk */
const char * opt_file; const char * opt_file; /* file or directory path */
static uint64_t opt_cgroup; /* cgroup inode */
static int opt_list_cgroup;/* list page cgroup */
#define MAX_ADDR_RANGES 1024 #define MAX_ADDR_RANGES 1024
static int nr_addr_ranges; static int nr_addr_ranges;
...@@ -189,6 +192,7 @@ static int page_size; ...@@ -189,6 +192,7 @@ static int page_size;
static int pagemap_fd; static int pagemap_fd;
static int kpageflags_fd; static int kpageflags_fd;
static int kpagecgroup_fd = -1;
static int opt_hwpoison; static int opt_hwpoison;
static int opt_unpoison; static int opt_unpoison;
...@@ -282,6 +286,16 @@ static unsigned long kpageflags_read(uint64_t *buf, ...@@ -282,6 +286,16 @@ static unsigned long kpageflags_read(uint64_t *buf,
return do_u64_read(kpageflags_fd, PROC_KPAGEFLAGS, buf, index, pages); return do_u64_read(kpageflags_fd, PROC_KPAGEFLAGS, buf, index, pages);
} }
static unsigned long kpagecgroup_read(uint64_t *buf,
unsigned long index,
unsigned long pages)
{
if (kpagecgroup_fd < 0)
return pages;
return do_u64_read(kpagecgroup_fd, PROC_KPAGEFLAGS, buf, index, pages);
}
static unsigned long pagemap_read(uint64_t *buf, static unsigned long pagemap_read(uint64_t *buf,
unsigned long index, unsigned long index,
unsigned long pages) unsigned long pages)
...@@ -354,14 +368,15 @@ static char *page_flag_longname(uint64_t flags) ...@@ -354,14 +368,15 @@ static char *page_flag_longname(uint64_t flags)
*/ */
static void show_page_range(unsigned long voffset, unsigned long offset, static void show_page_range(unsigned long voffset, unsigned long offset,
unsigned long size, uint64_t flags) unsigned long size, uint64_t flags, uint64_t cgroup)
{ {
static uint64_t flags0; static uint64_t flags0;
static uint64_t cgroup0;
static unsigned long voff; static unsigned long voff;
static unsigned long index; static unsigned long index;
static unsigned long count; static unsigned long count;
if (flags == flags0 && offset == index + count && if (flags == flags0 && cgroup == cgroup0 && offset == index + count &&
size && voffset == voff + count) { size && voffset == voff + count) {
count += size; count += size;
return; return;
...@@ -372,11 +387,14 @@ static void show_page_range(unsigned long voffset, unsigned long offset, ...@@ -372,11 +387,14 @@ static void show_page_range(unsigned long voffset, unsigned long offset,
printf("%lx\t", voff); printf("%lx\t", voff);
if (opt_file) if (opt_file)
printf("%lu\t", voff); printf("%lu\t", voff);
if (opt_list_cgroup)
printf("@%llu\t", (unsigned long long)cgroup0);
printf("%lx\t%lx\t%s\n", printf("%lx\t%lx\t%s\n",
index, count, page_flag_name(flags0)); index, count, page_flag_name(flags0));
} }
flags0 = flags; flags0 = flags;
cgroup0= cgroup;
index = offset; index = offset;
voff = voffset; voff = voffset;
count = size; count = size;
...@@ -384,16 +402,18 @@ static void show_page_range(unsigned long voffset, unsigned long offset, ...@@ -384,16 +402,18 @@ static void show_page_range(unsigned long voffset, unsigned long offset,
static void flush_page_range(void) static void flush_page_range(void)
{ {
show_page_range(0, 0, 0, 0); show_page_range(0, 0, 0, 0, 0);
} }
static void show_page(unsigned long voffset, static void show_page(unsigned long voffset, unsigned long offset,
unsigned long offset, uint64_t flags) uint64_t flags, uint64_t cgroup)
{ {
if (opt_pid) if (opt_pid)
printf("%lx\t", voffset); printf("%lx\t", voffset);
if (opt_file) if (opt_file)
printf("%lu\t", voffset); printf("%lu\t", voffset);
if (opt_list_cgroup)
printf("@%llu\t", (unsigned long long)cgroup);
printf("%lx\t%s\n", offset, page_flag_name(flags)); printf("%lx\t%s\n", offset, page_flag_name(flags));
} }
...@@ -576,23 +596,26 @@ static size_t hash_slot(uint64_t flags) ...@@ -576,23 +596,26 @@ static size_t hash_slot(uint64_t flags)
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
static void add_page(unsigned long voffset, static void add_page(unsigned long voffset, unsigned long offset,
unsigned long offset, uint64_t flags, uint64_t pme) uint64_t flags, uint64_t cgroup, uint64_t pme)
{ {
flags = kpageflags_flags(flags, pme); flags = kpageflags_flags(flags, pme);
if (!bit_mask_ok(flags)) if (!bit_mask_ok(flags))
return; return;
if (opt_cgroup && cgroup != (uint64_t)opt_cgroup)
return;
if (opt_hwpoison) if (opt_hwpoison)
hwpoison_page(offset); hwpoison_page(offset);
if (opt_unpoison) if (opt_unpoison)
unpoison_page(offset); unpoison_page(offset);
if (opt_list == 1) if (opt_list == 1)
show_page_range(voffset, offset, 1, flags); show_page_range(voffset, offset, 1, flags, cgroup);
else if (opt_list == 2) else if (opt_list == 2)
show_page(voffset, offset, flags); show_page(voffset, offset, flags, cgroup);
nr_pages[hash_slot(flags)]++; nr_pages[hash_slot(flags)]++;
total_pages++; total_pages++;
...@@ -605,18 +628,24 @@ static void walk_pfn(unsigned long voffset, ...@@ -605,18 +628,24 @@ static void walk_pfn(unsigned long voffset,
uint64_t pme) uint64_t pme)
{ {
uint64_t buf[KPAGEFLAGS_BATCH]; uint64_t buf[KPAGEFLAGS_BATCH];
uint64_t cgi[KPAGEFLAGS_BATCH];
unsigned long batch; unsigned long batch;
unsigned long pages; unsigned long pages;
unsigned long i; unsigned long i;
memset(cgi, 0, sizeof cgi);
while (count) { while (count) {
batch = min_t(unsigned long, count, KPAGEFLAGS_BATCH); batch = min_t(unsigned long, count, KPAGEFLAGS_BATCH);
pages = kpageflags_read(buf, index, batch); pages = kpageflags_read(buf, index, batch);
if (pages == 0) if (pages == 0)
break; break;
if (kpagecgroup_read(cgi, index, pages) != pages)
fatal("kpagecgroup returned fewer pages than expected");
for (i = 0; i < pages; i++) for (i = 0; i < pages; i++)
add_page(voffset + i, index + i, buf[i], pme); add_page(voffset + i, index + i, buf[i], cgi[i], pme);
index += pages; index += pages;
count -= pages; count -= pages;
...@@ -630,10 +659,13 @@ static void walk_swap(unsigned long voffset, uint64_t pme) ...@@ -630,10 +659,13 @@ static void walk_swap(unsigned long voffset, uint64_t pme)
if (!bit_mask_ok(flags)) if (!bit_mask_ok(flags))
return; return;
if (opt_cgroup)
return;
if (opt_list == 1) if (opt_list == 1)
show_page_range(voffset, pagemap_swap_offset(pme), 1, flags); show_page_range(voffset, pagemap_swap_offset(pme), 1, flags, 0);
else if (opt_list == 2) else if (opt_list == 2)
show_page(voffset, pagemap_swap_offset(pme), flags); show_page(voffset, pagemap_swap_offset(pme), flags, 0);
nr_pages[hash_slot(flags)]++; nr_pages[hash_slot(flags)]++;
total_pages++; total_pages++;
...@@ -741,10 +773,12 @@ static void usage(void) ...@@ -741,10 +773,12 @@ static void usage(void)
" -d|--describe flags Describe flags\n" " -d|--describe flags Describe flags\n"
" -a|--addr addr-spec Walk a range of pages\n" " -a|--addr addr-spec Walk a range of pages\n"
" -b|--bits bits-spec Walk pages with specified bits\n" " -b|--bits bits-spec Walk pages with specified bits\n"
" -c|--cgroup path|@inode Walk pages within memory cgroup\n"
" -p|--pid pid Walk process address space\n" " -p|--pid pid Walk process address space\n"
" -f|--file filename Walk file address space\n" " -f|--file filename Walk file address space\n"
" -l|--list Show page details in ranges\n" " -l|--list Show page details in ranges\n"
" -L|--list-each Show page details one by one\n" " -L|--list-each Show page details one by one\n"
" -C|--list-cgroup Show cgroup inode for pages\n"
" -N|--no-summary Don't show summary info\n" " -N|--no-summary Don't show summary info\n"
" -X|--hwpoison hwpoison pages\n" " -X|--hwpoison hwpoison pages\n"
" -x|--unpoison unpoison pages\n" " -x|--unpoison unpoison pages\n"
...@@ -879,6 +913,7 @@ static void walk_file(const char *name, const struct stat *st) ...@@ -879,6 +913,7 @@ static void walk_file(const char *name, const struct stat *st)
{ {
uint8_t vec[PAGEMAP_BATCH]; uint8_t vec[PAGEMAP_BATCH];
uint64_t buf[PAGEMAP_BATCH], flags; uint64_t buf[PAGEMAP_BATCH], flags;
uint64_t cgroup = 0;
unsigned long nr_pages, pfn, i; unsigned long nr_pages, pfn, i;
off_t off, end = st->st_size; off_t off, end = st->st_size;
int fd; int fd;
...@@ -936,12 +971,15 @@ static void walk_file(const char *name, const struct stat *st) ...@@ -936,12 +971,15 @@ static void walk_file(const char *name, const struct stat *st)
continue; continue;
if (!kpageflags_read(&flags, pfn, 1)) if (!kpageflags_read(&flags, pfn, 1))
continue; continue;
if (!kpagecgroup_read(&cgroup, pfn, 1))
fatal("kpagecgroup_read failed");
if (first && opt_list) { if (first && opt_list) {
first = 0; first = 0;
flush_page_range(); flush_page_range();
show_file(name, st); show_file(name, st);
} }
add_page(off / page_size + i, pfn, flags, buf[i]); add_page(off / page_size + i, pfn,
flags, cgroup, buf[i]);
} }
} }
...@@ -993,6 +1031,24 @@ static void parse_file(const char *name) ...@@ -993,6 +1031,24 @@ static void parse_file(const char *name)
opt_file = name; opt_file = name;
} }
/*
 * Handle the argument of -c|--cgroup and store the result in opt_cgroup.
 *
 * Two spellings are accepted:
 *   "@<number>"  the text after '@' is handed to parse_number() and the
 *                result is used as the cgroup inode directly;
 *   "<path>"     the path is stat()ed, must be a directory, and its inode
 *                number is used.
 */
static void parse_cgroup(const char *path)
{
	struct stat sb;

	if (path[0] != '@') {
		if (stat(path, &sb) != 0)
			fatal("stat failed: %s: %m\n", path);

		if (!S_ISDIR(sb.st_mode))
			fatal("cgroup supposed to be a directory: %s\n", path);

		opt_cgroup = sb.st_ino;
	} else {
		/* Skip the leading '@' and take the rest as the inode number. */
		opt_cgroup = parse_number(path + 1);
	}
}
static void parse_addr_range(const char *optarg) static void parse_addr_range(const char *optarg)
{ {
unsigned long offset; unsigned long offset;
...@@ -1116,9 +1172,11 @@ static const struct option opts[] = { ...@@ -1116,9 +1172,11 @@ static const struct option opts[] = {
{ "file" , 1, NULL, 'f' }, { "file" , 1, NULL, 'f' },
{ "addr" , 1, NULL, 'a' }, { "addr" , 1, NULL, 'a' },
{ "bits" , 1, NULL, 'b' }, { "bits" , 1, NULL, 'b' },
{ "cgroup" , 1, NULL, 'c' },
{ "describe" , 1, NULL, 'd' }, { "describe" , 1, NULL, 'd' },
{ "list" , 0, NULL, 'l' }, { "list" , 0, NULL, 'l' },
{ "list-each" , 0, NULL, 'L' }, { "list-each" , 0, NULL, 'L' },
{ "list-cgroup", 0, NULL, 'C' },
{ "no-summary", 0, NULL, 'N' }, { "no-summary", 0, NULL, 'N' },
{ "hwpoison" , 0, NULL, 'X' }, { "hwpoison" , 0, NULL, 'X' },
{ "unpoison" , 0, NULL, 'x' }, { "unpoison" , 0, NULL, 'x' },
...@@ -1133,7 +1191,7 @@ int main(int argc, char *argv[]) ...@@ -1133,7 +1191,7 @@ int main(int argc, char *argv[])
page_size = getpagesize(); page_size = getpagesize();
while ((c = getopt_long(argc, argv, while ((c = getopt_long(argc, argv,
"rp:f:a:b:d:lLNXxh", opts, NULL)) != -1) { "rp:f:a:b:d:c:ClLNXxh", opts, NULL)) != -1) {
switch (c) { switch (c) {
case 'r': case 'r':
opt_raw = 1; opt_raw = 1;
...@@ -1150,6 +1208,12 @@ int main(int argc, char *argv[]) ...@@ -1150,6 +1208,12 @@ int main(int argc, char *argv[])
case 'b': case 'b':
parse_bits_mask(optarg); parse_bits_mask(optarg);
break; break;
case 'c':
parse_cgroup(optarg);
break;
case 'C':
opt_list_cgroup = 1;
break;
case 'd': case 'd':
describe_flags(optarg); describe_flags(optarg);
exit(0); exit(0);
...@@ -1179,10 +1243,15 @@ int main(int argc, char *argv[]) ...@@ -1179,10 +1243,15 @@ int main(int argc, char *argv[])
} }
} }
if (opt_cgroup || opt_list_cgroup)
kpagecgroup_fd = checked_open(PROC_KPAGECGROUP, O_RDONLY);
if (opt_list && opt_pid) if (opt_list && opt_pid)
printf("voffset\t"); printf("voffset\t");
if (opt_list && opt_file) if (opt_list && opt_file)
printf("foffset\t"); printf("foffset\t");
if (opt_list && opt_list_cgroup)
printf("cgroup\t");
if (opt_list == 1) if (opt_list == 1)
printf("offset\tlen\tflags\n"); printf("offset\tlen\tflags\n");
if (opt_list == 2) if (opt_list == 2)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment