Commit fe770bf0 authored by H. Peter Anvin, committed by Ingo Molnar

x86: clean up the page table dumper and add 32-bit support

Clean up the page table dumper (fix boundary conditions, table driven
address ranges, some formatting changes since it is no longer using
the kernel log but a separate virtual file), and generalize to 32
bits.

[ mingo@elte.hu: x86: fix the pagetable dumper ]
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
parent 926e5392
......@@ -56,7 +56,7 @@ config DEBUG_PER_CPU_MAPS
config X86_PTDUMP
bool "Export kernel pagetable layout to userspace via debugfs"
depends on X86_64
depends on DEBUG_KERNEL
select DEBUG_FS
help
Say Y here if you want to show the kernel pagetable layout in a
......
......@@ -3,6 +3,7 @@ obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o
obj-$(CONFIG_X86_32) += pgtable_32.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
obj-$(CONFIG_HIGHMEM) += highmem_32.o
......@@ -12,5 +13,4 @@ else
obj-$(CONFIG_NUMA) += numa_64.o
obj-$(CONFIG_K8_NUMA) += k8topology_64.o
obj-$(CONFIG_ACPI_NUMA) += srat_64.o
obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
endif
......@@ -12,9 +12,10 @@
* of the License.
*/
#include <linux/debugfs.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <asm/pgtable.h>
......@@ -28,73 +29,107 @@ struct pg_state {
pgprot_t current_prot;
unsigned long start_address;
unsigned long current_address;
int printed_vmalloc;
int printed_modules;
int printed_vmemmap;
int printed_highmap;
const struct addr_marker *marker;
};
/* Multipliers for offsets within the PTEs */
#define LEVEL_4_MULT (PAGE_SIZE)
#define LEVEL_3_MULT (512UL * LEVEL_4_MULT)
#define LEVEL_2_MULT (512UL * LEVEL_3_MULT)
#define LEVEL_1_MULT (512UL * LEVEL_2_MULT)
/*
 * One labelled boundary in the virtual address space.  The dumper prints
 * "---[ name ]---" once the page table walk reaches start_address.
 */
struct addr_marker {
	unsigned long start_address;	/* first address covered by this label */
	const char *name;		/* label text; NULL in the end-of-list entry */
};
/*
 * Address space marker hints.
 *
 * note_page() starts at the first entry and steps to the next one each
 * time the walk reaches that entry's start_address, so entries must be
 * listed in ascending address order.
 *
 * On 32 bits, the entries written as 0 with a commented-out name are
 * placeholders: the real values are not compile-time constants and are
 * filled in by pt_dump_init(), which addresses them by fixed index
 * (2..5).  Keep the order of the 32-bit entries in sync with that code.
 *
 * The list is terminated by a NULL name; the terminator's start_address
 * of -1 converts to the largest unsigned long value.
 */
static struct addr_marker address_markers[] = {
	{ 0, "User Space" },
#ifdef CONFIG_X86_64
	{ 0x8000000000000000UL, "Kernel Space" },
	{ 0xffff810000000000UL, "Low Kernel Mapping" },
	{ VMALLOC_START,        "vmalloc() Area" },
	{ MODULES_VADDR,        "Modules" },
	{ MODULES_END,          "End Modules" },
	{ VMEMMAP_START,        "Vmemmap" },
	{ __START_KERNEL_map,   "High Kernel Mapping" },
#else
	{ PAGE_OFFSET,          "Kernel Mapping" },
	{ 0/* VMALLOC_START */, "vmalloc() Area" },
	{ 0/*VMALLOC_END*/,     "vmalloc() End" },
# ifdef CONFIG_HIGHMEM
	{ 0/*PKMAP_BASE*/,      "Persistent kmap() Area" },
# endif
	{ 0/*FIXADDR_START*/,   "Fixmap Area" },
#endif
	{ -1, NULL }		/* End of list */
};
/*
 * Multipliers for offsets within the PTEs.
 *
 * The walkers compute the virtual address of entry i at a given level as
 * P + i * <LEVEL>_MULT.  Each multiplier is derived from the level below
 * it, starting from PAGE_SIZE for a single PTE.
 */
#define PTE_LEVEL_MULT (PAGE_SIZE)
#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
/*
* Print a readable form of a pgprot_t to the seq_file
*/
static void printk_prot(struct seq_file *m, pgprot_t prot, int level)
{
unsigned long pr = pgprot_val(prot);
if (pr & _PAGE_USER)
seq_printf(m, "USR ");
else
seq_printf(m, " ");
if (pr & _PAGE_RW)
seq_printf(m, "RW ");
else
seq_printf(m, "ro ");
if (pr & _PAGE_PWT)
seq_printf(m, "PWT ");
else
seq_printf(m, " ");
if (pr & _PAGE_PCD)
seq_printf(m, "PCD ");
else
seq_printf(m, " ");
/* Bit 9 has a different meaning on level 3 vs 4 */
if (level <= 3) {
if (pr & _PAGE_PSE)
seq_printf(m, "PSE ");
pgprotval_t pr = pgprot_val(prot);
static const char * const level_name[] =
{ "cr3", "pgd", "pud", "pmd", "pte" };
if (!pgprot_val(prot)) {
/* Not present */
seq_printf(m, " ");
} else {
if (pr & _PAGE_USER)
seq_printf(m, "USR ");
else
seq_printf(m, " ");
} else {
if (pr & _PAGE_PAT)
seq_printf(m, "pat ");
if (pr & _PAGE_RW)
seq_printf(m, "RW ");
else
seq_printf(m, "ro ");
if (pr & _PAGE_PWT)
seq_printf(m, "PWT ");
else
seq_printf(m, " ");
if (pr & _PAGE_PCD)
seq_printf(m, "PCD ");
else
seq_printf(m, " ");
/* Bit 7 has a different meaning on level 3 vs 4 */
if (level <= 3) {
if (pr & _PAGE_PSE)
seq_printf(m, "PSE ");
else
seq_printf(m, " ");
} else {
if (pr & _PAGE_PAT)
seq_printf(m, "pat ");
else
seq_printf(m, " ");
}
if (pr & _PAGE_GLOBAL)
seq_printf(m, "GLB ");
else
seq_printf(m, " ");
if (pr & _PAGE_NX)
seq_printf(m, "NX ");
else
seq_printf(m, "x ");
}
if (pr & _PAGE_GLOBAL)
seq_printf(m, "GLB ");
else
seq_printf(m, " ");
if (pr & _PAGE_NX)
seq_printf(m, "NX ");
else
seq_printf(m, "x ");
seq_printf(m, "%s\n", level_name[level]);
}
/*
* Sign-extend the 48 bit address to 64 bit
* On 64 bits, sign-extend the 48 bit address to 64 bit
*/
static unsigned long sign_extend(unsigned long u)
static unsigned long normalize_addr(unsigned long u)
{
if (u>>47)
u = u | (0xffffUL << 48);
#ifdef CONFIG_X86_64
return (signed long)(u << 16) >> 16;
#else
return u;
#endif
}
/*
......@@ -103,81 +138,62 @@ static unsigned long sign_extend(unsigned long u)
* print what we collected so far.
*/
static void note_page(struct seq_file *m, struct pg_state *st,
pgprot_t new_prot, int level)
pgprot_t new_prot, int level)
{
unsigned long prot, cur;
pgprotval_t prot, cur;
static const char units[] = "KMGTPE";
/*
* If we have a "break" in the series, we need to flush the state that
* we have now. "break" is either changing perms or a different level.
* we have now. "break" is either changing perms, levels or
* address space marker.
*/
prot = pgprot_val(new_prot) & ~(PTE_MASK);
cur = pgprot_val(st->current_prot) & ~(PTE_MASK);
if ((prot != cur || level != st->level) &&
st->current_address != st->start_address) {
char unit = 'K';
if (!st->level) {
/* First entry */
st->current_prot = new_prot;
st->level = level;
st->marker = address_markers;
seq_printf(m, "---[ %s ]---\n", st->marker->name);
} else if (prot != cur || level != st->level ||
st->current_address >= st->marker[1].start_address) {
const char *unit = units;
unsigned long delta;
/*
* We print markers for special areas of address space,
* such as the start of vmalloc space etc.
* This helps in the interpretation.
*/
if (!st->printed_vmalloc &&
st->start_address >= VMALLOC_START) {
seq_printf(m, "---[ VMALLOC SPACE ]---\n");
st->printed_vmalloc = 1;
}
if (!st->printed_modules &&
st->start_address >= MODULES_VADDR) {
seq_printf(m, "---[ MODULES SPACE ]---\n");
st->printed_modules = 1;
}
if (st->printed_modules < 2 &&
st->start_address >= MODULES_END) {
seq_printf(m, "---[ END MODULES SPACE ]---\n");
st->printed_modules = 2;
}
if (!st->printed_vmemmap &&
st->start_address >= VMEMMAP_START) {
seq_printf(m, "---[ VMMEMMAP SPACE ]---\n");
st->printed_vmemmap = 1;
}
if (!st->printed_highmap &&
st->start_address >= __START_KERNEL_map) {
seq_printf(m, "---[ HIGH KERNEL MAPPING ]---\n");
st->printed_highmap = 1;
}
/*
* Now print the actual finished series
*/
seq_printf(m, "[ %016lx - %016lx ",
st->start_address, st->current_address);
seq_printf(m, "0x%p-0x%p ",
(void *)st->start_address,
(void *)st->current_address);
delta = (st->current_address - st->start_address) >> 10;
if ((delta & 1023) == 0) {
delta = delta >> 10;
unit = 'M';
while (!(delta & 1023) && unit[1]) {
delta >>= 10;
unit++;
}
if (pgprot_val(st->current_prot)) {
seq_printf(m, "Size %9lu%cb ", delta, unit);
printk_prot(m, st->current_prot, st->level);
seq_printf(m, "L%i]\n", st->level);
} else {
/* don't print protections on non-present memory */
seq_printf(m, "%14lu%cb", delta, unit);
seq_printf(m, " L%i]\n",
st->level);
seq_printf(m, "%9lu%c ", delta, *unit);
printk_prot(m, st->current_prot, st->level);
/*
* We print markers for special areas of address space,
* such as the start of vmalloc space etc.
* This helps in the interpretation.
*/
if (st->current_address >= st->marker[1].start_address) {
st->marker++;
seq_printf(m, "---[ %s ]---\n", st->marker->name);
}
st->start_address = st->current_address;
st->current_prot = new_prot;
st->level = level;
};
}
}
static void walk_level_4(struct seq_file *m, struct pg_state *st, pmd_t addr,
static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
unsigned long P)
{
int i;
......@@ -187,14 +203,15 @@ static void walk_level_4(struct seq_file *m, struct pg_state *st, pmd_t addr,
for (i = 0; i < PTRS_PER_PTE; i++) {
pgprot_t prot = pte_pgprot(*start);
st->current_address = sign_extend(P + i * LEVEL_4_MULT);
st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
note_page(m, st, prot, 4);
start++;
}
}
#if PTRS_PER_PMD > 1
static void walk_level_3(struct seq_file *m, struct pg_state *st, pud_t addr,
static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
unsigned long P)
{
int i;
......@@ -202,25 +219,30 @@ static void walk_level_3(struct seq_file *m, struct pg_state *st, pud_t addr,
start = (pmd_t *) pud_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PMD; i++) {
st->current_address = sign_extend(P + i * LEVEL_3_MULT);
st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
if (!pmd_none(*start)) {
unsigned long prot;
pgprotval_t prot = pmd_val(*start) & ~PTE_MASK;
prot = pmd_val(*start) & ~(PTE_MASK);
/* Deal with 2Mb pages */
if (pmd_large(*start))
if (pmd_large(*start) || !pmd_present(*start))
note_page(m, st, __pgprot(prot), 3);
else
walk_level_4(m, st, *start,
P + i * LEVEL_3_MULT);
walk_pte_level(m, st, *start,
P + i * PMD_LEVEL_MULT);
} else
note_page(m, st, __pgprot(0), 3);
start++;
}
}
#else
#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
#define pud_large(a) pmd_large(__pmd(pud_val(a)))
#define pud_none(a) pmd_none(__pmd(pud_val(a)))
#endif
static void walk_level_2(struct seq_file *m, struct pg_state *st, pgd_t addr,
#if PTRS_PER_PUD > 1
static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
unsigned long P)
{
int i;
......@@ -229,16 +251,15 @@ static void walk_level_2(struct seq_file *m, struct pg_state *st, pgd_t addr,
start = (pud_t *) pgd_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PUD; i++) {
st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
if (!pud_none(*start)) {
unsigned long prot;
pgprotval_t prot = pud_val(*start) & ~PTE_MASK;
prot = pud_val(*start) & ~(PTE_MASK);
/* Deal with 1Gb pages */
if (pud_large(*start))
if (pud_large(*start) || !pud_present(*start))
note_page(m, st, __pgprot(prot), 2);
else
walk_level_3(m, st, *start,
P + i * LEVEL_2_MULT);
walk_pmd_level(m, st, *start,
P + i * PUD_LEVEL_MULT);
} else
note_page(m, st, __pgprot(0), 2);
......@@ -246,28 +267,48 @@ static void walk_level_2(struct seq_file *m, struct pg_state *st, pgd_t addr,
}
}
static void walk_level_1(struct seq_file *m)
#else
#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p)
#define pgd_large(a) pud_large(__pud(pgd_val(a)))
#define pgd_none(a) pud_none(__pud(pgd_val(a)))
#endif
static void walk_pgd_level(struct seq_file *m)
{
#ifdef CONFIG_X86_64
pgd_t *start = (pgd_t *) &init_level4_pgt;
#else
pgd_t *start = swapper_pg_dir;
#endif
int i;
struct pg_state st;
memset(&st, 0, sizeof(st));
st.level = 1;
for (i = 0; i < PTRS_PER_PGD; i++) {
if (!pgd_none(*start))
walk_level_2(m, &st, *start, i * LEVEL_1_MULT);
else
st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
if (!pgd_none(*start)) {
pgprotval_t prot = pgd_val(*start) & ~PTE_MASK;
if (pgd_large(*start) || !pgd_present(*start))
note_page(m, &st, __pgprot(prot), 1);
else
walk_pud_level(m, &st, *start,
i * PGD_LEVEL_MULT);
} else
note_page(m, &st, __pgprot(0), 1);
start++;
}
/* Flush out the last page */
st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
note_page(m, &st, __pgprot(0), 0);
}
static int ptdump_show(struct seq_file *m, void *v)
{
seq_puts(m, "Kernel pagetable dump\n");
walk_level_1(m);
walk_pgd_level(m);
return 0;
}
......@@ -287,6 +328,18 @@ int pt_dump_init(void)
{
struct dentry *pe;
#ifdef CONFIG_X86_32
/* Not a compile-time constant on x86-32 */
address_markers[2].start_address = VMALLOC_START;
address_markers[3].start_address = VMALLOC_END;
# ifdef CONFIG_HIGHMEM
address_markers[4].start_address = PKMAP_BASE;
address_markers[5].start_address = FIXADDR_START;
# else
address_markers[4].start_address = FIXADDR_START;
# endif
#endif
pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
&ptdump_fops);
if (!pe)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment