Commit 292d3867 authored by Linus Torvalds

Merge branch 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 boot updates from Ingo Molnar:
 "Misc updates:

   - fix e820 error handling

   - convert page table setup code from assembly to C

   - fix kexec environment bug

   - ... plus small cleanups"

* 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/kconfig: Remove misleading note regarding hibernation and KASLR
  x86/boot: Fix KASLR and memmap= collision
  x86/e820/32: Fix e820_search_gap() error handling on x86-32
  x86/boot/32: Convert the 32-bit pgtable setup code from assembly to C
  x86/e820: Make e820_search_gap() static and remove unused variables
parents 4abaa800 5773ebfe
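
The headline change in this pull is the KASLR vs. memmap= collision fix: memmap= is used to carve regions out of the kernel's view of RAM (persistent-memory setups and kexec/kdump environments both rely on it), and KASLR previously ignored those options, so the randomized kernel image could land inside a reserved region. The avoidance test used below is plain half-open interval overlap; a minimal standalone sketch of that check (hypothetical names, not the in-tree code):

    #include <stdbool.h>

    struct region { unsigned long long start, size; };

    /* Two [start, start+size) intervals overlap iff each one begins
     * before the other ends -- the same logic as the decompressor's
     * mem_overlaps() helper in the diff below. */
    static bool regions_overlap(const struct region *a, const struct region *b)
    {
            return a->start < b->start + b->size &&
                   b->start < a->start + a->size;
    }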
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1994,10 +1994,6 @@ config RANDOMIZE_BASE
 	  theoretically possible, but the implementations are further
 	  limited due to memory layouts.
 
-	  If CONFIG_HIBERNATE is also enabled, KASLR is disabled at boot
-	  time. To enable it, boot with "kaslr" on the kernel command
-	  line (which will also disable hibernation).
-
 	  If unsure, say N.
 
 # Relocation on x86 needs some additional build support
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -333,6 +333,7 @@ size_t strnlen(const char *s, size_t maxlen);
 unsigned int atou(const char *s);
 unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base);
 size_t strlen(const char *s);
+char *strchr(const char *s, int c);
 
 /* tty.c */
 void puts(const char *);
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -11,6 +11,7 @@
  */
 #include "misc.h"
 #include "error.h"
+#include "../boot.h"
 
 #include <generated/compile.h>
 #include <linux/module.h>
@@ -52,15 +53,22 @@ static unsigned long get_boot_seed(void)
 #include "../../lib/kaslr.c"
 
 struct mem_vector {
-	unsigned long start;
-	unsigned long size;
+	unsigned long long start;
+	unsigned long long size;
 };
 
+/* Only supporting at most 4 unusable memmap regions with kaslr */
+#define MAX_MEMMAP_REGIONS	4
+
+static bool memmap_too_large;
+
 enum mem_avoid_index {
 	MEM_AVOID_ZO_RANGE = 0,
 	MEM_AVOID_INITRD,
 	MEM_AVOID_CMDLINE,
 	MEM_AVOID_BOOTPARAMS,
+	MEM_AVOID_MEMMAP_BEGIN,
+	MEM_AVOID_MEMMAP_END = MEM_AVOID_MEMMAP_BEGIN + MAX_MEMMAP_REGIONS - 1,
 	MEM_AVOID_MAX,
 };
 
@@ -77,6 +85,123 @@ static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
 	return true;
 }
 
+/**
+ * _memparse - Parse a string with mem suffixes into a number
+ * @ptr: Where parse begins
+ * @retptr: (output) Optional pointer to next char after parse completes
+ *
+ * Parses a string into a number. The number stored at @ptr is
+ * potentially suffixed with K, M, G, T, P, E.
+ */
+static unsigned long long _memparse(const char *ptr, char **retptr)
+{
+	char *endptr;	/* Local pointer to end of parsed string */
+
+	unsigned long long ret = simple_strtoull(ptr, &endptr, 0);
+
+	switch (*endptr) {
+	case 'E':
+	case 'e':
+		ret <<= 10;
+	case 'P':
+	case 'p':
+		ret <<= 10;
+	case 'T':
+	case 't':
+		ret <<= 10;
+	case 'G':
+	case 'g':
+		ret <<= 10;
+	case 'M':
+	case 'm':
+		ret <<= 10;
+	case 'K':
+	case 'k':
+		ret <<= 10;
+		endptr++;
+	default:
+		break;
+	}
+
+	if (retptr)
+		*retptr = endptr;
+
+	return ret;
+}
+
+static int
+parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
+{
+	char *oldp;
+
+	if (!p)
+		return -EINVAL;
+
+	/* We don't care about this option here */
+	if (!strncmp(p, "exactmap", 8))
+		return -EINVAL;
+
+	oldp = p;
+	*size = _memparse(p, &p);
+	if (p == oldp)
+		return -EINVAL;
+
+	switch (*p) {
+	case '@':
+		/* Skip this region, usable */
+		*start = 0;
+		*size = 0;
+		return 0;
+	case '#':
+	case '$':
+	case '!':
+		*start = _memparse(p + 1, &p);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static void mem_avoid_memmap(void)
+{
+	char arg[128];
+	int rc;
+	int i;
+	char *str;
+
+	/* See if we have any memmap areas */
+	rc = cmdline_find_option("memmap", arg, sizeof(arg));
+	if (rc <= 0)
+		return;
+
+	i = 0;
+	str = arg;
+	while (str && (i < MAX_MEMMAP_REGIONS)) {
+		int rc;
+		unsigned long long start, size;
+		char *k = strchr(str, ',');
+
+		if (k)
+			*k++ = 0;
+
+		rc = parse_memmap(str, &start, &size);
+		if (rc < 0)
+			break;
+		str = k;
+		/* A usable region that should not be skipped */
+		if (size == 0)
+			continue;
+
+		mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].start = start;
+		mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].size = size;
+		i++;
+	}
+
+	/* More than 4 memmaps, fail kaslr */
+	if ((i >= MAX_MEMMAP_REGIONS) && str)
+		memmap_too_large = true;
+}
+
 /*
  * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T).
  * The mem_avoid array is used to store the ranges that need to be avoided
@@ -197,6 +322,9 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
 
 	/* We don't need to set a mapping for setup_data. */
 
+	/* Mark the memmap regions we need to avoid */
+	mem_avoid_memmap();
+
 #ifdef CONFIG_X86_VERBOSE_BOOTUP
 	/* Make sure video RAM can be used. */
 	add_identity_map(0, PMD_SIZE);
@@ -379,6 +507,12 @@ static unsigned long find_random_phys_addr(unsigned long minimum,
 	int i;
 	unsigned long addr;
 
+	/* Check if we had too many memmaps. */
+	if (memmap_too_large) {
+		debug_putstr("Aborted e820 scan (more than 4 memmap= args)!\n");
+		return 0;
+	}
+
 	/* Make sure minimum is aligned. */
 	minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
 
@@ -456,7 +590,7 @@ void choose_random_location(unsigned long input,
 	/* Walk e820 and find a random address. */
 	random_addr = find_random_phys_addr(min_addr, output_size);
 	if (!random_addr) {
-		warn("KASLR disabled: could not find suitable E820 region!");
+		warn("Physical KASLR disabled: no suitable memory region!");
 	} else {
 		/* Update the new physical address location. */
 		if (*output != random_addr) {
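
For reference on the parser just added: memmap= accepts size@start (usable), size#start (ACPI data), size$start (reserved) and size!start (protected) regions, with K/M/G/T/P/E suffixes, and _memparse() leans on deliberate switch fall-through so each larger suffix shifts the value by another 10 bits. A hedged user-space harness illustrating the suffix arithmetic (libc strtoull stands in for the boot code's simple_strtoull; only the lower three suffixes are shown):

    #include <assert.h>
    #include <stdlib.h>

    /* User-space stand-in for the boot parser's suffix handling. */
    static unsigned long long parse_size(const char *s, char **end)
    {
            unsigned long long v = strtoull(s, end, 0);

            switch (**end) {
            case 'G': case 'g': v <<= 10;  /* fall through */
            case 'M': case 'm': v <<= 10;  /* fall through */
            case 'K': case 'k': v <<= 10; (*end)++;
            }
            return v;
    }

    int main(void)
    {
            char *end;

            /* "512M" -> 512 << 20 bytes */
            assert(parse_size("512M", &end) == 512ULL << 20);
            /* "memmap=2G$4G": size parses to 2 GiB, stopping at '$' */
            assert(parse_size("2G", &end) == 2ULL << 30 && *end == '$');
            return 0;
    }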
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -156,3 +156,16 @@ char *strstr(const char *s1, const char *s2)
 	}
 	return NULL;
 }
+
+/**
+ * strchr - Find the first occurrence of the character c in the string s.
+ * @s: the string to be searched
+ * @c: the character to search for
+ */
+char *strchr(const char *s, int c)
+{
+	while (*s != (char)c)
+		if (*s++ == '\0')
+			return NULL;
+
+	return (char *)s;
+}
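
strchr() joins the minimal boot string library only because mem_avoid_memmap() above needs to split the comma-separated memmap= list. Like the ISO C function, this implementation also matches the terminating NUL: strchr(s, '\0') returns a pointer to the end of the string, not NULL. A small sketch of the splitting pattern the caller uses (illustrative values):

    #include <string.h>

    void split_example(void)
    {
            char arg[] = "2G$4G,1M@8M";
            char *next = strchr(arg, ',');  /* -> points at the ',' */

            if (next)
                    *next++ = '\0';  /* arg: "2G$4G", next: "1M@8M" */
    }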
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -30,8 +30,6 @@ extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type,
 			     int checktype);
 extern void update_e820(void);
 extern void e820_setup_gap(void);
-extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
-			   unsigned long start_addr, unsigned long long end_addr);
 
 struct setup_data;
 extern void parse_e820_ext(u64 phys_addr, u32 data_len);
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -27,6 +27,7 @@ struct vm_area_struct;
 
 extern pgd_t swapper_pg_dir[1024];
 extern pgd_t initial_page_table[1024];
+extern pmd_t initial_pg_pmd[];
 
 static inline void pgtable_cache_init(void) { }
 static inline void check_pgt_cache(void) { }
@@ -75,4 +76,35 @@ do { \
 #define kern_addr_valid(kaddr)	(0)
 #endif
 
+/*
+ * This is how much memory in addition to the memory covered up to
+ * and including _end we need mapped initially.
+ * We need:
+ *  (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
+ *  (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
+ *
+ * Modulo rounding, each megabyte assigned here requires a kilobyte of
+ * memory, which is currently unreclaimed.
+ *
+ * This should be a multiple of a page.
+ *
+ * KERNEL_IMAGE_SIZE should be greater than pa(_end)
+ * and small than max_low_pfn, otherwise will waste some page table entries
+ */
+#if PTRS_PER_PMD > 1
+#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
+#else
+#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
+#endif
+
+/*
+ * Number of possible pages in the lowmem region.
+ *
+ * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a
+ * gas warning about overflowing shift count when gas has been compiled
+ * with only a host target support using a 32-bit type for internal
+ * representation.
+ */
+#define LOWMEM_PAGES ((((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT))
+
 #endif /* _ASM_X86_PGTABLE_32_H */
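
To make the relocated macros concrete, here is the arithmetic for the default VMSPLIT_3G layout (__PAGE_OFFSET == 0xC0000000, PAGE_SHIFT == 12), with 32-bit wraparound so that (2<<31) behaves as 2^32:

    /* LOWMEM_PAGES = ((2 << 31) - 0xC0000000) >> 12
     *              = 0x40000000 >> 12
     *              = 0x40000 pages  (1 GiB of lowmem address space)
     *
     * Non-PAE (PTRS_PER_PMD == 1, PTRS_PER_PGD == 1024):
     *   PAGE_TABLE_SIZE(0x40000) = 0x40000 / 1024 = 256 page tables (1 MiB)
     *
     * PAE (PTRS_PER_PMD == 512, PTRS_PER_PGD == 4):
     *   PAGE_TABLE_SIZE(0x40000) = 0x40000 / 512 + 4 = 516 pages,
     *   matching the "+ 4 pages" PAE worst case in the comment above. */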
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -580,24 +580,19 @@ static void __init update_e820_saved(void)
 }
 
 #define MAX_GAP_END 0x100000000ull
 /*
- * Search for a gap in the e820 memory space from start_addr to end_addr.
+ * Search for a gap in the e820 memory space from 0 to MAX_GAP_END.
  */
-__init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
-			   unsigned long start_addr, unsigned long long end_addr)
+static int __init e820_search_gap(unsigned long *gapstart,
+				  unsigned long *gapsize)
 {
-	unsigned long long last;
+	unsigned long long last = MAX_GAP_END;
 	int i = e820->nr_map;
 	int found = 0;
 
-	last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END;
-
 	while (--i >= 0) {
 		unsigned long long start = e820->map[i].addr;
 		unsigned long long end = start + e820->map[i].size;
 
-		if (end < start_addr)
-			continue;
-
 		/*
 		 * Since "last" is at most 4GB, we know we'll
 		 * fit in 32 bits if this condition is true
@@ -628,18 +623,19 @@ __init void e820_setup_gap(void)
 	unsigned long gapstart, gapsize;
 	int found;
 
-	gapstart = 0x10000000;
 	gapsize = 0x400000;
-	found  = e820_search_gap(&gapstart, &gapsize, 0, MAX_GAP_END);
+	found  = e820_search_gap(&gapstart, &gapsize);
 
-#ifdef CONFIG_X86_64
 	if (!found) {
+#ifdef CONFIG_X86_64
 		gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024;
 		printk(KERN_ERR
 			"e820: cannot find a gap in the 32bit address range\n"
 			"e820: PCI devices with unassigned 32bit BARs may break!\n");
-	}
+#else
+		gapstart = 0x10000000;
 #endif
+	}
 
 	/*
 	 * e820_reserve_resources_late protect stolen RAM already
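
The scan itself is unchanged: walk the (sorted) e820 map from the highest entry down, remember in `last` where the previously visited region began, and keep the largest hole found below MAX_GAP_END. A compact user-space sketch of the same loop (hypothetical struct, not the kernel type):

    struct entry { unsigned long long addr, size; };

    /* Returns 1 and sets *gap_start/*gap_size if a hole below 4 GiB
     * at least as large as the initial *gap_size exists. */
    static int search_gap(const struct entry *map, int nr,
                          unsigned long long *gap_start,
                          unsigned long long *gap_size)
    {
            unsigned long long last = 0x100000000ull;  /* MAX_GAP_END */
            int i = nr, found = 0;

            while (--i >= 0) {
                    unsigned long long start = map[i].addr;
                    unsigned long long end = start + map[i].size;

                    /* Hole between this region's end and the previous start? */
                    if (last > end && last - end >= *gap_size) {
                            *gap_start = end;
                            *gap_size = last - end;
                            found = 1;
                    }
                    if (start < last)
                            last = start;
            }
            return found;
    }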
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -49,3 +49,65 @@ asmlinkage __visible void __init i386_start_kernel(void)
 
 	start_kernel();
 }
+
+/*
+ * Initialize page tables.  This creates a PDE and a set of page
+ * tables, which are located immediately beyond __brk_base.  The variable
+ * _brk_end is set up to point to the first "safe" location.
+ * Mappings are created both at virtual address 0 (identity mapping)
+ * and PAGE_OFFSET for up to _end.
+ *
+ * In PAE mode initial_page_table is statically defined to contain
+ * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
+ * entries). The identity mapping is handled by pointing two PGD entries
+ * to the first kernel PMD. Note the upper half of each PMD or PTE are
+ * always zero at this stage.
+ */
+void __init mk_early_pgtbl_32(void)
+{
+#ifdef __pa
+#undef __pa
+#endif
+#define __pa(x)  ((unsigned long)(x) - PAGE_OFFSET)
+	pte_t pte, *ptep;
+	int i;
+	unsigned long *ptr;
+	/* Enough space to fit pagetables for the low memory linear map */
+	const unsigned long limit = __pa(_end) +
+		(PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT);
+#ifdef CONFIG_X86_PAE
+	pmd_t pl2, *pl2p = (pmd_t *)__pa(initial_pg_pmd);
+#define SET_PL2(pl2, val)	{ (pl2).pmd = (val); }
+#else
+	pgd_t pl2, *pl2p = (pgd_t *)__pa(initial_page_table);
+#define SET_PL2(pl2, val)	{ (pl2).pgd = (val); }
+#endif
+
+	ptep = (pte_t *)__pa(__brk_base);
+	pte.pte = PTE_IDENT_ATTR;
+
+	while ((pte.pte & PTE_PFN_MASK) < limit) {
+
+		SET_PL2(pl2, (unsigned long)ptep | PDE_IDENT_ATTR);
+		*pl2p = pl2;
+#ifndef CONFIG_X86_PAE
+		/* Kernel PDE entry */
+		*(pl2p + ((PAGE_OFFSET >> PGDIR_SHIFT))) = pl2;
+#endif
+		for (i = 0; i < PTRS_PER_PTE; i++) {
+			*ptep = pte;
+			pte.pte += PAGE_SIZE;
+			ptep++;
+		}
+
+		pl2p++;
+	}
+
+	ptr = (unsigned long *)__pa(&max_pfn_mapped);
+	/* Can't use pte_pfn() since it's a call with CONFIG_PARAVIRT */
+	*ptr = (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT;
+
+	ptr = (unsigned long *)__pa(&_brk_end);
+	*ptr = (unsigned long)ptep + PAGE_OFFSET;
+}
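
Two details of the new C version are worth spelling out. It runs from startup_32 with paging still disabled, executing at the kernel's physical load address, which is why it re-defines __pa() to a bare subtraction (the generic one can expand to an out-of-line call, e.g. with CONFIG_DEBUG_VIRTUAL) and pokes globals such as max_pfn_mapped through their physical aliases. And each loop iteration wires one page table into both halves of the address space; a worked example for the non-PAE branch under VMSPLIT_3G:

    /* PGDIR_SHIFT == 22, so PAGE_OFFSET >> PGDIR_SHIFT
     *             == 0xC0000000 >> 22 == 768.
     * First iteration: one 1024-entry page table covers 4 MiB and is
     * referenced by two PDEs:
     *   pl2p[0]   -> identity map of phys 0..4 MiB
     *   pl2p[768] -> virtual 0xC0000000..0xC0400000 (the same 4 MiB)
     * Later iterations advance both indices in step. */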
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -24,6 +24,7 @@
 #include <asm/nops.h>
 #include <asm/bootparam.h>
 #include <asm/export.h>
+#include <asm/pgtable_32.h>
 
 /* Physical address */
 #define pa(X) ((X) - __PAGE_OFFSET)
@@ -41,43 +42,9 @@
 #define X86_CAPABILITY	new_cpu_data+CPUINFO_x86_capability
 #define X86_VENDOR_ID	new_cpu_data+CPUINFO_x86_vendor_id
 
-/*
- * This is how much memory in addition to the memory covered up to
- * and including _end we need mapped initially.
- * We need:
- *  (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
- *  (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
- *
- * Modulo rounding, each megabyte assigned here requires a kilobyte of
- * memory, which is currently unreclaimed.
- *
- * This should be a multiple of a page.
- *
- * KERNEL_IMAGE_SIZE should be greater than pa(_end)
- * and small than max_low_pfn, otherwise will waste some page table entries
- */
-#if PTRS_PER_PMD > 1
-#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
-#else
-#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
-#endif
-
 #define SIZEOF_PTREGS 17*4
 
-/*
- * Number of possible pages in the lowmem region.
- *
- * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a
- * gas warning about overflowing shift count when gas has been compiled
- * with only a host target support using a 32-bit type for internal
- * representation.
- */
-LOWMEM_PAGES = (((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)
-
-/* Enough space to fit pagetables for the low memory linear map */
-MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
-
 /*
  * Worst-case size of the kernel mapping we need to make:
  * a relocatable kernel can live anywhere in lowmem, so we need to be able
@@ -160,90 +127,15 @@ ENTRY(startup_32)
 	call load_ucode_bsp
 #endif
 
-/*
- * Initialize page tables.  This creates a PDE and a set of page
- * tables, which are located immediately beyond __brk_base.  The variable
- * _brk_end is set up to point to the first "safe" location.
- * Mappings are created both at virtual address 0 (identity mapping)
- * and PAGE_OFFSET for up to _end.
- */
-#ifdef CONFIG_X86_PAE
-
-	/*
-	 * In PAE mode initial_page_table is statically defined to contain
-	 * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
-	 * entries). The identity mapping is handled by pointing two PGD entries
-	 * to the first kernel PMD.
-	 *
-	 * Note the upper half of each PMD or PTE are always zero at this stage.
-	 */
-
-#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
-
-	xorl %ebx,%ebx				/* %ebx is kept at zero */
-
-	movl $pa(__brk_base), %edi
-	movl $pa(initial_pg_pmd), %edx
-	movl $PTE_IDENT_ATTR, %eax
-10:
-	leal PDE_IDENT_ATTR(%edi),%ecx		/* Create PMD entry */
-	movl %ecx,(%edx)			/* Store PMD entry */
-						/* Upper half already zero */
-	addl $8,%edx
-	movl $512,%ecx
-11:
-	stosl
-	xchgl %eax,%ebx
-	stosl
-	xchgl %eax,%ebx
-	addl $0x1000,%eax
-	loop 11b
-
-	/*
-	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
-	 */
-	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
-	cmpl %ebp,%eax
-	jb 10b
-1:
-	addl $__PAGE_OFFSET, %edi
-	movl %edi, pa(_brk_end)
-	shrl $12, %eax
-	movl %eax, pa(max_pfn_mapped)
+	/* Create early pagetables. */
+	call  mk_early_pgtbl_32
 
 	/* Do early initialization of the fixmap area */
 	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
+#ifdef CONFIG_X86_PAE
+#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
 	movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
-#else	/* Not PAE */
-
-page_pde_offset = (__PAGE_OFFSET >> 20);
-
-	movl $pa(__brk_base), %edi
-	movl $pa(initial_page_table), %edx
-	movl $PTE_IDENT_ATTR, %eax
-10:
-	leal PDE_IDENT_ATTR(%edi),%ecx		/* Create PDE entry */
-	movl %ecx,(%edx)			/* Store identity PDE entry */
-	movl %ecx,page_pde_offset(%edx)		/* Store kernel PDE entry */
-	addl $4,%edx
-	movl $1024, %ecx
-11:
-	stosl
-	addl $0x1000,%eax
-	loop 11b
-
-	/*
-	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
-	 */
-	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
-	cmpl %ebp,%eax
-	jb 10b
-	addl $__PAGE_OFFSET, %edi
-	movl %edi, pa(_brk_end)
-	shrl $12, %eax
-	movl %eax, pa(max_pfn_mapped)
-
-	/* Do early initialization of the fixmap area */
-	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
+#else
 	movl %eax,pa(initial_page_table+0xffc)
 #endif
@@ -666,6 +558,7 @@ ENTRY(setup_once_ref)
 
 __PAGE_ALIGNED_BSS
 	.align PAGE_SIZE
 #ifdef CONFIG_X86_PAE
+.globl initial_pg_pmd
 initial_pg_pmd:
 	.fill 1024*KPMDS,4,0
 #else
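
Since KPMDS now appears without its old neighborhood, a worked example of what it evaluates to (32-bit wraparound arithmetic):

    /* KPMDS = ((-__PAGE_OFFSET) >> 30) & 3  -- how many of the four
     * PAE PGD entries map kernel space:
     *   VMSPLIT_3G: -0xC0000000 == 0x40000000 -> KPMDS = 1
     *   VMSPLIT_2G: -0x80000000 == 0x80000000 -> KPMDS = 2
     *   VMSPLIT_1G: -0x40000000 == 0xC0000000 -> KPMDS = 3
     * so ".fill 1024*KPMDS,4,0" reserves one 4 KiB page per kernel PMD.
     * The new .globl makes initial_pg_pmd visible to the C code in
     * head32.c via the extern added to pgtable_32.h. */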