Commit 93dbda7c authored by Jeremy Fitzhardinge, committed by H. Peter Anvin

x86: add brk allocation for very, very early allocations

Impact: new interface

Add a brk()-like allocator which effectively extends the bss in order
to allow very early code to do dynamic allocations.  This is better than
using statically allocated arrays for data in subsystems which may never
get used.

The space for brk allocations is in the bss ELF segment, so that the
space is mapped properly by the code which maps the kernel, and so
that bootloaders keep the space free rather than putting a ramdisk or
something into it.

The bss itself, delimited by __bss_stop, ends before the brk area
(__brk_base to __brk_limit).  The kernel text, data and bss are reserved
up to __bss_stop.

Any brk-allocated data is reserved separately just before the kernel
pagetable is built, as that code allocates from unreserved spaces
in the e820 map, potentially allocating from any unused brk memory.
Ultimately any unused memory in the brk area is used in the general
kernel memory pool.

Initially the brk space is set to 1MB, which is probably much larger
than any user needs (the largest current user is i386 head_32.S's code
to build the pagetables to map the kernel, which can get fairly large
with a big kernel image and no PSE support).  So long as the system
has sufficient memory for the bootloader to reserve the kernel+1MB brk,
there are no bad effects resulting from an over-large brk.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
parent b9719a4d
#ifndef _ASM_X86_SECTIONS_H
#define _ASM_X86_SECTIONS_H
#include <asm-generic/sections.h> #include <asm-generic/sections.h>
/*
 * Start and end of the brk area.  Both symbols are assigned by the
 * linker scripts, after __bss_stop: __brk_base is page aligned and
 * __brk_limit lies 1MB beyond it.
 */
extern char __brk_base[], __brk_limit[];
#endif /* _ASM_X86_SECTIONS_H */
...@@ -100,6 +100,10 @@ extern struct boot_params boot_params; ...@@ -100,6 +100,10 @@ extern struct boot_params boot_params;
*/ */
#define LOWMEMSIZE() (0x9f000) #define LOWMEMSIZE() (0x9f000)
/*
 * Exceedingly early brk-like allocator.
 *
 * _brk_end is the current end of the brk-allocated region.
 * extend_brk() rounds that end up to 'align' (which must be a power of
 * two), returns 'size' zeroed bytes starting there and advances
 * _brk_end past them.
 */
extern unsigned long _brk_end;
void *extend_brk(size_t size, size_t align);
#ifdef __i386__ #ifdef __i386__
void __init i386_start_kernel(void); void __init i386_start_kernel(void);
......
...@@ -18,7 +18,7 @@ void __init i386_start_kernel(void) ...@@ -18,7 +18,7 @@ void __init i386_start_kernel(void)
{ {
reserve_trampoline_memory(); reserve_trampoline_memory();
reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
#ifdef CONFIG_BLK_DEV_INITRD #ifdef CONFIG_BLK_DEV_INITRD
/* Reserve INITRD */ /* Reserve INITRD */
......
...@@ -100,7 +100,7 @@ void __init x86_64_start_reservations(char *real_mode_data) ...@@ -100,7 +100,7 @@ void __init x86_64_start_reservations(char *real_mode_data)
reserve_trampoline_memory(); reserve_trampoline_memory();
reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
#ifdef CONFIG_BLK_DEV_INITRD #ifdef CONFIG_BLK_DEV_INITRD
/* Reserve INITRD */ /* Reserve INITRD */
......
...@@ -114,6 +114,9 @@ ...@@ -114,6 +114,9 @@
unsigned int boot_cpu_id __read_mostly; unsigned int boot_cpu_id __read_mostly;
/*
 * Bounds of the brk region handed out so far; both begin at __brk_base.
 * extend_brk() advances _brk_end.  reserve_brk() sets _brk_start to 0 to
 * mark the area as locked down, which extend_brk() treats as a BUG.
 */
static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
unsigned long _brk_end = (unsigned long)__brk_base;
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
int default_cpu_present_to_apicid(int mps_cpu) int default_cpu_present_to_apicid(int mps_cpu)
{ {
...@@ -337,6 +340,34 @@ static void __init relocate_initrd(void) ...@@ -337,6 +340,34 @@ static void __init relocate_initrd(void)
} }
#endif #endif
/*
 * extend_brk - allocate a zeroed chunk from the early brk area.
 * @size:  number of bytes to allocate
 * @align: required alignment of the returned address; must be a
 *         non-zero power of two
 *
 * Rounds _brk_end up to @align, returns that address and advances
 * _brk_end by @size.  The returned memory is cleared.
 *
 * BUGs if the brk area has already been locked down (_brk_start == 0,
 * which reserve_brk() sets), if @align is not a non-zero power of two,
 * or if the request does not fit below __brk_limit.
 */
void * __init extend_brk(size_t size, size_t align)
{
	size_t mask = align - 1;
	unsigned long limit = (unsigned long)__brk_limit;
	void *ret;

	BUG_ON(_brk_start == 0);
	/*
	 * align == 0 makes mask all-ones, so (align & mask) alone would
	 * accept it and the rounding below would collapse _brk_end to 0.
	 */
	BUG_ON(align == 0 || (align & mask));

	_brk_end = (_brk_end + mask) & ~mask;
	/*
	 * Compare against the room that is left rather than computing
	 * _brk_end + size, which can wrap for a huge size and slip past
	 * the limit check.
	 */
	BUG_ON(_brk_end > limit || size > limit - _brk_end);

	ret = (void *)_brk_end;
	_brk_end += size;

	memset(ret, 0, size);

	return ret;
}
/*
 * reserve_brk - reserve whatever has been brk-allocated and close the
 * brk area.
 *
 * If extend_brk() has handed out any memory (_brk_end has moved past
 * _brk_start), that range is passed to reserve_early() under the name
 * "BRK".  _brk_start is then cleared so that any later extend_brk()
 * call trips its BUG_ON.
 */
static void __init reserve_brk(void)
{
	const unsigned long start = _brk_start;
	const unsigned long end = _brk_end;

	/* Nothing to reserve when no allocation was ever made. */
	if (end > start)
		reserve_early(__pa(start), __pa(end), "BRK");

	/* From here on the brk area accepts no new allocations. */
	_brk_start = 0;
}
static void __init reserve_initrd(void) static void __init reserve_initrd(void)
{ {
u64 ramdisk_image = boot_params.hdr.ramdisk_image; u64 ramdisk_image = boot_params.hdr.ramdisk_image;
...@@ -717,11 +748,7 @@ void __init setup_arch(char **cmdline_p) ...@@ -717,11 +748,7 @@ void __init setup_arch(char **cmdline_p)
init_mm.start_code = (unsigned long) _text; init_mm.start_code = (unsigned long) _text;
init_mm.end_code = (unsigned long) _etext; init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata; init_mm.end_data = (unsigned long) _edata;
#ifdef CONFIG_X86_32 init_mm.brk = _brk_end;
init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
#else
init_mm.brk = (unsigned long) &_end;
#endif
code_resource.start = virt_to_phys(_text); code_resource.start = virt_to_phys(_text);
code_resource.end = virt_to_phys(_etext)-1; code_resource.end = virt_to_phys(_etext)-1;
...@@ -842,6 +869,8 @@ void __init setup_arch(char **cmdline_p) ...@@ -842,6 +869,8 @@ void __init setup_arch(char **cmdline_p)
setup_bios_corruption_check(); setup_bios_corruption_check();
#endif #endif
reserve_brk();
/* max_pfn_mapped is updated here */ /* max_pfn_mapped is updated here */
max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT); max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
max_pfn_mapped = max_low_pfn_mapped; max_pfn_mapped = max_low_pfn_mapped;
......
...@@ -189,7 +189,14 @@ SECTIONS ...@@ -189,7 +189,14 @@ SECTIONS
*(.bss) *(.bss)
. = ALIGN(4); . = ALIGN(4);
__bss_stop = .; __bss_stop = .;
. = ALIGN(PAGE_SIZE);
__brk_base = . ;
. += 1024 * 1024 ;
__brk_limit = . ;
_end = . ; _end = . ;
/* This is where the kernel creates the early boot page tables */ /* This is where the kernel creates the early boot page tables */
. = ALIGN(PAGE_SIZE); . = ALIGN(PAGE_SIZE);
pg0 = . ; pg0 = . ;
......
...@@ -247,6 +247,11 @@ SECTIONS ...@@ -247,6 +247,11 @@ SECTIONS
*(.bss.page_aligned) *(.bss.page_aligned)
*(.bss) *(.bss)
__bss_stop = .; __bss_stop = .;
. = ALIGN(PAGE_SIZE);
__brk_base = . ;
. += 1024 * 1024 ;
__brk_limit = . ;
} }
_end = . ; _end = . ;
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include <asm/sections.h> #include <asm/sections.h>
#include <asm/setup.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
#include <asm/proto.h> #include <asm/proto.h>
...@@ -95,7 +96,7 @@ static inline unsigned long highmap_start_pfn(void) ...@@ -95,7 +96,7 @@ static inline unsigned long highmap_start_pfn(void)
static inline unsigned long highmap_end_pfn(void) static inline unsigned long highmap_end_pfn(void)
{ {
return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT; return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
} }
#endif #endif
...@@ -711,7 +712,7 @@ static int cpa_process_alias(struct cpa_data *cpa) ...@@ -711,7 +712,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
* No need to redo, when the primary call touched the high * No need to redo, when the primary call touched the high
* mapping already: * mapping already:
*/ */
if (within(vaddr, (unsigned long) _text, (unsigned long) _end)) if (within(vaddr, (unsigned long) _text, _brk_end))
return 0; return 0;
/* /*
......
...@@ -1723,9 +1723,9 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, ...@@ -1723,9 +1723,9 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
{ {
pmd_t *kernel_pmd; pmd_t *kernel_pmd;
init_pg_tables_start = __pa(pgd); max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; xen_start_info->nr_pt_frames * PAGE_SIZE +
max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024); 512*1024);
kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment