Commit dd5f7260 authored by Vivek Goyal, committed by Linus Torvalds

kexec: support for kexec on panic using new system call

This patch adds support for loading a kexec on panic (kdump) kernel using
the new system call.

It prepares ELF headers for the memory areas to be dumped and for the saved
cpu registers.  It also prepares the memory map for the second kernel and
limits its boot to the reserved areas only.
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Cc: Greg Kroah-Hartman <greg@kroah.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: WANG Chao <chaowang@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 27f48d3e
#ifndef _ASM_X86_CRASH_H
#define _ASM_X86_CRASH_H

/*
 * Forward declarations. Only pointers to these types are used below, so the
 * full definitions are not required. Declaring the tags at file scope keeps
 * the header self-contained: without them each prototype would introduce its
 * own prototype-scoped struct tag, which is incompatible with the real type
 * seen by callers.
 */
struct kimage;
struct boot_params;

/*
 * Crash (kexec on panic) helpers for file-based kexec loading.
 * Each function returns an int status; callers treat a non-zero value
 * as failure.
 */

/* Load the crash-specific segments for @image (used for crash-type images). */
int crash_load_segments(struct kimage *image);

/*
 * NOTE(review): presumably copies the backup region described by the
 * backup_* fields of image->arch; confirm against the implementation.
 */
int crash_copy_backup_region(struct kimage *image);

/*
 * Fill in the memory map entries of @params for the crash kernel described
 * by @image. Used in place of the regular memory map setup for crash images.
 */
int crash_setup_memmap_entries(struct kimage *image,
			       struct boot_params *params);

#endif /* _ASM_X86_CRASH_H */
...@@ -25,6 +25,8 @@ ...@@ -25,6 +25,8 @@
#include <asm/ptrace.h> #include <asm/ptrace.h>
#include <asm/bootparam.h> #include <asm/bootparam.h>
struct kimage;
/* /*
* KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
* I.e. Maximum page that is mapped directly into kernel memory, * I.e. Maximum page that is mapped directly into kernel memory,
...@@ -62,6 +64,10 @@ ...@@ -62,6 +64,10 @@
# define KEXEC_ARCH KEXEC_ARCH_X86_64 # define KEXEC_ARCH KEXEC_ARCH_X86_64
#endif #endif
/* Memory to backup during crash kdump */
#define KEXEC_BACKUP_SRC_START (0UL)
#define KEXEC_BACKUP_SRC_END (640 * 1024UL) /* 640K */
/* /*
* CPU does not save ss and sp on stack if execution is already * CPU does not save ss and sp on stack if execution is already
* running in kernel mode at the time of NMI occurrence. This code * running in kernel mode at the time of NMI occurrence. This code
...@@ -161,17 +167,35 @@ struct kimage_arch { ...@@ -161,17 +167,35 @@ struct kimage_arch {
pud_t *pud; pud_t *pud;
pmd_t *pmd; pmd_t *pmd;
pte_t *pte; pte_t *pte;
/* Details of backup region */
unsigned long backup_src_start;
unsigned long backup_src_sz;
/* Physical address of backup segment */
unsigned long backup_load_addr;
/* Core ELF header buffer */
void *elf_headers;
unsigned long elf_headers_sz;
unsigned long elf_load_addr;
}; };
#endif /* CONFIG_X86_32 */
#ifdef CONFIG_X86_64
/*
* Number of elements and order of elements in this structure should match
* with the ones in arch/x86/purgatory/entry64.S. If you make a change here
* make an appropriate change in purgatory too.
*/
struct kexec_entry64_regs { struct kexec_entry64_regs {
uint64_t rax; uint64_t rax;
uint64_t rbx;
uint64_t rcx; uint64_t rcx;
uint64_t rdx; uint64_t rdx;
uint64_t rsi; uint64_t rbx;
uint64_t rdi;
uint64_t rsp; uint64_t rsp;
uint64_t rbp; uint64_t rbp;
uint64_t rsi;
uint64_t rdi;
uint64_t r8; uint64_t r8;
uint64_t r9; uint64_t r9;
uint64_t r10; uint64_t r10;
......
This diff is collapsed.
...@@ -21,6 +21,9 @@ ...@@ -21,6 +21,9 @@
#include <asm/bootparam.h> #include <asm/bootparam.h>
#include <asm/setup.h> #include <asm/setup.h>
#include <asm/crash.h>
#define MAX_ELFCOREHDR_STR_LEN 30 /* elfcorehdr=0x<64bit-value> */
/* /*
* Defines lowest physical address for various segments. Not sure where * Defines lowest physical address for various segments. Not sure where
...@@ -58,18 +61,24 @@ static int setup_initrd(struct boot_params *params, ...@@ -58,18 +61,24 @@ static int setup_initrd(struct boot_params *params,
return 0; return 0;
} }
static int setup_cmdline(struct boot_params *params, static int setup_cmdline(struct kimage *image, struct boot_params *params,
unsigned long bootparams_load_addr, unsigned long bootparams_load_addr,
unsigned long cmdline_offset, char *cmdline, unsigned long cmdline_offset, char *cmdline,
unsigned long cmdline_len) unsigned long cmdline_len)
{ {
char *cmdline_ptr = ((char *)params) + cmdline_offset; char *cmdline_ptr = ((char *)params) + cmdline_offset;
unsigned long cmdline_ptr_phys; unsigned long cmdline_ptr_phys, len;
uint32_t cmdline_low_32, cmdline_ext_32; uint32_t cmdline_low_32, cmdline_ext_32;
memcpy(cmdline_ptr, cmdline, cmdline_len); memcpy(cmdline_ptr, cmdline, cmdline_len);
if (image->type == KEXEC_TYPE_CRASH) {
len = sprintf(cmdline_ptr + cmdline_len - 1,
" elfcorehdr=0x%lx", image->arch.elf_load_addr);
cmdline_len += len;
}
cmdline_ptr[cmdline_len - 1] = '\0'; cmdline_ptr[cmdline_len - 1] = '\0';
pr_debug("Final command line is: %s\n", cmdline_ptr);
cmdline_ptr_phys = bootparams_load_addr + cmdline_offset; cmdline_ptr_phys = bootparams_load_addr + cmdline_offset;
cmdline_low_32 = cmdline_ptr_phys & 0xffffffffUL; cmdline_low_32 = cmdline_ptr_phys & 0xffffffffUL;
cmdline_ext_32 = cmdline_ptr_phys >> 32; cmdline_ext_32 = cmdline_ptr_phys >> 32;
...@@ -98,11 +107,12 @@ static int setup_memory_map_entries(struct boot_params *params) ...@@ -98,11 +107,12 @@ static int setup_memory_map_entries(struct boot_params *params)
return 0; return 0;
} }
static int setup_boot_parameters(struct boot_params *params) static int setup_boot_parameters(struct kimage *image,
struct boot_params *params)
{ {
unsigned int nr_e820_entries; unsigned int nr_e820_entries;
unsigned long long mem_k, start, end; unsigned long long mem_k, start, end;
int i; int i, ret = 0;
/* Get subarch from existing bootparams */ /* Get subarch from existing bootparams */
params->hdr.hardware_subarch = boot_params.hdr.hardware_subarch; params->hdr.hardware_subarch = boot_params.hdr.hardware_subarch;
...@@ -125,7 +135,13 @@ static int setup_boot_parameters(struct boot_params *params) ...@@ -125,7 +135,13 @@ static int setup_boot_parameters(struct boot_params *params)
/* Default sysdesc table */ /* Default sysdesc table */
params->sys_desc_table.length = 0; params->sys_desc_table.length = 0;
if (image->type == KEXEC_TYPE_CRASH) {
ret = crash_setup_memmap_entries(image, params);
if (ret)
return ret;
} else
setup_memory_map_entries(params); setup_memory_map_entries(params);
nr_e820_entries = params->e820_entries; nr_e820_entries = params->e820_entries;
for (i = 0; i < nr_e820_entries; i++) { for (i = 0; i < nr_e820_entries; i++) {
...@@ -153,7 +169,7 @@ static int setup_boot_parameters(struct boot_params *params) ...@@ -153,7 +169,7 @@ static int setup_boot_parameters(struct boot_params *params)
memcpy(params->edd_mbr_sig_buffer, boot_params.edd_mbr_sig_buffer, memcpy(params->edd_mbr_sig_buffer, boot_params.edd_mbr_sig_buffer,
EDD_MBR_SIG_MAX * sizeof(unsigned int)); EDD_MBR_SIG_MAX * sizeof(unsigned int));
return 0; return ret;
} }
int bzImage64_probe(const char *buf, unsigned long len) int bzImage64_probe(const char *buf, unsigned long len)
...@@ -240,6 +256,22 @@ void *bzImage64_load(struct kimage *image, char *kernel, ...@@ -240,6 +256,22 @@ void *bzImage64_load(struct kimage *image, char *kernel,
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
} }
/*
* In case of crash dump, we will append elfcorehdr=<addr> to
* command line. Make sure it does not overflow
*/
if (cmdline_len + MAX_ELFCOREHDR_STR_LEN > header->cmdline_size) {
pr_debug("Appending elfcorehdr=<addr> to command line exceeds maximum allowed length\n");
return ERR_PTR(-EINVAL);
}
/* Allocate and load backup region */
if (image->type == KEXEC_TYPE_CRASH) {
ret = crash_load_segments(image);
if (ret)
return ERR_PTR(ret);
}
/* /*
* Load purgatory. For 64bit entry point, purgatory code can be * Load purgatory. For 64bit entry point, purgatory code can be
* anywhere. * anywhere.
...@@ -254,7 +286,8 @@ void *bzImage64_load(struct kimage *image, char *kernel, ...@@ -254,7 +286,8 @@ void *bzImage64_load(struct kimage *image, char *kernel,
pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr); pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr);
/* Load Bootparams and cmdline */ /* Load Bootparams and cmdline */
params_cmdline_sz = sizeof(struct boot_params) + cmdline_len; params_cmdline_sz = sizeof(struct boot_params) + cmdline_len +
MAX_ELFCOREHDR_STR_LEN;
params = kzalloc(params_cmdline_sz, GFP_KERNEL); params = kzalloc(params_cmdline_sz, GFP_KERNEL);
if (!params) if (!params)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
...@@ -303,8 +336,8 @@ void *bzImage64_load(struct kimage *image, char *kernel, ...@@ -303,8 +336,8 @@ void *bzImage64_load(struct kimage *image, char *kernel,
setup_initrd(params, initrd_load_addr, initrd_len); setup_initrd(params, initrd_load_addr, initrd_len);
} }
setup_cmdline(params, bootparam_load_addr, sizeof(struct boot_params), setup_cmdline(image, params, bootparam_load_addr,
cmdline, cmdline_len); sizeof(struct boot_params), cmdline, cmdline_len);
/* bootloader info. Do we need a separate ID for kexec kernel loader? */ /* bootloader info. Do we need a separate ID for kexec kernel loader? */
params->hdr.type_of_loader = 0x0D << 4; params->hdr.type_of_loader = 0x0D << 4;
...@@ -332,7 +365,9 @@ void *bzImage64_load(struct kimage *image, char *kernel, ...@@ -332,7 +365,9 @@ void *bzImage64_load(struct kimage *image, char *kernel,
if (ret) if (ret)
goto out_free_params; goto out_free_params;
setup_boot_parameters(params); ret = setup_boot_parameters(image, params);
if (ret)
goto out_free_params;
/* Allocate loader specific data */ /* Allocate loader specific data */
ldata = kzalloc(sizeof(struct bzimage64_data), GFP_KERNEL); ldata = kzalloc(sizeof(struct bzimage64_data), GFP_KERNEL);
......
...@@ -178,6 +178,38 @@ static void load_segments(void) ...@@ -178,6 +178,38 @@ static void load_segments(void)
); );
} }
/*
 * Update purgatory as needed after various image segments have been prepared.
 *
 * Nothing is done for images that were not loaded in file mode, nor for
 * images that are not of crash type. For file-mode crash images the
 * purgatory symbols "backup_dest", "backup_src" and "backup_sz" are
 * processed, in that order, using the corresponding image->arch fields.
 *
 * Returns 0 on success, or the first non-zero value returned by
 * kexec_purgatory_get_set_symbol().
 */
static int arch_update_purgatory(struct kimage *image)
{
	int err;

	if (!image->file_mode)
		return 0;

	/* Only crash images need the backup region copy set up */
	if (image->type != KEXEC_TYPE_CRASH)
		return 0;

	/*
	 * NOTE(review): the trailing 0 presumably selects "set" rather than
	 * "get" -- confirm against kexec_purgatory_get_set_symbol().
	 */
	err = kexec_purgatory_get_set_symbol(image, "backup_dest",
			&image->arch.backup_load_addr,
			sizeof(image->arch.backup_load_addr), 0);
	if (!err)
		err = kexec_purgatory_get_set_symbol(image, "backup_src",
				&image->arch.backup_src_start,
				sizeof(image->arch.backup_src_start), 0);
	if (!err)
		err = kexec_purgatory_get_set_symbol(image, "backup_sz",
				&image->arch.backup_src_sz,
				sizeof(image->arch.backup_src_sz), 0);

	return err;
}
int machine_kexec_prepare(struct kimage *image) int machine_kexec_prepare(struct kimage *image)
{ {
unsigned long start_pgtable; unsigned long start_pgtable;
...@@ -191,6 +223,11 @@ int machine_kexec_prepare(struct kimage *image) ...@@ -191,6 +223,11 @@ int machine_kexec_prepare(struct kimage *image)
if (result) if (result)
return result; return result;
/* update purgatory as needed */
result = arch_update_purgatory(image);
if (result)
return result;
return 0; return 0;
} }
...@@ -315,6 +352,9 @@ int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, ...@@ -315,6 +352,9 @@ int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
void *arch_kexec_kernel_image_load(struct kimage *image) void *arch_kexec_kernel_image_load(struct kimage *image)
{ {
vfree(image->arch.elf_headers);
image->arch.elf_headers = NULL;
if (!image->fops || !image->fops->load) if (!image->fops || !image->fops->load)
return ERR_PTR(-ENOEXEC); return ERR_PTR(-ENOEXEC);
......
...@@ -61,13 +61,13 @@ new_cs_exit: ...@@ -61,13 +61,13 @@ new_cs_exit:
.balign 4 .balign 4
entry64_regs: entry64_regs:
rax: .quad 0x0 rax: .quad 0x0
rbx: .quad 0x0
rcx: .quad 0x0 rcx: .quad 0x0
rdx: .quad 0x0 rdx: .quad 0x0
rsi: .quad 0x0 rbx: .quad 0x0
rdi: .quad 0x0
rsp: .quad 0x0 rsp: .quad 0x0
rbp: .quad 0x0 rbp: .quad 0x0
rsi: .quad 0x0
rdi: .quad 0x0
r8: .quad 0x0 r8: .quad 0x0
r9: .quad 0x0 r9: .quad 0x0
r10: .quad 0x0 r10: .quad 0x0
......
...@@ -548,6 +548,7 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd, ...@@ -548,6 +548,7 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
{ {
int ret; int ret;
struct kimage *image; struct kimage *image;
bool kexec_on_panic = flags & KEXEC_FILE_ON_CRASH;
image = do_kimage_alloc_init(); image = do_kimage_alloc_init();
if (!image) if (!image)
...@@ -555,6 +556,12 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd, ...@@ -555,6 +556,12 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
image->file_mode = 1; image->file_mode = 1;
if (kexec_on_panic) {
/* Enable special crash kernel control page alloc policy. */
image->control_page = crashk_res.start;
image->type = KEXEC_TYPE_CRASH;
}
ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd, ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd,
cmdline_ptr, cmdline_len, flags); cmdline_ptr, cmdline_len, flags);
if (ret) if (ret)
...@@ -572,11 +579,13 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd, ...@@ -572,11 +579,13 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
goto out_free_post_load_bufs; goto out_free_post_load_bufs;
} }
if (!kexec_on_panic) {
image->swap_page = kimage_alloc_control_pages(image, 0); image->swap_page = kimage_alloc_control_pages(image, 0);
if (!image->swap_page) { if (!image->swap_page) {
pr_err(KERN_ERR "Could not allocate swap buffer\n"); pr_err(KERN_ERR "Could not allocate swap buffer\n");
goto out_free_control_pages; goto out_free_control_pages;
} }
}
*rimage = image; *rimage = image;
return 0; return 0;
...@@ -1113,9 +1122,13 @@ static int kimage_load_crash_segment(struct kimage *image, ...@@ -1113,9 +1122,13 @@ static int kimage_load_crash_segment(struct kimage *image,
unsigned long maddr; unsigned long maddr;
size_t ubytes, mbytes; size_t ubytes, mbytes;
int result; int result;
unsigned char __user *buf; unsigned char __user *buf = NULL;
unsigned char *kbuf = NULL;
result = 0; result = 0;
if (image->file_mode)
kbuf = segment->kbuf;
else
buf = segment->buf; buf = segment->buf;
ubytes = segment->bufsz; ubytes = segment->bufsz;
mbytes = segment->memsz; mbytes = segment->memsz;
...@@ -1139,6 +1152,11 @@ static int kimage_load_crash_segment(struct kimage *image, ...@@ -1139,6 +1152,11 @@ static int kimage_load_crash_segment(struct kimage *image,
/* Zero the trailing part of the page */ /* Zero the trailing part of the page */
memset(ptr + uchunk, 0, mchunk - uchunk); memset(ptr + uchunk, 0, mchunk - uchunk);
} }
/* For file based kexec, source pages are in kernel memory */
if (image->file_mode)
memcpy(ptr, kbuf, uchunk);
else
result = copy_from_user(ptr, buf, uchunk); result = copy_from_user(ptr, buf, uchunk);
kexec_flush_icache_page(page); kexec_flush_icache_page(page);
kunmap(page); kunmap(page);
...@@ -1148,6 +1166,9 @@ static int kimage_load_crash_segment(struct kimage *image, ...@@ -1148,6 +1166,9 @@ static int kimage_load_crash_segment(struct kimage *image,
} }
ubytes -= uchunk; ubytes -= uchunk;
maddr += mchunk; maddr += mchunk;
if (image->file_mode)
kbuf += mchunk;
else
buf += mchunk; buf += mchunk;
mbytes -= mchunk; mbytes -= mchunk;
} }
...@@ -2127,7 +2148,14 @@ int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz, ...@@ -2127,7 +2148,14 @@ int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
kbuf->top_down = top_down; kbuf->top_down = top_down;
/* Walk the RAM ranges and allocate a suitable range for the buffer */ /* Walk the RAM ranges and allocate a suitable range for the buffer */
ret = walk_system_ram_res(0, -1, kbuf, locate_mem_hole_callback); if (image->type == KEXEC_TYPE_CRASH)
ret = walk_iomem_res("Crash kernel",
IORESOURCE_MEM | IORESOURCE_BUSY,
crashk_res.start, crashk_res.end, kbuf,
locate_mem_hole_callback);
else
ret = walk_system_ram_res(0, -1, kbuf,
locate_mem_hole_callback);
if (ret != 1) { if (ret != 1) {
/* A suitable memory range could not be found for buffer */ /* A suitable memory range could not be found for buffer */
return -EADDRNOTAVAIL; return -EADDRNOTAVAIL;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment