Commit fee7b0d8 authored by Huang Ying, committed by H. Peter Anvin

x86, kexec: x86_64: add kexec jump support for x86_64

Impact: New major feature

This patch adds kexec jump support for x86_64. More information about
kexec jump can be found in the corresponding x86_32 support patch.
Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
parent 53594547
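
At a high level, the patch turns relocate_kernel() into a call that can return: with preserve_context set it saves enough CPU state to come back, exchanges the two kernels' pages instead of overwriting them, and returns the peer kernel's re-entry point. A minimal C-level sketch of that round trip, using the names from the diff below (the helper function itself is illustrative, not part of the patch):

	#include <linux/kexec.h>
	#include <linux/suspend.h>

	/* Illustrative sketch of the control flow this patch adds to the
	 * 64-bit machine_kexec(); page_list setup and error handling
	 * are omitted. */
	static void kexec_jump_sketch(struct kimage *image,
				      unsigned long *page_list)
	{
		if (image->preserve_context)
			save_processor_state();	/* reuse hibernation helpers */

		/* relocate_kernel() now returns when the kexeced kernel
		 * jumps back; the return value is the peer's re-entry point */
		image->start = relocate_kernel((unsigned long)image->head,
					       (unsigned long)page_list,
					       image->start,
					       image->preserve_context);

		if (image->preserve_context)
			restore_processor_state();
		/* a later reboot(LINUX_REBOOT_CMD_KEXEC) repeats the jump and
		 * enters the peer kernel at the re-entry point recorded above */
	}
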
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1431,7 +1431,7 @@ config CRASH_DUMP
 config KEXEC_JUMP
 	bool "kexec jump (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
-	depends on KEXEC && HIBERNATION && X86_32
+	depends on KEXEC && HIBERNATION
 	---help---
 	  Jump between original kernel and kexeced kernel and invoke
 	  code in physical address mode via KEXEC
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -9,13 +9,13 @@
 # define PAGES_NR		4
 #else
 # define PA_CONTROL_PAGE	0
-# define PA_TABLE_PAGE		1
-# define PAGES_NR		2
+# define VA_CONTROL_PAGE	1
+# define PA_TABLE_PAGE		2
+# define PA_SWAP_PAGE		3
+# define PAGES_NR		4
 #endif
 
-#ifdef CONFIG_X86_32
 # define KEXEC_CONTROL_CODE_MAX_SIZE	2048
-#endif
 
 #ifndef __ASSEMBLY__
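
With KEXEC_CONTROL_CODE_MAX_SIZE no longer 32-bit only, both architectures split the single control page the same way. The check below merely restates the invariant that the linker-script ASSERT at the end of this patch enforces; it is an illustrative idiom, not part of the patch:

	#include <linux/kernel.h>
	#include <asm/kexec.h>

	/* First 2048 bytes: relocate_kernel code copied in by
	 * machine_kexec(); the rest of the page: jump-back state and
	 * stack (see the DATA() offsets in relocate_kernel_64.S below). */
	static inline void control_page_layout_check(void)
	{
		BUILD_BUG_ON(KEXEC_CONTROL_CODE_MAX_SIZE * 2 > PAGE_SIZE);
	}
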
@@ -136,10 +136,11 @@ relocate_kernel(unsigned long indirection_page,
 		unsigned int has_pae,
 		unsigned int preserve_context);
 #else
-NORET_TYPE void
+unsigned long
 relocate_kernel(unsigned long indirection_page,
 		unsigned long page_list,
-		unsigned long start_address) ATTRIB_NORET;
+		unsigned long start_address,
+		unsigned int preserve_context);
 #endif
 
 #define ARCH_HAS_KIMAGE_ARCH
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -13,6 +13,7 @@
 #include <linux/numa.h>
 #include <linux/ftrace.h>
 #include <linux/io.h>
+#include <linux/suspend.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -270,19 +271,43 @@ void machine_kexec(struct kimage *image)
 {
 	unsigned long page_list[PAGES_NR];
 	void *control_page;
+	int save_ftrace_enabled;
 
-	tracer_disable();
+#ifdef CONFIG_KEXEC_JUMP
+	if (kexec_image->preserve_context)
+		save_processor_state();
+#endif
+
+	save_ftrace_enabled = __ftrace_enabled_save();
 
 	/* Interrupts aren't acceptable while we reboot */
 	local_irq_disable();
 
+	if (image->preserve_context) {
+#ifdef CONFIG_X86_IO_APIC
+		/*
+		 * We need to put APICs in legacy mode so that we can
+		 * get timer interrupts in the second kernel. The
+		 * kexec/kdump paths already call disable_IO_APIC() in
+		 * one form or another; the kexec jump path needs one
+		 * as well.
+		 */
+		disable_IO_APIC();
+#endif
+	}
+
 	control_page = page_address(image->control_code_page) + PAGE_SIZE;
-	memcpy(control_page, relocate_kernel, PAGE_SIZE);
+	memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
 
 	page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
+	page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
 	page_list[PA_TABLE_PAGE] =
 	  (unsigned long)__pa(page_address(image->control_code_page));
 
+	if (image->type == KEXEC_TYPE_DEFAULT)
+		page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
+						<< PAGE_SHIFT);
+
 	/*
 	 * The segment registers are funny things, they have both a
 	 * visible and an invisible part.  Whenever the visible part is
@@ -302,8 +327,17 @@ void machine_kexec(struct kimage *image)
 	set_idt(phys_to_virt(0), 0);
 
 	/* now call it */
-	relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
-			image->start);
+	image->start = relocate_kernel((unsigned long)image->head,
+				       (unsigned long)page_list,
+				       image->start,
+				       image->preserve_context);
+
+#ifdef CONFIG_KEXEC_JUMP
+	if (kexec_image->preserve_context)
+		restore_processor_state();
+#endif
+
+	__ftrace_enabled_restore(save_ftrace_enabled);
 }
 
 void arch_crash_save_vmcoreinfo(void)
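
For reference, the roles of the four x86_64 page_list slots handed to relocate_kernel() above; the enum is an illustrative summary, not code from the patch:

	/* Annotated view of the x86_64 page_list[] slots (summary mine). */
	enum {
		pa_control_page,	/* PA of the copied relocate_kernel code  */
		va_control_page,	/* its VA, so the saved CPU state can be
					 * reached again right after jumping
					 * back, before the original page
					 * tables are restored                  */
		pa_table_page,		/* PA of the identity-mapped page table  */
		pa_swap_page,		/* PA of the scratch page swap_pages
					 * uses to exchange, not overwrite,
					 * the two kernels' pages               */
	};
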
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -19,6 +19,24 @@
 #define PTR(x) (x << 3)
 #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
 
+/*
+ * The range from control_page + KEXEC_CONTROL_CODE_MAX_SIZE up to
+ * control_page + PAGE_SIZE is used as data storage and stack for
+ * jumping back.
+ */
+#define DATA(offset)		(KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
+
+/* Minimal CPU state */
+#define RSP			DATA(0x0)
+#define CR0			DATA(0x8)
+#define CR3			DATA(0x10)
+#define CR4			DATA(0x18)
+
+/* other data */
+#define CP_PA_TABLE_PAGE	DATA(0x20)
+#define CP_PA_SWAP_PAGE		DATA(0x28)
+#define CP_PA_BACKUP_PAGES_MAP	DATA(0x30)
+
 	.text
 	.align PAGE_SIZE
 	.code64
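
Laid out as a C struct, the jump-back area these offsets describe would look as follows; the struct is illustrative only, since the assembly addresses the fields through the raw DATA() offsets:

	/* Control page contents from offset KEXEC_CONTROL_CODE_MAX_SIZE
	 * (2048) up to PAGE_SIZE; field names mirror the macros above. */
	struct kexec_jump_area {
		unsigned long rsp;			/* RSP = DATA(0x0)  */
		unsigned long cr0;			/* CR0 = DATA(0x8)  */
		unsigned long cr3;			/* CR3 = DATA(0x10) */
		unsigned long cr4;			/* CR4 = DATA(0x18) */
		unsigned long cp_pa_table_page;		/* DATA(0x20) */
		unsigned long cp_pa_swap_page;		/* DATA(0x28) */
		unsigned long cp_pa_backup_pages_map;	/* DATA(0x30) */
		/* the remainder of the page serves as the jump-back stack */
	};
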
@@ -28,8 +46,27 @@ relocate_kernel:
 	 * %rdi indirection_page
 	 * %rsi page_list
 	 * %rdx start address
+	 * %rcx preserve_context
 	 */
 
+	/* Save the CPU context, used for jumping back */
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	pushf
+
+	movq	PTR(VA_CONTROL_PAGE)(%rsi), %r11
+	movq	%rsp, RSP(%r11)
+	movq	%cr0, %rax
+	movq	%rax, CR0(%r11)
+	movq	%cr3, %rax
+	movq	%rax, CR3(%r11)
+	movq	%cr4, %rax
+	movq	%rax, CR4(%r11)
+
 	/* zero out flags, and disable interrupts */
 	pushq $0
 	popfq
@@ -41,10 +78,18 @@ relocate_kernel:
 	movq	PTR(PA_CONTROL_PAGE)(%rsi), %r8
 
 	/* get physical address of page table now too */
-	movq	PTR(PA_TABLE_PAGE)(%rsi), %rcx
+	movq	PTR(PA_TABLE_PAGE)(%rsi), %r9
+
+	/* get physical address of swap page now */
+	movq	PTR(PA_SWAP_PAGE)(%rsi), %r10
+
+	/* save some information for jumping back */
+	movq	%r9, CP_PA_TABLE_PAGE(%r11)
+	movq	%r10, CP_PA_SWAP_PAGE(%r11)
+	movq	%rdi, CP_PA_BACKUP_PAGES_MAP(%r11)
 
 	/* Switch to the identity mapped page tables */
-	movq	%rcx, %cr3
+	movq	%r9, %cr3
 
 	/* setup a new stack at the end of the physical control page */
 	lea	PAGE_SIZE(%r8), %rsp
@@ -83,9 +128,87 @@ identity_mapped:
 1:
 
 	/* Flush the TLB (needed?) */
-	movq	%rcx, %cr3
+	movq	%r9, %cr3
+
+	movq	%rcx, %r11
+	call	swap_pages
+
+	/*
+	 * To be certain of avoiding problems with self-modifying code
+	 * I need to execute a serializing instruction here.
+	 * So I flush the TLB by reloading %cr3 here, it's handy,
+	 * and not processor dependent.
+	 */
+	movq	%cr3, %rax
+	movq	%rax, %cr3
+
+	/*
+	 * set all of the registers to known values
+	 * leave %rsp alone
+	 */
+
+	testq	%r11, %r11
+	jnz	1f
+	xorq	%rax, %rax
+	xorq	%rbx, %rbx
+	xorq	%rcx, %rcx
+	xorq	%rdx, %rdx
+	xorq	%rsi, %rsi
+	xorq	%rdi, %rdi
+	xorq	%rbp, %rbp
+	xorq	%r8,  %r8
+	xorq	%r9,  %r9
+	xorq	%r10, %r10
+	xorq	%r11, %r11
+	xorq	%r12, %r12
+	xorq	%r13, %r13
+	xorq	%r14, %r14
+	xorq	%r15, %r15
+
+	ret
+
+1:
+	popq	%rdx
+	leaq	PAGE_SIZE(%r10), %rsp
+	call	*%rdx
+
+	/* get the re-entry point of the peer system */
+	movq	0(%rsp), %rbp
+	call	1f
+1:
+	popq	%r8
+	subq	$(1b - relocate_kernel), %r8
+	movq	CP_PA_SWAP_PAGE(%r8), %r10
+	movq	CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
+	movq	CP_PA_TABLE_PAGE(%r8), %rax
+	movq	%rax, %cr3
+	lea	PAGE_SIZE(%r8), %rsp
+	call	swap_pages
+	movq	$virtual_mapped, %rax
+	pushq	%rax
+	ret
+
+virtual_mapped:
+	movq	RSP(%r8), %rsp
+	movq	CR4(%r8), %rax
+	movq	%rax, %cr4
+	movq	CR3(%r8), %rax
+	movq	CR0(%r8), %r8
+	movq	%rax, %cr3
+	movq	%r8, %cr0
+	movq	%rbp, %rax
+
+	popf
+	popq	%r15
+	popq	%r14
+	popq	%r13
+	popq	%r12
+	popq	%rbp
+	popq	%rbx
+	ret
 
 	/* Do the copies */
+swap_pages:
 	movq	%rdi, %rcx	/* Put the page_list in %rcx */
 	xorq	%rdi, %rdi
 	xorq	%rsi, %rsi
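
The call 1f / popq %r8 / subq sequence in the jump-back path above is the classic position-independent-code trick: the code runs from a copy whose physical address is only known at run time, so it recovers its own base from the return address that call pushes. In C terms (the helper and both symbol names are illustrative only):

	#include <stdint.h>

	/* Link-time offsets of the two labels; stand-ins for "1b" and
	 * "relocate_kernel" in the assembly above. */
	extern const uintptr_t off_label_1b, off_relocate_kernel;

	/* Equivalent of "popq %r8; subq $(1b - relocate_kernel), %r8":
	 * turn the popped return address into the runtime base, from
	 * which CP_PA_SWAP_PAGE(%r8) and friends can be reached. */
	static inline uintptr_t runtime_base(uintptr_t popped_return_address)
	{
		return popped_return_address -
		       (off_label_1b - off_relocate_kernel);
	}
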
@@ -117,39 +240,27 @@ identity_mapped:
 	movq	%rcx,   %rsi  /* For every source page do a copy */
 	andq	$0xfffffffffffff000, %rsi
 
+	movq	%rdi, %rdx
+	movq	%rsi, %rax
+
+	movq	%r10, %rdi
 	movq	$512, %rcx
 	rep ; movsq
-	jmp	0b
-3:
 
-	/*
-	 * To be certain of avoiding problems with self-modifying code
-	 * I need to execute a serializing instruction here.
-	 * So I flush the TLB by reloading %cr3 here, it's handy,
-	 * and not processor dependent.
-	 */
-	movq	%cr3, %rax
-	movq	%rax, %cr3
-
-	/*
-	 * set all of the registers to known values
-	 * leave %rsp alone
-	 */
-
-	xorq	%rax, %rax
-	xorq	%rbx, %rbx
-	xorq	%rcx, %rcx
-	xorq	%rdx, %rdx
-	xorq	%rsi, %rsi
-	xorq	%rdi, %rdi
-	xorq	%rbp, %rbp
-	xorq	%r8,  %r8
-	xorq	%r9,  %r9
-	xorq	%r10, %r9
-	xorq	%r11, %r11
-	xorq	%r12, %r12
-	xorq	%r13, %r13
-	xorq	%r14, %r14
-	xorq	%r15, %r15
-
+	movq	%rax, %rdi
+	movq	%rdx, %rsi
+	movq	$512, %rcx
+	rep ; movsq
+
+	movq	%rdx, %rdi
+	movq	%r10, %rsi
+	movq	$512, %rcx
+	rep ; movsq
+
+	lea	PAGE_SIZE(%rax), %rsi
+	jmp	0b
+3:
 	ret
+
+	.globl kexec_control_code_size
+.set kexec_control_code_size, . - relocate_kernel
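
The rewritten copy loop no longer just streams pages into place: it exchanges each destination page with its source page through the swap page, so the running kernel's memory survives in the source (backup-map) pages and can be restored on the jump back. One iteration, expressed in C (the function is illustrative; the real work is the three rep ; movsq blocks above):

	#include <string.h>

	#define KJUMP_PAGE_SIZE 4096	/* 512 quadwords, as in the asm */

	static void swap_one_page(void *dest, void *src, void *swap)
	{
		memcpy(swap, src, KJUMP_PAGE_SIZE);	/* stash incoming page  */
		memcpy(src, dest, KJUMP_PAGE_SIZE);	/* old contents -> src  */
		memcpy(dest, swap, KJUMP_PAGE_SIZE);	/* incoming -> dest     */
	}
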
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -275,3 +275,10 @@ ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
 ASSERT((per_cpu__irq_stack_union == 0),
         "irq_stack_union is not at start of per-cpu area");
 #endif
+
+#ifdef CONFIG_KEXEC
+#include <asm/kexec.h>
+
+ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
+	"kexec control code size is too big")
+#endif