Commit 54622f10 authored by Mohan Kumar M, committed by Benjamin Herrenschmidt

powerpc: Support for relocatable kdump kernel

This adds relocatable kernel support for kdump. With this, the same
regular kernel can be used to capture the kdump. A signature (0xfeed1234)
is passed in r6 from the panic code to the next kernel through kexec_sequence
and the purgatory code. The signature is used to differentiate between
kdump kernels and non-kdump kernels.

The purgatory code compares the signature and sets the __kdump_flag in
head_64.S.  During boot, the kernel checks __kdump_flag and, if it
is set, behaves as a relocatable kdump kernel. This kernel
will boot at the address where it was loaded by kexec-tools, i.e. at the
address reserved through the crashkernel boot parameter.

CONFIG_CRASH_DUMP now depends on the CONFIG_RELOCATABLE option so that the
kdump kernel is built as relocatable. The same kernel can therefore be used
as both the production kernel and the kdump kernel.

This patch incorporates the changes suggested by Paul Mackerras to avoid
GOT use and to avoid two copies of the code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Mohan Kumar M <mohan@in.ibm.com>
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
parent 4792adba
...@@ -109,7 +109,8 @@ There are two possible methods of using Kdump. ...@@ -109,7 +109,8 @@ There are two possible methods of using Kdump.
2) Or use the system kernel binary itself as dump-capture kernel and there is 2) Or use the system kernel binary itself as dump-capture kernel and there is
no need to build a separate dump-capture kernel. This is possible no need to build a separate dump-capture kernel. This is possible
only with the architectures which support a relocatable kernel. As only with the architectures which support a relocatable kernel. As
of today, i386, x86_64 and ia64 architectures support relocatable kernel. of today, i386, x86_64, ppc64 and ia64 architectures support relocatable
kernel.
Building a relocatable kernel is advantageous from the point of view that Building a relocatable kernel is advantageous from the point of view that
one does not have to build a second kernel for capturing the dump. But one does not have to build a second kernel for capturing the dump. But
...@@ -207,8 +208,15 @@ Dump-capture kernel config options (Arch Dependent, i386 and x86_64) ...@@ -207,8 +208,15 @@ Dump-capture kernel config options (Arch Dependent, i386 and x86_64)
Dump-capture kernel config options (Arch Dependent, ppc64) Dump-capture kernel config options (Arch Dependent, ppc64)
---------------------------------------------------------- ----------------------------------------------------------
* Make and install the kernel and its modules. DO NOT add this kernel 1) Enable "Build a kdump crash kernel" support under "Kernel" options:
to the boot loader configuration files.
CONFIG_CRASH_DUMP=y
2) Enable "Build a relocatable kernel" support
CONFIG_RELOCATABLE=y
Make and install the kernel and its modules.
Dump-capture kernel config options (Arch Dependent, ia64) Dump-capture kernel config options (Arch Dependent, ia64)
---------------------------------------------------------- ----------------------------------------------------------
......
...@@ -323,13 +323,11 @@ config KEXEC ...@@ -323,13 +323,11 @@ config KEXEC
config CRASH_DUMP config CRASH_DUMP
bool "Build a kdump crash kernel" bool "Build a kdump crash kernel"
depends on PPC_MULTIPLATFORM && PPC64 depends on PPC_MULTIPLATFORM && PPC64 && RELOCATABLE
help help
Build a kernel suitable for use as a kdump capture kernel. Build a kernel suitable for use as a kdump capture kernel.
The kernel will be linked at a different address than normal, and The same kernel binary can be used as production kernel and dump
so can only be used for Kdump. capture kernel.
Don't change this unless you know what you are doing.
config PHYP_DUMP config PHYP_DUMP
bool "Hypervisor-assisted dump (EXPERIMENTAL)" bool "Hypervisor-assisted dump (EXPERIMENTAL)"
...@@ -829,11 +827,9 @@ config PAGE_OFFSET ...@@ -829,11 +827,9 @@ config PAGE_OFFSET
default "0xc000000000000000" default "0xc000000000000000"
config KERNEL_START config KERNEL_START
hex hex
default "0xc000000002000000" if CRASH_DUMP
default "0xc000000000000000" default "0xc000000000000000"
config PHYSICAL_START config PHYSICAL_START
hex hex
default "0x02000000" if CRASH_DUMP
default "0x00000000" default "0x00000000"
endif endif
......
...@@ -9,6 +9,12 @@ ...@@ -9,6 +9,12 @@
* Reserve to the end of the FWNMI area, see head_64.S */ * Reserve to the end of the FWNMI area, see head_64.S */
#define KDUMP_RESERVE_LIMIT 0x10000 /* 64K */ #define KDUMP_RESERVE_LIMIT 0x10000 /* 64K */
/*
* Used to differentiate between relocatable kdump kernel and other
* kernels
*/
#define KDUMP_SIGNATURE 0xfeed1234
#ifdef CONFIG_CRASH_DUMP #ifdef CONFIG_CRASH_DUMP
#define KDUMP_TRAMPOLINE_START 0x0100 #define KDUMP_TRAMPOLINE_START 0x0100
...@@ -19,17 +25,18 @@ ...@@ -19,17 +25,18 @@
#endif /* CONFIG_CRASH_DUMP */ #endif /* CONFIG_CRASH_DUMP */
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
#ifdef CONFIG_CRASH_DUMP
extern unsigned long __kdump_flag;
#if defined(CONFIG_CRASH_DUMP) && !defined(CONFIG_RELOCATABLE)
extern void reserve_kdump_trampoline(void); extern void reserve_kdump_trampoline(void);
extern void setup_kdump_trampoline(void); extern void setup_kdump_trampoline(void);
#else
#else /* !CONFIG_CRASH_DUMP */ /* !CRASH_DUMP || RELOCATABLE */
static inline void reserve_kdump_trampoline(void) { ; } static inline void reserve_kdump_trampoline(void) { ; }
static inline void setup_kdump_trampoline(void) { ; } static inline void setup_kdump_trampoline(void) { ; }
#endif
#endif /* CONFIG_CRASH_DUMP */
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
#endif /* __PPC64_KDUMP_H */ #endif /* __PPC64_KDUMP_H */
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
/* Stores the physical address of elf header of crash image. */ /* Stores the physical address of elf header of crash image. */
unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
#ifndef CONFIG_RELOCATABLE
void __init reserve_kdump_trampoline(void) void __init reserve_kdump_trampoline(void)
{ {
lmb_reserve(0, KDUMP_RESERVE_LIMIT); lmb_reserve(0, KDUMP_RESERVE_LIMIT);
...@@ -68,6 +69,7 @@ void __init setup_kdump_trampoline(void) ...@@ -68,6 +69,7 @@ void __init setup_kdump_trampoline(void)
DBG(" <- setup_kdump_trampoline()\n"); DBG(" <- setup_kdump_trampoline()\n");
} }
#endif /* CONFIG_RELOCATABLE */
/* /*
* Note: elfcorehdr_addr is not just limited to vmcore. It is also used by * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
......
...@@ -97,6 +97,12 @@ __secondary_hold_spinloop: ...@@ -97,6 +97,12 @@ __secondary_hold_spinloop:
__secondary_hold_acknowledge: __secondary_hold_acknowledge:
.llong 0x0 .llong 0x0
/* This flag is set by purgatory if we should be a kdump kernel. */
/* Do not move this variable as purgatory knows about it. */
.globl __kdump_flag
__kdump_flag:
.llong 0x0
#ifdef CONFIG_PPC_ISERIES #ifdef CONFIG_PPC_ISERIES
/* /*
* At offset 0x20, there is a pointer to iSeries LPAR data. * At offset 0x20, there is a pointer to iSeries LPAR data.
...@@ -1384,7 +1390,13 @@ _STATIC(__after_prom_start) ...@@ -1384,7 +1390,13 @@ _STATIC(__after_prom_start)
/* process relocations for the final address of the kernel */ /* process relocations for the final address of the kernel */
lis r25,PAGE_OFFSET@highest /* compute virtual base of kernel */ lis r25,PAGE_OFFSET@highest /* compute virtual base of kernel */
sldi r25,r25,32 sldi r25,r25,32
mr r3,r25 #ifdef CONFIG_CRASH_DUMP
ld r7,__kdump_flag-_stext(r26)
cmpldi cr0,r7,1 /* kdump kernel ? - stay where we are */
bne 1f
add r25,r25,r26
#endif
1: mr r3,r25
bl .relocate bl .relocate
#endif #endif
...@@ -1398,11 +1410,26 @@ _STATIC(__after_prom_start) ...@@ -1398,11 +1410,26 @@ _STATIC(__after_prom_start)
li r3,0 /* target addr */ li r3,0 /* target addr */
mr. r4,r26 /* In some cases the loader may */ mr. r4,r26 /* In some cases the loader may */
beq 9f /* have already put us at zero */ beq 9f /* have already put us at zero */
lis r5,(copy_to_here - _stext)@ha
addi r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
li r6,0x100 /* Start offset, the first 0x100 */ li r6,0x100 /* Start offset, the first 0x100 */
/* bytes were copied earlier. */ /* bytes were copied earlier. */
#ifdef CONFIG_CRASH_DUMP
/*
* Check if the kernel has to be running as relocatable kernel based on the
* variable __kdump_flag, if it is set the kernel is treated as relocatable
* kernel, otherwise it will be moved to PHYSICAL_START
*/
ld r7,__kdump_flag-_stext(r26)
cmpldi cr0,r7,1
bne 3f
li r5,__end_interrupts - _stext /* just copy interrupts */
b 5f
3:
#endif
lis r5,(copy_to_here - _stext)@ha
addi r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
bl .copy_and_flush /* copy the first n bytes */ bl .copy_and_flush /* copy the first n bytes */
/* this includes the code being */ /* this includes the code being */
/* executed here. */ /* executed here. */
...@@ -1411,15 +1438,15 @@ _STATIC(__after_prom_start) ...@@ -1411,15 +1438,15 @@ _STATIC(__after_prom_start)
mtctr r8 mtctr r8
bctr bctr
p_end: .llong _end - _stext
4: /* Now copy the rest of the kernel up to _end */ 4: /* Now copy the rest of the kernel up to _end */
addis r5,r26,(p_end - _stext)@ha addis r5,r26,(p_end - _stext)@ha
ld r5,(p_end - _stext)@l(r5) /* get _end */ ld r5,(p_end - _stext)@l(r5) /* get _end */
bl .copy_and_flush /* copy the rest */ 5: bl .copy_and_flush /* copy the rest */
9: b .start_here_multiplatform 9: b .start_here_multiplatform
p_end: .llong _end - _stext
/* /*
* Copy routine used to copy the kernel to start at physical address 0 * Copy routine used to copy the kernel to start at physical address 0
* and flush and invalidate the caches as needed. * and flush and invalidate the caches as needed.
......
...@@ -458,6 +458,42 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, ...@@ -458,6 +458,42 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
spin_unlock_irqrestore(&(tbl->it_lock), flags); spin_unlock_irqrestore(&(tbl->it_lock), flags);
} }
/*
 * iommu_table_clear - set up the initial TCE table state for @tbl.
 *
 * When __kdump_flag is clear, every entry of the table is freed through
 * ppc_md.tce_free() in case firmware left allocations in it.
 *
 * When __kdump_flag is set (and only if CONFIG_CRASH_DUMP is built in and
 * the platform provides ppc_md.tce_get), the existing entries are left
 * alone: each populated entry is marked as in use in tbl->it_map so the
 * mappings left by the first kernel stay reserved.  If fewer than
 * KDUMP_MIN_TCE_ENTRIES slots would remain free, the last
 * KDUMP_MIN_TCE_ENTRIES bits of the map are cleared so this kernel has
 * some entries to work with.
 */
static void iommu_table_clear(struct iommu_table *tbl)
{
	if (!__kdump_flag) {
		/* Clear the table in case firmware left allocations in it */
		ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
		return;
	}

#ifdef CONFIG_CRASH_DUMP
	if (ppc_md.tce_get) {
		unsigned long index, tceval, tcecount = 0;

		/* Reserve the existing mappings left by the first kernel. */
		for (index = 0; index < tbl->it_size; index++) {
			tceval = ppc_md.tce_get(tbl, index + tbl->it_offset);
			/*
			 * Freed TCE entry contains 0x7fffffffffffffff on JS20
			 */
			if (tceval && (tceval != 0x7fffffffffffffffUL)) {
				__set_bit(index, tbl->it_map);
				tcecount++;
			}
		}

		if ((tbl->it_size - tcecount) < KDUMP_MIN_TCE_ENTRIES) {
			/*
			 * Emit the warning with a single printk(): a log-level
			 * prefix is only meaningful at the start of a line, so
			 * a second KERN_WARNING must not appear mid-message.
			 */
			printk(KERN_WARNING
			       "TCE table is full; freeing %d entries for the kdump boot\n",
			       KDUMP_MIN_TCE_ENTRIES);
			/* Release the tail of the bitmap for the kdump kernel. */
			for (index = tbl->it_size - KDUMP_MIN_TCE_ENTRIES;
			     index < tbl->it_size; index++)
				__clear_bit(index, tbl->it_map);
		}
	}
#endif
}
/* /*
* Build a iommu_table structure. This contains a bit map which * Build a iommu_table structure. This contains a bit map which
* is used to manage allocation of the tce space. * is used to manage allocation of the tce space.
...@@ -484,38 +520,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) ...@@ -484,38 +520,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
tbl->it_largehint = tbl->it_halfpoint; tbl->it_largehint = tbl->it_halfpoint;
spin_lock_init(&tbl->it_lock); spin_lock_init(&tbl->it_lock);
#ifdef CONFIG_CRASH_DUMP iommu_table_clear(tbl);
if (ppc_md.tce_get) {
unsigned long index;
unsigned long tceval;
unsigned long tcecount = 0;
/*
* Reserve the existing mappings left by the first kernel.
*/
for (index = 0; index < tbl->it_size; index++) {
tceval = ppc_md.tce_get(tbl, index + tbl->it_offset);
/*
* Freed TCE entry contains 0x7fffffffffffffff on JS20
*/
if (tceval && (tceval != 0x7fffffffffffffffUL)) {
__set_bit(index, tbl->it_map);
tcecount++;
}
}
if ((tbl->it_size - tcecount) < KDUMP_MIN_TCE_ENTRIES) {
printk(KERN_WARNING "TCE table is full; ");
printk(KERN_WARNING "freeing %d entries for the kdump boot\n",
KDUMP_MIN_TCE_ENTRIES);
for (index = tbl->it_size - KDUMP_MIN_TCE_ENTRIES;
index < tbl->it_size; index++)
__clear_bit(index, tbl->it_map);
}
}
#else
/* Clear the hardware table in case firmware left allocations in it */
ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
#endif
if (!welcomed) { if (!welcomed) {
printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n", printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
......
...@@ -88,11 +88,13 @@ void __init reserve_crashkernel(void) ...@@ -88,11 +88,13 @@ void __init reserve_crashkernel(void)
crash_size = crashk_res.end - crashk_res.start + 1; crash_size = crashk_res.end - crashk_res.start + 1;
#ifndef CONFIG_RELOCATABLE
if (crashk_res.start != KDUMP_KERNELBASE) if (crashk_res.start != KDUMP_KERNELBASE)
printk("Crash kernel location must be 0x%x\n", printk("Crash kernel location must be 0x%x\n",
KDUMP_KERNELBASE); KDUMP_KERNELBASE);
crashk_res.start = KDUMP_KERNELBASE; crashk_res.start = KDUMP_KERNELBASE;
#endif
crash_size = PAGE_ALIGN(crash_size); crash_size = PAGE_ALIGN(crash_size);
crashk_res.end = crashk_res.start + crash_size - 1; crashk_res.end = crashk_res.start + crash_size - 1;
......
...@@ -255,11 +255,14 @@ static union thread_union kexec_stack ...@@ -255,11 +255,14 @@ static union thread_union kexec_stack
/* Our assembly helper, in kexec_stub.S */ /* Our assembly helper, in kexec_stub.S */
extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start, extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
void *image, void *control, void *image, void *control,
void (*clear_all)(void)) ATTRIB_NORET; void (*clear_all)(void),
unsigned long kdump_flag) ATTRIB_NORET;
/* too late to fail here */ /* too late to fail here */
void default_machine_kexec(struct kimage *image) void default_machine_kexec(struct kimage *image)
{ {
unsigned long kdump_flag = 0;
/* prepare control code if any */ /* prepare control code if any */
/* /*
...@@ -272,6 +275,8 @@ void default_machine_kexec(struct kimage *image) ...@@ -272,6 +275,8 @@ void default_machine_kexec(struct kimage *image)
if (crashing_cpu == -1) if (crashing_cpu == -1)
kexec_prepare_cpus(); kexec_prepare_cpus();
else
kdump_flag = KDUMP_SIGNATURE;
/* switch to a staticly allocated stack. Based on irq stack code. /* switch to a staticly allocated stack. Based on irq stack code.
* XXX: the task struct will likely be invalid once we do the copy! * XXX: the task struct will likely be invalid once we do the copy!
...@@ -284,7 +289,7 @@ void default_machine_kexec(struct kimage *image) ...@@ -284,7 +289,7 @@ void default_machine_kexec(struct kimage *image)
*/ */
kexec_sequence(&kexec_stack, image->start, image, kexec_sequence(&kexec_stack, image->start, image,
page_address(image->control_code_page), page_address(image->control_code_page),
ppc_md.hpte_clear_all); ppc_md.hpte_clear_all, kdump_flag);
/* NOTREACHED */ /* NOTREACHED */
} }
......
...@@ -611,10 +611,12 @@ real_mode: /* assume normal blr return */ ...@@ -611,10 +611,12 @@ real_mode: /* assume normal blr return */
/* /*
* kexec_sequence(newstack, start, image, control, clear_all()) * kexec_sequence(newstack, start, image, control, clear_all(), kdump_flag)
* *
* does the grungy work with stack switching and real mode switches * does the grungy work with stack switching and real mode switches
* also does simple calls to other code * also does simple calls to other code
*
* kdump_flag says whether the next kernel should be a kdump kernel.
*/ */
_GLOBAL(kexec_sequence) _GLOBAL(kexec_sequence)
...@@ -647,7 +649,7 @@ _GLOBAL(kexec_sequence) ...@@ -647,7 +649,7 @@ _GLOBAL(kexec_sequence)
mr r29,r5 /* image (virt) */ mr r29,r5 /* image (virt) */
mr r28,r6 /* control, unused */ mr r28,r6 /* control, unused */
mr r27,r7 /* clear_all() fn desc */ mr r27,r7 /* clear_all() fn desc */
mr r26,r8 /* spare */ mr r26,r8 /* kdump flag */
lhz r25,PACAHWCPUID(r13) /* get our phys cpu from paca */ lhz r25,PACAHWCPUID(r13) /* get our phys cpu from paca */
/* disable interrupts, we are overwriting kernel data next */ /* disable interrupts, we are overwriting kernel data next */
...@@ -709,5 +711,6 @@ _GLOBAL(kexec_sequence) ...@@ -709,5 +711,6 @@ _GLOBAL(kexec_sequence)
mr r4,r30 # start, aka phys mem offset mr r4,r30 # start, aka phys mem offset
mtlr 4 mtlr 4
li r5,0 li r5,0
blr /* image->start(physid, image->start, 0); */ mr r6,r26 /* kdump_flag */
blr /* image->start(physid, image->start, 0, kdump_flag); */
#endif /* CONFIG_KEXEC */ #endif /* CONFIG_KEXEC */
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <asm/machdep.h> #include <asm/machdep.h>
#include <asm/rtas.h> #include <asm/rtas.h>
#include <asm/cell-regs.h> #include <asm/cell-regs.h>
#include <asm/kdump.h>
#include "ras.h" #include "ras.h"
...@@ -111,9 +112,8 @@ static int __init cbe_ptcal_enable_on_node(int nid, int order) ...@@ -111,9 +112,8 @@ static int __init cbe_ptcal_enable_on_node(int nid, int order)
int ret = -ENOMEM; int ret = -ENOMEM;
unsigned long addr; unsigned long addr;
#ifdef CONFIG_CRASH_DUMP if (__kdump_flag)
rtas_call(ptcal_stop_tok, 1, 1, NULL, nid); rtas_call(ptcal_stop_tok, 1, 1, NULL, nid);
#endif
area = kmalloc(sizeof(*area), GFP_KERNEL); area = kmalloc(sizeof(*area), GFP_KERNEL);
if (!area) if (!area)
......
...@@ -44,6 +44,7 @@ ...@@ -44,6 +44,7 @@
#include <asm/tce.h> #include <asm/tce.h>
#include <asm/ppc-pci.h> #include <asm/ppc-pci.h>
#include <asm/udbg.h> #include <asm/udbg.h>
#include <asm/kdump.h>
#include "plpar_wrappers.h" #include "plpar_wrappers.h"
...@@ -291,9 +292,8 @@ static void iommu_table_setparms(struct pci_controller *phb, ...@@ -291,9 +292,8 @@ static void iommu_table_setparms(struct pci_controller *phb,
tbl->it_base = (unsigned long)__va(*basep); tbl->it_base = (unsigned long)__va(*basep);
#ifndef CONFIG_CRASH_DUMP if (!__kdump_flag)
memset((void *)tbl->it_base, 0, *sizep); memset((void *)tbl->it_base, 0, *sizep);
#endif
tbl->it_busno = phb->bus->number; tbl->it_busno = phb->bus->number;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment