Commit 549e8152 authored by Paul Mackerras

powerpc: Make the 64-bit kernel as a position-independent executable

This implements CONFIG_RELOCATABLE for 64-bit by linking the kernel as
a position-independent executable (PIE) when that option is set.  This involves
processing the dynamic relocations in the image in the early stages of
booting, even if the kernel is being run at the address it is linked at,
since the linker does not necessarily fill in words in the image for
which there are dynamic relocations.  (In fact the linker does fill in
such words for 64-bit executables, though not for 32-bit executables,
so in principle we could avoid calling relocate() entirely when we're
running a 64-bit kernel at the linked address.)
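
(Concretely, each R_PPC64_RELATIVE entry in the .rela.dyn table asks the
boot code to store r_addend plus the image's relocation offset at the
run-time address corresponding to r_offset; the new relocate() shown
later in this commit does exactly that.)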

The dynamic relocations are processed by a new function relocate(addr),
where the addr parameter is the virtual address where the image will be
run.  In fact we call it twice: once before calling prom_init, and again
when starting the main kernel.  This means that reloc_offset() returns
0 in prom_init (since it has been relocated to the address it is running
at), which necessitated a few adjustments.

This also changes __va and __pa to use an equivalent definition that is
simpler.  With the relocatable kernel, PAGE_OFFSET and MEMORY_START are
constants (for 64-bit) whereas PHYSICAL_START is a variable (and
KERNELBASE ideally should be too, but isn't yet).
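
The equivalence is easy to check: substituting the old non-relocatable
definition MEMORY_START = (PHYSICAL_START + PAGE_OFFSET - KERNELBASE)
into the new __va(x) = x + PAGE_OFFSET - MEMORY_START gives
x - PHYSICAL_START + KERNELBASE, which is exactly the old definition.
On 64-bit, where MEMORY_START is now the constant 0, __va(x) reduces to
x + PAGE_OFFSET and __pa() to its inverse.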

With this, relocatable kernels still copy themselves down to physical
address 0 and run there.
Signed-off-by: Paul Mackerras <paulus@samba.org>
parent e31aa453
@@ -806,6 +806,19 @@ config PIN_TLB
 endmenu

 if PPC64
+config RELOCATABLE
+	bool "Build a relocatable kernel"
+	help
+	  This builds a kernel image that is capable of running anywhere
+	  in the RMA (real memory area) at any 16k-aligned base address.
+	  The kernel is linked as a position-independent executable (PIE)
+	  and contains dynamic relocations which are processed early
+	  in the bootup process.
+
+	  One use is for the kexec on panic case where the recovery kernel
+	  must live at a different physical address than the primary
+	  kernel.
+
 config PAGE_OFFSET
 	hex
 	default "0xc000000000000000"
@@ -63,7 +63,9 @@ override CC	+= -m$(CONFIG_WORD_SIZE)
 override AR	:= GNUTARGET=elf$(CONFIG_WORD_SIZE)-powerpc $(AR)
 endif

-LDFLAGS_vmlinux	:= -Bstatic
+LDFLAGS_vmlinux-yy := -Bstatic
+LDFLAGS_vmlinux-$(CONFIG_PPC64)$(CONFIG_RELOCATABLE) := -pie
+LDFLAGS_vmlinux	:= $(LDFLAGS_vmlinux-yy)

 CFLAGS-$(CONFIG_PPC64)	:= -mminimal-toc -mtraceback=none -mcall-aixdesc
 CFLAGS-$(CONFIG_PPC32)	:= -ffixed-r2 -mmultiple
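
The -yy suffix above is a concatenation trick: $(CONFIG_PPC64) and
$(CONFIG_RELOCATABLE) each expand to "y" only when the corresponding
option is enabled, so the -pie assignment lands in LDFLAGS_vmlinux-yy
(the variable finally copied into LDFLAGS_vmlinux) only when both are
set.  In every other combination it goes to an unused variable such as
LDFLAGS_vmlinux-y, and the default -Bstatic survives.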
@@ -310,8 +310,11 @@ $(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/%.dtb
 $(obj)/vmlinux.strip: vmlinux
	$(STRIP) -s -R .comment $< -o $@

+# The iseries hypervisor won't take an ET_DYN executable, so this
+# changes the type (byte 17) in the file to ET_EXEC (2).
 $(obj)/zImage.iseries: vmlinux
	$(STRIP) -s -R .comment $< -o $@
+	printf "\x02" | dd of=$@ conv=notrunc bs=1 seek=17

 $(obj)/uImage: vmlinux $(wrapperbits)
	$(call if_changed,wrap,uboot)
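
Why byte 17: e_ident occupies the first 16 bytes of the ELF header, and
e_type is the big-endian 16-bit field immediately after it, so its
low-order byte sits at file offset 17; overwriting that byte with 0x02
flips ET_DYN (3) back to ET_EXEC (2) without relinking.  A quick C
sketch to confirm the offset (illustration only, not part of the
commit):

#include <elf.h>
#include <stddef.h>
#include <stdio.h>

int main(void)
{
	/* e_type starts right after the 16-byte e_ident array, so on a
	 * big-endian target its low-order byte is file offset 17 --
	 * the byte the dd command above patches. */
	printf("offsetof(Elf64_Ehdr, e_type) = %zu\n",
	       offsetof(Elf64_Ehdr, e_type));	/* prints 16 */
	printf("ET_EXEC = %d, ET_DYN = %d\n", ET_EXEC, ET_DYN);
	return 0;
}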
@@ -27,7 +27,8 @@ int parse_elf64(void *hdr, struct elf_info *info)
	      elf64->e_ident[EI_MAG3]  == ELFMAG3	&&
	      elf64->e_ident[EI_CLASS] == ELFCLASS64	&&
	      elf64->e_ident[EI_DATA]  == ELFDATA2MSB	&&
-	      elf64->e_type            == ET_EXEC	&&
+	      (elf64->e_type           == ET_EXEC ||
+	       elf64->e_type           == ET_DYN)	&&
	      elf64->e_machine         == EM_PPC64))
		return 0;

@@ -58,7 +59,8 @@ int parse_elf32(void *hdr, struct elf_info *info)
	      elf32->e_ident[EI_MAG3]  == ELFMAG3	&&
	      elf32->e_ident[EI_CLASS] == ELFCLASS32	&&
	      elf32->e_ident[EI_DATA]  == ELFDATA2MSB	&&
-	      elf32->e_type            == ET_EXEC	&&
+	      (elf32->e_type           == ET_EXEC ||
+	       elf32->e_type           == ET_DYN)	&&
	      elf32->e_machine         == EM_PPC))
		return 0;
@@ -437,7 +437,7 @@ typedef struct {
 })
 #endif /* 1 */

-/* This is only valid for addresses >= KERNELBASE */
+/* This is only valid for addresses >= PAGE_OFFSET */
 static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
 {
	if (ssize == MMU_SEGSIZE_256M)
@@ -71,15 +71,21 @@
 #define PAGE_OFFSET	ASM_CONST(CONFIG_PAGE_OFFSET)
 #define LOAD_OFFSET	ASM_CONST((CONFIG_KERNEL_START-CONFIG_PHYSICAL_START))

-#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_FLATMEM)
+#if defined(CONFIG_RELOCATABLE)
 #ifndef __ASSEMBLY__
 extern phys_addr_t memstart_addr;
 extern phys_addr_t kernstart_addr;
 #endif
 #define PHYSICAL_START	kernstart_addr
-#define MEMORY_START	memstart_addr
 #else
 #define PHYSICAL_START	ASM_CONST(CONFIG_PHYSICAL_START)
+#endif
+
+#ifdef CONFIG_PPC64
+#define MEMORY_START	0UL
+#elif defined(CONFIG_RELOCATABLE)
+#define MEMORY_START	memstart_addr
+#else
 #define MEMORY_START	(PHYSICAL_START + PAGE_OFFSET - KERNELBASE)
 #endif

@@ -92,8 +98,8 @@ extern phys_addr_t kernstart_addr;
 #define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
 #define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)

-#define __va(x) ((void *)((unsigned long)(x) - PHYSICAL_START + KERNELBASE))
-#define __pa(x) ((unsigned long)(x) + PHYSICAL_START - KERNELBASE)
+#define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET - MEMORY_START))
+#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET + MEMORY_START)

 /*
  * Unfortunately the PLT is in the BSS in the PPC32 ELF ABI,
@@ -16,6 +16,12 @@ static inline int in_kernel_text(unsigned long addr)
		return 0;
 }

+static inline int overlaps_kernel_text(unsigned long start, unsigned long end)
+{
+	return start < (unsigned long)__init_end &&
+		(unsigned long)_stext < end;
+}
+
 #undef dereference_function_descriptor
 void *dereference_function_descriptor(void *);
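
overlaps_kernel_text() is the usual half-open interval intersection
test: [start, end) overlaps [_stext, __init_end) exactly when
start < __init_end and _stext < end.  Unlike in_kernel_text(), which
classifies a single address, it catches mappings that only partially
cover kernel text, which matters below where htab_bolt_mapping() maps
memory in large steps.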
@@ -35,6 +35,7 @@ obj-$(CONFIG_PPC64)		+= setup_64.o sys_ppc32.o \
				   paca.o cpu_setup_ppc970.o \
				   cpu_setup_pa6t.o \
				   firmware.o nvram_64.o
+obj64-$(CONFIG_RELOCATABLE)	+= reloc_64.o
 obj-$(CONFIG_PPC64)		+= vdso64/
 obj-$(CONFIG_ALTIVEC)		+= vecemu.o vector.o
 obj-$(CONFIG_PPC_970_NAP)	+= idle_power4.o
@@ -1360,6 +1360,12 @@ _INIT_STATIC(__boot_from_prom)
	 */
	rldicr	r1,r1,0,59

+#ifdef CONFIG_RELOCATABLE
+	/* Relocate code for where we are now */
+	mr	r3,r26
+	bl	.relocate
+#endif
+
	/* Restore parameters */
	mr	r3,r31
	mr	r4,r30

@@ -1368,11 +1374,19 @@
	mr	r7,r27

	/* Do all of the interaction with OF client interface */
+	mr	r8,r26
	bl	.prom_init
	/* We never return */
	trap

 _STATIC(__after_prom_start)
+#ifdef CONFIG_RELOCATABLE
+	/* process relocations for the final address of the kernel */
+	lis	r25,PAGE_OFFSET@highest	/* compute virtual base of kernel */
+	sldi	r25,r25,32
+	mr	r3,r25
+	bl	.relocate
+#endif

 /*
  * We need to run with _stext at physical address PHYSICAL_START.
@@ -1381,10 +1395,9 @@ _STATIC(__after_prom_start)
  *
  * Note: This process overwrites the OF exception vectors.
  */
-	LOAD_REG_IMMEDIATE(r3, PHYSICAL_START)	/* target addr */
-	cmpd	r3,r26			/* In some cases the loader may  */
+	li	r3,0			/* target addr */
+	mr.	r4,r26			/* In some cases the loader may  */
	beq	9f			/* have already put us at zero */
-	mr	r4,r26			/* source address */
	lis	r5,(copy_to_here - _stext)@ha
	addi	r5,r5,(copy_to_here - _stext)@l	/* # bytes of memory to copy */
	li	r6,0x100		/* Start offset, the first 0x100 */

@@ -1617,6 +1630,13 @@ _INIT_STATIC(start_here_multiplatform)
	ori	r6,r6,MSR_RI
	mtmsrd	r6			/* RI on */

+#ifdef CONFIG_RELOCATABLE
+	/* Save the physical address we're running at in kernstart_addr */
+	LOAD_REG_ADDR(r4, kernstart_addr)
+	clrldi	r0,r25,2
+	std	r0,0(r4)
+#endif
+
	/* The following gets the stack set up with the regs */
	/* pointing to the real addr of the kernel stack. This is */
	/* all done to support the C function call below which sets */
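
Two details worth spelling out: r26 holds the physical address the
kernel is actually running at, so the first relocate() call fixes the
image up for wherever the loader placed it, and mr r8,r26 then passes
that same address to prom_init() as its new sixth argument, kbase (r8
being the sixth integer argument register in the ppc64 ELF ABI).  The
second relocate() call in __after_prom_start instead relocates for
PAGE_OFFSET, the kernel's final virtual base, before the image copies
itself down to physical address 0.  Finally, clrldi r0,r25,2 clears the
top two bits of the virtual base in r25, yielding the physical address
that is stored in kernstart_addr.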
@@ -12,6 +12,7 @@
 #include <asm/lppaca.h>
 #include <asm/paca.h>
+#include <asm/sections.h>

 /* This symbol is provided by the linker - let it fill in the paca
  * field correctly */

@@ -79,7 +80,7 @@ void __init initialise_pacas(void)
		new_paca->lock_token = 0x8000;
		new_paca->paca_index = cpu;
		new_paca->kernel_toc = kernel_toc;
-		new_paca->kernelbase = KERNELBASE;
+		new_paca->kernelbase = (unsigned long) _stext;
		new_paca->kernel_msr = MSR_KERNEL;
		new_paca->hw_cpu_id = 0xffff;
		new_paca->slb_shadow_ptr = &slb_shadow[cpu];
@@ -1192,6 +1192,9 @@ void __init early_init_devtree(void *params)
	/* Reserve LMB regions used by kernel, initrd, dt, etc... */
	lmb_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START);
+	/* If relocatable, reserve first 32k for interrupt vectors etc. */
+	if (PHYSICAL_START > MEMORY_START)
+		lmb_reserve(MEMORY_START, 0x8000);
	reserve_kdump_trampoline();
	reserve_crashkernel();
	early_reserve_mem();
@@ -2309,13 +2309,14 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4)

 unsigned long __init prom_init(unsigned long r3, unsigned long r4,
			       unsigned long pp,
-			       unsigned long r6, unsigned long r7)
+			       unsigned long r6, unsigned long r7,
+			       unsigned long kbase)
 {
	struct prom_t *_prom;
	unsigned long hdr;
+	unsigned long offset = reloc_offset();

 #ifdef CONFIG_PPC32
-	unsigned long offset = reloc_offset();
	reloc_got2(offset);
 #endif

@@ -2349,9 +2350,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
	 */
	RELOC(of_platform) = prom_find_machine_type();

+#ifndef CONFIG_RELOCATABLE
	/* Bail if this is a kdump kernel. */
	if (PHYSICAL_START > 0)
		prom_panic("Error: You can't boot a kdump kernel from OF!\n");
+#endif

	/*
	 * Check for an initrd

@@ -2371,7 +2374,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
	 * Copy the CPU hold code
	 */
	if (RELOC(of_platform) != PLATFORM_POWERMAC)
-		copy_and_flush(0, KERNELBASE + offset, 0x100, 0);
+		copy_and_flush(0, kbase, 0x100, 0);

	/*
	 * Do early parsing of command line

@@ -2474,7 +2477,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
	reloc_got2(-offset);
 #endif

-	__start(hdr, KERNELBASE + offset, 0);
+	__start(hdr, kbase, 0);

	return 0;
 }
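
The relocate() function itself lives in the new reloc_64.S source file,
built only for relocatable kernels via the obj64-$(CONFIG_RELOCATABLE)
Makefile line above: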
/*
 * Code to process dynamic relocations in the kernel.
 *
 * Copyright 2008 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <asm/ppc_asm.h>

RELA = 7
RELACOUNT = 0x6ffffff9
R_PPC64_RELATIVE = 22

/*
 * r3 = desired final address of kernel
 */
_GLOBAL(relocate)
	mflr	r0
	bcl	20,31,$+4
0:	mflr	r12		/* r12 has runtime addr of label 0 */
	mtlr	r0
	ld	r11,(p_dyn - 0b)(r12)
	add	r11,r11,r12	/* r11 has runtime addr of .dynamic section */
	ld	r9,(p_rela - 0b)(r12)
	add	r9,r9,r12	/* r9 has runtime addr of .rela.dyn section */
	ld	r10,(p_st - 0b)(r12)
	add	r10,r10,r12	/* r10 has runtime addr of _stext */

	/*
	 * Scan the dynamic section for the RELA and RELACOUNT entries.
	 */
	li	r7,0
	li	r8,0
1:	ld	r6,0(r11)	/* get tag */
	cmpdi	r6,0
	beq	4f		/* end of list */
	cmpdi	r6,RELA
	bne	2f
	ld	r7,8(r11)	/* get RELA pointer in r7 */
	b	3f
2:	addis	r6,r6,(-RELACOUNT)@ha
	cmpdi	r6,RELACOUNT@l
	bne	3f
	ld	r8,8(r11)	/* get RELACOUNT value in r8 */
3:	addi	r11,r11,16
	b	1b
4:	cmpdi	r7,0		/* check we have both RELA and RELACOUNT */
	cmpdi	cr1,r8,0
	beq	6f
	beq	cr1,6f

	/*
	 * Work out linktime address of _stext and hence the
	 * relocation offset to be applied.
	 * cur_offset [r7] = rela.run [r9] - rela.link [r7]
	 * _stext.link [r10] = _stext.run [r10] - cur_offset [r7]
	 * final_offset [r3] = _stext.final [r3] - _stext.link [r10]
	 */
	subf	r7,r7,r9	/* cur_offset */
	subf	r10,r7,r10
	subf	r3,r10,r3	/* final_offset */

	/*
	 * Run through the list of relocations and process the
	 * R_PPC64_RELATIVE ones.
	 */
	mtctr	r8
5:	lwz	r0,12(9)	/* ELF64_R_TYPE(reloc->r_info) */
	cmpwi	r0,R_PPC64_RELATIVE
	bne	6f
	ld	r6,0(r9)	/* reloc->r_offset */
	ld	r0,16(r9)	/* reloc->r_addend */
	add	r0,r0,r3
	stdx	r0,r7,r6
	addi	r9,r9,24
	bdnz	5b

6:	blr

p_dyn:	.llong	__dynamic_start - 0b
p_rela:	.llong	__rela_dyn_start - 0b
p_st:	.llong	_stext - 0b
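
To make the assembly easier to follow, here is a rough C rendering of
the same algorithm.  This is a sketch, not the kernel's interface: the
real code runs before ordinary C can (everything must be addressed
PC-relatively, which is what the bcl 20,31 trick and the
p_dyn/p_rela/p_st offset words provide), and the parameter names below
are illustrative stand-ins for the values the assembly computes.

#include <elf.h>	/* Elf64_Dyn, Elf64_Rela, DT_RELA, DT_RELACOUNT */

/* Sketch of relocate(addr): process the R_PPC64_RELATIVE entries so
 * the kernel can run with _stext at `addr`.  DT_RELACOUNT equals the
 * RELACOUNT constant (0x6ffffff9) in the assembly. */
static void relocate_sketch(unsigned long addr,
			    Elf64_Dyn *runtime_dynamic,
			    Elf64_Rela *runtime_rela,
			    unsigned long runtime_stext)
{
	unsigned long rela_link = 0, count = 0;
	unsigned long cur_offset, link_stext, final_offset, i;
	Elf64_Dyn *d;

	/* Scan .dynamic for the RELA and RELACOUNT entries. */
	for (d = runtime_dynamic; d->d_tag != DT_NULL; d++) {
		if (d->d_tag == DT_RELA)
			rela_link = d->d_un.d_ptr;	/* link-time addr */
		else if (d->d_tag == DT_RELACOUNT)
			count = d->d_un.d_val;
	}
	if (!rela_link || !count)
		return;

	/* cur_offset = runtime - linktime; recover link-time _stext. */
	cur_offset = (unsigned long)runtime_rela - rela_link;
	link_stext = runtime_stext - cur_offset;
	final_offset = addr - link_stext;

	for (i = 0; i < count; i++) {
		Elf64_Rela *r = &runtime_rela[i];

		if (ELF64_R_TYPE(r->r_info) != R_PPC64_RELATIVE)
			break;	/* the assembly also stops here */
		/* Store the relocated value at the run-time location
		 * of r_offset (a link-time address). */
		*(unsigned long *)(r->r_offset + cur_offset) =
			r->r_addend + final_offset;
	}
}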
@@ -187,6 +187,21 @@ SECTIONS
		*(.machine.desc)
		__machine_desc_end = . ;
	}

+	. = ALIGN(8);
+	.dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET) { *(.dynsym) }
+	.dynstr : AT(ADDR(.dynstr) - LOAD_OFFSET) { *(.dynstr) }
+	.dynamic : AT(ADDR(.dynamic) - LOAD_OFFSET)
+	{
+		__dynamic_start = .;
+		*(.dynamic)
+	}
+	.hash : AT(ADDR(.hash) - LOAD_OFFSET) { *(.hash) }
+	.interp : AT(ADDR(.interp) - LOAD_OFFSET) { *(.interp) }
+	.rela.dyn : AT(ADDR(.rela.dyn) - LOAD_OFFSET)
+	{
+		__rela_dyn_start = .;
+		*(.rela*)
+	}

	/* freed after init ends here */
	. = ALIGN(PAGE_SIZE);
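
The __dynamic_start and __rela_dyn_start symbols defined here are the
ones the p_dyn and p_rela words in reloc_64.S resolve against.  These
output sections precede the "freed after init ends here" marker, i.e.
they live in init memory, so the relocation metadata appears to be
discarded once booting completes; on a finished vmlinux, readelf -d
should show the DT_RELA and DT_RELACOUNT entries that relocate() scans
for.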
@@ -194,7 +194,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
		unsigned long tprot = prot;

		/* Make kernel text executable */
-		if (in_kernel_text(vaddr))
+		if (overlaps_kernel_text(vaddr, vaddr + step))
			tprot &= ~HPTE_R_N;

		hash = hpt_hash(va, shift, ssize);
@@ -787,7 +787,7 @@ static void __devinit smp_core99_kick_cpu(int nr)
 {
	unsigned int save_vector;
	unsigned long target, flags;
-	unsigned int *vector = (unsigned int *)(KERNELBASE+0x100);
+	unsigned int *vector = (unsigned int *)(PAGE_OFFSET+0x100);

	if (nr < 0 || nr > 3)
		return;

@@ -801,7 +801,7 @@ static void __devinit smp_core99_kick_cpu(int nr)
	save_vector = *vector;

	/* Setup fake reset vector that does
-	 *   b __secondary_start_pmac_0 + nr*8 - KERNELBASE
+	 *   b __secondary_start_pmac_0 + nr*8
	 */
	target = (unsigned long) __secondary_start_pmac_0 + nr * 8;
	patch_branch(vector, target, BRANCH_SET_LINK);