Commit ebaeb5ae authored by Mahesh Salgaonkar's avatar Mahesh Salgaonkar Committed by Benjamin Herrenschmidt

fadump: Convert firmware-assisted cpu state dump data into elf notes.

When registered for firmware assisted dump on powerpc, firmware preserves
the registers for the active CPUs during a system crash. This patch reads
the cpu register data stored in Firmware-assisted dump format (except for
crashing cpu) and converts it into elf notes and updates the PT_NOTE program
header accordingly. The exact register state for crashing cpu is saved to
fadump crash info structure in scratch area during crash_fadump() and read
during second kernel boot.
Signed-off-by: default avatarMahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: default avatarBenjamin Herrenschmidt <benh@kernel.crashing.org>
parent 2df173d9
...@@ -65,6 +65,18 @@ ...@@ -65,6 +65,18 @@
/* Dump status flag */ /* Dump status flag */
#define FADUMP_ERROR_FLAG 0x2000 #define FADUMP_ERROR_FLAG 0x2000
#define FADUMP_CPU_ID_MASK ((1UL << 32) - 1)
#define CPU_UNKNOWN (~((u32)0))
/* Utility macros */
#define SKIP_TO_NEXT_CPU(reg_entry) \
({ \
while (reg_entry->reg_id != REG_ID("CPUEND")) \
reg_entry++; \
reg_entry++; \
})
/* Kernel Dump section info */ /* Kernel Dump section info */
struct fadump_section { struct fadump_section {
u32 request_flag; u32 request_flag;
...@@ -119,6 +131,9 @@ struct fw_dump { ...@@ -119,6 +131,9 @@ struct fw_dump {
unsigned long reserve_bootvar; unsigned long reserve_bootvar;
unsigned long fadumphdr_addr; unsigned long fadumphdr_addr;
unsigned long cpu_notes_buf;
unsigned long cpu_notes_buf_size;
int ibm_configure_kernel_dump; int ibm_configure_kernel_dump;
unsigned long fadump_enabled:1; unsigned long fadump_enabled:1;
...@@ -143,13 +158,40 @@ static inline u64 str_to_u64(const char *str) ...@@ -143,13 +158,40 @@ static inline u64 str_to_u64(const char *str)
return val; return val;
} }
#define STR_TO_HEX(x) str_to_u64(x) #define STR_TO_HEX(x) str_to_u64(x)
#define REG_ID(x) str_to_u64(x)
#define FADUMP_CRASH_INFO_MAGIC STR_TO_HEX("FADMPINF") #define FADUMP_CRASH_INFO_MAGIC STR_TO_HEX("FADMPINF")
#define REGSAVE_AREA_MAGIC STR_TO_HEX("REGSAVE")
/* The firmware-assisted dump format.
*
* The register save area is an area in the partition's memory used to preserve
* the register contents (CPU state data) for the active CPUs during a firmware
* assisted dump. The dump format contains register save area header followed
* by register entries. Each list of registers for a CPU starts with
* "CPUSTRT" and ends with "CPUEND".
*/
/* Register save area header. */
struct fadump_reg_save_area_header {
u64 magic_number;
u32 version;
u32 num_cpu_offset;
};
/* Register entry. */
struct fadump_reg_entry {
u64 reg_id;
u64 reg_value;
};
/* fadump crash info structure */ /* fadump crash info structure */
struct fadump_crash_info_header { struct fadump_crash_info_header {
u64 magic_number; u64 magic_number;
u64 elfcorehdr_addr; u64 elfcorehdr_addr;
u32 crashing_cpu;
struct pt_regs regs;
struct cpumask cpu_online_mask;
}; };
/* Crash memory ranges */ /* Crash memory ranges */
...@@ -165,7 +207,9 @@ extern int early_init_dt_scan_fw_dump(unsigned long node, ...@@ -165,7 +207,9 @@ extern int early_init_dt_scan_fw_dump(unsigned long node,
extern int fadump_reserve_mem(void); extern int fadump_reserve_mem(void);
extern int setup_fadump(void); extern int setup_fadump(void);
extern int is_fadump_active(void); extern int is_fadump_active(void);
extern void crash_fadump(struct pt_regs *, const char *);
#else /* CONFIG_FA_DUMP */ #else /* CONFIG_FA_DUMP */
static inline int is_fadump_active(void) { return 0; } static inline int is_fadump_active(void) { return 0; }
static inline void crash_fadump(struct pt_regs *regs, const char *str) { }
#endif #endif
#endif #endif
...@@ -240,6 +240,7 @@ static unsigned long get_fadump_area_size(void) ...@@ -240,6 +240,7 @@ static unsigned long get_fadump_area_size(void)
size += fw_dump.boot_memory_size; size += fw_dump.boot_memory_size;
size += sizeof(struct fadump_crash_info_header); size += sizeof(struct fadump_crash_info_header);
size += sizeof(struct elfhdr); /* ELF core header.*/ size += sizeof(struct elfhdr); /* ELF core header.*/
size += sizeof(struct elf_phdr); /* place holder for cpu notes */
/* Program headers for crash memory regions. */ /* Program headers for crash memory regions. */
size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2); size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);
...@@ -393,6 +394,285 @@ static void register_fw_dump(struct fadump_mem_struct *fdm) ...@@ -393,6 +394,285 @@ static void register_fw_dump(struct fadump_mem_struct *fdm)
} }
} }
void crash_fadump(struct pt_regs *regs, const char *str)
{
struct fadump_crash_info_header *fdh = NULL;
if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
return;
fdh = __va(fw_dump.fadumphdr_addr);
crashing_cpu = smp_processor_id();
fdh->crashing_cpu = crashing_cpu;
crash_save_vmcoreinfo();
if (regs)
fdh->regs = *regs;
else
ppc_save_regs(&fdh->regs);
fdh->cpu_online_mask = *cpu_online_mask;
/* Call ibm,os-term rtas call to trigger firmware assisted dump */
rtas_os_term((char *)str);
}
#define GPR_MASK 0xffffff0000000000
static inline int fadump_gpr_index(u64 id)
{
int i = -1;
char str[3];
if ((id & GPR_MASK) == REG_ID("GPR")) {
/* get the digits at the end */
id &= ~GPR_MASK;
id >>= 24;
str[2] = '\0';
str[1] = id & 0xff;
str[0] = (id >> 8) & 0xff;
sscanf(str, "%d", &i);
if (i > 31)
i = -1;
}
return i;
}
static inline void fadump_set_regval(struct pt_regs *regs, u64 reg_id,
u64 reg_val)
{
int i;
i = fadump_gpr_index(reg_id);
if (i >= 0)
regs->gpr[i] = (unsigned long)reg_val;
else if (reg_id == REG_ID("NIA"))
regs->nip = (unsigned long)reg_val;
else if (reg_id == REG_ID("MSR"))
regs->msr = (unsigned long)reg_val;
else if (reg_id == REG_ID("CTR"))
regs->ctr = (unsigned long)reg_val;
else if (reg_id == REG_ID("LR"))
regs->link = (unsigned long)reg_val;
else if (reg_id == REG_ID("XER"))
regs->xer = (unsigned long)reg_val;
else if (reg_id == REG_ID("CR"))
regs->ccr = (unsigned long)reg_val;
else if (reg_id == REG_ID("DAR"))
regs->dar = (unsigned long)reg_val;
else if (reg_id == REG_ID("DSISR"))
regs->dsisr = (unsigned long)reg_val;
}
static struct fadump_reg_entry*
fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs)
{
memset(regs, 0, sizeof(struct pt_regs));
while (reg_entry->reg_id != REG_ID("CPUEND")) {
fadump_set_regval(regs, reg_entry->reg_id,
reg_entry->reg_value);
reg_entry++;
}
reg_entry++;
return reg_entry;
}
static u32 *fadump_append_elf_note(u32 *buf, char *name, unsigned type,
void *data, size_t data_len)
{
struct elf_note note;
note.n_namesz = strlen(name) + 1;
note.n_descsz = data_len;
note.n_type = type;
memcpy(buf, &note, sizeof(note));
buf += (sizeof(note) + 3)/4;
memcpy(buf, name, note.n_namesz);
buf += (note.n_namesz + 3)/4;
memcpy(buf, data, note.n_descsz);
buf += (note.n_descsz + 3)/4;
return buf;
}
static void fadump_final_note(u32 *buf)
{
struct elf_note note;
note.n_namesz = 0;
note.n_descsz = 0;
note.n_type = 0;
memcpy(buf, &note, sizeof(note));
}
static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
{
struct elf_prstatus prstatus;
memset(&prstatus, 0, sizeof(prstatus));
/*
* FIXME: How do i get PID? Do I really need it?
* prstatus.pr_pid = ????
*/
elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
buf = fadump_append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
&prstatus, sizeof(prstatus));
return buf;
}
static void fadump_update_elfcore_header(char *bufp)
{
struct elfhdr *elf;
struct elf_phdr *phdr;
elf = (struct elfhdr *)bufp;
bufp += sizeof(struct elfhdr);
/* First note is a place holder for cpu notes info. */
phdr = (struct elf_phdr *)bufp;
if (phdr->p_type == PT_NOTE) {
phdr->p_paddr = fw_dump.cpu_notes_buf;
phdr->p_offset = phdr->p_paddr;
phdr->p_filesz = fw_dump.cpu_notes_buf_size;
phdr->p_memsz = fw_dump.cpu_notes_buf_size;
}
return;
}
static void *fadump_cpu_notes_buf_alloc(unsigned long size)
{
void *vaddr;
struct page *page;
unsigned long order, count, i;
order = get_order(size);
vaddr = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
if (!vaddr)
return NULL;
count = 1 << order;
page = virt_to_page(vaddr);
for (i = 0; i < count; i++)
SetPageReserved(page + i);
return vaddr;
}
static void fadump_cpu_notes_buf_free(unsigned long vaddr, unsigned long size)
{
struct page *page;
unsigned long order, count, i;
order = get_order(size);
count = 1 << order;
page = virt_to_page(vaddr);
for (i = 0; i < count; i++)
ClearPageReserved(page + i);
__free_pages(page, order);
}
/*
* Read CPU state dump data and convert it into ELF notes.
* The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
* used to access the data to allow for additional fields to be added without
* affecting compatibility. Each list of registers for a CPU starts with
* "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
* 8 Byte ASCII identifier and 8 Byte register value. The register entry
* with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
* of register value. For more details refer to PAPR document.
*
* Only for the crashing cpu we ignore the CPU dump data and get exact
* state from fadump crash info structure populated by first kernel at the
* time of crash.
*/
static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm)
{
struct fadump_reg_save_area_header *reg_header;
struct fadump_reg_entry *reg_entry;
struct fadump_crash_info_header *fdh = NULL;
void *vaddr;
unsigned long addr;
u32 num_cpus, *note_buf;
struct pt_regs regs;
int i, rc = 0, cpu = 0;
if (!fdm->cpu_state_data.bytes_dumped)
return -EINVAL;
addr = fdm->cpu_state_data.destination_address;
vaddr = __va(addr);
reg_header = vaddr;
if (reg_header->magic_number != REGSAVE_AREA_MAGIC) {
printk(KERN_ERR "Unable to read register save area.\n");
return -ENOENT;
}
pr_debug("--------CPU State Data------------\n");
pr_debug("Magic Number: %llx\n", reg_header->magic_number);
pr_debug("NumCpuOffset: %x\n", reg_header->num_cpu_offset);
vaddr += reg_header->num_cpu_offset;
num_cpus = *((u32 *)(vaddr));
pr_debug("NumCpus : %u\n", num_cpus);
vaddr += sizeof(u32);
reg_entry = (struct fadump_reg_entry *)vaddr;
/* Allocate buffer to hold cpu crash notes. */
fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
note_buf = fadump_cpu_notes_buf_alloc(fw_dump.cpu_notes_buf_size);
if (!note_buf) {
printk(KERN_ERR "Failed to allocate 0x%lx bytes for "
"cpu notes buffer\n", fw_dump.cpu_notes_buf_size);
return -ENOMEM;
}
fw_dump.cpu_notes_buf = __pa(note_buf);
pr_debug("Allocated buffer for cpu notes of size %ld at %p\n",
(num_cpus * sizeof(note_buf_t)), note_buf);
if (fw_dump.fadumphdr_addr)
fdh = __va(fw_dump.fadumphdr_addr);
for (i = 0; i < num_cpus; i++) {
if (reg_entry->reg_id != REG_ID("CPUSTRT")) {
printk(KERN_ERR "Unable to read CPU state data\n");
rc = -ENOENT;
goto error_out;
}
/* Lower 4 bytes of reg_value contains logical cpu id */
cpu = reg_entry->reg_value & FADUMP_CPU_ID_MASK;
if (!cpumask_test_cpu(cpu, &fdh->cpu_online_mask)) {
SKIP_TO_NEXT_CPU(reg_entry);
continue;
}
pr_debug("Reading register data for cpu %d...\n", cpu);
if (fdh && fdh->crashing_cpu == cpu) {
regs = fdh->regs;
note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
SKIP_TO_NEXT_CPU(reg_entry);
} else {
reg_entry++;
reg_entry = fadump_read_registers(reg_entry, &regs);
note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
}
}
fadump_final_note(note_buf);
pr_debug("Updating elfcore header (%llx) with cpu notes\n",
fdh->elfcorehdr_addr);
fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
return 0;
error_out:
fadump_cpu_notes_buf_free((unsigned long)__va(fw_dump.cpu_notes_buf),
fw_dump.cpu_notes_buf_size);
fw_dump.cpu_notes_buf = 0;
fw_dump.cpu_notes_buf_size = 0;
return rc;
}
/* /*
* Validate and process the dump data stored by firmware before exporting * Validate and process the dump data stored by firmware before exporting
* it through '/proc/vmcore'. * it through '/proc/vmcore'.
...@@ -400,18 +680,21 @@ static void register_fw_dump(struct fadump_mem_struct *fdm) ...@@ -400,18 +680,21 @@ static void register_fw_dump(struct fadump_mem_struct *fdm)
static int __init process_fadump(const struct fadump_mem_struct *fdm_active) static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
{ {
struct fadump_crash_info_header *fdh; struct fadump_crash_info_header *fdh;
int rc = 0;
if (!fdm_active || !fw_dump.fadumphdr_addr) if (!fdm_active || !fw_dump.fadumphdr_addr)
return -EINVAL; return -EINVAL;
/* Check if the dump data is valid. */ /* Check if the dump data is valid. */
if ((fdm_active->header.dump_status_flag == FADUMP_ERROR_FLAG) || if ((fdm_active->header.dump_status_flag == FADUMP_ERROR_FLAG) ||
(fdm_active->cpu_state_data.error_flags != 0) ||
(fdm_active->rmr_region.error_flags != 0)) { (fdm_active->rmr_region.error_flags != 0)) {
printk(KERN_ERR "Dump taken by platform is not valid\n"); printk(KERN_ERR "Dump taken by platform is not valid\n");
return -EINVAL; return -EINVAL;
} }
if (fdm_active->rmr_region.bytes_dumped != if ((fdm_active->rmr_region.bytes_dumped !=
fdm_active->rmr_region.source_len) { fdm_active->rmr_region.source_len) ||
!fdm_active->cpu_state_data.bytes_dumped) {
printk(KERN_ERR "Dump taken by platform is incomplete\n"); printk(KERN_ERR "Dump taken by platform is incomplete\n");
return -EINVAL; return -EINVAL;
} }
...@@ -423,6 +706,10 @@ static int __init process_fadump(const struct fadump_mem_struct *fdm_active) ...@@ -423,6 +706,10 @@ static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
return -EINVAL; return -EINVAL;
} }
rc = fadump_build_cpu_notes(fdm_active);
if (rc)
return rc;
/* /*
* We are done validating dump info and elfcore header is now ready * We are done validating dump info and elfcore header is now ready
* to be exported. set elfcorehdr_addr so that vmcore module will * to be exported. set elfcorehdr_addr so that vmcore module will
...@@ -537,6 +824,27 @@ static int fadump_create_elfcore_headers(char *bufp) ...@@ -537,6 +824,27 @@ static int fadump_create_elfcore_headers(char *bufp)
elf = (struct elfhdr *)bufp; elf = (struct elfhdr *)bufp;
bufp += sizeof(struct elfhdr); bufp += sizeof(struct elfhdr);
/*
* setup ELF PT_NOTE, place holder for cpu notes info. The notes info
* will be populated during second kernel boot after crash. Hence
* this PT_NOTE will always be the first elf note.
*
* NOTE: Any new ELF note addition should be placed after this note.
*/
phdr = (struct elf_phdr *)bufp;
bufp += sizeof(struct elf_phdr);
phdr->p_type = PT_NOTE;
phdr->p_flags = 0;
phdr->p_vaddr = 0;
phdr->p_align = 0;
phdr->p_offset = 0;
phdr->p_paddr = 0;
phdr->p_filesz = 0;
phdr->p_memsz = 0;
(elf->e_phnum)++;
/* setup PT_LOAD sections. */ /* setup PT_LOAD sections. */
for (i = 0; i < crash_mem_ranges; i++) { for (i = 0; i < crash_mem_ranges; i++) {
...@@ -588,6 +896,8 @@ static unsigned long init_fadump_header(unsigned long addr) ...@@ -588,6 +896,8 @@ static unsigned long init_fadump_header(unsigned long addr)
memset(fdh, 0, sizeof(struct fadump_crash_info_header)); memset(fdh, 0, sizeof(struct fadump_crash_info_header));
fdh->magic_number = FADUMP_CRASH_INFO_MAGIC; fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
fdh->elfcorehdr_addr = addr; fdh->elfcorehdr_addr = addr;
/* We will set the crashing cpu id in crash_fadump() during crash. */
fdh->crashing_cpu = CPU_UNKNOWN;
return addr; return addr;
} }
......
...@@ -61,6 +61,7 @@ ...@@ -61,6 +61,7 @@
#include <asm/xmon.h> #include <asm/xmon.h>
#include <asm/cputhreads.h> #include <asm/cputhreads.h>
#include <mm/mmu_decl.h> #include <mm/mmu_decl.h>
#include <asm/fadump.h>
#include "setup.h" #include "setup.h"
...@@ -639,6 +640,11 @@ EXPORT_SYMBOL(check_legacy_ioport); ...@@ -639,6 +640,11 @@ EXPORT_SYMBOL(check_legacy_ioport);
static int ppc_panic_event(struct notifier_block *this, static int ppc_panic_event(struct notifier_block *this,
unsigned long event, void *ptr) unsigned long event, void *ptr)
{ {
/*
* If firmware-assisted dump has been registered then trigger
* firmware-assisted dump and let firmware handle everything else.
*/
crash_fadump(NULL, ptr);
ppc_md.panic(ptr); /* May not return */ ppc_md.panic(ptr); /* May not return */
return NOTIFY_DONE; return NOTIFY_DONE;
} }
......
...@@ -57,6 +57,7 @@ ...@@ -57,6 +57,7 @@
#include <asm/kexec.h> #include <asm/kexec.h>
#include <asm/ppc-opcode.h> #include <asm/ppc-opcode.h>
#include <asm/rio.h> #include <asm/rio.h>
#include <asm/fadump.h>
#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
int (*__debugger)(struct pt_regs *regs) __read_mostly; int (*__debugger)(struct pt_regs *regs) __read_mostly;
...@@ -145,6 +146,8 @@ static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, ...@@ -145,6 +146,8 @@ static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs,
arch_spin_unlock(&die_lock); arch_spin_unlock(&die_lock);
raw_local_irq_restore(flags); raw_local_irq_restore(flags);
crash_fadump(regs, "die oops");
/* /*
* A system reset (0x100) is a request to dump, so we always send * A system reset (0x100) is a request to dump, so we always send
* it through the crashdump code. * it through the crashdump code.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment