Commit 55a7b212 authored by Linus Torvalds

Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 updates from Will Deacon:

 - RAS reporting via GHES/APEI (ACPI)

 - Indirect ftrace trampolines for modules

 - Improvements to kernel fault reporting

 - Page poisoning

 - Sigframe cleanups and preparation for SVE context

 - Core dump fixes

 - Sparse fixes (mainly relating to endianness)

 - xgene SoC PMU v3 driver

 - Misc cleanups and non-critical fixes

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (75 commits)
  arm64: fix endianness annotation for 'struct jit_ctx' and friends
  arm64: cpuinfo: constify attribute_group structures.
  arm64: ptrace: Fix incorrect get_user() use in compat_vfp_set()
  arm64: ptrace: Remove redundant overrun check from compat_vfp_set()
  arm64: ptrace: Avoid setting compat FP[SC]R to garbage if get_user fails
  arm64: fix endianness annotation for __apply_alternatives()/get_alt_insn()
  arm64: fix endianness annotation in get_kaslr_seed()
  arm64: add missing conversion to __wsum in ip_fast_csum()
  arm64: fix endianness annotation in acpi_parking_protocol.c
  arm64: use readq() instead of readl() to read 64bit entry_point
  arm64: fix endianness annotation for reloc_insn_movw() & reloc_insn_imm()
  arm64: fix endianness annotation for aarch64_insn_write()
  arm64: fix endianness annotation in aarch64_insn_read()
  arm64: fix endianness annotation in call_undef_hook()
  arm64: fix endianness annotation for debug-monitors.c
  ras: mark stub functions as 'inline'
  arm64: pass endianness info to sparse
  arm64: ftrace: fix !CONFIG_ARM64_MODULE_PLTS kernels
  arm64: signal: Allow expansion of the signal frame
  acpi: apei: check for pending errors when probing GHES entries
  ...
parents e5f76a2e 425e1ed7
......@@ -187,6 +187,16 @@
#define FSC_FAULT (0x04)
#define FSC_ACCESS (0x08)
#define FSC_PERM (0x0c)
#define FSC_SEA (0x10)
#define FSC_SEA_TTW0 (0x14)
#define FSC_SEA_TTW1 (0x15)
#define FSC_SEA_TTW2 (0x16)
#define FSC_SEA_TTW3 (0x17)
#define FSC_SECC (0x18)
#define FSC_SECC_TTW0 (0x1c)
#define FSC_SECC_TTW1 (0x1d)
#define FSC_SECC_TTW2 (0x1e)
#define FSC_SECC_TTW3 (0x1f)
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
#define HPFAR_MASK (~0xf)
......
......@@ -22,6 +22,11 @@ extern void (*arm_pm_idle)(void);
extern unsigned int user_debug;
/*
 * Stub implementation: ignores both arguments and always returns -1.
 * NOTE(review): presumably this tells the caller that a synchronous external
 * abort taken from a guest cannot be handled on this architecture - confirm
 * against the call site.
 */
static inline int handle_guest_sea(phys_addr_t addr, unsigned int esr)
{
return -1;
}
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_ARM_SYSTEM_MISC_H */
......@@ -552,7 +552,7 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu)
return 0;
}
static struct of_device_id armv6_pmu_of_device_ids[] = {
static const struct of_device_id armv6_pmu_of_device_ids[] = {
{.compatible = "arm,arm11mpcore-pmu", .data = armv6mpcore_pmu_init},
{.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init},
{.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init},
......
......@@ -3,6 +3,7 @@ config ARM64
select ACPI_CCA_REQUIRED if ACPI
select ACPI_GENERIC_GSI if ACPI
select ACPI_GTDT if ACPI
select ACPI_IORT if ACPI
select ACPI_REDUCED_HARDWARE_ONLY if ACPI
select ACPI_MCFG if ACPI
select ACPI_SPCR_TABLE if ACPI
......@@ -19,7 +20,9 @@ config ARM64
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAVE_NMI_SAFE_CMPXCHG if ACPI_APEI_SEA
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_SUPPORTS_MEMORY_FAILURE
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
......@@ -93,6 +96,7 @@ config ARM64
select HAVE_IRQ_TIME_ACCOUNTING
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP if NUMA
select HAVE_NMI if ACPI_APEI_SEA
select HAVE_PATA_PLATFORM
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
......@@ -245,6 +249,9 @@ config PGTABLE_LEVELS
config ARCH_SUPPORTS_UPROBES
def_bool y
config ARCH_PROC_KCORE_TEXT
def_bool y
source "init/Kconfig"
source "kernel/Kconfig.freezer"
......@@ -983,7 +990,7 @@ config RANDOMIZE_BASE
config RANDOMIZE_MODULE_REGION_FULL
bool "Randomize the module region independently from the core kernel"
depends on RANDOMIZE_BASE && !DYNAMIC_FTRACE
depends on RANDOMIZE_BASE
default y
help
Randomizes the location of the module region without considering the
......
......@@ -52,17 +52,19 @@ KBUILD_AFLAGS += $(lseinstr) $(brokengasinst)
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS += -mbig-endian
CHECKFLAGS += -D__AARCH64EB__
AS += -EB
LD += -EB
UTS_MACHINE := aarch64_be
else
KBUILD_CPPFLAGS += -mlittle-endian
CHECKFLAGS += -D__AARCH64EL__
AS += -EL
LD += -EL
UTS_MACHINE := aarch64
endif
CHECKFLAGS += -D__aarch64__
CHECKFLAGS += -D__aarch64__ -m64
ifeq ($(CONFIG_ARM64_MODULE_CMODEL_LARGE), y)
KBUILD_CFLAGS_MODULE += -mcmodel=large
......@@ -70,6 +72,9 @@ endif
ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds
ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
KBUILD_LDFLAGS_MODULE += $(objtree)/arch/arm64/kernel/ftrace-mod.o
endif
endif
# Default value
......
......@@ -42,7 +42,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
} while (--ihl);
sum += ((sum >> 32) | (sum << 32));
return csum_fold(sum >> 32);
return csum_fold((__force u32)(sum >> 32));
}
#define ip_fast_csum ip_fast_csum
......
......@@ -48,8 +48,6 @@ void arch_teardown_dma_ops(struct device *dev);
/* do not use this function in a driver */
static inline bool is_device_dma_coherent(struct device *dev)
{
if (!dev)
return false;
return dev->archdata.dma_coherent;
}
......
......@@ -142,6 +142,7 @@ typedef struct user_fpsimd_state elf_fpregset_t;
({ \
clear_bit(TIF_32BIT, &current->mm->context.flags); \
clear_thread_flag(TIF_32BIT); \
current->personality &= ~READ_IMPLIES_EXEC; \
})
/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
......@@ -187,6 +188,11 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
((x)->e_flags & EF_ARM_EABI_MASK))
#define compat_start_thread compat_start_thread
/*
* Unlike the native SET_PERSONALITY macro, the compat version inherits
* READ_IMPLIES_EXEC across a fork() since this is the behaviour on
* arch/arm/.
*/
#define COMPAT_SET_PERSONALITY(ex) \
({ \
set_bit(TIF_32BIT, &current->mm->context.flags); \
......
......@@ -83,6 +83,7 @@
#define ESR_ELx_WNR (UL(1) << 6)
/* Shared ISS field definitions for Data/Instruction aborts */
#define ESR_ELx_FnV (UL(1) << 10)
#define ESR_ELx_EA (UL(1) << 9)
#define ESR_ELx_S1PTW (UL(1) << 7)
......
......@@ -48,16 +48,16 @@ do { \
} while (0)
static inline int
futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
int oparg = (encoded_op << 8) >> 20;
int cmparg = (encoded_op << 20) >> 20;
int oparg = (int)(encoded_op << 8) >> 20;
int cmparg = (int)(encoded_op << 20) >> 20;
int oldval = 0, ret, tmp;
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
oparg = 1U << (oparg & 0x1f);
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
......
......@@ -204,6 +204,16 @@
#define FSC_FAULT ESR_ELx_FSC_FAULT
#define FSC_ACCESS ESR_ELx_FSC_ACCESS
#define FSC_PERM ESR_ELx_FSC_PERM
#define FSC_SEA ESR_ELx_FSC_EXTABT
#define FSC_SEA_TTW0 (0x14)
#define FSC_SEA_TTW1 (0x15)
#define FSC_SEA_TTW2 (0x16)
#define FSC_SEA_TTW3 (0x17)
#define FSC_SECC (0x18)
#define FSC_SECC_TTW0 (0x1c)
#define FSC_SECC_TTW1 (0x1d)
#define FSC_SECC_TTW2 (0x1e)
#define FSC_SECC_TTW3 (0x1f)
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
#define HPFAR_MASK (~UL(0xf))
......
......@@ -30,6 +30,9 @@ struct mod_plt_sec {
/*
 * Architecture-specific per-module state.
 * NOTE(review): core/init are presumably the PLT bookkeeping for the module's
 * core and init regions - confirm against struct mod_plt_sec users.
 */
struct mod_arch_specific {
struct mod_plt_sec core;
struct mod_plt_sec init;
/*
 * for CONFIG_DYNAMIC_FTRACE: module_finalize() stores the address of the
 * module's ".text.ftrace_trampoline" section here and ftrace_make_call()
 * reads it back when a branch target is out of 'bl' range.
 */
void *ftrace_trampoline;
};
#endif
......
......@@ -441,7 +441,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
#define pud_none(pud) (!pud_val(pud))
#define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT))
#define pud_present(pud) (pud_val(pud))
#define pud_present(pud) pte_present(pud_pte(pud))
static inline void set_pud(pud_t *pudp, pud_t pud)
{
......
......@@ -104,6 +104,9 @@ struct thread_struct {
#define task_user_tls(t) (&(t)->thread.tp_value)
#endif
/* Sync TPIDR_EL0 back to thread_struct for current */
void tls_preserve_current_state(void);
#define INIT_THREAD { }
static inline void start_thread_common(struct pt_regs *regs, unsigned long pc)
......
......@@ -30,5 +30,6 @@ struct stackframe {
extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame);
extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
int (*fn)(struct stackframe *, void *), void *data);
extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk);
#endif /* __ASM_STACKTRACE_H */
......@@ -40,7 +40,7 @@ void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned int,
int sig, int code, const char *name);
struct mm_struct;
extern void show_pte(struct mm_struct *mm, unsigned long addr);
extern void show_pte(unsigned long addr);
extern void __show_regs(struct pt_regs *);
extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
......@@ -56,6 +56,8 @@ extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
__show_ratelimited; \
})
int handle_guest_sea(phys_addr_t addr, unsigned int esr);
#endif /* __ASSEMBLY__ */
#endif /* __ASM_SYSTEM_MISC_H */
......@@ -33,6 +33,26 @@ struct sigcontext {
__u8 __reserved[4096] __attribute__((__aligned__(16)));
};
/*
* Allocation of __reserved[]:
* (Note: records do not necessarily occur in the order shown here.)
*
* size description
*
* 0x210 fpsimd_context
* 0x10 esr_context
* 0x20 extra_context (optional)
* 0x10 terminator (null _aarch64_ctx)
*
* 0xdb0 (reserved for future allocation)
*
* New records that can exceed this space need to be opt-in for userspace, so
* that an expanded signal frame is not generated unexpectedly. The mechanism
* for opting in will depend on the extension that generates each new record.
* The above table documents the maximum set and sizes of records that can be
* generated when userspace does not opt in for any such extension.
*/
/*
* Header to be used at the beginning of structures extending the user
* context. Such structures must be placed after the rt_sigframe on the stack
......@@ -61,4 +81,39 @@ struct esr_context {
__u64 esr;
};
/*
* extra_context: describes extra space in the signal frame for
* additional structures that don't fit in sigcontext.__reserved[].
*
* Note:
*
* 1) fpsimd_context, esr_context and extra_context must be placed in
* sigcontext.__reserved[] if present. They cannot be placed in the
* extra space. Any other record can be placed either in the extra
* space or in sigcontext.__reserved[], unless otherwise specified in
* this file.
*
* 2) There must not be more than one extra_context.
*
* 3) If extra_context is present, it must be followed immediately in
* sigcontext.__reserved[] by the terminating null _aarch64_ctx.
*
* 4) The extra space to which datap points must start at the first
* 16-byte aligned address immediately after the terminating null
* _aarch64_ctx that follows the extra_context structure in
* __reserved[]. The extra space may overrun the end of __reserved[],
* as indicated by a sufficiently large value for the size field.
*
* 5) The extra space must itself be terminated with a null
* _aarch64_ctx.
*/
#define EXTRA_MAGIC 0x45585401
/*
 * Record placed in sigcontext.__reserved[] that locates the extra space
 * described in the notes above EXTRA_MAGIC.
 */
struct extra_context {
struct _aarch64_ctx head; /* record header; identified by magic EXTRA_MAGIC */
__u64 datap; /* 16-byte aligned pointer to extra space cast to __u64 */
__u32 size; /* size in bytes of the extra space */
/* NOTE(review): purpose not stated here - presumably padding / reserved for future use; confirm */
__u32 __reserved[3];
};
#endif /* _UAPI__ASM_SIGCONTEXT_H */
......@@ -62,3 +62,6 @@ extra-y += $(head-y) vmlinux.lds
ifeq ($(CONFIG_DEBUG_EFI),y)
AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\""
endif
# will be included by each individual module but not by the core kernel itself
extra-$(CONFIG_DYNAMIC_FTRACE) += ftrace-mod.o
......@@ -71,7 +71,7 @@ static int acpi_parking_protocol_cpu_boot(unsigned int cpu)
{
struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
struct parking_protocol_mailbox __iomem *mailbox;
__le32 cpu_id;
u32 cpu_id;
/*
* Map mailbox memory with attribute device nGnRE (ie ioremap -
......@@ -123,9 +123,9 @@ static void acpi_parking_protocol_cpu_postboot(void)
int cpu = smp_processor_id();
struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
struct parking_protocol_mailbox __iomem *mailbox = cpu_entry->mailbox;
__le64 entry_point;
u64 entry_point;
entry_point = readl_relaxed(&mailbox->entry_point);
entry_point = readq_relaxed(&mailbox->entry_point);
/*
* Check if firmware has cleared the entry_point as expected
* by the protocol specification.
......
......@@ -28,7 +28,7 @@
#include <asm/sections.h>
#include <linux/stop_machine.h>
#define __ALT_PTR(a,f) (u32 *)((void *)&(a)->f + (a)->f)
#define __ALT_PTR(a,f) ((void *)&(a)->f + (a)->f)
#define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset)
#define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset)
......@@ -60,7 +60,7 @@ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc)
#define align_down(x, a) ((unsigned long)(x) & ~(((unsigned long)(a)) - 1))
static u32 get_alt_insn(struct alt_instr *alt, u32 *insnptr, u32 *altinsnptr)
static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr)
{
u32 insn;
......@@ -109,7 +109,7 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)
{
struct alt_instr *alt;
struct alt_region *region = alt_region;
u32 *origptr, *replptr, *updptr;
__le32 *origptr, *replptr, *updptr;
for (alt = region->begin; alt < region->end; alt++) {
u32 insn;
......@@ -124,7 +124,7 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)
origptr = ALT_ORIG_PTR(alt);
replptr = ALT_REPL_PTR(alt);
updptr = use_linear_alias ? (u32 *)lm_alias(origptr) : origptr;
updptr = use_linear_alias ? lm_alias(origptr) : origptr;
nr_inst = alt->alt_len / sizeof(insn);
for (i = 0; i < nr_inst; i++) {
......
......@@ -51,6 +51,25 @@ unsigned int compat_elf_hwcap2 __read_mostly;
DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
EXPORT_SYMBOL(cpu_hwcaps);
/*
 * Notifier callback: print the cpu_hwcaps bitmap (ARM64_NCAPS bits wide) at
 * emergency log level. None of the notifier arguments are used.
 *
 * Always returns 0.
 */
static int dump_cpu_hwcaps(struct notifier_block *self, unsigned long v, void *p)
{
/* file-wide pr_fmt adds "CPU features: " prefix */
pr_emerg("0x%*pb\n", ARM64_NCAPS, &cpu_hwcaps);
return 0;
}
/* Notifier block whose callback is dump_cpu_hwcaps(). */
static struct notifier_block cpu_hwcaps_notifier = {
.notifier_call = dump_cpu_hwcaps
};
/*
 * Initcall: add cpu_hwcaps_notifier to panic_notifier_list so that
 * dump_cpu_hwcaps() is invoked whenever that notifier chain is run
 * (i.e. on panic, going by the name of the list).
 *
 * Always returns 0; the result of the registration call is not checked.
 */
static int __init register_cpu_hwcaps_dumper(void)
{
atomic_notifier_chain_register(&panic_notifier_list,
&cpu_hwcaps_notifier);
return 0;
}
__initcall(register_cpu_hwcaps_dumper);
DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS);
EXPORT_SYMBOL(cpu_hwcap_keys);
......@@ -639,8 +658,10 @@ void update_cpu_features(int cpu,
* Mismatched CPU features are a recipe for disaster. Don't even
* pretend to support them.
*/
WARN_TAINT_ONCE(taint, TAINT_CPU_OUT_OF_SPEC,
"Unsupported CPU feature variation.\n");
if (taint) {
pr_warn_once("Unsupported CPU feature variation detected.\n");
add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
}
}
u64 read_sanitised_ftr_reg(u32 id)
......
......@@ -227,7 +227,7 @@ static struct attribute *cpuregs_id_attrs[] = {
NULL
};
static struct attribute_group cpuregs_attr_group = {
static const struct attribute_group cpuregs_attr_group = {
.attrs = cpuregs_id_attrs,
.name = "identification"
};
......
......@@ -341,20 +341,22 @@ int aarch32_break_handler(struct pt_regs *regs)
if (compat_thumb_mode(regs)) {
/* get 16-bit Thumb instruction */
get_user(thumb_instr, (u16 __user *)pc);
thumb_instr = le16_to_cpu(thumb_instr);
__le16 instr;
get_user(instr, (__le16 __user *)pc);
thumb_instr = le16_to_cpu(instr);
if (thumb_instr == AARCH32_BREAK_THUMB2_LO) {
/* get second half of 32-bit Thumb-2 instruction */
get_user(thumb_instr, (u16 __user *)(pc + 2));
thumb_instr = le16_to_cpu(thumb_instr);
get_user(instr, (__le16 __user *)(pc + 2));
thumb_instr = le16_to_cpu(instr);
bp = thumb_instr == AARCH32_BREAK_THUMB2_HI;
} else {
bp = thumb_instr == AARCH32_BREAK_THUMB;
}
} else {
/* 32-bit ARM instruction */
get_user(arm_instr, (u32 __user *)pc);
arm_instr = le32_to_cpu(arm_instr);
__le32 instr;
get_user(instr, (__le32 __user *)pc);
arm_instr = le32_to_cpu(instr);
bp = (arm_instr & ~0xf0000000) == AARCH32_BREAK_ARM;
}
......
/*
* Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
/*
 * Per-module ftrace trampoline, emitted into its own section,
 * ".text.ftrace_trampoline".
 *
 * Layout: an 8-byte literal (assembled as zero) immediately followed by the
 * trampoline code, which loads that literal into x16 and branches to it.
 * ftrace_make_call() writes the ftrace entry point into the literal and
 * redirects out-of-range 'bl' instructions to the instruction that follows it.
 */
.section ".text.ftrace_trampoline", "ax"
.align 3
0: .quad 0 /* branch target; zero until patched at runtime */
__ftrace_trampoline:
ldr x16, 0b /* load the literal at local label 0 above */
br x16 /* indirect branch to the loaded address */
ENDPROC(__ftrace_trampoline)
......@@ -10,10 +10,12 @@
*/
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/swab.h>
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/debug-monitors.h>
#include <asm/ftrace.h>
#include <asm/insn.h>
......@@ -70,6 +72,58 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
unsigned long pc = rec->ip;
u32 old, new;
long offset = (long)pc - (long)addr;
if (offset < -SZ_128M || offset >= SZ_128M) {
#ifdef CONFIG_ARM64_MODULE_PLTS
unsigned long *trampoline;
struct module *mod;
/*
* On kernels that support module PLTs, the offset between the
* branch instruction and its target may legally exceed the
* range of an ordinary relative 'bl' opcode. In this case, we
* need to branch via a trampoline in the module.
*
* NOTE: __module_text_address() must be called with preemption
* disabled, but we can rely on ftrace_lock to ensure that 'mod'
* retains its validity throughout the remainder of this code.
*/
preempt_disable();
mod = __module_text_address(pc);
preempt_enable();
if (WARN_ON(!mod))
return -EINVAL;
/*
* There is only one ftrace trampoline per module. For now,
* this is not a problem since on arm64, all dynamic ftrace
* invocations are routed via ftrace_caller(). This will need
* to be revisited if support for multiple ftrace entry points
* is added in the future, but for now, the pr_err() below
* deals with a theoretical issue only.
*/
trampoline = (unsigned long *)mod->arch.ftrace_trampoline;
if (trampoline[0] != addr) {
if (trampoline[0] != 0) {
pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n");
return -EINVAL;
}
/* point the trampoline to our ftrace entry point */
module_disable_ro(mod);
trampoline[0] = addr;
module_enable_ro(mod, true);
/* update trampoline before patching in the branch */
smp_wmb();
}
addr = (unsigned long)&trampoline[1];
#else /* CONFIG_ARM64_MODULE_PLTS */
return -EINVAL;
#endif /* CONFIG_ARM64_MODULE_PLTS */
}
old = aarch64_insn_gen_nop();
new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
......@@ -84,12 +138,55 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
unsigned long addr)
{
unsigned long pc = rec->ip;
u32 old, new;
bool validate = true;
u32 old = 0, new;
long offset = (long)pc - (long)addr;
if (offset < -SZ_128M || offset >= SZ_128M) {
#ifdef CONFIG_ARM64_MODULE_PLTS
u32 replaced;
/*
* 'mod' is only set at module load time, but if we end up
* dealing with an out-of-range condition, we can assume it
* is due to a module being loaded far away from the kernel.
*/
if (!mod) {
preempt_disable();
mod = __module_text_address(pc);
preempt_enable();
if (WARN_ON(!mod))
return -EINVAL;
}
/*
* The instruction we are about to patch may be a branch and
* link instruction that was redirected via a PLT entry. In
* this case, the normal validation will fail, but we can at
* least check that we are dealing with a branch and link
* instruction that points into the right module.
*/
if (aarch64_insn_read((void *)pc, &replaced))
return -EFAULT;
if (!aarch64_insn_is_bl(replaced) ||
!within_module(pc + aarch64_get_branch_offset(replaced),
mod))
return -EINVAL;
validate = false;
#else /* CONFIG_ARM64_MODULE_PLTS */
return -EINVAL;
#endif /* CONFIG_ARM64_MODULE_PLTS */
} else {
old = aarch64_insn_gen_branch_imm(pc, addr,
AARCH64_INSN_BRANCH_LINK);
}
old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
new = aarch64_insn_gen_nop();
return ftrace_modify_code(pc, old, new, true);
return ftrace_modify_code(pc, old, new, validate);
}
void arch_ftrace_update_code(int command)
......
......@@ -117,7 +117,7 @@ static void __kprobes patch_unmap(int fixmap)
int __kprobes aarch64_insn_read(void *addr, u32 *insnp)
{
int ret;
u32 val;
__le32 val;
ret = probe_kernel_read(&val, addr, AARCH64_INSN_SIZE);
if (!ret)
......@@ -126,7 +126,7 @@ int __kprobes aarch64_insn_read(void *addr, u32 *insnp)
return ret;
}
static int __kprobes __aarch64_insn_write(void *addr, u32 insn)
static int __kprobes __aarch64_insn_write(void *addr, __le32 insn)
{
void *waddr = addr;
unsigned long flags = 0;
......@@ -145,8 +145,7 @@ static int __kprobes __aarch64_insn_write(void *addr, u32 insn)
int __kprobes aarch64_insn_write(void *addr, u32 insn)
{
insn = cpu_to_le32(insn);
return __aarch64_insn_write(addr, insn);
return __aarch64_insn_write(addr, cpu_to_le32(insn));
}
static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn)
......
......@@ -27,7 +27,7 @@ u16 __initdata memstart_offset_seed;
static __init u64 get_kaslr_seed(void *fdt)
{
int node, len;
u64 *prop;
fdt64_t *prop;
u64 ret;
node = fdt_path_offset(fdt, "/chosen");
......
......@@ -74,7 +74,7 @@ enum aarch64_reloc_op {
RELOC_OP_PAGE,
};
static u64 do_reloc(enum aarch64_reloc_op reloc_op, void *place, u64 val)
static u64 do_reloc(enum aarch64_reloc_op reloc_op, __le32 *place, u64 val)
{
switch (reloc_op) {
case RELOC_OP_ABS:
......@@ -121,12 +121,12 @@ enum aarch64_insn_movw_imm_type {
AARCH64_INSN_IMM_MOVKZ,
};
static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,
static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
int lsb, enum aarch64_insn_movw_imm_type imm_type)
{
u64 imm;
s64 sval;
u32 insn = le32_to_cpu(*(u32 *)place);
u32 insn = le32_to_cpu(*place);
sval = do_reloc(op, place, val);
imm = sval >> lsb;
......@@ -154,7 +154,7 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,
/* Update the instruction with the new encoding. */
insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
*(u32 *)place = cpu_to_le32(insn);
*place = cpu_to_le32(insn);
if (imm > U16_MAX)
return -ERANGE;
......@@ -162,12 +162,12 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,
return 0;
}
static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val,
static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
int lsb, int len, enum aarch64_insn_imm_type imm_type)
{
u64 imm, imm_mask;
s64 sval;
u32 insn = le32_to_cpu(*(u32 *)place);
u32 insn = le32_to_cpu(*place);
/* Calculate the relocation value. */
sval = do_reloc(op, place, val);
......@@ -179,7 +179,7 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val,
/* Update the instruction's immediate field. */
insn = aarch64_insn_encode_immediate(imm_type, insn, imm);
*(u32 *)place = cpu_to_le32(insn);
*place = cpu_to_le32(insn);
/*
* Extract the upper value bits (including the sign bit) and
......@@ -420,8 +420,12 @@ int module_finalize(const Elf_Ehdr *hdr,
for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
if (strcmp(".altinstructions", secstrs + s->sh_name) == 0) {
apply_alternatives((void *)s->sh_addr, s->sh_size);
return 0;
}
#ifdef CONFIG_ARM64_MODULE_PLTS
if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) &&
!strcmp(".text.ftrace_trampoline", secstrs + s->sh_name))
me->arch.ftrace_trampoline = (void *)s->sh_addr;
#endif
}
return 0;
......
......@@ -108,7 +108,10 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
if (!acpi_disabled) {
struct pci_config_window *cfg = bridge->bus->sysdata;
struct acpi_device *adev = to_acpi_device(cfg->parent);
struct device *bus_dev = &bridge->bus->dev;
ACPI_COMPANION_SET(&bridge->dev, adev);
set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev)));
}
return 0;
......
......@@ -529,7 +529,7 @@ static struct attribute_group armv8_pmuv3_events_attr_group = {
.is_visible = armv8pmu_event_attr_is_visible,
};
PMU_FORMAT_ATTR(event, "config:0-9");
PMU_FORMAT_ATTR(event, "config:0-15");
static struct attribute *armv8_pmuv3_format_attrs[] = {
&format_attr_event.attr,
......
......@@ -522,9 +522,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
pr_err("current sp %lx does not match saved sp %lx\n",
orig_sp, stack_addr);
pr_err("Saved registers for jprobe %p\n", jp);
show_regs(saved_regs);
__show_regs(saved_regs);
pr_err("Current registers\n");
show_regs(regs);
__show_regs(regs);
BUG();
}
unpause_graph_tracing();
......
......@@ -210,6 +210,7 @@ void __show_regs(struct pt_regs *regs)
/*
 * Print the register state in @regs via __show_regs(), then call
 * dump_backtrace() for the same @regs with a NULL task pointer.
 * NOTE(review): a NULL task presumably selects the current task - confirm in
 * dump_backtrace().
 */
void show_regs(struct pt_regs * regs)
{
__show_regs(regs);
dump_backtrace(regs, NULL);
}
static void tls_thread_flush(void)
......@@ -297,12 +298,16 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
return 0;
}
/*
 * Read TPIDR_EL0 and store its value in the current task's user TLS slot
 * (the location returned by task_user_tls(current)).
 */
void tls_preserve_current_state(void)
{
	unsigned long tpidr = read_sysreg(tpidr_el0);

	*task_user_tls(current) = tpidr;
}
static void tls_thread_switch(struct task_struct *next)
{
unsigned long tpidr, tpidrro;
tpidr = read_sysreg(tpidr_el0);
*task_user_tls(current) = tpidr;
tls_preserve_current_state();
tpidr = *task_user_tls(next);
tpidrro = is_compat_thread(task_thread_info(next)) ?
......
......@@ -623,6 +623,10 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset,
{
struct user_fpsimd_state *uregs;
uregs = &target->thread.fpsimd_state.user_fpsimd;
if (target == current)
fpsimd_preserve_current_state();
return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1);
}
......@@ -648,6 +652,10 @@ static int tls_get(struct task_struct *target, const struct user_regset *regset,
void *kbuf, void __user *ubuf)
{
unsigned long *tls = &target->thread.tp_value;
if (target == current)
tls_preserve_current_state();
return user_regset_copyout(&pos, &count, &kbuf, &ubuf, tls, 0, -1);
}
......@@ -894,21 +902,27 @@ static int compat_vfp_get(struct task_struct *target,
{
struct user_fpsimd_state *uregs;
compat_ulong_t fpscr;
int ret;
int ret, vregs_end_pos;
uregs = &target->thread.fpsimd_state.user_fpsimd;
if (target == current)
fpsimd_preserve_current_state();
/*
* The VFP registers are packed into the fpsimd_state, so they all sit
* nicely together for us. We just need to create the fpscr separately.
*/
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0,
VFP_STATE_SIZE - sizeof(compat_ulong_t));
vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t);
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs,
0, vregs_end_pos);
if (count && !ret) {
fpscr = (uregs->fpsr & VFP_FPSCR_STAT_MASK) |
(uregs->fpcr & VFP_FPSCR_CTRL_MASK);
ret = put_user(fpscr, (compat_ulong_t *)ubuf);
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &fpscr,
vregs_end_pos, VFP_STATE_SIZE);
}
return ret;
......@@ -921,20 +935,21 @@ static int compat_vfp_set(struct task_struct *target,
{
struct user_fpsimd_state *uregs;
compat_ulong_t fpscr;
int ret;
if (pos + count > VFP_STATE_SIZE)
return -EIO;
int ret, vregs_end_pos;
uregs = &target->thread.fpsimd_state.user_fpsimd;
vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t);
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, uregs, 0,
VFP_STATE_SIZE - sizeof(compat_ulong_t));
vregs_end_pos);
if (count && !ret) {
ret = get_user(fpscr, (compat_ulong_t *)ubuf);
uregs->fpsr = fpscr & VFP_FPSCR_STAT_MASK;
uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpscr,
vregs_end_pos, VFP_STATE_SIZE);
if (!ret) {
uregs->fpsr = fpscr & VFP_FPSCR_STAT_MASK;
uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK;
}
}
fpsimd_flush_task_state(target);
......
......@@ -194,6 +194,9 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys)
}
name = of_flat_dt_get_machine_name();
if (!name)
return;
pr_info("Machine model: %s\n", name);
dump_stack_set_arch_desc("%s (DT)", name);
}
......
......@@ -19,10 +19,14 @@
#include <linux/compat.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/personality.h>
#include <linux/freezer.h>
#include <linux/stddef.h>
#include <linux/uaccess.h>
#include <linux/sizes.h>
#include <linux/string.h>
#include <linux/tracehook.h>
#include <linux/ratelimit.h>
......@@ -41,10 +45,133 @@
/*
 * Fixed part of the signal frame: a siginfo followed by a ucontext.
 * init_user_layout() and parse_user_sigframe() locate the variable-size
 * records inside uc.uc_mcontext.__reserved[].
 */
struct rt_sigframe {
struct siginfo info;
struct ucontext uc;
};
/*
 * A pair of 64-bit values; rt_sigframe_user_layout.next_frame points at one
 * of these in user memory.
 * NOTE(review): presumably the saved frame pointer / link register pair -
 * confirm against the code that fills it in.
 */
struct frame_record {
u64 fp;
u64 lr;
};
/*
 * Bookkeeping used while laying out the user signal frame. The *_offset
 * fields are byte offsets from the signal frame base (see
 * apply_user_offset()). init_user_layout() zeroes the whole structure, and
 * __sigframe_alloc() treats an extra_offset of 0 as "no extra_context
 * allocated yet".
 */
struct rt_sigframe_user_layout {
struct rt_sigframe __user *sigframe; /* base that the offsets are applied to */
struct frame_record __user *next_frame;
unsigned long size; /* size of allocated sigframe data */
unsigned long limit; /* largest allowed size */
/* NOTE(review): presumably the offsets of the fpsimd_context and esr_context records - confirm */
unsigned long fpsimd_offset;
unsigned long esr_offset;
unsigned long extra_offset; /* extra_context record; set by __sigframe_alloc() */
unsigned long end_offset; /* terminator record; set by sigframe_alloc_end() */
};
/* Sizes of the fixed frame and of individual records, each rounded up to a multiple of 16 bytes */
#define BASE_SIGFRAME_SIZE round_up(sizeof(struct rt_sigframe), 16)
#define TERMINATOR_SIZE round_up(sizeof(struct _aarch64_ctx), 16)
#define EXTRA_CONTEXT_SIZE round_up(sizeof(struct extra_context), 16)
/*
 * Reset @user to an empty layout.
 *
 * Every field is zeroed, the allocation cursor (size) is placed at the start
 * of sigcontext.__reserved[], and the limit is set to the end of
 * __reserved[] minus the room held back for one terminator record and one
 * extra_context record.
 */
static void init_user_layout(struct rt_sigframe_user_layout *user)
{
	const size_t reserved_start =
		offsetof(struct rt_sigframe, uc.uc_mcontext.__reserved);
	const size_t reserved_end = reserved_start +
		sizeof(user->sigframe->uc.uc_mcontext.__reserved);

	memset(user, 0, sizeof(*user));

	user->size = reserved_start;

	/* Hold back space for the extension record and the terminator */
	user->limit = reserved_end;
	user->limit -= TERMINATOR_SIZE;
	user->limit -= EXTRA_CONTEXT_SIZE;
}
/*
 * Size of the signal frame described by @user: the larger of the allocated
 * size and sizeof(struct rt_sigframe), rounded up to a multiple of 16.
 */
static size_t sigframe_size(struct rt_sigframe_user_layout const *user)
{
	size_t bytes = max(user->size, sizeof(struct rt_sigframe));

	return round_up(bytes, 16);
}
/*
* Sanity limit on the approximate maximum size of signal frame we'll
* try to generate. Stack alignment padding and the frame record are
* not taken into account. This limit is not a guarantee and is
* NOT ABI.
*/
#define SIGFRAME_MAXSZ SZ_64K
/*
 * Allocate <size> bytes, rounded up to a multiple of 16, at the current end
 * of the layout and store the offset of the new block in *offset.
 *
 * If the request does not fit below user->limit, no extra_context record has
 * been allocated yet (user->extra_offset is still 0) and <extend> is true,
 * an extra_context record and room for the __reserved[] terminator are
 * allocated first, and the limit is raised to
 * SIGFRAME_MAXSZ - TERMINATOR_SIZE before the request is re-checked.
 *
 * Returns 0 on success, or -ENOMEM if the block does not fit.
 */
static int __sigframe_alloc(struct rt_sigframe_user_layout *user,
unsigned long *offset, size_t size, bool extend)
{
size_t padded_size = round_up(size, 16);
if (padded_size > user->limit - user->size &&
!user->extra_offset &&
extend) {
int ret;
/* Release the room held back for the extra_context record ... */
user->limit += EXTRA_CONTEXT_SIZE;
/* ... and allocate it; extend is false so this call cannot recurse further */
ret = __sigframe_alloc(user, &user->extra_offset,
sizeof(struct extra_context), false);
if (ret) {
/* Undo the limit adjustment on failure */
user->limit -= EXTRA_CONTEXT_SIZE;
return ret;
}
/* Reserve space for the __reserved[] terminator */
user->size += TERMINATOR_SIZE;
/*
* Allow expansion up to SIGFRAME_MAXSZ, ensuring space for
* the terminator:
*/
user->limit = SIGFRAME_MAXSZ - TERMINATOR_SIZE;
}
/* Still not enough space? Bad luck! */
if (padded_size > user->limit - user->size)
return -ENOMEM;
*offset = user->size;
user->size += padded_size;
return 0;
}
/*
 * Allocate space for an optional record of <size> bytes in the user
 * signal frame. The offset from the signal frame base address to the
 * allocated block is assigned to *offset.
 *
 * This is __sigframe_alloc() with extension enabled, so the frame may grow
 * past sigcontext.__reserved[] by way of an extra_context record.
 * Returns 0 on success or the error returned by __sigframe_alloc().
 */
static int sigframe_alloc(struct rt_sigframe_user_layout *user,
unsigned long *offset, size_t size)
{
return __sigframe_alloc(user, offset, size, true);
}
/*
 * Allocate the null terminator record and prevent further allocations.
 *
 * The TERMINATOR_SIZE held back in user->limit is released, a
 * struct _aarch64_ctx is allocated at the end of the layout (its offset is
 * stored in user->end_offset) and user->limit is then clamped to user->size.
 *
 * Returns 0 on success or the error returned by sigframe_alloc().
 */
static int sigframe_alloc_end(struct rt_sigframe_user_layout *user)
{
int ret;
/* Un-reserve the space reserved for the terminator: */
user->limit += TERMINATOR_SIZE;
ret = sigframe_alloc(user, &user->end_offset,
sizeof(struct _aarch64_ctx));
if (ret)
return ret;
/* Prevent further allocation: */
user->limit = user->size;
return 0;
}
/*
 * Convert a byte offset within the signal frame into a user-space pointer
 * by adding it to the frame base recorded in @user->sigframe.
 */
static void __user *apply_user_offset(
	struct rt_sigframe_user_layout const *user, unsigned long offset)
{
	return (char __user *)user->sigframe + offset;
}
static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
{
struct fpsimd_state *fpsimd = &current->thread.fpsimd_state;
......@@ -92,12 +219,159 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
return err ? -EFAULT : 0;
}
/*
 * Locations of the records found in a user signal frame by
 * parse_user_sigframe().
 */
struct user_ctxs {
/* The FPSIMD_MAGIC record; parse_user_sigframe() resets it to NULL first */
struct fpsimd_context __user *fpsimd;
};
/*
 * Walk the chain of _aarch64_ctx records in the signal frame <sf> and
 * record the location of the ones we understand in <user>.
 *
 * Parsing starts in sc->__reserved[]. Every record must start on a
 * 16-byte boundary and fit within the area being parsed. A record with
 * magic 0 and size 0 terminates the walk. At most one FPSIMD_MAGIC
 * record and at most one EXTRA_MAGIC record are accepted; ESR_MAGIC
 * records are skipped; any other magic is rejected.
 *
 * An EXTRA_MAGIC record must be followed immediately by a terminator and
 * then by the extra data area it describes; parsing continues there.
 *
 * Returns 0 if the frame is well formed and contains an fpsimd record,
 * -EINVAL if it is malformed, or the error reported by
 * __get_user_error() if a user access fails.
 */
static int parse_user_sigframe(struct user_ctxs *user,
			       struct rt_sigframe __user *sf)
{
	struct sigcontext __user *const sc = &sf->uc.uc_mcontext;
	struct _aarch64_ctx __user *head;
	char __user *base = (char __user *)&sc->__reserved;
	size_t offset = 0;
	size_t limit = sizeof(sc->__reserved);
	bool have_extra_context = false;
	char const __user *const sfp = (char const __user *)sf;

	user->fpsimd = NULL;

	if (!IS_ALIGNED((unsigned long)base, 16))
		goto invalid;

	while (1) {
		int err = 0;
		u32 magic, size;
		char const __user *userp;
		struct extra_context const __user *extra;
		u64 extra_datap;
		u32 extra_size;
		struct _aarch64_ctx const __user *end;
		u32 end_magic, end_size;

		/* There must be room for at least a record header */
		if (limit - offset < sizeof(*head))
			goto invalid;

		if (!IS_ALIGNED(offset, 16))
			goto invalid;

		head = (struct _aarch64_ctx __user *)(base + offset);
		__get_user_error(magic, &head->magic, err);
		__get_user_error(size, &head->size, err);
		if (err)
			return err;

		/* The record must not claim to run past the parsed area */
		if (limit - offset < size)
			goto invalid;

		switch (magic) {
		case 0:
			/* Terminator: only valid with a zero size */
			if (size)
				goto invalid;

			goto done;

		case FPSIMD_MAGIC:
			if (user->fpsimd)
				goto invalid;

			if (size < sizeof(*user->fpsimd))
				goto invalid;

			user->fpsimd = (struct fpsimd_context __user *)head;
			break;

		case ESR_MAGIC:
			/* ignore */
			break;

		case EXTRA_MAGIC:
			if (have_extra_context)
				goto invalid;

			if (size < sizeof(*extra))
				goto invalid;

			userp = (char const __user *)head;

			extra = (struct extra_context const __user *)userp;
			userp += size;

			__get_user_error(extra_datap, &extra->datap, err);
			__get_user_error(extra_size, &extra->size, err);
			if (err)
				return err;

			/* Check for the dummy terminator in __reserved[]: */

			if (limit - offset - size < TERMINATOR_SIZE)
				goto invalid;

			end = (struct _aarch64_ctx const __user *)userp;
			userp += TERMINATOR_SIZE;

			__get_user_error(end_magic, &end->magic, err);
			__get_user_error(end_size, &end->size, err);
			if (err)
				return err;

			if (end_magic || end_size)
				goto invalid;

			/* Prevent looping/repeated parsing of extra_context */
			have_extra_context = true;

			base = (__force void __user *)extra_datap;
			if (!IS_ALIGNED((unsigned long)base, 16))
				goto invalid;

			if (!IS_ALIGNED(extra_size, 16))
				goto invalid;

			/* The extra data must directly follow the terminator */
			if (base != userp)
				goto invalid;

			/* Reject "unreasonably large" frames: */
			if (extra_size > sfp + SIGFRAME_MAXSZ - userp)
				goto invalid;

			/*
			 * Ignore trailing terminator in __reserved[]
			 * and start parsing extra data:
			 */
			offset = 0;
			limit = extra_size;

			/*
			 * The extra data lies beyond __reserved[], so any
			 * check done on the fixed-size frame (presumably by
			 * the caller -- confirm) does not cover it. Validate
			 * the range here before the unchecked
			 * __get_user_error() accessors above touch it.
			 */
			if (!access_ok(VERIFY_READ, base, limit))
				goto invalid;

			continue;

		default:
			goto invalid;
		}

		/* A record can never be smaller than its own header */
		if (size < sizeof(*head))
			goto invalid;

		if (limit - offset < size)
			goto invalid;

		offset += size;
	}

done:
	/* The fpsimd record is mandatory */
	if (!user->fpsimd)
		goto invalid;

	return 0;

invalid:
	return -EINVAL;
}
static int restore_sigframe(struct pt_regs *regs,
struct rt_sigframe __user *sf)
{
sigset_t set;
int i, err;
void *aux = sf->uc.uc_mcontext.__reserved;
struct user_ctxs user;
err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set));
if (err == 0)
......@@ -116,12 +390,11 @@ static int restore_sigframe(struct pt_regs *regs,
regs->syscallno = ~0UL;
err |= !valid_user_regs(&regs->user_regs, current);
if (err == 0)
err = parse_user_sigframe(&user, sf);
if (err == 0) {
struct fpsimd_context *fpsimd_ctx =
container_of(aux, struct fpsimd_context, head);
err |= restore_fpsimd_context(fpsimd_ctx);
}
if (err == 0)
err = restore_fpsimd_context(user.fpsimd);
return err;
}
......@@ -162,16 +435,37 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
return 0;
}
static int setup_sigframe(struct rt_sigframe __user *sf,
/*
 * Work out where the optional records go in the signal frame: always an
 * fpsimd_context, an esr_context only when current->thread.fault_code is
 * non-zero, and finally the terminator. Returns 0 or the first
 * allocation error.
 */
static int setup_sigframe_layout(struct rt_sigframe_user_layout *user)
{
	int rc = sigframe_alloc(user, &user->fpsimd_offset,
				sizeof(struct fpsimd_context));

	/* fault information, if valid */
	if (!rc && current->thread.fault_code)
		rc = sigframe_alloc(user, &user->esr_offset,
				    sizeof(struct esr_context));

	return rc ? rc : sigframe_alloc_end(user);
}
static int setup_sigframe(struct rt_sigframe_user_layout *user,
struct pt_regs *regs, sigset_t *set)
{
int i, err = 0;
void *aux = sf->uc.uc_mcontext.__reserved;
struct _aarch64_ctx *end;
struct rt_sigframe __user *sf = user->sigframe;
/* set up the stack frame for unwinding */
__put_user_error(regs->regs[29], &sf->fp, err);
__put_user_error(regs->regs[30], &sf->lr, err);
__put_user_error(regs->regs[29], &user->next_frame->fp, err);
__put_user_error(regs->regs[30], &user->next_frame->lr, err);
for (i = 0; i < 31; i++)
__put_user_error(regs->regs[i], &sf->uc.uc_mcontext.regs[i],
......@@ -185,58 +479,103 @@ static int setup_sigframe(struct rt_sigframe __user *sf,
err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set));
if (err == 0) {
struct fpsimd_context *fpsimd_ctx =
container_of(aux, struct fpsimd_context, head);
struct fpsimd_context __user *fpsimd_ctx =
apply_user_offset(user, user->fpsimd_offset);
err |= preserve_fpsimd_context(fpsimd_ctx);
aux += sizeof(*fpsimd_ctx);
}
/* fault information, if valid */
if (current->thread.fault_code) {
struct esr_context *esr_ctx =
container_of(aux, struct esr_context, head);
if (err == 0 && user->esr_offset) {
struct esr_context __user *esr_ctx =
apply_user_offset(user, user->esr_offset);
__put_user_error(ESR_MAGIC, &esr_ctx->head.magic, err);
__put_user_error(sizeof(*esr_ctx), &esr_ctx->head.size, err);
__put_user_error(current->thread.fault_code, &esr_ctx->esr, err);
aux += sizeof(*esr_ctx);
}
if (err == 0 && user->extra_offset) {
char __user *sfp = (char __user *)user->sigframe;
char __user *userp =
apply_user_offset(user, user->extra_offset);
struct extra_context __user *extra;
struct _aarch64_ctx __user *end;
u64 extra_datap;
u32 extra_size;
extra = (struct extra_context __user *)userp;
userp += EXTRA_CONTEXT_SIZE;
end = (struct _aarch64_ctx __user *)userp;
userp += TERMINATOR_SIZE;
/*
* extra_datap is just written to the signal frame.
* The value gets cast back to a void __user *
* during sigreturn.
*/
extra_datap = (__force u64)userp;
extra_size = sfp + round_up(user->size, 16) - userp;
__put_user_error(EXTRA_MAGIC, &extra->head.magic, err);
__put_user_error(EXTRA_CONTEXT_SIZE, &extra->head.size, err);
__put_user_error(extra_datap, &extra->datap, err);
__put_user_error(extra_size, &extra->size, err);
/* Add the terminator */
__put_user_error(0, &end->magic, err);
__put_user_error(0, &end->size, err);
}
/* set the "end" magic */
end = aux;
__put_user_error(0, &end->magic, err);
__put_user_error(0, &end->size, err);
if (err == 0) {
struct _aarch64_ctx __user *end =
apply_user_offset(user, user->end_offset);
__put_user_error(0, &end->magic, err);
__put_user_error(0, &end->size, err);
}
return err;
}
static struct rt_sigframe __user *get_sigframe(struct ksignal *ksig,
struct pt_regs *regs)
static int get_sigframe(struct rt_sigframe_user_layout *user,
struct ksignal *ksig, struct pt_regs *regs)
{
unsigned long sp, sp_top;
struct rt_sigframe __user *frame;
int err;
init_user_layout(user);
err = setup_sigframe_layout(user);
if (err)
return err;
sp = sp_top = sigsp(regs->sp, ksig);
sp = (sp - sizeof(struct rt_sigframe)) & ~15;
frame = (struct rt_sigframe __user *)sp;
sp = round_down(sp - sizeof(struct frame_record), 16);
user->next_frame = (struct frame_record __user *)sp;
sp = round_down(sp, 16) - sigframe_size(user);
user->sigframe = (struct rt_sigframe __user *)sp;
/*
* Check that we can actually write to the signal frame.
*/
if (!access_ok(VERIFY_WRITE, frame, sp_top - sp))
frame = NULL;
if (!access_ok(VERIFY_WRITE, user->sigframe, sp_top - sp))
return -EFAULT;
return frame;
return 0;
}
static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
void __user *frame, int usig)
struct rt_sigframe_user_layout *user, int usig)
{
__sigrestore_t sigtramp;
regs->regs[0] = usig;
regs->sp = (unsigned long)frame;
regs->regs[29] = regs->sp + offsetof(struct rt_sigframe, fp);
regs->sp = (unsigned long)user->sigframe;
regs->regs[29] = (unsigned long)&user->next_frame->fp;
regs->pc = (unsigned long)ka->sa.sa_handler;
if (ka->sa.sa_flags & SA_RESTORER)
......@@ -250,20 +589,22 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
struct pt_regs *regs)
{
struct rt_sigframe_user_layout user;
struct rt_sigframe __user *frame;
int err = 0;
frame = get_sigframe(ksig, regs);
if (!frame)
if (get_sigframe(&user, ksig, regs))
return 1;
frame = user.sigframe;
__put_user_error(0, &frame->uc.uc_flags, err);
__put_user_error(NULL, &frame->uc.uc_link, err);
err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
err |= setup_sigframe(frame, regs, set);
err |= setup_sigframe(&user, regs, set);
if (err == 0) {
setup_return(regs, &ksig->ka, frame, usig);
setup_return(regs, &ksig->ka, &user, usig);
if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
err |= copy_siginfo_to_user(&frame->info, &ksig->info);
regs->regs[1] = (unsigned long)&frame->info;
......
......@@ -210,6 +210,7 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
put_task_stack(tsk);
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
void save_stack_trace(struct stack_trace *trace)
{
......
......@@ -140,7 +140,7 @@ static void dump_instr(const char *lvl, struct pt_regs *regs)
}
}
static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
{
struct stackframe frame;
unsigned long irq_stack_ptr;
......@@ -344,22 +344,24 @@ static int call_undef_hook(struct pt_regs *regs)
if (compat_thumb_mode(regs)) {
/* 16-bit Thumb instruction */
if (get_user(instr, (u16 __user *)pc))
__le16 instr_le;
if (get_user(instr_le, (__le16 __user *)pc))
goto exit;
instr = le16_to_cpu(instr);
instr = le16_to_cpu(instr_le);
if (aarch32_insn_is_wide(instr)) {
u32 instr2;
if (get_user(instr2, (u16 __user *)(pc + 2)))
if (get_user(instr_le, (__le16 __user *)(pc + 2)))
goto exit;
instr2 = le16_to_cpu(instr2);
instr2 = le16_to_cpu(instr_le);
instr = (instr << 16) | instr2;
}
} else {
/* 32-bit ARM instruction */
if (get_user(instr, (u32 __user *)pc))
__le32 instr_le;
if (get_user(instr_le, (__le32 __user *)pc))
goto exit;
instr = le32_to_cpu(instr);
instr = le32_to_cpu(instr_le);
}
raw_spin_lock_irqsave(&undef_lock, flags);
......@@ -728,8 +730,6 @@ static int bug_handler(struct pt_regs *regs, unsigned int esr)
break;
case BUG_TRAP_TYPE_WARN:
/* Ideally, report_bug() should backtrace for us... but no. */
dump_backtrace(regs, NULL);
break;
default:
......
......@@ -37,7 +37,7 @@
#include <asm/vdso.h>
#include <asm/vdso_datapage.h>
extern char vdso_start, vdso_end;
extern char vdso_start[], vdso_end[];
static unsigned long vdso_pages __ro_after_init;
/*
......@@ -125,14 +125,14 @@ static int __init vdso_init(void)
struct page **vdso_pagelist;
unsigned long pfn;
if (memcmp(&vdso_start, "\177ELF", 4)) {
if (memcmp(vdso_start, "\177ELF", 4)) {
pr_err("vDSO is not a valid ELF object!\n");
return -EINVAL;
}
vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT;
vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n",
vdso_pages + 1, vdso_pages, &vdso_start, 1L, vdso_data);
vdso_pages + 1, vdso_pages, vdso_start, 1L, vdso_data);
/* Allocate the vDSO pagelist, plus a page for the data. */
vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *),
......@@ -145,7 +145,7 @@ static int __init vdso_init(void)
/* Grab the vDSO code pages. */
pfn = sym_to_pfn(&vdso_start);
pfn = sym_to_pfn(vdso_start);
for (i = 0; i < vdso_pages; i++)
vdso_pagelist[i + 1] = pfn_to_page(pfn + i);
......
......@@ -95,11 +95,6 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
unsigned long attrs)
{
if (dev == NULL) {
WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
return NULL;
}
if (IS_ENABLED(CONFIG_ZONE_DMA) &&
dev->coherent_dma_mask <= DMA_BIT_MASK(32))
flags |= GFP_DMA;
......@@ -128,10 +123,6 @@ static void __dma_free_coherent(struct device *dev, size_t size,
bool freed;
phys_addr_t paddr = dma_to_phys(dev, dma_handle);
if (dev == NULL) {
WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
return;
}
freed = dma_release_from_contiguous(dev,
phys_to_page(paddr),
......
......@@ -31,6 +31,7 @@
#include <linux/highmem.h>
#include <linux/perf_event.h>
#include <linux/preempt.h>
#include <linux/hugetlb.h>
#include <asm/bug.h>
#include <asm/cpufeature.h>
......@@ -42,6 +43,8 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <acpi/ghes.h>
struct fault_info {
int (*fn)(unsigned long addr, unsigned int esr,
struct pt_regs *regs);
......@@ -80,18 +83,35 @@ static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
#endif
/*
* Dump out the page tables associated with 'addr' in mm 'mm'.
* Dump out the page tables associated with 'addr' in the currently active mm.
*/
void show_pte(struct mm_struct *mm, unsigned long addr)
void show_pte(unsigned long addr)
{
struct mm_struct *mm;
pgd_t *pgd;
if (!mm)
if (addr < TASK_SIZE) {
/* TTBR0 */
mm = current->active_mm;
if (mm == &init_mm) {
pr_alert("[%016lx] user address but active_mm is swapper\n",
addr);
return;
}
} else if (addr >= VA_START) {
/* TTBR1 */
mm = &init_mm;
} else {
pr_alert("[%016lx] address between user and kernel address ranges\n",
addr);
return;
}
pr_alert("pgd = %p\n", mm->pgd);
pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgd = %p\n",
mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
VA_BITS, mm->pgd);
pgd = pgd_offset(mm, addr);
pr_alert("[%08lx] *pgd=%016llx", addr, pgd_val(*pgd));
pr_alert("[%016lx] *pgd=%016llx", addr, pgd_val(*pgd));
do {
pud_t *pud;
......@@ -196,8 +216,8 @@ static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs,
/*
* The kernel tried to access some page that wasn't present.
*/
static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
unsigned int esr, struct pt_regs *regs)
static void __do_kernel_fault(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
const char *msg;
......@@ -227,7 +247,7 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
pr_alert("Unable to handle kernel %s at virtual address %08lx\n", msg,
addr);
show_pte(mm, addr);
show_pte(addr);
die("Oops", regs, esr);
bust_spinlocks(0);
do_exit(SIGKILL);
......@@ -239,18 +259,20 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
*/
static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
unsigned int esr, unsigned int sig, int code,
struct pt_regs *regs)
struct pt_regs *regs, int fault)
{
struct siginfo si;
const struct fault_info *inf;
unsigned int lsb = 0;
if (unhandled_signal(tsk, sig) && show_unhandled_signals_ratelimited()) {
inf = esr_to_fault_info(esr);
pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n",
pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x",
tsk->comm, task_pid_nr(tsk), inf->name, sig,
addr, esr);
show_pte(tsk->mm, addr);
show_regs(regs);
print_vma_addr(KERN_CONT ", in ", regs->pc);
pr_cont("\n");
__show_regs(regs);
}
tsk->thread.fault_address = addr;
......@@ -259,13 +281,23 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
si.si_errno = 0;
si.si_code = code;
si.si_addr = (void __user *)addr;
/*
* Either small page or large page may be poisoned.
* In other words, VM_FAULT_HWPOISON_LARGE and
* VM_FAULT_HWPOISON are mutually exclusive.
*/
if (fault & VM_FAULT_HWPOISON_LARGE)
lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
else if (fault & VM_FAULT_HWPOISON)
lsb = PAGE_SHIFT;
si.si_addr_lsb = lsb;
force_sig_info(sig, &si, tsk);
}
static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->active_mm;
const struct fault_info *inf;
/*
......@@ -274,9 +306,9 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
*/
if (user_mode(regs)) {
inf = esr_to_fault_info(esr);
__do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs);
__do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs, 0);
} else
__do_kernel_fault(mm, addr, esr, regs);
__do_kernel_fault(addr, esr, regs);
}
#define VM_FAULT_BADMAP 0x010000
......@@ -329,7 +361,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
{
struct task_struct *tsk;
struct mm_struct *mm;
int fault, sig, code;
int fault, sig, code, major = 0;
unsigned long vm_flags = VM_READ | VM_WRITE;
unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
......@@ -368,6 +400,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
die("Accessing user space memory outside uaccess.h routines", regs, esr);
}
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
/*
* As per x86, we may deadlock here. However, since the kernel only
* validly references user space from well defined areas of the code,
......@@ -391,24 +425,42 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
}
fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk);
major |= fault & VM_FAULT_MAJOR;
/*
* If we need to retry but a fatal signal is pending, handle the
* signal first. We do not need to release the mmap_sem because it
* would already be released in __lock_page_or_retry in mm/filemap.c.
*/
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
return 0;
if (fault & VM_FAULT_RETRY) {
/*
* If we need to retry but a fatal signal is pending,
* handle the signal first. We do not need to release
* the mmap_sem because it would already be released
* in __lock_page_or_retry in mm/filemap.c.
*/
if (fatal_signal_pending(current))
return 0;
/*
* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
* starvation.
*/
if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
mm_flags |= FAULT_FLAG_TRIED;
goto retry;
}
}
up_read(&mm->mmap_sem);
/*
* Major/minor page fault accounting is only done on the initial
* attempt. If we go through a retry, it is extremely likely that the
* page will be found in page cache at that point.
* Handle the "normal" (no error) case first.
*/
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
if (fault & VM_FAULT_MAJOR) {
if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
VM_FAULT_BADACCESS)))) {
/*
* Major/minor page fault accounting is only done
* once. If we go through a retry, it is extremely
* likely that the page will be found in page cache at
* that point.
*/
if (major) {
tsk->maj_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
addr);
......@@ -417,25 +469,9 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
addr);
}
if (fault & VM_FAULT_RETRY) {
/*
* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
* starvation.
*/
mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
mm_flags |= FAULT_FLAG_TRIED;
goto retry;
}
}
up_read(&mm->mmap_sem);
/*
* Handle the "normal" case first - VM_FAULT_MAJOR
*/
if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
VM_FAULT_BADACCESS))))
return 0;
}
/*
* If we are in kernel mode at this point, we have no context to
......@@ -461,6 +497,9 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
*/
sig = SIGBUS;
code = BUS_ADRERR;
} else if (fault & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) {
sig = SIGBUS;
code = BUS_MCEERR_AR;
} else {
/*
* Something tried to access memory that isn't in our memory
......@@ -471,11 +510,11 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
SEGV_ACCERR : SEGV_MAPERR;
}
__do_user_fault(tsk, addr, esr, sig, code, regs);
__do_user_fault(tsk, addr, esr, sig, code, regs, fault);
return 0;
no_context:
__do_kernel_fault(mm, addr, esr, regs);
__do_kernel_fault(addr, esr, regs);
return 0;
}
......@@ -522,6 +561,47 @@ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
return 1;
}
/*
* This abort handler deals with Synchronous External Abort.
* It calls notifiers, and then returns "fault".
*/
static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
struct siginfo info;
const struct fault_info *inf;
int ret = 0;
inf = esr_to_fault_info(esr);
pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n",
inf->name, esr, addr);
/*
* Synchronous aborts may interrupt code which had interrupts masked.
* Before calling out into the wider kernel tell the interested
* subsystems.
*/
if (IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
if (interrupts_enabled(regs))
nmi_enter();
ret = ghes_notify_sea();
if (interrupts_enabled(regs))
nmi_exit();
}
info.si_signo = SIGBUS;
info.si_errno = 0;
info.si_code = 0;
if (esr & ESR_ELx_FnV)
info.si_addr = NULL;
else
info.si_addr = (void __user *)addr;
arm64_notify_die("", regs, &info, esr);
return ret;
}
static const struct fault_info fault_info[] = {
{ do_bad, SIGBUS, 0, "ttbr address size fault" },
{ do_bad, SIGBUS, 0, "level 1 address size fault" },
......@@ -539,22 +619,22 @@ static const struct fault_info fault_info[] = {
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" },
{ do_bad, SIGBUS, 0, "synchronous external abort" },
{ do_sea, SIGBUS, 0, "synchronous external abort" },
{ do_bad, SIGBUS, 0, "unknown 17" },
{ do_bad, SIGBUS, 0, "unknown 18" },
{ do_bad, SIGBUS, 0, "unknown 19" },
{ do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous parity error" },
{ do_sea, SIGBUS, 0, "level 0 (translation table walk)" },
{ do_sea, SIGBUS, 0, "level 1 (translation table walk)" },
{ do_sea, SIGBUS, 0, "level 2 (translation table walk)" },
{ do_sea, SIGBUS, 0, "level 3 (translation table walk)" },
{ do_sea, SIGBUS, 0, "synchronous parity or ECC error" },
{ do_bad, SIGBUS, 0, "unknown 25" },
{ do_bad, SIGBUS, 0, "unknown 26" },
{ do_bad, SIGBUS, 0, "unknown 27" },
{ do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
{ do_sea, SIGBUS, 0, "level 0 synchronous parity error (translation table walk)" },
{ do_sea, SIGBUS, 0, "level 1 synchronous parity error (translation table walk)" },
{ do_sea, SIGBUS, 0, "level 2 synchronous parity error (translation table walk)" },
{ do_sea, SIGBUS, 0, "level 3 synchronous parity error (translation table walk)" },
{ do_bad, SIGBUS, 0, "unknown 32" },
{ do_alignment_fault, SIGBUS, BUS_ADRALN, "alignment fault" },
{ do_bad, SIGBUS, 0, "unknown 34" },
......@@ -589,6 +669,23 @@ static const struct fault_info fault_info[] = {
{ do_bad, SIGBUS, 0, "unknown 63" },
};
/*
 * Handle a Synchronous External Abort taken from a guest kernel.
 *
 * Returns the result of ghes_notify_sea() when CONFIG_ACPI_APEI_SEA is
 * enabled: zero if the SEA was handled, non-zero if processing failed or
 * there was nothing to process. Without that option, -ENOENT is returned.
 * Neither <addr> nor <esr> is used here.
 */
int handle_guest_sea(phys_addr_t addr, unsigned int esr)
{
	if (!IS_ENABLED(CONFIG_ACPI_APEI_SEA))
		return -ENOENT;

	return ghes_notify_sea();
}
/*
* Dispatch a data abort to the relevant handler.
*/
......
......@@ -136,36 +136,27 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd = NULL;
pte_t *pte = NULL;
pmd_t *pmd;
pgd = pgd_offset(mm, addr);
pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd);
if (!pgd_present(*pgd))
return NULL;
pud = pud_offset(pgd, addr);
if (!pud_present(*pud))
if (pud_none(*pud))
return NULL;
if (pud_huge(*pud))
/* swap or huge page */
if (!pud_present(*pud) || pud_huge(*pud))
return (pte_t *)pud;
/* table; check the next level */
pmd = pmd_offset(pud, addr);
if (!pmd_present(*pmd))
if (pmd_none(*pmd))
return NULL;
if (pte_cont(pmd_pte(*pmd))) {
pmd = pmd_offset(
pud, (addr & CONT_PMD_MASK));
return (pte_t *)pmd;
}
if (pmd_huge(*pmd))
if (!pmd_present(*pmd) || pmd_huge(*pmd))
return (pte_t *)pmd;
pte = pte_offset_kernel(pmd, addr);
if (pte_present(*pte) && pte_cont(*pte)) {
pte = pte_offset_kernel(
pmd, (addr & CONT_PTE_MASK));
return pte;
}
return NULL;
}
......
......@@ -18,6 +18,7 @@
#include <linux/elf.h>
#include <linux/fs.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/export.h>
......@@ -103,12 +104,18 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
*/
int valid_phys_addr_range(phys_addr_t addr, size_t size)
{
if (addr < PHYS_OFFSET)
return 0;
if (addr + size > __pa(high_memory - 1) + 1)
return 0;
return 1;
/*
* Check whether addr is covered by a memory region without the
* MEMBLOCK_NOMAP attribute, and whether that region covers the
* entire range. In theory, this could lead to false negatives
* if the range is covered by distinct but adjacent memory regions
* that only differ in other attributes. However, few of such
* attributes have been defined, and it is debatable whether it
* follows that /dev/mem read() calls should be able to traverse
* such boundaries.
*/
return memblock_is_region_memory(addr, size) &&
memblock_is_map_memory(addr);
}
/*
......
......@@ -31,6 +31,7 @@
#include <linux/fs.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <asm/barrier.h>
#include <asm/cputype.h>
......
......@@ -70,7 +70,7 @@ struct jit_ctx {
int idx;
int epilogue_offset;
int *offset;
u32 *image;
__le32 *image;
u32 stack_size;
};
......@@ -131,7 +131,7 @@ static inline int bpf2a64_offset(int bpf_to, int bpf_from,
static void jit_fill_hole(void *area, unsigned int size)
{
u32 *ptr;
__le32 *ptr;
/* We are guaranteed to have aligned memory. */
for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
*ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT);
......@@ -874,7 +874,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
/* 2. Now, the actual pass. */
ctx.image = (u32 *)image_ptr;
ctx.image = (__le32 *)image_ptr;
ctx.idx = 0;
build_prologue(&ctx);
......
......@@ -39,6 +39,21 @@ config ACPI_APEI_PCIEAER
PCIe AER errors may be reported via APEI firmware first mode.
Turn on this option to enable the corresponding support.
config ACPI_APEI_SEA
bool "APEI Synchronous External Abort logging/recovering support"
depends on ARM64 && ACPI_APEI_GHES
default y
help
This option should be enabled if the system supports
firmware first handling of SEA (Synchronous External Abort).
SEA happens with certain faults of data abort or instruction
abort synchronous exceptions on ARMv8 systems. If a system
supports firmware first handling of SEA, the platform analyzes
and handles hardware error notifications from SEA, and it may then
form a HW error record for the OS to parse and handle. This
option allows the OS to look for such hardware error record, and
take appropriate action.
config ACPI_APEI_MEMORY_FAILURE
bool "APEI memory error recovering support"
depends on ACPI_APEI && MEMORY_FAILURE
......
......@@ -45,10 +45,14 @@
#include <linux/aer.h>
#include <linux/nmi.h>
#include <linux/sched/clock.h>
#include <linux/uuid.h>
#include <linux/ras.h>
#include <acpi/actbl1.h>
#include <acpi/ghes.h>
#include <acpi/apei.h>
#include <asm/tlbflush.h>
#include <ras/ras_event.h>
#include "apei-internal.h"
......@@ -80,6 +84,11 @@
((struct acpi_hest_generic_status *) \
((struct ghes_estatus_node *)(estatus_node) + 1))
static inline bool is_hest_type_generic_v2(struct ghes *ghes)
{
return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
}
/*
* This driver isn't really modular, however for the time being,
* continuing to use module_param is the easiest way to remain
......@@ -110,11 +119,7 @@ static DEFINE_MUTEX(ghes_list_mutex);
* Two virtual pages are used, one for IRQ/PROCESS context, the other for
* NMI context (optionally).
*/
#ifdef CONFIG_HAVE_ACPI_APEI_NMI
#define GHES_IOREMAP_PAGES 2
#else
#define GHES_IOREMAP_PAGES 1
#endif
#define GHES_IOREMAP_IRQ_PAGE(base) (base)
#define GHES_IOREMAP_NMI_PAGE(base) ((base) + PAGE_SIZE)
......@@ -133,6 +138,8 @@ static unsigned long ghes_estatus_pool_size_request;
static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
static atomic_t ghes_estatus_cache_alloced;
static int ghes_panic_timeout __read_mostly = 30;
static int ghes_ioremap_init(void)
{
ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES,
......@@ -153,10 +160,14 @@ static void ghes_ioremap_exit(void)
static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
{
unsigned long vaddr;
phys_addr_t paddr;
pgprot_t prot;
vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr);
ioremap_page_range(vaddr, vaddr + PAGE_SIZE,
pfn << PAGE_SHIFT, PAGE_KERNEL);
paddr = pfn << PAGE_SHIFT;
prot = arch_apei_get_mem_attribute(paddr);
ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot);
return (void __iomem *)vaddr;
}
......@@ -240,6 +251,16 @@ static int ghes_estatus_pool_expand(unsigned long len)
return 0;
}
static int map_gen_v2(struct ghes *ghes)
{
return apei_map_generic_address(&ghes->generic_v2->read_ack_register);
}
static void unmap_gen_v2(struct ghes *ghes)
{
apei_unmap_generic_address(&ghes->generic_v2->read_ack_register);
}
static struct ghes *ghes_new(struct acpi_hest_generic *generic)
{
struct ghes *ghes;
......@@ -249,10 +270,17 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic)
ghes = kzalloc(sizeof(*ghes), GFP_KERNEL);
if (!ghes)
return ERR_PTR(-ENOMEM);
ghes->generic = generic;
if (is_hest_type_generic_v2(ghes)) {
rc = map_gen_v2(ghes);
if (rc)
goto err_free;
}
rc = apei_map_generic_address(&generic->error_status_address);
if (rc)
goto err_free;
goto err_unmap_read_ack_addr;
error_block_length = generic->error_block_length;
if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
pr_warning(FW_WARN GHES_PFX
......@@ -264,13 +292,16 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic)
ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
if (!ghes->estatus) {
rc = -ENOMEM;
goto err_unmap;
goto err_unmap_status_addr;
}
return ghes;
err_unmap:
err_unmap_status_addr:
apei_unmap_generic_address(&generic->error_status_address);
err_unmap_read_ack_addr:
if (is_hest_type_generic_v2(ghes))
unmap_gen_v2(ghes);
err_free:
kfree(ghes);
return ERR_PTR(rc);
......@@ -280,6 +311,8 @@ static void ghes_fini(struct ghes *ghes)
{
kfree(ghes->estatus);
apei_unmap_generic_address(&ghes->generic->error_status_address);
if (is_hest_type_generic_v2(ghes))
unmap_gen_v2(ghes);
}
static inline int ghes_severity(int severity)
......@@ -400,8 +433,7 @@ static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int
unsigned long pfn;
int flags = -1;
int sec_sev = ghes_severity(gdata->error_severity);
struct cper_sec_mem_err *mem_err;
mem_err = (struct cper_sec_mem_err *)(gdata + 1);
struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
return;
......@@ -432,14 +464,22 @@ static void ghes_do_proc(struct ghes *ghes,
int sev, sec_sev;
struct acpi_hest_generic_data *gdata;
guid_t *sec_type;
guid_t *fru_id = &NULL_UUID_LE;
char *fru_text = "";
sev = ghes_severity(estatus->error_severity);
apei_estatus_for_each_section(estatus, gdata) {
sec_type = (guid_t *)gdata->section_type;
sec_sev = ghes_severity(gdata->error_severity);
if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
fru_id = (guid_t *)gdata->fru_id;
if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
fru_text = gdata->fru_text;
if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
struct cper_sec_mem_err *mem_err;
mem_err = (struct cper_sec_mem_err *)(gdata+1);
struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
ghes_edac_report_mem_error(ghes, sev, mem_err);
arch_apei_report_mem_error(sev, mem_err);
......@@ -447,8 +487,8 @@ static void ghes_do_proc(struct ghes *ghes,
}
#ifdef CONFIG_ACPI_APEI_PCIEAER
else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
struct cper_sec_pcie *pcie_err;
pcie_err = (struct cper_sec_pcie *)(gdata+1);
struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
if (sev == GHES_SEV_RECOVERABLE &&
sec_sev == GHES_SEV_RECOVERABLE &&
pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
......@@ -477,6 +517,17 @@ static void ghes_do_proc(struct ghes *ghes,
}
#endif
else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
log_arm_hw_error(err);
} else {
void *err = acpi_hest_get_payload(gdata);
log_non_standard_event(sec_type, fru_id, fru_text,
sec_sev, err,
gdata->error_data_length);
}
}
}
......@@ -649,6 +700,31 @@ static void ghes_estatus_cache_add(
rcu_read_unlock();
}
/*
 * Acknowledge an error through a GHESv2 read-ack register.
 *
 * Performs a read-modify-write: the current register value is masked with
 * read_ack_preserve and then OR-ed with read_ack_write, both shifted left by
 * the register's bit_offset.
 *
 * Returns the apei_read() error if the read fails, otherwise the result of
 * apei_write().
 */
static int ghes_ack_error(struct acpi_hest_generic_v2 *gv2)
{
	u64 ack = 0;
	int err;

	err = apei_read(&ack, &gv2->read_ack_register);
	if (err)
		return err;

	/* keep the bits the table asks us to preserve ... */
	ack &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset;
	/* ... and set the bits that signal the acknowledgment */
	ack |= gv2->read_ack_write << gv2->read_ack_register.bit_offset;

	return apei_write(ack, &gv2->read_ack_register);
}
/*
 * Print the error status block of @ghes at KERN_EMERG and panic.
 *
 * If no panic timeout has been configured (panic_timeout is 0),
 * ghes_panic_timeout is used so that the machine reboots after the panic.
 */
static void __ghes_panic(struct ghes *ghes)
{
	__ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus);

	/* reboot to log the error! */
	if (panic_timeout == 0)
		panic_timeout = ghes_panic_timeout;

	panic("Fatal hardware error!");
}
static int ghes_proc(struct ghes *ghes)
{
int rc;
......@@ -656,11 +732,26 @@ static int ghes_proc(struct ghes *ghes)
rc = ghes_read_estatus(ghes, 0);
if (rc)
goto out;
if (ghes_severity(ghes->estatus->error_severity) >= GHES_SEV_PANIC) {
__ghes_panic(ghes);
}
if (!ghes_estatus_cached(ghes->estatus)) {
if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
ghes_estatus_cache_add(ghes->generic, ghes->estatus);
}
ghes_do_proc(ghes, ghes->estatus);
/*
* GHESv2 type HEST entries introduce support for error acknowledgment,
* so only acknowledge the error if this support is present.
*/
if (is_hest_type_generic_v2(ghes)) {
rc = ghes_ack_error(ghes->generic_v2);
if (rc)
return rc;
}
out:
ghes_clear_estatus(ghes);
return rc;
......@@ -722,6 +813,55 @@ static struct notifier_block ghes_notifier_hed = {
.notifier_call = ghes_notify_hed,
};
#ifdef CONFIG_ACPI_APEI_SEA
static LIST_HEAD(ghes_sea);
/*
 * Run ghes_proc() on every error source registered on the ghes_sea list.
 *
 * The list is walked under rcu_read_lock(). Returns 0 if at least one
 * source was processed successfully (ghes_proc() returned 0), i.e. an error
 * record sent from the firmware was reported; returns -ENOENT otherwise.
 */
int ghes_notify_sea(void)
{
	struct ghes *src;
	int status = -ENOENT;

	rcu_read_lock();
	list_for_each_entry_rcu(src, &ghes_sea, list) {
		/* one successful source is enough to report success */
		if (ghes_proc(src) == 0)
			status = 0;
	}
	rcu_read_unlock();

	return status;
}
/*
 * Register @ghes as an SEA-notified error source by linking it onto the
 * ghes_sea list. The insertion is done with list_add_rcu() while holding
 * ghes_list_mutex.
 */
static void ghes_sea_add(struct ghes *ghes)
{
	mutex_lock(&ghes_list_mutex);
	list_add_rcu(&ghes->list, &ghes_sea);
	mutex_unlock(&ghes_list_mutex);
}
/*
 * Unlink @ghes from the list it was added to with list_add_rcu(), under
 * ghes_list_mutex, and then wait with synchronize_rcu() before returning.
 */
static void ghes_sea_remove(struct ghes *ghes)
{
	mutex_lock(&ghes_list_mutex);
	list_del_rcu(&ghes->list);
	mutex_unlock(&ghes_list_mutex);

	/* must come after the unlink and outside the mutex */
	synchronize_rcu();
}
#else /* CONFIG_ACPI_APEI_SEA */
/*
 * Stub used when CONFIG_ACPI_APEI_SEA is not set: nothing is registered,
 * the attempt is only logged together with the source id.
 */
static inline void ghes_sea_add(struct ghes *ghes)
{
	pr_err(GHES_PFX "ID: %d, trying to add SEA notification which is not supported\n",
	       ghes->generic->header.source_id);
}
/*
 * Stub used when CONFIG_ACPI_APEI_SEA is not set: nothing is unregistered,
 * the attempt is only logged together with the source id.
 */
static inline void ghes_sea_remove(struct ghes *ghes)
{
	pr_err(GHES_PFX "ID: %d, trying to remove SEA notification which is not supported\n",
	       ghes->generic->header.source_id);
}
#endif /* CONFIG_ACPI_APEI_SEA */
#ifdef CONFIG_HAVE_ACPI_APEI_NMI
/*
* printk is not safe in NMI context. So in NMI handler, we allocate
......@@ -742,8 +882,6 @@ static atomic_t ghes_in_nmi = ATOMIC_INIT(0);
static LIST_HEAD(ghes_nmi);
static int ghes_panic_timeout __read_mostly = 30;
static void ghes_proc_in_irq(struct irq_work *irq_work)
{
struct llist_node *llnode, *next;
......@@ -829,18 +967,6 @@ static void __process_error(struct ghes *ghes)
#endif
}
/*
 * Panic on a fatal hardware error reported through @ghes.
 *
 * Calls oops_begin(), prints any queued error status blocks, prints this
 * source's status block at KERN_EMERG and finally calls panic().
 * NOTE(review): a second __ghes_panic() definition without the
 * oops_begin()/ghes_print_queued_estatus() calls appears earlier in this
 * text; presumably only one of the two survives in the real file - confirm.
 */
static void __ghes_panic(struct ghes *ghes)
{
oops_begin();
ghes_print_queued_estatus();
__ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus);
/* reboot to log the error! */
/* only override the timeout when none has been configured */
if (panic_timeout == 0)
panic_timeout = ghes_panic_timeout;
panic("Fatal hardware error!");
}
static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
{
struct ghes *ghes;
......@@ -858,8 +984,11 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
}
sev = ghes_severity(ghes->estatus->error_severity);
if (sev >= GHES_SEV_PANIC)
if (sev >= GHES_SEV_PANIC) {
oops_begin();
ghes_print_queued_estatus();
__ghes_panic(ghes);
}
if (!(ghes->flags & GHES_TO_CLEAR))
continue;
......@@ -970,6 +1099,14 @@ static int ghes_probe(struct platform_device *ghes_dev)
case ACPI_HEST_NOTIFY_GPIO:
break;
case ACPI_HEST_NOTIFY_SEA:
if (!IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n",
generic->header.source_id);
rc = -ENOTSUPP;
goto err;
}
break;
case ACPI_HEST_NOTIFY_NMI:
if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) {
pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n",
......@@ -1038,6 +1175,9 @@ static int ghes_probe(struct platform_device *ghes_dev)
mutex_unlock(&ghes_list_mutex);
break;
case ACPI_HEST_NOTIFY_SEA:
ghes_sea_add(ghes);
break;
case ACPI_HEST_NOTIFY_NMI:
ghes_nmi_add(ghes);
break;
......@@ -1046,6 +1186,9 @@ static int ghes_probe(struct platform_device *ghes_dev)
}
platform_set_drvdata(ghes_dev, ghes);
/* Handle any pending errors right away */
ghes_proc(ghes);
return 0;
err_edac_unreg:
ghes_edac_unregister(ghes);
......@@ -1085,6 +1228,9 @@ static int ghes_remove(struct platform_device *ghes_dev)
synchronize_rcu();
break;
case ACPI_HEST_NOTIFY_SEA:
ghes_sea_remove(ghes);
break;
case ACPI_HEST_NOTIFY_NMI:
ghes_nmi_remove(ghes);
break;
......
......@@ -52,6 +52,7 @@ static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = {
[ACPI_HEST_TYPE_AER_ENDPOINT] = sizeof(struct acpi_hest_aer),
[ACPI_HEST_TYPE_AER_BRIDGE] = sizeof(struct acpi_hest_aer_bridge),
[ACPI_HEST_TYPE_GENERIC_ERROR] = sizeof(struct acpi_hest_generic),
[ACPI_HEST_TYPE_GENERIC_ERROR_V2] = sizeof(struct acpi_hest_generic_v2),
};
static int hest_esrc_len(struct acpi_hest_header *hest_hdr)
......@@ -141,7 +142,8 @@ static int __init hest_parse_ghes_count(struct acpi_hest_header *hest_hdr, void
{
int *count = data;
if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR)
if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR ||
hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR_V2)
(*count)++;
return 0;
}
......@@ -152,7 +154,8 @@ static int __init hest_parse_ghes(struct acpi_hest_header *hest_hdr, void *data)
struct ghes_arr *ghes_arr = data;
int rc, i;
if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR)
if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR &&
hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR_V2)
return 0;
if (!((struct acpi_hest_generic *)hest_hdr)->enabled)
......
......@@ -234,21 +234,6 @@ static struct acpi_iort_node *iort_scan_node(enum acpi_iort_node_type type,
return NULL;
}
/*
 * Match callback that accepts every node it is offered: it ignores both
 * @node and @context and always returns AE_OK.
 */
static acpi_status
iort_match_type_callback(struct acpi_iort_node *node, void *context)
{
	return AE_OK;
}
/*
 * Report whether iort_scan_node() finds a node of the given @type.
 *
 * The scan uses iort_match_type_callback(), which accepts any node, so the
 * result is true as soon as one node of that type is returned.
 */
bool iort_node_match(u8 type)
{
	return iort_scan_node(type, iort_match_type_callback, NULL) != NULL;
}
static acpi_status iort_match_node_callback(struct acpi_iort_node *node,
void *context)
{
......
......@@ -17,6 +17,8 @@ config DEVMEM
config DEVKMEM
bool "/dev/kmem virtual device support"
# On arm64, VMALLOC_START < PAGE_OFFSET, which confuses kmem read/write
depends on !ARM64
help
Say Y here if you want to support the /dev/kmem device. The
/dev/kmem device is rarely used, but can be used for certain
......
......@@ -32,6 +32,10 @@
#include <linux/acpi.h>
#include <linux/pci.h>
#include <linux/aer.h>
#include <linux/printk.h>
#include <linux/bcd.h>
#include <acpi/ghes.h>
#include <ras/ras_event.h>
#define INDENT_SP " "
......@@ -107,12 +111,15 @@ void cper_print_bits(const char *pfx, unsigned int bits,
/*
 * Printable names of processor types.
 * NOTE(review): presumably indexed by the processor type code of a generic
 * processor error section (0 = IA32/X64, 1 = IA64, 2 = ARM) - confirm
 * against the user of this table.
 */
static const char * const proc_type_strs[] = {
"IA32/X64",
"IA64",
"ARM",
};
/*
 * Printable names of processor instruction set architectures.
 * NOTE(review): presumably indexed by the ISA code of a generic processor
 * error section - confirm against the user of this table.
 */
static const char * const proc_isa_strs[] = {
"IA32",
"IA64",
"X64",
"ARM A32/T32",
"ARM A64",
};
static const char * const proc_error_type_strs[] = {
......@@ -181,6 +188,122 @@ static void cper_print_proc_generic(const char *pfx,
printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
}
#if defined(CONFIG_ARM64) || defined(CONFIG_ARM)
/*
 * Printable names of ARM register context types.
 * cper_print_proc_arm() indexes this table with the type field of a context
 * info structure after checking it against the last valid index.
 */
static const char * const arm_reg_ctx_strs[] = {
"AArch32 general purpose registers",
"AArch32 EL1 context registers",
"AArch32 EL2 context registers",
"AArch32 secure context registers",
"AArch64 general purpose registers",
"AArch64 EL1 context registers",
"AArch64 EL2 context registers",
"AArch64 EL3 context registers",
"Misc. system register structure",
};
static void cper_print_proc_arm(const char *pfx,
const struct cper_sec_proc_arm *proc)
{
int i, len, max_ctx_type;
struct cper_arm_err_info *err_info;
struct cper_arm_ctx_info *ctx_info;
char newpfx[64];
printk("%sMIDR: 0x%016llx\n", pfx, proc->midr);
len = proc->section_length - (sizeof(*proc) +
proc->err_info_num * (sizeof(*err_info)));
if (len < 0) {
printk("%ssection length: %d\n", pfx, proc->section_length);
printk("%ssection length is too small\n", pfx);
printk("%sfirmware-generated error record is incorrect\n", pfx);
printk("%sERR_INFO_NUM is %d\n", pfx, proc->err_info_num);
return;
}
if (proc->validation_bits & CPER_ARM_VALID_MPIDR)
printk("%sMultiprocessor Affinity Register (MPIDR): 0x%016llx\n",
pfx, proc->mpidr);
if (proc->validation_bits & CPER_ARM_VALID_AFFINITY_LEVEL)
printk("%serror affinity level: %d\n", pfx,
proc->affinity_level);
if (proc->validation_bits & CPER_ARM_VALID_RUNNING_STATE) {
printk("%srunning state: 0x%x\n", pfx, proc->running_state);
printk("%sPower State Coordination Interface state: %d\n",
pfx, proc->psci_state);
}
snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
err_info = (struct cper_arm_err_info *)(proc + 1);
for (i = 0; i < proc->err_info_num; i++) {
printk("%sError info structure %d:\n", pfx, i);
printk("%snum errors: %d\n", pfx, err_info->multiple_error + 1);
if (err_info->validation_bits & CPER_ARM_INFO_VALID_FLAGS) {
if (err_info->flags & CPER_ARM_INFO_FLAGS_FIRST)
printk("%sfirst error captured\n", newpfx);
if (err_info->flags & CPER_ARM_INFO_FLAGS_LAST)
printk("%slast error captured\n", newpfx);
if (err_info->flags & CPER_ARM_INFO_FLAGS_PROPAGATED)
printk("%spropagated error captured\n",
newpfx);
if (err_info->flags & CPER_ARM_INFO_FLAGS_OVERFLOW)
printk("%soverflow occurred, error info is incomplete\n",
newpfx);
}
printk("%serror_type: %d, %s\n", newpfx, err_info->type,
err_info->type < ARRAY_SIZE(proc_error_type_strs) ?
proc_error_type_strs[err_info->type] : "unknown");
if (err_info->validation_bits & CPER_ARM_INFO_VALID_ERR_INFO)
printk("%serror_info: 0x%016llx\n", newpfx,
err_info->error_info);
if (err_info->validation_bits & CPER_ARM_INFO_VALID_VIRT_ADDR)
printk("%svirtual fault address: 0x%016llx\n",
newpfx, err_info->virt_fault_addr);
if (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR)
printk("%sphysical fault address: 0x%016llx\n",
newpfx, err_info->physical_fault_addr);
err_info += 1;
}
ctx_info = (struct cper_arm_ctx_info *)err_info;
max_ctx_type = ARRAY_SIZE(arm_reg_ctx_strs) - 1;
for (i = 0; i < proc->context_info_num; i++) {
int size = sizeof(*ctx_info) + ctx_info->size;
printk("%sContext info structure %d:\n", pfx, i);
if (len < size) {
printk("%ssection length is too small\n", newpfx);
printk("%sfirmware-generated error record is incorrect\n", pfx);
return;
}
if (ctx_info->type > max_ctx_type) {
printk("%sInvalid context type: %d (max: %d)\n",
newpfx, ctx_info->type, max_ctx_type);
return;
}
printk("%sregister context type: %s\n", newpfx,
arm_reg_ctx_strs[ctx_info->type]);
print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4,
(ctx_info + 1), ctx_info->size, 0);
len -= size;
ctx_info = (struct cper_arm_ctx_info *)((long)ctx_info + size);
}
if (len > 0) {
printk("%sVendor specific error info has %u bytes:\n", pfx,
len);
print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, ctx_info,
len, true);
}
}
#endif
static const char * const mem_err_type_strs[] = {
"unknown",
"no error",
......@@ -386,13 +509,38 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
pfx, pcie->bridge.secondary_status, pcie->bridge.control);
}
static void cper_estatus_print_section(
const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
/*
 * Print the timestamp of a v300 generic error data entry.
 *
 * Nothing is printed unless ACPI_HEST_GEN_VALID_TIMESTAMP is set in the
 * entry's validation bits. The timestamp is read byte by byte: bytes 0-2 and
 * 4-7 are BCD encoded seconds, minutes, hours, day, month, year and century;
 * bit 0 of byte 3 selects the "precise"/"imprecise" label.
 */
static void cper_print_tstamp(const char *pfx,
			      struct acpi_hest_generic_data_v300 *gdata)
{
	__u8 *ts;
	__u8 ss, mm, hh, dd, mo, yy, cc;

	if (!(gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP))
		return;

	ts = (__u8 *)&(gdata->time_stamp);

	/* time of day */
	ss = bcd2bin(ts[0]);
	mm = bcd2bin(ts[1]);
	hh = bcd2bin(ts[2]);

	/* date; byte 3 holds the precision flag and is not BCD */
	dd = bcd2bin(ts[4]);
	mo = bcd2bin(ts[5]);
	yy = bcd2bin(ts[6]);
	cc = bcd2bin(ts[7]);

	printk("%s%ststamp: %02d%02d-%02d-%02d %02d:%02d:%02d\n", pfx,
	       (ts[3] & 0x1 ? "precise " : "imprecise "),
	       cc, yy, mo, dd, hh, mm, ss);
}
static void
cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata,
int sec_no)
{
uuid_le *sec_type = (uuid_le *)gdata->section_type;
__u16 severity;
char newpfx[64];
if (acpi_hest_get_version(gdata) >= 3)
cper_print_tstamp(pfx, (struct acpi_hest_generic_data_v300 *)gdata);
severity = gdata->error_severity;
printk("%s""Error %d, type: %s\n", pfx, sec_no,
cper_severity_str(severity));
......@@ -403,14 +551,16 @@ static void cper_estatus_print_section(
snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
struct cper_sec_proc_generic *proc_err = acpi_hest_get_payload(gdata);
printk("%s""section_type: general processor error\n", newpfx);
if (gdata->error_data_length >= sizeof(*proc_err))
cper_print_proc_generic(newpfx, proc_err);
else
goto err_section_too_small;
} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
printk("%s""section_type: memory error\n", newpfx);
if (gdata->error_data_length >=
sizeof(struct cper_sec_mem_err_old))
......@@ -419,14 +569,32 @@ static void cper_estatus_print_section(
else
goto err_section_too_small;
} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
struct cper_sec_pcie *pcie = (void *)(gdata + 1);
struct cper_sec_pcie *pcie = acpi_hest_get_payload(gdata);
printk("%s""section_type: PCIe error\n", newpfx);
if (gdata->error_data_length >= sizeof(*pcie))
cper_print_pcie(newpfx, pcie, gdata);
else
goto err_section_too_small;
} else
printk("%s""section type: unknown, %pUl\n", newpfx, sec_type);
#if defined(CONFIG_ARM64) || defined(CONFIG_ARM)
} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_ARM)) {
struct cper_sec_proc_arm *arm_err = acpi_hest_get_payload(gdata);
printk("%ssection_type: ARM processor error\n", newpfx);
if (gdata->error_data_length >= sizeof(*arm_err))
cper_print_proc_arm(newpfx, arm_err);
else
goto err_section_too_small;
#endif
} else {
const void *err = acpi_hest_get_payload(gdata);
printk("%ssection type: unknown, %pUl\n", newpfx, sec_type);
printk("%ssection length: %#x\n", newpfx,
gdata->error_data_length);
print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, err,
gdata->error_data_length, true);
}
return;
......@@ -438,7 +606,7 @@ void cper_estatus_print(const char *pfx,
const struct acpi_hest_generic_status *estatus)
{
struct acpi_hest_generic_data *gdata;
unsigned int data_len, gedata_len;
unsigned int data_len;
int sec_no = 0;
char newpfx[64];
__u16 severity;
......@@ -452,11 +620,11 @@ void cper_estatus_print(const char *pfx,
data_len = estatus->data_length;
gdata = (struct acpi_hest_generic_data *)(estatus + 1);
snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
while (data_len >= sizeof(*gdata)) {
gedata_len = gdata->error_data_length;
while (data_len >= acpi_hest_get_size(gdata)) {
cper_estatus_print_section(newpfx, gdata, sec_no);
data_len -= gedata_len + sizeof(*gdata);
gdata = (void *)(gdata + 1) + gedata_len;
data_len -= acpi_hest_get_record_size(gdata);
gdata = acpi_hest_get_next(gdata);
sec_no++;
}
}
......@@ -486,12 +654,14 @@ int cper_estatus_check(const struct acpi_hest_generic_status *estatus)
return rc;
data_len = estatus->data_length;
gdata = (struct acpi_hest_generic_data *)(estatus + 1);
while (data_len >= sizeof(*gdata)) {
gedata_len = gdata->error_data_length;
if (gedata_len > data_len - sizeof(*gdata))
while (data_len >= acpi_hest_get_size(gdata)) {
gedata_len = acpi_hest_get_error_length(gdata);
if (gedata_len > data_len - acpi_hest_get_size(gdata))
return -EINVAL;
data_len -= gedata_len + sizeof(*gdata);
gdata = (void *)(gdata + 1) + gedata_len;
data_len -= acpi_hest_get_record_size(gdata);
gdata = acpi_hest_get_next(gdata);
}
if (data_len)
return -EINVAL;
......
......@@ -39,7 +39,6 @@ config ARM_GIC_V3_ITS
bool
depends on PCI
depends on PCI_MSI
select ACPI_IORT if ACPI
config ARM_NVIC
bool
......
......@@ -3,9 +3,10 @@
#
menu "Performance monitor support"
depends on PERF_EVENTS
config ARM_PMU
depends on PERF_EVENTS && (ARM || ARM64)
depends on ARM || ARM64
bool "ARM PMU framework"
default y
help
......@@ -18,7 +19,7 @@ config ARM_PMU_ACPI
config QCOM_L2_PMU
bool "Qualcomm Technologies L2-cache PMU"
depends on ARCH_QCOM && ARM64 && PERF_EVENTS && ACPI
depends on ARCH_QCOM && ARM64 && ACPI
help
Provides support for the L2 cache performance monitor unit (PMU)
in Qualcomm Technologies processors.
......@@ -27,7 +28,7 @@ config QCOM_L2_PMU
config QCOM_L3_PMU
bool "Qualcomm Technologies L3-cache PMU"
depends on ARCH_QCOM && ARM64 && PERF_EVENTS && ACPI
depends on ARCH_QCOM && ARM64 && ACPI
select QCOM_IRQ_COMBINER
help
Provides support for the L3 cache performance monitor unit (PMU)
......@@ -36,7 +37,7 @@ config QCOM_L3_PMU
monitoring L3 cache events.
config XGENE_PMU
depends on PERF_EVENTS && ARCH_XGENE
depends on ARCH_XGENE
bool "APM X-Gene SoC PMU"
default n
help
......
......@@ -37,6 +37,8 @@
#define CSW_CSWCR 0x0000
#define CSW_CSWCR_DUALMCB_MASK BIT(0)
#define CSW_CSWCR_MCB0_ROUTING(x) (((x) & 0x0C) >> 2)
#define CSW_CSWCR_MCB1_ROUTING(x) (((x) & 0x30) >> 4)
#define MCBADDRMR 0x0000
#define MCBADDRMR_DUALMCU_MODE_MASK BIT(2)
......@@ -50,8 +52,17 @@
#define PCPPMU_INT_L3C BIT(2)
#define PCPPMU_INT_IOB BIT(3)
#define PCPPMU_V3_INTMASK 0x00FF33FF
#define PCPPMU_V3_INTENMASK 0xFFFFFFFF
#define PCPPMU_V3_INTCLRMASK 0xFF00CC00
#define PCPPMU_V3_INT_MCU 0x000000FF
#define PCPPMU_V3_INT_MCB 0x00000300
#define PCPPMU_V3_INT_L3C 0x00FF0000
#define PCPPMU_V3_INT_IOB 0x00003000
#define PMU_MAX_COUNTERS 4
#define PMU_CNT_MAX_PERIOD 0x100000000ULL
#define PMU_CNT_MAX_PERIOD 0xFFFFFFFFULL
#define PMU_V3_CNT_MAX_PERIOD 0xFFFFFFFFFFFFFFFFULL
#define PMU_OVERFLOW_MASK 0xF
#define PMU_PMCR_E BIT(0)
#define PMU_PMCR_P BIT(1)
......@@ -73,6 +84,10 @@
#define PMU_PMOVSR 0xC80
#define PMU_PMCR 0xE04
/* PMU registers for V3 */
#define PMU_PMOVSCLR 0xC80
#define PMU_PMOVSSET 0xCC0
#define to_pmu_dev(p) container_of(p, struct xgene_pmu_dev, pmu)
#define GET_CNTR(ev) (ev->hw.idx)
#define GET_EVENTID(ev) (ev->hw.config & 0xFFULL)
......@@ -96,14 +111,33 @@ struct xgene_pmu_dev {
struct perf_event *pmu_counter_event[PMU_MAX_COUNTERS];
};
/*
 * Table of hardware access callbacks. struct xgene_pmu holds a pointer to
 * one such table in its ops member.
 * NOTE(review): presumably one table is provided per PMU hardware version so
 * that common code does not need to know register layouts - confirm.
 */
struct xgene_pmu_ops {
/* operate on the top-level PMU (struct xgene_pmu) */
void (*mask_int)(struct xgene_pmu *pmu);
void (*unmask_int)(struct xgene_pmu *pmu);
/* counter value accessors; idx selects the counter, values are 64 bits wide */
u64 (*read_counter)(struct xgene_pmu_dev *pmu, int idx);
void (*write_counter)(struct xgene_pmu_dev *pmu, int idx, u64 val);
/* event type and agent mask programming */
void (*write_evttype)(struct xgene_pmu_dev *pmu_dev, int idx, u32 val);
void (*write_agentmsk)(struct xgene_pmu_dev *pmu_dev, u32 val);
void (*write_agent1msk)(struct xgene_pmu_dev *pmu_dev, u32 val);
/* per-counter enable/disable of the counter and of its interrupt */
void (*enable_counter)(struct xgene_pmu_dev *pmu_dev, int idx);
void (*disable_counter)(struct xgene_pmu_dev *pmu_dev, int idx);
void (*enable_counter_int)(struct xgene_pmu_dev *pmu_dev, int idx);
void (*disable_counter_int)(struct xgene_pmu_dev *pmu_dev, int idx);
/* operations on all counters of one PMU device */
void (*reset_counters)(struct xgene_pmu_dev *pmu_dev);
void (*start_counters)(struct xgene_pmu_dev *pmu_dev);
void (*stop_counters)(struct xgene_pmu_dev *pmu_dev);
};
struct xgene_pmu {
struct device *dev;
int version;
void __iomem *pcppmu_csr;
u32 mcb_active_mask;
u32 mc_active_mask;
u32 l3c_active_mask;
cpumask_t cpu;
raw_spinlock_t lock;
const struct xgene_pmu_ops *ops;
struct list_head l3cpmus;
struct list_head iobpmus;
struct list_head mcbpmus;
......@@ -125,11 +159,13 @@ struct xgene_pmu_data {
/* PMU hardware versions; numbering starts at 1, so V2 is 2 and V3 is 3. */
enum xgene_pmu_version {
PCP_PMU_V1 = 1,
PCP_PMU_V2,
PCP_PMU_V3,
};
/*
 * Kinds of PMU device, numbered consecutively from 0:
 * L3C = 0, IOB = 1, IOB_SLOW = 2, MCB = 3, MC = 4.
 */
enum xgene_pmu_dev_type {
PMU_TYPE_L3C = 0,
PMU_TYPE_IOB,
PMU_TYPE_IOB_SLOW,
PMU_TYPE_MCB,
PMU_TYPE_MC,
};
......@@ -195,6 +231,56 @@ static const struct attribute_group mc_pmu_format_attr_group = {
.attrs = mc_pmu_format_attrs,
};
/*
 * v3 format attributes, one NULL-terminated list per PMU device kind.
 * Each list has a single entry that pairs a field name with a "config:lo-hi"
 * bit range string. The lists are referenced by the *_v3_format_attr_group
 * definitions, whose group name is "format".
 */
static struct attribute *l3c_pmu_v3_format_attrs[] = {
XGENE_PMU_FORMAT_ATTR(l3c_eventid, "config:0-39"),
NULL,
};
static struct attribute *iob_pmu_v3_format_attrs[] = {
XGENE_PMU_FORMAT_ATTR(iob_eventid, "config:0-47"),
NULL,
};
static struct attribute *iob_slow_pmu_v3_format_attrs[] = {
XGENE_PMU_FORMAT_ATTR(iob_slow_eventid, "config:0-16"),
NULL,
};
static struct attribute *mcb_pmu_v3_format_attrs[] = {
XGENE_PMU_FORMAT_ATTR(mcb_eventid, "config:0-35"),
NULL,
};
static struct attribute *mc_pmu_v3_format_attrs[] = {
XGENE_PMU_FORMAT_ATTR(mc_eventid, "config:0-44"),
NULL,
};
/*
 * v3 "format" attribute groups, one per PMU device kind. Each group is named
 * "format" and wraps the matching *_v3_format_attrs list.
 */
static const struct attribute_group l3c_pmu_v3_format_attr_group = {
.name = "format",
.attrs = l3c_pmu_v3_format_attrs,
};
static const struct attribute_group iob_pmu_v3_format_attr_group = {
.name = "format",
.attrs = iob_pmu_v3_format_attrs,
};
static const struct attribute_group iob_slow_pmu_v3_format_attr_group = {
.name = "format",
.attrs = iob_slow_pmu_v3_format_attrs,
};
static const struct attribute_group mcb_pmu_v3_format_attr_group = {
.name = "format",
.attrs = mcb_pmu_v3_format_attrs,
};
static const struct attribute_group mc_pmu_v3_format_attr_group = {
.name = "format",
.attrs = mc_pmu_v3_format_attrs,
};
/*
* sysfs event attributes
*/
......@@ -311,6 +397,219 @@ static const struct attribute_group mc_pmu_events_attr_group = {
.attrs = mc_pmu_events_attrs,
};
/*
 * v3 L3C PMU events: name and event id pairs, NULL-terminated.
 * Ids run from 0x00 to 0x27; id 0x0f is not listed.
 * Referenced by l3c_pmu_v3_events_attr_group, whose group name is "events".
 */
static struct attribute *l3c_pmu_v3_events_attrs[] = {
XGENE_PMU_EVENT_ATTR(cycle-count, 0x00),
XGENE_PMU_EVENT_ATTR(read-hit, 0x01),
XGENE_PMU_EVENT_ATTR(read-miss, 0x02),
XGENE_PMU_EVENT_ATTR(index-flush-eviction, 0x03),
XGENE_PMU_EVENT_ATTR(write-caused-replacement, 0x04),
XGENE_PMU_EVENT_ATTR(write-not-caused-replacement, 0x05),
XGENE_PMU_EVENT_ATTR(clean-eviction, 0x06),
XGENE_PMU_EVENT_ATTR(dirty-eviction, 0x07),
XGENE_PMU_EVENT_ATTR(read, 0x08),
XGENE_PMU_EVENT_ATTR(write, 0x09),
XGENE_PMU_EVENT_ATTR(request, 0x0a),
XGENE_PMU_EVENT_ATTR(tq-bank-conflict-issue-stall, 0x0b),
XGENE_PMU_EVENT_ATTR(tq-full, 0x0c),
XGENE_PMU_EVENT_ATTR(ackq-full, 0x0d),
XGENE_PMU_EVENT_ATTR(wdb-full, 0x0e),
XGENE_PMU_EVENT_ATTR(odb-full, 0x10),
XGENE_PMU_EVENT_ATTR(wbq-full, 0x11),
XGENE_PMU_EVENT_ATTR(input-req-async-fifo-stall, 0x12),
XGENE_PMU_EVENT_ATTR(output-req-async-fifo-stall, 0x13),
XGENE_PMU_EVENT_ATTR(output-data-async-fifo-stall, 0x14),
XGENE_PMU_EVENT_ATTR(total-insertion, 0x15),
XGENE_PMU_EVENT_ATTR(sip-insertions-r-set, 0x16),
XGENE_PMU_EVENT_ATTR(sip-insertions-r-clear, 0x17),
XGENE_PMU_EVENT_ATTR(dip-insertions-r-set, 0x18),
XGENE_PMU_EVENT_ATTR(dip-insertions-r-clear, 0x19),
XGENE_PMU_EVENT_ATTR(dip-insertions-force-r-set, 0x1a),
XGENE_PMU_EVENT_ATTR(egression, 0x1b),
XGENE_PMU_EVENT_ATTR(replacement, 0x1c),
XGENE_PMU_EVENT_ATTR(old-replacement, 0x1d),
XGENE_PMU_EVENT_ATTR(young-replacement, 0x1e),
XGENE_PMU_EVENT_ATTR(r-set-replacement, 0x1f),
XGENE_PMU_EVENT_ATTR(r-clear-replacement, 0x20),
XGENE_PMU_EVENT_ATTR(old-r-replacement, 0x21),
XGENE_PMU_EVENT_ATTR(old-nr-replacement, 0x22),
XGENE_PMU_EVENT_ATTR(young-r-replacement, 0x23),
XGENE_PMU_EVENT_ATTR(young-nr-replacement, 0x24),
XGENE_PMU_EVENT_ATTR(bloomfilter-clearing, 0x25),
XGENE_PMU_EVENT_ATTR(generation-flip, 0x26),
XGENE_PMU_EVENT_ATTR(vcc-droop-detected, 0x27),
NULL,
};
/*
 * v3 IOB (fast) PMU events: name and event id pairs, NULL-terminated.
 * The ids are not contiguous; several values between 0x00 and 0x2f are
 * not listed.
 * Referenced by iob_fast_pmu_v3_events_attr_group, whose group name is
 * "events".
 */
static struct attribute *iob_fast_pmu_v3_events_attrs[] = {
XGENE_PMU_EVENT_ATTR(cycle-count, 0x00),
XGENE_PMU_EVENT_ATTR(pa-req-buf-alloc-all, 0x01),
XGENE_PMU_EVENT_ATTR(pa-req-buf-alloc-rd, 0x02),
XGENE_PMU_EVENT_ATTR(pa-req-buf-alloc-wr, 0x03),
XGENE_PMU_EVENT_ATTR(pa-all-cp-req, 0x04),
XGENE_PMU_EVENT_ATTR(pa-cp-blk-req, 0x05),
XGENE_PMU_EVENT_ATTR(pa-cp-ptl-req, 0x06),
XGENE_PMU_EVENT_ATTR(pa-cp-rd-req, 0x07),
XGENE_PMU_EVENT_ATTR(pa-cp-wr-req, 0x08),
XGENE_PMU_EVENT_ATTR(ba-all-req, 0x09),
XGENE_PMU_EVENT_ATTR(ba-rd-req, 0x0a),
XGENE_PMU_EVENT_ATTR(ba-wr-req, 0x0b),
XGENE_PMU_EVENT_ATTR(pa-rd-shared-req-issued, 0x10),
XGENE_PMU_EVENT_ATTR(pa-rd-exclusive-req-issued, 0x11),
XGENE_PMU_EVENT_ATTR(pa-wr-invalidate-req-issued-stashable, 0x12),
XGENE_PMU_EVENT_ATTR(pa-wr-invalidate-req-issued-nonstashable, 0x13),
XGENE_PMU_EVENT_ATTR(pa-wr-back-req-issued-stashable, 0x14),
XGENE_PMU_EVENT_ATTR(pa-wr-back-req-issued-nonstashable, 0x15),
XGENE_PMU_EVENT_ATTR(pa-ptl-wr-req, 0x16),
XGENE_PMU_EVENT_ATTR(pa-ptl-rd-req, 0x17),
XGENE_PMU_EVENT_ATTR(pa-wr-back-clean-data, 0x18),
XGENE_PMU_EVENT_ATTR(pa-wr-back-cancelled-on-SS, 0x1b),
XGENE_PMU_EVENT_ATTR(pa-barrier-occurrence, 0x1c),
XGENE_PMU_EVENT_ATTR(pa-barrier-cycles, 0x1d),
XGENE_PMU_EVENT_ATTR(pa-total-cp-snoops, 0x20),
XGENE_PMU_EVENT_ATTR(pa-rd-shared-snoop, 0x21),
XGENE_PMU_EVENT_ATTR(pa-rd-shared-snoop-hit, 0x22),
XGENE_PMU_EVENT_ATTR(pa-rd-exclusive-snoop, 0x23),
XGENE_PMU_EVENT_ATTR(pa-rd-exclusive-snoop-hit, 0x24),
XGENE_PMU_EVENT_ATTR(pa-rd-wr-invalid-snoop, 0x25),
XGENE_PMU_EVENT_ATTR(pa-rd-wr-invalid-snoop-hit, 0x26),
XGENE_PMU_EVENT_ATTR(pa-req-buffer-full, 0x28),
XGENE_PMU_EVENT_ATTR(cswlf-outbound-req-fifo-full, 0x29),
XGENE_PMU_EVENT_ATTR(cswlf-inbound-snoop-fifo-backpressure, 0x2a),
XGENE_PMU_EVENT_ATTR(cswlf-outbound-lack-fifo-full, 0x2b),
XGENE_PMU_EVENT_ATTR(cswlf-inbound-gack-fifo-backpressure, 0x2c),
XGENE_PMU_EVENT_ATTR(cswlf-outbound-data-fifo-full, 0x2d),
XGENE_PMU_EVENT_ATTR(cswlf-inbound-data-fifo-backpressure, 0x2e),
XGENE_PMU_EVENT_ATTR(cswlf-inbound-req-backpressure, 0x2f),
NULL,
};
/*
 * v3 IOB (slow) PMU events: name and event id pairs, NULL-terminated.
 * Listed ids are 0x00-0x04, 0x07-0x09 and 0x10.
 * Referenced by iob_slow_pmu_v3_events_attr_group, whose group name is
 * "events".
 */
static struct attribute *iob_slow_pmu_v3_events_attrs[] = {
XGENE_PMU_EVENT_ATTR(cycle-count, 0x00),
XGENE_PMU_EVENT_ATTR(pa-axi0-rd-req, 0x01),
XGENE_PMU_EVENT_ATTR(pa-axi0-wr-req, 0x02),
XGENE_PMU_EVENT_ATTR(pa-axi1-rd-req, 0x03),
XGENE_PMU_EVENT_ATTR(pa-axi1-wr-req, 0x04),
XGENE_PMU_EVENT_ATTR(ba-all-axi-req, 0x07),
XGENE_PMU_EVENT_ATTR(ba-axi-rd-req, 0x08),
XGENE_PMU_EVENT_ATTR(ba-axi-wr-req, 0x09),
XGENE_PMU_EVENT_ATTR(ba-free-list-empty, 0x10),
NULL,
};
/*
 * v3 MCB PMU events: name and event id pairs, NULL-terminated.
 * Ids run contiguously from 0x00 to 0x23.
 * Referenced by mcb_pmu_v3_events_attr_group, whose group name is "events".
 */
static struct attribute *mcb_pmu_v3_events_attrs[] = {
XGENE_PMU_EVENT_ATTR(cycle-count, 0x00),
XGENE_PMU_EVENT_ATTR(req-receive, 0x01),
XGENE_PMU_EVENT_ATTR(rd-req-recv, 0x02),
XGENE_PMU_EVENT_ATTR(rd-req-recv-2, 0x03),
XGENE_PMU_EVENT_ATTR(wr-req-recv, 0x04),
XGENE_PMU_EVENT_ATTR(wr-req-recv-2, 0x05),
XGENE_PMU_EVENT_ATTR(rd-req-sent-to-mcu, 0x06),
XGENE_PMU_EVENT_ATTR(rd-req-sent-to-mcu-2, 0x07),
XGENE_PMU_EVENT_ATTR(rd-req-sent-to-spec-mcu, 0x08),
XGENE_PMU_EVENT_ATTR(rd-req-sent-to-spec-mcu-2, 0x09),
XGENE_PMU_EVENT_ATTR(glbl-ack-recv-for-rd-sent-to-spec-mcu, 0x0a),
XGENE_PMU_EVENT_ATTR(glbl-ack-go-recv-for-rd-sent-to-spec-mcu, 0x0b),
XGENE_PMU_EVENT_ATTR(glbl-ack-nogo-recv-for-rd-sent-to-spec-mcu, 0x0c),
XGENE_PMU_EVENT_ATTR(glbl-ack-go-recv-any-rd-req, 0x0d),
XGENE_PMU_EVENT_ATTR(glbl-ack-go-recv-any-rd-req-2, 0x0e),
XGENE_PMU_EVENT_ATTR(wr-req-sent-to-mcu, 0x0f),
XGENE_PMU_EVENT_ATTR(gack-recv, 0x10),
XGENE_PMU_EVENT_ATTR(rd-gack-recv, 0x11),
XGENE_PMU_EVENT_ATTR(wr-gack-recv, 0x12),
XGENE_PMU_EVENT_ATTR(cancel-rd-gack, 0x13),
XGENE_PMU_EVENT_ATTR(cancel-wr-gack, 0x14),
XGENE_PMU_EVENT_ATTR(mcb-csw-req-stall, 0x15),
XGENE_PMU_EVENT_ATTR(mcu-req-intf-blocked, 0x16),
XGENE_PMU_EVENT_ATTR(mcb-mcu-rd-intf-stall, 0x17),
XGENE_PMU_EVENT_ATTR(csw-rd-intf-blocked, 0x18),
XGENE_PMU_EVENT_ATTR(csw-local-ack-intf-blocked, 0x19),
XGENE_PMU_EVENT_ATTR(mcu-req-table-full, 0x1a),
XGENE_PMU_EVENT_ATTR(mcu-stat-table-full, 0x1b),
XGENE_PMU_EVENT_ATTR(mcu-wr-table-full, 0x1c),
XGENE_PMU_EVENT_ATTR(mcu-rdreceipt-resp, 0x1d),
XGENE_PMU_EVENT_ATTR(mcu-wrcomplete-resp, 0x1e),
XGENE_PMU_EVENT_ATTR(mcu-retryack-resp, 0x1f),
XGENE_PMU_EVENT_ATTR(mcu-pcrdgrant-resp, 0x20),
XGENE_PMU_EVENT_ATTR(mcu-req-from-lastload, 0x21),
XGENE_PMU_EVENT_ATTR(mcu-req-from-bypass, 0x22),
XGENE_PMU_EVENT_ATTR(volt-droop-detect, 0x23),
NULL,
};
/*
 * v3 MC PMU events: name and event id pairs, NULL-terminated.
 * Ids run contiguously from 0x00 to 0x2c.
 * Referenced by mc_pmu_v3_events_attr_group, whose group name is "events".
 */
static struct attribute *mc_pmu_v3_events_attrs[] = {
XGENE_PMU_EVENT_ATTR(cycle-count, 0x00),
XGENE_PMU_EVENT_ATTR(act-sent, 0x01),
XGENE_PMU_EVENT_ATTR(pre-sent, 0x02),
XGENE_PMU_EVENT_ATTR(rd-sent, 0x03),
XGENE_PMU_EVENT_ATTR(rda-sent, 0x04),
XGENE_PMU_EVENT_ATTR(wr-sent, 0x05),
XGENE_PMU_EVENT_ATTR(wra-sent, 0x06),
XGENE_PMU_EVENT_ATTR(pd-entry-vld, 0x07),
XGENE_PMU_EVENT_ATTR(sref-entry-vld, 0x08),
XGENE_PMU_EVENT_ATTR(prea-sent, 0x09),
XGENE_PMU_EVENT_ATTR(ref-sent, 0x0a),
XGENE_PMU_EVENT_ATTR(rd-rda-sent, 0x0b),
XGENE_PMU_EVENT_ATTR(wr-wra-sent, 0x0c),
XGENE_PMU_EVENT_ATTR(raw-hazard, 0x0d),
XGENE_PMU_EVENT_ATTR(war-hazard, 0x0e),
XGENE_PMU_EVENT_ATTR(waw-hazard, 0x0f),
XGENE_PMU_EVENT_ATTR(rar-hazard, 0x10),
XGENE_PMU_EVENT_ATTR(raw-war-waw-hazard, 0x11),
XGENE_PMU_EVENT_ATTR(hprd-lprd-wr-req-vld, 0x12),
XGENE_PMU_EVENT_ATTR(lprd-req-vld, 0x13),
XGENE_PMU_EVENT_ATTR(hprd-req-vld, 0x14),
XGENE_PMU_EVENT_ATTR(hprd-lprd-req-vld, 0x15),
XGENE_PMU_EVENT_ATTR(wr-req-vld, 0x16),
XGENE_PMU_EVENT_ATTR(partial-wr-req-vld, 0x17),
XGENE_PMU_EVENT_ATTR(rd-retry, 0x18),
XGENE_PMU_EVENT_ATTR(wr-retry, 0x19),
XGENE_PMU_EVENT_ATTR(retry-gnt, 0x1a),
XGENE_PMU_EVENT_ATTR(rank-change, 0x1b),
XGENE_PMU_EVENT_ATTR(dir-change, 0x1c),
XGENE_PMU_EVENT_ATTR(rank-dir-change, 0x1d),
XGENE_PMU_EVENT_ATTR(rank-active, 0x1e),
XGENE_PMU_EVENT_ATTR(rank-idle, 0x1f),
XGENE_PMU_EVENT_ATTR(rank-pd, 0x20),
XGENE_PMU_EVENT_ATTR(rank-sref, 0x21),
XGENE_PMU_EVENT_ATTR(queue-fill-gt-thresh, 0x22),
XGENE_PMU_EVENT_ATTR(queue-rds-gt-thresh, 0x23),
XGENE_PMU_EVENT_ATTR(queue-wrs-gt-thresh, 0x24),
XGENE_PMU_EVENT_ATTR(phy-updt-complt, 0x25),
XGENE_PMU_EVENT_ATTR(tz-fail, 0x26),
XGENE_PMU_EVENT_ATTR(dram-errc, 0x27),
XGENE_PMU_EVENT_ATTR(dram-errd, 0x28),
XGENE_PMU_EVENT_ATTR(rd-enq, 0x29),
XGENE_PMU_EVENT_ATTR(wr-enq, 0x2a),
XGENE_PMU_EVENT_ATTR(tmac-limit-reached, 0x2b),
XGENE_PMU_EVENT_ATTR(tmaw-tracker-full, 0x2c),
NULL,
};
/*
 * v3 "events" attribute groups, one per PMU device kind. Each group is named
 * "events" and wraps the matching *_v3_events_attrs list.
 */
static const struct attribute_group l3c_pmu_v3_events_attr_group = {
.name = "events",
.attrs = l3c_pmu_v3_events_attrs,
};
static const struct attribute_group iob_fast_pmu_v3_events_attr_group = {
.name = "events",
.attrs = iob_fast_pmu_v3_events_attrs,
};
static const struct attribute_group iob_slow_pmu_v3_events_attr_group = {
.name = "events",
.attrs = iob_slow_pmu_v3_events_attrs,
};
static const struct attribute_group mcb_pmu_v3_events_attr_group = {
.name = "events",
.attrs = mcb_pmu_v3_events_attrs,
};
static const struct attribute_group mc_pmu_v3_events_attr_group = {
.name = "events",
.attrs = mc_pmu_v3_events_attrs,
};
/*
* sysfs cpumask attributes
*/
......@@ -334,7 +633,7 @@ static const struct attribute_group pmu_cpumask_attr_group = {
};
/*
* Per PMU device attribute groups
* Per PMU device attribute groups of PMU v1 and v2
*/
static const struct attribute_group *l3c_pmu_attr_groups[] = {
&l3c_pmu_format_attr_group,
......@@ -364,6 +663,44 @@ static const struct attribute_group *mc_pmu_attr_groups[] = {
NULL
};
/*
 * Per PMU device attribute groups of PMU v3
 *
 * Each NULL-terminated list combines, in this order, the device kind's
 * format group, the shared cpumask group and the device kind's events group.
 * The fast IOB list uses the common iob_pmu_v3_format_attr_group; the slow
 * IOB list has a format group of its own.
 */
static const struct attribute_group *l3c_pmu_v3_attr_groups[] = {
&l3c_pmu_v3_format_attr_group,
&pmu_cpumask_attr_group,
&l3c_pmu_v3_events_attr_group,
NULL
};
static const struct attribute_group *iob_fast_pmu_v3_attr_groups[] = {
&iob_pmu_v3_format_attr_group,
&pmu_cpumask_attr_group,
&iob_fast_pmu_v3_events_attr_group,
NULL
};
static const struct attribute_group *iob_slow_pmu_v3_attr_groups[] = {
&iob_slow_pmu_v3_format_attr_group,
&pmu_cpumask_attr_group,
&iob_slow_pmu_v3_events_attr_group,
NULL
};
static const struct attribute_group *mcb_pmu_v3_attr_groups[] = {
&mcb_pmu_v3_format_attr_group,
&pmu_cpumask_attr_group,
&mcb_pmu_v3_events_attr_group,
NULL
};
static const struct attribute_group *mc_pmu_v3_attr_groups[] = {
&mc_pmu_v3_format_attr_group,
&pmu_cpumask_attr_group,
&mc_pmu_v3_events_attr_group,
NULL
};
static int get_next_avail_cntr(struct xgene_pmu_dev *pmu_dev)
{
int cntr;
......@@ -387,22 +724,66 @@ static inline void xgene_pmu_mask_int(struct xgene_pmu *xgene_pmu)
writel(PCPPMU_INTENMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG);
}
/*
 * v3 counterpart of xgene_pmu_mask_int(): writes PCPPMU_V3_INTENMASK to
 * the PCPPMU_INTMASK_REG offset of the PCP PMU CSR region.
 * Installed as .mask_int in xgene_pmu_v3_ops.
 */
static inline void xgene_pmu_v3_mask_int(struct xgene_pmu *xgene_pmu)
{
writel(PCPPMU_V3_INTENMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG);
}
static inline void xgene_pmu_unmask_int(struct xgene_pmu *xgene_pmu)
{
writel(PCPPMU_INTCLRMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG);
}
static inline u32 xgene_pmu_read_counter(struct xgene_pmu_dev *pmu_dev, int idx)
/*
 * v3 counterpart of xgene_pmu_unmask_int(): writes PCPPMU_V3_INTCLRMASK
 * to the PCPPMU_INTMASK_REG offset of the PCP PMU CSR region.
 * Installed as .unmask_int in xgene_pmu_v3_ops.
 */
static inline void xgene_pmu_v3_unmask_int(struct xgene_pmu *xgene_pmu)
{
writel(PCPPMU_V3_INTCLRMASK,
xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG);
}
/*
 * Read one 32-bit counter register.
 *
 * @pmu_dev: PMU device whose CSR region (pmu_dev->inf->csr) is read
 * @idx:     index of the 32-bit register; registers are 4 bytes apart
 *           starting at PMU_PMEVCNTR0
 *
 * Returns the readl() result as a u64.
 */
static inline u64 xgene_pmu_read_counter32(struct xgene_pmu_dev *pmu_dev,
int idx)
{
return readl(pmu_dev->inf->csr + PMU_PMEVCNTR0 + (4 * idx));
}
static inline u64 xgene_pmu_read_counter64(struct xgene_pmu_dev *pmu_dev,
int idx)
{
u32 lo, hi;
/*
* v3 has 64-bit counter registers composed by 2 32-bit registers
* This can be a problem if the counter increases and carries
* out of bit [31] between 2 reads. The extra reads would help
* to prevent this issue.
*/
do {
hi = xgene_pmu_read_counter32(pmu_dev, 2 * idx + 1);
lo = xgene_pmu_read_counter32(pmu_dev, 2 * idx);
} while (hi != xgene_pmu_read_counter32(pmu_dev, 2 * idx + 1));
return (((u64)hi << 32) | lo);
}
static inline void
xgene_pmu_write_counter(struct xgene_pmu_dev *pmu_dev, int idx, u32 val)
xgene_pmu_write_counter32(struct xgene_pmu_dev *pmu_dev, int idx, u64 val)
{
writel(val, pmu_dev->inf->csr + PMU_PMEVCNTR0 + (4 * idx));
}
/*
 * Program a v3 64-bit event counter.
 *
 * Counter @idx is made of two 32-bit registers; the low 32 bits of @val
 * are written to slot 2 * idx first, then the high 32 bits to slot
 * 2 * idx + 1.
 */
static inline void
xgene_pmu_write_counter64(struct xgene_pmu_dev *pmu_dev, int idx, u64 val)
{
	xgene_pmu_write_counter32(pmu_dev, 2 * idx, lower_32_bits(val));
	xgene_pmu_write_counter32(pmu_dev, 2 * idx + 1, upper_32_bits(val));
}
static inline void
xgene_pmu_write_evttype(struct xgene_pmu_dev *pmu_dev, int idx, u32 val)
{
......@@ -415,12 +796,18 @@ xgene_pmu_write_agentmsk(struct xgene_pmu_dev *pmu_dev, u32 val)
writel(val, pmu_dev->inf->csr + PMU_PMAMR0);
}
/*
 * v3 agent-mask writer: intentionally empty. It is installed as
 * .write_agentmsk in xgene_pmu_v3_ops, so the agent-mask write issued
 * through the ops table does nothing on v3.
 */
static inline void
xgene_pmu_v3_write_agentmsk(struct xgene_pmu_dev *pmu_dev, u32 val) { }
static inline void
xgene_pmu_write_agent1msk(struct xgene_pmu_dev *pmu_dev, u32 val)
{
writel(val, pmu_dev->inf->csr + PMU_PMAMR1);
}
/*
 * v3 agent1-mask writer: intentionally empty. It is installed as
 * .write_agent1msk in xgene_pmu_v3_ops, so the agent1-mask write issued
 * through the ops table does nothing on v3.
 */
static inline void
xgene_pmu_v3_write_agent1msk(struct xgene_pmu_dev *pmu_dev, u32 val) { }
static inline void
xgene_pmu_enable_counter(struct xgene_pmu_dev *pmu_dev, int idx)
{
......@@ -491,20 +878,22 @@ static inline void xgene_pmu_stop_counters(struct xgene_pmu_dev *pmu_dev)
static void xgene_perf_pmu_enable(struct pmu *pmu)
{
struct xgene_pmu_dev *pmu_dev = to_pmu_dev(pmu);
struct xgene_pmu *xgene_pmu = pmu_dev->parent;
int enabled = bitmap_weight(pmu_dev->cntr_assign_mask,
pmu_dev->max_counters);
if (!enabled)
return;
xgene_pmu_start_counters(pmu_dev);
xgene_pmu->ops->start_counters(pmu_dev);
}
static void xgene_perf_pmu_disable(struct pmu *pmu)
{
struct xgene_pmu_dev *pmu_dev = to_pmu_dev(pmu);
struct xgene_pmu *xgene_pmu = pmu_dev->parent;
xgene_pmu_stop_counters(pmu_dev);
xgene_pmu->ops->stop_counters(pmu_dev);
}
static int xgene_perf_event_init(struct perf_event *event)
......@@ -572,49 +961,56 @@ static int xgene_perf_event_init(struct perf_event *event)
static void xgene_perf_enable_event(struct perf_event *event)
{
struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
struct xgene_pmu *xgene_pmu = pmu_dev->parent;
xgene_pmu_write_evttype(pmu_dev, GET_CNTR(event), GET_EVENTID(event));
xgene_pmu_write_agentmsk(pmu_dev, ~((u32)GET_AGENTID(event)));
xgene_pmu->ops->write_evttype(pmu_dev, GET_CNTR(event),
GET_EVENTID(event));
xgene_pmu->ops->write_agentmsk(pmu_dev, ~((u32)GET_AGENTID(event)));
if (pmu_dev->inf->type == PMU_TYPE_IOB)
xgene_pmu_write_agent1msk(pmu_dev, ~((u32)GET_AGENT1ID(event)));
xgene_pmu->ops->write_agent1msk(pmu_dev,
~((u32)GET_AGENT1ID(event)));
xgene_pmu_enable_counter(pmu_dev, GET_CNTR(event));
xgene_pmu_enable_counter_int(pmu_dev, GET_CNTR(event));
xgene_pmu->ops->enable_counter(pmu_dev, GET_CNTR(event));
xgene_pmu->ops->enable_counter_int(pmu_dev, GET_CNTR(event));
}
static void xgene_perf_disable_event(struct perf_event *event)
{
struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
struct xgene_pmu *xgene_pmu = pmu_dev->parent;
xgene_pmu_disable_counter(pmu_dev, GET_CNTR(event));
xgene_pmu_disable_counter_int(pmu_dev, GET_CNTR(event));
xgene_pmu->ops->disable_counter(pmu_dev, GET_CNTR(event));
xgene_pmu->ops->disable_counter_int(pmu_dev, GET_CNTR(event));
}
static void xgene_perf_event_set_period(struct perf_event *event)
{
struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
struct xgene_pmu *xgene_pmu = pmu_dev->parent;
struct hw_perf_event *hw = &event->hw;
/*
* The X-Gene PMU counters have a period of 2^32. To account for the
* possiblity of extreme interrupt latency we program for a period of
* half that. Hopefully we can handle the interrupt before another 2^31
* For 32 bit counter, it has a period of 2^32. To account for the
* possibility of extreme interrupt latency we program for a period of
* half that. Hopefully, we can handle the interrupt before another 2^31
* events occur and the counter overtakes its previous value.
* For 64 bit counter, we don't expect it to overflow.
*/
u64 val = 1ULL << 31;
local64_set(&hw->prev_count, val);
xgene_pmu_write_counter(pmu_dev, hw->idx, (u32) val);
xgene_pmu->ops->write_counter(pmu_dev, hw->idx, val);
}
static void xgene_perf_event_update(struct perf_event *event)
{
struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
struct xgene_pmu *xgene_pmu = pmu_dev->parent;
struct hw_perf_event *hw = &event->hw;
u64 delta, prev_raw_count, new_raw_count;
again:
prev_raw_count = local64_read(&hw->prev_count);
new_raw_count = xgene_pmu_read_counter(pmu_dev, GET_CNTR(event));
new_raw_count = xgene_pmu->ops->read_counter(pmu_dev, GET_CNTR(event));
if (local64_cmpxchg(&hw->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
......@@ -633,6 +1029,7 @@ static void xgene_perf_read(struct perf_event *event)
static void xgene_perf_start(struct perf_event *event, int flags)
{
struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
struct xgene_pmu *xgene_pmu = pmu_dev->parent;
struct hw_perf_event *hw = &event->hw;
if (WARN_ON_ONCE(!(hw->state & PERF_HES_STOPPED)))
......@@ -646,8 +1043,8 @@ static void xgene_perf_start(struct perf_event *event, int flags)
if (flags & PERF_EF_RELOAD) {
u64 prev_raw_count = local64_read(&hw->prev_count);
xgene_pmu_write_counter(pmu_dev, GET_CNTR(event),
(u32) prev_raw_count);
xgene_pmu->ops->write_counter(pmu_dev, GET_CNTR(event),
prev_raw_count);
}
xgene_perf_enable_event(event);
......@@ -713,7 +1110,10 @@ static int xgene_init_perf(struct xgene_pmu_dev *pmu_dev, char *name)
{
struct xgene_pmu *xgene_pmu;
pmu_dev->max_period = PMU_CNT_MAX_PERIOD - 1;
if (pmu_dev->parent->version == PCP_PMU_V3)
pmu_dev->max_period = PMU_V3_CNT_MAX_PERIOD;
else
pmu_dev->max_period = PMU_CNT_MAX_PERIOD;
/* First version PMU supports only single event counter */
xgene_pmu = pmu_dev->parent;
if (xgene_pmu->version == PCP_PMU_V1)
......@@ -736,8 +1136,8 @@ static int xgene_init_perf(struct xgene_pmu_dev *pmu_dev, char *name)
};
/* Hardware counter init */
xgene_pmu_stop_counters(pmu_dev);
xgene_pmu_reset_counters(pmu_dev);
xgene_pmu->ops->stop_counters(pmu_dev);
xgene_pmu->ops->reset_counters(pmu_dev);
return perf_pmu_register(&pmu_dev->pmu, name, -1);
}
......@@ -758,20 +1158,38 @@ xgene_pmu_dev_add(struct xgene_pmu *xgene_pmu, struct xgene_pmu_dev_ctx *ctx)
switch (pmu->inf->type) {
case PMU_TYPE_L3C:
pmu->attr_groups = l3c_pmu_attr_groups;
if (!(xgene_pmu->l3c_active_mask & pmu->inf->enable_mask))
goto dev_err;
if (xgene_pmu->version == PCP_PMU_V3)
pmu->attr_groups = l3c_pmu_v3_attr_groups;
else
pmu->attr_groups = l3c_pmu_attr_groups;
break;
case PMU_TYPE_IOB:
pmu->attr_groups = iob_pmu_attr_groups;
if (xgene_pmu->version == PCP_PMU_V3)
pmu->attr_groups = iob_fast_pmu_v3_attr_groups;
else
pmu->attr_groups = iob_pmu_attr_groups;
break;
case PMU_TYPE_IOB_SLOW:
if (xgene_pmu->version == PCP_PMU_V3)
pmu->attr_groups = iob_slow_pmu_v3_attr_groups;
break;
case PMU_TYPE_MCB:
if (!(xgene_pmu->mcb_active_mask & pmu->inf->enable_mask))
goto dev_err;
pmu->attr_groups = mcb_pmu_attr_groups;
if (xgene_pmu->version == PCP_PMU_V3)
pmu->attr_groups = mcb_pmu_v3_attr_groups;
else
pmu->attr_groups = mcb_pmu_attr_groups;
break;
case PMU_TYPE_MC:
if (!(xgene_pmu->mc_active_mask & pmu->inf->enable_mask))
goto dev_err;
pmu->attr_groups = mc_pmu_attr_groups;
if (xgene_pmu->version == PCP_PMU_V3)
pmu->attr_groups = mc_pmu_v3_attr_groups;
else
pmu->attr_groups = mc_pmu_attr_groups;
break;
default:
return -EINVAL;
......@@ -795,18 +1213,27 @@ xgene_pmu_dev_add(struct xgene_pmu *xgene_pmu, struct xgene_pmu_dev_ctx *ctx)
static void _xgene_pmu_isr(int irq, struct xgene_pmu_dev *pmu_dev)
{
struct xgene_pmu *xgene_pmu = pmu_dev->parent;
void __iomem *csr = pmu_dev->inf->csr;
u32 pmovsr;
int idx;
pmovsr = readl(pmu_dev->inf->csr + PMU_PMOVSR) & PMU_OVERFLOW_MASK;
xgene_pmu->ops->stop_counters(pmu_dev);
if (xgene_pmu->version == PCP_PMU_V3)
pmovsr = readl(csr + PMU_PMOVSSET) & PMU_OVERFLOW_MASK;
else
pmovsr = readl(csr + PMU_PMOVSR) & PMU_OVERFLOW_MASK;
if (!pmovsr)
return;
goto out;
/* Clear interrupt flag */
if (xgene_pmu->version == PCP_PMU_V1)
writel(0x0, pmu_dev->inf->csr + PMU_PMOVSR);
writel(0x0, csr + PMU_PMOVSR);
else if (xgene_pmu->version == PCP_PMU_V2)
writel(pmovsr, csr + PMU_PMOVSR);
else
writel(pmovsr, pmu_dev->inf->csr + PMU_PMOVSR);
writel(pmovsr, csr + PMU_PMOVSCLR);
for (idx = 0; idx < PMU_MAX_COUNTERS; idx++) {
struct perf_event *event = pmu_dev->pmu_counter_event[idx];
......@@ -818,10 +1245,14 @@ static void _xgene_pmu_isr(int irq, struct xgene_pmu_dev *pmu_dev)
xgene_perf_event_update(event);
xgene_perf_event_set_period(event);
}
out:
xgene_pmu->ops->start_counters(pmu_dev);
}
static irqreturn_t xgene_pmu_isr(int irq, void *dev_id)
{
u32 intr_mcu, intr_mcb, intr_l3c, intr_iob;
struct xgene_pmu_dev_ctx *ctx;
struct xgene_pmu *xgene_pmu = dev_id;
unsigned long flags;
......@@ -831,22 +1262,33 @@ static irqreturn_t xgene_pmu_isr(int irq, void *dev_id)
/* Get Interrupt PMU source */
val = readl(xgene_pmu->pcppmu_csr + PCPPMU_INTSTATUS_REG);
if (val & PCPPMU_INT_MCU) {
if (xgene_pmu->version == PCP_PMU_V3) {
intr_mcu = PCPPMU_V3_INT_MCU;
intr_mcb = PCPPMU_V3_INT_MCB;
intr_l3c = PCPPMU_V3_INT_L3C;
intr_iob = PCPPMU_V3_INT_IOB;
} else {
intr_mcu = PCPPMU_INT_MCU;
intr_mcb = PCPPMU_INT_MCB;
intr_l3c = PCPPMU_INT_L3C;
intr_iob = PCPPMU_INT_IOB;
}
if (val & intr_mcu) {
list_for_each_entry(ctx, &xgene_pmu->mcpmus, next) {
_xgene_pmu_isr(irq, ctx->pmu_dev);
}
}
if (val & PCPPMU_INT_MCB) {
if (val & intr_mcb) {
list_for_each_entry(ctx, &xgene_pmu->mcbpmus, next) {
_xgene_pmu_isr(irq, ctx->pmu_dev);
}
}
if (val & PCPPMU_INT_L3C) {
if (val & intr_l3c) {
list_for_each_entry(ctx, &xgene_pmu->l3cpmus, next) {
_xgene_pmu_isr(irq, ctx->pmu_dev);
}
}
if (val & PCPPMU_INT_IOB) {
if (val & intr_iob) {
list_for_each_entry(ctx, &xgene_pmu->iobpmus, next) {
_xgene_pmu_isr(irq, ctx->pmu_dev);
}
......@@ -857,8 +1299,8 @@ static irqreturn_t xgene_pmu_isr(int irq, void *dev_id)
return IRQ_HANDLED;
}
static int acpi_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
struct platform_device *pdev)
static int acpi_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
struct platform_device *pdev)
{
void __iomem *csw_csr, *mcba_csr, *mcbb_csr;
struct resource *res;
......@@ -885,6 +1327,8 @@ static int acpi_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
return PTR_ERR(mcbb_csr);
}
xgene_pmu->l3c_active_mask = 0x1;
reg = readl(csw_csr + CSW_CSWCR);
if (reg & CSW_CSWCR_DUALMCB_MASK) {
/* Dual MCB active */
......@@ -905,8 +1349,56 @@ static int acpi_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
return 0;
}
static int fdt_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
struct platform_device *pdev)
/*
 * Work out which MCB, MC and L3C instances are active on a v3 PMU
 * (ACPI boot) from the CSW control register.
 *
 * Maps memory resource 1 of @pdev as the CSW CSR block, reads CSW_CSWCR
 * and fills xgene_pmu->mcb_active_mask, ->l3c_active_mask and
 * ->mc_active_mask from the dual-MCB bit and the MCB0/MCB1 routing
 * fields.
 *
 * Returns 0 on success, or the PTR_ERR() of the failed mapping.
 */
static int acpi_pmu_v3_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
						struct platform_device *pdev)
{
	void __iomem *csw_csr;
	struct resource *res;
	unsigned int reg;
	u32 mcb0routing;
	u32 mcb1routing;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
	csw_csr = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(csw_csr)) {
		dev_err(&pdev->dev, "ioremap failed for CSW CSR resource\n");
		return PTR_ERR(csw_csr);
	}

	reg = readl(csw_csr + CSW_CSWCR);
	mcb0routing = CSW_CSWCR_MCB0_ROUTING(reg);
	mcb1routing = CSW_CSWCR_MCB1_ROUTING(reg);

	if (!(reg & CSW_CSWCR_DUALMCB_MASK)) {
		/* Single MCB active: at most 4 L3Cs and 4 MCs */
		xgene_pmu->mcb_active_mask = 0x1;
		xgene_pmu->l3c_active_mask = 0x0F;

		switch (mcb0routing) {
		case 0x2:
			xgene_pmu->mc_active_mask = 0x0F;
			break;
		case 0x1:
			xgene_pmu->mc_active_mask = 0x03;
			break;
		default:
			xgene_pmu->mc_active_mask = 0x01;
			break;
		}

		return 0;
	}

	/* Dual MCB active: at most 8 L3Cs and 8 MCs */
	xgene_pmu->mcb_active_mask = 0x3;
	xgene_pmu->l3c_active_mask = 0xFF;

	/* Both MCBs must report the same routing to widen the MC mask. */
	if (mcb0routing == 0x2 && mcb1routing == 0x2)
		xgene_pmu->mc_active_mask = 0xFF;
	else if (mcb0routing == 0x1 && mcb1routing == 0x1)
		xgene_pmu->mc_active_mask = 0x33;
	else
		xgene_pmu->mc_active_mask = 0x11;

	return 0;
}
static int fdt_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
struct platform_device *pdev)
{
struct regmap *csw_map, *mcba_map, *mcbb_map;
struct device_node *np = pdev->dev.of_node;
......@@ -930,6 +1422,7 @@ static int fdt_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
return PTR_ERR(mcbb_map);
}
xgene_pmu->l3c_active_mask = 0x1;
if (regmap_read(csw_map, CSW_CSWCR, &reg))
return -EINVAL;
......@@ -954,12 +1447,18 @@ static int fdt_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
return 0;
}
static int xgene_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
struct platform_device *pdev)
static int xgene_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
struct platform_device *pdev)
{
if (has_acpi_companion(&pdev->dev))
return acpi_pmu_probe_active_mcb_mcu(xgene_pmu, pdev);
return fdt_pmu_probe_active_mcb_mcu(xgene_pmu, pdev);
if (has_acpi_companion(&pdev->dev)) {
if (xgene_pmu->version == PCP_PMU_V3)
return acpi_pmu_v3_probe_active_mcb_mcu_l3c(xgene_pmu,
pdev);
else
return acpi_pmu_probe_active_mcb_mcu_l3c(xgene_pmu,
pdev);
}
return fdt_pmu_probe_active_mcb_mcu_l3c(xgene_pmu, pdev);
}
static char *xgene_pmu_dev_name(struct device *dev, u32 type, int id)
......@@ -969,6 +1468,8 @@ static char *xgene_pmu_dev_name(struct device *dev, u32 type, int id)
return devm_kasprintf(dev, GFP_KERNEL, "l3c%d", id);
case PMU_TYPE_IOB:
return devm_kasprintf(dev, GFP_KERNEL, "iob%d", id);
case PMU_TYPE_IOB_SLOW:
return devm_kasprintf(dev, GFP_KERNEL, "iob-slow%d", id);
case PMU_TYPE_MCB:
return devm_kasprintf(dev, GFP_KERNEL, "mcb%d", id);
case PMU_TYPE_MC:
......@@ -1047,9 +1548,40 @@ xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu,
return NULL;
}
/*
 * ACPI HID -> PMU type table for the child PMU devices. The second
 * initializer of each entry is the PMU_TYPE_* value that
 * acpi_pmu_dev_add() reads back through ->driver_data. The table ends
 * with an empty sentinel entry.
 * NOTE(review): the APMC0D84..APMC0D88 entries are presumably the v3
 * devices (they include PMU_TYPE_IOB_SLOW) - confirm against firmware docs.
 */
static const struct acpi_device_id xgene_pmu_acpi_type_match[] = {
{"APMC0D5D", PMU_TYPE_L3C},
{"APMC0D5E", PMU_TYPE_IOB},
{"APMC0D5F", PMU_TYPE_MCB},
{"APMC0D60", PMU_TYPE_MC},
{"APMC0D84", PMU_TYPE_L3C},
{"APMC0D85", PMU_TYPE_IOB},
{"APMC0D86", PMU_TYPE_IOB_SLOW},
{"APMC0D87", PMU_TYPE_MCB},
{"APMC0D88", PMU_TYPE_MC},
{},
};
/*
 * Find the entry of @ids that describes @adev.
 *
 * Walks @ids up to its terminating entry (empty id string and zero cls).
 * An entry counts as a match when acpi_match_device_ids() returns 0 for
 * it. The walk stops at the first non-matching entry that follows a
 * match, so the result is the last entry of the first run of
 * consecutive matching entries.
 *
 * Returns the matching entry, or NULL if no entry matched.
 */
static const struct acpi_device_id *xgene_pmu_acpi_match_type(
					const struct acpi_device_id *ids,
					struct acpi_device *adev)
{
	const struct acpi_device_id *found = NULL;
	const struct acpi_device_id *cur = ids;

	while (cur->id[0] || cur->cls) {
		if (acpi_match_device_ids(adev, cur) == 0) {
			found = cur;
		} else if (found) {
			/* The run of matching entries has ended. */
			break;
		}
		cur++;
	}

	return found;
}
static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level,
void *data, void **return_value)
{
const struct acpi_device_id *acpi_id;
struct xgene_pmu *xgene_pmu = data;
struct xgene_pmu_dev_ctx *ctx;
struct acpi_device *adev;
......@@ -1059,17 +1591,11 @@ static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level,
if (acpi_bus_get_status(adev) || !adev->status.present)
return AE_OK;
if (!strcmp(acpi_device_hid(adev), "APMC0D5D"))
ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_L3C);
else if (!strcmp(acpi_device_hid(adev), "APMC0D5E"))
ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_IOB);
else if (!strcmp(acpi_device_hid(adev), "APMC0D5F"))
ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_MCB);
else if (!strcmp(acpi_device_hid(adev), "APMC0D60"))
ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_MC);
else
ctx = NULL;
acpi_id = xgene_pmu_acpi_match_type(xgene_pmu_acpi_type_match, adev);
if (!acpi_id)
return AE_OK;
ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, (u32)acpi_id->driver_data);
if (!ctx)
return AE_OK;
......@@ -1086,6 +1612,9 @@ static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level,
case PMU_TYPE_IOB:
list_add(&ctx->next, &xgene_pmu->iobpmus);
break;
case PMU_TYPE_IOB_SLOW:
list_add(&ctx->next, &xgene_pmu->iobpmus);
break;
case PMU_TYPE_MCB:
list_add(&ctx->next, &xgene_pmu->mcbpmus);
break;
......@@ -1207,6 +1736,9 @@ static int fdt_pmu_probe_pmu_dev(struct xgene_pmu *xgene_pmu,
case PMU_TYPE_IOB:
list_add(&ctx->next, &xgene_pmu->iobpmus);
break;
case PMU_TYPE_IOB_SLOW:
list_add(&ctx->next, &xgene_pmu->iobpmus);
break;
case PMU_TYPE_MCB:
list_add(&ctx->next, &xgene_pmu->mcbpmus);
break;
......@@ -1235,6 +1767,40 @@ static const struct xgene_pmu_data xgene_pmu_v2_data = {
.id = PCP_PMU_V2,
};
/*
 * Hardware accessors for PMU v1/v2 (selected in xgene_pmu_probe() for
 * every version other than PCP_PMU_V3): 32-bit counter read/write and
 * real agent-mask writes.
 */
static const struct xgene_pmu_ops xgene_pmu_ops = {
.mask_int = xgene_pmu_mask_int,
.unmask_int = xgene_pmu_unmask_int,
.read_counter = xgene_pmu_read_counter32,
.write_counter = xgene_pmu_write_counter32,
.write_evttype = xgene_pmu_write_evttype,
.write_agentmsk = xgene_pmu_write_agentmsk,
.write_agent1msk = xgene_pmu_write_agent1msk,
.enable_counter = xgene_pmu_enable_counter,
.disable_counter = xgene_pmu_disable_counter,
.enable_counter_int = xgene_pmu_enable_counter_int,
.disable_counter_int = xgene_pmu_disable_counter_int,
.reset_counters = xgene_pmu_reset_counters,
.start_counters = xgene_pmu_start_counters,
.stop_counters = xgene_pmu_stop_counters,
};
/*
 * Hardware accessors for PMU v3. Differs from xgene_pmu_ops only in the
 * interrupt mask/unmask helpers, the 64-bit counter read/write helpers
 * and the agent-mask writers, which are empty stubs on v3.
 */
static const struct xgene_pmu_ops xgene_pmu_v3_ops = {
.mask_int = xgene_pmu_v3_mask_int,
.unmask_int = xgene_pmu_v3_unmask_int,
.read_counter = xgene_pmu_read_counter64,
.write_counter = xgene_pmu_write_counter64,
.write_evttype = xgene_pmu_write_evttype,
.write_agentmsk = xgene_pmu_v3_write_agentmsk,
.write_agent1msk = xgene_pmu_v3_write_agent1msk,
.enable_counter = xgene_pmu_enable_counter,
.disable_counter = xgene_pmu_disable_counter,
.enable_counter_int = xgene_pmu_enable_counter_int,
.disable_counter_int = xgene_pmu_disable_counter_int,
.reset_counters = xgene_pmu_reset_counters,
.start_counters = xgene_pmu_start_counters,
.stop_counters = xgene_pmu_stop_counters,
};
static const struct of_device_id xgene_pmu_of_match[] = {
{ .compatible = "apm,xgene-pmu", .data = &xgene_pmu_data },
{ .compatible = "apm,xgene-pmu-v2", .data = &xgene_pmu_v2_data },
......@@ -1245,6 +1811,7 @@ MODULE_DEVICE_TABLE(of, xgene_pmu_of_match);
static const struct acpi_device_id xgene_pmu_acpi_match[] = {
{"APMC0D5B", PCP_PMU_V1},
{"APMC0D5C", PCP_PMU_V2},
{"APMC0D83", PCP_PMU_V3},
{},
};
MODULE_DEVICE_TABLE(acpi, xgene_pmu_acpi_match);
......@@ -1284,6 +1851,11 @@ static int xgene_pmu_probe(struct platform_device *pdev)
if (version < 0)
return -ENODEV;
if (version == PCP_PMU_V3)
xgene_pmu->ops = &xgene_pmu_v3_ops;
else
xgene_pmu->ops = &xgene_pmu_ops;
INIT_LIST_HEAD(&xgene_pmu->l3cpmus);
INIT_LIST_HEAD(&xgene_pmu->iobpmus);
INIT_LIST_HEAD(&xgene_pmu->mcbpmus);
......@@ -1317,7 +1889,7 @@ static int xgene_pmu_probe(struct platform_device *pdev)
raw_spin_lock_init(&xgene_pmu->lock);
/* Check for active MCBs and MCUs */
rc = xgene_pmu_probe_active_mcb_mcu(xgene_pmu, pdev);
rc = xgene_pmu_probe_active_mcb_mcu_l3c(xgene_pmu, pdev);
if (rc) {
dev_warn(&pdev->dev, "Unknown MCB/MCU active status\n");
xgene_pmu->mcb_active_mask = 0x1;
......@@ -1342,7 +1914,7 @@ static int xgene_pmu_probe(struct platform_device *pdev)
}
/* Enable interrupt */
xgene_pmu_unmask_int(xgene_pmu);
xgene_pmu->ops->unmask_int(xgene_pmu);
return 0;
......
......@@ -7,11 +7,24 @@
#include <linux/init.h>
#include <linux/ras.h>
#include <linux/uuid.h>
#define CREATE_TRACE_POINTS
#define TRACE_INCLUDE_PATH ../../include/ras
#include <ras/ras_event.h>
/*
 * Emit the non_standard_event tracepoint.
 *
 * All arguments are forwarded unchanged to trace_non_standard_event():
 * @sec_type: section type UUID
 * @fru_id:   FRU UUID
 * @fru_text: FRU text string
 * @sev:      severity value
 * @err:      raw error data, @len bytes long
 * @len:      length of @err in bytes
 */
void log_non_standard_event(const uuid_le *sec_type, const uuid_le *fru_id,
const char *fru_text, const u8 sev, const u8 *err,
const u32 len)
{
trace_non_standard_event(sec_type, fru_id, fru_text, sev, err, len);
}
/*
 * Emit the arm_event tracepoint for the ARM processor error section @err.
 */
void log_arm_hw_error(struct cper_sec_proc_arm *err)
{
trace_arm_event(err);
}
static int __init ras_init(void)
{
int rc = 0;
......@@ -27,7 +40,8 @@ subsys_initcall(ras_init);
EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
#endif
EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
EXPORT_TRACEPOINT_SYMBOL_GPL(non_standard_event);
EXPORT_TRACEPOINT_SYMBOL_GPL(arm_event);
static int __init parse_ras_param(char *str)
{
......
......@@ -504,7 +504,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
if (&m->list == &kclist_head) {
if (clear_user(buffer, tsz))
return -EFAULT;
} else if (is_vmalloc_or_module_addr((void *)start)) {
} else if (m->type == KCORE_VMALLOC) {
vread(buf, (char *)start, tsz);
/* we have to zero-fill user buffer even if no read */
if (copy_to_user(buffer, buf, tsz))
......
#ifndef GHES_H
#define GHES_H
#include <acpi/apei.h>
#include <acpi/hed.h>
......@@ -13,7 +16,10 @@
#define GHES_EXITING 0x0002
struct ghes {
struct acpi_hest_generic *generic;
union {
struct acpi_hest_generic *generic;
struct acpi_hest_generic_v2 *generic_v2;
};
struct acpi_hest_generic_status *estatus;
u64 buffer_paddr;
unsigned long flags;
......@@ -70,3 +76,43 @@ static inline void ghes_edac_unregister(struct ghes *ghes)
{
}
#endif
/*
 * Return the version number of a generic error data entry, i.e. the
 * entry's revision field with its low 8 bits shifted out.
 */
static inline int acpi_hest_get_version(struct acpi_hest_generic_data *gdata)
{
	int version = gdata->revision >> 8;

	return version;
}
/*
 * Return a pointer to the payload that directly follows the header of a
 * generic error data entry. Entries with version >= 3 use the larger
 * struct acpi_hest_generic_data_v300 header, older ones the plain
 * struct acpi_hest_generic_data header.
 */
static inline void *acpi_hest_get_payload(struct acpi_hest_generic_data *gdata)
{
	struct acpi_hest_generic_data_v300 *gdata_v3;

	if (acpi_hest_get_version(gdata) < 3)
		return gdata + 1;

	gdata_v3 = (struct acpi_hest_generic_data_v300 *)gdata;
	return gdata_v3 + 1;
}
/*
 * Return the error_data_length field of a generic error data entry.
 */
static inline int acpi_hest_get_error_length(struct acpi_hest_generic_data *gdata)
{
	return gdata->error_data_length;
}
/*
 * Return the header size of a generic error data entry: the size of
 * struct acpi_hest_generic_data_v300 for version >= 3, otherwise the
 * size of struct acpi_hest_generic_data.
 */
static inline int acpi_hest_get_size(struct acpi_hest_generic_data *gdata)
{
	return acpi_hest_get_version(gdata) >= 3 ?
		sizeof(struct acpi_hest_generic_data_v300) :
		sizeof(struct acpi_hest_generic_data);
}
/*
 * Return the total size of a generic error data entry: its
 * version-dependent header size plus its error data length.
 */
static inline int acpi_hest_get_record_size(struct acpi_hest_generic_data *gdata)
{
	int header_size = acpi_hest_get_size(gdata);
	int payload_len = acpi_hest_get_error_length(gdata);

	return header_size + payload_len;
}
/*
 * Return the address immediately after this generic error data entry,
 * i.e. @gdata advanced by the entry's full record size in bytes.
 */
static inline void *acpi_hest_get_next(struct acpi_hest_generic_data *gdata)
{
	void *base = gdata;

	return base + acpi_hest_get_record_size(gdata);
}
int ghes_notify_sea(void);
#endif /* GHES_H */
......@@ -31,7 +31,6 @@ void iort_deregister_domain_token(int trans_id);
struct fwnode_handle *iort_find_domain_token(int trans_id);
#ifdef CONFIG_ACPI_IORT
void acpi_iort_init(void);
bool iort_node_match(u8 type);
u32 iort_msi_map_rid(struct device *dev, u32 req_id);
struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id);
void acpi_configure_pmsi_domain(struct device *dev);
......@@ -41,7 +40,6 @@ void iort_set_dma_mask(struct device *dev);
const struct iommu_ops *iort_iommu_configure(struct device *dev);
#else
static inline void acpi_iort_init(void) { }
static inline bool iort_node_match(u8 type) { return false; }
static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id)
{ return req_id; }
static inline struct irq_domain *iort_get_device_domain(struct device *dev,
......
......@@ -180,6 +180,10 @@ enum {
#define CPER_SEC_PROC_IPF \
UUID_LE(0xE429FAF1, 0x3CB7, 0x11D4, 0x0B, 0xCA, 0x07, 0x00, \
0x80, 0xC7, 0x3C, 0x88, 0x81)
/* Processor Specific: ARM */
#define CPER_SEC_PROC_ARM \
UUID_LE(0xE19E3D16, 0xBC11, 0x11E4, 0x9C, 0xAA, 0xC2, 0x05, \
0x1D, 0x5D, 0x46, 0xB0)
/* Platform Memory */
#define CPER_SEC_PLATFORM_MEM \
UUID_LE(0xA5BC1114, 0x6F64, 0x4EDE, 0xB8, 0x63, 0x3E, 0x83, \
......@@ -255,6 +259,22 @@ enum {
#define CPER_PCIE_SLOT_SHIFT 3
#define CPER_ARM_VALID_MPIDR BIT(0)
#define CPER_ARM_VALID_AFFINITY_LEVEL BIT(1)
#define CPER_ARM_VALID_RUNNING_STATE BIT(2)
#define CPER_ARM_VALID_VENDOR_INFO BIT(3)
#define CPER_ARM_INFO_VALID_MULTI_ERR BIT(0)
#define CPER_ARM_INFO_VALID_FLAGS BIT(1)
#define CPER_ARM_INFO_VALID_ERR_INFO BIT(2)
#define CPER_ARM_INFO_VALID_VIRT_ADDR BIT(3)
#define CPER_ARM_INFO_VALID_PHYSICAL_ADDR BIT(4)
#define CPER_ARM_INFO_FLAGS_FIRST BIT(0)
#define CPER_ARM_INFO_FLAGS_LAST BIT(1)
#define CPER_ARM_INFO_FLAGS_PROPAGATED BIT(2)
#define CPER_ARM_INFO_FLAGS_OVERFLOW BIT(3)
/*
* All tables and structs must be byte-packed to match CPER
* specification, since the tables are provided by the system BIOS
......@@ -340,6 +360,40 @@ struct cper_ia_proc_ctx {
__u64 mm_reg_addr;
};
/*
 * ARM Processor Error Section
 *
 * validation_bits holds CPER_ARM_VALID_* flags. The arm_event tracepoint
 * checks CPER_ARM_VALID_AFFINITY_LEVEL, CPER_ARM_VALID_MPIDR and
 * CPER_ARM_VALID_RUNNING_STATE before it uses affinity_level, mpidr and
 * running_state/psci_state respectively.
 */
struct cper_sec_proc_arm {
__u32 validation_bits; /* CPER_ARM_VALID_* flags */
__u16 err_info_num; /* Number of Processor Error Info */
__u16 context_info_num; /* Number of Processor Context Info Records*/
__u32 section_length;
__u8 affinity_level;
__u8 reserved[3]; /* must be zero */
__u64 mpidr;
__u64 midr;
__u32 running_state; /* Bit 0 set - Processor running. PSCI = 0 */
__u32 psci_state;
};
/*
 * ARM Processor Error Information Structure
 *
 * NOTE(review): validation_bits presumably uses the
 * CPER_ARM_INFO_VALID_* flags and flags the CPER_ARM_INFO_FLAGS_* bits
 * defined earlier in this header - confirm against the UEFI CPER
 * definition of this structure.
 */
struct cper_arm_err_info {
__u8 version;
__u8 length;
__u16 validation_bits;
__u8 type;
__u16 multiple_error;
__u8 flags;
__u64 error_info;
__u64 virt_fault_addr;
__u64 physical_fault_addr;
};
/* ARM Processor Context Information Structure */
struct cper_arm_ctx_info {
__u16 version;
__u16 type;
__u32 size;
};
/* Old Memory Error Section UEFI 2.1, 2.2 */
struct cper_sec_mem_err_old {
__u64 validation_bits;
......
......@@ -2,6 +2,8 @@
#define __RAS_H__
#include <asm/errno.h>
#include <linux/uuid.h>
#include <linux/cper.h>
#ifdef CONFIG_DEBUG_FS
int ras_userspace_consumers(void);
......@@ -22,4 +24,19 @@ static inline void __init cec_init(void) { }
static inline int cec_add_elem(u64 pfn) { return -ENODEV; }
#endif
#ifdef CONFIG_RAS
void log_non_standard_event(const guid_t *sec_type,
const guid_t *fru_id, const char *fru_text,
const u8 sev, const u8 *err, const u32 len);
void log_arm_hw_error(struct cper_sec_proc_arm *err);
#else
static inline void
log_non_standard_event(const guid_t *sec_type,
const guid_t *fru_id, const char *fru_text,
const u8 sev, const u8 *err, const u32 len)
{ return; }
static inline void
log_arm_hw_error(struct cper_sec_proc_arm *err) { return; }
#endif
#endif /* __RAS_H__ */
......@@ -18,8 +18,10 @@
#include <uapi/linux/uuid.h>
#define UUID_SIZE 16
typedef struct {
__u8 b[16];
__u8 b[UUID_SIZE];
} uuid_t;
#define UUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
......
......@@ -161,6 +161,96 @@ TRACE_EVENT(mc_event,
__get_str(driver_detail))
);
/*
* ARM Processor Events Report
*
* This event is generated when hardware detects an ARM processor error
* has occurred. UEFI 2.6 spec section N.2.4.4.
*/
TRACE_EVENT(arm_event,
TP_PROTO(const struct cper_sec_proc_arm *proc),
TP_ARGS(proc),
TP_STRUCT__entry(
__field(u64, mpidr)
__field(u64, midr)
__field(u32, running_state)
__field(u32, psci_state)
__field(u8, affinity)
),
TP_fast_assign(
/* Fields not flagged valid in validation_bits get a sentinel. */
if (proc->validation_bits & CPER_ARM_VALID_AFFINITY_LEVEL)
__entry->affinity = proc->affinity_level;
else
__entry->affinity = ~0; /* all-ones sentinel, stored in a u8 */
if (proc->validation_bits & CPER_ARM_VALID_MPIDR)
__entry->mpidr = proc->mpidr;
else
__entry->mpidr = 0ULL; /* sentinel for "no valid MPIDR" */
/* MIDR is recorded unconditionally; no validation bit is checked. */
__entry->midr = proc->midr;
/* One validation bit covers both running_state and psci_state. */
if (proc->validation_bits & CPER_ARM_VALID_RUNNING_STATE) {
__entry->running_state = proc->running_state;
__entry->psci_state = proc->psci_state;
} else {
__entry->running_state = ~0;
__entry->psci_state = ~0;
}
),
/*
 * NOTE(review): running_state/psci_state are u32 but printed with %d,
 * so the ~0 sentinel presumably shows up as -1 - confirm this is the
 * intended output.
 */
TP_printk("affinity level: %d; MPIDR: %016llx; MIDR: %016llx; "
"running state: %d; PSCI state: %d",
__entry->affinity, __entry->mpidr, __entry->midr,
__entry->running_state, __entry->psci_state)
);
/*
* Non-Standard Section Report
*
* This event is generated when hardware detects an error event
* whose section is either a non-standard section as defined in the
* UEFI spec appendix "Common Platform Error Record", or a section
* for which no TRACE_EVENT is defined.
*
*/
TRACE_EVENT(non_standard_event,
TP_PROTO(const uuid_le *sec_type,
const uuid_le *fru_id,
const char *fru_text,
const u8 sev,
const u8 *err,
const u32 len),
TP_ARGS(sec_type, fru_id, fru_text, sev, err, len),
TP_STRUCT__entry(
__array(char, sec_type, UUID_SIZE)
__array(char, fru_id, UUID_SIZE)
__string(fru_text, fru_text)
__field(u8, sev)
__field(u32, len)
__dynamic_array(u8, buf, len)
),
TP_fast_assign(
memcpy(__entry->sec_type, sec_type, UUID_SIZE);
memcpy(__entry->fru_id, fru_id, UUID_SIZE);
__assign_str(fru_text, fru_text);
__entry->sev = sev;
__entry->len = len;
memcpy(__get_dynamic_array(buf), err, len);
),
TP_printk("severity: %d; sec type:%pU; FRU: %pU %s; data len:%d; raw data:%s",
__entry->sev, __entry->sec_type,
__entry->fru_id, __get_str(fru_text),
__entry->len,
__print_hex(__get_dynamic_array(buf), __entry->len))
);
/*
* PCIe AER Trace event
*
......
......@@ -29,6 +29,7 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/virt.h>
#include <asm/system_misc.h>
#include "trace.h"
......@@ -1430,6 +1431,25 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
kvm_set_pfn_accessed(pfn);
}
/*
 * Tell whether a fault status code denotes a synchronous external abort.
 *
 * Returns true when @fault_status equals FSC_SEA, FSC_SECC or one of
 * their TTW0..TTW3 variants, and false for every other value.
 */
static bool is_abort_sea(unsigned long fault_status)
{
	if (fault_status == FSC_SEA ||
	    fault_status == FSC_SEA_TTW0 ||
	    fault_status == FSC_SEA_TTW1 ||
	    fault_status == FSC_SEA_TTW2 ||
	    fault_status == FSC_SEA_TTW3)
		return true;

	if (fault_status == FSC_SECC ||
	    fault_status == FSC_SECC_TTW0 ||
	    fault_status == FSC_SECC_TTW1 ||
	    fault_status == FSC_SECC_TTW2 ||
	    fault_status == FSC_SECC_TTW3)
		return true;

	return false;
}
/**
* kvm_handle_guest_abort - handles all 2nd stage aborts
* @vcpu: the VCPU pointer
......@@ -1452,19 +1472,29 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
gfn_t gfn;
int ret, idx;
fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
/*
* The host kernel will handle the synchronous external abort. There
* is no need to pass the error into the guest.
*/
if (is_abort_sea(fault_status)) {
if (!handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu)))
return 1;
}
is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
if (unlikely(!is_iabt && kvm_vcpu_dabt_isextabt(vcpu))) {
kvm_inject_vabt(vcpu);
return 1;
}
fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
kvm_vcpu_get_hfar(vcpu), fault_ipa);
/* Check the stage-2 fault is trans. fault or write fault */
fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
fault_status != FSC_ACCESS) {
kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment