Commit 04759194 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 updates from Catalin Marinas:

 - VMAP_STACK support, allowing the kernel stacks to be allocated in the
   vmalloc space with a guard page for trapping stack overflows. One of
   the patches introduces THREAD_ALIGN and changes the generic
   alloc_thread_stack_node() to use this instead of THREAD_SIZE (no
   functional change for other architectures)

 - Contiguous PTE hugetlb support re-enabled (after being reverted a
   couple of times). We now have the semantics agreed in the generic mm
   layer together with API improvements so that the architecture code
   can detect between contiguous and non-contiguous huge PTEs

 - Initial support for persistent memory on ARM: DC CVAP instruction
   exposed to user space (HWCAP) and the in-kernel pmem API implemented

 - raid6 improvements for arm64: faster algorithm for the delta syndrome
   and implementation of the recovery routines using Neon

 - FP/SIMD refactoring and removal of support for Neon in interrupt
   context. This is in preparation for full SVE support

 - PTE accessors converted from inline asm to cmpxchg so that we can use
   LSE atomics if available (ARMv8.1)

 - Perf support for Cortex-A35 and A73

 - Non-urgent fixes and cleanups

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (75 commits)
  arm64: cleanup {COMPAT_,}SET_PERSONALITY() macro
  arm64: introduce separated bits for mm_context_t flags
  arm64: hugetlb: Cleanup setup_hugepagesz
  arm64: Re-enable support for contiguous hugepages
  arm64: hugetlb: Override set_huge_swap_pte_at() to support contiguous hugepages
  arm64: hugetlb: Override huge_pte_clear() to support contiguous hugepages
  arm64: hugetlb: Handle swap entries in huge_pte_offset() for contiguous hugepages
  arm64: hugetlb: Add break-before-make logic for contiguous entries
  arm64: hugetlb: Spring clean huge pte accessors
  arm64: hugetlb: Introduce pte_pgprot helper
  arm64: hugetlb: set_huge_pte_at Add WARN_ON on !pte_present
  arm64: kexec: have own crash_smp_send_stop() for crash dump for nonpanic cores
  arm64: dma-mapping: Mark atomic_pool as __ro_after_init
  arm64: dma-mapping: Do not pass data to gen_pool_set_algo()
  arm64: Remove the !CONFIG_ARM64_HW_AFDBM alternative code paths
  arm64: Ignore hardware dirty bit updates in ptep_set_wrprotect()
  arm64: Move PTE_RDONLY bit handling out of set_pte_at()
  kvm: arm64: Convert kvm_set_s2pte_readonly() from inline asm to cmpxchg()
  arm64: Convert pte handling from inline asm to using (cmp)xchg
  arm64: neon/efi: Make EFI fpsimd save/restore variables static
  ...
parents 9e85ae6a d1be5c99
......@@ -179,6 +179,8 @@ infrastructure:
| FCMA | [19-16] | y |
|--------------------------------------------------|
| JSCVT | [15-12] | y |
|--------------------------------------------------|
| DPB | [3-0] | y |
x--------------------------------------------------x
Appendix I: Example
......
......@@ -9,9 +9,11 @@ Required properties:
- compatible : should be one of
"apm,potenza-pmu"
"arm,armv8-pmuv3"
"arm,cortex-a73-pmu"
"arm,cortex-a72-pmu"
"arm,cortex-a57-pmu"
"arm,cortex-a53-pmu"
"arm,cortex-a35-pmu"
"arm,cortex-a17-pmu"
"arm,cortex-a15-pmu"
"arm,cortex-a12-pmu"
......
......@@ -75,6 +75,7 @@ config ARM64
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_ARCH_VMAP_STACK
select HAVE_ARM_SMCCC
select HAVE_EBPF_JIT
select HAVE_C_RECORDMCOUNT
......@@ -960,6 +961,18 @@ config ARM64_UAO
regular load/store instructions if the cpu does not implement the
feature.
config ARM64_PMEM
bool "Enable support for persistent memory"
select ARCH_HAS_PMEM_API
select ARCH_HAS_UACCESS_FLUSHCACHE
help
Say Y to enable support for the persistent memory API based on the
ARMv8.2 DCPoP feature.
The feature is detected at runtime, and the kernel will use DC CVAC
operations if DC CVAP is not supported (following the behaviour of
DC CVAP itself if the system does not define a point of persistence).
endmenu
config ARM64_MODULE_CMODEL_LARGE
......
......@@ -20,7 +20,6 @@ generic-y += rwsem.h
generic-y += segment.h
generic-y += serial.h
generic-y += set_memory.h
generic-y += simd.h
generic-y += sizes.h
generic-y += switch_to.h
generic-y += trace_clock.h
......
#ifndef __ASM_ASM_BUG_H
/*
* Copyright (C) 2017 ARM Limited
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define __ASM_ASM_BUG_H
#include <asm/brk-imm.h>
#ifdef CONFIG_DEBUG_BUGVERBOSE
#define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line)
#define __BUGVERBOSE_LOCATION(file, line) \
.pushsection .rodata.str,"aMS",@progbits,1; \
2: .string file; \
.popsection; \
\
.long 2b - 0b; \
.short line;
#else
#define _BUGVERBOSE_LOCATION(file, line)
#endif
#ifdef CONFIG_GENERIC_BUG
#define __BUG_ENTRY(flags) \
.pushsection __bug_table,"aw"; \
.align 2; \
0: .long 1f - 0b; \
_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \
.short flags; \
.popsection; \
1:
#else
#define __BUG_ENTRY(flags)
#endif
#define ASM_BUG_FLAGS(flags) \
__BUG_ENTRY(flags) \
brk BUG_BRK_IMM
#define ASM_BUG() ASM_BUG_FLAGS(0)
#endif /* __ASM_ASM_BUG_H */
......@@ -230,12 +230,18 @@ lr .req x30 // link register
.endm
/*
* @dst: Result of per_cpu(sym, smp_processor_id())
* @dst: Result of per_cpu(sym, smp_processor_id()), can be SP for
* non-module code
* @sym: The name of the per-cpu variable
* @tmp: scratch register
*/
.macro adr_this_cpu, dst, sym, tmp
#ifndef MODULE
adrp \tmp, \sym
add \dst, \tmp, #:lo12:\sym
#else
adr_l \dst, \sym
#endif
mrs \tmp, tpidr_el1
add \dst, \dst, \tmp
.endm
......@@ -352,6 +358,12 @@ alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
dc \op, \kaddr
alternative_else
dc civac, \kaddr
alternative_endif
.elseif (\op == cvap)
alternative_if ARM64_HAS_DCPOP
sys 3, c7, c12, 1, \kaddr // dc cvap
alternative_else
dc cvac, \kaddr
alternative_endif
.else
dc \op, \kaddr
......@@ -403,6 +415,17 @@ alternative_endif
.size __pi_##x, . - x; \
ENDPROC(x)
/*
* Annotate a function as being unsuitable for kprobes.
*/
#ifdef CONFIG_KPROBES
#define NOKPROBE(x) \
.pushsection "_kprobe_blacklist", "aw"; \
.quad x; \
.popsection;
#else
#define NOKPROBE(x)
#endif
/*
* Emit a 64-bit absolute little endian symbol reference in a way that
* ensures that it will be resolved at build time, even when building a
......
......@@ -18,41 +18,12 @@
#ifndef _ARCH_ARM64_ASM_BUG_H
#define _ARCH_ARM64_ASM_BUG_H
#include <asm/brk-imm.h>
#include <linux/stringify.h>
#ifdef CONFIG_DEBUG_BUGVERBOSE
#define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line)
#define __BUGVERBOSE_LOCATION(file, line) \
".pushsection .rodata.str,\"aMS\",@progbits,1\n" \
"2: .string \"" file "\"\n\t" \
".popsection\n\t" \
\
".long 2b - 0b\n\t" \
".short " #line "\n\t"
#else
#define _BUGVERBOSE_LOCATION(file, line)
#endif
#ifdef CONFIG_GENERIC_BUG
#define __BUG_ENTRY(flags) \
".pushsection __bug_table,\"aw\"\n\t" \
".align 2\n\t" \
"0: .long 1f - 0b\n\t" \
_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \
".short " #flags "\n\t" \
".popsection\n" \
"1: "
#else
#define __BUG_ENTRY(flags) ""
#endif
#include <asm/asm-bug.h>
#define __BUG_FLAGS(flags) \
asm volatile ( \
__BUG_ENTRY(flags) \
"brk %[imm]" :: [imm] "i" (BUG_BRK_IMM) \
);
asm volatile (__stringify(ASM_BUG_FLAGS(flags)));
#define BUG() do { \
__BUG_FLAGS(0); \
......
......@@ -67,7 +67,9 @@
*/
extern void flush_icache_range(unsigned long start, unsigned long end);
extern void __flush_dcache_area(void *addr, size_t len);
extern void __inval_dcache_area(void *addr, size_t len);
extern void __clean_dcache_area_poc(void *addr, size_t len);
extern void __clean_dcache_area_pop(void *addr, size_t len);
extern void __clean_dcache_area_pou(void *addr, size_t len);
extern long __flush_cache_user_range(unsigned long start, unsigned long end);
extern void sync_icache_aliases(void *kaddr, unsigned long len);
......@@ -150,6 +152,6 @@ static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
{
}
int set_memory_valid(unsigned long addr, unsigned long size, int enable);
int set_memory_valid(unsigned long addr, int numpages, int enable);
#endif
......@@ -39,7 +39,8 @@
#define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18
#define ARM64_WORKAROUND_858921 19
#define ARM64_WORKAROUND_CAVIUM_30115 20
#define ARM64_HAS_DCPOP 21
#define ARM64_NCAPS 21
#define ARM64_NCAPS 22
#endif /* __ASM_CPUCAPS_H */
......@@ -3,7 +3,9 @@
#include <asm/boot.h>
#include <asm/cpufeature.h>
#include <asm/fpsimd.h>
#include <asm/io.h>
#include <asm/memory.h>
#include <asm/mmu_context.h>
#include <asm/neon.h>
#include <asm/ptrace.h>
......@@ -20,8 +22,8 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
#define arch_efi_call_virt_setup() \
({ \
kernel_neon_begin(); \
efi_virtmap_load(); \
__efi_fpsimd_begin(); \
})
#define arch_efi_call_virt(p, f, args...) \
......@@ -33,8 +35,8 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
#define arch_efi_call_virt_teardown() \
({ \
__efi_fpsimd_end(); \
efi_virtmap_unload(); \
kernel_neon_end(); \
})
#define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
......@@ -48,6 +50,13 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
*/
#define EFI_FDT_ALIGN SZ_2M /* used by allocate_new_fdt_and_exit_boot() */
/*
* In some configurations (e.g. VMAP_STACK && 64K pages), stacks built into the
* kernel need greater alignment than we require the segments to be padded to.
*/
#define EFI_KIMG_ALIGN \
(SEGMENT_ALIGN > THREAD_ALIGN ? SEGMENT_ALIGN : THREAD_ALIGN)
/* on arm64, the FDT may be located anywhere in system RAM */
static inline unsigned long efi_get_max_fdt_addr(unsigned long dram_base)
{
......
......@@ -139,7 +139,6 @@ typedef struct user_fpsimd_state elf_fpregset_t;
#define SET_PERSONALITY(ex) \
({ \
clear_bit(TIF_32BIT, &current->mm->context.flags); \
clear_thread_flag(TIF_32BIT); \
current->personality &= ~READ_IMPLIES_EXEC; \
})
......@@ -195,7 +194,6 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
*/
#define COMPAT_SET_PERSONALITY(ex) \
({ \
set_bit(TIF_32BIT, &current->mm->context.flags); \
set_thread_flag(TIF_32BIT); \
})
#define COMPAT_ARCH_DLINFO
......
......@@ -77,16 +77,23 @@
#define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT)
#define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT)
#define ESR_ELx_IL (UL(1) << 25)
#define ESR_ELx_IL_SHIFT (25)
#define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT)
#define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1)
/* ISS field definitions shared by different classes */
#define ESR_ELx_WNR (UL(1) << 6)
#define ESR_ELx_WNR_SHIFT (6)
#define ESR_ELx_WNR (UL(1) << ESR_ELx_WNR_SHIFT)
/* Shared ISS field definitions for Data/Instruction aborts */
#define ESR_ELx_FnV (UL(1) << 10)
#define ESR_ELx_EA (UL(1) << 9)
#define ESR_ELx_S1PTW (UL(1) << 7)
#define ESR_ELx_SET_SHIFT (11)
#define ESR_ELx_SET_MASK (UL(3) << ESR_ELx_SET_SHIFT)
#define ESR_ELx_FnV_SHIFT (10)
#define ESR_ELx_FnV (UL(1) << ESR_ELx_FnV_SHIFT)
#define ESR_ELx_EA_SHIFT (9)
#define ESR_ELx_EA (UL(1) << ESR_ELx_EA_SHIFT)
#define ESR_ELx_S1PTW_SHIFT (7)
#define ESR_ELx_S1PTW (UL(1) << ESR_ELx_S1PTW_SHIFT)
/* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */
#define ESR_ELx_FSC (0x3F)
......@@ -97,15 +104,20 @@
#define ESR_ELx_FSC_PERM (0x0C)
/* ISS field definitions for Data Aborts */
#define ESR_ELx_ISV (UL(1) << 24)
#define ESR_ELx_ISV_SHIFT (24)
#define ESR_ELx_ISV (UL(1) << ESR_ELx_ISV_SHIFT)
#define ESR_ELx_SAS_SHIFT (22)
#define ESR_ELx_SAS (UL(3) << ESR_ELx_SAS_SHIFT)
#define ESR_ELx_SSE (UL(1) << 21)
#define ESR_ELx_SSE_SHIFT (21)
#define ESR_ELx_SSE (UL(1) << ESR_ELx_SSE_SHIFT)
#define ESR_ELx_SRT_SHIFT (16)
#define ESR_ELx_SRT_MASK (UL(0x1F) << ESR_ELx_SRT_SHIFT)
#define ESR_ELx_SF (UL(1) << 15)
#define ESR_ELx_AR (UL(1) << 14)
#define ESR_ELx_CM (UL(1) << 8)
#define ESR_ELx_SF_SHIFT (15)
#define ESR_ELx_SF (UL(1) << ESR_ELx_SF_SHIFT)
#define ESR_ELx_AR_SHIFT (14)
#define ESR_ELx_AR (UL(1) << ESR_ELx_AR_SHIFT)
#define ESR_ELx_CM_SHIFT (8)
#define ESR_ELx_CM (UL(1) << ESR_ELx_CM_SHIFT)
/* ISS field definitions for exceptions taken in to Hyp */
#define ESR_ELx_CV (UL(1) << 24)
......@@ -157,9 +169,10 @@
/*
* User space cache operations have the following sysreg encoding
* in System instructions.
* op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 14 }, WRITE (L=0)
* op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 12, 14 }, WRITE (L=0)
*/
#define ESR_ELx_SYS64_ISS_CRM_DC_CIVAC 14
#define ESR_ELx_SYS64_ISS_CRM_DC_CVAP 12
#define ESR_ELx_SYS64_ISS_CRM_DC_CVAU 11
#define ESR_ELx_SYS64_ISS_CRM_DC_CVAC 10
#define ESR_ELx_SYS64_ISS_CRM_IC_IVAU 5
......@@ -209,6 +222,13 @@
#ifndef __ASSEMBLY__
#include <asm/types.h>
static inline bool esr_is_data_abort(u32 esr)
{
const u32 ec = ESR_ELx_EC(esr);
return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR;
}
const char *esr_get_class_string(u32 esr);
#endif /* __ASSEMBLY */
......
......@@ -41,16 +41,6 @@ struct fpsimd_state {
unsigned int cpu;
};
/*
* Struct for stacking the bottom 'n' FP/SIMD registers.
*/
struct fpsimd_partial_state {
u32 fpsr;
u32 fpcr;
u32 num_regs;
__uint128_t vregs[32];
};
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/* Masks for extracting the FPSR and FPCR from the FPSCR */
......@@ -77,9 +67,9 @@ extern void fpsimd_update_current_state(struct fpsimd_state *state);
extern void fpsimd_flush_task_state(struct task_struct *target);
extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state,
u32 num_regs);
extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state);
/* For use by EFI runtime services calls only */
extern void __efi_fpsimd_begin(void);
extern void __efi_fpsimd_end(void);
#endif
......
......@@ -75,59 +75,3 @@
ldr w\tmpnr, [\state, #16 * 2 + 4]
fpsimd_restore_fpcr x\tmpnr, \state
.endm
.macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2
mrs x\tmpnr1, fpsr
str w\numnr, [\state, #8]
mrs x\tmpnr2, fpcr
stp w\tmpnr1, w\tmpnr2, [\state]
adr x\tmpnr1, 0f
add \state, \state, x\numnr, lsl #4
sub x\tmpnr1, x\tmpnr1, x\numnr, lsl #1
br x\tmpnr1
stp q30, q31, [\state, #-16 * 30 - 16]
stp q28, q29, [\state, #-16 * 28 - 16]
stp q26, q27, [\state, #-16 * 26 - 16]
stp q24, q25, [\state, #-16 * 24 - 16]
stp q22, q23, [\state, #-16 * 22 - 16]
stp q20, q21, [\state, #-16 * 20 - 16]
stp q18, q19, [\state, #-16 * 18 - 16]
stp q16, q17, [\state, #-16 * 16 - 16]
stp q14, q15, [\state, #-16 * 14 - 16]
stp q12, q13, [\state, #-16 * 12 - 16]
stp q10, q11, [\state, #-16 * 10 - 16]
stp q8, q9, [\state, #-16 * 8 - 16]
stp q6, q7, [\state, #-16 * 6 - 16]
stp q4, q5, [\state, #-16 * 4 - 16]
stp q2, q3, [\state, #-16 * 2 - 16]
stp q0, q1, [\state, #-16 * 0 - 16]
0:
.endm
.macro fpsimd_restore_partial state, tmpnr1, tmpnr2
ldp w\tmpnr1, w\tmpnr2, [\state]
msr fpsr, x\tmpnr1
fpsimd_restore_fpcr x\tmpnr2, x\tmpnr1
adr x\tmpnr1, 0f
ldr w\tmpnr2, [\state, #8]
add \state, \state, x\tmpnr2, lsl #4
sub x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1
br x\tmpnr1
ldp q30, q31, [\state, #-16 * 30 - 16]
ldp q28, q29, [\state, #-16 * 28 - 16]
ldp q26, q27, [\state, #-16 * 26 - 16]
ldp q24, q25, [\state, #-16 * 24 - 16]
ldp q22, q23, [\state, #-16 * 22 - 16]
ldp q20, q21, [\state, #-16 * 20 - 16]
ldp q18, q19, [\state, #-16 * 18 - 16]
ldp q16, q17, [\state, #-16 * 16 - 16]
ldp q14, q15, [\state, #-16 * 14 - 16]
ldp q12, q13, [\state, #-16 * 12 - 16]
ldp q10, q11, [\state, #-16 * 10 - 16]
ldp q8, q9, [\state, #-16 * 8 - 16]
ldp q6, q7, [\state, #-16 * 6 - 16]
ldp q4, q5, [\state, #-16 * 4 - 16]
ldp q2, q3, [\state, #-16 * 2 - 16]
ldp q0, q1, [\state, #-16 * 0 - 16]
0:
.endm
......@@ -18,7 +18,6 @@
#ifndef __ASM_HUGETLB_H
#define __ASM_HUGETLB_H
#include <asm-generic/hugetlb.h>
#include <asm/page.h>
static inline pte_t huge_ptep_get(pte_t *ptep)
......@@ -82,6 +81,14 @@ extern void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep);
extern void huge_ptep_clear_flush(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep);
extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned long sz);
#define huge_pte_clear huge_pte_clear
extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, unsigned long sz);
#define set_huge_swap_pte_at set_huge_swap_pte_at
#include <asm-generic/hugetlb.h>
#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
static inline bool gigantic_page_supported(void) { return true; }
......
#ifndef __ASM_IRQ_H
#define __ASM_IRQ_H
#define IRQ_STACK_SIZE THREAD_SIZE
#define IRQ_STACK_START_SP THREAD_START_SP
#ifndef __ASSEMBLER__
#include <linux/percpu.h>
#include <asm-generic/irq.h>
#include <asm/thread_info.h>
struct pt_regs;
DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack);
/*
* The highest address on the stack, and the first to be used. Used to
* find the dummy-stack frame put down by el?_irq() in entry.S, which
* is structured as follows:
*
* ------------
* | | <- irq_stack_ptr
* top ------------
* | x19 | <- irq_stack_ptr - 0x08
* ------------
* | x29 | <- irq_stack_ptr - 0x10
* ------------
*
* where x19 holds a copy of the task stack pointer where the struct pt_regs
* from kernel_entry can be found.
*
*/
#define IRQ_STACK_PTR(cpu) ((unsigned long)per_cpu(irq_stack, cpu) + IRQ_STACK_START_SP)
/*
* The offset from irq_stack_ptr where entry.S will store the original
* stack pointer. Used by unwind_frame() and dump_backtrace().
*/
#define IRQ_STACK_TO_TASK_STACK(ptr) (*((unsigned long *)((ptr) - 0x08)))
extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
static inline int nr_legacy_irqs(void)
......@@ -47,14 +14,5 @@ static inline int nr_legacy_irqs(void)
return 0;
}
static inline bool on_irq_stack(unsigned long sp, int cpu)
{
/* variable names the same as kernel/stacktrace.c */
unsigned long low = (unsigned long)per_cpu(irq_stack, cpu);
unsigned long high = low + IRQ_STACK_START_SP;
return (low <= sp && sp <= high);
}
#endif /* !__ASSEMBLER__ */
#endif
......@@ -175,18 +175,15 @@ static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
static inline void kvm_set_s2pte_readonly(pte_t *pte)
{
pteval_t pteval;
unsigned long tmp;
asm volatile("// kvm_set_s2pte_readonly\n"
" prfm pstl1strm, %2\n"
"1: ldxr %0, %2\n"
" and %0, %0, %3 // clear PTE_S2_RDWR\n"
" orr %0, %0, %4 // set PTE_S2_RDONLY\n"
" stxr %w1, %0, %2\n"
" cbnz %w1, 1b\n"
: "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*pte))
: "L" (~PTE_S2_RDWR), "L" (PTE_S2_RDONLY));
pteval_t old_pteval, pteval;
pteval = READ_ONCE(pte_val(*pte));
do {
old_pteval = pteval;
pteval &= ~PTE_S2_RDWR;
pteval |= PTE_S2_RDONLY;
pteval = cmpxchg_relaxed(&pte_val(*pte), old_pteval, pteval);
} while (pteval != old_pteval);
}
static inline bool kvm_s2pte_readonly(pte_t *pte)
......
......@@ -25,6 +25,7 @@
#include <linux/const.h>
#include <linux/types.h>
#include <asm/bug.h>
#include <asm/page-def.h>
#include <asm/sizes.h>
/*
......@@ -103,6 +104,58 @@
#define KASAN_SHADOW_SIZE (0)
#endif
#define MIN_THREAD_SHIFT 14
/*
* VMAP'd stacks are allocated at page granularity, so we must ensure that such
* stacks are a multiple of page size.
*/
#if defined(CONFIG_VMAP_STACK) && (MIN_THREAD_SHIFT < PAGE_SHIFT)
#define THREAD_SHIFT PAGE_SHIFT
#else
#define THREAD_SHIFT MIN_THREAD_SHIFT
#endif
#if THREAD_SHIFT >= PAGE_SHIFT
#define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT)
#endif
#define THREAD_SIZE (UL(1) << THREAD_SHIFT)
/*
* By aligning VMAP'd stacks to 2 * THREAD_SIZE, we can detect overflow by
* checking sp & (1 << THREAD_SHIFT), which we can do cheaply in the entry
* assembly.
*/
#ifdef CONFIG_VMAP_STACK
#define THREAD_ALIGN (2 * THREAD_SIZE)
#else
#define THREAD_ALIGN THREAD_SIZE
#endif
#define IRQ_STACK_SIZE THREAD_SIZE
#define OVERFLOW_STACK_SIZE SZ_4K
/*
* Alignment of kernel segments (e.g. .text, .data).
*/
#if defined(CONFIG_DEBUG_ALIGN_RODATA)
/*
* 4 KB granule: 1 level 2 entry
* 16 KB granule: 128 level 3 entries, with contiguous bit
* 64 KB granule: 32 level 3 entries, with contiguous bit
*/
#define SEGMENT_ALIGN SZ_2M
#else
/*
* 4 KB granule: 16 level 3 entries, with contiguous bit
* 16 KB granule: 4 level 3 entries, without contiguous bit
* 64 KB granule: 1 level 3 entry
*/
#define SEGMENT_ALIGN SZ_64K
#endif
/*
* Memory types available.
*/
......
......@@ -16,6 +16,8 @@
#ifndef __ASM_MMU_H
#define __ASM_MMU_H
#define MMCF_AARCH32 0x1 /* mm context flag for AArch32 executables */
typedef struct {
atomic64_t id;
void *vdso;
......
......@@ -8,12 +8,22 @@
* published by the Free Software Foundation.
*/
#ifndef __ASM_NEON_H
#define __ASM_NEON_H
#include <linux/types.h>
#include <asm/fpsimd.h>
#define cpu_has_neon() system_supports_fpsimd()
#define kernel_neon_begin() kernel_neon_begin_partial(32)
void kernel_neon_begin_partial(u32 num_regs);
void kernel_neon_begin(void);
void kernel_neon_end(void);
/*
* Temporary macro to allow the crypto code to compile. Note that the
* semantics of kernel_neon_begin_partial() are now different from the
* original as it does not allow being called in an interrupt context.
*/
#define kernel_neon_begin_partial(num_regs) kernel_neon_begin()
#endif /* ! __ASM_NEON_H */
......@@ -7,9 +7,6 @@
#define NR_NODE_MEMBLKS (MAX_NUMNODES * 2)
/* currently, arm64 implements flat NUMA topology */
#define parent_node(node) (node)
int __node_distance(int from, int to);
#define node_distance(a, b) __node_distance(a, b)
......
/*
* Based on arch/arm/include/asm/page.h
*
* Copyright (C) 1995-2003 Russell King
* Copyright (C) 2017 ARM Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_PAGE_DEF_H
#define __ASM_PAGE_DEF_H
#include <linux/const.h>
/* PAGE_SHIFT determines the page size */
/* CONT_SHIFT determines the number of pages which can be tracked together */
#define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT
#define CONT_SHIFT CONFIG_ARM64_CONT_SHIFT
#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
#define CONT_SIZE (_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT))
#define CONT_MASK (~(CONT_SIZE-1))
#endif /* __ASM_PAGE_DEF_H */
......@@ -19,17 +19,7 @@
#ifndef __ASM_PAGE_H
#define __ASM_PAGE_H
#include <linux/const.h>
/* PAGE_SHIFT determines the page size */
/* CONT_SHIFT determines the number of pages which can be tracked together */
#define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT
#define CONT_SHIFT CONFIG_ARM64_CONT_SHIFT
#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
#define CONT_SIZE (_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT))
#define CONT_MASK (~(CONT_SIZE-1))
#include <asm/page-def.h>
#ifndef __ASSEMBLY__
......
......@@ -63,23 +63,21 @@
#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN)
#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN)
#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
#define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
#define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_NG | PTE_PXN)
#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
#define __P000 PAGE_NONE
#define __P001 PAGE_READONLY
#define __P010 PAGE_COPY
#define __P011 PAGE_COPY
#define __P010 PAGE_READONLY
#define __P011 PAGE_READONLY
#define __P100 PAGE_EXECONLY
#define __P101 PAGE_READONLY_EXEC
#define __P110 PAGE_COPY_EXEC
#define __P111 PAGE_COPY_EXEC
#define __P110 PAGE_READONLY_EXEC
#define __P111 PAGE_READONLY_EXEC
#define __S000 PAGE_NONE
#define __S001 PAGE_READONLY
......
......@@ -39,6 +39,7 @@
#ifndef __ASSEMBLY__
#include <asm/cmpxchg.h>
#include <asm/fixmap.h>
#include <linux/mmdebug.h>
......@@ -84,11 +85,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
(__boundary - 1 < (end) - 1) ? __boundary : (end); \
})
#ifdef CONFIG_ARM64_HW_AFDBM
#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
#else
#define pte_hw_dirty(pte) (0)
#endif
#define pte_sw_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY))
#define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte))
......@@ -124,12 +121,16 @@ static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot)
static inline pte_t pte_wrprotect(pte_t pte)
{
return clear_pte_bit(pte, __pgprot(PTE_WRITE));
pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
return pte;
}
static inline pte_t pte_mkwrite(pte_t pte)
{
return set_pte_bit(pte, __pgprot(PTE_WRITE));
pte = set_pte_bit(pte, __pgprot(PTE_WRITE));
pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
return pte;
}
static inline pte_t pte_mkclean(pte_t pte)
......@@ -168,11 +169,6 @@ static inline pte_t pte_mknoncont(pte_t pte)
return clear_pte_bit(pte, __pgprot(PTE_CONT));
}
static inline pte_t pte_clear_rdonly(pte_t pte)
{
return clear_pte_bit(pte, __pgprot(PTE_RDONLY));
}
static inline pte_t pte_mkpresent(pte_t pte)
{
return set_pte_bit(pte, __pgprot(PTE_VALID));
......@@ -220,22 +216,15 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
if (pte_present(pte)) {
if (pte_sw_dirty(pte) && pte_write(pte))
pte_val(pte) &= ~PTE_RDONLY;
else
pte_val(pte) |= PTE_RDONLY;
if (pte_user_exec(pte) && !pte_special(pte))
if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
__sync_icache_dcache(pte, addr);
}
/*
* If the existing pte is valid, check for potential race with
* hardware updates of the pte (ptep_set_access_flags safely changes
* valid ptes without going through an invalid entry).
*/
if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) &&
pte_valid(*ptep) && pte_valid(pte)) {
if (pte_valid(*ptep) && pte_valid(pte)) {
VM_WARN_ONCE(!pte_young(pte),
"%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
__func__, pte_val(*ptep), pte_val(pte));
......@@ -571,7 +560,6 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
}
#ifdef CONFIG_ARM64_HW_AFDBM
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
......@@ -593,20 +581,17 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int __ptep_test_and_clear_young(pte_t *ptep)
{
pteval_t pteval;
unsigned int tmp, res;
pte_t old_pte, pte;
asm volatile("// __ptep_test_and_clear_young\n"
" prfm pstl1strm, %2\n"
"1: ldxr %0, %2\n"
" ubfx %w3, %w0, %5, #1 // extract PTE_AF (young)\n"
" and %0, %0, %4 // clear PTE_AF\n"
" stxr %w1, %0, %2\n"
" cbnz %w1, 1b\n"
: "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)), "=&r" (res)
: "L" (~PTE_AF), "I" (ilog2(PTE_AF)));
pte = READ_ONCE(*ptep);
do {
old_pte = pte;
pte = pte_mkold(pte);
pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
pte_val(old_pte), pte_val(pte));
} while (pte_val(pte) != pte_val(old_pte));
return res;
return pte_young(pte);
}
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
......@@ -630,17 +615,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
unsigned long address, pte_t *ptep)
{
pteval_t old_pteval;
unsigned int tmp;
asm volatile("// ptep_get_and_clear\n"
" prfm pstl1strm, %2\n"
"1: ldxr %0, %2\n"
" stxr %w1, xzr, %2\n"
" cbnz %w1, 1b\n"
: "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)));
return __pte(old_pteval);
return __pte(xchg_relaxed(&pte_val(*ptep), 0));
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
......@@ -653,27 +628,32 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
* ptep_set_wrprotect - mark read-only while trasferring potential hardware
* dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
* ptep_set_wrprotect - mark read-only while preserving the hardware update of
* the Access Flag.
*/
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
pteval_t pteval;
unsigned long tmp;
pte_t old_pte, pte;
asm volatile("// ptep_set_wrprotect\n"
" prfm pstl1strm, %2\n"
"1: ldxr %0, %2\n"
" tst %0, %4 // check for hw dirty (!PTE_RDONLY)\n"
" csel %1, %3, xzr, eq // set PTE_DIRTY|PTE_RDONLY if dirty\n"
" orr %0, %0, %1 // if !dirty, PTE_RDONLY is already set\n"
" and %0, %0, %5 // clear PTE_WRITE/PTE_DBM\n"
" stxr %w1, %0, %2\n"
" cbnz %w1, 1b\n"
: "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))
: "r" (PTE_DIRTY|PTE_RDONLY), "L" (PTE_RDONLY), "L" (~PTE_WRITE)
: "cc");
/*
* ptep_set_wrprotect() is only called on CoW mappings which are
* private (!VM_SHARED) with the pte either read-only (!PTE_WRITE &&
* PTE_RDONLY) or writable and software-dirty (PTE_WRITE &&
* !PTE_RDONLY && PTE_DIRTY); see is_cow_mapping() and
* protection_map[]. There is no race with the hardware update of the
* dirty state: clearing of PTE_RDONLY when PTE_WRITE (a.k.a. PTE_DBM)
* is set.
*/
VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(*ptep),
"%s: potential race with hardware DBM", __func__);
pte = READ_ONCE(*ptep);
do {
old_pte = pte;
pte = pte_wrprotect(pte);
pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
pte_val(old_pte), pte_val(pte));
} while (pte_val(pte) != pte_val(old_pte));
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
......@@ -684,7 +664,6 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
}
#endif
#endif /* CONFIG_ARM64_HW_AFDBM */
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
......
......@@ -112,7 +112,7 @@ void tls_preserve_current_state(void);
static inline void start_thread_common(struct pt_regs *regs, unsigned long pc)
{
memset(regs, 0, sizeof(*regs));
regs->syscallno = ~0UL;
forget_syscall(regs);
regs->pc = pc;
}
......@@ -159,7 +159,7 @@ extern struct task_struct *cpu_switch_to(struct task_struct *prev,
struct task_struct *next);
#define task_pt_regs(p) \
((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)
((struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1)
#define KSTK_EIP(tsk) ((unsigned long)task_pt_regs(tsk)->pc)
#define KSTK_ESP(tsk) user_stack_pointer(task_pt_regs(tsk))
......
......@@ -72,8 +72,19 @@
#define COMPAT_PT_TEXT_ADDR 0x10000
#define COMPAT_PT_DATA_ADDR 0x10004
#define COMPAT_PT_TEXT_END_ADDR 0x10008
/*
* If pt_regs.syscallno == NO_SYSCALL, then the thread is not executing
* a syscall -- i.e., its most recent entry into the kernel from
* userspace was not via SVC, or otherwise a tracer cancelled the syscall.
*
* This must have the value -1, for ABI compatibility with ptrace etc.
*/
#define NO_SYSCALL (-1)
#ifndef __ASSEMBLY__
#include <linux/bug.h>
#include <linux/types.h>
/* sizeof(struct user) for AArch32 */
#define COMPAT_USER_SZ 296
......@@ -116,11 +127,29 @@ struct pt_regs {
};
};
u64 orig_x0;
u64 syscallno;
#ifdef __AARCH64EB__
u32 unused2;
s32 syscallno;
#else
s32 syscallno;
u32 unused2;
#endif
u64 orig_addr_limit;
u64 unused; // maintain 16 byte alignment
u64 stackframe[2];
};
static inline bool in_syscall(struct pt_regs const *regs)
{
return regs->syscallno != NO_SYSCALL;
}
static inline void forget_syscall(struct pt_regs *regs)
{
regs->syscallno = NO_SYSCALL;
}
#define MAX_REG_OFFSET offsetof(struct pt_regs, pstate)
#define arch_has_single_step() (1)
......
......@@ -22,8 +22,6 @@
#define AARCH32_KERN_SIGRET_CODE_OFFSET 0x500
extern const compat_ulong_t aarch32_sigret_code[6];
int compat_setup_frame(int usig, struct ksignal *ksig, sigset_t *set,
struct pt_regs *regs);
int compat_setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
......
/*
* Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*/
#ifndef __ASM_SIMD_H
#define __ASM_SIMD_H
#include <linux/compiler.h>
#include <linux/irqflags.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/types.h>
#ifdef CONFIG_KERNEL_MODE_NEON
DECLARE_PER_CPU(bool, kernel_neon_busy);
/*
* may_use_simd - whether it is allowable at this time to issue SIMD
* instructions or access the SIMD register file
*
* Callers must not assume that the result remains true beyond the next
* preempt_enable() or return from softirq context.
*/
static __must_check inline bool may_use_simd(void)
{
/*
* The raw_cpu_read() is racy if called with preemption enabled.
* This is not a bug: kernel_neon_busy is only set when
* preemption is disabled, so we cannot migrate to another CPU
* while it is set, nor can we migrate to a CPU where it is set.
* So, if we find it clear on some CPU then we're guaranteed to
* find it clear on any CPU we could migrate to.
*
* If we are in between kernel_neon_begin()...kernel_neon_end(),
* the flag will be set, but preemption is also disabled, so we
* can't migrate to another CPU and spuriously see it become
* false.
*/
return !in_irq() && !irqs_disabled() && !in_nmi() &&
!raw_cpu_read(kernel_neon_busy);
}
#else /* ! CONFIG_KERNEL_MODE_NEON */
static __must_check inline bool may_use_simd(void) {
return false;
}
#endif /* ! CONFIG_KERNEL_MODE_NEON */
#endif
......@@ -148,7 +148,7 @@ static inline void cpu_panic_kernel(void)
*/
bool cpus_are_stuck_in_kernel(void);
extern void smp_send_crash_stop(void);
extern void crash_smp_send_stop(void);
extern bool smp_crash_stop_failed(void);
#endif /* ifndef __ASSEMBLY__ */
......
......@@ -16,11 +16,15 @@
#ifndef __ASM_STACKTRACE_H
#define __ASM_STACKTRACE_H
struct task_struct;
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <asm/memory.h>
#include <asm/ptrace.h>
struct stackframe {
unsigned long fp;
unsigned long sp;
unsigned long pc;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
unsigned int graph;
......@@ -32,4 +36,57 @@ extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
int (*fn)(struct stackframe *, void *), void *data);
extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk);
DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);
static inline bool on_irq_stack(unsigned long sp)
{
unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr);
unsigned long high = low + IRQ_STACK_SIZE;
if (!low)
return false;
return (low <= sp && sp < high);
}
static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp)
{
unsigned long low = (unsigned long)task_stack_page(tsk);
unsigned long high = low + THREAD_SIZE;
return (low <= sp && sp < high);
}
#ifdef CONFIG_VMAP_STACK
DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
static inline bool on_overflow_stack(unsigned long sp)
{
unsigned long low = (unsigned long)raw_cpu_ptr(overflow_stack);
unsigned long high = low + OVERFLOW_STACK_SIZE;
return (low <= sp && sp < high);
}
#else
static inline bool on_overflow_stack(unsigned long sp) { return false; }
#endif
/*
* We can only safely access per-cpu stacks from current in a non-preemptible
* context.
*/
static inline bool on_accessible_stack(struct task_struct *tsk, unsigned long sp)
{
if (on_task_stack(tsk, sp))
return true;
if (tsk != current || preemptible())
return false;
if (on_irq_stack(sp))
return true;
if (on_overflow_stack(sp))
return true;
return false;
}
#endif /* __ASM_STACKTRACE_H */
......@@ -52,6 +52,10 @@ extern void *__memset(void *, int, __kernel_size_t);
#define __HAVE_ARCH_MEMCMP
extern int memcmp(const void *, const void *, size_t);
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
#define __HAVE_ARCH_MEMCPY_FLUSHCACHE
void memcpy_flushcache(void *dst, const void *src, size_t cnt);
#endif
#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
......
......@@ -329,6 +329,7 @@
#define ID_AA64ISAR1_LRCPC_SHIFT 20
#define ID_AA64ISAR1_FCMA_SHIFT 16
#define ID_AA64ISAR1_JSCVT_SHIFT 12
#define ID_AA64ISAR1_DPB_SHIFT 0
/* id_aa64pfr0 */
#define ID_AA64PFR0_GIC_SHIFT 24
......
......@@ -23,19 +23,11 @@
#include <linux/compiler.h>
#ifdef CONFIG_ARM64_4K_PAGES
#define THREAD_SIZE_ORDER 2
#elif defined(CONFIG_ARM64_16K_PAGES)
#define THREAD_SIZE_ORDER 0
#endif
#define THREAD_SIZE 16384
#define THREAD_START_SP (THREAD_SIZE - 16)
#ifndef __ASSEMBLY__
struct task_struct;
#include <asm/memory.h>
#include <asm/stack_pointer.h>
#include <asm/types.h>
......@@ -68,6 +60,9 @@ struct thread_info {
#define thread_saved_fp(tsk) \
((unsigned long)(tsk->thread.cpu_context.fp))
void arch_setup_new_exec(void);
#define arch_setup_new_exec arch_setup_new_exec
#endif
/*
......
......@@ -53,4 +53,9 @@ static inline int in_exception_text(unsigned long ptr)
return in ? : __in_irqentry_text(ptr);
}
static inline int in_entry_text(unsigned long ptr)
{
return ptr >= (unsigned long)&__entry_text_start &&
ptr < (unsigned long)&__entry_text_end;
}
#endif
......@@ -350,4 +350,16 @@ extern long strncpy_from_user(char *dest, const char __user *src, long count);
extern __must_check long strnlen_user(const char __user *str, long n);
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
struct page;
void memcpy_page_flushcache(char *to, struct page *page, size_t offset, size_t len);
extern unsigned long __must_check __copy_user_flushcache(void *to, const void __user *from, unsigned long n);
static inline int __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
{
kasan_check_write(dst, size);
return __copy_user_flushcache(dst, src, size);
}
#endif
#endif /* __ASM_UACCESS_H */
......@@ -35,5 +35,6 @@
#define HWCAP_JSCVT (1 << 13)
#define HWCAP_FCMA (1 << 14)
#define HWCAP_LRCPC (1 << 15)
#define HWCAP_DCPOP (1 << 16)
#endif /* _UAPI__ASM_HWCAP_H */
......@@ -75,6 +75,7 @@ int main(void)
DEFINE(S_ORIG_X0, offsetof(struct pt_regs, orig_x0));
DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno));
DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit));
DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe));
DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs));
BLANK();
DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter));
......
......@@ -120,6 +120,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_FCMA_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_DPB_SHIFT, 4, 0),
ARM64_FTR_END,
};
......@@ -888,6 +889,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.min_field_value = 0,
.matches = has_no_fpsimd,
},
#ifdef CONFIG_ARM64_PMEM
{
.desc = "Data cache clean to Point of Persistence",
.capability = ARM64_HAS_DCPOP,
.def_scope = SCOPE_SYSTEM,
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64ISAR1_EL1,
.field_pos = ID_AA64ISAR1_DPB_SHIFT,
.min_field_value = 1,
},
#endif
{},
};
......@@ -916,6 +928,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_DCPOP),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_JSCVT),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC),
......
......@@ -68,6 +68,7 @@ static const char *const hwcap_str[] = {
"jscvt",
"fcma",
"lrcpc",
"dcpop",
NULL
};
......
......@@ -41,27 +41,3 @@ ENTRY(fpsimd_load_state)
fpsimd_restore x0, 8
ret
ENDPROC(fpsimd_load_state)
#ifdef CONFIG_KERNEL_MODE_NEON
/*
* Save the bottom n FP registers.
*
* x0 - pointer to struct fpsimd_partial_state
*/
ENTRY(fpsimd_save_partial_state)
fpsimd_save_partial x0, 1, 8, 9
ret
ENDPROC(fpsimd_save_partial_state)
/*
* Load the bottom n FP registers.
*
* x0 - pointer to struct fpsimd_partial_state
*/
ENTRY(fpsimd_load_partial_state)
fpsimd_restore_partial x0, 8, 9
ret
ENDPROC(fpsimd_load_partial_state)
#endif
This diff is collapsed.
......@@ -17,16 +17,19 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/bottom_half.h>
#include <linux/cpu.h>
#include <linux/cpu_pm.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/sched/signal.h>
#include <linux/signal.h>
#include <linux/hardirq.h>
#include <asm/fpsimd.h>
#include <asm/cputype.h>
#include <asm/simd.h>
#define FPEXC_IOF (1 << 0)
#define FPEXC_DZF (1 << 1)
......@@ -62,6 +65,13 @@
* CPU currently contain the most recent userland FPSIMD state of the current
* task.
*
* In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may
* save the task's FPSIMD context back to task_struct from softirq context.
* To prevent this from racing with the manipulation of the task's FPSIMD state
* from task context and thereby corrupting the state, it is necessary to
* protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE
* flag with local_bh_disable() unless softirqs are already masked.
*
* For a certain task, the sequence may look something like this:
* - the task gets scheduled in; if both the task's fpsimd_state.cpu field
* contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu
......@@ -161,11 +171,14 @@ void fpsimd_flush_thread(void)
{
if (!system_supports_fpsimd())
return;
preempt_disable();
local_bh_disable();
memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
fpsimd_flush_task_state(current);
set_thread_flag(TIF_FOREIGN_FPSTATE);
preempt_enable();
local_bh_enable();
}
/*
......@@ -176,10 +189,13 @@ void fpsimd_preserve_current_state(void)
{
if (!system_supports_fpsimd())
return;
preempt_disable();
local_bh_disable();
if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
fpsimd_save_state(&current->thread.fpsimd_state);
preempt_enable();
local_bh_enable();
}
/*
......@@ -191,15 +207,18 @@ void fpsimd_restore_current_state(void)
{
if (!system_supports_fpsimd())
return;
preempt_disable();
local_bh_disable();
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
struct fpsimd_state *st = &current->thread.fpsimd_state;
fpsimd_load_state(st);
this_cpu_write(fpsimd_last_state, st);
__this_cpu_write(fpsimd_last_state, st);
st->cpu = smp_processor_id();
}
preempt_enable();
local_bh_enable();
}
/*
......@@ -211,15 +230,18 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
{
if (!system_supports_fpsimd())
return;
preempt_disable();
local_bh_disable();
fpsimd_load_state(state);
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
struct fpsimd_state *st = &current->thread.fpsimd_state;
this_cpu_write(fpsimd_last_state, st);
__this_cpu_write(fpsimd_last_state, st);
st->cpu = smp_processor_id();
}
preempt_enable();
local_bh_enable();
}
/*
......@@ -232,52 +254,122 @@ void fpsimd_flush_task_state(struct task_struct *t)
#ifdef CONFIG_KERNEL_MODE_NEON
static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate);
static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
DEFINE_PER_CPU(bool, kernel_neon_busy);
EXPORT_PER_CPU_SYMBOL(kernel_neon_busy);
/*
* Kernel-side NEON support functions
*/
void kernel_neon_begin_partial(u32 num_regs)
/*
* kernel_neon_begin(): obtain the CPU FPSIMD registers for use by the calling
* context
*
* Must not be called unless may_use_simd() returns true.
* Task context in the FPSIMD registers is saved back to memory as necessary.
*
* A matching call to kernel_neon_end() must be made before returning from the
* calling context.
*
* The caller may freely use the FPSIMD registers until kernel_neon_end() is
* called.
*/
void kernel_neon_begin(void)
{
if (WARN_ON(!system_supports_fpsimd()))
return;
if (in_interrupt()) {
struct fpsimd_partial_state *s = this_cpu_ptr(
in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
BUG_ON(num_regs > 32);
fpsimd_save_partial_state(s, roundup(num_regs, 2));
} else {
/*
* Save the userland FPSIMD state if we have one and if we
* haven't done so already. Clear fpsimd_last_state to indicate
* that there is no longer userland FPSIMD state in the
* registers.
*/
preempt_disable();
if (current->mm &&
!test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
BUG_ON(!may_use_simd());
local_bh_disable();
__this_cpu_write(kernel_neon_busy, true);
/* Save unsaved task fpsimd state, if any: */
if (current->mm && !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
fpsimd_save_state(&current->thread.fpsimd_state);
this_cpu_write(fpsimd_last_state, NULL);
}
/* Invalidate any task state remaining in the fpsimd regs: */
__this_cpu_write(fpsimd_last_state, NULL);
preempt_disable();
local_bh_enable();
}
EXPORT_SYMBOL(kernel_neon_begin_partial);
EXPORT_SYMBOL(kernel_neon_begin);
/*
* kernel_neon_end(): give the CPU FPSIMD registers back to the current task
*
* Must be called from a context in which kernel_neon_begin() was previously
* called, with no call to kernel_neon_end() in the meantime.
*
* The caller must not use the FPSIMD registers after this function is called,
* unless kernel_neon_begin() is called again in the meantime.
*/
void kernel_neon_end(void)
{
bool busy;
if (!system_supports_fpsimd())
return;
if (in_interrupt()) {
struct fpsimd_partial_state *s = this_cpu_ptr(
in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
fpsimd_load_partial_state(s);
} else {
busy = __this_cpu_xchg(kernel_neon_busy, false);
WARN_ON(!busy); /* No matching kernel_neon_begin()? */
preempt_enable();
}
}
EXPORT_SYMBOL(kernel_neon_end);
static DEFINE_PER_CPU(struct fpsimd_state, efi_fpsimd_state);
static DEFINE_PER_CPU(bool, efi_fpsimd_state_used);
/*
* EFI runtime services support functions
*
* The ABI for EFI runtime services allows EFI to use FPSIMD during the call.
* This means that for EFI (and only for EFI), we have to assume that FPSIMD
* is always used rather than being an optional accelerator.
*
* These functions provide the necessary support for ensuring FPSIMD
* save/restore in the contexts from which EFI is used.
*
* Do not use them for any other purpose -- if tempted to do so, you are
* either doing something wrong or you need to propose some refactoring.
*/
/*
* __efi_fpsimd_begin(): prepare FPSIMD for making an EFI runtime services call
*/
void __efi_fpsimd_begin(void)
{
if (!system_supports_fpsimd())
return;
WARN_ON(preemptible());
if (may_use_simd())
kernel_neon_begin();
else {
fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state));
__this_cpu_write(efi_fpsimd_state_used, true);
}
}
/*
* __efi_fpsimd_end(): clean up FPSIMD after an EFI runtime services call
*/
void __efi_fpsimd_end(void)
{
if (!system_supports_fpsimd())
return;
if (__this_cpu_xchg(efi_fpsimd_state_used, false))
fpsimd_load_state(this_cpu_ptr(&efi_fpsimd_state));
else
kernel_neon_end();
}
#endif /* CONFIG_KERNEL_MODE_NEON */
#ifdef CONFIG_CPU_PM
......
......@@ -143,8 +143,8 @@ preserve_boot_args:
dmb sy // needed before dc ivac with
// MMU off
add x1, x0, #0x20 // 4 x 8 bytes
b __inval_cache_range // tail call
mov x1, #0x20 // 4 x 8 bytes
b __inval_dcache_area // tail call
ENDPROC(preserve_boot_args)
/*
......@@ -221,20 +221,20 @@ __create_page_tables:
* dirty cache lines being evicted.
*/
adrp x0, idmap_pg_dir
adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
bl __inval_cache_range
ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE)
bl __inval_dcache_area
/*
* Clear the idmap and swapper page tables.
*/
adrp x0, idmap_pg_dir
adrp x6, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE)
1: stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
cmp x0, x6
b.lo 1b
subs x1, x1, #64
b.ne 1b
mov x7, SWAPPER_MM_MMUFLAGS
......@@ -307,9 +307,9 @@ __create_page_tables:
* tables again to remove any speculatively loaded cache lines.
*/
adrp x0, idmap_pg_dir
adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE)
dmb sy
bl __inval_cache_range
bl __inval_dcache_area
ret x28
ENDPROC(__create_page_tables)
......@@ -361,6 +361,9 @@ __primary_switched:
ret // to __primary_switch()
0:
#endif
add sp, sp, #16
mov x29, #0
mov x30, #0
b start_kernel
ENDPROC(__primary_switched)
......@@ -616,6 +619,7 @@ __secondary_switched:
ldr x2, [x0, #CPU_BOOT_TASK]
msr sp_el0, x2
mov x29, #0
mov x30, #0
b secondary_start_kernel
ENDPROC(__secondary_switched)
......
......@@ -330,7 +330,7 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
* read only (code, rodata). Clear the RDONLY bit from
* the temporary mappings we use during restore.
*/
set_pte(dst_pte, pte_clear_rdonly(pte));
set_pte(dst_pte, pte_mkwrite(pte));
} else if (debug_pagealloc_enabled() && !pte_none(pte)) {
/*
* debug_pagealloc will removed the PTE_VALID bit if
......@@ -343,7 +343,7 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
*/
BUG_ON(!pfn_valid(pte_pfn(pte)));
set_pte(dst_pte, pte_mkpresent(pte_clear_rdonly(pte)));
set_pte(dst_pte, pte_mkpresent(pte_mkwrite(pte)));
}
}
......
......@@ -23,15 +23,16 @@
#include <linux/kernel_stat.h>
#include <linux/irq.h>
#include <linux/memory.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/irqchip.h>
#include <linux/seq_file.h>
#include <linux/vmalloc.h>
unsigned long irq_err_count;
/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */
DEFINE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack) __aligned(16);
DEFINE_PER_CPU(unsigned long *, irq_stack_ptr);
int arch_show_interrupts(struct seq_file *p, int prec)
{
......@@ -50,8 +51,43 @@ void __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
handle_arch_irq = handle_irq;
}
#ifdef CONFIG_VMAP_STACK
static void init_irq_stacks(void)
{
int cpu;
unsigned long *p;
for_each_possible_cpu(cpu) {
/*
* To ensure that VMAP'd stack overflow detection works
* correctly, the IRQ stacks need to have the same
* alignment as other stacks.
*/
p = __vmalloc_node_range(IRQ_STACK_SIZE, THREAD_ALIGN,
VMALLOC_START, VMALLOC_END,
THREADINFO_GFP, PAGE_KERNEL,
0, cpu_to_node(cpu),
__builtin_return_address(0));
per_cpu(irq_stack_ptr, cpu) = p;
}
}
#else
/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */
DEFINE_PER_CPU_ALIGNED(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack);
static void init_irq_stacks(void)
{
int cpu;
for_each_possible_cpu(cpu)
per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack, cpu);
}
#endif
void __init init_IRQ(void)
{
init_irq_stacks();
irqchip_init();
if (!handle_arch_irq)
panic("No interrupt controller found.");
......
......@@ -252,7 +252,7 @@ void machine_crash_shutdown(struct pt_regs *regs)
local_irq_disable();
/* shutdown non-crashing cpus */
smp_send_crash_stop();
crash_smp_send_stop();
/* for crashing cpu */
crash_save_cpu(regs, smp_processor_id());
......
......@@ -162,7 +162,6 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
}
frame.fp = regs->regs[29];
frame.sp = regs->sp;
frame.pc = regs->pc;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
frame.graph = current->curr_ret_stack;
......
This diff is collapsed.
......@@ -40,7 +40,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
probe_opcode_t insn;
/* TODO: Currently we do not support AARCH32 instruction probing */
if (test_bit(TIF_32BIT, &mm->context.flags))
if (mm->context.flags & MMCF_AARCH32)
return -ENOTSUPP;
else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE))
return -EINVAL;
......
......@@ -384,15 +384,12 @@ unsigned long get_wchan(struct task_struct *p)
return 0;
frame.fp = thread_saved_fp(p);
frame.sp = thread_saved_sp(p);
frame.pc = thread_saved_pc(p);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
frame.graph = p->curr_ret_stack;
#endif
do {
if (frame.sp < stack_page ||
frame.sp >= stack_page + THREAD_SIZE ||
unwind_frame(p, &frame))
if (unwind_frame(p, &frame))
goto out;
if (!in_sched_functions(frame.pc)) {
ret = frame.pc;
......@@ -419,3 +416,11 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
else
return randomize_page(mm->brk, SZ_1G);
}
/*
* Called from setup_new_exec() after (COMPAT_)SET_PERSONALITY.
*/
void arch_setup_new_exec(void)
{
current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0;
}
......@@ -42,6 +42,7 @@
#include <asm/compat.h>
#include <asm/debug-monitors.h>
#include <asm/pgtable.h>
#include <asm/stacktrace.h>
#include <asm/syscall.h>
#include <asm/traps.h>
#include <asm/system_misc.h>
......@@ -127,7 +128,7 @@ static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
{
return ((addr & ~(THREAD_SIZE - 1)) ==
(kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))) ||
on_irq_stack(addr, raw_smp_processor_id());
on_irq_stack(addr);
}
/**
......@@ -1363,7 +1364,7 @@ static void tracehook_report_syscall(struct pt_regs *regs,
if (dir == PTRACE_SYSCALL_EXIT)
tracehook_report_syscall_exit(regs, 0);
else if (tracehook_report_syscall_entry(regs))
regs->syscallno = ~0UL;
forget_syscall(regs);
regs->regs[regno] = saved_reg;
}
......
......@@ -42,7 +42,6 @@ void *return_address(unsigned int level)
data.addr = NULL;
frame.fp = (unsigned long)__builtin_frame_address(0);
frame.sp = current_stack_pointer;
frame.pc = (unsigned long)return_address; /* dummy */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
frame.graph = current->curr_ret_stack;
......
......@@ -37,6 +37,7 @@
#include <asm/ucontext.h>
#include <asm/unistd.h>
#include <asm/fpsimd.h>
#include <asm/ptrace.h>
#include <asm/signal32.h>
#include <asm/vdso.h>
......@@ -388,7 +389,7 @@ static int restore_sigframe(struct pt_regs *regs,
/*
* Avoid sys_rt_sigreturn() restarting.
*/
regs->syscallno = ~0UL;
forget_syscall(regs);
err |= !valid_user_regs(&regs->user_regs, current);
if (err == 0)
......@@ -674,13 +675,12 @@ static void do_signal(struct pt_regs *regs)
{
unsigned long continue_addr = 0, restart_addr = 0;
int retval = 0;
int syscall = (int)regs->syscallno;
struct ksignal ksig;
/*
* If we were from a system call, check for system call restarting...
*/
if (syscall >= 0) {
if (in_syscall(regs)) {
continue_addr = regs->pc;
restart_addr = continue_addr - (compat_thumb_mode(regs) ? 2 : 4);
retval = regs->regs[0];
......@@ -688,7 +688,7 @@ static void do_signal(struct pt_regs *regs)
/*
* Avoid additional syscall restarting via ret_to_user.
*/
regs->syscallno = ~0UL;
forget_syscall(regs);
/*
* Prepare for system call restart. We do this here so that a
......@@ -732,7 +732,7 @@ static void do_signal(struct pt_regs *regs)
* Handle restarting a different system call. As above, if a debugger
* has chosen to restart at a different PC, ignore the restart.
*/
if (syscall >= 0 && regs->pc == restart_addr) {
if (in_syscall(regs) && regs->pc == restart_addr) {
if (retval == -ERESTART_RESTARTBLOCK)
setup_restart_syscall(regs);
user_rewind_single_step(current);
......
......@@ -354,7 +354,7 @@ static int compat_restore_sigframe(struct pt_regs *regs,
/*
* Avoid compat_sys_sigreturn() restarting.
*/
regs->syscallno = ~0UL;
forget_syscall(regs);
err |= !valid_user_regs(&regs->user_regs, current);
......
......@@ -154,7 +154,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
* page tables.
*/
secondary_data.task = idle;
secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
secondary_data.stack = task_stack_page(idle) + THREAD_SIZE;
update_cpu_boot_status(CPU_MMU_OFF);
__flush_dcache_area(&secondary_data, sizeof(secondary_data));
......@@ -977,11 +977,21 @@ void smp_send_stop(void)
}
#ifdef CONFIG_KEXEC_CORE
void smp_send_crash_stop(void)
void crash_smp_send_stop(void)
{
static int cpus_stopped;
cpumask_t mask;
unsigned long timeout;
/*
* This function can be called twice in panic path, but obviously
* we execute this only once.
*/
if (cpus_stopped)
return;
cpus_stopped = 1;
if (num_online_cpus() == 1)
return;
......
......@@ -42,33 +42,17 @@
*/
int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
{
unsigned long high, low;
unsigned long fp = frame->fp;
unsigned long irq_stack_ptr;
if (fp & 0xf)
return -EINVAL;
if (!tsk)
tsk = current;
/*
* Switching between stacks is valid when tracing current and in
* non-preemptible context.
*/
if (tsk == current && !preemptible())
irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id());
else
irq_stack_ptr = 0;
low = frame->sp;
/* irq stacks are not THREAD_SIZE aligned */
if (on_irq_stack(frame->sp, raw_smp_processor_id()))
high = irq_stack_ptr;
else
high = ALIGN(low, THREAD_SIZE) - 0x20;
if (fp < low || fp > high || fp & 0xf)
if (!on_accessible_stack(tsk, fp))
return -EINVAL;
frame->sp = fp + 0x10;
frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
......@@ -86,34 +70,13 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
/*
* Check whether we are going to walk through from interrupt stack
* to task stack.
* If we reach the end of the stack - and its an interrupt stack,
* unpack the dummy frame to find the original elr.
*
* Check the frame->fp we read from the bottom of the irq_stack,
* and the original task stack pointer are both in current->stack.
*/
if (frame->sp == irq_stack_ptr) {
struct pt_regs *irq_args;
unsigned long orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr);
if (object_is_on_stack((void *)orig_sp) &&
object_is_on_stack((void *)frame->fp)) {
frame->sp = orig_sp;
/* orig_sp is the saved pt_regs, find the elr */
irq_args = (struct pt_regs *)orig_sp;
frame->pc = irq_args->pc;
} else {
/*
* This frame has a non-standard format, and we
* didn't fix it, because the data looked wrong.
* Refuse to output this frame.
* Frames created upon entry from EL0 have NULL FP and PC values, so
* don't bother reporting these. Frames created by __noreturn functions
* might have a valid FP even if PC is bogus, so only terminate where
* both are NULL.
*/
if (!frame->fp && !frame->pc)
return -EINVAL;
}
}
return 0;
}
......@@ -167,7 +130,6 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
data.no_sched_functions = 0;
frame.fp = regs->regs[29];
frame.sp = regs->sp;
frame.pc = regs->pc;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
frame.graph = current->curr_ret_stack;
......@@ -192,12 +154,10 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
if (tsk != current) {
data.no_sched_functions = 1;
frame.fp = thread_saved_fp(tsk);
frame.sp = thread_saved_sp(tsk);
frame.pc = thread_saved_pc(tsk);
} else {
data.no_sched_functions = 0;
frame.fp = (unsigned long)__builtin_frame_address(0);
frame.sp = current_stack_pointer;
frame.pc = (unsigned long)save_stack_trace_tsk;
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
......
......@@ -50,7 +50,6 @@ unsigned long profile_pc(struct pt_regs *regs)
return regs->pc;
frame.fp = regs->regs[29];
frame.sp = regs->sp;
frame.pc = regs->pc;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
frame.graph = -1; /* no task info */
......
......@@ -32,6 +32,7 @@
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
#include <linux/sizes.h>
#include <linux/syscalls.h>
#include <linux/mm_types.h>
......@@ -41,6 +42,7 @@
#include <asm/esr.h>
#include <asm/insn.h>
#include <asm/traps.h>
#include <asm/smp.h>
#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>
#include <asm/exception.h>
......@@ -143,7 +145,6 @@ static void dump_instr(const char *lvl, struct pt_regs *regs)
void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
{
struct stackframe frame;
unsigned long irq_stack_ptr;
int skip;
pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
......@@ -154,25 +155,14 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
if (!try_get_task_stack(tsk))
return;
/*
* Switching between stacks is valid when tracing current and in
* non-preemptible context.
*/
if (tsk == current && !preemptible())
irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id());
else
irq_stack_ptr = 0;
if (tsk == current) {
frame.fp = (unsigned long)__builtin_frame_address(0);
frame.sp = current_stack_pointer;
frame.pc = (unsigned long)dump_backtrace;
} else {
/*
* task blocked in __switch_to
*/
frame.fp = thread_saved_fp(tsk);
frame.sp = thread_saved_sp(tsk);
frame.pc = thread_saved_pc(tsk);
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
......@@ -182,13 +172,12 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
skip = !!regs;
printk("Call trace:\n");
while (1) {
unsigned long where = frame.pc;
unsigned long stack;
int ret;
/* skip until specified stack frame */
if (!skip) {
dump_backtrace_entry(where);
dump_backtrace_entry(frame.pc);
} else if (frame.fp == regs->regs[29]) {
skip = 0;
/*
......@@ -203,18 +192,10 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
ret = unwind_frame(tsk, &frame);
if (ret < 0)
break;
stack = frame.sp;
if (in_exception_text(where)) {
/*
* If we switched to the irq_stack before calling this
* exception handler, then the pt_regs will be on the
* task stack. The easiest way to tell is if the large
* pt_regs would overlap with the end of the irq_stack.
*/
if (stack < irq_stack_ptr &&
(stack + sizeof(struct pt_regs)) > irq_stack_ptr)
stack = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr);
if (in_entry_text(frame.pc)) {
stack = frame.fp - offsetof(struct pt_regs, stackframe);
if (on_accessible_stack(tsk, stack))
dump_mem("", "Exception stack", stack,
stack + sizeof(struct pt_regs));
}
......@@ -257,8 +238,6 @@ static int __die(const char *str, int err, struct pt_regs *regs)
end_of_stack(tsk));
if (!user_mode(regs)) {
dump_mem(KERN_EMERG, "Stack: ", regs->sp,
THREAD_SIZE + (unsigned long)task_stack_page(tsk));
dump_backtrace(regs, tsk);
dump_instr(KERN_EMERG, regs);
}
......@@ -484,6 +463,9 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
case ESR_ELx_SYS64_ISS_CRM_DC_CVAC: /* DC CVAC, gets promoted */
__user_cache_maint("dc civac", address, ret);
break;
case ESR_ELx_SYS64_ISS_CRM_DC_CVAP: /* DC CVAP */
__user_cache_maint("sys 3, c7, c12, 1", address, ret);
break;
case ESR_ELx_SYS64_ISS_CRM_DC_CIVAC: /* DC CIVAC */
__user_cache_maint("dc civac", address, ret);
break;
......@@ -593,7 +575,7 @@ asmlinkage long do_ni_syscall(struct pt_regs *regs)
if (show_unhandled_signals_ratelimited()) {
pr_info("%s[%d]: syscall %d\n", current->comm,
task_pid_nr(current), (int)regs->syscallno);
task_pid_nr(current), regs->syscallno);
dump_instr("", regs);
if (user_mode(regs))
__show_regs(regs);
......@@ -689,6 +671,43 @@ asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
force_sig_info(info.si_signo, &info, current);
}
#ifdef CONFIG_VMAP_STACK
DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
__aligned(16);
asmlinkage void handle_bad_stack(struct pt_regs *regs)
{
unsigned long tsk_stk = (unsigned long)current->stack;
unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr);
unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack);
unsigned int esr = read_sysreg(esr_el1);
unsigned long far = read_sysreg(far_el1);
console_verbose();
pr_emerg("Insufficient stack space to handle exception!");
pr_emerg("ESR: 0x%08x -- %s\n", esr, esr_get_class_string(esr));
pr_emerg("FAR: 0x%016lx\n", far);
pr_emerg("Task stack: [0x%016lx..0x%016lx]\n",
tsk_stk, tsk_stk + THREAD_SIZE);
pr_emerg("IRQ stack: [0x%016lx..0x%016lx]\n",
irq_stk, irq_stk + THREAD_SIZE);
pr_emerg("Overflow stack: [0x%016lx..0x%016lx]\n",
ovf_stk, ovf_stk + OVERFLOW_STACK_SIZE);
__show_regs(regs);
/*
* We use nmi_panic to limit the potential for recusive overflows, and
* to get a better stack trace.
*/
nmi_panic(NULL, "kernel stack overflow");
cpu_park_loop();
}
#endif
void __pte_error(const char *file, int line, unsigned long val)
{
pr_err("%s:%d: bad pte %016lx.\n", file, line, val);
......
......@@ -110,12 +110,27 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp)
}
#endif /* CONFIG_COMPAT */
static int vdso_mremap(const struct vm_special_mapping *sm,
struct vm_area_struct *new_vma)
{
unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
unsigned long vdso_size = vdso_end - vdso_start;
if (vdso_size != new_size)
return -EINVAL;
current->mm->context.vdso = (void *)new_vma->vm_start;
return 0;
}
static struct vm_special_mapping vdso_spec[2] __ro_after_init = {
{
.name = "[vvar]",
},
{
.name = "[vdso]",
.mremap = vdso_mremap,
},
};
......
......@@ -72,22 +72,6 @@ PECOFF_FILE_ALIGNMENT = 0x200;
#define PECOFF_EDATA_PADDING
#endif
#if defined(CONFIG_DEBUG_ALIGN_RODATA)
/*
* 4 KB granule: 1 level 2 entry
* 16 KB granule: 128 level 3 entries, with contiguous bit
* 64 KB granule: 32 level 3 entries, with contiguous bit
*/
#define SEGMENT_ALIGN SZ_2M
#else
/*
* 4 KB granule: 16 level 3 entries, with contiguous bit
* 16 KB granule: 4 level 3 entries, without contiguous bit
* 64 KB granule: 1 level 3 entry
*/
#define SEGMENT_ALIGN SZ_64K
#endif
SECTIONS
{
/*
......@@ -192,7 +176,7 @@ SECTIONS
_data = .;
_sdata = .;
RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
/*
* Data written with the MMU off but read with the MMU on requires
......
......@@ -70,7 +70,7 @@ u32 __hyp_text __init_stage2_translation(void)
* Management in ID_AA64MMFR1_EL1 and enable the feature in VTCR_EL2.
*/
tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_HADBS_SHIFT) & 0xf;
if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && tmp)
if (tmp)
val |= VTCR_EL2_HA;
/*
......
......@@ -17,3 +17,5 @@ CFLAGS_atomic_ll_sc.o := -fcall-used-x0 -ffixed-x1 -ffixed-x2 \
-fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \
-fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15 \
-fcall-saved-x18
lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o
/*
* Copyright (C) 2017 ARM Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/uaccess.h>
#include <asm/barrier.h>
#include <asm/cacheflush.h>
void memcpy_flushcache(void *dst, const void *src, size_t cnt)
{
/*
* We assume this should not be called with @dst pointing to
* non-cacheable memory, such that we don't need an explicit
* barrier to order the cache maintenance against the memcpy.
*/
memcpy(dst, src, cnt);
__clean_dcache_area_pop(dst, cnt);
}
EXPORT_SYMBOL_GPL(memcpy_flushcache);
void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
size_t len)
{
memcpy_flushcache(to, page_address(page) + offset, len);
}
unsigned long __copy_user_flushcache(void *to, const void __user *from,
unsigned long n)
{
unsigned long rc = __arch_copy_from_user(to, from, n);
/* See above */
__clean_dcache_area_pop(to, n - rc);
return rc;
}
......@@ -109,20 +109,25 @@ ENTRY(__clean_dcache_area_pou)
ENDPROC(__clean_dcache_area_pou)
/*
* __dma_inv_area(start, size)
* - start - virtual start address of region
* __inval_dcache_area(kaddr, size)
*
* Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
* are invalidated. Any partial lines at the ends of the interval are
* also cleaned to PoC to prevent data loss.
*
* - kaddr - kernel address
* - size - size in question
*/
__dma_inv_area:
add x1, x1, x0
ENTRY(__inval_dcache_area)
/* FALLTHROUGH */
/*
* __inval_cache_range(start, end)
* - start - start address of region
* - end - end address of region
* __dma_inv_area(start, size)
* - start - virtual start address of region
* - size - size in question
*/
ENTRY(__inval_cache_range)
__dma_inv_area:
add x1, x1, x0
dcache_line_size x2, x3
sub x3, x2, #1
tst x1, x3 // end cache line aligned?
......@@ -140,7 +145,7 @@ ENTRY(__inval_cache_range)
b.lo 2b
dsb sy
ret
ENDPIPROC(__inval_cache_range)
ENDPIPROC(__inval_dcache_area)
ENDPROC(__dma_inv_area)
/*
......@@ -166,6 +171,20 @@ __dma_clean_area:
ENDPIPROC(__clean_dcache_area_poc)
ENDPROC(__dma_clean_area)
/*
* __clean_dcache_area_pop(kaddr, size)
*
* Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
* are cleaned to the PoP.
*
* - kaddr - kernel address
* - size - size in question
*/
ENTRY(__clean_dcache_area_pop)
dcache_by_line_op cvap, sy, x0, x1, x2, x3
ret
ENDPIPROC(__clean_dcache_area_pop)
/*
* __dma_flush_area(start, size)
*
......
......@@ -42,7 +42,7 @@ static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot,
return prot;
}
static struct gen_pool *atomic_pool;
static struct gen_pool *atomic_pool __ro_after_init;
#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
......@@ -425,7 +425,7 @@ static int __init atomic_pool_init(void)
gen_pool_set_algo(atomic_pool,
gen_pool_first_fit_order_align,
(void *)PAGE_SHIFT);
NULL);
pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
atomic_pool_size / 1024);
......
......@@ -34,6 +34,7 @@
#include <linux/hugetlb.h>
#include <asm/bug.h>
#include <asm/cmpxchg.h>
#include <asm/cpufeature.h>
#include <asm/exception.h>
#include <asm/debug-monitors.h>
......@@ -82,6 +83,49 @@ static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
}
#endif
static void data_abort_decode(unsigned int esr)
{
pr_alert("Data abort info:\n");
if (esr & ESR_ELx_ISV) {
pr_alert(" Access size = %u byte(s)\n",
1U << ((esr & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT));
pr_alert(" SSE = %lu, SRT = %lu\n",
(esr & ESR_ELx_SSE) >> ESR_ELx_SSE_SHIFT,
(esr & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT);
pr_alert(" SF = %lu, AR = %lu\n",
(esr & ESR_ELx_SF) >> ESR_ELx_SF_SHIFT,
(esr & ESR_ELx_AR) >> ESR_ELx_AR_SHIFT);
} else {
pr_alert(" ISV = 0, ISS = 0x%08lu\n", esr & ESR_ELx_ISS_MASK);
}
pr_alert(" CM = %lu, WnR = %lu\n",
(esr & ESR_ELx_CM) >> ESR_ELx_CM_SHIFT,
(esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT);
}
/*
* Decode mem abort information
*/
static void mem_abort_decode(unsigned int esr)
{
pr_alert("Mem abort info:\n");
pr_alert(" Exception class = %s, IL = %u bits\n",
esr_get_class_string(esr),
(esr & ESR_ELx_IL) ? 32 : 16);
pr_alert(" SET = %lu, FnV = %lu\n",
(esr & ESR_ELx_SET_MASK) >> ESR_ELx_SET_SHIFT,
(esr & ESR_ELx_FnV) >> ESR_ELx_FnV_SHIFT);
pr_alert(" EA = %lu, S1PTW = %lu\n",
(esr & ESR_ELx_EA) >> ESR_ELx_EA_SHIFT,
(esr & ESR_ELx_S1PTW) >> ESR_ELx_S1PTW_SHIFT);
if (esr_is_data_abort(esr))
data_abort_decode(esr);
}
/*
* Dump out the page tables associated with 'addr' in the currently active mm.
*/
......@@ -139,7 +183,6 @@ void show_pte(unsigned long addr)
pr_cont("\n");
}
#ifdef CONFIG_ARM64_HW_AFDBM
/*
* This function sets the access flags (dirty, accessed), as well as write
* permission, and only to a more permissive setting.
......@@ -154,18 +197,13 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
pte_t entry, int dirty)
{
pteval_t old_pteval;
unsigned int tmp;
pteval_t old_pteval, pteval;
if (pte_same(*ptep, entry))
return 0;
/* only preserve the access flags and write permission */
pte_val(entry) &= PTE_AF | PTE_WRITE | PTE_DIRTY;
/* set PTE_RDONLY if actual read-only or clean PTE */
if (!pte_write(entry) || !pte_sw_dirty(entry))
pte_val(entry) |= PTE_RDONLY;
pte_val(entry) &= PTE_RDONLY | PTE_AF | PTE_WRITE | PTE_DIRTY;
/*
* Setting the flags must be done atomically to avoid racing with the
......@@ -174,21 +212,18 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
* (calculated as: a & b == ~(~a | ~b)).
*/
pte_val(entry) ^= PTE_RDONLY;
asm volatile("// ptep_set_access_flags\n"
" prfm pstl1strm, %2\n"
"1: ldxr %0, %2\n"
" eor %0, %0, %3 // negate PTE_RDONLY in *ptep\n"
" orr %0, %0, %4 // set flags\n"
" eor %0, %0, %3 // negate final PTE_RDONLY\n"
" stxr %w1, %0, %2\n"
" cbnz %w1, 1b\n"
: "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))
: "L" (PTE_RDONLY), "r" (pte_val(entry)));
pteval = READ_ONCE(pte_val(*ptep));
do {
old_pteval = pteval;
pteval ^= PTE_RDONLY;
pteval |= pte_val(entry);
pteval ^= PTE_RDONLY;
pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
} while (pteval != old_pteval);
flush_tlb_fix_spurious_fault(vma, address);
return 1;
}
#endif
static bool is_el1_instruction_abort(unsigned int esr)
{
......@@ -248,6 +283,8 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
pr_alert("Unable to handle kernel %s at virtual address %08lx\n", msg,
addr);
mem_abort_decode(esr);
show_pte(addr);
die("Oops", regs, esr);
bust_spinlocks(0);
......@@ -705,6 +742,8 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
pr_alert("Unhandled fault: %s (0x%08x) at 0x%016lx\n",
inf->name, esr, addr);
mem_abort_decode(esr);
info.si_signo = inf->sig;
info.si_errno = 0;
info.si_code = inf->code;
......
......@@ -83,3 +83,19 @@ EXPORT_SYMBOL(flush_dcache_page);
* Additional functions defined in assembly.
*/
EXPORT_SYMBOL(flush_icache_range);
#ifdef CONFIG_ARCH_HAS_PMEM_API
void arch_wb_cache_pmem(void *addr, size_t size)
{
/* Ensure order against any prior non-cacheable writes */
dmb(osh);
__clean_dcache_area_pop(addr, size);
}
EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
void arch_invalidate_pmem(void *addr, size_t size)
{
__inval_dcache_area(addr, size);
}
EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
#endif
......@@ -41,6 +41,16 @@ int pud_huge(pud_t pud)
#endif
}
/*
* Select all bits except the pfn
*/
static inline pgprot_t pte_pgprot(pte_t pte)
{
unsigned long pfn = pte_pfn(pte);
return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
}
static int find_num_contig(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, size_t *pgsize)
{
......@@ -58,15 +68,107 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr,
return CONT_PTES;
}
static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
{
int contig_ptes = 0;
*pgsize = size;
switch (size) {
#ifdef CONFIG_ARM64_4K_PAGES
case PUD_SIZE:
#endif
case PMD_SIZE:
contig_ptes = 1;
break;
case CONT_PMD_SIZE:
*pgsize = PMD_SIZE;
contig_ptes = CONT_PMDS;
break;
case CONT_PTE_SIZE:
*pgsize = PAGE_SIZE;
contig_ptes = CONT_PTES;
break;
}
return contig_ptes;
}
/*
* Changing some bits of contiguous entries requires us to follow a
* Break-Before-Make approach, breaking the whole contiguous set
* before we can change any entries. See ARM DDI 0487A.k_iss10775,
* "Misprogramming of the Contiguous bit", page D4-1762.
*
* This helper performs the break step.
*/
static pte_t get_clear_flush(struct mm_struct *mm,
unsigned long addr,
pte_t *ptep,
unsigned long pgsize,
unsigned long ncontig)
{
struct vm_area_struct vma = { .vm_mm = mm };
pte_t orig_pte = huge_ptep_get(ptep);
bool valid = pte_valid(orig_pte);
unsigned long i, saddr = addr;
for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
pte_t pte = ptep_get_and_clear(mm, addr, ptep);
/*
* If HW_AFDBM is enabled, then the HW could turn on
* the dirty bit for any page in the set, so check
* them all. All hugetlb entries are already young.
*/
if (pte_dirty(pte))
orig_pte = pte_mkdirty(orig_pte);
}
if (valid)
flush_tlb_range(&vma, saddr, addr);
return orig_pte;
}
/*
* Changing some bits of contiguous entries requires us to follow a
* Break-Before-Make approach, breaking the whole contiguous set
* before we can change any entries. See ARM DDI 0487A.k_iss10775,
* "Misprogramming of the Contiguous bit", page D4-1762.
*
* This helper performs the break step for use cases where the
* original pte is not needed.
*/
static void clear_flush(struct mm_struct *mm,
unsigned long addr,
pte_t *ptep,
unsigned long pgsize,
unsigned long ncontig)
{
struct vm_area_struct vma = { .vm_mm = mm };
unsigned long i, saddr = addr;
for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
pte_clear(mm, addr, ptep);
flush_tlb_range(&vma, saddr, addr);
}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
size_t pgsize;
int i;
int ncontig;
unsigned long pfn;
unsigned long pfn, dpfn;
pgprot_t hugeprot;
/*
* Code needs to be expanded to handle huge swap and migration
* entries. Needed for HUGETLB and MEMORY_FAILURE.
*/
WARN_ON(!pte_present(pte));
if (!pte_cont(pte)) {
set_pte_at(mm, addr, ptep, pte);
return;
......@@ -74,17 +176,30 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
ncontig = find_num_contig(mm, addr, ptep, &pgsize);
pfn = pte_pfn(pte);
hugeprot = __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
for (i = 0; i < ncontig; i++) {
dpfn = pgsize >> PAGE_SHIFT;
hugeprot = pte_pgprot(pte);
clear_flush(mm, addr, ptep, pgsize, ncontig);
for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) {
pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep,
pte_val(pfn_pte(pfn, hugeprot)));
set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
ptep++;
pfn += pgsize >> PAGE_SHIFT;
addr += pgsize;
}
}
void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, unsigned long sz)
{
int i, ncontig;
size_t pgsize;
ncontig = num_contig_ptes(sz, &pgsize);
for (i = 0; i < ncontig; i++, ptep++)
set_pte(ptep, pte);
}
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
......@@ -144,19 +259,28 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
return NULL;
pud = pud_offset(pgd, addr);
if (pud_none(*pud))
if (sz != PUD_SIZE && pud_none(*pud))
return NULL;
/* swap or huge page */
if (!pud_present(*pud) || pud_huge(*pud))
/* hugepage or swap? */
if (pud_huge(*pud) || !pud_present(*pud))
return (pte_t *)pud;
/* table; check the next level */
if (sz == CONT_PMD_SIZE)
addr &= CONT_PMD_MASK;
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd))
if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
pmd_none(*pmd))
return NULL;
if (!pmd_present(*pmd) || pmd_huge(*pmd))
if (pmd_huge(*pmd) || !pmd_present(*pmd))
return (pte_t *)pmd;
if (sz == CONT_PTE_SIZE) {
pte_t *pte = pte_offset_kernel(pmd, (addr & CONT_PTE_MASK));
return pte;
}
return NULL;
}
......@@ -176,111 +300,133 @@ pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
return entry;
}
void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned long sz)
{
int i, ncontig;
size_t pgsize;
ncontig = num_contig_ptes(sz, &pgsize);
for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
pte_clear(mm, addr, ptep);
}
pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
pte_t pte;
if (pte_cont(*ptep)) {
int ncontig, i;
int ncontig;
size_t pgsize;
bool is_dirty = false;
pte_t orig_pte = huge_ptep_get(ptep);
ncontig = find_num_contig(mm, addr, ptep, &pgsize);
/* save the 1st pte to return */
pte = ptep_get_and_clear(mm, addr, ptep);
for (i = 1, addr += pgsize; i < ncontig; ++i, addr += pgsize) {
/*
* If HW_AFDBM is enabled, then the HW could
* turn on the dirty bit for any of the page
* in the set, so check them all.
*/
++ptep;
if (pte_dirty(ptep_get_and_clear(mm, addr, ptep)))
is_dirty = true;
}
if (is_dirty)
return pte_mkdirty(pte);
else
return pte;
} else {
if (!pte_cont(orig_pte))
return ptep_get_and_clear(mm, addr, ptep);
}
ncontig = find_num_contig(mm, addr, ptep, &pgsize);
return get_clear_flush(mm, addr, ptep, pgsize, ncontig);
}
int huge_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t pte, int dirty)
{
if (pte_cont(pte)) {
int ncontig, i, changed = 0;
size_t pgsize = 0;
unsigned long pfn = pte_pfn(pte);
/* Select all bits except the pfn */
pgprot_t hugeprot =
__pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^
pte_val(pte));
unsigned long pfn = pte_pfn(pte), dpfn;
pgprot_t hugeprot;
pte_t orig_pte;
pfn = pte_pfn(pte);
ncontig = find_num_contig(vma->vm_mm, addr, ptep,
&pgsize);
for (i = 0; i < ncontig; ++i, ++ptep, addr += pgsize) {
changed |= ptep_set_access_flags(vma, addr, ptep,
pfn_pte(pfn,
hugeprot),
dirty);
pfn += pgsize >> PAGE_SHIFT;
}
return changed;
} else {
if (!pte_cont(pte))
return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
}
ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
dpfn = pgsize >> PAGE_SHIFT;
orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
if (!pte_same(orig_pte, pte))
changed = 1;
/* Make sure we don't lose the dirty state */
if (pte_dirty(orig_pte))
pte = pte_mkdirty(pte);
hugeprot = pte_pgprot(pte);
for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot));
return changed;
}
void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
if (pte_cont(*ptep)) {
unsigned long pfn, dpfn;
pgprot_t hugeprot;
int ncontig, i;
size_t pgsize = 0;
size_t pgsize;
pte_t pte;
ncontig = find_num_contig(mm, addr, ptep, &pgsize);
for (i = 0; i < ncontig; ++i, ++ptep, addr += pgsize)
ptep_set_wrprotect(mm, addr, ptep);
} else {
if (!pte_cont(*ptep)) {
ptep_set_wrprotect(mm, addr, ptep);
return;
}
ncontig = find_num_contig(mm, addr, ptep, &pgsize);
dpfn = pgsize >> PAGE_SHIFT;
pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig);
pte = pte_wrprotect(pte);
hugeprot = pte_pgprot(pte);
pfn = pte_pfn(pte);
for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}
void huge_ptep_clear_flush(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
if (pte_cont(*ptep)) {
int ncontig, i;
size_t pgsize = 0;
size_t pgsize;
int ncontig;
ncontig = find_num_contig(vma->vm_mm, addr, ptep,
&pgsize);
for (i = 0; i < ncontig; ++i, ++ptep, addr += pgsize)
ptep_clear_flush(vma, addr, ptep);
} else {
if (!pte_cont(*ptep)) {
ptep_clear_flush(vma, addr, ptep);
return;
}
ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
}
static __init int setup_hugepagesz(char *opt)
{
unsigned long ps = memparse(opt, &opt);
if (ps == PMD_SIZE) {
hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
} else if (ps == PUD_SIZE) {
hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
} else {
switch (ps) {
#ifdef CONFIG_ARM64_4K_PAGES
case PUD_SIZE:
#endif
case PMD_SIZE * CONT_PMDS:
case PMD_SIZE:
case PAGE_SIZE * CONT_PTES:
hugetlb_add_hstate(ilog2(ps) - PAGE_SHIFT);
return 1;
}
hugetlb_bad_size();
pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
return 0;
}
return 1;
}
__setup("hugepagesz=", setup_hugepagesz);
#ifdef CONFIG_ARM64_64K_PAGES
static __init int add_default_hugepagesz(void)
{
if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL)
hugetlb_add_hstate(CONT_PTE_SHIFT);
return 0;
}
arch_initcall(add_default_hugepagesz);
#endif
......@@ -588,7 +588,8 @@ void acpi_configure_pmsi_domain(struct device *dev)
dev_set_msi_domain(dev, msi_domain);
}
static int __get_pci_rid(struct pci_dev *pdev, u16 alias, void *data)
static int __maybe_unused __get_pci_rid(struct pci_dev *pdev, u16 alias,
void *data)
{
u32 *rid = data;
......@@ -633,8 +634,7 @@ int iort_add_device_replay(const struct iommu_ops *ops, struct device *dev)
{
int err = 0;
if (!IS_ERR_OR_NULL(ops) && ops->add_device && dev->bus &&
!dev->iommu_group)
if (ops->add_device && dev->bus && !dev->iommu_group)
err = ops->add_device(dev);
return err;
......@@ -648,20 +648,19 @@ int iort_add_device_replay(const struct iommu_ops *ops, struct device *dev)
{ return 0; }
#endif
static const struct iommu_ops *iort_iommu_xlate(struct device *dev,
struct acpi_iort_node *node,
static int iort_iommu_xlate(struct device *dev, struct acpi_iort_node *node,
u32 streamid)
{
const struct iommu_ops *ops = NULL;
int ret = -ENODEV;
const struct iommu_ops *ops;
struct fwnode_handle *iort_fwnode;
if (node) {
if (!node)
return -ENODEV;
iort_fwnode = iort_get_fwnode(node);
if (!iort_fwnode)
return NULL;
return -ENODEV;
ops = iommu_ops_from_fwnode(iort_fwnode);
/*
* If the ops look-up fails, this means that either
* the SMMU drivers have not been probed yet or that
......@@ -670,14 +669,28 @@ static const struct iommu_ops *iort_iommu_xlate(struct device *dev,
* in the kernel or not, defer the IOMMU configuration
* or just abort it.
*/
ops = iommu_ops_from_fwnode(iort_fwnode);
if (!ops)
return iort_iommu_driver_enabled(node->type) ?
ERR_PTR(-EPROBE_DEFER) : NULL;
-EPROBE_DEFER : -ENODEV;
ret = arm_smmu_iort_xlate(dev, streamid, iort_fwnode, ops);
}
return arm_smmu_iort_xlate(dev, streamid, iort_fwnode, ops);
}
struct iort_pci_alias_info {
struct device *dev;
struct acpi_iort_node *node;
};
static int iort_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data)
{
struct iort_pci_alias_info *info = data;
struct acpi_iort_node *parent;
u32 streamid;
return ret ? NULL : ops;
parent = iort_node_map_id(info->node, alias, &streamid,
IORT_IOMMU_TYPE);
return iort_iommu_xlate(info->dev, parent, streamid);
}
/**
......@@ -713,9 +726,9 @@ void iort_set_dma_mask(struct device *dev)
const struct iommu_ops *iort_iommu_configure(struct device *dev)
{
struct acpi_iort_node *node, *parent;
const struct iommu_ops *ops = NULL;
const struct iommu_ops *ops;
u32 streamid = 0;
int err;
int err = -ENODEV;
/*
* If we already translated the fwspec there
......@@ -727,21 +740,16 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev)
if (dev_is_pci(dev)) {
struct pci_bus *bus = to_pci_dev(dev)->bus;
u32 rid;
pci_for_each_dma_alias(to_pci_dev(dev), __get_pci_rid,
&rid);
struct iort_pci_alias_info info = { .dev = dev };
node = iort_scan_node(ACPI_IORT_NODE_PCI_ROOT_COMPLEX,
iort_match_node_callback, &bus->dev);
if (!node)
return NULL;
parent = iort_node_map_id(node, rid, &streamid,
IORT_IOMMU_TYPE);
ops = iort_iommu_xlate(dev, parent, streamid);
info.node = node;
err = pci_for_each_dma_alias(to_pci_dev(dev),
iort_pci_iommu_init, &info);
} else {
int i = 0;
......@@ -750,31 +758,30 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev)
if (!node)
return NULL;
parent = iort_node_map_platform_id(node, &streamid,
IORT_IOMMU_TYPE, i++);
while (parent) {
ops = iort_iommu_xlate(dev, parent, streamid);
if (IS_ERR_OR_NULL(ops))
return ops;
do {
parent = iort_node_map_platform_id(node, &streamid,
IORT_IOMMU_TYPE,
i++);
}
if (parent)
err = iort_iommu_xlate(dev, parent, streamid);
} while (parent && !err);
}
/*
* If we have reason to believe the IOMMU driver missed the initial
* add_device callback for dev, replay it to get things in order.
*/
if (!err) {
ops = iort_fwspec_iommu_ops(dev->iommu_fwspec);
err = iort_add_device_replay(ops, dev);
if (err)
ops = ERR_PTR(err);
}
/* Ignore all other errors apart from EPROBE_DEFER */
if (IS_ERR(ops) && (PTR_ERR(ops) != -EPROBE_DEFER)) {
dev_dbg(dev, "Adding to IOMMU failed: %ld\n", PTR_ERR(ops));
if (err == -EPROBE_DEFER) {
ops = ERR_PTR(err);
} else if (err) {
dev_dbg(dev, "Adding to IOMMU failed: %d\n", err);
ops = NULL;
}
......@@ -908,6 +915,27 @@ static bool __init arm_smmu_v3_is_coherent(struct acpi_iort_node *node)
return smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE;
}
#if defined(CONFIG_ACPI_NUMA) && defined(ACPI_IORT_SMMU_V3_PXM_VALID)
/*
* set numa proximity domain for smmuv3 device
*/
static void __init arm_smmu_v3_set_proximity(struct device *dev,
struct acpi_iort_node *node)
{
struct acpi_iort_smmu_v3 *smmu;
smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
if (smmu->flags & ACPI_IORT_SMMU_V3_PXM_VALID) {
set_dev_node(dev, acpi_map_pxm_to_node(smmu->pxm));
pr_info("SMMU-v3[%llx] Mapped to Proximity domain %d\n",
smmu->base_address,
smmu->pxm);
}
}
#else
#define arm_smmu_v3_set_proximity NULL
#endif
static int __init arm_smmu_count_resources(struct acpi_iort_node *node)
{
struct acpi_iort_smmu *smmu;
......@@ -977,13 +1005,16 @@ struct iort_iommu_config {
int (*iommu_count_resources)(struct acpi_iort_node *node);
void (*iommu_init_resources)(struct resource *res,
struct acpi_iort_node *node);
void (*iommu_set_proximity)(struct device *dev,
struct acpi_iort_node *node);
};
static const struct iort_iommu_config iort_arm_smmu_v3_cfg __initconst = {
.name = "arm-smmu-v3",
.iommu_is_coherent = arm_smmu_v3_is_coherent,
.iommu_count_resources = arm_smmu_v3_count_resources,
.iommu_init_resources = arm_smmu_v3_init_resources
.iommu_init_resources = arm_smmu_v3_init_resources,
.iommu_set_proximity = arm_smmu_v3_set_proximity,
};
static const struct iort_iommu_config iort_arm_smmu_cfg __initconst = {
......@@ -1028,6 +1059,9 @@ static int __init iort_add_smmu_platform_device(struct acpi_iort_node *node)
if (!pdev)
return -ENOMEM;
if (ops->iommu_set_proximity)
ops->iommu_set_proximity(&pdev->dev, node);
count = ops->iommu_count_resources(node);
r = kcalloc(count, sizeof(*r), GFP_KERNEL);
......
......@@ -11,6 +11,7 @@
*/
#include <linux/efi.h>
#include <asm/efi.h>
#include <asm/memory.h>
#include <asm/sections.h>
#include <asm/sysreg.h>
......@@ -81,9 +82,10 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table_arg,
/*
* If CONFIG_DEBUG_ALIGN_RODATA is not set, produce a
* displacement in the interval [0, MIN_KIMG_ALIGN) that
* is a multiple of the minimal segment alignment (SZ_64K)
* doesn't violate this kernel's de-facto alignment
* constraints.
*/
u32 mask = (MIN_KIMG_ALIGN - 1) & ~(SZ_64K - 1);
u32 mask = (MIN_KIMG_ALIGN - 1) & ~(EFI_KIMG_ALIGN - 1);
u32 offset = !IS_ENABLED(CONFIG_DEBUG_ALIGN_RODATA) ?
(phys_seed >> 32) & mask : TEXT_OFFSET;
......
......@@ -47,6 +47,9 @@ armpmu_map_cache_event(const unsigned (*cache_map)
if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
return -EINVAL;
if (!cache_map)
return -ENOENT;
ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
if (ret == CACHE_OP_UNSUPPORTED)
......@@ -63,6 +66,9 @@ armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
if (config >= PERF_COUNT_HW_MAX)
return -EINVAL;
if (!event_map)
return -ENOENT;
mapping = (*event_map)[config];
return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
}
......
......@@ -1147,7 +1147,6 @@ xgene_pmu_dev_add(struct xgene_pmu *xgene_pmu, struct xgene_pmu_dev_ctx *ctx)
{
struct device *dev = xgene_pmu->dev;
struct xgene_pmu_dev *pmu;
int rc;
pmu = devm_kzalloc(dev, sizeof(*pmu), GFP_KERNEL);
if (!pmu)
......@@ -1159,7 +1158,7 @@ xgene_pmu_dev_add(struct xgene_pmu *xgene_pmu, struct xgene_pmu_dev_ctx *ctx)
switch (pmu->inf->type) {
case PMU_TYPE_L3C:
if (!(xgene_pmu->l3c_active_mask & pmu->inf->enable_mask))
goto dev_err;
return -ENODEV;
if (xgene_pmu->version == PCP_PMU_V3)
pmu->attr_groups = l3c_pmu_v3_attr_groups;
else
......@@ -1177,7 +1176,7 @@ xgene_pmu_dev_add(struct xgene_pmu *xgene_pmu, struct xgene_pmu_dev_ctx *ctx)
break;
case PMU_TYPE_MCB:
if (!(xgene_pmu->mcb_active_mask & pmu->inf->enable_mask))
goto dev_err;
return -ENODEV;
if (xgene_pmu->version == PCP_PMU_V3)
pmu->attr_groups = mcb_pmu_v3_attr_groups;
else
......@@ -1185,7 +1184,7 @@ xgene_pmu_dev_add(struct xgene_pmu *xgene_pmu, struct xgene_pmu_dev_ctx *ctx)
break;
case PMU_TYPE_MC:
if (!(xgene_pmu->mc_active_mask & pmu->inf->enable_mask))
goto dev_err;
return -ENODEV;
if (xgene_pmu->version == PCP_PMU_V3)
pmu->attr_groups = mc_pmu_v3_attr_groups;
else
......@@ -1195,19 +1194,14 @@ xgene_pmu_dev_add(struct xgene_pmu *xgene_pmu, struct xgene_pmu_dev_ctx *ctx)
return -EINVAL;
}
rc = xgene_init_perf(pmu, ctx->name);
if (rc) {
if (xgene_init_perf(pmu, ctx->name)) {
dev_err(dev, "%s PMU: Failed to init perf driver\n", ctx->name);
goto dev_err;
return -ENODEV;
}
dev_info(dev, "%s PMU registered\n", ctx->name);
return rc;
dev_err:
devm_kfree(dev, pmu);
return -ENODEV;
return 0;
}
static void _xgene_pmu_isr(int irq, struct xgene_pmu_dev *pmu_dev)
......@@ -1515,13 +1509,13 @@ xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu,
acpi_dev_free_resource_list(&resource_list);
if (rc < 0) {
dev_err(dev, "PMU type %d: No resource address found\n", type);
goto err;
return NULL;
}
dev_csr = devm_ioremap_resource(dev, &res);
if (IS_ERR(dev_csr)) {
dev_err(dev, "PMU type %d: Fail to map resource\n", type);
goto err;
return NULL;
}
/* A PMU device node without enable-bit-index is always enabled */
......@@ -1535,7 +1529,7 @@ xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu,
ctx->name = xgene_pmu_dev_name(dev, type, enable_bit);
if (!ctx->name) {
dev_err(dev, "PMU type %d: Fail to get device name\n", type);
goto err;
return NULL;
}
inf = &ctx->inf;
inf->type = type;
......@@ -1543,9 +1537,6 @@ xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu,
inf->enable_mask = 1 << enable_bit;
return ctx;
err:
devm_kfree(dev, ctx);
return NULL;
}
static const struct acpi_device_id xgene_pmu_acpi_type_match[] = {
......@@ -1663,20 +1654,20 @@ xgene_pmu_dev_ctx *fdt_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu,
void __iomem *dev_csr;
struct resource res;
int enable_bit;
int rc;
ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return NULL;
rc = of_address_to_resource(np, 0, &res);
if (rc < 0) {
if (of_address_to_resource(np, 0, &res) < 0) {
dev_err(dev, "PMU type %d: No resource address found\n", type);
goto err;
return NULL;
}
dev_csr = devm_ioremap_resource(dev, &res);
if (IS_ERR(dev_csr)) {
dev_err(dev, "PMU type %d: Fail to map resource\n", type);
goto err;
return NULL;
}
/* A PMU device node without enable-bit-index is always enabled */
......@@ -1686,17 +1677,15 @@ xgene_pmu_dev_ctx *fdt_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu,
ctx->name = xgene_pmu_dev_name(dev, type, enable_bit);
if (!ctx->name) {
dev_err(dev, "PMU type %d: Fail to get device name\n", type);
goto err;
return NULL;
}
inf = &ctx->inf;
inf->type = type;
inf->csr = dev_csr;
inf->enable_mask = 1 << enable_bit;
return ctx;
err:
devm_kfree(dev, ctx);
return NULL;
}
static int fdt_pmu_probe_pmu_dev(struct xgene_pmu *xgene_pmu,
......@@ -1868,22 +1857,20 @@ static int xgene_pmu_probe(struct platform_device *pdev)
xgene_pmu->pcppmu_csr = devm_ioremap_resource(&pdev->dev, res);
if (IS_ERR(xgene_pmu->pcppmu_csr)) {
dev_err(&pdev->dev, "ioremap failed for PCP PMU resource\n");
rc = PTR_ERR(xgene_pmu->pcppmu_csr);
goto err;
return PTR_ERR(xgene_pmu->pcppmu_csr);
}
irq = platform_get_irq(pdev, 0);
if (irq < 0) {
dev_err(&pdev->dev, "No IRQ resource\n");
rc = -EINVAL;
goto err;
return -EINVAL;
}
rc = devm_request_irq(&pdev->dev, irq, xgene_pmu_isr,
IRQF_NOBALANCING | IRQF_NO_THREAD,
dev_name(&pdev->dev), xgene_pmu);
if (rc) {
dev_err(&pdev->dev, "Could not request IRQ %d\n", irq);
goto err;
return rc;
}
raw_spin_lock_init(&xgene_pmu->lock);
......@@ -1903,42 +1890,29 @@ static int xgene_pmu_probe(struct platform_device *pdev)
rc = irq_set_affinity(irq, &xgene_pmu->cpu);
if (rc) {
dev_err(&pdev->dev, "Failed to set interrupt affinity!\n");
goto err;
return rc;
}
/* Walk through the tree for all PMU perf devices */
rc = xgene_pmu_probe_pmu_dev(xgene_pmu, pdev);
if (rc) {
dev_err(&pdev->dev, "No PMU perf devices found!\n");
goto err;
return rc;
}
/* Enable interrupt */
xgene_pmu->ops->unmask_int(xgene_pmu);
return 0;
err:
if (xgene_pmu->pcppmu_csr)
devm_iounmap(&pdev->dev, xgene_pmu->pcppmu_csr);
devm_kfree(&pdev->dev, xgene_pmu);
return rc;
}
static void
xgene_pmu_dev_cleanup(struct xgene_pmu *xgene_pmu, struct list_head *pmus)
{
struct xgene_pmu_dev_ctx *ctx;
struct device *dev = xgene_pmu->dev;
struct xgene_pmu_dev *pmu_dev;
list_for_each_entry(ctx, pmus, next) {
pmu_dev = ctx->pmu_dev;
if (pmu_dev->inf->csr)
devm_iounmap(dev, pmu_dev->inf->csr);
devm_kfree(dev, ctx);
devm_kfree(dev, pmu_dev);
perf_pmu_unregister(&ctx->pmu_dev->pmu);
}
}
......@@ -1951,10 +1925,6 @@ static int xgene_pmu_remove(struct platform_device *pdev)
xgene_pmu_dev_cleanup(xgene_pmu, &xgene_pmu->mcbpmus);
xgene_pmu_dev_cleanup(xgene_pmu, &xgene_pmu->mcpmus);
if (xgene_pmu->pcppmu_csr)
devm_iounmap(&pdev->dev, xgene_pmu->pcppmu_csr);
devm_kfree(&pdev->dev, xgene_pmu);
return 0;
}
......
......@@ -121,6 +121,7 @@ extern const struct raid6_recov_calls raid6_recov_ssse3;
extern const struct raid6_recov_calls raid6_recov_avx2;
extern const struct raid6_recov_calls raid6_recov_avx512;
extern const struct raid6_recov_calls raid6_recov_s390xc;
extern const struct raid6_recov_calls raid6_recov_neon;
extern const struct raid6_calls raid6_neonx1;
extern const struct raid6_calls raid6_neonx2;
......
......@@ -38,6 +38,10 @@ enum {
#ifdef __KERNEL__
#ifndef THREAD_ALIGN
#define THREAD_ALIGN THREAD_SIZE
#endif
#ifdef CONFIG_DEBUG_STACK_USAGE
# define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | \
__GFP_ZERO)
......
......@@ -88,6 +88,7 @@
#include <linux/sysctl.h>
#include <linux/kcov.h>
#include <linux/livepatch.h>
#include <linux/thread_info.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
......@@ -217,7 +218,7 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
return s->addr;
}
stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN,
VMALLOC_START, VMALLOC_END,
THREADINFO_GFP,
PAGE_KERNEL,
......
......@@ -5,7 +5,7 @@ raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \
raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o
raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
......@@ -26,7 +26,9 @@ NEON_FLAGS := -ffreestanding
ifeq ($(ARCH),arm)
NEON_FLAGS += -mfloat-abi=softfp -mfpu=neon
endif
CFLAGS_recov_neon_inner.o += $(NEON_FLAGS)
ifeq ($(ARCH),arm64)
CFLAGS_REMOVE_recov_neon_inner.o += -mgeneral-regs-only
CFLAGS_REMOVE_neon1.o += -mgeneral-regs-only
CFLAGS_REMOVE_neon2.o += -mgeneral-regs-only
CFLAGS_REMOVE_neon4.o += -mgeneral-regs-only
......
......@@ -112,6 +112,9 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = {
#endif
#ifdef CONFIG_S390
&raid6_recov_s390xc,
#endif
#if defined(CONFIG_KERNEL_MODE_NEON)
&raid6_recov_neon,
#endif
&raid6_recov_intx1,
NULL
......
......@@ -46,8 +46,12 @@ static inline unative_t SHLBYTE(unative_t v)
*/
static inline unative_t MASK(unative_t v)
{
const uint8x16_t temp = NBYTES(0);
return (unative_t)vcltq_s8((int8x16_t)v, (int8x16_t)temp);
return (unative_t)vshrq_n_s8((int8x16_t)v, 7);
}
static inline unative_t PMUL(unative_t v, unative_t u)
{
return (unative_t)vmulq_p8((poly8x16_t)v, (poly8x16_t)u);
}
void raid6_neon$#_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
......@@ -110,7 +114,30 @@ void raid6_neon$#_xor_syndrome_real(int disks, int start, int stop,
wq$$ = veorq_u8(w1$$, wd$$);
}
/* P/Q left side optimization */
for ( z = start-1 ; z >= 0 ; z-- ) {
for ( z = start-1 ; z >= 3 ; z -= 4 ) {
w2$$ = vshrq_n_u8(wq$$, 4);
w1$$ = vshlq_n_u8(wq$$, 4);
w2$$ = PMUL(w2$$, x1d);
wq$$ = veorq_u8(w1$$, w2$$);
}
switch (z) {
case 2:
w2$$ = vshrq_n_u8(wq$$, 5);
w1$$ = vshlq_n_u8(wq$$, 3);
w2$$ = PMUL(w2$$, x1d);
wq$$ = veorq_u8(w1$$, w2$$);
break;
case 1:
w2$$ = vshrq_n_u8(wq$$, 6);
w1$$ = vshlq_n_u8(wq$$, 2);
w2$$ = PMUL(w2$$, x1d);
wq$$ = veorq_u8(w1$$, w2$$);
break;
case 0:
w2$$ = MASK(wq$$);
w1$$ = SHLBYTE(wq$$);
......
/*
* Copyright (C) 2012 Intel Corporation
* Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*/
#include <linux/raid/pq.h>
#ifdef __KERNEL__
#include <asm/neon.h>
#else
#define kernel_neon_begin()
#define kernel_neon_end()
#define cpu_has_neon() (1)
#endif
static int raid6_has_neon(void)
{
return cpu_has_neon();
}
void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp,
uint8_t *dq, const uint8_t *pbmul,
const uint8_t *qmul);
void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq,
const uint8_t *qmul);
static void raid6_2data_recov_neon(int disks, size_t bytes, int faila,
int failb, void **ptrs)
{
u8 *p, *q, *dp, *dq;
const u8 *pbmul; /* P multiplier table for B data */
const u8 *qmul; /* Q multiplier table (for both) */
p = (u8 *)ptrs[disks - 2];
q = (u8 *)ptrs[disks - 1];
/*
* Compute syndrome with zero for the missing data pages
* Use the dead data pages as temporary storage for
* delta p and delta q
*/
dp = (u8 *)ptrs[faila];
ptrs[faila] = (void *)raid6_empty_zero_page;
ptrs[disks - 2] = dp;
dq = (u8 *)ptrs[failb];
ptrs[failb] = (void *)raid6_empty_zero_page;
ptrs[disks - 1] = dq;
raid6_call.gen_syndrome(disks, bytes, ptrs);
/* Restore pointer table */
ptrs[faila] = dp;
ptrs[failb] = dq;
ptrs[disks - 2] = p;
ptrs[disks - 1] = q;
/* Now, pick the proper data tables */
pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
raid6_gfexp[failb]]];
kernel_neon_begin();
__raid6_2data_recov_neon(bytes, p, q, dp, dq, pbmul, qmul);
kernel_neon_end();
}
static void raid6_datap_recov_neon(int disks, size_t bytes, int faila,
void **ptrs)
{
u8 *p, *q, *dq;
const u8 *qmul; /* Q multiplier table */
p = (u8 *)ptrs[disks - 2];
q = (u8 *)ptrs[disks - 1];
/*
* Compute syndrome with zero for the missing data page
* Use the dead data page as temporary storage for delta q
*/
dq = (u8 *)ptrs[faila];
ptrs[faila] = (void *)raid6_empty_zero_page;
ptrs[disks - 1] = dq;
raid6_call.gen_syndrome(disks, bytes, ptrs);
/* Restore pointer table */
ptrs[faila] = dq;
ptrs[disks - 1] = q;
/* Now, pick the proper data tables */
qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
kernel_neon_begin();
__raid6_datap_recov_neon(bytes, p, q, dq, qmul);
kernel_neon_end();
}
const struct raid6_recov_calls raid6_recov_neon = {
.data2 = raid6_2data_recov_neon,
.datap = raid6_datap_recov_neon,
.valid = raid6_has_neon,
.name = "neon",
.priority = 10,
};
/*
* Copyright (C) 2012 Intel Corporation
* Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*/
#include <arm_neon.h>
static const uint8x16_t x0f = {
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
};
#ifdef CONFIG_ARM
/*
* AArch32 does not provide this intrinsic natively because it does not
* implement the underlying instruction. AArch32 only provides a 64-bit
* wide vtbl.8 instruction, so use that instead.
*/
static uint8x16_t vqtbl1q_u8(uint8x16_t a, uint8x16_t b)
{
union {
uint8x16_t val;
uint8x8x2_t pair;
} __a = { a };
return vcombine_u8(vtbl2_u8(__a.pair, vget_low_u8(b)),
vtbl2_u8(__a.pair, vget_high_u8(b)));
}
#endif
void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp,
uint8_t *dq, const uint8_t *pbmul,
const uint8_t *qmul)
{
uint8x16_t pm0 = vld1q_u8(pbmul);
uint8x16_t pm1 = vld1q_u8(pbmul + 16);
uint8x16_t qm0 = vld1q_u8(qmul);
uint8x16_t qm1 = vld1q_u8(qmul + 16);
/*
* while ( bytes-- ) {
* uint8_t px, qx, db;
*
* px = *p ^ *dp;
* qx = qmul[*q ^ *dq];
* *dq++ = db = pbmul[px] ^ qx;
* *dp++ = db ^ px;
* p++; q++;
* }
*/
while (bytes) {
uint8x16_t vx, vy, px, qx, db;
px = veorq_u8(vld1q_u8(p), vld1q_u8(dp));
vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq));
vy = (uint8x16_t)vshrq_n_s16((int16x8_t)vx, 4);
vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f));
vy = vqtbl1q_u8(qm1, vandq_u8(vy, x0f));
qx = veorq_u8(vx, vy);
vy = (uint8x16_t)vshrq_n_s16((int16x8_t)px, 4);
vx = vqtbl1q_u8(pm0, vandq_u8(px, x0f));
vy = vqtbl1q_u8(pm1, vandq_u8(vy, x0f));
vx = veorq_u8(vx, vy);
db = veorq_u8(vx, qx);
vst1q_u8(dq, db);
vst1q_u8(dp, veorq_u8(db, px));
bytes -= 16;
p += 16;
q += 16;
dp += 16;
dq += 16;
}
}
void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq,
const uint8_t *qmul)
{
uint8x16_t qm0 = vld1q_u8(qmul);
uint8x16_t qm1 = vld1q_u8(qmul + 16);
/*
* while (bytes--) {
* *p++ ^= *dq = qmul[*q ^ *dq];
* q++; dq++;
* }
*/
while (bytes) {
uint8x16_t vx, vy;
vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq));
vy = (uint8x16_t)vshrq_n_s16((int16x8_t)vx, 4);
vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f));
vy = vqtbl1q_u8(qm1, vandq_u8(vy, x0f));
vx = veorq_u8(vx, vy);
vy = veorq_u8(vx, vld1q_u8(p));
vst1q_u8(dq, vx);
vst1q_u8(p, vy);
bytes -= 16;
p += 16;
q += 16;
dq += 16;
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment