Commit 4b1967c9 authored by Linus Torvalds

Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 fixes from Will Deacon:
 "The critical one here is a fix for fpsimd register corruption across
  signals which was introduced by the SVE support code (the register
  files overlap), but the others are worth having as well.

  Summary:

   - Fix FP register corruption when SVE is not available or in use

   - Fix out-of-tree module build failure when CONFIG_ARM64_MODULE_PLTS=y

   - Missing 'const' generating errors with LTO builds

   - Remove unsupported events from Cortex-A73 PMU description

   - Removal of stale and incorrect comments"

* tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
  arm64: context: Fix comments and remove pointless smp_wmb()
  arm64: cpu_ops: Add missing 'const' qualifiers
  arm64: perf: remove unsupported events for Cortex-A73
  arm64: fpsimd: Fix failure to restore FPSIMD state after signals
  arm64: pgd: Mark pgd_cache as __ro_after_init
  arm64: ftrace: emit ftrace-mod.o contents through code
  arm64: module-plts: factor out PLT generation code for ftrace
  arm64: mm: cleanup stale AIVIVT references
parents a0651c7f 3a33c760
@@ -83,9 +83,6 @@ endif
 ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
 KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds
-ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
-KBUILD_LDFLAGS_MODULE += $(objtree)/arch/arm64/kernel/ftrace-mod.o
-endif
 endif

 # Default value

@@ -38,7 +38,7 @@
  *
  *	See Documentation/cachetlb.txt for more information. Please note that
  *	the implementation assumes non-aliasing VIPT D-cache and (aliasing)
- *	VIPT or ASID-tagged VIVT I-cache.
+ *	VIPT I-cache.
  *
  *	flush_cache_mm(mm)
  *

@@ -32,7 +32,7 @@ struct mod_arch_specific {
 	struct mod_plt_sec	init;

 	/* for CONFIG_DYNAMIC_FTRACE */
-	void			*ftrace_trampoline;
+	struct plt_entry	*ftrace_trampoline;
 };
 #endif

@@ -45,4 +45,48 @@ extern u64 module_alloc_base;
 #define module_alloc_base	((u64)_etext - MODULES_VSIZE)
 #endif

+struct plt_entry {
+	/*
+	 * A program that conforms to the AArch64 Procedure Call Standard
+	 * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or
+	 * IP1 (x17) may be inserted at any branch instruction that is
+	 * exposed to a relocation that supports long branches. Since that
+	 * is exactly what we are dealing with here, we are free to use x16
+	 * as a scratch register in the PLT veneers.
+	 */
+	__le32	mov0;	/* movn	x16, #0x....		*/
+	__le32	mov1;	/* movk	x16, #0x...., lsl #16	*/
+	__le32	mov2;	/* movk	x16, #0x...., lsl #32	*/
+	__le32	br;	/* br	x16			*/
+};
+
+static inline struct plt_entry get_plt_entry(u64 val)
+{
+	/*
+	 * MOVK/MOVN/MOVZ opcode:
+	 *
+	 * +--------+------------+--------+-----------+-------------+---------+
+	 * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
+	 * +--------+------------+--------+-----------+-------------+---------+
+	 *
+	 * Rd  := 0x10 (x16)
+	 * hw  := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
+	 * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
+	 * sf  := 1 (64-bit variant)
+	 */
+	return (struct plt_entry){
+		cpu_to_le32(0x92800010 | (((~val      ) & 0xffff)) << 5),
+		cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
+		cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
+		cpu_to_le32(0xd61f0200)
+	};
+}
+
+static inline bool plt_entries_equal(const struct plt_entry *a,
+				     const struct plt_entry *b)
+{
+	return a->mov0 == b->mov0 &&
+	       a->mov1 == b->mov1 &&
+	       a->mov2 == b->mov2;
+}
+
 #endif	/* __ASM_MODULE_H */

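A side note on the encoding above: MOVN writes the bitwise NOT of its 16-bit immediate, so get_plt_entry() stores ~val in the low halfword and then patches bits [31:16] and [47:32] in with MOVK, leaving bits [63:48] all-ones, which is what a kernel virtual address looks like. The following stand-alone sketch (illustration only, not part of the patch; it assumes a little-endian host so that cpu_to_le32() reduces to the identity, and the sample address is made up) reproduces the three data-processing words in user space:

#include <stdint.h>
#include <stdio.h>

struct example_plt_entry {
	uint32_t mov0;	/* movn x16, #imm16          */
	uint32_t mov1;	/* movk x16, #imm16, lsl #16 */
	uint32_t mov2;	/* movk x16, #imm16, lsl #32 */
	uint32_t br;	/* br   x16                  */
};

static struct example_plt_entry example_plt_entry_for(uint64_t val)
{
	return (struct example_plt_entry){
		/* MOVN leaves ~imm16 in the register, hence the inversion here */
		0x92800010 | ((~val        & 0xffff) << 5),
		0xf2a00010 | (((val >> 16) & 0xffff) << 5),
		0xf2c00010 | (((val >> 32) & 0xffff) << 5),
		0xd61f0200,
	};
}

int main(void)
{
	/* Made-up kernel-style address, purely for demonstration. */
	struct example_plt_entry e = example_plt_entry_for(0xffff000008081234ULL);

	printf("%08x %08x %08x %08x\n", e.mov0, e.mov1, e.mov2, e.br);
	return 0;
}

Building and running this prints the four instruction words that would be written into a PLT slot for that address.
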
@@ -61,6 +61,3 @@ extra-y += $(head-y) vmlinux.lds
 ifeq ($(CONFIG_DEBUG_EFI),y)
 AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\""
 endif
-# will be included by each individual module but not by the core kernel itself
-extra-$(CONFIG_DYNAMIC_FTRACE) += ftrace-mod.o

@@ -31,13 +31,13 @@ extern const struct cpu_operations cpu_psci_ops;
 const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;

-static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = {
+static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = {
 	&smp_spin_table_ops,
 	&cpu_psci_ops,
 	NULL,
 };

-static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = {
+static const struct cpu_operations *const acpi_supported_cpu_ops[] __initconst = {
 #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
 	&acpi_parking_protocol_ops,
 #endif

@@ -47,7 +47,7 @@ static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = {
 static const struct cpu_operations * __init cpu_get_ops(const char *name)
 {
-	const struct cpu_operations **ops;
+	const struct cpu_operations *const *ops;

 	ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops;

@@ -1026,10 +1026,10 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
 	local_bh_disable();

-	if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
-		current->thread.fpsimd_state = *state;
+	current->thread.fpsimd_state = *state;
+	if (system_supports_sve() && test_thread_flag(TIF_SVE))
 		fpsimd_to_sve(current);
-	}
+
 	task_fpsimd_load();

 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {

-/*
- * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
-	.section	".text.ftrace_trampoline", "ax"
-	.align		3
-0:	.quad		0
-__ftrace_trampoline:
-	ldr		x16, 0b
-	br		x16
-ENDPROC(__ftrace_trampoline)

@@ -76,7 +76,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	if (offset < -SZ_128M || offset >= SZ_128M) {
 #ifdef CONFIG_ARM64_MODULE_PLTS
-		unsigned long *trampoline;
+		struct plt_entry trampoline;
 		struct module *mod;

 		/*
@@ -104,22 +104,24 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
		 * is added in the future, but for now, the pr_err() below
		 * deals with a theoretical issue only.
		 */
-		trampoline = (unsigned long *)mod->arch.ftrace_trampoline;
-		if (trampoline[0] != addr) {
-			if (trampoline[0] != 0) {
+		trampoline = get_plt_entry(addr);
+		if (!plt_entries_equal(mod->arch.ftrace_trampoline,
+				       &trampoline)) {
+			if (!plt_entries_equal(mod->arch.ftrace_trampoline,
+					       &(struct plt_entry){})) {
 				pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n");
 				return -EINVAL;
 			}

 			/* point the trampoline to our ftrace entry point */
 			module_disable_ro(mod);
-			trampoline[0] = addr;
+			*mod->arch.ftrace_trampoline = trampoline;
 			module_enable_ro(mod, true);

 			/* update trampoline before patching in the branch */
 			smp_wmb();
 		}
-		addr = (unsigned long)&trampoline[1];
+		addr = (unsigned long)(void *)mod->arch.ftrace_trampoline;
 #else /* CONFIG_ARM64_MODULE_PLTS */
 		return -EINVAL;
 #endif /* CONFIG_ARM64_MODULE_PLTS */

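For context on the SZ_128M test above: a direct AArch64 B/BL instruction encodes a signed 26-bit, word-scaled immediate, so it can only reach targets within about ±128 MiB of the branch; anything further away has to go through a trampoline such as the module PLT entry used here. A hypothetical stand-alone helper (illustration only, not kernel code) expressing the same check:

#include <stdbool.h>
#include <stdint.h>

#define SZ_128M	(128 * 1024 * 1024L)

/*
 * A 26-bit signed immediate covers [-2^25, 2^25 - 1] instruction words;
 * scaled by 4 bytes per instruction, that is [-128M, 128M) bytes.
 */
static bool bl_can_reach(uint64_t pc, uint64_t target)
{
	int64_t offset = (int64_t)(target - pc);

	return offset >= -SZ_128M && offset < SZ_128M;
}
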
@@ -11,21 +11,6 @@
 #include <linux/module.h>
 #include <linux/sort.h>

-struct plt_entry {
-	/*
-	 * A program that conforms to the AArch64 Procedure Call Standard
-	 * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or
-	 * IP1 (x17) may be inserted at any branch instruction that is
-	 * exposed to a relocation that supports long branches. Since that
-	 * is exactly what we are dealing with here, we are free to use x16
-	 * as a scratch register in the PLT veneers.
-	 */
-	__le32	mov0;	/* movn	x16, #0x....		*/
-	__le32	mov1;	/* movk	x16, #0x...., lsl #16	*/
-	__le32	mov2;	/* movk	x16, #0x...., lsl #32	*/
-	__le32	br;	/* br	x16			*/
-};
-
 static bool in_init(const struct module *mod, void *loc)
 {
 	return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size;

@@ -40,33 +25,14 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
 	int i = pltsec->plt_num_entries;
 	u64 val = sym->st_value + rela->r_addend;

-	/*
-	 * MOVK/MOVN/MOVZ opcode:
-	 *
-	 * +--------+------------+--------+-----------+-------------+---------+
-	 * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
-	 * +--------+------------+--------+-----------+-------------+---------+
-	 *
-	 * Rd  := 0x10 (x16)
-	 * hw  := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
-	 * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
-	 * sf  := 1 (64-bit variant)
-	 */
-	plt[i] = (struct plt_entry){
-		cpu_to_le32(0x92800010 | (((~val      ) & 0xffff)) << 5),
-		cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
-		cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
-		cpu_to_le32(0xd61f0200)
-	};
+	plt[i] = get_plt_entry(val);

 	/*
 	 * Check if the entry we just created is a duplicate. Given that the
 	 * relocations are sorted, this will be the last entry we allocated.
 	 * (if one exists).
 	 */
-	if (i > 0 &&
-	    plt[i].mov0 == plt[i - 1].mov0 &&
-	    plt[i].mov1 == plt[i - 1].mov1 &&
-	    plt[i].mov2 == plt[i - 1].mov2)
+	if (i > 0 && plt_entries_equal(plt + i, plt + i - 1))
 		return (u64)&plt[i - 1];

 	pltsec->plt_num_entries++;

@@ -154,6 +120,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 	unsigned long core_plts = 0;
 	unsigned long init_plts = 0;
 	Elf64_Sym *syms = NULL;
+	Elf_Shdr *tramp = NULL;
 	int i;

 	/*
@@ -165,6 +132,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 			mod->arch.core.plt = sechdrs + i;
 		else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt"))
 			mod->arch.init.plt = sechdrs + i;
+		else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) &&
+			 !strcmp(secstrings + sechdrs[i].sh_name,
+				 ".text.ftrace_trampoline"))
+			tramp = sechdrs + i;
 		else if (sechdrs[i].sh_type == SHT_SYMTAB)
 			syms = (Elf64_Sym *)sechdrs[i].sh_addr;
 	}

@@ -215,5 +186,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 	mod->arch.init.plt_num_entries = 0;
 	mod->arch.init.plt_max_entries = init_plts;

+	if (tramp) {
+		tramp->sh_type = SHT_NOBITS;
+		tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+		tramp->sh_addralign = __alignof__(struct plt_entry);
+		tramp->sh_size = sizeof(struct plt_entry);
+	}
+
 	return 0;
 }

 SECTIONS {
 	.plt (NOLOAD) : { BYTE(0) }
 	.init.plt (NOLOAD) : { BYTE(0) }
+	.text.ftrace_trampoline (NOLOAD) : { BYTE(0) }
 }

@@ -262,12 +262,6 @@ static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
 	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,

-	[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
-	[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
-	[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
-	[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
 };

 static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]

@@ -96,12 +96,6 @@ static void flush_context(unsigned int cpu)
 	set_reserved_asid_bits();

-	/*
-	 * Ensure the generation bump is observed before we xchg the
-	 * active_asids.
-	 */
-	smp_wmb();
-
 	for_each_possible_cpu(i) {
 		asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
 		/*

@@ -117,7 +111,10 @@ static void flush_context(unsigned int cpu)
 		per_cpu(reserved_asids, i) = asid;
 	}

-	/* Queue a TLB invalidate and flush the I-cache if necessary. */
+	/*
+	 * Queue a TLB invalidation for each CPU to perform on next
+	 * context-switch
+	 */
 	cpumask_setall(&tlb_flush_pending);
 }

@@ -202,11 +199,18 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
 	asid = atomic64_read(&mm->context.id);

 	/*
-	 * The memory ordering here is subtle. We rely on the control
-	 * dependency between the generation read and the update of
-	 * active_asids to ensure that we are synchronised with a
-	 * parallel rollover (i.e. this pairs with the smp_wmb() in
-	 * flush_context).
+	 * The memory ordering here is subtle.
+	 * If our ASID matches the current generation, then we update
+	 * our active_asids entry with a relaxed xchg. Racing with a
+	 * concurrent rollover means that either:
+	 *
+	 * - We get a zero back from the xchg and end up waiting on the
+	 *   lock. Taking the lock synchronises with the rollover and so
+	 *   we are forced to see the updated generation.
+	 *
+	 * - We get a valid ASID back from the xchg, which means the
+	 *   relaxed xchg in flush_context will treat us as reserved
+	 *   because atomic RmWs are totally ordered for a given location.
 	 */
 	if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits)
 	    && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid))

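To make the two bullet cases above a little more concrete, here is a rough user-space sketch of the fast-path/slow-path shape being described (illustration only, not kernel code: C11 atomics and a pthread mutex stand in for the per-CPU active_asids array and the raw spinlock, and all names are made up):

#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>

#define ASID_BITS	16

/* Stand-ins for asid_generation and one CPU's active_asids slot. */
static _Atomic uint64_t asid_generation = 1UL << ASID_BITS;
static _Atomic uint64_t active_asid;
static pthread_mutex_t  asid_lock = PTHREAD_MUTEX_INITIALIZER;

/* Placeholder for the real allocator, which may bump the generation and
 * reclaim every CPU's active ASID. */
static uint64_t new_context(void)
{
	return atomic_load(&asid_generation) | 1;
}

static uint64_t check_and_switch(uint64_t asid)
{
	/*
	 * Fast path: our generation is current and the relaxed exchange
	 * hands back a non-zero ASID, so no rollover raced with us.
	 */
	if (!((asid ^ atomic_load(&asid_generation)) >> ASID_BITS) &&
	    atomic_exchange_explicit(&active_asid, asid,
				     memory_order_relaxed))
		return asid;

	/*
	 * Slow path: the lock synchronises with a concurrent rollover, so
	 * after taking it we are guaranteed to see the new generation and
	 * can allocate a fresh ASID if ours is stale.
	 */
	pthread_mutex_lock(&asid_lock);
	if ((asid ^ atomic_load(&asid_generation)) >> ASID_BITS)
		asid = new_context();
	atomic_store_explicit(&active_asid, asid, memory_order_relaxed);
	pthread_mutex_unlock(&asid_lock);

	return asid;
}

The point of the updated comment is that the relaxed exchange on the fast path needs no extra barrier: either it observes the rollover (returns zero and falls back to the lock), or the rollover's own exchange observes it and treats the ASID as reserved.
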
@@ -26,7 +26,7 @@
 #include <asm/page.h>
 #include <asm/tlbflush.h>

-static struct kmem_cache *pgd_cache;
+static struct kmem_cache *pgd_cache __ro_after_init;

 pgd_t *pgd_alloc(struct mm_struct *mm)
 {