Commit d109c4bb authored by Linus Torvalds

Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 fixes from Will Deacon:
 "This addresses some problems with filesystem writeback due to the
  recently merged hardware DBM patches, which caused us to treat some
  read-only pages as dirty.

  There are also some other, less significant fixes that are described
  in the summary below:

  A mixture of fixes for regressions introduced during the merge window,
  some longer-standing problems that we spotted, and a couple of hardware
  errata. The main changes are:

   - Fix fallout from the h/w DBM patches, causing filesystem writeback
     issues on both v8 and v8.1 CPUs

   - Workaround for Cortex-A53 erratum #843419 in the module loader

   - Fix for long-standing issue with compat big-endian signal handlers
     using the saved floating point state"

* tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
  arm64: errata: add module build workaround for erratum #843419
  arm64: compat: fix vfp save/restore across signal handlers in big-endian
  arm64: cpu hotplug: ensure we mask out CPU_TASKS_FROZEN in notifiers
  arm64: head.S: initialise mdcr_el2 in el2_setup
  arm64: enable generic idle loop
  arm64: pgtable: use a single bit for PTE_WRITE regardless of DBM
  arm64: Fix pte_modify() to preserve the hardware dirty information
  arm64: Fix the pte_hw_dirty() check when AF/DBM is enabled
  arm64: dma-mapping: check whether cma area is initialized or not
parents 42dc2a30 df057cc7
arch/arm64/Kconfig
@@ -32,6 +32,7 @@ config ARM64
 	select GENERIC_CLOCKEVENTS_BROADCAST
 	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_EARLY_IOREMAP
+	select GENERIC_IDLE_POLL_SETUP
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
 	select GENERIC_IRQ_SHOW_LEVEL

@@ -331,6 +332,22 @@ config ARM64_ERRATUM_845719
 	  If unsure, say Y.
 
+config ARM64_ERRATUM_843419
+	bool "Cortex-A53: 843419: A load or store might access an incorrect address"
+	depends on MODULES
+	default y
+	help
+	  This option builds kernel modules using the large memory model in
+	  order to avoid the use of the ADRP instruction, which can cause
+	  a subsequent memory access to use an incorrect address on Cortex-A53
+	  parts up to r0p4.
+
+	  Note that the kernel itself must be linked with a version of ld
+	  which fixes potentially affected ADRP instructions through the
+	  use of veneers.
+
+	  If unsure, say Y.
+
 endmenu
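For context, the linker side of this workaround (ld's --fix-cortex-a53-843419) rewrites ADRP instructions that fall in the last two instruction slots of a 4KB page, which is our reading of the placements the erratum can affect. A minimal user-space sketch of that placement check; the helper names and the 0xff8/0xffc offsets are our assumptions, not text from this patch:

  #include <stdbool.h>
  #include <stdint.h>

  /* ADRP encoding: op (bit 31) = 1, bits [28:24] = 0b10000 */
  static bool is_adrp(uint32_t insn)
  {
  	return (insn & 0x9f000000u) == 0x90000000u;
  }

  /* Assumed erratum window: the last two instruction slots of a
   * 4KB page, i.e. page offsets 0xff8 and 0xffc.
   */
  static bool in_erratum_843419_slot(uint64_t addr)
  {
  	uint64_t off = addr & 0xfff;
  	return off == 0xff8 || off == 0xffc;
  }

Building modules with -mcmodel=large sidesteps the problem entirely by never emitting ADRP there, as the Makefile hunk below arranges.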
arch/arm64/Makefile
@@ -41,6 +41,10 @@ endif
 CHECKFLAGS	+= -D__aarch64__
 
+ifeq ($(CONFIG_ARM64_ERRATUM_843419), y)
+CFLAGS_MODULE	+= -mcmodel=large
+endif
+
 # Default value
 head-y		:= arch/arm64/kernel/head.o
...@@ -26,13 +26,9 @@ ...@@ -26,13 +26,9 @@
* Software defined PTE bits definition. * Software defined PTE bits definition.
*/ */
#define PTE_VALID (_AT(pteval_t, 1) << 0) #define PTE_VALID (_AT(pteval_t, 1) << 0)
#define PTE_WRITE (PTE_DBM) /* same as DBM (51) */
#define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_DIRTY (_AT(pteval_t, 1) << 55)
#define PTE_SPECIAL (_AT(pteval_t, 1) << 56) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56)
#ifdef CONFIG_ARM64_HW_AFDBM
#define PTE_WRITE (PTE_DBM) /* same as DBM */
#else
#define PTE_WRITE (_AT(pteval_t, 1) << 57)
#endif
#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ #define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */
/* /*
...@@ -146,7 +142,7 @@ extern struct page *empty_zero_page; ...@@ -146,7 +142,7 @@ extern struct page *empty_zero_page;
#define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN))
#ifdef CONFIG_ARM64_HW_AFDBM #ifdef CONFIG_ARM64_HW_AFDBM
#define pte_hw_dirty(pte) (!(pte_val(pte) & PTE_RDONLY)) #define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
#else #else
#define pte_hw_dirty(pte) (0) #define pte_hw_dirty(pte) (0)
#endif #endif
...@@ -238,7 +234,7 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); ...@@ -238,7 +234,7 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
* When hardware DBM is not present, the sofware PTE_DIRTY bit is updated via * When hardware DBM is not present, the sofware PTE_DIRTY bit is updated via
* the page fault mechanism. Checking the dirty status of a pte becomes: * the page fault mechanism. Checking the dirty status of a pte becomes:
* *
* PTE_DIRTY || !PTE_RDONLY * PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY)
*/ */
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte) pte_t *ptep, pte_t pte)
...@@ -503,7 +499,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) ...@@ -503,7 +499,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
PTE_PROT_NONE | PTE_WRITE | PTE_TYPE_MASK; PTE_PROT_NONE | PTE_WRITE | PTE_TYPE_MASK;
/* preserve the hardware dirty information */ /* preserve the hardware dirty information */
if (pte_hw_dirty(pte)) if (pte_hw_dirty(pte))
newprot |= PTE_DIRTY; pte = pte_mkdirty(pte);
pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
return pte; return pte;
} }
......
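To see why the added pte_write() term matters, here is a stand-alone sketch of the dirty test, using the same bit assignments as this file (PTE_RDONLY is AP[2], bit 7; the rest as defined in the hunk above). Hardware DBM can only mark a pte dirty by clearing PTE_RDONLY on a writable entry, so an entry with PTE_RDONLY clear but PTE_WRITE clear must not be reported as dirty:

  #include <stdbool.h>
  #include <stdint.h>

  typedef uint64_t pteval_t;

  #define PTE_RDONLY	((pteval_t)1 << 7)	/* AP[2] */
  #define PTE_DBM	((pteval_t)1 << 51)	/* hardware Dirty Bit Modifier */
  #define PTE_WRITE	PTE_DBM			/* single bit, as in this patch */
  #define PTE_DIRTY	((pteval_t)1 << 55)	/* software dirty */

  static bool pte_write_bit(pteval_t pte) { return pte & PTE_WRITE; }

  /* Fixed check: only a writable (DBM-capable) entry whose RDONLY
   * bit has been cleared counts as hardware-dirty.
   */
  static bool pte_hw_dirty(pteval_t pte)
  {
  	return pte_write_bit(pte) && !(pte & PTE_RDONLY);
  }

  static bool pte_dirty(pteval_t pte)
  {
  	return (pte & PTE_DIRTY) || pte_hw_dirty(pte);
  }

This is the check the merge summary refers to: without the pte_write() term, some read-only pages were treated as dirty, confusing filesystem writeback.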
...@@ -134,7 +134,7 @@ static int os_lock_notify(struct notifier_block *self, ...@@ -134,7 +134,7 @@ static int os_lock_notify(struct notifier_block *self,
unsigned long action, void *data) unsigned long action, void *data)
{ {
int cpu = (unsigned long)data; int cpu = (unsigned long)data;
if (action == CPU_ONLINE) if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE)
smp_call_function_single(cpu, clear_os_lock, NULL, 1); smp_call_function_single(cpu, clear_os_lock, NULL, 1);
return NOTIFY_OK; return NOTIFY_OK;
} }
......
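Why the mask: hotplug notifier actions carry CPU_TASKS_FROZEN as a flag during suspend/resume, so a CPU brought back online after resume reports CPU_ONLINE_FROZEN rather than CPU_ONLINE, and a plain equality test skips it, leaving the OS lock set. The same fix appears in the hw_breakpoint.c hunk below. A small check using the action values from include/linux/cpu.h of this era:

  #include <assert.h>

  #define CPU_ONLINE		0x0002	/* CPU is up */
  #define CPU_TASKS_FROZEN	0x0010	/* flag set during suspend/resume */
  #define CPU_ONLINE_FROZEN	(CPU_ONLINE | CPU_TASKS_FROZEN)

  int main(void)
  {
  	/* Old test: misses the CPU coming online during resume */
  	assert(CPU_ONLINE_FROZEN != CPU_ONLINE);

  	/* New test: masking out the flag matches both variants */
  	assert((CPU_ONLINE        & ~CPU_TASKS_FROZEN) == CPU_ONLINE);
  	assert((CPU_ONLINE_FROZEN & ~CPU_TASKS_FROZEN) == CPU_ONLINE);
  	return 0;
  }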
arch/arm64/kernel/head.S
@@ -523,6 +523,11 @@ CPU_LE(	movk	x0, #0x30d0, lsl #16	)	// Clear EE and E0E on LE systems
 	msr	hstr_el2, xzr			// Disable CP15 traps to EL2
 #endif
 
+	/* EL2 debug */
+	mrs	x0, pmcr_el0			// Disable debug access traps
+	ubfx	x0, x0, #11, #5			// to EL2 and allow access to
+	msr	mdcr_el2, x0			// all PMU counters from EL1
+
 	/* Stage-2 translation */
 	msr	vttbr_el2, xzr
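The ubfx extracts PMCR_EL0.N (bits [15:11]), the number of implemented PMU event counters, and writes it to MDCR_EL2, where it lands in the HPMN field (bits [4:0]) with every trap bit zeroed. A C sketch of the same arithmetic:

  #include <stdint.h>

  /* Equivalent of "ubfx x0, x0, #11, #5": pull the 5-bit field
   * starting at bit 11 (PMCR_EL0.N) down to bits [4:0].
   */
  static uint64_t mdcr_el2_initial(uint64_t pmcr_el0)
  {
  	uint64_t n = (pmcr_el0 >> 11) & 0x1f;

  	/* MDCR_EL2 = N: HPMN gives EL1 access to all N counters,
  	 * while the cleared trap bits (TDRA/TDOSA/TDA/TPM) stop
  	 * debug and PMU accesses from trapping to EL2.
  	 */
  	return n;
  }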
...@@ -872,7 +872,7 @@ static int hw_breakpoint_reset_notify(struct notifier_block *self, ...@@ -872,7 +872,7 @@ static int hw_breakpoint_reset_notify(struct notifier_block *self,
void *hcpu) void *hcpu)
{ {
int cpu = (long)hcpu; int cpu = (long)hcpu;
if (action == CPU_ONLINE) if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE)
smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1); smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1);
return NOTIFY_OK; return NOTIFY_OK;
} }
......
arch/arm64/kernel/module.c
@@ -332,12 +332,14 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21,
 					     AARCH64_INSN_IMM_ADR);
 			break;
+#ifndef CONFIG_ARM64_ERRATUM_843419
 		case R_AARCH64_ADR_PREL_PG_HI21_NC:
 			overflow_check = false;
 		case R_AARCH64_ADR_PREL_PG_HI21:
 			ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21,
 					     AARCH64_INSN_IMM_ADR);
 			break;
+#endif
 		case R_AARCH64_ADD_ABS_LO12_NC:
 		case R_AARCH64_LDST8_ABS_LO12_NC:
 			overflow_check = false;
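With CONFIG_ARM64_ERRATUM_843419=y the ADRP relocation cases are compiled out entirely: modules built with -mcmodel=large should contain no R_AARCH64_ADR_PREL_PG_HI21 relocations, so any that do appear now fall through to the unsupported-relocation error instead of being silently applied. For reference, a sketch of the arithmetic RELOC_OP_PAGE performs for these relocations (our simplification, not the kernel code):

  #include <stdint.h>

  /* ADRP is page-relative: its 21-bit immediate encodes the distance
   * from the 4KB page containing the instruction to the 4KB page
   * containing the symbol.
   */
  static int64_t adrp_page_delta(uint64_t place, uint64_t sym)
  {
  	return ((int64_t)(sym & ~0xfffULL) -
  		(int64_t)(place & ~0xfffULL)) >> 12;
  }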
arch/arm64/kernel/signal32.c
@@ -212,14 +212,32 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
 
 /*
  * VFP save/restore code.
+ *
+ * We have to be careful with endianness, since the fpsimd context-switch
+ * code operates on 128-bit (Q) register values whereas the compat ABI
+ * uses an array of 64-bit (D) registers. Consequently, we need to swap
+ * the two halves of each Q register when running on a big-endian CPU.
  */
+union __fpsimd_vreg {
+	__uint128_t raw;
+	struct {
+#ifdef __AARCH64EB__
+		u64 hi;
+		u64 lo;
+#else
+		u64 lo;
+		u64 hi;
+#endif
+	};
+};
+
 static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
 {
 	struct fpsimd_state *fpsimd = &current->thread.fpsimd_state;
 	compat_ulong_t magic = VFP_MAGIC;
 	compat_ulong_t size = VFP_STORAGE_SIZE;
 	compat_ulong_t fpscr, fpexc;
-	int err = 0;
+	int i, err = 0;
 
 	/*
 	 * Save the hardware registers to the fpsimd_state structure.

@@ -235,10 +253,15 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
 	/*
 	 * Now copy the FP registers. Since the registers are packed,
 	 * we can copy the prefix we want (V0-V15) as it is.
-	 * FIXME: Won't work if big endian.
 	 */
-	err |= __copy_to_user(&frame->ufp.fpregs, fpsimd->vregs,
-			      sizeof(frame->ufp.fpregs));
+	for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) {
+		union __fpsimd_vreg vreg = {
+			.raw = fpsimd->vregs[i >> 1],
+		};
+
+		__put_user_error(vreg.lo, &frame->ufp.fpregs[i], err);
+		__put_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err);
+	}
 
 	/* Create an AArch32 fpscr from the fpsr and the fpcr. */
 	fpscr = (fpsimd->fpsr & VFP_FPSCR_STAT_MASK) |

@@ -263,7 +286,7 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame)
 	compat_ulong_t magic = VFP_MAGIC;
 	compat_ulong_t size = VFP_STORAGE_SIZE;
 	compat_ulong_t fpscr;
-	int err = 0;
+	int i, err = 0;
 
 	__get_user_error(magic, &frame->magic, err);
 	__get_user_error(size, &frame->size, err);

@@ -273,12 +296,14 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame)
 	if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE)
 		return -EINVAL;
 
-	/*
-	 * Copy the FP registers into the start of the fpsimd_state.
-	 * FIXME: Won't work if big endian.
-	 */
-	err |= __copy_from_user(fpsimd.vregs, frame->ufp.fpregs,
-				sizeof(frame->ufp.fpregs));
+	/* Copy the FP registers into the start of the fpsimd_state. */
+	for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) {
+		union __fpsimd_vreg vreg;
+
+		__get_user_error(vreg.lo, &frame->ufp.fpregs[i], err);
+		__get_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err);
+		fpsimd.vregs[i >> 1] = vreg.raw;
+	}
 
 	/* Extract the fpsr and the fpcr from the fpscr */
 	__get_user_error(fpscr, &frame->ufp.fpscr, err);
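The union trick above can be exercised in isolation. The sketch below mirrors the patch's __fpsimd_vreg (with __uint128_t, a GCC/Clang extension, as in the kernel): whichever endianness the CPU runs in, .lo names the architectural low half of the Q register, which is what must land in the even-numbered D slot of the compat sigframe:

  #include <stdint.h>
  #include <stdio.h>

  typedef uint64_t u64;

  union fpsimd_vreg {
  	__uint128_t raw;
  	struct {
  #ifdef __AARCH64EB__
  		u64 hi;		/* big-endian: high half stored first */
  		u64 lo;
  #else
  		u64 lo;		/* little-endian: low half stored first */
  		u64 hi;
  #endif
  	};
  };

  int main(void)
  {
  	union fpsimd_vreg v;

  	/* Q register = hi:lo */
  	v.raw = ((__uint128_t)0x0123456789abcdefULL << 64) |
  		0xfedcba9876543210ULL;

  	/* D[2i] must always be the low half, D[2i+1] the high half */
  	printf("d0 = %016llx, d1 = %016llx\n",
  	       (unsigned long long)v.lo, (unsigned long long)v.hi);
  	return 0;
  }

The old __copy_to_user()/__copy_from_user() of the raw vregs array got this wrong on big-endian, which is the long-standing compat signal-handler bug the merge summary mentions.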
...@@ -100,7 +100,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size, ...@@ -100,7 +100,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
if (IS_ENABLED(CONFIG_ZONE_DMA) && if (IS_ENABLED(CONFIG_ZONE_DMA) &&
dev->coherent_dma_mask <= DMA_BIT_MASK(32)) dev->coherent_dma_mask <= DMA_BIT_MASK(32))
flags |= GFP_DMA; flags |= GFP_DMA;
if (IS_ENABLED(CONFIG_DMA_CMA) && (flags & __GFP_WAIT)) { if (dev_get_cma_area(dev) && (flags & __GFP_WAIT)) {
struct page *page; struct page *page;
void *addr; void *addr;
......
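The point of this change: IS_ENABLED(CONFIG_DMA_CMA) only says the kernel was built with CMA support, whereas dev_get_cma_area(dev) reports whether a CMA area actually exists at runtime (it returns NULL when none was reserved or initialised), so the allocator no longer takes the CMA path against a missing area. A shape-only sketch with hypothetical stand-in helpers, not the kernel's API:

  #include <stdbool.h>
  #include <stdlib.h>

  struct cma;				/* opaque, as in the kernel */

  /* Illustrative stand-ins for dev_get_cma_area() and
   * dma_alloc_from_contiguous(); names and behaviour assumed.
   */
  static struct cma *get_cma_area(void)	   { return NULL; }
  static void *alloc_from_cma(size_t size)  { (void)size; return NULL; }

  static void *alloc_coherent(size_t size, bool can_block)
  {
  	/* Gate on the runtime presence of a CMA area, not on the
  	 * compile-time config switch -- the fix in this hunk.
  	 */
  	if (get_cma_area() && can_block)
  		return alloc_from_cma(size);

  	return malloc(size);		/* fall back to normal pages */
  }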