Commit 72a20cee authored by Linus Torvalds

Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 fixes from Will Deacon:
 "Here are some arm64 fixes for -rc5.

  The only non-trivial change (in terms of the diffstat) is fixing our
  SVE ptrace API for big-endian machines, but the majority of this is
  actually the addition of much-needed comments and updates to the
  documentation to try to avoid this mess biting us again in future.

  There are still a couple of small things on the horizon, but nothing
  major at this point.

  Summary:

   - Fix broken SVE ptrace API when running in a big-endian configuration

   - Fix performance regression due to off-by-one in TLBI range checking

   - Fix build regression when using Clang"

* tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
  arm64/sve: Fix missing SVE/FPSIMD endianness conversions
  arm64: tlbflush: Ensure start/end of address range are aligned to stride
  arm64: Don't unconditionally add -Wno-psabi to KBUILD_CFLAGS
parents fd6b99fa 41040cf7
@@ -56,6 +56,18 @@ model features for SVE is included in Appendix A.
  is to connect to a target process first and then attempt a
  ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov).

+* Whenever SVE scalable register values (Zn, Pn, FFR) are exchanged in memory
+  between userspace and the kernel, the register value is encoded in memory in
+  an endianness-invariant layout, with bits [(8 * i + 7) : (8 * i)] encoded at
+  byte offset i from the start of the memory representation. This affects for
+  example the signal frame (struct sve_context) and ptrace interface
+  (struct user_sve_header) and associated data.
+
+  Beware that on big-endian systems this results in a different byte order than
+  for the FPSIMD V-registers, which are stored as single host-endian 128-bit
+  values, with bits [(127 - 8 * i) : (120 - 8 * i)] of the register encoded at
+  byte offset i. (struct fpsimd_context, struct user_fpsimd_state).
+
2. Vector length terminology
-----------------------------
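
As an illustration of the two layouts described in the hunk above, here is a small standalone userspace sketch, not part of the patch; it assumes a compiler providing __uint128_t, and the byte values are arbitrary. It builds a 128-bit value whose value byte i equals i, then dumps the endianness-invariant encoding used for the SVE registers next to the raw object representation used for the FPSIMD V-registers: the two dumps match on a little-endian host and are byte-reversed relative to each other on a big-endian host.

#include <stdio.h>
#include <string.h>

int main(void)
{
        __uint128_t v = 0;
        unsigned char sve_enc[16];   /* layout used for Zn/Pn/FFR bytes */
        unsigned char vreg_raw[16];  /* layout used for FPSIMD vregs[] */
        int i;

        /* Build a value whose bits [8*i+7 : 8*i] (value byte i) equal i. */
        for (i = 0; i < 16; i++)
                v |= (__uint128_t)i << (8 * i);

        /* Endianness-invariant encoding: value byte i at byte offset i. */
        for (i = 0; i < 16; i++)
                sve_enc[i] = (unsigned char)(v >> (8 * i));

        /* FPSIMD V-register encoding: the host's own __uint128_t layout. */
        memcpy(vreg_raw, &v, sizeof(v));

        for (i = 0; i < 16; i++)
                printf("%02x ", sve_enc[i]);
        printf(" <- SVE encoding (same on LE and BE)\n");
        for (i = 0; i < 16; i++)
                printf("%02x ", vreg_raw[i]);
        printf(" <- host __uint128_t bytes (reversed on BE)\n");
        return 0;
}
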
@@ -124,6 +136,10 @@ the SVE instruction set architecture.
  size and layout. Macros SVE_SIG_* are defined [1] to facilitate access to
  the members.

+* Each scalable register (Zn, Pn, FFR) is stored in an endianness-invariant
+  layout, with bits [(8 * i + 7) : (8 * i)] stored at byte offset i from the
+  start of the register's representation in memory.
+
* If the SVE context is too big to fit in sigcontext.__reserved[], then extra
  space is allocated on the stack, an extra_context record is written in
  __reserved[] referencing this space. sve_context is then written in the
@@ -51,7 +51,7 @@ endif
KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst)
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
-KBUILD_CFLAGS += -Wno-psabi
+KBUILD_CFLAGS += $(call cc-disable-warning, psabi)
KBUILD_AFLAGS += $(lseinstr) $(brokengasinst)
KBUILD_CFLAGS += $(call cc-option,-mabi=lp64)
@@ -195,6 +195,9 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
        unsigned long asid = ASID(vma->vm_mm);
        unsigned long addr;

+       start = round_down(start, stride);
+       end = round_up(end, stride);
+
        if ((end - start) >= (MAX_TLBI_OPS * stride)) {
                flush_tlb_mm(vma->vm_mm);
                return;
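
To see what the two added lines buy, here is a standalone sketch (not kernel code) of the same arithmetic. The ROUND_DOWN/ROUND_UP macros below mirror the kernel's round_down()/round_up() for power-of-two strides, and the example addresses and the 2MiB stride are arbitrary. Aligning the range outwards makes the invalidation loop walk a whole number of stride-sized blocks that fully cover the original [start, end) interval, and keeps (end - start) an exact multiple of the stride for the MAX_TLBI_OPS comparison above.

#include <stdio.h>

#define ROUND_DOWN(x, a) ((x) & ~((a) - 1))           /* assumes a is a power of two */
#define ROUND_UP(x, a)   (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        unsigned long stride = 0x200000;        /* e.g. a 2MiB PMD-level stride */
        unsigned long start  = 0x40123000;      /* not stride-aligned */
        unsigned long end    = 0x40456000;      /* not stride-aligned */
        unsigned long addr;

        start = ROUND_DOWN(start, stride);
        end   = ROUND_UP(end, stride);

        /* 0x40000000 .. 0x40600000: three whole strides cover the range. */
        for (addr = start; addr < end; addr += stride)
                printf("invalidate block at 0x%lx\n", addr);

        return 0;
}
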
@@ -260,6 +260,13 @@ struct kvm_vcpu_events {
         KVM_REG_SIZE_U256 | \
         ((i) & (KVM_ARM64_SVE_MAX_SLICES - 1)))

+/*
+ * Register values for KVM_REG_ARM64_SVE_ZREG(), KVM_REG_ARM64_SVE_PREG() and
+ * KVM_REG_ARM64_SVE_FFR() are represented in memory in an endianness-
+ * invariant layout which differs from the layout used for the FPSIMD
+ * V-registers on big-endian systems: see sigcontext.h for more explanation.
+ */
+
#define KVM_ARM64_SVE_VQ_MIN __SVE_VQ_MIN
#define KVM_ARM64_SVE_VQ_MAX __SVE_VQ_MAX
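
For context, a hedged userspace sketch (not part of this patch) of how one of these register IDs is typically used on an arm64 host: it assumes a vcpu file descriptor on which the SVE feature has already been enabled and finalized, and omits error handling. Whatever the host endianness, the 256 bytes of the Z-register slice land in the buffer in the endianness-invariant layout the new comment describes.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>   /* struct kvm_one_reg, KVM_GET_ONE_REG, KVM_REG_ARM64_SVE_ZREG() */

/* Read slice `slice` of SVE register Zn into buf (2048 bits = 256 bytes). */
static int read_sve_zreg_slice(int vcpu_fd, unsigned int n, unsigned int slice,
                               uint8_t buf[256])
{
        struct kvm_one_reg reg = {
                .id   = KVM_REG_ARM64_SVE_ZREG(n, slice),
                .addr = (uint64_t)(unsigned long)buf,
        };

        return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}
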
@@ -176,6 +176,10 @@ struct user_sve_header {
 * FPCR uint32_t FPCR
 *
 * Additional data might be appended in the future.
+ *
+ * The Z-, P- and FFR registers are represented in memory in an endianness-
+ * invariant layout which differs from the layout used for the FPSIMD
+ * V-registers on big-endian systems: see sigcontext.h for more explanation.
 */

#define SVE_PT_SVE_ZREG_SIZE(vq) __SVE_ZREG_SIZE(vq)
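
A hedged userspace sketch (not part of this patch) of the two-step PTRACE_GETREGSET access this header describes: read struct user_sve_header first to learn the vector length, then read the full regset and locate a Z-register with the SVE_PT_* macros. NT_ARM_SVE, sve_vq_from_vl() and the SVE_PT_* helpers come from the uapi headers; the tracee is assumed to be stopped and error handling is omitted.

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/uio.h>
#include <linux/elf.h>          /* NT_ARM_SVE */
#include <asm/ptrace.h>         /* struct user_sve_header, SVE_PT_* */
#include <asm/sigcontext.h>     /* sve_vq_from_vl() */

static void *read_sve_regset(pid_t pid, unsigned int n)
{
        struct user_sve_header header;
        struct iovec iov = { .iov_base = &header, .iov_len = sizeof(header) };
        void *buf;

        /* Step 1: header only, to learn the vector length and flags. */
        if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov) != 0)
                return NULL;

        /* Step 2: the whole regset, sized as reported by the kernel. */
        buf = malloc(header.size);
        iov.iov_base = buf;
        iov.iov_len = header.size;
        if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov) != 0)
                return NULL;

        if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE) {
                unsigned int vq = sve_vq_from_vl(header.vl);
                unsigned char *zn = (unsigned char *)buf +
                                    SVE_PT_SVE_ZREG_OFFSET(vq, n);

                /* zn[0] is bits [7:0] of Zn, zn[1] bits [15:8], ... on any host. */
                printf("Z%u byte 0 = 0x%02x\n", n, zn[0]);
        }
        return buf;
}
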
@@ -77,6 +77,15 @@ struct fpsimd_context {
        __uint128_t vregs[32];
};

+/*
+ * Note: similarly to all other integer fields, each V-register is stored in an
+ * endianness-dependent format, with the byte at offset i from the start of the
+ * in-memory representation of the register value containing
+ *
+ *   bits [(7 + 8 * i) : (8 * i)] of the register on little-endian hosts; or
+ *   bits [(127 - 8 * i) : (120 - 8 * i)] on big-endian hosts.
+ */
+
/* ESR_EL1 context */
#define ESR_MAGIC 0x45535201
@@ -204,6 +213,11 @@ struct sve_context {
 * FFR uint16_t[vq] first-fault status register
 *
 * Additional data might be appended in the future.
+ *
+ * Unlike vregs[] in fpsimd_context, each SVE scalable register (Z-, P- or FFR)
+ * is encoded in memory in an endianness-invariant format, with the byte at
+ * offset i from the start of the in-memory representation containing bits
+ * [(7 + 8 * i) : (8 * i)] of the register value.
 */

#define SVE_SIG_ZREG_SIZE(vq) __SVE_ZREG_SIZE(vq)
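
A minimal sketch (not part of this patch, and ignoring the extra_context case for oversized frames as well as all bounds checking) of where this layout becomes visible to userspace: a signal handler walking the __reserved[] area of the mcontext for the SVE_MAGIC record and then indexing a Z-register byte with the SVE_SIG_* macros, whose offsets are relative to the start of sve_context.

#include <stddef.h>
#include <ucontext.h>
#include <asm/sigcontext.h>     /* SVE_MAGIC, struct sve_context, SVE_SIG_* */

static struct sve_context *find_sve_context(ucontext_t *uc)
{
        struct _aarch64_ctx *head =
                (struct _aarch64_ctx *)uc->uc_mcontext.__reserved;

        while (head->magic != 0) {      /* the kernel terminates the list with magic == 0 */
                if (head->magic == SVE_MAGIC)
                        return (struct sve_context *)head;
                head = (struct _aarch64_ctx *)((char *)head + head->size);
        }
        return NULL;
}

/* Byte j of Z-register n: bits [8*j+7 : 8*j] of the register, on any host. */
static unsigned char sve_zreg_byte(struct sve_context *sve, unsigned int n,
                                   unsigned int j)
{
        unsigned int vq = sve_vq_from_vl(sve->vl);

        return *((unsigned char *)sve + SVE_SIG_ZREG_OFFSET(vq, n) + j);
}
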
@@ -39,6 +39,7 @@
#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
+#include <linux/swab.h>

#include <asm/esr.h>
#include <asm/fpsimd.h>
@@ -352,6 +353,23 @@ static int __init sve_sysctl_init(void) { return 0; }
#define ZREG(sve_state, vq, n) ((char *)(sve_state) + \
        (SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))

+#ifdef CONFIG_CPU_BIG_ENDIAN
+static __uint128_t arm64_cpu_to_le128(__uint128_t x)
+{
+       u64 a = swab64(x);
+       u64 b = swab64(x >> 64);
+
+       return ((__uint128_t)a << 64) | b;
+}
+#else
+static __uint128_t arm64_cpu_to_le128(__uint128_t x)
+{
+       return x;
+}
+#endif
+
+#define arm64_le128_to_cpu(x) arm64_cpu_to_le128(x)
+
/*
 * Transfer the FPSIMD state in task->thread.uw.fpsimd_state to
 * task->thread.sve_state.
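
A standalone check (not kernel code) that the big-endian branch above is simply a full byte reversal of the 128-bit value: swab64() is replaced here by the equivalent __builtin_bswap64() so the snippet builds in ordinary userspace on a 64-bit host, and the test value is arbitrary.

#include <assert.h>
#include <stdint.h>
#include <string.h>

typedef unsigned __int128 u128;

/* Mirrors the CONFIG_CPU_BIG_ENDIAN branch of arm64_cpu_to_le128(). */
static u128 bswap128_shift(u128 x)
{
        uint64_t a = __builtin_bswap64((uint64_t)x);
        uint64_t b = __builtin_bswap64((uint64_t)(x >> 64));

        return ((u128)a << 64) | b;
}

/* Reference: reverse the 16 bytes of the object representation. */
static u128 bswap128_bytes(u128 x)
{
        unsigned char in[16], out[16];
        u128 r;
        int i;

        memcpy(in, &x, 16);
        for (i = 0; i < 16; i++)
                out[i] = in[15 - i];
        memcpy(&r, out, 16);
        return r;
}

int main(void)
{
        u128 x = ((u128)0x0123456789abcdefULL << 64) | 0xfedcba9876543210ULL;

        /* Both constructions produce the same byte-reversed value on any host. */
        assert(bswap128_shift(x) == bswap128_bytes(x));
        return 0;
}
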
@@ -369,14 +387,16 @@ static void fpsimd_to_sve(struct task_struct *task)
        void *sst = task->thread.sve_state;
        struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
        unsigned int i;
+       __uint128_t *p;

        if (!system_supports_sve())
                return;

        vq = sve_vq_from_vl(task->thread.sve_vl);
-       for (i = 0; i < 32; ++i)
-               memcpy(ZREG(sst, vq, i), &fst->vregs[i],
-                      sizeof(fst->vregs[i]));
+       for (i = 0; i < 32; ++i) {
+               p = (__uint128_t *)ZREG(sst, vq, i);
+               *p = arm64_cpu_to_le128(fst->vregs[i]);
+       }
}

/*
@@ -395,14 +415,16 @@ static void sve_to_fpsimd(struct task_struct *task)
        void const *sst = task->thread.sve_state;
        struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state;
        unsigned int i;
+       __uint128_t const *p;

        if (!system_supports_sve())
                return;

        vq = sve_vq_from_vl(task->thread.sve_vl);
-       for (i = 0; i < 32; ++i)
-               memcpy(&fst->vregs[i], ZREG(sst, vq, i),
-                      sizeof(fst->vregs[i]));
+       for (i = 0; i < 32; ++i) {
+               p = (__uint128_t const *)ZREG(sst, vq, i);
+               fst->vregs[i] = arm64_le128_to_cpu(*p);
+       }
}

#ifdef CONFIG_ARM64_SVE
@@ -491,6 +513,7 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
        void *sst = task->thread.sve_state;
        struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
        unsigned int i;
+       __uint128_t *p;

        if (!test_tsk_thread_flag(task, TIF_SVE))
                return;
@@ -499,9 +522,10 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
        memset(sst, 0, SVE_SIG_REGS_SIZE(vq));
-       for (i = 0; i < 32; ++i)
-               memcpy(ZREG(sst, vq, i), &fst->vregs[i],
-                      sizeof(fst->vregs[i]));
+       for (i = 0; i < 32; ++i) {
+               p = (__uint128_t *)ZREG(sst, vq, i);
+               *p = arm64_cpu_to_le128(fst->vregs[i]);
+       }
}

int sve_set_vector_length(struct task_struct *task,