Commit 3d1bf78c authored by Will Deacon

Merge branch 'for-next/sve' into for-next/core

Optimise SVE switching for CPUs with 128-bit implementations.

* for-next/sve:
  arm64/sve: Skip flushing Z registers with 128 bit vectors
  arm64/sve: Use the sve_flush macros in sve_load_from_fpsimd_state()
  arm64/sve: Split _sve_flush macro into separate Z and predicate flushes
parents a4a49140 ad4711f9
...@@ -69,7 +69,7 @@ static inline void *sve_pffr(struct thread_struct *thread) ...@@ -69,7 +69,7 @@ static inline void *sve_pffr(struct thread_struct *thread)
extern void sve_save_state(void *state, u32 *pfpsr); extern void sve_save_state(void *state, u32 *pfpsr);
extern void sve_load_state(void const *state, u32 const *pfpsr, extern void sve_load_state(void const *state, u32 const *pfpsr,
unsigned long vq_minus_1); unsigned long vq_minus_1);
extern void sve_flush_live(void); extern void sve_flush_live(unsigned long vq_minus_1);
extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state, extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state,
unsigned long vq_minus_1); unsigned long vq_minus_1);
extern unsigned int sve_get_vl(void); extern unsigned int sve_get_vl(void);
......
...@@ -213,8 +213,10 @@ ...@@ -213,8 +213,10 @@
mov v\nz\().16b, v\nz\().16b mov v\nz\().16b, v\nz\().16b
.endm .endm
.macro sve_flush .macro sve_flush_z
_for n, 0, 31, _sve_flush_z \n _for n, 0, 31, _sve_flush_z \n
.endm
.macro sve_flush_p_ffr
_for n, 0, 15, _sve_pfalse \n _for n, 0, 15, _sve_pfalse \n
_sve_wrffr 0 _sve_wrffr 0
.endm .endm
......
...@@ -65,14 +65,22 @@ SYM_FUNC_END(sve_set_vq) ...@@ -65,14 +65,22 @@ SYM_FUNC_END(sve_set_vq)
SYM_FUNC_START(sve_load_from_fpsimd_state) SYM_FUNC_START(sve_load_from_fpsimd_state)
sve_load_vq x1, x2, x3 sve_load_vq x1, x2, x3
fpsimd_restore x0, 8 fpsimd_restore x0, 8
_for n, 0, 15, _sve_pfalse \n sve_flush_p_ffr
_sve_wrffr 0
ret ret
SYM_FUNC_END(sve_load_from_fpsimd_state) SYM_FUNC_END(sve_load_from_fpsimd_state)
/* Zero all SVE registers but the first 128-bits of each vector */ /*
* Zero all SVE registers but the first 128-bits of each vector
*
* VQ must already be configured by caller, any further updates of VQ
* will need to ensure that the register state remains valid.
*
* x0 = VQ - 1
*/
SYM_FUNC_START(sve_flush_live) SYM_FUNC_START(sve_flush_live)
sve_flush cbz x0, 1f // A VQ-1 of 0 is 128 bits so no extra Z state
sve_flush_z
1: sve_flush_p_ffr
ret ret
SYM_FUNC_END(sve_flush_live) SYM_FUNC_END(sve_flush_live)
......
...@@ -957,8 +957,10 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs) ...@@ -957,8 +957,10 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs)
* disabling the trap, otherwise update our in-memory copy. * disabling the trap, otherwise update our in-memory copy.
*/ */
if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
sve_set_vq(sve_vq_from_vl(current->thread.sve_vl) - 1); unsigned long vq_minus_one =
sve_flush_live(); sve_vq_from_vl(current->thread.sve_vl) - 1;
sve_set_vq(vq_minus_one);
sve_flush_live(vq_minus_one);
fpsimd_bind_task_to_cpu(); fpsimd_bind_task_to_cpu();
} else { } else {
fpsimd_to_sve(current); fpsimd_to_sve(current);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment