Commit a894e8ed authored by Palmer Dabbelt

Merge patch series "riscv: support kernel-mode Vector"

Andy Chiu <andy.chiu@sifive.com> says:

This series provides support for running Vector in kernel mode.
Additionally, kernel-mode Vector can be configured to run without
turning off preemption on a CONFIG_PREEMPT kernel. Along with this
support, we add Vector-optimized copy_{to,from}_user and provide a
simple threshold to decide when to run the vectorized functions.

We decided to drop the vectorized memcpy/memset/memmove for the moment
due to concerns about memory side effects in kernel_vector_begin(). A
detailed description can be found in v9 [0].

This series is composed of 4 parts:
 patch 1-4: adds basic support for kernel-mode Vector
 patch 5: adds vectorized copy_{to,from}_user to the kernel
 patch 6: refactors the context-switch code in fpu [1]
 patch 7-10: provides some code refactors and support for preemptible
             kernel-mode Vector.

This series can be merged if we feel any part of {1~4, 5, 6, 7~10} is
mature enough.

This series was tested on QEMU with V and verified that booting and
normal userspace operations all work as usual with the thresholds set
to 0. We also tested by launching multiple kernel threads that
continuously execute and verify Vector operations in the background.
The module that runs these tests is expected to be upstreamed later.
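
As an illustration of the intended calling pattern (not part of this series;
the thread function below is hypothetical), a kernel user of the new API would
look roughly like this:

  #include <linux/kthread.h>
  #include <linux/sched.h>
  #include <asm/simd.h>
  #include <asm/vector.h>

  /* Hypothetical test thread: only enter kernel-mode Vector when allowed. */
  static int vector_stress_thread(void *unused)
  {
  	while (!kthread_should_stop()) {
  		if (may_use_simd()) {
  			kernel_vector_begin();
  			/* ... issue vector instructions here ... */
  			kernel_vector_end();
  		} else {
  			/* ... scalar fallback ... */
  		}
  		cond_resched();
  	}
  	return 0;
  }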

* b4-shazam-merge:
  riscv: vector: allow kernel-mode Vector with preemption
  riscv: vector: use kmem_cache to manage vector context
  riscv: vector: use a mask to write vstate_ctrl
  riscv: vector: do not pass task_struct into riscv_v_vstate_{save,restore}()
  riscv: fpu: drop SR_SD bit checking
  riscv: lib: vectorize copy_to_user/copy_from_user
  riscv: sched: defer restoring Vector context for user
  riscv: Add vector extension XOR implementation
  riscv: vector: make Vector always available for softirq context
  riscv: Add support for kernel mode vector

Link: https://lore.kernel.org/r/20240115055929.4736-1-andy.chiu@sifive.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
parents d4abde52 2080ff94
...@@ -527,6 +527,28 @@ config RISCV_ISA_V_DEFAULT_ENABLE
If you don't know what to do here, say Y.
config RISCV_ISA_V_UCOPY_THRESHOLD
int "Threshold size for vectorized user copies"
depends on RISCV_ISA_V
default 768
help
Prefer using vectorized copy_to_user()/copy_from_user() when the
workload size exceeds this value.
config RISCV_ISA_V_PREEMPTIVE
bool "Run kernel-mode Vector with kernel preemption"
depends on PREEMPTION
depends on RISCV_ISA_V
default y
help
Usually, in-kernel SIMD routines run with preemption disabled.
Functions which invoke long-running SIMD thus must yield the core's
vector unit to prevent blocking other tasks for too long.
This config allows the kernel to run SIMD without explicitly disabling
preemption. Enabling this config will result in higher memory
consumption due to the allocation of a per-task kernel Vector context.
config TOOLCHAIN_HAS_ZBB
bool
default y
......
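For reference, the dispatch that CONFIG_RISCV_ISA_V_UCOPY_THRESHOLD controls can
be expressed in C roughly as below. This is an illustrative sketch only (it
assumes CONFIG_RISCV_ISA_V=y, and want_vectorized_usercopy() is a hypothetical
name); the in-tree check is done in assembly in __asm_copy_to_user() and in
enter_vector_usercopy() later in this series.

  #include <linux/types.h>
  #include <asm/simd.h>
  #include <asm/vector.h>

  /* Sketch: prefer the vectorized user copy only for large-enough buffers. */
  static inline bool want_vectorized_usercopy(size_t n)
  {
  	return has_vector() && may_use_simd() &&
  	       n >= CONFIG_RISCV_ISA_V_UCOPY_THRESHOLD;
  }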
...@@ -9,6 +9,33 @@ long long __lshrti3(long long a, int b);
long long __ashrti3(long long a, int b);
long long __ashlti3(long long a, int b);
#ifdef CONFIG_RISCV_ISA_V
#ifdef CONFIG_MMU
asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n);
#endif /* CONFIG_MMU */
void xor_regs_2_(unsigned long bytes, unsigned long *__restrict p1,
const unsigned long *__restrict p2);
void xor_regs_3_(unsigned long bytes, unsigned long *__restrict p1,
const unsigned long *__restrict p2,
const unsigned long *__restrict p3);
void xor_regs_4_(unsigned long bytes, unsigned long *__restrict p1,
const unsigned long *__restrict p2,
const unsigned long *__restrict p3,
const unsigned long *__restrict p4);
void xor_regs_5_(unsigned long bytes, unsigned long *__restrict p1,
const unsigned long *__restrict p2,
const unsigned long *__restrict p3,
const unsigned long *__restrict p4,
const unsigned long *__restrict p5);
#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs);
asmlinkage void riscv_v_context_nesting_end(struct pt_regs *regs);
#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */
#endif /* CONFIG_RISCV_ISA_V */
#define DECLARE_DO_ERROR_INFO(name) asmlinkage void name(struct pt_regs *regs)
......
...@@ -4,6 +4,23 @@
#define _ASM_RISCV_ENTRY_COMMON_H
#include <asm/stacktrace.h>
#include <asm/thread_info.h>
#include <asm/vector.h>
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
unsigned long ti_work)
{
if (ti_work & _TIF_RISCV_V_DEFER_RESTORE) {
clear_thread_flag(TIF_RISCV_V_DEFER_RESTORE);
/*
* We are already called with irq disabled, so go without
* keeping track of riscv_v_flags.
*/
riscv_v_vstate_restore(&current->thread.vstate, regs);
}
}
#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
void handle_page_fault(struct pt_regs *regs);
void handle_break(struct pt_regs *regs);
......
...@@ -73,6 +73,43 @@
struct task_struct;
struct pt_regs;
/*
* We use a flag to track in-kernel Vector context. Currently the flag has the
* following meaning:
*
* - bit 0: indicates whether the in-kernel Vector context is active. The
* activation of this state disables preemption. On a non-RT kernel, it
* also disables bh.
* - bit 8: is used for tracking preemptible kernel-mode Vector, when
* RISCV_ISA_V_PREEMPTIVE is enabled. Calling kernel_vector_begin() does not
* disable preemption if the thread's kernel_vstate.datap is allocated.
* Instead, the kernel sets this bit. The trap entry/exit code then knows
* whether we are entering/exiting the context that owns preempt_v.
* - 0: the task is not using preempt_v
* - 1: the task is actively using preempt_v. Whether the task owns the
* preempt_v context is decided by the bits in RISCV_V_CTX_DEPTH_MASK.
* - bits 16-23 are RISCV_V_CTX_DEPTH_MASK, used by the context tracking
* routine when preempt_v starts:
* - 0: the task is actively using, and owns, the preempt_v context.
* - non-zero: the task was using preempt_v, but then took a trap within.
* Thus, the task does not own preempt_v. Any use of Vector will have to
* save preempt_v, if dirty, and fall back to non-preemptible kernel-mode
* Vector.
* - bit 30: the in-kernel preempt_v context is saved, and requires to be
* restored when returning to the context that owns the preempt_v.
* - bit 31: the in-kernel preempt_v context is dirty, as signaled by the
* trap entry code. Any context switch away from the current task needs to
* save it to the task's in-kernel V context. Also, any trap nesting on top
* of preempt_v that requests to use V needs a save.
*/
#define RISCV_V_CTX_DEPTH_MASK 0x00ff0000
#define RISCV_V_CTX_UNIT_DEPTH 0x00010000
#define RISCV_KERNEL_MODE_V 0x00000001
#define RISCV_PREEMPT_V 0x00000100
#define RISCV_PREEMPT_V_DIRTY 0x80000000
#define RISCV_PREEMPT_V_NEED_RESTORE 0x40000000
/* CPU-specific state of a task */
struct thread_struct {
/* Callee-saved registers */
...@@ -81,9 +118,11 @@ struct thread_struct {
unsigned long s[12]; /* s[0]: frame pointer */
struct __riscv_d_ext_state fstate;
unsigned long bad_cause;
u32 riscv_v_flags;
u32 vstate_ctrl;
struct __riscv_v_ext_state vstate;
unsigned long align_ctl;
struct __riscv_v_ext_state kernel_vstate;
};
/* Whitelist the fstate from the task_struct for hardened usercopy */
......
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
* Copyright (C) 2023 SiFive
*/
#ifndef __ASM_SIMD_H
#define __ASM_SIMD_H
#include <linux/compiler.h>
#include <linux/irqflags.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/types.h>
#include <linux/thread_info.h>
#include <asm/vector.h>
#ifdef CONFIG_RISCV_ISA_V
/*
* may_use_simd - whether it is allowable at this time to issue vector
* instructions or access the vector register file
*
* Callers must not assume that the result remains true beyond the next
* preempt_enable() or return from softirq context.
*/
static __must_check inline bool may_use_simd(void)
{
/*
* RISCV_KERNEL_MODE_V is only set while preemption is disabled,
* and is clear whenever preemption is enabled.
*/
if (in_hardirq() || in_nmi())
return false;
/*
* Nesting is achieved in preempt_v by spreading the control for
* preemptible and non-preemptible kernel-mode Vector into two fields.
* Always try to match with preempt_v if a kernel V-context exists. Then,
* fall back to checking non-preempt_v if nesting happens, or if the config
* is not set.
*/
if (IS_ENABLED(CONFIG_RISCV_ISA_V_PREEMPTIVE) && current->thread.kernel_vstate.datap) {
if (!riscv_preempt_v_started(current))
return true;
}
/*
* Non-preemptible kernel-mode Vector temporarily disables bh. So we
* must not return true if irqs_disabled(). Otherwise we would fail
* the lockdep check when calling local_bh_enable().
*/
return !irqs_disabled() && !(riscv_v_flags() & RISCV_KERNEL_MODE_V);
}
#else /* ! CONFIG_RISCV_ISA_V */
static __must_check inline bool may_use_simd(void)
{
return false;
}
#endif /* ! CONFIG_RISCV_ISA_V */
#endif
...@@ -53,8 +53,7 @@ static inline void __switch_to_fpu(struct task_struct *prev,
struct pt_regs *regs;
regs = task_pt_regs(prev);
fstate_save(prev, regs);
fstate_restore(next, task_pt_regs(next));
}
......
...@@ -102,12 +102,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
#define TIF_NOTIFY_SIGNAL 9 /* signal notifications exist */
#define TIF_UPROBE 10 /* uprobe breakpoint or singlestep */
#define TIF_32BIT 11 /* compat-mode 32bit process */
#define TIF_RISCV_V_DEFER_RESTORE 12 /* restore Vector before returning to user */
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_RISCV_V_DEFER_RESTORE (1 << TIF_RISCV_V_DEFER_RESTORE)
#define _TIF_WORK_MASK \
(_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED | \
......
...@@ -22,6 +22,18 @@
extern unsigned long riscv_v_vsize;
int riscv_v_setup_vsize(void);
bool riscv_v_first_use_handler(struct pt_regs *regs);
void kernel_vector_begin(void);
void kernel_vector_end(void);
void get_cpu_vector_context(void);
void put_cpu_vector_context(void);
void riscv_v_thread_free(struct task_struct *tsk);
void __init riscv_v_setup_ctx_cache(void);
void riscv_v_thread_alloc(struct task_struct *tsk);
static inline u32 riscv_v_flags(void)
{
return READ_ONCE(current->thread.riscv_v_flags);
}
static __always_inline bool has_vector(void)
{
...@@ -162,36 +174,89 @@ static inline void riscv_v_vstate_discard(struct pt_regs *regs)
__riscv_v_vstate_dirty(regs);
}
static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate,
struct pt_regs *regs)
{
if ((regs->status & SR_VS) == SR_VS_DIRTY) {
__riscv_v_vstate_save(vstate, vstate->datap);
__riscv_v_vstate_clean(regs);
}
}
static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate,
struct pt_regs *regs)
{
if ((regs->status & SR_VS) != SR_VS_OFF) {
__riscv_v_vstate_restore(vstate, vstate->datap);
__riscv_v_vstate_clean(regs);
}
}
static inline void riscv_v_vstate_set_restore(struct task_struct *task,
struct pt_regs *regs)
{
if ((regs->status & SR_VS) != SR_VS_OFF) {
set_tsk_thread_flag(task, TIF_RISCV_V_DEFER_RESTORE);
riscv_v_vstate_on(regs);
}
}
#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
static inline bool riscv_preempt_v_dirty(struct task_struct *task)
{
return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V_DIRTY);
}
static inline bool riscv_preempt_v_restore(struct task_struct *task)
{
return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V_NEED_RESTORE);
}
static inline void riscv_preempt_v_clear_dirty(struct task_struct *task)
{
barrier();
task->thread.riscv_v_flags &= ~RISCV_PREEMPT_V_DIRTY;
}
static inline void riscv_preempt_v_set_restore(struct task_struct *task)
{
barrier();
task->thread.riscv_v_flags |= RISCV_PREEMPT_V_NEED_RESTORE;
}
static inline bool riscv_preempt_v_started(struct task_struct *task)
{
return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V);
}
#else /* !CONFIG_RISCV_ISA_V_PREEMPTIVE */
static inline bool riscv_preempt_v_dirty(struct task_struct *task) { return false; }
static inline bool riscv_preempt_v_restore(struct task_struct *task) { return false; }
static inline bool riscv_preempt_v_started(struct task_struct *task) { return false; }
#define riscv_preempt_v_clear_dirty(tsk) do {} while (0)
#define riscv_preempt_v_set_restore(tsk) do {} while (0)
#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */
static inline void __switch_to_vector(struct task_struct *prev,
struct task_struct *next)
{
struct pt_regs *regs;
if (riscv_preempt_v_started(prev)) {
if (riscv_preempt_v_dirty(prev)) {
__riscv_v_vstate_save(&prev->thread.kernel_vstate,
prev->thread.kernel_vstate.datap);
riscv_preempt_v_clear_dirty(prev);
}
} else {
regs = task_pt_regs(prev);
riscv_v_vstate_save(&prev->thread.vstate, regs);
}
if (riscv_preempt_v_started(next))
riscv_preempt_v_set_restore(next);
else
riscv_v_vstate_set_restore(next, task_pt_regs(next));
}
void riscv_v_vstate_ctrl_init(struct task_struct *tsk);
...@@ -208,11 +273,14 @@ static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; }
static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; }
#define riscv_v_vsize (0)
#define riscv_v_vstate_discard(regs) do {} while (0)
#define riscv_v_vstate_save(vstate, regs) do {} while (0)
#define riscv_v_vstate_restore(vstate, regs) do {} while (0)
#define __switch_to_vector(__prev, __next) do {} while (0)
#define riscv_v_vstate_off(regs) do {} while (0)
#define riscv_v_vstate_on(regs) do {} while (0)
#define riscv_v_thread_free(tsk) do {} while (0)
#define riscv_v_setup_ctx_cache() do {} while (0)
#define riscv_v_thread_alloc(tsk) do {} while (0)
#endif /* CONFIG_RISCV_ISA_V */
......
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2021 SiFive
*/
#include <linux/hardirq.h>
#include <asm-generic/xor.h>
#ifdef CONFIG_RISCV_ISA_V
#include <asm/vector.h>
#include <asm/switch_to.h>
#include <asm/asm-prototypes.h>
static void xor_vector_2(unsigned long bytes, unsigned long *__restrict p1,
const unsigned long *__restrict p2)
{
kernel_vector_begin();
xor_regs_2_(bytes, p1, p2);
kernel_vector_end();
}
static void xor_vector_3(unsigned long bytes, unsigned long *__restrict p1,
const unsigned long *__restrict p2,
const unsigned long *__restrict p3)
{
kernel_vector_begin();
xor_regs_3_(bytes, p1, p2, p3);
kernel_vector_end();
}
static void xor_vector_4(unsigned long bytes, unsigned long *__restrict p1,
const unsigned long *__restrict p2,
const unsigned long *__restrict p3,
const unsigned long *__restrict p4)
{
kernel_vector_begin();
xor_regs_4_(bytes, p1, p2, p3, p4);
kernel_vector_end();
}
static void xor_vector_5(unsigned long bytes, unsigned long *__restrict p1,
const unsigned long *__restrict p2,
const unsigned long *__restrict p3,
const unsigned long *__restrict p4,
const unsigned long *__restrict p5)
{
kernel_vector_begin();
xor_regs_5_(bytes, p1, p2, p3, p4, p5);
kernel_vector_end();
}
static struct xor_block_template xor_block_rvv = {
.name = "rvv",
.do_2 = xor_vector_2,
.do_3 = xor_vector_3,
.do_4 = xor_vector_4,
.do_5 = xor_vector_5
};
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES \
do { \
xor_speed(&xor_block_8regs); \
xor_speed(&xor_block_32regs); \
if (has_vector()) { \
xor_speed(&xor_block_rvv);\
} \
} while (0)
#endif
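The "rvv" template above is registered with the generic XOR benchmark code;
callers reach it indirectly through xor_blocks() once it wins the boot-time
speed test. A minimal, hypothetical caller would look like this:

  #include <linux/raid/xor.h>

  /* Sketch: XOR one source buffer into dst via the fastest registered template. */
  static void xor_one_source(void *dst, void *src, unsigned int bytes)
  {
  	void *srcs[] = { src };

  	xor_blocks(1, bytes, dst, srcs);
  }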
...@@ -64,6 +64,7 @@ obj-$(CONFIG_MMU) += vdso.o vdso/
obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o
obj-$(CONFIG_FPU) += fpu.o
obj-$(CONFIG_RISCV_ISA_V) += vector.o
obj-$(CONFIG_RISCV_ISA_V) += kernel_mode_vector.o
obj-$(CONFIG_SMP) += smpboot.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SMP) += cpu_ops.o
......
...@@ -83,6 +83,10 @@ SYM_CODE_START(handle_exception)
/* Load the kernel shadow call stack pointer if coming from userspace */
scs_load_current_if_task_changed s5
#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
move a0, sp
call riscv_v_context_nesting_start
#endif
move a0, sp /* pt_regs */
la ra, ret_from_exception
...@@ -138,6 +142,10 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
*/
csrw CSR_SCRATCH, tp
1:
#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
move a0, sp
call riscv_v_context_nesting_end
#endif
REG_L a0, PT_STATUS(sp)
/*
* The current load reservation is effectively part of the processor's
......
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2012 ARM Ltd.
* Author: Catalin Marinas <catalin.marinas@arm.com>
* Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
* Copyright (C) 2021 SiFive
*/
#include <linux/compiler.h>
#include <linux/irqflags.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/types.h>
#include <asm/vector.h>
#include <asm/switch_to.h>
#include <asm/simd.h>
#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
#include <asm/asm-prototypes.h>
#endif
static inline void riscv_v_flags_set(u32 flags)
{
WRITE_ONCE(current->thread.riscv_v_flags, flags);
}
static inline void riscv_v_start(u32 flags)
{
int orig;
orig = riscv_v_flags();
BUG_ON((orig & flags) != 0);
riscv_v_flags_set(orig | flags);
barrier();
}
static inline void riscv_v_stop(u32 flags)
{
int orig;
barrier();
orig = riscv_v_flags();
BUG_ON((orig & flags) == 0);
riscv_v_flags_set(orig & ~flags);
}
/*
* Claim ownership of the CPU vector context for use by the calling context.
*
* The caller may freely manipulate the vector context metadata until
* put_cpu_vector_context() is called.
*/
void get_cpu_vector_context(void)
{
/*
* Disable softirqs so that softirqs cannot nest
* get_cpu_vector_context() while the kernel is actively using Vector.
*/
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
local_bh_disable();
else
preempt_disable();
riscv_v_start(RISCV_KERNEL_MODE_V);
}
/*
* Release the CPU vector context.
*
* Must be called from a context in which get_cpu_vector_context() was
* previously called, with no call to put_cpu_vector_context() in the
* meantime.
*/
void put_cpu_vector_context(void)
{
riscv_v_stop(RISCV_KERNEL_MODE_V);
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
local_bh_enable();
else
preempt_enable();
}
#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
static __always_inline u32 *riscv_v_flags_ptr(void)
{
return &current->thread.riscv_v_flags;
}
static inline void riscv_preempt_v_set_dirty(void)
{
*riscv_v_flags_ptr() |= RISCV_PREEMPT_V_DIRTY;
}
static inline void riscv_preempt_v_reset_flags(void)
{
*riscv_v_flags_ptr() &= ~(RISCV_PREEMPT_V_DIRTY | RISCV_PREEMPT_V_NEED_RESTORE);
}
static inline void riscv_v_ctx_depth_inc(void)
{
*riscv_v_flags_ptr() += RISCV_V_CTX_UNIT_DEPTH;
}
static inline void riscv_v_ctx_depth_dec(void)
{
*riscv_v_flags_ptr() -= RISCV_V_CTX_UNIT_DEPTH;
}
static inline u32 riscv_v_ctx_get_depth(void)
{
return *riscv_v_flags_ptr() & RISCV_V_CTX_DEPTH_MASK;
}
static int riscv_v_stop_kernel_context(void)
{
if (riscv_v_ctx_get_depth() != 0 || !riscv_preempt_v_started(current))
return 1;
riscv_preempt_v_clear_dirty(current);
riscv_v_stop(RISCV_PREEMPT_V);
return 0;
}
static int riscv_v_start_kernel_context(bool *is_nested)
{
struct __riscv_v_ext_state *kvstate, *uvstate;
kvstate = &current->thread.kernel_vstate;
if (!kvstate->datap)
return -ENOENT;
if (riscv_preempt_v_started(current)) {
WARN_ON(riscv_v_ctx_get_depth() == 0);
*is_nested = true;
get_cpu_vector_context();
if (riscv_preempt_v_dirty(current)) {
__riscv_v_vstate_save(kvstate, kvstate->datap);
riscv_preempt_v_clear_dirty(current);
}
riscv_preempt_v_set_restore(current);
return 0;
}
/* Transfer the ownership of V from user to kernel, then save */
riscv_v_start(RISCV_PREEMPT_V | RISCV_PREEMPT_V_DIRTY);
if ((task_pt_regs(current)->status & SR_VS) == SR_VS_DIRTY) {
uvstate = &current->thread.vstate;
__riscv_v_vstate_save(uvstate, uvstate->datap);
}
riscv_preempt_v_clear_dirty(current);
return 0;
}
/* low-level V context handling code, called with irq disabled */
asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs)
{
int depth;
if (!riscv_preempt_v_started(current))
return;
depth = riscv_v_ctx_get_depth();
if (depth == 0 && (regs->status & SR_VS) == SR_VS_DIRTY)
riscv_preempt_v_set_dirty();
riscv_v_ctx_depth_inc();
}
asmlinkage void riscv_v_context_nesting_end(struct pt_regs *regs)
{
struct __riscv_v_ext_state *vstate = &current->thread.kernel_vstate;
u32 depth;
WARN_ON(!irqs_disabled());
if (!riscv_preempt_v_started(current))
return;
riscv_v_ctx_depth_dec();
depth = riscv_v_ctx_get_depth();
if (depth == 0) {
if (riscv_preempt_v_restore(current)) {
__riscv_v_vstate_restore(vstate, vstate->datap);
__riscv_v_vstate_clean(regs);
riscv_preempt_v_reset_flags();
}
}
}
#else
#define riscv_v_start_kernel_context(nested) (-ENOENT)
#define riscv_v_stop_kernel_context() (-ENOENT)
#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */
/*
* kernel_vector_begin(): obtain the CPU vector registers for use by the calling
* context
*
* Must not be called unless may_use_simd() returns true.
* Task context in the vector registers is saved back to memory as necessary.
*
* A matching call to kernel_vector_end() must be made before returning from the
* calling context.
*
* The caller may freely use the vector registers until kernel_vector_end() is
* called.
*/
void kernel_vector_begin(void)
{
bool nested = false;
if (WARN_ON(!has_vector()))
return;
BUG_ON(!may_use_simd());
if (riscv_v_start_kernel_context(&nested)) {
get_cpu_vector_context();
riscv_v_vstate_save(&current->thread.vstate, task_pt_regs(current));
}
if (!nested)
riscv_v_vstate_set_restore(current, task_pt_regs(current));
riscv_v_enable();
}
EXPORT_SYMBOL_GPL(kernel_vector_begin);
/*
* kernel_vector_end(): give the CPU vector registers back to the current task
*
* Must be called from a context in which kernel_vector_begin() was previously
* called, with no call to kernel_vector_end() in the meantime.
*
* The caller must not use the vector registers after this function is called,
* unless kernel_vector_begin() is called again in the meantime.
*/
void kernel_vector_end(void)
{
if (WARN_ON(!has_vector()))
return;
riscv_v_disable();
if (riscv_v_stop_kernel_context())
put_cpu_vector_context();
}
EXPORT_SYMBOL_GPL(kernel_vector_end);
...@@ -171,6 +171,7 @@ void flush_thread(void)
riscv_v_vstate_off(task_pt_regs(current));
kfree(current->thread.vstate.datap);
memset(&current->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
clear_tsk_thread_flag(current, TIF_RISCV_V_DEFER_RESTORE);
#endif
}
...@@ -178,7 +179,7 @@ void arch_release_task_struct(struct task_struct *tsk)
{
/* Free the vector context of datap. */
if (has_vector())
riscv_v_thread_free(tsk);
}
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
...@@ -187,6 +188,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
*dst = *src;
/* clear entire V context, including datap for a new task */
memset(&dst->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
memset(&dst->thread.kernel_vstate, 0, sizeof(struct __riscv_v_ext_state));
clear_tsk_thread_flag(dst, TIF_RISCV_V_DEFER_RESTORE);
return 0;
}
...@@ -221,7 +224,15 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
childregs->a0 = 0; /* Return value of fork() */
p->thread.s[0] = 0;
}
p->thread.riscv_v_flags = 0;
if (has_vector())
riscv_v_thread_alloc(p);
p->thread.ra = (unsigned long)ret_from_fork;
p->thread.sp = (unsigned long)childregs; /* kernel sp */
return 0;
}
void __init arch_task_cache_init(void)
{
riscv_v_setup_ctx_cache();
}
...@@ -99,8 +99,11 @@ static int riscv_vr_get(struct task_struct *target,
* Ensure the vector registers have been saved to the memory before
* copying them to membuf.
*/
if (target == current) {
get_cpu_vector_context();
riscv_v_vstate_save(&current->thread.vstate, task_pt_regs(current));
put_cpu_vector_context();
}
ptrace_vstate.vstart = vstate->vstart;
ptrace_vstate.vl = vstate->vl;
......
...@@ -86,7 +86,10 @@ static long save_v_state(struct pt_regs *regs, void __user **sc_vec)
/* datap is designed to be 16 byte aligned for better performance */
WARN_ON(unlikely(!IS_ALIGNED((unsigned long)datap, 16)));
get_cpu_vector_context();
riscv_v_vstate_save(&current->thread.vstate, regs);
put_cpu_vector_context();
/* Copy everything of vstate but datap. */
err = __copy_to_user(&state->v_state, &current->thread.vstate,
offsetof(struct __riscv_v_ext_state, datap));
...@@ -134,7 +137,7 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec)
if (unlikely(err))
return err;
riscv_v_vstate_set_restore(current, regs);
return err;
}
......
...@@ -21,6 +21,10 @@
#include <asm/bug.h>
static bool riscv_v_implicit_uacc = IS_ENABLED(CONFIG_RISCV_ISA_V_DEFAULT_ENABLE);
static struct kmem_cache *riscv_v_user_cachep;
#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
static struct kmem_cache *riscv_v_kernel_cachep;
#endif
unsigned long riscv_v_vsize __read_mostly;
EXPORT_SYMBOL_GPL(riscv_v_vsize);
...@@ -47,6 +51,21 @@ int riscv_v_setup_vsize(void)
return 0;
}
void __init riscv_v_setup_ctx_cache(void)
{
if (!has_vector())
return;
riscv_v_user_cachep = kmem_cache_create_usercopy("riscv_vector_ctx",
riscv_v_vsize, 16, SLAB_PANIC,
0, riscv_v_vsize, NULL);
#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
riscv_v_kernel_cachep = kmem_cache_create("riscv_vector_kctx",
riscv_v_vsize, 16,
SLAB_PANIC, NULL);
#endif
}
static bool insn_is_vector(u32 insn_buf)
{
u32 opcode = insn_buf & __INSN_OPCODE_MASK;
...@@ -80,20 +99,37 @@ static bool insn_is_vector(u32 insn_buf)
return false;
}
static int riscv_v_thread_zalloc(struct kmem_cache *cache,
struct __riscv_v_ext_state *ctx)
{
void *datap;
datap = kmem_cache_zalloc(cache, GFP_KERNEL);
if (!datap)
return -ENOMEM;
ctx->datap = datap;
memset(ctx, 0, offsetof(struct __riscv_v_ext_state, datap));
return 0;
}
void riscv_v_thread_alloc(struct task_struct *tsk)
{
#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
riscv_v_thread_zalloc(riscv_v_kernel_cachep, &tsk->thread.kernel_vstate);
#endif
}
void riscv_v_thread_free(struct task_struct *tsk)
{
if (tsk->thread.vstate.datap)
kmem_cache_free(riscv_v_user_cachep, tsk->thread.vstate.datap);
#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
if (tsk->thread.kernel_vstate.datap)
kmem_cache_free(riscv_v_kernel_cachep, tsk->thread.kernel_vstate.datap);
#endif
}
#define VSTATE_CTRL_GET_CUR(x) ((x) & PR_RISCV_V_VSTATE_CTRL_CUR_MASK)
#define VSTATE_CTRL_GET_NEXT(x) (((x) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) >> 2)
#define VSTATE_CTRL_MAKE_NEXT(x) (((x) << 2) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK)
...@@ -122,7 +158,8 @@ static inline void riscv_v_ctrl_set(struct task_struct *tsk, int cur, int nxt,
ctrl |= VSTATE_CTRL_MAKE_NEXT(nxt);
if (inherit)
ctrl |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
tsk->thread.vstate_ctrl &= ~PR_RISCV_V_VSTATE_CTRL_MASK;
tsk->thread.vstate_ctrl |= ctrl;
}
bool riscv_v_vstate_ctrl_user_allowed(void)
...@@ -162,12 +199,12 @@ bool riscv_v_first_use_handler(struct pt_regs *regs)
* context where VS has been off. So, try to allocate the user's V
* context and resume execution.
*/
if (riscv_v_thread_zalloc(riscv_v_user_cachep, &current->thread.vstate)) {
force_sig(SIGBUS);
return true;
}
riscv_v_vstate_on(regs);
riscv_v_vstate_set_restore(current, regs);
return true;
}
......
...@@ -6,8 +6,13 @@ lib-y += memmove.o
lib-y += strcmp.o
lib-y += strlen.o
lib-y += strncmp.o
ifeq ($(CONFIG_MMU), y)
lib-y += uaccess.o
lib-$(CONFIG_RISCV_ISA_V) += uaccess_vector.o
endif
lib-$(CONFIG_64BIT) += tishift.o
lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_RISCV_ISA_V) += xor.o
lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2023 SiFive
* Author: Andy Chiu <andy.chiu@sifive.com>
*/
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm/vector.h>
#include <asm/simd.h>
#ifdef CONFIG_MMU
#include <asm/asm-prototypes.h>
#endif
#ifdef CONFIG_MMU
size_t riscv_v_usercopy_threshold = CONFIG_RISCV_ISA_V_UCOPY_THRESHOLD;
int __asm_vector_usercopy(void *dst, void *src, size_t n);
int fallback_scalar_usercopy(void *dst, void *src, size_t n);
asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n)
{
size_t remain, copied;
/* skip has_vector() check because it has been done by the asm */
if (!may_use_simd())
goto fallback;
kernel_vector_begin();
remain = __asm_vector_usercopy(dst, src, n);
kernel_vector_end();
if (remain) {
copied = n - remain;
dst += copied;
src += copied;
n = remain;
goto fallback;
}
return remain;
fallback:
return fallback_scalar_usercopy(dst, src, n);
}
#endif
...@@ -3,6 +3,8 @@
#include <asm/asm.h>
#include <asm/asm-extable.h>
#include <asm/csr.h>
#include <asm/hwcap.h>
#include <asm/alternative-macros.h>
.macro fixup op reg addr lbl
100:
...@@ -11,6 +13,13 @@
.endm
SYM_FUNC_START(__asm_copy_to_user)
#ifdef CONFIG_RISCV_ISA_V
ALTERNATIVE("j fallback_scalar_usercopy", "nop", 0, RISCV_ISA_EXT_v, CONFIG_RISCV_ISA_V)
REG_L t0, riscv_v_usercopy_threshold
bltu a2, t0, fallback_scalar_usercopy
tail enter_vector_usercopy
#endif
SYM_FUNC_START(fallback_scalar_usercopy)
/* Enable access to user memory */
li t6, SR_SUM
...@@ -181,6 +190,7 @@ SYM_FUNC_START(__asm_copy_to_user)
sub a0, t5, a0
ret
SYM_FUNC_END(__asm_copy_to_user)
SYM_FUNC_END(fallback_scalar_usercopy)
EXPORT_SYMBOL(__asm_copy_to_user)
SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user)
EXPORT_SYMBOL(__asm_copy_from_user)
......
/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/linkage.h>
#include <asm-generic/export.h>
#include <asm/asm.h>
#include <asm/asm-extable.h>
#include <asm/csr.h>
#define pDst a0
#define pSrc a1
#define iNum a2
#define iVL a3
#define ELEM_LMUL_SETTING m8
#define vData v0
.macro fixup op reg addr lbl
100:
\op \reg, \addr
_asm_extable 100b, \lbl
.endm
SYM_FUNC_START(__asm_vector_usercopy)
/* Enable access to user memory */
li t6, SR_SUM
csrs CSR_STATUS, t6
loop:
vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
fixup vle8.v vData, (pSrc), 10f
sub iNum, iNum, iVL
add pSrc, pSrc, iVL
fixup vse8.v vData, (pDst), 11f
add pDst, pDst, iVL
bnez iNum, loop
/* Exception fixup for vector load is shared with normal exit */
10:
/* Disable access to user memory */
csrc CSR_STATUS, t6
mv a0, iNum
ret
/* Exception fixup code for vector store. */
11:
/* Undo the subtraction after vle8.v */
add iNum, iNum, iVL
/* Make sure the scalar fallback skips already-processed bytes */
csrr t2, CSR_VSTART
sub iNum, iNum, t2
j 10b
SYM_FUNC_END(__asm_vector_usercopy)
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2021 SiFive
*/
#include <linux/linkage.h>
#include <linux/export.h>
#include <asm/asm.h>
SYM_FUNC_START(xor_regs_2_)
vsetvli a3, a0, e8, m8, ta, ma
vle8.v v0, (a1)
vle8.v v8, (a2)
sub a0, a0, a3
vxor.vv v16, v0, v8
add a2, a2, a3
vse8.v v16, (a1)
add a1, a1, a3
bnez a0, xor_regs_2_
ret
SYM_FUNC_END(xor_regs_2_)
EXPORT_SYMBOL(xor_regs_2_)
SYM_FUNC_START(xor_regs_3_)
vsetvli a4, a0, e8, m8, ta, ma
vle8.v v0, (a1)
vle8.v v8, (a2)
sub a0, a0, a4
vxor.vv v0, v0, v8
vle8.v v16, (a3)
add a2, a2, a4
vxor.vv v16, v0, v16
add a3, a3, a4
vse8.v v16, (a1)
add a1, a1, a4
bnez a0, xor_regs_3_
ret
SYM_FUNC_END(xor_regs_3_)
EXPORT_SYMBOL(xor_regs_3_)
SYM_FUNC_START(xor_regs_4_)
vsetvli a5, a0, e8, m8, ta, ma
vle8.v v0, (a1)
vle8.v v8, (a2)
sub a0, a0, a5
vxor.vv v0, v0, v8
vle8.v v16, (a3)
add a2, a2, a5
vxor.vv v0, v0, v16
vle8.v v24, (a4)
add a3, a3, a5
vxor.vv v16, v0, v24
add a4, a4, a5
vse8.v v16, (a1)
add a1, a1, a5
bnez a0, xor_regs_4_
ret
SYM_FUNC_END(xor_regs_4_)
EXPORT_SYMBOL(xor_regs_4_)
SYM_FUNC_START(xor_regs_5_)
vsetvli a6, a0, e8, m8, ta, ma
vle8.v v0, (a1)
vle8.v v8, (a2)
sub a0, a0, a6
vxor.vv v0, v0, v8
vle8.v v16, (a3)
add a2, a2, a6
vxor.vv v0, v0, v16
vle8.v v24, (a4)
add a3, a3, a6
vxor.vv v0, v0, v24
vle8.v v8, (a5)
add a4, a4, a6
vxor.vv v16, v0, v8
add a5, a5, a6
vse8.v v16, (a1)
add a1, a1, a6
bnez a0, xor_regs_5_
ret
SYM_FUNC_END(xor_regs_5_)
EXPORT_SYMBOL(xor_regs_5_)