Commit 7c586a55 authored by Clément Léger's avatar Clément Léger Committed by Palmer Dabbelt

riscv: add floating point insn support to misaligned access emulation

This support is partially based of openSBI misaligned emulation floating
point instruction support. It provides support for the existing
floating point instructions (both for 32/64 bits as well as compressed
ones). Since floating point registers are not part of the pt_regs
struct, we need to modify them directly using some assembly. We also
dirty the pt_regs status in case we modify them to be sure context
switch will save FP state. With this support, Linux is on par with
openSBI support.
Signed-off-by: default avatarClément Léger <cleger@rivosinc.com>
Link: https://lore.kernel.org/r/20231004151405.521596-5-cleger@rivosinc.comSigned-off-by: default avatarPalmer Dabbelt <palmer@rivosinc.com>
parent 89c12fec
...@@ -104,3 +104,124 @@ ENTRY(__fstate_restore) ...@@ -104,3 +104,124 @@ ENTRY(__fstate_restore)
csrc CSR_STATUS, t1 csrc CSR_STATUS, t1
ret ret
ENDPROC(__fstate_restore) ENDPROC(__fstate_restore)
#define get_f32(which) fmv.x.s a0, which; j 2f
#define put_f32(which) fmv.s.x which, a1; j 2f
#if __riscv_xlen == 64
# define get_f64(which) fmv.x.d a0, which; j 2f
# define put_f64(which) fmv.d.x which, a1; j 2f
#else
# define get_f64(which) fsd which, 0(a1); j 2f
# define put_f64(which) fld which, 0(a1); j 2f
#endif
.macro fp_access_prologue
/*
* Compute jump offset to store the correct FP register since we don't
* have indirect FP register access
*/
sll t0, a0, 3
la t2, 1f
add t0, t0, t2
li t1, SR_FS
csrs CSR_STATUS, t1
jr t0
1:
.endm
.macro fp_access_epilogue
2:
csrc CSR_STATUS, t1
ret
.endm
#define fp_access_body(__access_func) \
__access_func(f0); \
__access_func(f1); \
__access_func(f2); \
__access_func(f3); \
__access_func(f4); \
__access_func(f5); \
__access_func(f6); \
__access_func(f7); \
__access_func(f8); \
__access_func(f9); \
__access_func(f10); \
__access_func(f11); \
__access_func(f12); \
__access_func(f13); \
__access_func(f14); \
__access_func(f15); \
__access_func(f16); \
__access_func(f17); \
__access_func(f18); \
__access_func(f19); \
__access_func(f20); \
__access_func(f21); \
__access_func(f22); \
__access_func(f23); \
__access_func(f24); \
__access_func(f25); \
__access_func(f26); \
__access_func(f27); \
__access_func(f28); \
__access_func(f29); \
__access_func(f30); \
__access_func(f31)
#ifdef CONFIG_RISCV_MISALIGNED
/*
* Disable compressed instructions set to keep a constant offset between FP
* load/store/move instructions
*/
.option norvc
/*
* put_f32_reg - Set a FP register from a register containing the value
* a0 = FP register index to be set
* a1 = value to be loaded in the FP register
*/
SYM_FUNC_START(put_f32_reg)
fp_access_prologue
fp_access_body(put_f32)
fp_access_epilogue
SYM_FUNC_END(put_f32_reg)
/*
* get_f32_reg - Get a FP register value and return it
* a0 = FP register index to be retrieved
*/
SYM_FUNC_START(get_f32_reg)
fp_access_prologue
fp_access_body(get_f32)
fp_access_epilogue
SYM_FUNC_END(get_f32_reg)
/*
* put_f64_reg - Set a 64 bits FP register from a value or a pointer.
* a0 = FP register index to be set
* a1 = value/pointer to be loaded in the FP register (when xlen == 32 bits, we
* load the value to a pointer).
*/
SYM_FUNC_START(put_f64_reg)
fp_access_prologue
fp_access_body(put_f64)
fp_access_epilogue
SYM_FUNC_END(put_f64_reg)
/*
* put_f64_reg - Get a 64 bits FP register value and returned it or store it to
* a pointer.
* a0 = FP register index to be retrieved
* a1 = If xlen == 32, pointer which should be loaded with the FP register value
* or unused if xlen == 64. In which case the FP register value is returned
* through a0
*/
SYM_FUNC_START(get_f64_reg)
fp_access_prologue
fp_access_body(get_f64)
fp_access_epilogue
SYM_FUNC_END(get_f64_reg)
#endif /* CONFIG_RISCV_MISALIGNED */
...@@ -153,6 +153,115 @@ ...@@ -153,6 +153,115 @@
#define PRECISION_S 0 #define PRECISION_S 0
#define PRECISION_D 1 #define PRECISION_D 1
#ifdef CONFIG_FPU
#define FP_GET_RD(insn) (insn >> 7 & 0x1F)
extern void put_f32_reg(unsigned long fp_reg, unsigned long value);
static int set_f32_rd(unsigned long insn, struct pt_regs *regs,
unsigned long val)
{
unsigned long fp_reg = FP_GET_RD(insn);
put_f32_reg(fp_reg, val);
regs->status |= SR_FS_DIRTY;
return 0;
}
extern void put_f64_reg(unsigned long fp_reg, unsigned long value);
static int set_f64_rd(unsigned long insn, struct pt_regs *regs, u64 val)
{
unsigned long fp_reg = FP_GET_RD(insn);
unsigned long value;
#if __riscv_xlen == 32
value = (unsigned long) &val;
#else
value = val;
#endif
put_f64_reg(fp_reg, value);
regs->status |= SR_FS_DIRTY;
return 0;
}
#if __riscv_xlen == 32
extern void get_f64_reg(unsigned long fp_reg, u64 *value);
static u64 get_f64_rs(unsigned long insn, u8 fp_reg_offset,
struct pt_regs *regs)
{
unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F;
u64 val;
get_f64_reg(fp_reg, &val);
regs->status |= SR_FS_DIRTY;
return val;
}
#else
extern unsigned long get_f64_reg(unsigned long fp_reg);
static unsigned long get_f64_rs(unsigned long insn, u8 fp_reg_offset,
struct pt_regs *regs)
{
unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F;
unsigned long val;
val = get_f64_reg(fp_reg);
regs->status |= SR_FS_DIRTY;
return val;
}
#endif
extern unsigned long get_f32_reg(unsigned long fp_reg);
static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset,
struct pt_regs *regs)
{
unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F;
unsigned long val;
val = get_f32_reg(fp_reg);
regs->status |= SR_FS_DIRTY;
return val;
}
#else /* CONFIG_FPU */
static void set_f32_rd(unsigned long insn, struct pt_regs *regs,
unsigned long val) {}
static void set_f64_rd(unsigned long insn, struct pt_regs *regs, u64 val) {}
static unsigned long get_f64_rs(unsigned long insn, u8 fp_reg_offset,
struct pt_regs *regs)
{
return 0;
}
static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset,
struct pt_regs *regs)
{
return 0;
}
#endif
#define GET_F64_RS2(insn, regs) (get_f64_rs(insn, 20, regs))
#define GET_F64_RS2C(insn, regs) (get_f64_rs(insn, 2, regs))
#define GET_F64_RS2S(insn, regs) (get_f64_rs(RVC_RS2S(insn), 0, regs))
#define GET_F32_RS2(insn, regs) (get_f32_rs(insn, 20, regs))
#define GET_F32_RS2C(insn, regs) (get_f32_rs(insn, 2, regs))
#define GET_F32_RS2S(insn, regs) (get_f32_rs(RVC_RS2S(insn), 0, regs))
#ifdef CONFIG_RISCV_M_MODE #ifdef CONFIG_RISCV_M_MODE
static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val) static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val)
{ {
...@@ -362,15 +471,21 @@ int handle_misaligned_load(struct pt_regs *regs) ...@@ -362,15 +471,21 @@ int handle_misaligned_load(struct pt_regs *regs)
return -1; return -1;
} }
if (!IS_ENABLED(CONFIG_FPU) && fp)
return -EOPNOTSUPP;
val.data_u64 = 0; val.data_u64 = 0;
for (i = 0; i < len; i++) { for (i = 0; i < len; i++) {
if (load_u8(regs, (void *)(addr + i), &val.data_bytes[i])) if (load_u8(regs, (void *)(addr + i), &val.data_bytes[i]))
return -1; return -1;
} }
if (fp) if (!fp)
return -1; SET_RD(insn, regs, val.data_ulong << shift >> shift);
SET_RD(insn, regs, val.data_ulong << shift >> shift); else if (len == 8)
set_f64_rd(insn, regs, val.data_u64);
else
set_f32_rd(insn, regs, val.data_ulong);
regs->epc = epc + INSN_LEN(insn); regs->epc = epc + INSN_LEN(insn);
...@@ -383,7 +498,7 @@ int handle_misaligned_store(struct pt_regs *regs) ...@@ -383,7 +498,7 @@ int handle_misaligned_store(struct pt_regs *regs)
unsigned long epc = regs->epc; unsigned long epc = regs->epc;
unsigned long insn; unsigned long insn;
unsigned long addr = regs->badaddr; unsigned long addr = regs->badaddr;
int i, len = 0; int i, len = 0, fp = 0;
perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
...@@ -400,6 +515,14 @@ int handle_misaligned_store(struct pt_regs *regs) ...@@ -400,6 +515,14 @@ int handle_misaligned_store(struct pt_regs *regs)
} else if ((insn & INSN_MASK_SD) == INSN_MATCH_SD) { } else if ((insn & INSN_MASK_SD) == INSN_MATCH_SD) {
len = 8; len = 8;
#endif #endif
} else if ((insn & INSN_MASK_FSD) == INSN_MATCH_FSD) {
fp = 1;
len = 8;
val.data_u64 = GET_F64_RS2(insn, regs);
} else if ((insn & INSN_MASK_FSW) == INSN_MATCH_FSW) {
fp = 1;
len = 4;
val.data_ulong = GET_F32_RS2(insn, regs);
} else if ((insn & INSN_MASK_SH) == INSN_MATCH_SH) { } else if ((insn & INSN_MASK_SH) == INSN_MATCH_SH) {
len = 2; len = 2;
#if defined(CONFIG_64BIT) #if defined(CONFIG_64BIT)
...@@ -418,11 +541,32 @@ int handle_misaligned_store(struct pt_regs *regs) ...@@ -418,11 +541,32 @@ int handle_misaligned_store(struct pt_regs *regs)
((insn >> SH_RD) & 0x1f)) { ((insn >> SH_RD) & 0x1f)) {
len = 4; len = 4;
val.data_ulong = GET_RS2C(insn, regs); val.data_ulong = GET_RS2C(insn, regs);
} else if ((insn & INSN_MASK_C_FSD) == INSN_MATCH_C_FSD) {
fp = 1;
len = 8;
val.data_u64 = GET_F64_RS2S(insn, regs);
} else if ((insn & INSN_MASK_C_FSDSP) == INSN_MATCH_C_FSDSP) {
fp = 1;
len = 8;
val.data_u64 = GET_F64_RS2C(insn, regs);
#if !defined(CONFIG_64BIT)
} else if ((insn & INSN_MASK_C_FSW) == INSN_MATCH_C_FSW) {
fp = 1;
len = 4;
val.data_ulong = GET_F32_RS2S(insn, regs);
} else if ((insn & INSN_MASK_C_FSWSP) == INSN_MATCH_C_FSWSP) {
fp = 1;
len = 4;
val.data_ulong = GET_F32_RS2C(insn, regs);
#endif
} else { } else {
regs->epc = epc; regs->epc = epc;
return -1; return -1;
} }
if (!IS_ENABLED(CONFIG_FPU) && fp)
return -EOPNOTSUPP;
for (i = 0; i < len; i++) { for (i = 0; i < len; i++) {
if (store_u8(regs, (void *)(addr + i), val.data_bytes[i])) if (store_u8(regs, (void *)(addr + i), val.data_bytes[i]))
return -1; return -1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment