Commit e5075d8e authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'riscv-for-linus-6.8-mw4' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux

Pull more RISC-V updates from Palmer Dabbelt:

 - Support for tuning for systems with fast misaligned accesses.

 - Support for SBI-based suspend.

 - Support for the new SBI debug console extension.

 - The T-Head CMOs now use PA-based flushes.

 - Support for enabling the V extension in kernel code.

 - Optimized IP checksum routines.

 - Various ftrace improvements.

 - Support for archrandom, which depends on the Zkr extension.

 - The build is no longer broken under NET=n, KUNIT=y for ports that
   don't define their own ipv6 checksum.

* tag 'riscv-for-linus-6.8-mw4' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux: (56 commits)
  lib: checksum: Fix build with CONFIG_NET=n
  riscv: lib: Check if output in asm goto supported
  riscv: Fix build error on rv32 + XIP
  riscv: optimize ELF relocation function in riscv
  RISC-V: Implement archrandom when Zkr is available
  riscv: Optimize hweight API with Zbb extension
  riscv: add dependency among Image(.gz), loader(.bin), and vmlinuz.efi
  samples: ftrace: Add RISC-V support for SAMPLE_FTRACE_DIRECT[_MULTI]
  riscv: ftrace: Add DYNAMIC_FTRACE_WITH_DIRECT_CALLS support
  riscv: ftrace: Make function graph use ftrace directly
  riscv: select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY
  lib/Kconfig.debug: Update AS_HAS_NON_CONST_LEB128 comment and name
  riscv: Restrict DWARF5 when building with LLVM to known working versions
  riscv: Hoist linker relaxation disabling logic into Kconfig
  kunit: Add tests for csum_ipv6_magic and ip_fast_csum
  riscv: Add checksum library
  riscv: Add checksum header
  riscv: Add static key for misaligned accesses
  asm-generic: Improve csum_fold
  RISC-V: selftests: cbo: Ensure asm operands match constraints
  ...
parents c25b24fa f24a7010
......@@ -63,8 +63,8 @@ properties:
mmu-type:
description:
Identifies the MMU address translation mode used on this
hart. These values originate from the RISC-V Privileged
Identifies the largest MMU address translation mode supported by
this hart. These values originate from the RISC-V Privileged
Specification document, available from
https://riscv.org/specifications/
$ref: /schemas/types.yaml#/definitions/string
......@@ -80,6 +80,11 @@ properties:
description:
The blocksize in bytes for the Zicbom cache operations.
riscv,cbop-block-size:
$ref: /schemas/types.yaml#/definitions/uint32
description:
The blocksize in bytes for the Zicbop cache operations.
riscv,cboz-block-size:
$ref: /schemas/types.yaml#/definitions/uint32
description:
......
......@@ -48,7 +48,7 @@ properties:
insensitive, letters in the riscv,isa string must be all
lowercase.
$ref: /schemas/types.yaml#/definitions/string
pattern: ^rv(?:64|32)imaf?d?q?c?b?k?j?p?v?h?(?:[hsxz](?:[a-z])+)?(?:_[hsxz](?:[a-z])+)*$
pattern: ^rv(?:64|32)imaf?d?q?c?b?k?j?p?v?h?(?:[hsxz](?:[0-9a-z])+)?(?:_[hsxz](?:[0-9a-z])+)*$
deprecated: true
riscv,isa-base:
......
......@@ -20,7 +20,7 @@
| openrisc: | .. |
| parisc: | TODO |
| powerpc: | TODO |
| riscv: | TODO |
| riscv: | ok |
| s390: | TODO |
| sh: | TODO |
| sparc: | TODO |
......
......@@ -53,6 +53,7 @@ config RISCV
select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USES_CFI_TRAPS if CFI_CLANG
select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT
......@@ -66,9 +67,10 @@ config RISCV
select CLINT_TIMER if !MMU
select CLONE_BACKWARDS
select COMMON_CLK
select CPU_PM if CPU_IDLE || HIBERNATION
select CPU_PM if CPU_IDLE || HIBERNATION || SUSPEND
select EDAC_SUPPORT
select FRAME_POINTER if PERF_EVENTS || (FUNCTION_TRACER && !DYNAMIC_FTRACE)
select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY if DYNAMIC_FTRACE
select GENERIC_ARCH_TOPOLOGY
select GENERIC_ATOMIC64 if !64BIT
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
......@@ -115,6 +117,7 @@ config RISCV
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS if MMU
select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && (CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE)
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
select HAVE_FUNCTION_GRAPH_TRACER
......@@ -142,6 +145,8 @@ config RISCV
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RETHOOK if !XIP_KERNEL
select HAVE_RSEQ
select HAVE_SAMPLE_FTRACE_DIRECT
select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
......@@ -183,6 +188,20 @@ config HAVE_SHADOW_CALL_STACK
# https://github.com/riscv-non-isa/riscv-elf-psabi-doc/commit/a484e843e6eeb51f0cb7b8819e50da6d2444d769
depends on $(ld-option,--no-relax-gp)
config RISCV_USE_LINKER_RELAXATION
def_bool y
# https://github.com/llvm/llvm-project/commit/6611d58f5bbcbec77262d392e2923e1d680f6985
depends on !LD_IS_LLD || LLD_VERSION >= 150000
# https://github.com/llvm/llvm-project/commit/bbc0f99f3bc96f1db16f649fc21dd18e5b0918f6
config ARCH_HAS_BROKEN_DWARF5
def_bool y
depends on RISCV_USE_LINKER_RELAXATION
# https://github.com/llvm/llvm-project/commit/1df5ea29b43690b6622db2cad7b745607ca4de6a
depends on AS_IS_LLVM && AS_VERSION < 180000
# https://github.com/llvm/llvm-project/commit/7ffabb61a5569444b5ac9322e22e5471cc5e4a77
depends on LD_IS_LLD && LLD_VERSION < 180000
config ARCH_MMAP_RND_BITS_MIN
default 18 if 64BIT
default 8
......@@ -529,6 +548,28 @@ config RISCV_ISA_V_DEFAULT_ENABLE
If you don't know what to do here, say Y.
config RISCV_ISA_V_UCOPY_THRESHOLD
int "Threshold size for vectorized user copies"
depends on RISCV_ISA_V
default 768
help
Prefer using vectorized copy_to_user()/copy_from_user() when the
workload size exceeds this value.
config RISCV_ISA_V_PREEMPTIVE
bool "Run kernel-mode Vector with kernel preemption"
depends on PREEMPTION
depends on RISCV_ISA_V
default y
help
Usually, in-kernel SIMD routines are run with preemption disabled.
Functions which envoke long running SIMD thus must yield core's
vector unit to prevent blocking other tasks for too long.
This config allows kernel to run SIMD without explicitly disable
preemption. Enabling this config will result in higher memory
consumption due to the allocation of per-task's kernel Vector context.
config TOOLCHAIN_HAS_ZBB
bool
default y
......@@ -655,6 +696,20 @@ config RISCV_MISALIGNED
load/store for both kernel and userspace. When disable, misaligned
accesses will generate SIGBUS in userspace and panic in kernel.
config RISCV_EFFICIENT_UNALIGNED_ACCESS
bool "Assume the CPU supports fast unaligned memory accesses"
depends on NONPORTABLE
select DCACHE_WORD_ACCESS if MMU
select HAVE_EFFICIENT_UNALIGNED_ACCESS
help
Say Y here if you want the kernel to assume that the CPU supports
efficient unaligned memory accesses. When enabled, this option
improves the performance of the kernel on such CPUs. However, the
kernel will run much more slowly, or will not be able to run at all,
on CPUs that do not support efficient unaligned memory accesses.
If unsure what to do here, say N.
endmenu # "Platform type"
menu "Kernel features"
......
......@@ -98,6 +98,7 @@ config ERRATA_THEAD_CMO
depends on ERRATA_THEAD && MMU
select DMA_DIRECT_REMAP
select RISCV_DMA_NONCOHERENT
select RISCV_NONSTANDARD_CACHE_OPS
default y
help
This will apply the cache management errata to handle the
......
......@@ -43,8 +43,7 @@ else
KBUILD_LDFLAGS += -melf32lriscv
endif
ifeq ($(CONFIG_LD_IS_LLD),y)
ifeq ($(call test-lt, $(CONFIG_LLD_VERSION), 150000),y)
ifndef CONFIG_RISCV_USE_LINKER_RELAXATION
KBUILD_CFLAGS += -mno-relax
KBUILD_AFLAGS += -mno-relax
ifndef CONFIG_AS_IS_LLVM
......@@ -52,7 +51,6 @@ ifndef CONFIG_AS_IS_LLVM
KBUILD_AFLAGS += -Wa,-mno-relax
endif
endif
endif
ifeq ($(CONFIG_SHADOW_CALL_STACK),y)
KBUILD_LDFLAGS += --no-relax-gp
......@@ -108,7 +106,9 @@ KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax)
# unaligned accesses. While unaligned accesses are explicitly allowed in the
# RISC-V ISA, they're emulated by machine mode traps on all extant
# architectures. It's faster to have GCC emit only aligned accesses.
ifneq ($(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS),y)
KBUILD_CFLAGS += $(call cc-option,-mstrict-align)
endif
ifeq ($(CONFIG_STACKPROTECTOR_PER_TASK),y)
prepare: stack_protector_prepare
......@@ -163,6 +163,8 @@ BOOT_TARGETS := Image Image.gz loader loader.bin xipImage vmlinuz.efi
all: $(notdir $(KBUILD_IMAGE))
loader.bin: loader
Image.gz loader vmlinuz.efi: Image
$(BOOT_TARGETS): vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
@$(kecho) ' Kernel: $(boot)/$@ is ready'
......
......@@ -149,6 +149,7 @@ CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_DW=y
CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_SERIAL_SH_SCI=y
CONFIG_SERIAL_EARLYCON_RISCV_SBI=y
CONFIG_VIRTIO_CONSOLE=y
CONFIG_HW_RANDOM=y
CONFIG_HW_RANDOM_VIRTIO=y
......
......@@ -12,8 +12,10 @@
#include <asm/alternative.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/dma-noncoherent.h>
#include <asm/errata_list.h>
#include <asm/hwprobe.h>
#include <asm/io.h>
#include <asm/patch.h>
#include <asm/vendorid_list.h>
......@@ -33,6 +35,69 @@ static bool errata_probe_pbmt(unsigned int stage,
return false;
}
/*
* th.dcache.ipa rs1 (invalidate, physical address)
* | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
* 0000001 01010 rs1 000 00000 0001011
* th.dcache.iva rs1 (invalidate, virtual address)
* 0000001 00110 rs1 000 00000 0001011
*
* th.dcache.cpa rs1 (clean, physical address)
* | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
* 0000001 01001 rs1 000 00000 0001011
* th.dcache.cva rs1 (clean, virtual address)
* 0000001 00101 rs1 000 00000 0001011
*
* th.dcache.cipa rs1 (clean then invalidate, physical address)
* | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
* 0000001 01011 rs1 000 00000 0001011
* th.dcache.civa rs1 (clean then invalidate, virtual address)
* 0000001 00111 rs1 000 00000 0001011
*
* th.sync.s (make sure all cache operations finished)
* | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
* 0000000 11001 00000 000 00000 0001011
*/
#define THEAD_INVAL_A0 ".long 0x02a5000b"
#define THEAD_CLEAN_A0 ".long 0x0295000b"
#define THEAD_FLUSH_A0 ".long 0x02b5000b"
#define THEAD_SYNC_S ".long 0x0190000b"
#define THEAD_CMO_OP(_op, _start, _size, _cachesize) \
asm volatile("mv a0, %1\n\t" \
"j 2f\n\t" \
"3:\n\t" \
THEAD_##_op##_A0 "\n\t" \
"add a0, a0, %0\n\t" \
"2:\n\t" \
"bltu a0, %2, 3b\n\t" \
THEAD_SYNC_S \
: : "r"(_cachesize), \
"r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \
"r"((unsigned long)(_start) + (_size)) \
: "a0")
static void thead_errata_cache_inv(phys_addr_t paddr, size_t size)
{
THEAD_CMO_OP(INVAL, paddr, size, riscv_cbom_block_size);
}
static void thead_errata_cache_wback(phys_addr_t paddr, size_t size)
{
THEAD_CMO_OP(CLEAN, paddr, size, riscv_cbom_block_size);
}
static void thead_errata_cache_wback_inv(phys_addr_t paddr, size_t size)
{
THEAD_CMO_OP(FLUSH, paddr, size, riscv_cbom_block_size);
}
static const struct riscv_nonstd_cache_ops thead_errata_cmo_ops = {
.wback = &thead_errata_cache_wback,
.inv = &thead_errata_cache_inv,
.wback_inv = &thead_errata_cache_wback_inv,
};
static bool errata_probe_cmo(unsigned int stage,
unsigned long arch_id, unsigned long impid)
{
......@@ -48,6 +113,7 @@ static bool errata_probe_cmo(unsigned int stage,
if (stage == RISCV_ALTERNATIVES_BOOT) {
riscv_cbom_block_size = L1_CACHE_BYTES;
riscv_noncoherent_supported();
riscv_noncoherent_register_cache_ops(&thead_errata_cmo_ops);
}
return true;
......@@ -77,8 +143,7 @@ static u32 thead_errata_probe(unsigned int stage,
if (errata_probe_pbmt(stage, archid, impid))
cpu_req_errata |= BIT(ERRATA_THEAD_PBMT);
if (errata_probe_cmo(stage, archid, impid))
cpu_req_errata |= BIT(ERRATA_THEAD_CMO);
errata_probe_cmo(stage, archid, impid);
if (errata_probe_pmu(stage, archid, impid))
cpu_req_errata |= BIT(ERRATA_THEAD_PMU);
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Based on arch/x86/include/asm/arch_hweight.h
*/
#ifndef _ASM_RISCV_HWEIGHT_H
#define _ASM_RISCV_HWEIGHT_H
#include <asm/alternative-macros.h>
#include <asm/hwcap.h>
#if (BITS_PER_LONG == 64)
#define CPOPW "cpopw "
#elif (BITS_PER_LONG == 32)
#define CPOPW "cpop "
#else
#error "Unexpected BITS_PER_LONG"
#endif
static __always_inline unsigned int __arch_hweight32(unsigned int w)
{
#ifdef CONFIG_RISCV_ISA_ZBB
asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
RISCV_ISA_EXT_ZBB, 1)
: : : : legacy);
asm (".option push\n"
".option arch,+zbb\n"
CPOPW "%0, %0\n"
".option pop\n"
: "+r" (w) : :);
return w;
legacy:
#endif
return __sw_hweight32(w);
}
static inline unsigned int __arch_hweight16(unsigned int w)
{
return __arch_hweight32(w & 0xffff);
}
static inline unsigned int __arch_hweight8(unsigned int w)
{
return __arch_hweight32(w & 0xff);
}
#if BITS_PER_LONG == 64
static __always_inline unsigned long __arch_hweight64(__u64 w)
{
# ifdef CONFIG_RISCV_ISA_ZBB
asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
RISCV_ISA_EXT_ZBB, 1)
: : : : legacy);
asm (".option push\n"
".option arch,+zbb\n"
"cpop %0, %0\n"
".option pop\n"
: "+r" (w) : :);
return w;
legacy:
# endif
return __sw_hweight64(w);
}
#else /* BITS_PER_LONG == 64 */
static inline unsigned long __arch_hweight64(__u64 w)
{
return __arch_hweight32((u32)w) +
__arch_hweight32((u32)(w >> 32));
}
#endif /* !(BITS_PER_LONG == 64) */
#endif /* _ASM_RISCV_HWEIGHT_H */
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Kernel interface for the RISCV arch_random_* functions
*
* Copyright (c) 2023 Rivos Inc.
*
*/
#ifndef ASM_RISCV_ARCHRANDOM_H
#define ASM_RISCV_ARCHRANDOM_H
#include <asm/csr.h>
#include <asm/processor.h>
#define SEED_RETRY_LOOPS 100
static inline bool __must_check csr_seed_long(unsigned long *v)
{
unsigned int retry = SEED_RETRY_LOOPS, valid_seeds = 0;
const int needed_seeds = sizeof(long) / sizeof(u16);
u16 *entropy = (u16 *)v;
do {
/*
* The SEED CSR must be accessed with a read-write instruction.
*/
unsigned long csr_seed = csr_swap(CSR_SEED, 0);
unsigned long opst = csr_seed & SEED_OPST_MASK;
switch (opst) {
case SEED_OPST_ES16:
entropy[valid_seeds++] = csr_seed & SEED_ENTROPY_MASK;
if (valid_seeds == needed_seeds)
return true;
break;
case SEED_OPST_DEAD:
pr_err_once("archrandom: Unrecoverable error\n");
return false;
case SEED_OPST_BIST:
case SEED_OPST_WAIT:
default:
cpu_relax();
continue;
}
} while (--retry);
return false;
}
static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs)
{
return 0;
}
static inline size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs)
{
if (!max_longs)
return 0;
/*
* If Zkr is supported and csr_seed_long succeeds, we return one long
* worth of entropy.
*/
if (riscv_has_extension_likely(RISCV_ISA_EXT_ZKR) && csr_seed_long(v))
return 1;
return 0;
}
#endif /* ASM_RISCV_ARCHRANDOM_H */
......@@ -6,6 +6,7 @@
#define EX_TYPE_FIXUP 1
#define EX_TYPE_BPF 2
#define EX_TYPE_UACCESS_ERR_ZERO 3
#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4
#ifdef CONFIG_MMU
......@@ -47,6 +48,11 @@
#define EX_DATA_REG_ZERO_SHIFT 5
#define EX_DATA_REG_ZERO GENMASK(9, 5)
#define EX_DATA_REG_DATA_SHIFT 0
#define EX_DATA_REG_DATA GENMASK(4, 0)
#define EX_DATA_REG_ADDR_SHIFT 5
#define EX_DATA_REG_ADDR GENMASK(9, 5)
#define EX_DATA_REG(reg, gpr) \
"((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")"
......@@ -62,6 +68,15 @@
#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \
_ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero)
#define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \
__DEFINE_ASM_GPR_NUMS \
__ASM_EXTABLE_RAW(#insn, #fixup, \
__stringify(EX_TYPE_LOAD_UNALIGNED_ZEROPAD), \
"(" \
EX_DATA_REG(DATA, data) " | " \
EX_DATA_REG(ADDR, addr) \
")")
#endif /* __ASSEMBLY__ */
#else /* CONFIG_MMU */
......
......@@ -9,6 +9,33 @@ long long __lshrti3(long long a, int b);
long long __ashrti3(long long a, int b);
long long __ashlti3(long long a, int b);
#ifdef CONFIG_RISCV_ISA_V
#ifdef CONFIG_MMU
asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n);
#endif /* CONFIG_MMU */
void xor_regs_2_(unsigned long bytes, unsigned long *__restrict p1,
const unsigned long *__restrict p2);
void xor_regs_3_(unsigned long bytes, unsigned long *__restrict p1,
const unsigned long *__restrict p2,
const unsigned long *__restrict p3);
void xor_regs_4_(unsigned long bytes, unsigned long *__restrict p1,
const unsigned long *__restrict p2,
const unsigned long *__restrict p3,
const unsigned long *__restrict p4);
void xor_regs_5_(unsigned long bytes, unsigned long *__restrict p1,
const unsigned long *__restrict p2,
const unsigned long *__restrict p3,
const unsigned long *__restrict p4,
const unsigned long *__restrict p5);
#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs);
asmlinkage void riscv_v_context_nesting_end(struct pt_regs *regs);
#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */
#endif /* CONFIG_RISCV_ISA_V */
#define DECLARE_DO_ERROR_INFO(name) asmlinkage void name(struct pt_regs *regs)
......
......@@ -271,7 +271,9 @@ static __always_inline int variable_fls(unsigned int x)
#include <asm-generic/bitops/fls64.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/hweight.h>
#include <asm/arch_hweight.h>
#include <asm-generic/bitops/const_hweight.h>
#if (BITS_PER_LONG == 64)
#define __AMO(op) "amo" #op ".d"
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Checksum routines
*
* Copyright (C) 2023 Rivos Inc.
*/
#ifndef __ASM_RISCV_CHECKSUM_H
#define __ASM_RISCV_CHECKSUM_H
#include <linux/in6.h>
#include <linux/uaccess.h>
#define ip_fast_csum ip_fast_csum
extern unsigned int do_csum(const unsigned char *buff, int len);
#define do_csum do_csum
/* Default version is sufficient for 32 bit */
#ifndef CONFIG_32BIT
#define _HAVE_ARCH_IPV6_CSUM
__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
const struct in6_addr *daddr,
__u32 len, __u8 proto, __wsum sum);
#endif
/* Define riscv versions of functions before importing asm-generic/checksum.h */
#include <asm-generic/checksum.h>
/**
* Quickly compute an IP checksum with the assumption that IPv4 headers will
* always be in multiples of 32-bits, and have an ihl of at least 5.
*
* @ihl: the number of 32 bit segments and must be greater than or equal to 5.
* @iph: assumed to be word aligned given that NET_IP_ALIGN is set to 2 on
* riscv, defining IP headers to be aligned.
*/
static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
{
unsigned long csum = 0;
int pos = 0;
do {
csum += ((const unsigned int *)iph)[pos];
if (IS_ENABLED(CONFIG_32BIT))
csum += csum < ((const unsigned int *)iph)[pos];
} while (++pos < ihl);
/*
* ZBB only saves three instructions on 32-bit and five on 64-bit so not
* worth checking if supported without Alternatives.
*/
if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
unsigned long fold_temp;
asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
RISCV_ISA_EXT_ZBB, 1)
:
:
:
: no_zbb);
if (IS_ENABLED(CONFIG_32BIT)) {
asm(".option push \n\
.option arch,+zbb \n\
not %[fold_temp], %[csum] \n\
rori %[csum], %[csum], 16 \n\
sub %[csum], %[fold_temp], %[csum] \n\
.option pop"
: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp));
} else {
asm(".option push \n\
.option arch,+zbb \n\
rori %[fold_temp], %[csum], 32 \n\
add %[csum], %[fold_temp], %[csum] \n\
srli %[csum], %[csum], 32 \n\
not %[fold_temp], %[csum] \n\
roriw %[csum], %[csum], 16 \n\
subw %[csum], %[fold_temp], %[csum] \n\
.option pop"
: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp));
}
return (__force __sum16)(csum >> 16);
}
no_zbb:
#ifndef CONFIG_32BIT
csum += ror64(csum, 32);
csum >>= 32;
#endif
return csum_fold((__force __wsum)csum);
}
#endif /* __ASM_RISCV_CHECKSUM_H */
......@@ -135,4 +135,6 @@ static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsi
return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
}
DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
#endif
......@@ -411,6 +411,15 @@
#define CSR_VTYPE 0xc21
#define CSR_VLENB 0xc22
/* Scalar Crypto Extension - Entropy */
#define CSR_SEED 0x015
#define SEED_OPST_MASK _AC(0xC0000000, UL)
#define SEED_OPST_BIST _AC(0x00000000, UL)
#define SEED_OPST_WAIT _AC(0x40000000, UL)
#define SEED_OPST_ES16 _AC(0x80000000, UL)
#define SEED_OPST_DEAD _AC(0xC0000000, UL)
#define SEED_ENTROPY_MASK _AC(0xFFFF, UL)
#ifdef CONFIG_RISCV_M_MODE
# define CSR_STATUS CSR_MSTATUS
# define CSR_IE CSR_MIE
......
......@@ -4,6 +4,23 @@
#define _ASM_RISCV_ENTRY_COMMON_H
#include <asm/stacktrace.h>
#include <asm/thread_info.h>
#include <asm/vector.h>
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
unsigned long ti_work)
{
if (ti_work & _TIF_RISCV_V_DEFER_RESTORE) {
clear_thread_flag(TIF_RISCV_V_DEFER_RESTORE);
/*
* We are already called with irq disabled, so go without
* keeping track of riscv_v_flags.
*/
riscv_v_vstate_restore(&current->thread.vstate, regs);
}
}
#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
void handle_page_fault(struct pt_regs *regs);
void handle_break(struct pt_regs *regs);
......
......@@ -24,9 +24,8 @@
#ifdef CONFIG_ERRATA_THEAD
#define ERRATA_THEAD_PBMT 0
#define ERRATA_THEAD_CMO 1
#define ERRATA_THEAD_PMU 2
#define ERRATA_THEAD_NUMBER 3
#define ERRATA_THEAD_PMU 1
#define ERRATA_THEAD_NUMBER 2
#endif
#ifdef __ASSEMBLY__
......@@ -94,54 +93,17 @@ asm volatile(ALTERNATIVE( \
#define ALT_THEAD_PMA(_val)
#endif
/*
* th.dcache.ipa rs1 (invalidate, physical address)
* | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
* 0000001 01010 rs1 000 00000 0001011
* th.dache.iva rs1 (invalida, virtual address)
* 0000001 00110 rs1 000 00000 0001011
*
* th.dcache.cpa rs1 (clean, physical address)
* | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
* 0000001 01001 rs1 000 00000 0001011
* th.dcache.cva rs1 (clean, virtual address)
* 0000001 00101 rs1 000 00000 0001011
*
* th.dcache.cipa rs1 (clean then invalidate, physical address)
* | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
* 0000001 01011 rs1 000 00000 0001011
* th.dcache.civa rs1 (... virtual address)
* 0000001 00111 rs1 000 00000 0001011
*
* th.sync.s (make sure all cache operations finished)
* | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
* 0000000 11001 00000 000 00000 0001011
*/
#define THEAD_INVAL_A0 ".long 0x0265000b"
#define THEAD_CLEAN_A0 ".long 0x0255000b"
#define THEAD_FLUSH_A0 ".long 0x0275000b"
#define THEAD_SYNC_S ".long 0x0190000b"
#define ALT_CMO_OP(_op, _start, _size, _cachesize) \
asm volatile(ALTERNATIVE_2( \
__nops(6), \
asm volatile(ALTERNATIVE( \
__nops(5), \
"mv a0, %1\n\t" \
"j 2f\n\t" \
"3:\n\t" \
CBO_##_op(a0) \
"add a0, a0, %0\n\t" \
"2:\n\t" \
"bltu a0, %2, 3b\n\t" \
"nop", 0, RISCV_ISA_EXT_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \
"mv a0, %1\n\t" \
"j 2f\n\t" \
"3:\n\t" \
THEAD_##_op##_A0 "\n\t" \
"add a0, a0, %0\n\t" \
"2:\n\t" \
"bltu a0, %2, 3b\n\t" \
THEAD_SYNC_S, THEAD_VENDOR_ID, \
ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO) \
"bltu a0, %2, 3b\n\t", \
0, RISCV_ISA_EXT_ZICBOM, CONFIG_RISCV_ISA_ZICBOM) \
: : "r"(_cachesize), \
"r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \
"r"((unsigned long)(_start) + (_size)) \
......
......@@ -128,7 +128,23 @@ do { \
struct dyn_ftrace;
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
#define ftrace_init_nop ftrace_init_nop
#endif
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
struct ftrace_ops;
struct ftrace_regs;
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs);
#define ftrace_graph_func ftrace_graph_func
static inline void __arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr)
{
regs->t1 = addr;
}
#define arch_ftrace_set_direct_caller(fregs, addr) \
__arch_ftrace_set_direct_caller(&(fregs)->regs, addr)
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
#endif /* __ASSEMBLY__ */
#endif /* CONFIG_DYNAMIC_FTRACE */
......
......@@ -865,7 +865,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
#define TASK_SIZE_MIN (PGDIR_SIZE_L3 * PTRS_PER_PGD / 2)
#ifdef CONFIG_COMPAT
#define TASK_SIZE_32 (_AC(0x80000000, UL) - PAGE_SIZE)
#define TASK_SIZE_32 (_AC(0x80000000, UL))
#define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \
TASK_SIZE_32 : TASK_SIZE_64)
#else
......
......@@ -16,7 +16,7 @@
#ifdef CONFIG_64BIT
#define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1))
#define STACK_TOP_MAX TASK_SIZE_64
#define STACK_TOP_MAX TASK_SIZE
#define arch_get_mmap_end(addr, len, flags) \
({ \
......@@ -73,6 +73,43 @@
struct task_struct;
struct pt_regs;
/*
* We use a flag to track in-kernel Vector context. Currently the flag has the
* following meaning:
*
* - bit 0: indicates whether the in-kernel Vector context is active. The
* activation of this state disables the preemption. On a non-RT kernel, it
* also disable bh.
* - bits 8: is used for tracking preemptible kernel-mode Vector, when
* RISCV_ISA_V_PREEMPTIVE is enabled. Calling kernel_vector_begin() does not
* disable the preemption if the thread's kernel_vstate.datap is allocated.
* Instead, the kernel set this bit field. Then the trap entry/exit code
* knows if we are entering/exiting the context that owns preempt_v.
* - 0: the task is not using preempt_v
* - 1: the task is actively using preempt_v. But whether does the task own
* the preempt_v context is decided by bits in RISCV_V_CTX_DEPTH_MASK.
* - bit 16-23 are RISCV_V_CTX_DEPTH_MASK, used by context tracking routine
* when preempt_v starts:
* - 0: the task is actively using, and own preempt_v context.
* - non-zero: the task was using preempt_v, but then took a trap within.
* Thus, the task does not own preempt_v. Any use of Vector will have to
* save preempt_v, if dirty, and fallback to non-preemptible kernel-mode
* Vector.
* - bit 30: The in-kernel preempt_v context is saved, and requries to be
* restored when returning to the context that owns the preempt_v.
* - bit 31: The in-kernel preempt_v context is dirty, as signaled by the
* trap entry code. Any context switches out-of current task need to save
* it to the task's in-kernel V context. Also, any traps nesting on-top-of
* preempt_v requesting to use V needs a save.
*/
#define RISCV_V_CTX_DEPTH_MASK 0x00ff0000
#define RISCV_V_CTX_UNIT_DEPTH 0x00010000
#define RISCV_KERNEL_MODE_V 0x00000001
#define RISCV_PREEMPT_V 0x00000100
#define RISCV_PREEMPT_V_DIRTY 0x80000000
#define RISCV_PREEMPT_V_NEED_RESTORE 0x40000000
/* CPU-specific state of a task */
struct thread_struct {
/* Callee-saved registers */
......@@ -81,9 +118,11 @@ struct thread_struct {
unsigned long s[12]; /* s[0]: frame pointer */
struct __riscv_d_ext_state fstate;
unsigned long bad_cause;
unsigned long vstate_ctrl;
u32 riscv_v_flags;
u32 vstate_ctrl;
struct __riscv_v_ext_state vstate;
unsigned long align_ctl;
struct __riscv_v_ext_state kernel_vstate;
};
/* Whitelist the fstate from the task_struct for hardened usercopy */
......
......@@ -29,6 +29,7 @@ enum sbi_ext_id {
SBI_EXT_RFENCE = 0x52464E43,
SBI_EXT_HSM = 0x48534D,
SBI_EXT_SRST = 0x53525354,
SBI_EXT_SUSP = 0x53555350,
SBI_EXT_PMU = 0x504D55,
SBI_EXT_DBCN = 0x4442434E,
SBI_EXT_STA = 0x535441,
......@@ -115,6 +116,14 @@ enum sbi_srst_reset_reason {
SBI_SRST_RESET_REASON_SYS_FAILURE,
};
enum sbi_ext_susp_fid {
SBI_EXT_SUSP_SYSTEM_SUSPEND = 0,
};
enum sbi_ext_susp_sleep_type {
SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM = 0,
};
enum sbi_ext_pmu_fid {
SBI_EXT_PMU_NUM_COUNTERS = 0,
SBI_EXT_PMU_COUNTER_GET_INFO,
......@@ -288,8 +297,13 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
unsigned long arg3, unsigned long arg4,
unsigned long arg5);
#ifdef CONFIG_RISCV_SBI_V01
void sbi_console_putchar(int ch);
int sbi_console_getchar(void);
#else
static inline void sbi_console_putchar(int ch) { }
static inline int sbi_console_getchar(void) { return -ENOENT; }
#endif
long sbi_get_mvendorid(void);
long sbi_get_marchid(void);
long sbi_get_mimpid(void);
......@@ -346,6 +360,11 @@ static inline unsigned long sbi_mk_version(unsigned long major,
}
int sbi_err_map_linux_errno(int err);
extern bool sbi_debug_console_available;
int sbi_debug_console_write(const char *bytes, unsigned int num_bytes);
int sbi_debug_console_read(char *bytes, unsigned int num_bytes);
#else /* CONFIG_RISCV_SBI */
static inline int sbi_remote_fence_i(const struct cpumask *cpu_mask) { return -1; }
static inline void sbi_init(void) {}
......
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
* Copyright (C) 2023 SiFive
*/
#ifndef __ASM_SIMD_H
#define __ASM_SIMD_H
#include <linux/compiler.h>
#include <linux/irqflags.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/types.h>
#include <linux/thread_info.h>
#include <asm/vector.h>
#ifdef CONFIG_RISCV_ISA_V
/*
* may_use_simd - whether it is allowable at this time to issue vector
* instructions or access the vector register file
*
* Callers must not assume that the result remains true beyond the next
* preempt_enable() or return from softirq context.
*/
static __must_check inline bool may_use_simd(void)
{
/*
* RISCV_KERNEL_MODE_V is only set while preemption is disabled,
* and is clear whenever preemption is enabled.
*/
if (in_hardirq() || in_nmi())
return false;
/*
* Nesting is acheived in preempt_v by spreading the control for
* preemptible and non-preemptible kernel-mode Vector into two fields.
* Always try to match with prempt_v if kernel V-context exists. Then,
* fallback to check non preempt_v if nesting happens, or if the config
* is not set.
*/
if (IS_ENABLED(CONFIG_RISCV_ISA_V_PREEMPTIVE) && current->thread.kernel_vstate.datap) {
if (!riscv_preempt_v_started(current))
return true;
}
/*
* Non-preemptible kernel-mode Vector temporarily disables bh. So we
* must not return true on irq_disabled(). Otherwise we would fail the
* lockdep check calling local_bh_enable()
*/
return !irqs_disabled() && !(riscv_v_flags() & RISCV_KERNEL_MODE_V);
}
#else /* ! CONFIG_RISCV_ISA_V */
static __must_check inline bool may_use_simd(void)
{
return false;
}
#endif /* ! CONFIG_RISCV_ISA_V */
#endif
......@@ -53,8 +53,7 @@ static inline void __switch_to_fpu(struct task_struct *prev,
struct pt_regs *regs;
regs = task_pt_regs(prev);
if (unlikely(regs->status & SR_SD))
fstate_save(prev, regs);
fstate_save(prev, regs);
fstate_restore(next, task_pt_regs(next));
}
......
......@@ -102,12 +102,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
#define TIF_NOTIFY_SIGNAL 9 /* signal notifications exist */
#define TIF_UPROBE 10 /* uprobe breakpoint or singlestep */
#define TIF_32BIT 11 /* compat-mode 32bit process */
#define TIF_RISCV_V_DEFER_RESTORE 12 /* restore Vector before returing to user */
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_RISCV_V_DEFER_RESTORE (1 << TIF_RISCV_V_DEFER_RESTORE)
#define _TIF_WORK_MASK \
(_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED | \
......
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2023 Rivos Inc.
*/
#ifndef _ASM_RISCV_TLBBATCH_H
#define _ASM_RISCV_TLBBATCH_H
#include <linux/cpumask.h>
struct arch_tlbflush_unmap_batch {
struct cpumask cpumask;
};
#endif /* _ASM_RISCV_TLBBATCH_H */
......@@ -47,6 +47,14 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end);
void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end);
#endif
bool arch_tlbbatch_should_defer(struct mm_struct *mm);
void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
struct mm_struct *mm,
unsigned long uaddr);
void arch_flush_tlb_batched_pending(struct mm_struct *mm);
void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
#else /* CONFIG_SMP && CONFIG_MMU */
#define flush_tlb_all() local_flush_tlb_all()
......
......@@ -22,6 +22,18 @@
extern unsigned long riscv_v_vsize;
int riscv_v_setup_vsize(void);
bool riscv_v_first_use_handler(struct pt_regs *regs);
void kernel_vector_begin(void);
void kernel_vector_end(void);
void get_cpu_vector_context(void);
void put_cpu_vector_context(void);
void riscv_v_thread_free(struct task_struct *tsk);
void __init riscv_v_setup_ctx_cache(void);
void riscv_v_thread_alloc(struct task_struct *tsk);
static inline u32 riscv_v_flags(void)
{
return READ_ONCE(current->thread.riscv_v_flags);
}
static __always_inline bool has_vector(void)
{
......@@ -162,36 +174,89 @@ static inline void riscv_v_vstate_discard(struct pt_regs *regs)
__riscv_v_vstate_dirty(regs);
}
static inline void riscv_v_vstate_save(struct task_struct *task,
static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate,
struct pt_regs *regs)
{
if ((regs->status & SR_VS) == SR_VS_DIRTY) {
struct __riscv_v_ext_state *vstate = &task->thread.vstate;
__riscv_v_vstate_save(vstate, vstate->datap);
__riscv_v_vstate_clean(regs);
}
}
static inline void riscv_v_vstate_restore(struct task_struct *task,
static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate,
struct pt_regs *regs)
{
if ((regs->status & SR_VS) != SR_VS_OFF) {
struct __riscv_v_ext_state *vstate = &task->thread.vstate;
__riscv_v_vstate_restore(vstate, vstate->datap);
__riscv_v_vstate_clean(regs);
}
}
static inline void riscv_v_vstate_set_restore(struct task_struct *task,
struct pt_regs *regs)
{
if ((regs->status & SR_VS) != SR_VS_OFF) {
set_tsk_thread_flag(task, TIF_RISCV_V_DEFER_RESTORE);
riscv_v_vstate_on(regs);
}
}
#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
static inline bool riscv_preempt_v_dirty(struct task_struct *task)
{
return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V_DIRTY);
}
static inline bool riscv_preempt_v_restore(struct task_struct *task)
{
return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V_NEED_RESTORE);
}
static inline void riscv_preempt_v_clear_dirty(struct task_struct *task)
{
barrier();
task->thread.riscv_v_flags &= ~RISCV_PREEMPT_V_DIRTY;
}
static inline void riscv_preempt_v_set_restore(struct task_struct *task)
{
barrier();
task->thread.riscv_v_flags |= RISCV_PREEMPT_V_NEED_RESTORE;
}
static inline bool riscv_preempt_v_started(struct task_struct *task)
{
return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V);
}
#else /* !CONFIG_RISCV_ISA_V_PREEMPTIVE */
static inline bool riscv_preempt_v_dirty(struct task_struct *task) { return false; }
static inline bool riscv_preempt_v_restore(struct task_struct *task) { return false; }
static inline bool riscv_preempt_v_started(struct task_struct *task) { return false; }
#define riscv_preempt_v_clear_dirty(tsk) do {} while (0)
#define riscv_preempt_v_set_restore(tsk) do {} while (0)
#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */
static inline void __switch_to_vector(struct task_struct *prev,
struct task_struct *next)
{
struct pt_regs *regs;
regs = task_pt_regs(prev);
riscv_v_vstate_save(prev, regs);
riscv_v_vstate_restore(next, task_pt_regs(next));
if (riscv_preempt_v_started(prev)) {
if (riscv_preempt_v_dirty(prev)) {
__riscv_v_vstate_save(&prev->thread.kernel_vstate,
prev->thread.kernel_vstate.datap);
riscv_preempt_v_clear_dirty(prev);
}
} else {
regs = task_pt_regs(prev);
riscv_v_vstate_save(&prev->thread.vstate, regs);
}
if (riscv_preempt_v_started(next))
riscv_preempt_v_set_restore(next);
else
riscv_v_vstate_set_restore(next, task_pt_regs(next));
}
void riscv_v_vstate_ctrl_init(struct task_struct *tsk);
......@@ -208,11 +273,14 @@ static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; }
static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; }
#define riscv_v_vsize (0)
#define riscv_v_vstate_discard(regs) do {} while (0)
#define riscv_v_vstate_save(task, regs) do {} while (0)
#define riscv_v_vstate_restore(task, regs) do {} while (0)
#define riscv_v_vstate_save(vstate, regs) do {} while (0)
#define riscv_v_vstate_restore(vstate, regs) do {} while (0)
#define __switch_to_vector(__prev, __next) do {} while (0)
#define riscv_v_vstate_off(regs) do {} while (0)
#define riscv_v_vstate_on(regs) do {} while (0)
#define riscv_v_thread_free(tsk) do {} while (0)
#define riscv_v_setup_ctx_cache() do {} while (0)
#define riscv_v_thread_alloc(tsk) do {} while (0)
#endif /* CONFIG_RISCV_ISA_V */
......
......@@ -9,6 +9,7 @@
#define _ASM_RISCV_WORD_AT_A_TIME_H
#include <asm/asm-extable.h>
#include <linux/kernel.h>
struct word_at_a_time {
......@@ -45,4 +46,30 @@ static inline unsigned long find_zero(unsigned long mask)
/* The mask we created is directly usable as a bytemask */
#define zero_bytemask(mask) (mask)
#ifdef CONFIG_DCACHE_WORD_ACCESS
/*
* Load an unaligned word from kernel space.
*
* In the (very unlikely) case of the word being a page-crosser
* and the next page not being mapped, take the exception and
* return zeroes in the non-existing part.
*/
static inline unsigned long load_unaligned_zeropad(const void *addr)
{
unsigned long ret;
/* Load word from unaligned pointer addr */
asm(
"1: " REG_L " %0, %2\n"
"2:\n"
_ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(1b, 2b, %0, %1)
: "=&r" (ret)
: "r" (addr), "m" (*(unsigned long *)addr));
return ret;
}
#endif /* CONFIG_DCACHE_WORD_ACCESS */
#endif /* _ASM_RISCV_WORD_AT_A_TIME_H */
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2021 SiFive
*/
#include <linux/hardirq.h>
#include <asm-generic/xor.h>
#ifdef CONFIG_RISCV_ISA_V
#include <asm/vector.h>
#include <asm/switch_to.h>
#include <asm/asm-prototypes.h>
static void xor_vector_2(unsigned long bytes, unsigned long *__restrict p1,
const unsigned long *__restrict p2)
{
kernel_vector_begin();
xor_regs_2_(bytes, p1, p2);
kernel_vector_end();
}
static void xor_vector_3(unsigned long bytes, unsigned long *__restrict p1,
const unsigned long *__restrict p2,
const unsigned long *__restrict p3)
{
kernel_vector_begin();
xor_regs_3_(bytes, p1, p2, p3);
kernel_vector_end();
}
static void xor_vector_4(unsigned long bytes, unsigned long *__restrict p1,
const unsigned long *__restrict p2,
const unsigned long *__restrict p3,
const unsigned long *__restrict p4)
{
kernel_vector_begin();
xor_regs_4_(bytes, p1, p2, p3, p4);
kernel_vector_end();
}
static void xor_vector_5(unsigned long bytes, unsigned long *__restrict p1,
const unsigned long *__restrict p2,
const unsigned long *__restrict p3,
const unsigned long *__restrict p4,
const unsigned long *__restrict p5)
{
kernel_vector_begin();
xor_regs_5_(bytes, p1, p2, p3, p4, p5);
kernel_vector_end();
}
static struct xor_block_template xor_block_rvv = {
.name = "rvv",
.do_2 = xor_vector_2,
.do_3 = xor_vector_3,
.do_4 = xor_vector_4,
.do_5 = xor_vector_5
};
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES \
do { \
xor_speed(&xor_block_8regs); \
xor_speed(&xor_block_32regs); \
if (has_vector()) { \
xor_speed(&xor_block_rvv);\
} \
} while (0)
#endif
......@@ -64,6 +64,7 @@ obj-$(CONFIG_MMU) += vdso.o vdso/
obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o
obj-$(CONFIG_FPU) += fpu.o
obj-$(CONFIG_RISCV_ISA_V) += vector.o
obj-$(CONFIG_RISCV_ISA_V) += kernel_mode_vector.o
obj-$(CONFIG_SMP) += smpboot.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SMP) += cpu_ops.o
......
......@@ -8,8 +8,10 @@
#include <linux/acpi.h>
#include <linux/bitmap.h>
#include <linux/cpu.h>
#include <linux/cpuhotplug.h>
#include <linux/ctype.h>
#include <linux/jump_label.h>
#include <linux/log2.h>
#include <linux/memory.h>
#include <linux/module.h>
......@@ -44,6 +46,8 @@ struct riscv_isainfo hart_isa[NR_CPUS];
/* Performance information */
DEFINE_PER_CPU(long, misaligned_access_speed);
static cpumask_t fast_misaligned_access;
/**
* riscv_isa_extension_base() - Get base extension word
*
......@@ -784,6 +788,16 @@ static int check_unaligned_access(void *param)
(speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
per_cpu(misaligned_access_speed, cpu) = speed;
/*
* Set the value of fast_misaligned_access of a CPU. These operations
* are atomic to avoid race conditions.
*/
if (speed == RISCV_HWPROBE_MISALIGNED_FAST)
cpumask_set_cpu(cpu, &fast_misaligned_access);
else
cpumask_clear_cpu(cpu, &fast_misaligned_access);
return 0;
}
......@@ -796,13 +810,69 @@ static void check_unaligned_access_nonboot_cpu(void *param)
check_unaligned_access(pages[cpu]);
}
DEFINE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
{
if (cpumask_weight(mask) == weight)
static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key);
else
static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key);
}
static void set_unaligned_access_static_branches_except_cpu(int cpu)
{
/*
* Same as set_unaligned_access_static_branches, except excludes the
* given CPU from the result. When a CPU is hotplugged into an offline
* state, this function is called before the CPU is set to offline in
* the cpumask, and thus the CPU needs to be explicitly excluded.
*/
cpumask_t fast_except_me;
cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask);
cpumask_clear_cpu(cpu, &fast_except_me);
modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1);
}
static void set_unaligned_access_static_branches(void)
{
/*
* This will be called after check_unaligned_access_all_cpus so the
* result of unaligned access speed for all CPUs will be available.
*
* To avoid the number of online cpus changing between reading
* cpu_online_mask and calling num_online_cpus, cpus_read_lock must be
* held before calling this function.
*/
cpumask_t fast_and_online;
cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask);
modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
}
static int lock_and_set_unaligned_access_static_branch(void)
{
cpus_read_lock();
set_unaligned_access_static_branches();
cpus_read_unlock();
return 0;
}
arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
static int riscv_online_cpu(unsigned int cpu)
{
static struct page *buf;
/* We are already set since the last check */
if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
return 0;
goto exit;
buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
if (!buf) {
......@@ -812,6 +882,17 @@ static int riscv_online_cpu(unsigned int cpu)
check_unaligned_access(buf);
__free_pages(buf, MISALIGNED_BUFFER_ORDER);
exit:
set_unaligned_access_static_branches();
return 0;
}
static int riscv_offline_cpu(unsigned int cpu)
{
set_unaligned_access_static_branches_except_cpu(cpu);
return 0;
}
......@@ -846,9 +927,12 @@ static int check_unaligned_access_all_cpus(void)
/* Check core 0. */
smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
/* Setup hotplug callback for any new CPUs that come online. */
/*
* Setup hotplug callbacks for any new CPUs that come online or go
* offline.
*/
cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
riscv_online_cpu, NULL);
riscv_online_cpu, riscv_offline_cpu);
out:
unaligned_emulation_finish();
......
......@@ -83,6 +83,10 @@ SYM_CODE_START(handle_exception)
/* Load the kernel shadow call stack pointer if coming from userspace */
scs_load_current_if_task_changed s5
#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
move a0, sp
call riscv_v_context_nesting_start
#endif
move a0, sp /* pt_regs */
la ra, ret_from_exception
......@@ -138,6 +142,10 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
*/
csrw CSR_SCRATCH, tp
1:
#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
move a0, sp
call riscv_v_context_nesting_end
#endif
REG_L a0, PT_STATUS(sp)
/*
* The current load reservation is effectively part of the processor's
......
......@@ -178,32 +178,28 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
}
#ifdef CONFIG_DYNAMIC_FTRACE
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs)
{
struct pt_regs *regs = arch_ftrace_get_regs(fregs);
unsigned long *parent = (unsigned long *)&regs->ra;
prepare_ftrace_return(parent, ip, frame_pointer(regs));
}
#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
extern void ftrace_graph_call(void);
extern void ftrace_graph_regs_call(void);
int ftrace_enable_ftrace_graph_caller(void)
{
int ret;
ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
(unsigned long)&prepare_ftrace_return, true, true);
if (ret)
return ret;
return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
(unsigned long)&prepare_ftrace_return, true, true);
}
int ftrace_disable_ftrace_graph_caller(void)
{
int ret;
ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
(unsigned long)&prepare_ftrace_return, false, true);
if (ret)
return ret;
return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
(unsigned long)&prepare_ftrace_return, false, true);
}
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2012 ARM Ltd.
* Author: Catalin Marinas <catalin.marinas@arm.com>
* Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
* Copyright (C) 2021 SiFive
*/
#include <linux/compiler.h>
#include <linux/irqflags.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/types.h>
#include <asm/vector.h>
#include <asm/switch_to.h>
#include <asm/simd.h>
#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
#include <asm/asm-prototypes.h>
#endif
static inline void riscv_v_flags_set(u32 flags)
{
WRITE_ONCE(current->thread.riscv_v_flags, flags);
}
static inline void riscv_v_start(u32 flags)
{
int orig;
orig = riscv_v_flags();
BUG_ON((orig & flags) != 0);
riscv_v_flags_set(orig | flags);
barrier();
}
static inline void riscv_v_stop(u32 flags)
{
int orig;
barrier();
orig = riscv_v_flags();
BUG_ON((orig & flags) == 0);
riscv_v_flags_set(orig & ~flags);
}
/*
* Claim ownership of the CPU vector context for use by the calling context.
*
* The caller may freely manipulate the vector context metadata until
* put_cpu_vector_context() is called.
*/
void get_cpu_vector_context(void)
{
/*
* disable softirqs so it is impossible for softirqs to nest
* get_cpu_vector_context() when kernel is actively using Vector.
*/
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
local_bh_disable();
else
preempt_disable();
riscv_v_start(RISCV_KERNEL_MODE_V);
}
/*
* Release the CPU vector context.
*
* Must be called from a context in which get_cpu_vector_context() was
* previously called, with no call to put_cpu_vector_context() in the
* meantime.
*/
void put_cpu_vector_context(void)
{
riscv_v_stop(RISCV_KERNEL_MODE_V);
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
local_bh_enable();
else
preempt_enable();
}
#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
static __always_inline u32 *riscv_v_flags_ptr(void)
{
return &current->thread.riscv_v_flags;
}
static inline void riscv_preempt_v_set_dirty(void)
{
*riscv_v_flags_ptr() |= RISCV_PREEMPT_V_DIRTY;
}
static inline void riscv_preempt_v_reset_flags(void)
{
*riscv_v_flags_ptr() &= ~(RISCV_PREEMPT_V_DIRTY | RISCV_PREEMPT_V_NEED_RESTORE);
}
static inline void riscv_v_ctx_depth_inc(void)
{
*riscv_v_flags_ptr() += RISCV_V_CTX_UNIT_DEPTH;
}
static inline void riscv_v_ctx_depth_dec(void)
{
*riscv_v_flags_ptr() -= RISCV_V_CTX_UNIT_DEPTH;
}
static inline u32 riscv_v_ctx_get_depth(void)
{
return *riscv_v_flags_ptr() & RISCV_V_CTX_DEPTH_MASK;
}
static int riscv_v_stop_kernel_context(void)
{
if (riscv_v_ctx_get_depth() != 0 || !riscv_preempt_v_started(current))
return 1;
riscv_preempt_v_clear_dirty(current);
riscv_v_stop(RISCV_PREEMPT_V);
return 0;
}
static int riscv_v_start_kernel_context(bool *is_nested)
{
struct __riscv_v_ext_state *kvstate, *uvstate;
kvstate = &current->thread.kernel_vstate;
if (!kvstate->datap)
return -ENOENT;
if (riscv_preempt_v_started(current)) {
WARN_ON(riscv_v_ctx_get_depth() == 0);
*is_nested = true;
get_cpu_vector_context();
if (riscv_preempt_v_dirty(current)) {
__riscv_v_vstate_save(kvstate, kvstate->datap);
riscv_preempt_v_clear_dirty(current);
}
riscv_preempt_v_set_restore(current);
return 0;
}
/* Transfer the ownership of V from user to kernel, then save */
riscv_v_start(RISCV_PREEMPT_V | RISCV_PREEMPT_V_DIRTY);
if ((task_pt_regs(current)->status & SR_VS) == SR_VS_DIRTY) {
uvstate = &current->thread.vstate;
__riscv_v_vstate_save(uvstate, uvstate->datap);
}
riscv_preempt_v_clear_dirty(current);
return 0;
}
/* low-level V context handling code, called with irq disabled */
asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs)
{
int depth;
if (!riscv_preempt_v_started(current))
return;
depth = riscv_v_ctx_get_depth();
if (depth == 0 && (regs->status & SR_VS) == SR_VS_DIRTY)
riscv_preempt_v_set_dirty();
riscv_v_ctx_depth_inc();
}
asmlinkage void riscv_v_context_nesting_end(struct pt_regs *regs)
{
struct __riscv_v_ext_state *vstate = &current->thread.kernel_vstate;
u32 depth;
WARN_ON(!irqs_disabled());
if (!riscv_preempt_v_started(current))
return;
riscv_v_ctx_depth_dec();
depth = riscv_v_ctx_get_depth();
if (depth == 0) {
if (riscv_preempt_v_restore(current)) {
__riscv_v_vstate_restore(vstate, vstate->datap);
__riscv_v_vstate_clean(regs);
riscv_preempt_v_reset_flags();
}
}
}
#else
#define riscv_v_start_kernel_context(nested) (-ENOENT)
#define riscv_v_stop_kernel_context() (-ENOENT)
#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */
/*
* kernel_vector_begin(): obtain the CPU vector registers for use by the calling
* context
*
* Must not be called unless may_use_simd() returns true.
* Task context in the vector registers is saved back to memory as necessary.
*
* A matching call to kernel_vector_end() must be made before returning from the
* calling context.
*
* The caller may freely use the vector registers until kernel_vector_end() is
* called.
*/
void kernel_vector_begin(void)
{
bool nested = false;
if (WARN_ON(!has_vector()))
return;
BUG_ON(!may_use_simd());
if (riscv_v_start_kernel_context(&nested)) {
get_cpu_vector_context();
riscv_v_vstate_save(&current->thread.vstate, task_pt_regs(current));
}
if (!nested)
riscv_v_vstate_set_restore(current, task_pt_regs(current));
riscv_v_enable();
}
EXPORT_SYMBOL_GPL(kernel_vector_begin);
/*
* kernel_vector_end(): give the CPU vector registers back to the current task
*
* Must be called from a context in which kernel_vector_begin() was previously
* called, with no call to kernel_vector_end() in the meantime.
*
* The caller must not use the vector registers after this function is called,
* unless kernel_vector_begin() is called again in the meantime.
*/
void kernel_vector_end(void)
{
if (WARN_ON(!has_vector()))
return;
riscv_v_disable();
if (riscv_v_stop_kernel_context())
put_cpu_vector_context();
}
EXPORT_SYMBOL_GPL(kernel_vector_end);
......@@ -57,31 +57,150 @@
.endm
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
.macro SAVE_ALL
/**
* SAVE_ABI_REGS - save regs against the pt_regs struct
*
* @all: tell if saving all the regs
*
* If all is set, all the regs will be saved, otherwise only ABI
* related regs (a0-a7,epc,ra and optional s0) will be saved.
*
* After the stack is established,
*
* 0(sp) stores the PC of the traced function which can be accessed
* by &(fregs)->regs->epc in tracing function. Note that the real
* function entry address should be computed with -FENTRY_RA_OFFSET.
*
* 8(sp) stores the function return address (i.e. parent IP) that
* can be accessed by &(fregs)->regs->ra in tracing function.
*
* The other regs are saved at the respective localtion and accessed
* by the respective pt_regs member.
*
* Here is the layout of stack for your reference.
*
* PT_SIZE_ON_STACK -> +++++++++
* + ..... +
* + t3-t6 +
* + s2-s11+
* + a0-a7 + --++++-> ftrace_caller saved
* + s1 + +
* + s0 + --+
* + t0-t2 + +
* + tp + +
* + gp + +
* + sp + +
* + ra + --+ // parent IP
* sp -> + epc + --+ // PC
* +++++++++
**/
.macro SAVE_ABI_REGS, all=0
addi sp, sp, -PT_SIZE_ON_STACK
REG_S t0, PT_EPC(sp)
REG_S x1, PT_RA(sp)
REG_S x2, PT_SP(sp)
REG_S x3, PT_GP(sp)
REG_S x4, PT_TP(sp)
REG_S x5, PT_T0(sp)
save_from_x6_to_x31
REG_S t0, PT_EPC(sp)
REG_S x1, PT_RA(sp)
// save the ABI regs
REG_S x10, PT_A0(sp)
REG_S x11, PT_A1(sp)
REG_S x12, PT_A2(sp)
REG_S x13, PT_A3(sp)
REG_S x14, PT_A4(sp)
REG_S x15, PT_A5(sp)
REG_S x16, PT_A6(sp)
REG_S x17, PT_A7(sp)
// save the leftover regs
.if \all == 1
REG_S x2, PT_SP(sp)
REG_S x3, PT_GP(sp)
REG_S x4, PT_TP(sp)
REG_S x5, PT_T0(sp)
REG_S x6, PT_T1(sp)
REG_S x7, PT_T2(sp)
REG_S x8, PT_S0(sp)
REG_S x9, PT_S1(sp)
REG_S x18, PT_S2(sp)
REG_S x19, PT_S3(sp)
REG_S x20, PT_S4(sp)
REG_S x21, PT_S5(sp)
REG_S x22, PT_S6(sp)
REG_S x23, PT_S7(sp)
REG_S x24, PT_S8(sp)
REG_S x25, PT_S9(sp)
REG_S x26, PT_S10(sp)
REG_S x27, PT_S11(sp)
REG_S x28, PT_T3(sp)
REG_S x29, PT_T4(sp)
REG_S x30, PT_T5(sp)
REG_S x31, PT_T6(sp)
// save s0 if FP_TEST defined
.else
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
REG_S x8, PT_S0(sp)
#endif
.endif
.endm
.macro RESTORE_ALL
REG_L x1, PT_RA(sp)
REG_L x2, PT_SP(sp)
REG_L x3, PT_GP(sp)
REG_L x4, PT_TP(sp)
/* Restore t0 with PT_EPC */
REG_L x5, PT_EPC(sp)
restore_from_x6_to_x31
.macro RESTORE_ABI_REGS, all=0
REG_L t0, PT_EPC(sp)
REG_L x1, PT_RA(sp)
REG_L x10, PT_A0(sp)
REG_L x11, PT_A1(sp)
REG_L x12, PT_A2(sp)
REG_L x13, PT_A3(sp)
REG_L x14, PT_A4(sp)
REG_L x15, PT_A5(sp)
REG_L x16, PT_A6(sp)
REG_L x17, PT_A7(sp)
.if \all == 1
REG_L x2, PT_SP(sp)
REG_L x3, PT_GP(sp)
REG_L x4, PT_TP(sp)
REG_L x6, PT_T1(sp)
REG_L x7, PT_T2(sp)
REG_L x8, PT_S0(sp)
REG_L x9, PT_S1(sp)
REG_L x18, PT_S2(sp)
REG_L x19, PT_S3(sp)
REG_L x20, PT_S4(sp)
REG_L x21, PT_S5(sp)
REG_L x22, PT_S6(sp)
REG_L x23, PT_S7(sp)
REG_L x24, PT_S8(sp)
REG_L x25, PT_S9(sp)
REG_L x26, PT_S10(sp)
REG_L x27, PT_S11(sp)
REG_L x28, PT_T3(sp)
REG_L x29, PT_T4(sp)
REG_L x30, PT_T5(sp)
REG_L x31, PT_T6(sp)
.else
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
REG_L x8, PT_S0(sp)
#endif
.endif
addi sp, sp, PT_SIZE_ON_STACK
.endm
.macro PREPARE_ARGS
addi a0, t0, -FENTRY_RA_OFFSET
la a1, function_trace_op
REG_L a2, 0(a1)
mv a1, ra
mv a3, sp
.endm
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
SYM_FUNC_START(ftrace_caller)
SAVE_ABI
......@@ -105,34 +224,39 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
call ftrace_stub
#endif
RESTORE_ABI
jr t0
jr t0
SYM_FUNC_END(ftrace_caller)
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
SYM_FUNC_START(ftrace_regs_caller)
SAVE_ALL
addi a0, t0, -FENTRY_RA_OFFSET
la a1, function_trace_op
REG_L a2, 0(a1)
mv a1, ra
mv a3, sp
mv t1, zero
SAVE_ABI_REGS 1
PREPARE_ARGS
SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
call ftrace_stub
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
addi a0, sp, PT_RA
REG_L a1, PT_EPC(sp)
addi a1, a1, -FENTRY_RA_OFFSET
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
mv a2, s0
#endif
SYM_INNER_LABEL(ftrace_graph_regs_call, SYM_L_GLOBAL)
RESTORE_ABI_REGS 1
bnez t1, .Ldirect
jr t0
.Ldirect:
jr t1
SYM_FUNC_END(ftrace_regs_caller)
SYM_FUNC_START(ftrace_caller)
SAVE_ABI_REGS 0
PREPARE_ARGS
SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
call ftrace_stub
#endif
RESTORE_ALL
jr t0
SYM_FUNC_END(ftrace_regs_caller)
RESTORE_ABI_REGS 0
jr t0
SYM_FUNC_END(ftrace_caller)
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
SYM_CODE_START(ftrace_stub_direct_tramp)
jr t0
SYM_CODE_END(ftrace_stub_direct_tramp)
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
......@@ -723,8 +723,8 @@ static int add_relocation_to_accumulate(struct module *me, int type,
if (!bucket) {
kfree(entry);
kfree(rel_head);
kfree(rel_head->rel_entry);
kfree(rel_head);
return -ENOMEM;
}
......@@ -747,6 +747,10 @@ initialize_relocation_hashtable(unsigned int num_relocations,
{
/* Can safely assume that bits is not greater than sizeof(long) */
unsigned long hashtable_size = roundup_pow_of_two(num_relocations);
/*
* When hashtable_size == 1, hashtable_bits == 0.
* This is valid because the hashing algorithm returns 0 in this case.
*/
unsigned int hashtable_bits = ilog2(hashtable_size);
/*
......@@ -760,10 +764,10 @@ initialize_relocation_hashtable(unsigned int num_relocations,
hashtable_size <<= should_double_size;
*relocation_hashtable = kmalloc_array(hashtable_size,
sizeof(*relocation_hashtable),
sizeof(**relocation_hashtable),
GFP_KERNEL);
if (!*relocation_hashtable)
return -ENOMEM;
return 0;
__hash_init(*relocation_hashtable, hashtable_size);
......@@ -779,6 +783,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
Elf_Sym *sym;
void *location;
unsigned int i, type;
unsigned int j_idx = 0;
Elf_Addr v;
int res;
unsigned int num_relocations = sechdrs[relsec].sh_size / sizeof(*rel);
......@@ -789,8 +794,8 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
hashtable_bits = initialize_relocation_hashtable(num_relocations,
&relocation_hashtable);
if (hashtable_bits < 0)
return hashtable_bits;
if (!relocation_hashtable)
return -ENOMEM;
INIT_LIST_HEAD(&used_buckets_list);
......@@ -829,9 +834,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
v = sym->st_value + rel[i].r_addend;
if (type == R_RISCV_PCREL_LO12_I || type == R_RISCV_PCREL_LO12_S) {
unsigned int j;
unsigned int j = j_idx;
bool found = false;
for (j = 0; j < sechdrs[relsec].sh_size / sizeof(*rel); j++) {
do {
unsigned long hi20_loc =
sechdrs[sechdrs[relsec].sh_info].sh_addr
+ rel[j].r_offset;
......@@ -860,16 +866,26 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
hi20 = (offset + 0x800) & 0xfffff000;
lo12 = offset - hi20;
v = lo12;
found = true;
break;
}
}
if (j == sechdrs[relsec].sh_size / sizeof(*rel)) {
j++;
if (j > sechdrs[relsec].sh_size / sizeof(*rel))
j = 0;
} while (j_idx != j);
if (!found) {
pr_err(
"%s: Can not find HI20 relocation information\n",
me->name);
return -EINVAL;
}
/* Record the previous j-loop end index */
j_idx = j;
}
if (reloc_handlers[type].accumulate_handler)
......
......@@ -38,8 +38,7 @@ static char *get_early_cmdline(uintptr_t dtb_pa)
if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) ||
IS_ENABLED(CONFIG_CMDLINE_FORCE) ||
fdt_cmdline_size == 0 /* CONFIG_CMDLINE_FALLBACK */) {
strncat(early_cmdline, CONFIG_CMDLINE,
COMMAND_LINE_SIZE - fdt_cmdline_size);
strlcat(early_cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
}
return early_cmdline;
......
......@@ -171,6 +171,7 @@ void flush_thread(void)
riscv_v_vstate_off(task_pt_regs(current));
kfree(current->thread.vstate.datap);
memset(&current->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
clear_tsk_thread_flag(current, TIF_RISCV_V_DEFER_RESTORE);
#endif
}
......@@ -178,7 +179,7 @@ void arch_release_task_struct(struct task_struct *tsk)
{
/* Free the vector context of datap. */
if (has_vector())
kfree(tsk->thread.vstate.datap);
riscv_v_thread_free(tsk);
}
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
......@@ -187,6 +188,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
*dst = *src;
/* clear entire V context, including datap for a new task */
memset(&dst->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
memset(&dst->thread.kernel_vstate, 0, sizeof(struct __riscv_v_ext_state));
clear_tsk_thread_flag(dst, TIF_RISCV_V_DEFER_RESTORE);
return 0;
}
......@@ -221,7 +224,15 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
childregs->a0 = 0; /* Return value of fork() */
p->thread.s[0] = 0;
}
p->thread.riscv_v_flags = 0;
if (has_vector())
riscv_v_thread_alloc(p);
p->thread.ra = (unsigned long)ret_from_fork;
p->thread.sp = (unsigned long)childregs; /* kernel sp */
return 0;
}
void __init arch_task_cache_init(void)
{
riscv_v_setup_ctx_cache();
}
......@@ -99,8 +99,11 @@ static int riscv_vr_get(struct task_struct *target,
* Ensure the vector registers have been saved to the memory before
* copying them to membuf.
*/
if (target == current)
riscv_v_vstate_save(current, task_pt_regs(current));
if (target == current) {
get_cpu_vector_context();
riscv_v_vstate_save(&current->thread.vstate, task_pt_regs(current));
put_cpu_vector_context();
}
ptrace_vstate.vstart = vstate->vstart;
ptrace_vstate.vl = vstate->vl;
......
......@@ -7,6 +7,7 @@
#include <linux/bits.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/pm.h>
#include <linux/reboot.h>
#include <asm/sbi.h>
......@@ -571,6 +572,66 @@ long sbi_get_mimpid(void)
}
EXPORT_SYMBOL_GPL(sbi_get_mimpid);
bool sbi_debug_console_available;
int sbi_debug_console_write(const char *bytes, unsigned int num_bytes)
{
phys_addr_t base_addr;
struct sbiret ret;
if (!sbi_debug_console_available)
return -EOPNOTSUPP;
if (is_vmalloc_addr(bytes))
base_addr = page_to_phys(vmalloc_to_page(bytes)) +
offset_in_page(bytes);
else
base_addr = __pa(bytes);
if (PAGE_SIZE < (offset_in_page(bytes) + num_bytes))
num_bytes = PAGE_SIZE - offset_in_page(bytes);
if (IS_ENABLED(CONFIG_32BIT))
ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_WRITE,
num_bytes, lower_32_bits(base_addr),
upper_32_bits(base_addr), 0, 0, 0);
else
ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_WRITE,
num_bytes, base_addr, 0, 0, 0, 0);
if (ret.error == SBI_ERR_FAILURE)
return -EIO;
return ret.error ? sbi_err_map_linux_errno(ret.error) : ret.value;
}
int sbi_debug_console_read(char *bytes, unsigned int num_bytes)
{
phys_addr_t base_addr;
struct sbiret ret;
if (!sbi_debug_console_available)
return -EOPNOTSUPP;
if (is_vmalloc_addr(bytes))
base_addr = page_to_phys(vmalloc_to_page(bytes)) +
offset_in_page(bytes);
else
base_addr = __pa(bytes);
if (PAGE_SIZE < (offset_in_page(bytes) + num_bytes))
num_bytes = PAGE_SIZE - offset_in_page(bytes);
if (IS_ENABLED(CONFIG_32BIT))
ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_READ,
num_bytes, lower_32_bits(base_addr),
upper_32_bits(base_addr), 0, 0, 0);
else
ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_READ,
num_bytes, base_addr, 0, 0, 0, 0);
if (ret.error == SBI_ERR_FAILURE)
return -EIO;
return ret.error ? sbi_err_map_linux_errno(ret.error) : ret.value;
}
void __init sbi_init(void)
{
int ret;
......@@ -612,6 +673,11 @@ void __init sbi_init(void)
sbi_srst_reboot_nb.priority = 192;
register_restart_handler(&sbi_srst_reboot_nb);
}
if ((sbi_spec_version >= sbi_mk_version(2, 0)) &&
(sbi_probe_extension(SBI_EXT_DBCN) > 0)) {
pr_info("SBI DBCN extension detected\n");
sbi_debug_console_available = true;
}
} else {
__sbi_set_timer = __sbi_set_timer_v01;
__sbi_send_ipi = __sbi_send_ipi_v01;
......
......@@ -86,7 +86,10 @@ static long save_v_state(struct pt_regs *regs, void __user **sc_vec)
/* datap is designed to be 16 byte aligned for better performance */
WARN_ON(unlikely(!IS_ALIGNED((unsigned long)datap, 16)));
riscv_v_vstate_save(current, regs);
get_cpu_vector_context();
riscv_v_vstate_save(&current->thread.vstate, regs);
put_cpu_vector_context();
/* Copy everything of vstate but datap. */
err = __copy_to_user(&state->v_state, &current->thread.vstate,
offsetof(struct __riscv_v_ext_state, datap));
......@@ -134,7 +137,7 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec)
if (unlikely(err))
return err;
riscv_v_vstate_restore(current, regs);
riscv_v_vstate_set_restore(current, regs);
return err;
}
......
......@@ -4,8 +4,12 @@
* Copyright (c) 2022 Ventana Micro Systems Inc.
*/
#define pr_fmt(fmt) "suspend: " fmt
#include <linux/ftrace.h>
#include <linux/suspend.h>
#include <asm/csr.h>
#include <asm/sbi.h>
#include <asm/suspend.h>
void suspend_save_csrs(struct suspend_context *context)
......@@ -85,3 +89,43 @@ int cpu_suspend(unsigned long arg,
return rc;
}
#ifdef CONFIG_RISCV_SBI
static int sbi_system_suspend(unsigned long sleep_type,
unsigned long resume_addr,
unsigned long opaque)
{
struct sbiret ret;
ret = sbi_ecall(SBI_EXT_SUSP, SBI_EXT_SUSP_SYSTEM_SUSPEND,
sleep_type, resume_addr, opaque, 0, 0, 0);
if (ret.error)
return sbi_err_map_linux_errno(ret.error);
return ret.value;
}
static int sbi_system_suspend_enter(suspend_state_t state)
{
return cpu_suspend(SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM, sbi_system_suspend);
}
static const struct platform_suspend_ops sbi_system_suspend_ops = {
.valid = suspend_valid_only_mem,
.enter = sbi_system_suspend_enter,
};
static int __init sbi_system_suspend_init(void)
{
if (sbi_spec_version >= sbi_mk_version(2, 0) &&
sbi_probe_extension(SBI_EXT_SUSP) > 0) {
pr_info("SBI SUSP extension detected\n");
if (IS_ENABLED(CONFIG_SUSPEND))
suspend_set_ops(&sbi_system_suspend_ops);
}
return 0;
}
arch_initcall(sbi_system_suspend_init);
#endif /* CONFIG_RISCV_SBI */
......@@ -21,6 +21,10 @@
#include <asm/bug.h>
static bool riscv_v_implicit_uacc = IS_ENABLED(CONFIG_RISCV_ISA_V_DEFAULT_ENABLE);
static struct kmem_cache *riscv_v_user_cachep;
#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
static struct kmem_cache *riscv_v_kernel_cachep;
#endif
unsigned long riscv_v_vsize __read_mostly;
EXPORT_SYMBOL_GPL(riscv_v_vsize);
......@@ -47,6 +51,21 @@ int riscv_v_setup_vsize(void)
return 0;
}
void __init riscv_v_setup_ctx_cache(void)
{
if (!has_vector())
return;
riscv_v_user_cachep = kmem_cache_create_usercopy("riscv_vector_ctx",
riscv_v_vsize, 16, SLAB_PANIC,
0, riscv_v_vsize, NULL);
#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
riscv_v_kernel_cachep = kmem_cache_create("riscv_vector_kctx",
riscv_v_vsize, 16,
SLAB_PANIC, NULL);
#endif
}
static bool insn_is_vector(u32 insn_buf)
{
u32 opcode = insn_buf & __INSN_OPCODE_MASK;
......@@ -80,20 +99,37 @@ static bool insn_is_vector(u32 insn_buf)
return false;
}
static int riscv_v_thread_zalloc(void)
static int riscv_v_thread_zalloc(struct kmem_cache *cache,
struct __riscv_v_ext_state *ctx)
{
void *datap;
datap = kzalloc(riscv_v_vsize, GFP_KERNEL);
datap = kmem_cache_zalloc(cache, GFP_KERNEL);
if (!datap)
return -ENOMEM;
current->thread.vstate.datap = datap;
memset(&current->thread.vstate, 0, offsetof(struct __riscv_v_ext_state,
datap));
ctx->datap = datap;
memset(ctx, 0, offsetof(struct __riscv_v_ext_state, datap));
return 0;
}
void riscv_v_thread_alloc(struct task_struct *tsk)
{
#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
riscv_v_thread_zalloc(riscv_v_kernel_cachep, &tsk->thread.kernel_vstate);
#endif
}
void riscv_v_thread_free(struct task_struct *tsk)
{
if (tsk->thread.vstate.datap)
kmem_cache_free(riscv_v_user_cachep, tsk->thread.vstate.datap);
#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
if (tsk->thread.kernel_vstate.datap)
kmem_cache_free(riscv_v_kernel_cachep, tsk->thread.kernel_vstate.datap);
#endif
}
#define VSTATE_CTRL_GET_CUR(x) ((x) & PR_RISCV_V_VSTATE_CTRL_CUR_MASK)
#define VSTATE_CTRL_GET_NEXT(x) (((x) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) >> 2)
#define VSTATE_CTRL_MAKE_NEXT(x) (((x) << 2) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK)
......@@ -122,7 +158,8 @@ static inline void riscv_v_ctrl_set(struct task_struct *tsk, int cur, int nxt,
ctrl |= VSTATE_CTRL_MAKE_NEXT(nxt);
if (inherit)
ctrl |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
tsk->thread.vstate_ctrl = ctrl;
tsk->thread.vstate_ctrl &= ~PR_RISCV_V_VSTATE_CTRL_MASK;
tsk->thread.vstate_ctrl |= ctrl;
}
bool riscv_v_vstate_ctrl_user_allowed(void)
......@@ -162,12 +199,12 @@ bool riscv_v_first_use_handler(struct pt_regs *regs)
* context where VS has been off. So, try to allocate the user's V
* context and resume execution.
*/
if (riscv_v_thread_zalloc()) {
if (riscv_v_thread_zalloc(riscv_v_user_cachep, &current->thread.vstate)) {
force_sig(SIGBUS);
return true;
}
riscv_v_vstate_on(regs);
riscv_v_vstate_restore(current, regs);
riscv_v_vstate_set_restore(current, regs);
return true;
}
......
......@@ -6,8 +6,14 @@ lib-y += memmove.o
lib-y += strcmp.o
lib-y += strlen.o
lib-y += strncmp.o
lib-y += csum.o
ifeq ($(CONFIG_MMU), y)
lib-$(CONFIG_RISCV_ISA_V) += uaccess_vector.o
endif
lib-$(CONFIG_MMU) += uaccess.o
lib-$(CONFIG_64BIT) += tishift.o
lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_RISCV_ISA_V) += xor.o
lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o
// SPDX-License-Identifier: GPL-2.0
/*
* Checksum library
*
* Influenced by arch/arm64/lib/csum.c
* Copyright (C) 2023 Rivos Inc.
*/
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/jump_label.h>
#include <linux/kasan-checks.h>
#include <linux/kernel.h>
#include <asm/cpufeature.h>
#include <net/checksum.h>
/* Default version is sufficient for 32 bit */
#ifndef CONFIG_32BIT
__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
const struct in6_addr *daddr,
__u32 len, __u8 proto, __wsum csum)
{
unsigned int ulen, uproto;
unsigned long sum = (__force unsigned long)csum;
sum += (__force unsigned long)saddr->s6_addr32[0];
sum += (__force unsigned long)saddr->s6_addr32[1];
sum += (__force unsigned long)saddr->s6_addr32[2];
sum += (__force unsigned long)saddr->s6_addr32[3];
sum += (__force unsigned long)daddr->s6_addr32[0];
sum += (__force unsigned long)daddr->s6_addr32[1];
sum += (__force unsigned long)daddr->s6_addr32[2];
sum += (__force unsigned long)daddr->s6_addr32[3];
ulen = (__force unsigned int)htonl((unsigned int)len);
sum += ulen;
uproto = (__force unsigned int)htonl(proto);
sum += uproto;
/*
* Zbb support saves 4 instructions, so not worth checking without
* alternatives if supported
*/
if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
unsigned long fold_temp;
/*
* Zbb is likely available when the kernel is compiled with Zbb
* support, so nop when Zbb is available and jump when Zbb is
* not available.
*/
asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
RISCV_ISA_EXT_ZBB, 1)
:
:
:
: no_zbb);
asm(".option push \n\
.option arch,+zbb \n\
rori %[fold_temp], %[sum], 32 \n\
add %[sum], %[fold_temp], %[sum] \n\
srli %[sum], %[sum], 32 \n\
not %[fold_temp], %[sum] \n\
roriw %[sum], %[sum], 16 \n\
subw %[sum], %[fold_temp], %[sum] \n\
.option pop"
: [sum] "+r" (sum), [fold_temp] "=&r" (fold_temp));
return (__force __sum16)(sum >> 16);
}
no_zbb:
sum += ror64(sum, 32);
sum >>= 32;
return csum_fold((__force __wsum)sum);
}
EXPORT_SYMBOL(csum_ipv6_magic);
#endif /* !CONFIG_32BIT */
#ifdef CONFIG_32BIT
#define OFFSET_MASK 3
#elif CONFIG_64BIT
#define OFFSET_MASK 7
#endif
static inline __no_sanitize_address unsigned long
do_csum_common(const unsigned long *ptr, const unsigned long *end,
unsigned long data)
{
unsigned int shift;
unsigned long csum = 0, carry = 0;
/*
* Do 32-bit reads on RV32 and 64-bit reads otherwise. This should be
* faster than doing 32-bit reads on architectures that support larger
* reads.
*/
while (ptr < end) {
csum += data;
carry += csum < data;
data = *(ptr++);
}
/*
* Perform alignment (and over-read) bytes on the tail if any bytes
* leftover.
*/
shift = ((long)ptr - (long)end) * 8;
#ifdef __LITTLE_ENDIAN
data = (data << shift) >> shift;
#else
data = (data >> shift) << shift;
#endif
csum += data;
carry += csum < data;
csum += carry;
csum += csum < carry;
return csum;
}
/*
* Algorithm accounts for buff being misaligned.
* If buff is not aligned, will over-read bytes but not use the bytes that it
* shouldn't. The same thing will occur on the tail-end of the read.
*/
static inline __no_sanitize_address unsigned int
do_csum_with_alignment(const unsigned char *buff, int len)
{
unsigned int offset, shift;
unsigned long csum, data;
const unsigned long *ptr, *end;
/*
* Align address to closest word (double word on rv64) that comes before
* buff. This should always be in the same page and cache line.
* Directly call KASAN with the alignment we will be using.
*/
offset = (unsigned long)buff & OFFSET_MASK;
kasan_check_read(buff, len);
ptr = (const unsigned long *)(buff - offset);
/*
* Clear the most significant bytes that were over-read if buff was not
* aligned.
*/
shift = offset * 8;
data = *(ptr++);
#ifdef __LITTLE_ENDIAN
data = (data >> shift) << shift;
#else
data = (data << shift) >> shift;
#endif
end = (const unsigned long *)(buff + len);
csum = do_csum_common(ptr, end, data);
#ifdef CC_HAS_ASM_GOTO_TIED_OUTPUT
/*
* Zbb support saves 6 instructions, so not worth checking without
* alternatives if supported
*/
if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
unsigned long fold_temp;
/*
* Zbb is likely available when the kernel is compiled with Zbb
* support, so nop when Zbb is available and jump when Zbb is
* not available.
*/
asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
RISCV_ISA_EXT_ZBB, 1)
:
:
:
: no_zbb);
#ifdef CONFIG_32BIT
asm_volatile_goto(".option push \n\
.option arch,+zbb \n\
rori %[fold_temp], %[csum], 16 \n\
andi %[offset], %[offset], 1 \n\
add %[csum], %[fold_temp], %[csum] \n\
beq %[offset], zero, %l[end] \n\
rev8 %[csum], %[csum] \n\
.option pop"
: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
: [offset] "r" (offset)
:
: end);
return (unsigned short)csum;
#else /* !CONFIG_32BIT */
asm_volatile_goto(".option push \n\
.option arch,+zbb \n\
rori %[fold_temp], %[csum], 32 \n\
add %[csum], %[fold_temp], %[csum] \n\
srli %[csum], %[csum], 32 \n\
roriw %[fold_temp], %[csum], 16 \n\
addw %[csum], %[fold_temp], %[csum] \n\
andi %[offset], %[offset], 1 \n\
beq %[offset], zero, %l[end] \n\
rev8 %[csum], %[csum] \n\
.option pop"
: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
: [offset] "r" (offset)
:
: end);
return (csum << 16) >> 48;
#endif /* !CONFIG_32BIT */
end:
return csum >> 16;
}
no_zbb:
#endif /* CC_HAS_ASM_GOTO_TIED_OUTPUT */
#ifndef CONFIG_32BIT
csum += ror64(csum, 32);
csum >>= 32;
#endif
csum = (u32)csum + ror32((u32)csum, 16);
if (offset & 1)
return (u16)swab32(csum);
return csum >> 16;
}
/*
* Does not perform alignment, should only be used if machine has fast
* misaligned accesses, or when buff is known to be aligned.
*/
static inline __no_sanitize_address unsigned int
do_csum_no_alignment(const unsigned char *buff, int len)
{
unsigned long csum, data;
const unsigned long *ptr, *end;
ptr = (const unsigned long *)(buff);
data = *(ptr++);
kasan_check_read(buff, len);
end = (const unsigned long *)(buff + len);
csum = do_csum_common(ptr, end, data);
/*
* Zbb support saves 6 instructions, so not worth checking without
* alternatives if supported
*/
if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
unsigned long fold_temp;
/*
* Zbb is likely available when the kernel is compiled with Zbb
* support, so nop when Zbb is available and jump when Zbb is
* not available.
*/
asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
RISCV_ISA_EXT_ZBB, 1)
:
:
:
: no_zbb);
#ifdef CONFIG_32BIT
asm (".option push \n\
.option arch,+zbb \n\
rori %[fold_temp], %[csum], 16 \n\
add %[csum], %[fold_temp], %[csum] \n\
.option pop"
: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
:
: );
#else /* !CONFIG_32BIT */
asm (".option push \n\
.option arch,+zbb \n\
rori %[fold_temp], %[csum], 32 \n\
add %[csum], %[fold_temp], %[csum] \n\
srli %[csum], %[csum], 32 \n\
roriw %[fold_temp], %[csum], 16 \n\
addw %[csum], %[fold_temp], %[csum] \n\
.option pop"
: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
:
: );
#endif /* !CONFIG_32BIT */
return csum >> 16;
}
no_zbb:
#ifndef CONFIG_32BIT
csum += ror64(csum, 32);
csum >>= 32;
#endif
csum = (u32)csum + ror32((u32)csum, 16);
return csum >> 16;
}
/*
* Perform a checksum on an arbitrary memory address.
* Will do a light-weight address alignment if buff is misaligned, unless
* cpu supports fast misaligned accesses.
*/
unsigned int do_csum(const unsigned char *buff, int len)
{
if (unlikely(len <= 0))
return 0;
/*
* Significant performance gains can be seen by not doing alignment
* on machines with fast misaligned accesses.
*
* There is some duplicate code between the "with_alignment" and
* "no_alignment" implmentations, but the overlap is too awkward to be
* able to fit in one function without introducing multiple static
* branches. The largest chunk of overlap was delegated into the
* do_csum_common function.
*/
if (static_branch_likely(&fast_misaligned_access_speed_key))
return do_csum_no_alignment(buff, len);
if (((unsigned long)buff & OFFSET_MASK) == 0)
return do_csum_no_alignment(buff, len);
return do_csum_with_alignment(buff, len);
}
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2023 SiFive
* Author: Andy Chiu <andy.chiu@sifive.com>
*/
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm/vector.h>
#include <asm/simd.h>
#ifdef CONFIG_MMU
#include <asm/asm-prototypes.h>
#endif
#ifdef CONFIG_MMU
size_t riscv_v_usercopy_threshold = CONFIG_RISCV_ISA_V_UCOPY_THRESHOLD;
int __asm_vector_usercopy(void *dst, void *src, size_t n);
int fallback_scalar_usercopy(void *dst, void *src, size_t n);
asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n)
{
size_t remain, copied;
/* skip has_vector() check because it has been done by the asm */
if (!may_use_simd())
goto fallback;
kernel_vector_begin();
remain = __asm_vector_usercopy(dst, src, n);
kernel_vector_end();
if (remain) {
copied = n - remain;
dst += copied;
src += copied;
n = remain;
goto fallback;
}
return remain;
fallback:
return fallback_scalar_usercopy(dst, src, n);
}
#endif
......@@ -3,6 +3,8 @@
#include <asm/asm.h>
#include <asm/asm-extable.h>
#include <asm/csr.h>
#include <asm/hwcap.h>
#include <asm/alternative-macros.h>
.macro fixup op reg addr lbl
100:
......@@ -11,6 +13,13 @@
.endm
SYM_FUNC_START(__asm_copy_to_user)
#ifdef CONFIG_RISCV_ISA_V
ALTERNATIVE("j fallback_scalar_usercopy", "nop", 0, RISCV_ISA_EXT_v, CONFIG_RISCV_ISA_V)
REG_L t0, riscv_v_usercopy_threshold
bltu a2, t0, fallback_scalar_usercopy
tail enter_vector_usercopy
#endif
SYM_FUNC_START(fallback_scalar_usercopy)
/* Enable access to user memory */
li t6, SR_SUM
......@@ -181,6 +190,7 @@ SYM_FUNC_START(__asm_copy_to_user)
sub a0, t5, a0
ret
SYM_FUNC_END(__asm_copy_to_user)
SYM_FUNC_END(fallback_scalar_usercopy)
EXPORT_SYMBOL(__asm_copy_to_user)
SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user)
EXPORT_SYMBOL(__asm_copy_from_user)
......
/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/linkage.h>
#include <asm-generic/export.h>
#include <asm/asm.h>
#include <asm/asm-extable.h>
#include <asm/csr.h>
#define pDst a0
#define pSrc a1
#define iNum a2
#define iVL a3
#define ELEM_LMUL_SETTING m8
#define vData v0
.macro fixup op reg addr lbl
100:
\op \reg, \addr
_asm_extable 100b, \lbl
.endm
SYM_FUNC_START(__asm_vector_usercopy)
/* Enable access to user memory */
li t6, SR_SUM
csrs CSR_STATUS, t6
loop:
vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
fixup vle8.v vData, (pSrc), 10f
sub iNum, iNum, iVL
add pSrc, pSrc, iVL
fixup vse8.v vData, (pDst), 11f
add pDst, pDst, iVL
bnez iNum, loop
/* Exception fixup for vector load is shared with normal exit */
10:
/* Disable access to user memory */
csrc CSR_STATUS, t6
mv a0, iNum
ret
/* Exception fixup code for vector store. */
11:
/* Undo the subtraction after vle8.v */
add iNum, iNum, iVL
/* Make sure the scalar fallback skip already processed bytes */
csrr t2, CSR_VSTART
sub iNum, iNum, t2
j 10b
SYM_FUNC_END(__asm_vector_usercopy)
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2021 SiFive
*/
#include <linux/linkage.h>
#include <linux/export.h>
#include <asm/asm.h>
SYM_FUNC_START(xor_regs_2_)
vsetvli a3, a0, e8, m8, ta, ma
vle8.v v0, (a1)
vle8.v v8, (a2)
sub a0, a0, a3
vxor.vv v16, v0, v8
add a2, a2, a3
vse8.v v16, (a1)
add a1, a1, a3
bnez a0, xor_regs_2_
ret
SYM_FUNC_END(xor_regs_2_)
EXPORT_SYMBOL(xor_regs_2_)
SYM_FUNC_START(xor_regs_3_)
vsetvli a4, a0, e8, m8, ta, ma
vle8.v v0, (a1)
vle8.v v8, (a2)
sub a0, a0, a4
vxor.vv v0, v0, v8
vle8.v v16, (a3)
add a2, a2, a4
vxor.vv v16, v0, v16
add a3, a3, a4
vse8.v v16, (a1)
add a1, a1, a4
bnez a0, xor_regs_3_
ret
SYM_FUNC_END(xor_regs_3_)
EXPORT_SYMBOL(xor_regs_3_)
SYM_FUNC_START(xor_regs_4_)
vsetvli a5, a0, e8, m8, ta, ma
vle8.v v0, (a1)
vle8.v v8, (a2)
sub a0, a0, a5
vxor.vv v0, v0, v8
vle8.v v16, (a3)
add a2, a2, a5
vxor.vv v0, v0, v16
vle8.v v24, (a4)
add a3, a3, a5
vxor.vv v16, v0, v24
add a4, a4, a5
vse8.v v16, (a1)
add a1, a1, a5
bnez a0, xor_regs_4_
ret
SYM_FUNC_END(xor_regs_4_)
EXPORT_SYMBOL(xor_regs_4_)
SYM_FUNC_START(xor_regs_5_)
vsetvli a6, a0, e8, m8, ta, ma
vle8.v v0, (a1)
vle8.v v8, (a2)
sub a0, a0, a6
vxor.vv v0, v0, v8
vle8.v v16, (a3)
add a2, a2, a6
vxor.vv v0, v0, v16
vle8.v v24, (a4)
add a3, a3, a6
vxor.vv v0, v0, v24
vle8.v v8, (a5)
add a4, a4, a6
vxor.vv v16, v0, v8
add a5, a5, a6
vse8.v v16, (a1)
add a1, a1, a6
bnez a0, xor_regs_5_
ret
SYM_FUNC_END(xor_regs_5_)
EXPORT_SYMBOL(xor_regs_5_)
......@@ -27,6 +27,14 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex,
return true;
}
static inline unsigned long regs_get_gpr(struct pt_regs *regs, unsigned int offset)
{
if (unlikely(!offset || offset > MAX_REG_OFFSET))
return 0;
return *(unsigned long *)((unsigned long)regs + offset);
}
static inline void regs_set_gpr(struct pt_regs *regs, unsigned int offset,
unsigned long val)
{
......@@ -50,6 +58,27 @@ static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex,
return true;
}
static bool
ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex,
struct pt_regs *regs)
{
int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->data);
int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
unsigned long data, addr, offset;
addr = regs_get_gpr(regs, reg_addr * sizeof(unsigned long));
offset = addr & 0x7UL;
addr &= ~0x7UL;
data = *(unsigned long *)addr >> (offset * 8);
regs_set_gpr(regs, reg_data * sizeof(unsigned long), data);
regs->epc = get_ex_fixup(ex);
return true;
}
bool fixup_exception(struct pt_regs *regs)
{
const struct exception_table_entry *ex;
......@@ -65,6 +94,8 @@ bool fixup_exception(struct pt_regs *regs)
return ex_handler_bpf(ex, regs);
case EX_TYPE_UACCESS_ERR_ZERO:
return ex_handler_uaccess_err_zero(ex, regs);
case EX_TYPE_LOAD_UNALIGNED_ZEROPAD:
return ex_handler_load_unaligned_zeropad(ex, regs);
}
BUG();
......
......@@ -1060,7 +1060,11 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
kernel_map.virt_addr = KERNEL_LINK_ADDR + kernel_map.virt_offset;
#ifdef CONFIG_XIP_KERNEL
#ifdef CONFIG_64BIT
kernel_map.page_offset = PAGE_OFFSET_L3;
#else
kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL);
#endif
kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR;
kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom);
......@@ -1387,10 +1391,29 @@ void __init misc_mem_init(void)
}
#ifdef CONFIG_SPARSEMEM_VMEMMAP
void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
unsigned long addr, unsigned long next)
{
pmd_set_huge(pmd, virt_to_phys(p), PAGE_KERNEL);
}
int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node,
unsigned long addr, unsigned long next)
{
vmemmap_verify((pte_t *)pmdp, node, addr, next);
return 1;
}
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{
return vmemmap_populate_basepages(start, end, node, NULL);
/*
* Note that SPARSEMEM_VMEMMAP is only selected for rv64 and that we
* can't use hugepage mappings for 2-level page table because in case of
* memory hotplug, we are not able to update all the page tables with
* the new PMDs.
*/
return vmemmap_populate_hugepages(start, end, node, NULL);
}
#endif
......
......@@ -98,29 +98,23 @@ static void __ipi_flush_tlb_range_asid(void *info)
local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid);
}
static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
unsigned long size, unsigned long stride)
static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid,
unsigned long start, unsigned long size,
unsigned long stride)
{
struct flush_tlb_range_data ftd;
const struct cpumask *cmask;
unsigned long asid = FLUSH_TLB_NO_ASID;
bool broadcast;
if (mm) {
unsigned int cpuid;
if (cpumask_empty(cmask))
return;
cmask = mm_cpumask(mm);
if (cpumask_empty(cmask))
return;
if (cmask != cpu_online_mask) {
unsigned int cpuid;
cpuid = get_cpu();
/* check if the tlbflush needs to be sent to other CPUs */
broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;
if (static_branch_unlikely(&use_asid_allocator))
asid = atomic_long_read(&mm->context.id) & asid_mask;
} else {
cmask = cpu_online_mask;
broadcast = true;
}
......@@ -140,25 +134,34 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
local_flush_tlb_range_asid(start, size, stride, asid);
}
if (mm)
if (cmask != cpu_online_mask)
put_cpu();
}
static inline unsigned long get_mm_asid(struct mm_struct *mm)
{
return static_branch_unlikely(&use_asid_allocator) ?
atomic_long_read(&mm->context.id) & asid_mask : FLUSH_TLB_NO_ASID;
}
void flush_tlb_mm(struct mm_struct *mm)
{
__flush_tlb_range(mm, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
__flush_tlb_range(mm_cpumask(mm), get_mm_asid(mm),
0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
}
void flush_tlb_mm_range(struct mm_struct *mm,
unsigned long start, unsigned long end,
unsigned int page_size)
{
__flush_tlb_range(mm, start, end - start, page_size);
__flush_tlb_range(mm_cpumask(mm), get_mm_asid(mm),
start, end - start, page_size);
}
void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
{
__flush_tlb_range(vma->vm_mm, addr, PAGE_SIZE, PAGE_SIZE);
__flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
addr, PAGE_SIZE, PAGE_SIZE);
}
void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
......@@ -190,18 +193,44 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
}
}
__flush_tlb_range(vma->vm_mm, start, end - start, stride_size);
__flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
start, end - start, stride_size);
}
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
__flush_tlb_range(NULL, start, end - start, PAGE_SIZE);
__flush_tlb_range((struct cpumask *)cpu_online_mask, FLUSH_TLB_NO_ASID,
start, end - start, PAGE_SIZE);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
__flush_tlb_range(vma->vm_mm, start, end - start, PMD_SIZE);
__flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
start, end - start, PMD_SIZE);
}
#endif
bool arch_tlbbatch_should_defer(struct mm_struct *mm)
{
return true;
}
void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
struct mm_struct *mm,
unsigned long uaddr)
{
cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
}
void arch_flush_tlb_batched_pending(struct mm_struct *mm)
{
flush_tlb_mm(mm);
}
void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
__flush_tlb_range(&batch->cpumask, FLUSH_TLB_NO_ASID, 0,
FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
}
......@@ -108,7 +108,7 @@ config HVC_DCC_SERIALIZE_SMP
config HVC_RISCV_SBI
bool "RISC-V SBI console support"
depends on RISCV_SBI_V01
depends on RISCV_SBI
select HVC_DRIVER
help
This enables support for console output via RISC-V SBI calls, which
......
......@@ -40,21 +40,44 @@ static ssize_t hvc_sbi_tty_get(uint32_t vtermno, u8 *buf, size_t count)
return i;
}
static const struct hv_ops hvc_sbi_ops = {
static const struct hv_ops hvc_sbi_v01_ops = {
.get_chars = hvc_sbi_tty_get,
.put_chars = hvc_sbi_tty_put,
};
static int __init hvc_sbi_init(void)
static ssize_t hvc_sbi_dbcn_tty_put(uint32_t vtermno, const u8 *buf, size_t count)
{
return PTR_ERR_OR_ZERO(hvc_alloc(0, 0, &hvc_sbi_ops, 16));
return sbi_debug_console_write(buf, count);
}
device_initcall(hvc_sbi_init);
static int __init hvc_sbi_console_init(void)
static ssize_t hvc_sbi_dbcn_tty_get(uint32_t vtermno, u8 *buf, size_t count)
{
hvc_instantiate(0, 0, &hvc_sbi_ops);
return sbi_debug_console_read(buf, count);
}
static const struct hv_ops hvc_sbi_dbcn_ops = {
.put_chars = hvc_sbi_dbcn_tty_put,
.get_chars = hvc_sbi_dbcn_tty_get,
};
static int __init hvc_sbi_init(void)
{
int err;
if (sbi_debug_console_available) {
err = PTR_ERR_OR_ZERO(hvc_alloc(0, 0, &hvc_sbi_dbcn_ops, 256));
if (err)
return err;
hvc_instantiate(0, 0, &hvc_sbi_dbcn_ops);
} else if (IS_ENABLED(CONFIG_RISCV_SBI_V01)) {
err = PTR_ERR_OR_ZERO(hvc_alloc(0, 0, &hvc_sbi_v01_ops, 256));
if (err)
return err;
hvc_instantiate(0, 0, &hvc_sbi_v01_ops);
} else {
return -ENODEV;
}
return 0;
}
console_initcall(hvc_sbi_console_init);
device_initcall(hvc_sbi_init);
......@@ -87,7 +87,7 @@ config SERIAL_EARLYCON_SEMIHOST
config SERIAL_EARLYCON_RISCV_SBI
bool "Early console using RISC-V SBI"
depends on RISCV_SBI_V01
depends on RISCV_SBI
select SERIAL_CORE
select SERIAL_CORE_CONSOLE
select SERIAL_EARLYCON
......
......@@ -15,17 +15,38 @@ static void sbi_putc(struct uart_port *port, unsigned char c)
sbi_console_putchar(c);
}
static void sbi_console_write(struct console *con,
const char *s, unsigned n)
static void sbi_0_1_console_write(struct console *con,
const char *s, unsigned int n)
{
struct earlycon_device *dev = con->data;
uart_console_write(&dev->port, s, n, sbi_putc);
}
static void sbi_dbcn_console_write(struct console *con,
const char *s, unsigned int n)
{
int ret;
while (n) {
ret = sbi_debug_console_write(s, n);
if (ret < 0)
break;
s += ret;
n -= ret;
}
}
static int __init early_sbi_setup(struct earlycon_device *device,
const char *opt)
{
device->con->write = sbi_console_write;
if (sbi_debug_console_available)
device->con->write = sbi_dbcn_console_write;
else if (IS_ENABLED(CONFIG_RISCV_SBI_V01))
device->con->write = sbi_0_1_console_write;
else
return -ENODEV;
return 0;
}
EARLYCON_DECLARE(sbi, early_sbi_setup);
......@@ -2,6 +2,8 @@
#ifndef __ASM_GENERIC_CHECKSUM_H
#define __ASM_GENERIC_CHECKSUM_H
#include <linux/bitops.h>
/*
* computes the checksum of a memory block at buff, length len,
* and adds in "sum" (32-bit)
......@@ -31,9 +33,7 @@ extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
static inline __sum16 csum_fold(__wsum csum)
{
u32 sum = (__force u32)csum;
sum = (sum & 0xffff) + (sum >> 16);
sum = (sum & 0xffff) + (sum >> 16);
return (__force __sum16)~sum;
return (__force __sum16)((~sum - ror32(sum, 16)) >> 16);
}
#endif
......
......@@ -231,9 +231,10 @@ config DEBUG_INFO
in the "Debug information" choice below, indicating that debug
information will be generated for build targets.
# Clang is known to generate .{s,u}leb128 with symbol deltas with DWARF5, which
# some targets may not support: https://sourceware.org/bugzilla/show_bug.cgi?id=27215
config AS_HAS_NON_CONST_LEB128
# Clang generates .uleb128 with label differences for DWARF v5, a feature that
# older binutils ports do not support when utilizing RISC-V style linker
# relaxation: https://sourceware.org/bugzilla/show_bug.cgi?id=27215
config AS_HAS_NON_CONST_ULEB128
def_bool $(as-instr,.uleb128 .Lexpr_end4 - .Lexpr_start3\n.Lexpr_start3:\n.Lexpr_end4:)
choice
......@@ -258,7 +259,7 @@ config DEBUG_INFO_NONE
config DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT
bool "Rely on the toolchain's implicit default DWARF version"
select DEBUG_INFO
depends on !CC_IS_CLANG || AS_IS_LLVM || CLANG_VERSION < 140000 || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_LEB128)
depends on !CC_IS_CLANG || AS_IS_LLVM || CLANG_VERSION < 140000 || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_ULEB128)
help
The implicit default version of DWARF debug info produced by a
toolchain changes over time.
......@@ -282,7 +283,8 @@ config DEBUG_INFO_DWARF4
config DEBUG_INFO_DWARF5
bool "Generate DWARF Version 5 debuginfo"
select DEBUG_INFO
depends on !CC_IS_CLANG || AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_LEB128)
depends on !ARCH_HAS_BROKEN_DWARF5
depends on !CC_IS_CLANG || AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_ULEB128)
help
Generate DWARF v5 debug info. Requires binutils 2.35.2, gcc 5.0+ (gcc
5.0+ accepts the -gdwarf-5 flag but only had partial support for some
......
This diff is collapsed.
......@@ -24,6 +24,41 @@ extern void my_tramp2(void *);
static unsigned long my_ip = (unsigned long)schedule;
#ifdef CONFIG_RISCV
#include <asm/asm.h>
asm (
" .pushsection .text, \"ax\", @progbits\n"
" .type my_tramp1, @function\n"
" .globl my_tramp1\n"
" my_tramp1:\n"
" addi sp,sp,-2*"SZREG"\n"
" "REG_S" t0,0*"SZREG"(sp)\n"
" "REG_S" ra,1*"SZREG"(sp)\n"
" call my_direct_func1\n"
" "REG_L" t0,0*"SZREG"(sp)\n"
" "REG_L" ra,1*"SZREG"(sp)\n"
" addi sp,sp,2*"SZREG"\n"
" jr t0\n"
" .size my_tramp1, .-my_tramp1\n"
" .type my_tramp2, @function\n"
" .globl my_tramp2\n"
" my_tramp2:\n"
" addi sp,sp,-2*"SZREG"\n"
" "REG_S" t0,0*"SZREG"(sp)\n"
" "REG_S" ra,1*"SZREG"(sp)\n"
" call my_direct_func2\n"
" "REG_L" t0,0*"SZREG"(sp)\n"
" "REG_L" ra,1*"SZREG"(sp)\n"
" addi sp,sp,2*"SZREG"\n"
" jr t0\n"
" .size my_tramp2, .-my_tramp2\n"
" .popsection\n"
);
#endif /* CONFIG_RISCV */
#ifdef CONFIG_X86_64
#include <asm/ibt.h>
......
......@@ -22,6 +22,47 @@ void my_direct_func2(unsigned long ip)
extern void my_tramp1(void *);
extern void my_tramp2(void *);
#ifdef CONFIG_RISCV
#include <asm/asm.h>
asm (
" .pushsection .text, \"ax\", @progbits\n"
" .type my_tramp1, @function\n"
" .globl my_tramp1\n"
" my_tramp1:\n"
" addi sp,sp,-3*"SZREG"\n"
" "REG_S" a0,0*"SZREG"(sp)\n"
" "REG_S" t0,1*"SZREG"(sp)\n"
" "REG_S" ra,2*"SZREG"(sp)\n"
" mv a0,t0\n"
" call my_direct_func1\n"
" "REG_L" a0,0*"SZREG"(sp)\n"
" "REG_L" t0,1*"SZREG"(sp)\n"
" "REG_L" ra,2*"SZREG"(sp)\n"
" addi sp,sp,3*"SZREG"\n"
" jr t0\n"
" .size my_tramp1, .-my_tramp1\n"
" .type my_tramp2, @function\n"
" .globl my_tramp2\n"
" my_tramp2:\n"
" addi sp,sp,-3*"SZREG"\n"
" "REG_S" a0,0*"SZREG"(sp)\n"
" "REG_S" t0,1*"SZREG"(sp)\n"
" "REG_S" ra,2*"SZREG"(sp)\n"
" mv a0,t0\n"
" call my_direct_func2\n"
" "REG_L" a0,0*"SZREG"(sp)\n"
" "REG_L" t0,1*"SZREG"(sp)\n"
" "REG_L" ra,2*"SZREG"(sp)\n"
" addi sp,sp,3*"SZREG"\n"
" jr t0\n"
" .size my_tramp2, .-my_tramp2\n"
" .popsection\n"
);
#endif /* CONFIG_RISCV */
#ifdef CONFIG_X86_64
#include <asm/ibt.h>
......
......@@ -17,6 +17,31 @@ void my_direct_func(unsigned long ip)
extern void my_tramp(void *);
#ifdef CONFIG_RISCV
#include <asm/asm.h>
asm (
" .pushsection .text, \"ax\", @progbits\n"
" .type my_tramp, @function\n"
" .globl my_tramp\n"
" my_tramp:\n"
" addi sp,sp,-3*"SZREG"\n"
" "REG_S" a0,0*"SZREG"(sp)\n"
" "REG_S" t0,1*"SZREG"(sp)\n"
" "REG_S" ra,2*"SZREG"(sp)\n"
" mv a0,t0\n"
" call my_direct_func\n"
" "REG_L" a0,0*"SZREG"(sp)\n"
" "REG_L" t0,1*"SZREG"(sp)\n"
" "REG_L" ra,2*"SZREG"(sp)\n"
" addi sp,sp,3*"SZREG"\n"
" jr t0\n"
" .size my_tramp, .-my_tramp\n"
" .popsection\n"
);
#endif /* CONFIG_RISCV */
#ifdef CONFIG_X86_64
#include <asm/ibt.h>
......
......@@ -19,6 +19,34 @@ void my_direct_func(struct vm_area_struct *vma, unsigned long address,
extern void my_tramp(void *);
#ifdef CONFIG_RISCV
#include <asm/asm.h>
asm (
" .pushsection .text, \"ax\", @progbits\n"
" .type my_tramp, @function\n"
" .globl my_tramp\n"
" my_tramp:\n"
" addi sp,sp,-5*"SZREG"\n"
" "REG_S" a0,0*"SZREG"(sp)\n"
" "REG_S" a1,1*"SZREG"(sp)\n"
" "REG_S" a2,2*"SZREG"(sp)\n"
" "REG_S" t0,3*"SZREG"(sp)\n"
" "REG_S" ra,4*"SZREG"(sp)\n"
" call my_direct_func\n"
" "REG_L" a0,0*"SZREG"(sp)\n"
" "REG_L" a1,1*"SZREG"(sp)\n"
" "REG_L" a2,2*"SZREG"(sp)\n"
" "REG_L" t0,3*"SZREG"(sp)\n"
" "REG_L" ra,4*"SZREG"(sp)\n"
" addi sp,sp,5*"SZREG"\n"
" jr t0\n"
" .size my_tramp, .-my_tramp\n"
" .popsection\n"
);
#endif /* CONFIG_RISCV */
#ifdef CONFIG_X86_64
#include <asm/ibt.h>
......
......@@ -16,6 +16,30 @@ void my_direct_func(struct task_struct *p)
extern void my_tramp(void *);
#ifdef CONFIG_RISCV
#include <asm/asm.h>
asm (
" .pushsection .text, \"ax\", @progbits\n"
" .type my_tramp, @function\n"
" .globl my_tramp\n"
" my_tramp:\n"
" addi sp,sp,-3*"SZREG"\n"
" "REG_S" a0,0*"SZREG"(sp)\n"
" "REG_S" t0,1*"SZREG"(sp)\n"
" "REG_S" ra,2*"SZREG"(sp)\n"
" call my_direct_func\n"
" "REG_L" a0,0*"SZREG"(sp)\n"
" "REG_L" t0,1*"SZREG"(sp)\n"
" "REG_L" ra,2*"SZREG"(sp)\n"
" addi sp,sp,3*"SZREG"\n"
" jr t0\n"
" .size my_tramp, .-my_tramp\n"
" .popsection\n"
);
#endif /* CONFIG_RISCV */
#ifdef CONFIG_X86_64
#include <asm/ibt.h>
......
......@@ -36,16 +36,14 @@ static void sigill_handler(int sig, siginfo_t *info, void *context)
regs[0] += 4;
}
static void cbo_insn(char *base, int fn)
{
uint32_t insn = MK_CBO(fn);
asm volatile(
"mv a0, %0\n"
"li a1, %1\n"
".4byte %2\n"
: : "r" (base), "i" (fn), "i" (insn) : "a0", "a1", "memory");
}
#define cbo_insn(base, fn) \
({ \
asm volatile( \
"mv a0, %0\n" \
"li a1, %1\n" \
".4byte %2\n" \
: : "r" (base), "i" (fn), "i" (MK_CBO(fn)) : "a0", "a1", "memory"); \
})
static void cbo_inval(char *base) { cbo_insn(base, 0); }
static void cbo_clean(char *base) { cbo_insn(base, 1); }
......@@ -97,7 +95,7 @@ static void test_zicboz(void *arg)
block_size = pair.value;
ksft_test_result(rc == 0 && pair.key == RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE &&
is_power_of_2(block_size), "Zicboz block size\n");
ksft_print_msg("Zicboz block size: %ld\n", block_size);
ksft_print_msg("Zicboz block size: %llu\n", block_size);
illegal_insn = false;
cbo_zero(&mem[block_size]);
......@@ -121,7 +119,7 @@ static void test_zicboz(void *arg)
for (j = 0; j < block_size; ++j) {
if (mem[i * block_size + j] != expected) {
ksft_test_result_fail("cbo.zero check\n");
ksft_print_msg("cbo.zero check: mem[%d] != 0x%x\n",
ksft_print_msg("cbo.zero check: mem[%llu] != 0x%x\n",
i * block_size + j, expected);
return;
}
......@@ -201,7 +199,7 @@ int main(int argc, char **argv)
pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&cpus, 0);
if (rc < 0)
ksft_exit_fail_msg("hwprobe() failed with %d\n", rc);
ksft_exit_fail_msg("hwprobe() failed with %ld\n", rc);
assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0);
if (pair.value & RISCV_HWPROBE_EXT_ZICBOZ) {
......
......@@ -29,7 +29,7 @@ int main(int argc, char **argv)
/* Fail if the kernel claims not to recognize a base key. */
if ((i < 4) && (pairs[i].key != i))
ksft_exit_fail_msg("Failed to recognize base key: key != i, "
"key=%ld, i=%ld\n", pairs[i].key, i);
"key=%lld, i=%ld\n", pairs[i].key, i);
if (pairs[i].key != RISCV_HWPROBE_KEY_BASE_BEHAVIOR)
continue;
......@@ -37,7 +37,7 @@ int main(int argc, char **argv)
if (pairs[i].value & RISCV_HWPROBE_BASE_BEHAVIOR_IMA)
continue;
ksft_exit_fail_msg("Unexpected pair: (%ld, %ld)\n", pairs[i].key, pairs[i].value);
ksft_exit_fail_msg("Unexpected pair: (%lld, %llu)\n", pairs[i].key, pairs[i].value);
}
out = riscv_hwprobe(pairs, 8, 0, 0, 0);
......
......@@ -18,6 +18,8 @@ struct addresses {
int *on_56_addr;
};
// Only works on 64 bit
#if __riscv_xlen == 64
static inline void do_mmaps(struct addresses *mmap_addresses)
{
/*
......@@ -50,6 +52,7 @@ static inline void do_mmaps(struct addresses *mmap_addresses)
mmap_addresses->on_56_addr =
mmap(on_56_bits, 5 * sizeof(int), prot, flags, 0, 0);
}
#endif /* __riscv_xlen == 64 */
static inline int memory_layout(void)
{
......
......@@ -27,7 +27,7 @@ int main(void)
datap = malloc(MAX_VSIZE);
if (!datap) {
ksft_test_result_fail("fail to allocate memory for size = %lu\n", MAX_VSIZE);
ksft_test_result_fail("fail to allocate memory for size = %d\n", MAX_VSIZE);
exit(-1);
}
......
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/wait.h>
#define THIS_PROGRAM "./vstate_exec_nolibc"
int main(int argc, char **argv)
......
......@@ -60,7 +60,7 @@ int test_and_compare_child(long provided, long expected, int inherit)
}
rc = launch_test(inherit);
if (rc != expected) {
ksft_test_result_fail("Test failed, check %d != %d\n", rc,
ksft_test_result_fail("Test failed, check %d != %ld\n", rc,
expected);
return -2;
}
......@@ -79,7 +79,7 @@ int main(void)
pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
rc = riscv_hwprobe(&pair, 1, 0, NULL, 0);
if (rc < 0) {
ksft_test_result_fail("hwprobe() failed with %d\n", rc);
ksft_test_result_fail("hwprobe() failed with %ld\n", rc);
return -1;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment