Commit 611c9d88 authored by Linus Torvalds

Merge tag 'loongarch-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson

Pull LoongArch updates from Huacai Chen:

 - More human-readable backtraces (humanized register dumps)

 - Relay BCE exceptions to userland as SIGSEGV

 - Provide kernel fpu functions

 - Optimize memory ops (memset/memcpy/memmove)

 - Optimize checksum and crc32(c) calculation

 - Add ARCH_HAS_FORTIFY_SOURCE selection

 - Add function error injection support

 - Add ftrace with direct call support

 - Add basic perf tools support

* tag 'loongarch-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson: (24 commits)
  tools/perf: Add basic support for LoongArch
  LoongArch: ftrace: Add direct call trampoline samples support
  LoongArch: ftrace: Add direct call support
  LoongArch: ftrace: Implement ftrace_find_callable_addr() to simplify code
  LoongArch: ftrace: Fix build error if DYNAMIC_FTRACE_WITH_REGS is not set
  LoongArch: ftrace: Abstract DYNAMIC_FTRACE_WITH_ARGS accesses
  LoongArch: Add support for function error injection
  LoongArch: Add ARCH_HAS_FORTIFY_SOURCE selection
  LoongArch: crypto: Add crc32 and crc32c hw acceleration
  LoongArch: Add checksum optimization for 64-bit system
  LoongArch: Optimize memory ops (memset/memcpy/memmove)
  LoongArch: Provide kernel fpu functions
  LoongArch: Relay BCE exceptions to userland as SIGSEGV with si_code=SEGV_BNDERR
  LoongArch: Tweak the BADV and CPUCFG.PRID lines in show_regs()
  LoongArch: Humanize the ESTAT line when showing registers
  LoongArch: Humanize the ECFG line when showing registers
  LoongArch: Humanize the EUEN line when showing registers
  LoongArch: Humanize the PRMD line when showing registers
  LoongArch: Humanize the CRMD line when showing registers
  LoongArch: Fix format of CSR lines during show_regs()
  ...
parents a1f749de 2fa5ebe3
arch/loongarch/Kconfig
@@ -10,6 +10,7 @@ config LOONGARCH
 	select ARCH_ENABLE_MEMORY_HOTPLUG
 	select ARCH_ENABLE_MEMORY_HOTREMOVE
 	select ARCH_HAS_ACPI_TABLE_UPGRADE	if ACPI
+	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
 	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_HAS_TICK_BROADCAST	if GENERIC_CLOCKEVENTS_BROADCAST
@@ -93,6 +94,7 @@ config LOONGARCH
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_DYNAMIC_FTRACE_WITH_ARGS
+	select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS
 	select HAVE_EBPF_JIT
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS	if !ARCH_STRICT_ALIGN
@@ -100,6 +102,7 @@ config LOONGARCH
 	select HAVE_FAST_GUP
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FUNCTION_ARG_ACCESS_API
+	select HAVE_FUNCTION_ERROR_INJECTION
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_TRACER
 	select HAVE_GENERIC_VDSO
@@ -118,6 +121,8 @@ config LOONGARCH
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RSEQ
+	select HAVE_SAMPLE_FTRACE_DIRECT
+	select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
 	select HAVE_SETUP_PER_CPU_AREA	if NUMA
 	select HAVE_STACKPROTECTOR
 	select HAVE_SYSCALL_TRACEPOINTS
arch/loongarch/Makefile
@@ -115,6 +115,8 @@ endif
 libs-y += arch/loongarch/lib/
 libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
 
+drivers-y += arch/loongarch/crypto/
+
 # suspend and hibernation support
 drivers-$(CONFIG_PM) += arch/loongarch/power/
arch/loongarch/crypto/Kconfig (new file)
# SPDX-License-Identifier: GPL-2.0

menu "Accelerated Cryptographic Algorithms for CPU (loongarch)"

config CRYPTO_CRC32_LOONGARCH
	tristate "CRC32c and CRC32"
	select CRC32
	select CRYPTO_HASH
	help
	  CRC32c and CRC32 CRC algorithms

	  Architecture: LoongArch with CRC32 instructions

endmenu

arch/loongarch/crypto/Makefile (new file)
# SPDX-License-Identifier: GPL-2.0
#
# Makefile for LoongArch crypto files..
#

obj-$(CONFIG_CRYPTO_CRC32_LOONGARCH) += crc32-loongarch.o
arch/loongarch/crypto/crc32-loongarch.c (new file)
// SPDX-License-Identifier: GPL-2.0
/*
* crc32.c - CRC32 and CRC32C using LoongArch crc* instructions
*
* Module based on mips/crypto/crc32-mips.c
*
* Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org>
* Copyright (C) 2018 MIPS Tech, LLC
* Copyright (C) 2020-2023 Loongson Technology Corporation Limited
*/
#include <linux/module.h>
#include <crypto/internal/hash.h>
#include <asm/cpu-features.h>
#include <asm/unaligned.h>
#define _CRC32(crc, value, size, type) \
do { \
__asm__ __volatile__( \
#type ".w." #size ".w" " %0, %1, %0\n\t"\
: "+r" (crc) \
: "r" (value) \
: "memory"); \
} while (0)
#define CRC32(crc, value, size) _CRC32(crc, value, size, crc)
#define CRC32C(crc, value, size) _CRC32(crc, value, size, crcc)
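/*
 * For example, CRC32(crc, value, d) expands to the single instruction
 * "crc.w.d.w %0, %1, %0" (CRC32 of a 64-bit operand into the 32-bit
 * running state), and CRC32C(crc, value, w) expands to "crcc.w.w.w".
 */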
static u32 crc32_loongarch_hw(u32 crc_, const u8 *p, unsigned int len)
{
u32 crc = crc_;
while (len >= sizeof(u64)) {
u64 value = get_unaligned_le64(p);
CRC32(crc, value, d);
p += sizeof(u64);
len -= sizeof(u64);
}
if (len & sizeof(u32)) {
u32 value = get_unaligned_le32(p);
CRC32(crc, value, w);
p += sizeof(u32);
len -= sizeof(u32);
}
if (len & sizeof(u16)) {
u16 value = get_unaligned_le16(p);
CRC32(crc, value, h);
p += sizeof(u16);
}
if (len & sizeof(u8)) {
u8 value = *p++;
CRC32(crc, value, b);
}
return crc;
}
static u32 crc32c_loongarch_hw(u32 crc_, const u8 *p, unsigned int len)
{
u32 crc = crc_;
while (len >= sizeof(u64)) {
u64 value = get_unaligned_le64(p);
CRC32C(crc, value, d);
p += sizeof(u64);
len -= sizeof(u64);
}
if (len & sizeof(u32)) {
u32 value = get_unaligned_le32(p);
CRC32C(crc, value, w);
p += sizeof(u32);
len -= sizeof(u32);
}
if (len & sizeof(u16)) {
u16 value = get_unaligned_le16(p);
CRC32C(crc, value, h);
p += sizeof(u16);
}
if (len & sizeof(u8)) {
u8 value = *p++;
CRC32C(crc, value, b);
}
return crc;
}
#define CHKSUM_BLOCK_SIZE 1
#define CHKSUM_DIGEST_SIZE 4
struct chksum_ctx {
u32 key;
};
struct chksum_desc_ctx {
u32 crc;
};
static int chksum_init(struct shash_desc *desc)
{
struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
ctx->crc = mctx->key;
return 0;
}
/*
* Setting the seed allows arbitrary accumulators and flexible XOR policy
* If your algorithm starts with ~0, then XOR with ~0 before you set the seed.
*/
static int chksum_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen)
{
struct chksum_ctx *mctx = crypto_shash_ctx(tfm);
if (keylen != sizeof(mctx->key))
return -EINVAL;
mctx->key = get_unaligned_le32(key);
return 0;
}
static int chksum_update(struct shash_desc *desc, const u8 *data, unsigned int length)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
ctx->crc = crc32_loongarch_hw(ctx->crc, data, length);
return 0;
}
static int chksumc_update(struct shash_desc *desc, const u8 *data, unsigned int length)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
ctx->crc = crc32c_loongarch_hw(ctx->crc, data, length);
return 0;
}
static int chksum_final(struct shash_desc *desc, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
put_unaligned_le32(ctx->crc, out);
return 0;
}
static int chksumc_final(struct shash_desc *desc, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
put_unaligned_le32(~ctx->crc, out);
return 0;
}
static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
{
put_unaligned_le32(crc32_loongarch_hw(crc, data, len), out);
return 0;
}
static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
{
put_unaligned_le32(~crc32c_loongarch_hw(crc, data, len), out);
return 0;
}
static int chksum_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
return __chksum_finup(ctx->crc, data, len, out);
}
static int chksumc_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
return __chksumc_finup(ctx->crc, data, len, out);
}
static int chksum_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out)
{
struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
return __chksum_finup(mctx->key, data, length, out);
}
static int chksumc_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out)
{
struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
return __chksumc_finup(mctx->key, data, length, out);
}
static int chksum_cra_init(struct crypto_tfm *tfm)
{
struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
mctx->key = 0;
return 0;
}
static int chksumc_cra_init(struct crypto_tfm *tfm)
{
struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
mctx->key = ~0;
return 0;
}
static struct shash_alg crc32_alg = {
.digestsize = CHKSUM_DIGEST_SIZE,
.setkey = chksum_setkey,
.init = chksum_init,
.update = chksum_update,
.final = chksum_final,
.finup = chksum_finup,
.digest = chksum_digest,
.descsize = sizeof(struct chksum_desc_ctx),
.base = {
.cra_name = "crc32",
.cra_driver_name = "crc32-loongarch",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
.cra_blocksize = CHKSUM_BLOCK_SIZE,
.cra_alignmask = 0,
.cra_ctxsize = sizeof(struct chksum_ctx),
.cra_module = THIS_MODULE,
.cra_init = chksum_cra_init,
}
};
static struct shash_alg crc32c_alg = {
.digestsize = CHKSUM_DIGEST_SIZE,
.setkey = chksum_setkey,
.init = chksum_init,
.update = chksumc_update,
.final = chksumc_final,
.finup = chksumc_finup,
.digest = chksumc_digest,
.descsize = sizeof(struct chksum_desc_ctx),
.base = {
.cra_name = "crc32c",
.cra_driver_name = "crc32c-loongarch",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
.cra_blocksize = CHKSUM_BLOCK_SIZE,
.cra_alignmask = 0,
.cra_ctxsize = sizeof(struct chksum_ctx),
.cra_module = THIS_MODULE,
.cra_init = chksumc_cra_init,
}
};
static int __init crc32_mod_init(void)
{
int err;
if (!cpu_has(CPU_FEATURE_CRC32))
return 0;
err = crypto_register_shash(&crc32_alg);
if (err)
return err;
err = crypto_register_shash(&crc32c_alg);
if (err)
return err;
return 0;
}
static void __exit crc32_mod_exit(void)
{
if (!cpu_has(CPU_FEATURE_CRC32))
return;
crypto_unregister_shash(&crc32_alg);
crypto_unregister_shash(&crc32c_alg);
}
module_init(crc32_mod_init);
module_exit(crc32_mod_exit);
MODULE_AUTHOR("Min Zhou <zhoumin@loongson.cn>");
MODULE_AUTHOR("Huacai Chen <chenhuacai@loongson.cn>");
MODULE_DESCRIPTION("CRC32 and CRC32C using LoongArch crc* instructions");
MODULE_LICENSE("GPL v2");
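A minimal usage sketch (illustrative only, not part of the merge) of the driver above through the generic shash API; the crypto core picks "crc32c-loongarch" automatically once its cra_priority of 300 beats the generic C implementation:

#include <crypto/hash.h>
#include <asm/unaligned.h>

static int crc32c_demo(const u8 *buf, unsigned int len, u32 *out)
{
	struct crypto_shash *tfm;
	u8 digest[4];
	int err;

	tfm = crypto_alloc_shash("crc32c", 0, 0);	/* resolves to crc32c-loongarch */
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	{
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		err = crypto_shash_digest(desc, buf, len, digest);
	}

	crypto_free_shash(tfm);
	if (!err)
		*out = get_unaligned_le32(digest);	/* chksumc_final() stored ~crc as LE */
	return err;
}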
arch/loongarch/include/asm/checksum.h (new file)
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2016 ARM Ltd.
 * Copyright (C) 2023 Loongson Technology Corporation Limited
 */
#ifndef __ASM_CHECKSUM_H
#define __ASM_CHECKSUM_H

#include <linux/bitops.h>
#include <linux/in6.h>

#define _HAVE_ARCH_IPV6_CSUM
__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
			const struct in6_addr *daddr,
			__u32 len, __u8 proto, __wsum sum);

/*
 * turns a 32-bit partial checksum (e.g. from csum_partial) into a
 * 1's complement 16-bit checksum.
 */
static inline __sum16 csum_fold(__wsum sum)
{
	u32 tmp = (__force u32)sum;

	/*
	 * swap the two 16-bit halves of sum
	 * if there is a carry from adding the two 16-bit halves,
	 * it will carry from the lower half into the upper half,
	 * giving us the correct sum in the upper half.
	 */
	return (__force __sum16)(~(tmp + rol32(tmp, 16)) >> 16);
}
#define csum_fold csum_fold

/*
 * This is a version of ip_compute_csum() optimized for IP headers,
 * which always checksums on 4 octet boundaries. ihl is the number
 * of 32-bit words and is always >= 5.
 */
static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
{
	u64 sum;
	__uint128_t tmp;
	int n = ihl; /* we want it signed */

	tmp = *(const __uint128_t *)iph;
	iph += 16;
	n -= 4;
	tmp += ((tmp >> 64) | (tmp << 64));
	sum = tmp >> 64;
	do {
		sum += *(const u32 *)iph;
		iph += 4;
	} while (--n > 0);

	sum += ror64(sum, 32);
	return csum_fold((__force __wsum)(sum >> 32));
}
#define ip_fast_csum ip_fast_csum

extern unsigned int do_csum(const unsigned char *buff, int len);
#define do_csum do_csum

#include <asm-generic/checksum.h>

#endif	/* __ASM_CHECKSUM_H */
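A quick illustrative check of the csum_fold() identity above (standalone user-space C, not part of the patch): the rol32-based expression matches the textbook "add the halves, then absorb the carry" fold, including the carry case.

#include <assert.h>
#include <stdint.h>

static uint16_t fold_rol(uint32_t s)
{
	uint32_t r = (s << 16) | (s >> 16);	/* rol32(s, 16) */

	return (uint16_t)(~(s + r) >> 16);
}

static uint16_t fold_classic(uint32_t s)
{
	s = (s & 0xffff) + (s >> 16);		/* add the two 16-bit halves */
	s = (s & 0xffff) + (s >> 16);		/* absorb a possible carry */

	return (uint16_t)~s;
}

int main(void)
{
	assert(fold_rol(0x12345678) == 0x9753 && fold_classic(0x12345678) == 0x9753);
	assert(fold_rol(0xffff0001) == 0xfffe && fold_classic(0xffff0001) == 0xfffe);
	return 0;
}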
arch/loongarch/include/asm/fpu.h
@@ -21,6 +21,9 @@
 
 struct sigcontext;
 
+extern void kernel_fpu_begin(void);
+extern void kernel_fpu_end(void);
+
 extern void _init_fpu(unsigned int);
 extern void _save_fp(struct loongarch_fpu *);
 extern void _restore_fp(struct loongarch_fpu *);
arch/loongarch/include/asm/ftrace.h
@@ -54,9 +54,46 @@ static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs)
 	return &fregs->regs;
 }
 
+static __always_inline unsigned long
+ftrace_regs_get_instruction_pointer(struct ftrace_regs *fregs)
+{
+	return instruction_pointer(&fregs->regs);
+}
+
+static __always_inline void
+ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long ip)
+{
+	instruction_pointer_set(&fregs->regs, ip);
+}
+
+#define ftrace_regs_get_argument(fregs, n) \
+	regs_get_kernel_argument(&(fregs)->regs, n)
+#define ftrace_regs_get_stack_pointer(fregs) \
+	kernel_stack_pointer(&(fregs)->regs)
+#define ftrace_regs_return_value(fregs) \
+	regs_return_value(&(fregs)->regs)
+#define ftrace_regs_set_return_value(fregs, ret) \
+	regs_set_return_value(&(fregs)->regs, ret)
+#define ftrace_override_function_with_return(fregs) \
+	override_function_with_return(&(fregs)->regs)
+#define ftrace_regs_query_register_offset(name) \
+	regs_query_register_offset(name)
+
 #define ftrace_graph_func ftrace_graph_func
 void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
 		       struct ftrace_ops *op, struct ftrace_regs *fregs);
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+static inline void
+__arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr)
+{
+	regs->regs[13] = addr;	/* t1 */
+}
+
+#define arch_ftrace_set_direct_caller(fregs, addr) \
+	__arch_ftrace_set_direct_caller(&(fregs)->regs, addr)
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+
 #endif
 
 #endif /* __ASSEMBLY__ */
arch/loongarch/include/asm/inst.h
@@ -121,6 +121,8 @@ enum reg2bstrd_op {
 };
 
 enum reg3_op {
+	asrtle_op	= 0x02,
+	asrtgt_op	= 0x03,
 	addw_op		= 0x20,
 	addd_op		= 0x21,
 	subw_op		= 0x22,
@@ -176,6 +178,30 @@ enum reg3_op {
 	amord_op	= 0x70c7,
 	amxorw_op	= 0x70c8,
 	amxord_op	= 0x70c9,
+	fldgts_op	= 0x70e8,
+	fldgtd_op	= 0x70e9,
+	fldles_op	= 0x70ea,
+	fldled_op	= 0x70eb,
+	fstgts_op	= 0x70ec,
+	fstgtd_op	= 0x70ed,
+	fstles_op	= 0x70ee,
+	fstled_op	= 0x70ef,
+	ldgtb_op	= 0x70f0,
+	ldgth_op	= 0x70f1,
+	ldgtw_op	= 0x70f2,
+	ldgtd_op	= 0x70f3,
+	ldleb_op	= 0x70f4,
+	ldleh_op	= 0x70f5,
+	ldlew_op	= 0x70f6,
+	ldled_op	= 0x70f7,
+	stgtb_op	= 0x70f8,
+	stgth_op	= 0x70f9,
+	stgtw_op	= 0x70fa,
+	stgtd_op	= 0x70fb,
+	stleb_op	= 0x70fc,
+	stleh_op	= 0x70fd,
+	stlew_op	= 0x70fe,
+	stled_op	= 0x70ff,
 };
 
 enum reg3sa2_op {
arch/loongarch/include/asm/loongarch.h
@@ -311,8 +311,8 @@ static __always_inline void iocsr_write64(u64 val, u32 reg)
 #define  CSR_ECFG_VS_WIDTH		3
 #define  CSR_ECFG_VS			(_ULCAST_(0x7) << CSR_ECFG_VS_SHIFT)
 #define  CSR_ECFG_IM_SHIFT		0
-#define  CSR_ECFG_IM_WIDTH		13
-#define  CSR_ECFG_IM			(_ULCAST_(0x1fff) << CSR_ECFG_IM_SHIFT)
+#define  CSR_ECFG_IM_WIDTH		14
+#define  CSR_ECFG_IM			(_ULCAST_(0x3fff) << CSR_ECFG_IM_SHIFT)
 
 #define LOONGARCH_CSR_ESTAT		0x5	/* Exception status */
 #define  CSR_ESTAT_ESUBCODE_SHIFT	22
@@ -322,8 +322,8 @@ static __always_inline void iocsr_write64(u64 val, u32 reg)
 #define  CSR_ESTAT_EXC_WIDTH		6
 #define  CSR_ESTAT_EXC			(_ULCAST_(0x3f) << CSR_ESTAT_EXC_SHIFT)
 #define  CSR_ESTAT_IS_SHIFT		0
-#define  CSR_ESTAT_IS_WIDTH		15
-#define  CSR_ESTAT_IS			(_ULCAST_(0x7fff) << CSR_ESTAT_IS_SHIFT)
+#define  CSR_ESTAT_IS_WIDTH		14
+#define  CSR_ESTAT_IS			(_ULCAST_(0x3fff) << CSR_ESTAT_IS_SHIFT)
 
 #define LOONGARCH_CSR_ERA		0x6	/* ERA */
@@ -1090,7 +1090,7 @@ static __always_inline void iocsr_write64(u64 val, u32 reg)
 #define ECFGF_IPI		(_ULCAST_(1) << ECFGB_IPI)
 #define ECFGF(hwirq)		(_ULCAST_(1) << hwirq)
 
-#define ESTATF_IP		0x00001fff
+#define ESTATF_IP		0x00003fff
 
 #define LOONGARCH_IOCSR_FEATURES	0x8
 #define  IOCSRF_TEMP			BIT_ULL(0)
@@ -1397,7 +1397,7 @@ __BUILD_CSR_OP(tlbidx)
 #define EXSUBCODE_ADEF		0	/* Fetch Instruction */
 #define EXSUBCODE_ADEM		1	/* Access Memory*/
 #define EXCCODE_ALE		9	/* Unalign Access */
-#define EXCCODE_OOB		10	/* Out of bounds */
+#define EXCCODE_BCE		10	/* Bounds Check Error */
 #define EXCCODE_SYS		11	/* System call */
 #define EXCCODE_BP		12	/* Breakpoint */
 #define EXCCODE_INE		13	/* Inst. Not Exist */
@@ -1408,33 +1408,38 @@ __BUILD_CSR_OP(tlbidx)
 #define EXCCODE_FPE		18	/* Floating Point Exception */
 #define  EXCSUBCODE_FPE		0	/* Floating Point Exception */
 #define  EXCSUBCODE_VFPE	1	/* Vector Exception */
-#define EXCCODE_WATCH		19	/* Watch address reference */
+#define EXCCODE_WATCH		19	/* WatchPoint Exception */
+#define  EXCSUBCODE_WPEF	0	/* ... on Instruction Fetch */
+#define  EXCSUBCODE_WPEM	1	/* ... on Memory Accesses */
 #define EXCCODE_BTDIS		20	/* Binary Trans. Disabled */
 #define EXCCODE_BTE		21	/* Binary Trans. Exception */
-#define EXCCODE_PSI		22	/* Guest Privileged Error */
-#define EXCCODE_HYP		23	/* Hypercall */
+#define EXCCODE_GSPR		22	/* Guest Privileged Error */
+#define EXCCODE_HVC		23	/* Hypercall */
 #define EXCCODE_GCM		24	/* Guest CSR modified */
 #define  EXCSUBCODE_GCSC	0	/* Software caused */
 #define  EXCSUBCODE_GCHC	1	/* Hardware caused */
 #define EXCCODE_SE		25	/* Security */
 
-#define EXCCODE_INT_START	64
-#define EXCCODE_SIP0		64
-#define EXCCODE_SIP1		65
-#define EXCCODE_IP0		66
-#define EXCCODE_IP1		67
-#define EXCCODE_IP2		68
-#define EXCCODE_IP3		69
-#define EXCCODE_IP4		70
-#define EXCCODE_IP5		71
-#define EXCCODE_IP6		72
-#define EXCCODE_IP7		73
-#define EXCCODE_PMC		74	/* Performance Counter */
-#define EXCCODE_TIMER		75
-#define EXCCODE_IPI		76
-#define EXCCODE_NMI		77
-#define EXCCODE_INT_END		78
-#define EXCCODE_INT_NUM		(EXCCODE_INT_END - EXCCODE_INT_START)
+/* Interrupt numbers */
+#define INT_SWI0	0	/* Software Interrupts */
+#define INT_SWI1	1
+#define INT_HWI0	2	/* Hardware Interrupts */
+#define INT_HWI1	3
+#define INT_HWI2	4
+#define INT_HWI3	5
+#define INT_HWI4	6
+#define INT_HWI5	7
+#define INT_HWI6	8
+#define INT_HWI7	9
+#define INT_PCOV	10	/* Performance Counter Overflow */
+#define INT_TI		11	/* Timer */
+#define INT_IPI		12
+#define INT_NMI		13
+
+/* ExcCodes corresponding to interrupts */
+#define EXCCODE_INT_NUM		(INT_NMI + 1)
+#define EXCCODE_INT_START	64
+#define EXCCODE_INT_END		(EXCCODE_INT_START + EXCCODE_INT_NUM - 1)
 
 /* FPU register names */
 #define LOONGARCH_FCSR0	$r0
arch/loongarch/include/asm/ptrace.h
@@ -154,6 +154,11 @@ static inline long regs_return_value(struct pt_regs *regs)
 	return regs->regs[4];
 }
 
+static inline void regs_set_return_value(struct pt_regs *regs, unsigned long val)
+{
+	regs->regs[4] = val;
+}
+
 #define instruction_pointer(regs) ((regs)->csr_era)
 #define profile_pc(regs) instruction_pointer(regs)
arch/loongarch/kernel/Makefile
@@ -13,7 +13,7 @@ obj-y		+= head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
 obj-$(CONFIG_ACPI)		+= acpi.o
 obj-$(CONFIG_EFI)		+= efi.o
 
-obj-$(CONFIG_CPU_HAS_FPU)	+= fpu.o
+obj-$(CONFIG_CPU_HAS_FPU)	+= fpu.o kfpu.o
 
 obj-$(CONFIG_ARCH_STRICT_ALIGN)	+= unaligned.o
arch/loongarch/kernel/ftrace_dyn.c
@@ -30,19 +30,12 @@ static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, bool validate)
 	return 0;
 }
 
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
-
 #ifdef CONFIG_MODULES
-static inline int __get_mod(struct module **mod, unsigned long addr)
+static bool reachable_by_bl(unsigned long addr, unsigned long pc)
 {
-	preempt_disable();
-	*mod = __module_text_address(addr);
-	preempt_enable();
-
-	if (WARN_ON(!(*mod)))
-		return -EINVAL;
+	long offset = (long)addr - (long)pc;
 
-	return 0;
+	return offset >= -SZ_128M && offset < SZ_128M;
 }
 
 static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
@@ -58,51 +51,88 @@ static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
 	return NULL;
 }
 
-static unsigned long get_plt_addr(struct module *mod, unsigned long addr)
+/*
+ * Find the address the callsite must branch to in order to reach '*addr'.
+ *
+ * Due to the limited range of 'bl' instruction, modules may be placed too far
+ * away to branch directly and we must use a PLT.
+ *
+ * Returns true when '*addr' contains a reachable target address, or has been
+ * modified to contain a PLT address. Returns false otherwise.
+ */
+static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, struct module *mod, unsigned long *addr)
 {
+	unsigned long pc = rec->ip + LOONGARCH_INSN_SIZE;
 	struct plt_entry *plt;
 
-	plt = get_ftrace_plt(mod, addr);
+	/*
+	 * If a custom trampoline is unreachable, rely on the ftrace_regs_caller
+	 * trampoline which knows how to indirectly reach that trampoline through
+	 * ops->direct_call.
+	 */
+	if (*addr != FTRACE_ADDR && *addr != FTRACE_REGS_ADDR && !reachable_by_bl(*addr, pc))
+		*addr = FTRACE_REGS_ADDR;
+
+	/*
+	 * When the target is within range of the 'bl' instruction, use 'addr'
+	 * as-is and branch to that directly.
+	 */
+	if (reachable_by_bl(*addr, pc))
+		return true;
+
+	/*
+	 * 'mod' is only set at module load time, but if we end up
+	 * dealing with an out-of-range condition, we can assume it
+	 * is due to a module being loaded far away from the kernel.
+	 *
+	 * NOTE: __module_text_address() must be called with preemption
+	 * disabled, but we can rely on ftrace_lock to ensure that 'mod'
+	 * retains its validity throughout the remainder of this code.
+	 */
+	if (!mod) {
+		preempt_disable();
+		mod = __module_text_address(pc);
+		preempt_enable();
+	}
+
+	if (WARN_ON(!mod))
+		return false;
+
+	plt = get_ftrace_plt(mod, *addr);
 	if (!plt) {
-		pr_err("ftrace: no module PLT for %ps\n", (void *)addr);
-		return -EINVAL;
+		pr_err("ftrace: no module PLT for %ps\n", (void *)*addr);
+		return false;
 	}
 
-	return (unsigned long)plt;
+	*addr = (unsigned long)plt;
+	return true;
+}
+#else /* !CONFIG_MODULES */
+static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, struct module *mod, unsigned long *addr)
+{
+	return true;
 }
 #endif
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr)
 {
 	u32 old, new;
 	unsigned long pc;
-	long offset __maybe_unused;
 
 	pc = rec->ip + LOONGARCH_INSN_SIZE;
 
-#ifdef CONFIG_MODULES
-	offset = (long)pc - (long)addr;
-
-	if (offset < -SZ_128M || offset >= SZ_128M) {
-		int ret;
-		struct module *mod;
-
-		ret = __get_mod(&mod, pc);
-		if (ret)
-			return ret;
-
-		addr = get_plt_addr(mod, addr);
+	if (!ftrace_find_callable_addr(rec, NULL, &addr))
+		return -EINVAL;
 
-		old_addr = get_plt_addr(mod, old_addr);
-	}
-#endif
+	if (!ftrace_find_callable_addr(rec, NULL, &old_addr))
+		return -EINVAL;
 
 	new = larch_insn_gen_bl(pc, addr);
 	old = larch_insn_gen_bl(pc, old_addr);
 
 	return ftrace_modify_code(pc, old, new, true);
 }
 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
 
 int ftrace_update_ftrace_func(ftrace_func_t func)
@@ -153,24 +183,11 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 {
 	u32 old, new;
 	unsigned long pc;
-	long offset __maybe_unused;
 
 	pc = rec->ip + LOONGARCH_INSN_SIZE;
 
-#ifdef CONFIG_MODULES
-	offset = (long)pc - (long)addr;
-
-	if (offset < -SZ_128M || offset >= SZ_128M) {
-		int ret;
-		struct module *mod;
-
-		ret = __get_mod(&mod, pc);
-		if (ret)
-			return ret;
-
-		addr = get_plt_addr(mod, addr);
-	}
-#endif
+	if (!ftrace_find_callable_addr(rec, NULL, &addr))
		return -EINVAL;
 
 	old = larch_insn_gen_nop();
 	new = larch_insn_gen_bl(pc, addr);
@@ -182,24 +199,11 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
 {
 	u32 old, new;
 	unsigned long pc;
-	long offset __maybe_unused;
 
 	pc = rec->ip + LOONGARCH_INSN_SIZE;
 
-#ifdef CONFIG_MODULES
-	offset = (long)pc - (long)addr;
-
-	if (offset < -SZ_128M || offset >= SZ_128M) {
-		int ret;
-		struct module *mod;
-
-		ret = __get_mod(&mod, pc);
-		if (ret)
-			return ret;
-
-		addr = get_plt_addr(mod, addr);
-	}
-#endif
+	if (!ftrace_find_callable_addr(rec, NULL, &addr))
+		return -EINVAL;
 
 	new = larch_insn_gen_nop();
 	old = larch_insn_gen_bl(pc, addr);
arch/loongarch/kernel/genex.S
@@ -82,6 +82,7 @@ SYM_FUNC_END(except_vec_cex)
 
 	BUILD_HANDLER ade ade badv
 	BUILD_HANDLER ale ale badv
+	BUILD_HANDLER bce bce none
 	BUILD_HANDLER bp bp none
 	BUILD_HANDLER fpe fpe fcsr
 	BUILD_HANDLER fpu fpu none
arch/loongarch/kernel/smp.c
@@ -92,7 +92,7 @@ static int __init get_ipi_irq(void)
 	struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
 
 	if (d)
-		return irq_create_mapping(d, EXCCODE_IPI - EXCCODE_INT_START);
+		return irq_create_mapping(d, INT_IPI);
 
 	return -EINVAL;
 }
arch/loongarch/kernel/kfpu.c (new file)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2023 Loongson Technology Corporation Limited
 */

#include <linux/cpu.h>
#include <linux/init.h>
#include <asm/fpu.h>
#include <asm/smp.h>

static DEFINE_PER_CPU(bool, in_kernel_fpu);

void kernel_fpu_begin(void)
{
	preempt_disable();

	WARN_ON(this_cpu_read(in_kernel_fpu));

	this_cpu_write(in_kernel_fpu, true);

	if (!is_fpu_owner())
		enable_fpu();
	else
		_save_fp(&current->thread.fpu);

	write_fcsr(LOONGARCH_FCSR0, 0);
}
EXPORT_SYMBOL_GPL(kernel_fpu_begin);

void kernel_fpu_end(void)
{
	WARN_ON(!this_cpu_read(in_kernel_fpu));

	if (!is_fpu_owner())
		disable_fpu();
	else
		_restore_fp(&current->thread.fpu);

	this_cpu_write(in_kernel_fpu, false);

	preempt_enable();
}
EXPORT_SYMBOL_GPL(kernel_fpu_end);
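A minimal usage sketch (not part of the patch): any kernel-mode FP use must be bracketed by this pair, and the bracketed section must not sleep, since kernel_fpu_begin() disables preemption.

#include <asm/fpu.h>

static void kfpu_demo(void)
{
	kernel_fpu_begin();
	/*
	 * FP instructions are safe to use here: the user's FP context was
	 * saved (or the FPU enabled) above, and kernel_fpu_end() restores it.
	 */
	kernel_fpu_end();
}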
arch/loongarch/kernel/mcount_dyn.S
@@ -42,7 +42,6 @@
 	.if \allregs
 	PTR_S	tp, sp, PT_R2
 	PTR_S	t0, sp, PT_R12
-	PTR_S	t1, sp, PT_R13
 	PTR_S	t2, sp, PT_R14
 	PTR_S	t3, sp, PT_R15
 	PTR_S	t4, sp, PT_R16
@@ -64,6 +63,8 @@
 	PTR_S	zero, sp, PT_R0
 	.endif
 	PTR_S	ra, sp, PT_ERA /* Save trace function ra at PT_ERA */
+	move	t1, zero
+	PTR_S	t1, sp, PT_R13
 	PTR_ADDI	t8, sp, PT_SIZE
 	PTR_S	t8, sp, PT_R3
 	.endm
@@ -104,8 +105,12 @@ ftrace_common_return:
 	PTR_L	a7, sp, PT_R11
 	PTR_L	fp, sp, PT_R22
 	PTR_L	t0, sp, PT_ERA
+	PTR_L	t1, sp, PT_R13
 	PTR_ADDI	sp, sp, PT_SIZE
+	bnez	t1, .Ldirect
 	jr	t0
+.Ldirect:
+	jr	t1
 SYM_CODE_END(ftrace_common)
 
 SYM_CODE_START(ftrace_caller)
@@ -147,3 +152,9 @@ SYM_CODE_START(return_to_handler)
 	jr	ra
 SYM_CODE_END(return_to_handler)
 #endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+SYM_CODE_START(ftrace_stub_direct_tramp)
+	jr	t0
+SYM_CODE_END(ftrace_stub_direct_tramp)
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
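A registration sketch for the direct-call path above (illustrative only, using the 6.4-era ftrace API as in the samples further down): the core stores a trampoline address in PT_R13 via __arch_ftrace_set_direct_caller(), the common trampoline zeroes that slot at entry, and on return it branches to t1 instead of t0 when the slot is non-zero.

#include <linux/ftrace.h>

static struct ftrace_ops direct;	/* as in the samples below */

static int attach_my_tramp(unsigned long target_ip, unsigned long tramp)
{
	int err;

	err = ftrace_set_filter_ip(&direct, target_ip, 0, 0);
	if (err)
		return err;

	return register_ftrace_direct_multi(&direct, tramp);
}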
arch/loongarch/kernel/perf_event.c
@@ -461,7 +461,7 @@ static int get_pmc_irq(void)
 	struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
 
 	if (d)
-		return irq_create_mapping(d, EXCCODE_PMC - EXCCODE_INT_START);
+		return irq_create_mapping(d, INT_PCOV);
 
 	return -EINVAL;
 }
arch/loongarch/kernel/time.c
@@ -133,7 +133,7 @@ static int get_timer_irq(void)
 	struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
 
 	if (d)
-		return irq_create_mapping(d, EXCCODE_TIMER - EXCCODE_INT_START);
+		return irq_create_mapping(d, INT_TI);
 
 	return -EINVAL;
 }
arch/loongarch/lib/Makefile
@@ -4,4 +4,6 @@
 #
 
 lib-y	+= delay.o memset.o memcpy.o memmove.o \
-	   clear_user.o copy_user.o dump_tlb.o unaligned.o
+	   clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o
+
+obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
arch/loongarch/lib/clear_user.S
@@ -13,7 +13,14 @@
 .irp to, 0, 1, 2, 3, 4, 5, 6, 7
 .L_fixup_handle_\to\():
-	addi.d	a0, a1, (\to) * (-8)
+	sub.d	a0, a2, a0
+	addi.d	a0, a0, (\to) * (-8)
+	jr	ra
+.endr
+
+.irp to, 0, 2, 4
+.L_fixup_handle_s\to\():
+	addi.d	a0, a1, -\to
 	jr	ra
 .endr
 
@@ -44,7 +51,7 @@ SYM_FUNC_START(__clear_user_generic)
 2:	move	a0, a1
 	jr	ra
 
-	_asm_extable 1b, .L_fixup_handle_0
+	_asm_extable 1b, .L_fixup_handle_s0
 SYM_FUNC_END(__clear_user_generic)
 
 /*
@@ -54,12 +61,21 @@ SYM_FUNC_END(__clear_user_generic)
  * a1: size
  */
 SYM_FUNC_START(__clear_user_fast)
-	beqz	a1, 10f
+	sltui	t0, a1, 9
+	bnez	t0, .Lsmall
 
-	ori	a2, zero, 64
-	blt	a1, a2, 9f
+	add.d	a2, a0, a1
+0:	st.d	zero, a0, 0
+
+	/* align up address */
+	addi.d	a0, a0, 8
+	bstrins.d	a0, zero, 2, 0
+
+	addi.d	a3, a2, -64
+	bgeu	a0, a3, .Llt64
 
 	/* set 64 bytes at a time */
+.Lloop64:
 1:	st.d	zero, a0, 0
 2:	st.d	zero, a0, 8
 3:	st.d	zero, a0, 16
@@ -68,24 +84,95 @@ SYM_FUNC_START(__clear_user_fast)
 6:	st.d	zero, a0, 40
 7:	st.d	zero, a0, 48
 8:	st.d	zero, a0, 56
 	addi.d	a0, a0, 64
-	addi.d	a1, a1, -64
-	bge	a1, a2, 1b
-
-	beqz	a1, 10f
+	bltu	a0, a3, .Lloop64
 
 	/* set the remaining bytes */
-9:	st.b	zero, a0, 0
-	addi.d	a0, a0, 1
-	addi.d	a1, a1, -1
-	bgt	a1, zero, 9b
+.Llt64:
+	addi.d	a3, a2, -32
+	bgeu	a0, a3, .Llt32
+9:	st.d	zero, a0, 0
+10:	st.d	zero, a0, 8
+11:	st.d	zero, a0, 16
+12:	st.d	zero, a0, 24
+	addi.d	a0, a0, 32
+
+.Llt32:
+	addi.d	a3, a2, -16
+	bgeu	a0, a3, .Llt16
+13:	st.d	zero, a0, 0
+14:	st.d	zero, a0, 8
+	addi.d	a0, a0, 16
+
+.Llt16:
+	addi.d	a3, a2, -8
+	bgeu	a0, a3, .Llt8
+15:	st.d	zero, a0, 0
+
+.Llt8:
+16:	st.d	zero, a2, -8
 
 	/* return */
-10:	move	a0, a1
+	move	a0, zero
+	jr	ra
+
+	.align	4
+.Lsmall:
+	pcaddi	t0, 4
+	slli.d	a2, a1, 4
+	add.d	t0, t0, a2
+	jr	t0
+
+	.align	4
+	move	a0, zero
+	jr	ra
+
+	.align	4
+17:	st.b	zero, a0, 0
+	move	a0, zero
+	jr	ra
+
+	.align	4
+18:	st.h	zero, a0, 0
+	move	a0, zero
+	jr	ra
+
+	.align	4
+19:	st.h	zero, a0, 0
+20:	st.b	zero, a0, 2
+	move	a0, zero
+	jr	ra
+
+	.align	4
+21:	st.w	zero, a0, 0
+	move	a0, zero
+	jr	ra
+
+	.align	4
+22:	st.w	zero, a0, 0
+23:	st.b	zero, a0, 4
+	move	a0, zero
+	jr	ra
+
+	.align	4
+24:	st.w	zero, a0, 0
+25:	st.h	zero, a0, 4
+	move	a0, zero
+	jr	ra
+
+	.align	4
+26:	st.w	zero, a0, 0
+27:	st.w	zero, a0, 3
+	move	a0, zero
+	jr	ra
+
+	.align	4
+28:	st.d	zero, a0, 0
+	move	a0, zero
 	jr	ra
 
 	/* fixup and ex_table */
+	_asm_extable 0b, .L_fixup_handle_0
 	_asm_extable 1b, .L_fixup_handle_0
 	_asm_extable 2b, .L_fixup_handle_1
 	_asm_extable 3b, .L_fixup_handle_2
@@ -95,4 +182,23 @@ SYM_FUNC_START(__clear_user_fast)
 	_asm_extable 7b, .L_fixup_handle_6
 	_asm_extable 8b, .L_fixup_handle_7
 	_asm_extable 9b, .L_fixup_handle_0
+	_asm_extable 10b, .L_fixup_handle_1
+	_asm_extable 11b, .L_fixup_handle_2
+	_asm_extable 12b, .L_fixup_handle_3
+	_asm_extable 13b, .L_fixup_handle_0
+	_asm_extable 14b, .L_fixup_handle_1
+	_asm_extable 15b, .L_fixup_handle_0
+	_asm_extable 16b, .L_fixup_handle_1
+	_asm_extable 17b, .L_fixup_handle_s0
+	_asm_extable 18b, .L_fixup_handle_s0
+	_asm_extable 19b, .L_fixup_handle_s0
+	_asm_extable 20b, .L_fixup_handle_s2
+	_asm_extable 21b, .L_fixup_handle_s0
+	_asm_extable 22b, .L_fixup_handle_s0
+	_asm_extable 23b, .L_fixup_handle_s4
+	_asm_extable 24b, .L_fixup_handle_s0
+	_asm_extable 25b, .L_fixup_handle_s4
+	_asm_extable 26b, .L_fixup_handle_s0
+	_asm_extable 27b, .L_fixup_handle_s4
+	_asm_extable 28b, .L_fixup_handle_s0
 SYM_FUNC_END(__clear_user_fast)
arch/loongarch/lib/copy_user.S
@@ -13,7 +13,14 @@
 .irp to, 0, 1, 2, 3, 4, 5, 6, 7
 .L_fixup_handle_\to\():
-	addi.d	a0, a2, (\to) * (-8)
+	sub.d	a0, a2, a0
+	addi.d	a0, a0, (\to) * (-8)
+	jr	ra
+.endr
+
+.irp to, 0, 2, 4
+.L_fixup_handle_s\to\():
+	addi.d	a0, a2, -\to
 	jr	ra
 .endr
 
@@ -47,8 +54,8 @@ SYM_FUNC_START(__copy_user_generic)
 3:	move	a0, a2
 	jr	ra
 
-	_asm_extable 1b, .L_fixup_handle_0
-	_asm_extable 2b, .L_fixup_handle_0
+	_asm_extable 1b, .L_fixup_handle_s0
+	_asm_extable 2b, .L_fixup_handle_s0
 SYM_FUNC_END(__copy_user_generic)
 
 /*
@@ -59,65 +66,209 @@ SYM_FUNC_END(__copy_user_generic)
  * a2: n
 */
 SYM_FUNC_START(__copy_user_fast)
-	beqz	a2, 19f
+	sltui	t0, a2, 9
+	bnez	t0, .Lsmall
 
-	ori	a3, zero, 64
-	blt	a2, a3, 17f
+	add.d	a3, a1, a2
+	add.d	a2, a0, a2
+0:	ld.d	t0, a1, 0
+1:	st.d	t0, a0, 0
+
+	/* align up destination address */
+	andi	t1, a0, 7
+	sub.d	t0, zero, t1
+	addi.d	t0, t0, 8
+	add.d	a1, a1, t0
+	add.d	a0, a0, t0
+
+	addi.d	a4, a3, -64
+	bgeu	a1, a4, .Llt64
 
 	/* copy 64 bytes at a time */
-1:	ld.d	t0, a1, 0
-2:	ld.d	t1, a1, 8
-3:	ld.d	t2, a1, 16
-4:	ld.d	t3, a1, 24
-5:	ld.d	t4, a1, 32
-6:	ld.d	t5, a1, 40
-7:	ld.d	t6, a1, 48
-8:	ld.d	t7, a1, 56
-9:	st.d	t0, a0, 0
-10:	st.d	t1, a0, 8
-11:	st.d	t2, a0, 16
-12:	st.d	t3, a0, 24
-13:	st.d	t4, a0, 32
-14:	st.d	t5, a0, 40
-15:	st.d	t6, a0, 48
-16:	st.d	t7, a0, 56
-
-	addi.d	a0, a0, 64
-	addi.d	a1, a1, 64
-	addi.d	a2, a2, -64
-	bge	a2, a3, 1b
-
-	beqz	a2, 19f
+.Lloop64:
+2:	ld.d	t0, a1, 0
+3:	ld.d	t1, a1, 8
+4:	ld.d	t2, a1, 16
+5:	ld.d	t3, a1, 24
+6:	ld.d	t4, a1, 32
+7:	ld.d	t5, a1, 40
+8:	ld.d	t6, a1, 48
+9:	ld.d	t7, a1, 56
+	addi.d	a1, a1, 64
+10:	st.d	t0, a0, 0
+11:	st.d	t1, a0, 8
+12:	st.d	t2, a0, 16
+13:	st.d	t3, a0, 24
+14:	st.d	t4, a0, 32
+15:	st.d	t5, a0, 40
+16:	st.d	t6, a0, 48
+17:	st.d	t7, a0, 56
+	addi.d	a0, a0, 64
+	bltu	a1, a4, .Lloop64
 
 	/* copy the remaining bytes */
-17:	ld.b	t0, a1, 0
-18:	st.b	t0, a0, 0
-	addi.d	a0, a0, 1
-	addi.d	a1, a1, 1
-	addi.d	a2, a2, -1
-	bgt	a2, zero, 17b
+.Llt64:
+	addi.d	a4, a3, -32
+	bgeu	a1, a4, .Llt32
+18:	ld.d	t0, a1, 0
+19:	ld.d	t1, a1, 8
+20:	ld.d	t2, a1, 16
+21:	ld.d	t3, a1, 24
+	addi.d	a1, a1, 32
+22:	st.d	t0, a0, 0
+23:	st.d	t1, a0, 8
+24:	st.d	t2, a0, 16
+25:	st.d	t3, a0, 24
+	addi.d	a0, a0, 32
+
+.Llt32:
+	addi.d	a4, a3, -16
+	bgeu	a1, a4, .Llt16
+26:	ld.d	t0, a1, 0
+27:	ld.d	t1, a1, 8
+	addi.d	a1, a1, 16
+28:	st.d	t0, a0, 0
+29:	st.d	t1, a0, 8
+	addi.d	a0, a0, 16
+
+.Llt16:
+	addi.d	a4, a3, -8
+	bgeu	a1, a4, .Llt8
+30:	ld.d	t0, a1, 0
+31:	st.d	t0, a0, 0
+
+.Llt8:
+32:	ld.d	t0, a3, -8
+33:	st.d	t0, a2, -8
 
 	/* return */
-19:	move	a0, a2
+	move	a0, zero
+	jr	ra
+
+	.align	5
+.Lsmall:
+	pcaddi	t0, 8
+	slli.d	a3, a2, 5
+	add.d	t0, t0, a3
+	jr	t0
+
+	.align	5
+	move	a0, zero
+	jr	ra
+
+	.align	5
+34:	ld.b	t0, a1, 0
+35:	st.b	t0, a0, 0
+	move	a0, zero
+	jr	ra
+
+	.align	5
+36:	ld.h	t0, a1, 0
+37:	st.h	t0, a0, 0
+	move	a0, zero
+	jr	ra
+
+	.align	5
+38:	ld.h	t0, a1, 0
+39:	ld.b	t1, a1, 2
+40:	st.h	t0, a0, 0
+41:	st.b	t1, a0, 2
+	move	a0, zero
+	jr	ra
+
+	.align	5
+42:	ld.w	t0, a1, 0
+43:	st.w	t0, a0, 0
+	move	a0, zero
+	jr	ra
+
+	.align	5
+44:	ld.w	t0, a1, 0
+45:	ld.b	t1, a1, 4
+46:	st.w	t0, a0, 0
+47:	st.b	t1, a0, 4
+	move	a0, zero
+	jr	ra
+
+	.align	5
+48:	ld.w	t0, a1, 0
+49:	ld.h	t1, a1, 4
+50:	st.w	t0, a0, 0
+51:	st.h	t1, a0, 4
+	move	a0, zero
+	jr	ra
+
+	.align	5
+52:	ld.w	t0, a1, 0
+53:	ld.w	t1, a1, 3
+54:	st.w	t0, a0, 0
+55:	st.w	t1, a0, 3
+	move	a0, zero
+	jr	ra
+
+	.align	5
+56:	ld.d	t0, a1, 0
+57:	st.d	t0, a0, 0
+	move	a0, zero
 	jr	ra
 
 	/* fixup and ex_table */
+	_asm_extable 0b, .L_fixup_handle_0
 	_asm_extable 1b, .L_fixup_handle_0
-	_asm_extable 2b, .L_fixup_handle_1
-	_asm_extable 3b, .L_fixup_handle_2
-	_asm_extable 4b, .L_fixup_handle_3
-	_asm_extable 5b, .L_fixup_handle_4
-	_asm_extable 6b, .L_fixup_handle_5
-	_asm_extable 7b, .L_fixup_handle_6
-	_asm_extable 8b, .L_fixup_handle_7
+	_asm_extable 2b, .L_fixup_handle_0
+	_asm_extable 3b, .L_fixup_handle_0
+	_asm_extable 4b, .L_fixup_handle_0
+	_asm_extable 5b, .L_fixup_handle_0
+	_asm_extable 6b, .L_fixup_handle_0
+	_asm_extable 7b, .L_fixup_handle_0
+	_asm_extable 8b, .L_fixup_handle_0
 	_asm_extable 9b, .L_fixup_handle_0
-	_asm_extable 10b, .L_fixup_handle_1
-	_asm_extable 11b, .L_fixup_handle_2
-	_asm_extable 12b, .L_fixup_handle_3
-	_asm_extable 13b, .L_fixup_handle_4
-	_asm_extable 14b, .L_fixup_handle_5
-	_asm_extable 15b, .L_fixup_handle_6
-	_asm_extable 16b, .L_fixup_handle_7
-	_asm_extable 17b, .L_fixup_handle_0
+	_asm_extable 10b, .L_fixup_handle_0
+	_asm_extable 11b, .L_fixup_handle_1
+	_asm_extable 12b, .L_fixup_handle_2
+	_asm_extable 13b, .L_fixup_handle_3
+	_asm_extable 14b, .L_fixup_handle_4
+	_asm_extable 15b, .L_fixup_handle_5
+	_asm_extable 16b, .L_fixup_handle_6
+	_asm_extable 17b, .L_fixup_handle_7
 	_asm_extable 18b, .L_fixup_handle_0
+	_asm_extable 19b, .L_fixup_handle_0
+	_asm_extable 20b, .L_fixup_handle_0
+	_asm_extable 21b, .L_fixup_handle_0
+	_asm_extable 22b, .L_fixup_handle_0
+	_asm_extable 23b, .L_fixup_handle_1
+	_asm_extable 24b, .L_fixup_handle_2
+	_asm_extable 25b, .L_fixup_handle_3
+	_asm_extable 26b, .L_fixup_handle_0
+	_asm_extable 27b, .L_fixup_handle_0
+	_asm_extable 28b, .L_fixup_handle_0
+	_asm_extable 29b, .L_fixup_handle_1
+	_asm_extable 30b, .L_fixup_handle_0
+	_asm_extable 31b, .L_fixup_handle_0
+	_asm_extable 32b, .L_fixup_handle_0
+	_asm_extable 33b, .L_fixup_handle_1
+	_asm_extable 34b, .L_fixup_handle_s0
+	_asm_extable 35b, .L_fixup_handle_s0
+	_asm_extable 36b, .L_fixup_handle_s0
+	_asm_extable 37b, .L_fixup_handle_s0
+	_asm_extable 38b, .L_fixup_handle_s0
+	_asm_extable 39b, .L_fixup_handle_s0
+	_asm_extable 40b, .L_fixup_handle_s0
+	_asm_extable 41b, .L_fixup_handle_s2
+	_asm_extable 42b, .L_fixup_handle_s0
+	_asm_extable 43b, .L_fixup_handle_s0
+	_asm_extable 44b, .L_fixup_handle_s0
+	_asm_extable 45b, .L_fixup_handle_s0
+	_asm_extable 46b, .L_fixup_handle_s0
+	_asm_extable 47b, .L_fixup_handle_s4
+	_asm_extable 48b, .L_fixup_handle_s0
+	_asm_extable 49b, .L_fixup_handle_s0
+	_asm_extable 50b, .L_fixup_handle_s0
+	_asm_extable 51b, .L_fixup_handle_s4
+	_asm_extable 52b, .L_fixup_handle_s0
+	_asm_extable 53b, .L_fixup_handle_s0
+	_asm_extable 54b, .L_fixup_handle_s0
+	_asm_extable 55b, .L_fixup_handle_s4
+	_asm_extable 56b, .L_fixup_handle_s0
+	_asm_extable 57b, .L_fixup_handle_s0
 SYM_FUNC_END(__copy_user_fast)
arch/loongarch/lib/csum.c (new file)
// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2019-2020 Arm Ltd.
#include <linux/compiler.h>
#include <linux/kasan-checks.h>
#include <linux/kernel.h>
#include <net/checksum.h>
static u64 accumulate(u64 sum, u64 data)
{
sum += data;
if (sum < data)
sum += 1;
return sum;
}
/*
* We over-read the buffer and this makes KASAN unhappy. Instead, disable
* instrumentation and call kasan explicitly.
*/
unsigned int __no_sanitize_address do_csum(const unsigned char *buff, int len)
{
unsigned int offset, shift, sum;
const u64 *ptr;
u64 data, sum64 = 0;
if (unlikely(len == 0))
return 0;
offset = (unsigned long)buff & 7;
/*
* This is to all intents and purposes safe, since rounding down cannot
* result in a different page or cache line being accessed, and @buff
* should absolutely not be pointing to anything read-sensitive. We do,
* however, have to be careful not to piss off KASAN, which means using
* unchecked reads to accommodate the head and tail, for which we'll
* compensate with an explicit check up-front.
*/
kasan_check_read(buff, len);
ptr = (u64 *)(buff - offset);
len = len + offset - 8;
/*
* Head: zero out any excess leading bytes. Shifting back by the same
* amount should be at least as fast as any other way of handling the
* odd/even alignment, and means we can ignore it until the very end.
*/
shift = offset * 8;
data = *ptr++;
data = (data >> shift) << shift;
/*
* Body: straightforward aligned loads from here on (the paired loads
* underlying the quadword type still only need dword alignment). The
* main loop strictly excludes the tail, so the second loop will always
* run at least once.
*/
while (unlikely(len > 64)) {
__uint128_t tmp1, tmp2, tmp3, tmp4;
tmp1 = *(__uint128_t *)ptr;
tmp2 = *(__uint128_t *)(ptr + 2);
tmp3 = *(__uint128_t *)(ptr + 4);
tmp4 = *(__uint128_t *)(ptr + 6);
len -= 64;
ptr += 8;
/* This is the "don't dump the carry flag into a GPR" idiom */
tmp1 += (tmp1 >> 64) | (tmp1 << 64);
tmp2 += (tmp2 >> 64) | (tmp2 << 64);
tmp3 += (tmp3 >> 64) | (tmp3 << 64);
tmp4 += (tmp4 >> 64) | (tmp4 << 64);
tmp1 = ((tmp1 >> 64) << 64) | (tmp2 >> 64);
tmp1 += (tmp1 >> 64) | (tmp1 << 64);
tmp3 = ((tmp3 >> 64) << 64) | (tmp4 >> 64);
tmp3 += (tmp3 >> 64) | (tmp3 << 64);
tmp1 = ((tmp1 >> 64) << 64) | (tmp3 >> 64);
tmp1 += (tmp1 >> 64) | (tmp1 << 64);
tmp1 = ((tmp1 >> 64) << 64) | sum64;
tmp1 += (tmp1 >> 64) | (tmp1 << 64);
sum64 = tmp1 >> 64;
}
while (len > 8) {
__uint128_t tmp;
sum64 = accumulate(sum64, data);
tmp = *(__uint128_t *)ptr;
len -= 16;
ptr += 2;
data = tmp >> 64;
sum64 = accumulate(sum64, tmp);
}
if (len > 0) {
sum64 = accumulate(sum64, data);
data = *ptr;
len -= 8;
}
/*
* Tail: zero any over-read bytes similarly to the head, again
* preserving odd/even alignment.
*/
shift = len * -8;
data = (data << shift) >> shift;
sum64 = accumulate(sum64, data);
/* Finally, folding */
sum64 += (sum64 >> 32) | (sum64 << 32);
sum = sum64 >> 32;
sum += (sum >> 16) | (sum << 16);
if (offset & 1)
return (u16)swab32(sum);
return sum >> 16;
}
__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
const struct in6_addr *daddr,
__u32 len, __u8 proto, __wsum csum)
{
__uint128_t src, dst;
u64 sum = (__force u64)csum;
src = *(const __uint128_t *)saddr->s6_addr;
dst = *(const __uint128_t *)daddr->s6_addr;
sum += (__force u32)htonl(len);
sum += (u32)proto << 24;
src += (src >> 64) | (src << 64);
dst += (dst >> 64) | (dst << 64);
sum = accumulate(sum, src >> 64);
sum = accumulate(sum, dst >> 64);
sum += ((sum >> 32) | (sum << 32));
return csum_fold((__force __wsum)(sum >> 32));
}
EXPORT_SYMBOL(csum_ipv6_magic);
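For reference, an illustrative byte-wise equivalent of do_csum() above (not part of the patch; assumes an 8-byte-aligned buffer on a little-endian machine): the vectorized code computes the same RFC 1071 one's-complement sum, just 8 to 64 bytes per step with 128-bit end-around-carry accumulation.

static unsigned int do_csum_reference(const unsigned char *buff, int len)
{
	unsigned long long sum = 0;
	int i;

	for (i = 0; i + 1 < len; i += 2)
		sum += buff[i] | (buff[i + 1] << 8);	/* LE 16-bit words */
	if (len & 1)
		sum += buff[len - 1];
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);	/* end-around carry */

	return sum;
}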
arch/loongarch/lib/error-inject.c (new file)
// SPDX-License-Identifier: GPL-2.0
#include <linux/error-injection.h>
#include <linux/kprobes.h>
void override_function_with_return(struct pt_regs *regs)
{
instruction_pointer_set(regs, regs->regs[1]);
}
NOKPROBE_SYMBOL(override_function_with_return);
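A usage sketch (not part of the patch): with CONFIG_FUNCTION_ERROR_INJECTION enabled, override_function_with_return() lets the error-injection framework make an opted-in function return early. A function opts in like this and can then have an -errno forced into its return value, e.g. through the bpf_override_return() hook:

#include <linux/error-injection.h>

static int demo_prepare(void)
{
	return 0;	/* normal success path */
}
ALLOW_ERROR_INJECTION(demo_prepare, ERRNO);	/* injected value is an -errno */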
arch/loongarch/lib/memcpy.S
@@ -44,6 +44,66 @@ SYM_FUNC_END(__memcpy_generic)
 SYM_FUNC_END(__memcpy_generic)
 _ASM_NOKPROBE(__memcpy_generic)
 
+	.align	5
+SYM_FUNC_START_NOALIGN(__memcpy_small)
+	pcaddi	t0, 8
+	slli.d	a2, a2, 5
+	add.d	t0, t0, a2
+	jr	t0
+
+	.align	5
+0:	jr	ra
+
+	.align	5
+1:	ld.b	t0, a1, 0
+	st.b	t0, a0, 0
+	jr	ra
+
+	.align	5
+2:	ld.h	t0, a1, 0
+	st.h	t0, a0, 0
+	jr	ra
+
+	.align	5
+3:	ld.h	t0, a1, 0
+	ld.b	t1, a1, 2
+	st.h	t0, a0, 0
+	st.b	t1, a0, 2
+	jr	ra
+
+	.align	5
+4:	ld.w	t0, a1, 0
+	st.w	t0, a0, 0
+	jr	ra
+
+	.align	5
+5:	ld.w	t0, a1, 0
+	ld.b	t1, a1, 4
+	st.w	t0, a0, 0
+	st.b	t1, a0, 4
+	jr	ra
+
+	.align	5
+6:	ld.w	t0, a1, 0
+	ld.h	t1, a1, 4
+	st.w	t0, a0, 0
+	st.h	t1, a0, 4
+	jr	ra
+
+	.align	5
+7:	ld.w	t0, a1, 0
+	ld.w	t1, a1, 3
+	st.w	t0, a0, 0
+	st.w	t1, a0, 3
+	jr	ra
+
+	.align	5
+8:	ld.d	t0, a1, 0
+	st.d	t0, a0, 0
+	jr	ra
+SYM_FUNC_END(__memcpy_small)
+_ASM_NOKPROBE(__memcpy_small)
+
 /*
  * void *__memcpy_fast(void *dst, const void *src, size_t n)
  *
@@ -52,14 +112,27 @@ _ASM_NOKPROBE(__memcpy_generic)
  * a2: n
  */
 SYM_FUNC_START(__memcpy_fast)
-	move	a3, a0
-	beqz	a2, 3f
+	sltui	t0, a2, 9
+	bnez	t0, __memcpy_small
 
-	ori	a4, zero, 64
-	blt	a2, a4, 2f
+	add.d	a3, a1, a2
+	add.d	a2, a0, a2
+	ld.d	a6, a1, 0
+	ld.d	a7, a3, -8
+
+	/* align up destination address */
+	andi	t1, a0, 7
+	sub.d	t0, zero, t1
+	addi.d	t0, t0, 8
+	add.d	a1, a1, t0
+	add.d	a5, a0, t0
+
+	addi.d	a4, a3, -64
+	bgeu	a1, a4, .Llt64
 
 	/* copy 64 bytes at a time */
-1:	ld.d	t0, a1, 0
+.Lloop64:
+	ld.d	t0, a1, 0
 	ld.d	t1, a1, 8
 	ld.d	t2, a1, 16
 	ld.d	t3, a1, 24
@@ -67,32 +140,54 @@ SYM_FUNC_START(__memcpy_fast)
 	ld.d	t5, a1, 40
 	ld.d	t6, a1, 48
 	ld.d	t7, a1, 56
-	st.d	t0, a0, 0
-	st.d	t1, a0, 8
-	st.d	t2, a0, 16
-	st.d	t3, a0, 24
-	st.d	t4, a0, 32
-	st.d	t5, a0, 40
-	st.d	t6, a0, 48
-	st.d	t7, a0, 56
-
-	addi.d	a0, a0, 64
 	addi.d	a1, a1, 64
-	addi.d	a2, a2, -64
-	bge	a2, a4, 1b
-
-	beqz	a2, 3f
+	st.d	t0, a5, 0
+	st.d	t1, a5, 8
+	st.d	t2, a5, 16
+	st.d	t3, a5, 24
+	st.d	t4, a5, 32
+	st.d	t5, a5, 40
+	st.d	t6, a5, 48
+	st.d	t7, a5, 56
+	addi.d	a5, a5, 64
+	bltu	a1, a4, .Lloop64
 
 	/* copy the remaining bytes */
-2:	ld.b	t0, a1, 0
-	st.b	t0, a0, 0
-	addi.d	a0, a0, 1
-	addi.d	a1, a1, 1
-	addi.d	a2, a2, -1
-	bgt	a2, zero, 2b
+.Llt64:
+	addi.d	a4, a3, -32
+	bgeu	a1, a4, .Llt32
+	ld.d	t0, a1, 0
+	ld.d	t1, a1, 8
+	ld.d	t2, a1, 16
+	ld.d	t3, a1, 24
+	addi.d	a1, a1, 32
+	st.d	t0, a5, 0
+	st.d	t1, a5, 8
+	st.d	t2, a5, 16
+	st.d	t3, a5, 24
+	addi.d	a5, a5, 32
+
+.Llt32:
+	addi.d	a4, a3, -16
+	bgeu	a1, a4, .Llt16
+	ld.d	t0, a1, 0
+	ld.d	t1, a1, 8
+	addi.d	a1, a1, 16
+	st.d	t0, a5, 0
+	st.d	t1, a5, 8
+	addi.d	a5, a5, 16
+
+.Llt16:
+	addi.d	a4, a3, -8
+	bgeu	a1, a4, .Llt8
+	ld.d	t0, a1, 0
+	st.d	t0, a5, 0
+
+.Llt8:
+	st.d	a6, a0, 0
+	st.d	a7, a2, -8
 
 	/* return */
-3:	move	a0, a3
 	jr	ra
 SYM_FUNC_END(__memcpy_fast)
 _ASM_NOKPROBE(__memcpy_fast)
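The __memcpy_small path above is a computed jump: pcaddi materializes the address of the stub table that follows, "slli.d a2, a2, 5" scales the byte count n (0..8) by 32 (the stride fixed by ".align 5"), and "jr t0" lands on the stub for exactly n bytes. A C analog of the dispatch (illustrative only; odd sizes use two overlapping accesses, e.g. stub 7 copies 4 bytes at offsets 0 and 3):

static void memcpy_small_analog(void *dst, const void *src, unsigned long n)
{
	switch (n) {	/* n is 0..8, one case per 32-byte stub */
	case 1: *(u8 *)dst = *(const u8 *)src; break;
	case 2: *(u16 *)dst = *(const u16 *)src; break;
	case 3: *(u16 *)dst = *(const u16 *)src;
		*((u8 *)dst + 2) = *((const u8 *)src + 2); break;
	case 4: *(u32 *)dst = *(const u32 *)src; break;
	case 5: *(u32 *)dst = *(const u32 *)src;
		*((u8 *)dst + 4) = *((const u8 *)src + 4); break;
	case 6: *(u32 *)dst = *(const u32 *)src;
		*(u16 *)((u8 *)dst + 4) = *(const u16 *)((const u8 *)src + 4); break;
	case 7: *(u32 *)dst = *(const u32 *)src;	/* overlapping pair */
		*(u32 *)((u8 *)dst + 3) = *(const u32 *)((const u8 *)src + 3); break;
	case 8: *(u64 *)dst = *(const u64 *)src; break;
	}
}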
arch/loongarch/lib/memmove.S
@@ -11,23 +11,9 @@
 #include <asm/regdef.h>
 
 SYM_FUNC_START(memmove)
-	blt	a0, a1, 1f	/* dst < src, memcpy */
-	blt	a1, a0, 3f	/* src < dst, rmemcpy */
+	blt	a0, a1, memcpy	/* dst < src, memcpy */
+	blt	a1, a0, rmemcpy	/* src < dst, rmemcpy */
 	jr	ra		/* dst == src, return */
-
-	/* if (src - dst) < 64, copy 1 byte at a time */
-1:	ori	a3, zero, 64
-	sub.d	t0, a1, a0
-	blt	t0, a3, 2f
-	b	memcpy
-2:	b	__memcpy_generic
-
-	/* if (dst - src) < 64, copy 1 byte at a time */
-3:	ori	a3, zero, 64
-	sub.d	t0, a0, a1
-	blt	t0, a3, 4f
-	b	rmemcpy
-4:	b	__rmemcpy_generic
 SYM_FUNC_END(memmove)
 _ASM_NOKPROBE(memmove)
 
@@ -76,50 +62,80 @@ _ASM_NOKPROBE(__rmemcpy_generic)
  * a2: n
  */
 SYM_FUNC_START(__rmemcpy_fast)
-	move	a3, a0
-	beqz	a2, 3f
+	sltui	t0, a2, 9
+	bnez	t0, __memcpy_small
 
-	add.d	a0, a0, a2
-	add.d	a1, a1, a2
+	add.d	a3, a1, a2
+	add.d	a2, a0, a2
+	ld.d	a6, a1, 0
+	ld.d	a7, a3, -8
+
+	/* align up destination address */
+	andi	t1, a2, 7
+	sub.d	a3, a3, t1
+	sub.d	a5, a2, t1
 
-	ori	a4, zero, 64
-	blt	a2, a4, 2f
+	addi.d	a4, a1, 64
+	bgeu	a4, a3, .Llt64
 
 	/* copy 64 bytes at a time */
-1:	ld.d	t0, a1, -8
-	ld.d	t1, a1, -16
-	ld.d	t2, a1, -24
-	ld.d	t3, a1, -32
-	ld.d	t4, a1, -40
-	ld.d	t5, a1, -48
-	ld.d	t6, a1, -56
-	ld.d	t7, a1, -64
-	st.d	t0, a0, -8
-	st.d	t1, a0, -16
-	st.d	t2, a0, -24
-	st.d	t3, a0, -32
-	st.d	t4, a0, -40
-	st.d	t5, a0, -48
-	st.d	t6, a0, -56
-	st.d	t7, a0, -64
-
-	addi.d	a0, a0, -64
-	addi.d	a1, a1, -64
-	addi.d	a2, a2, -64
-	bge	a2, a4, 1b
+.Lloop64:
+	ld.d	t0, a3, -8
+	ld.d	t1, a3, -16
+	ld.d	t2, a3, -24
+	ld.d	t3, a3, -32
+	ld.d	t4, a3, -40
+	ld.d	t5, a3, -48
+	ld.d	t6, a3, -56
+	ld.d	t7, a3, -64
+	addi.d	a3, a3, -64
+	st.d	t0, a5, -8
+	st.d	t1, a5, -16
+	st.d	t2, a5, -24
+	st.d	t3, a5, -32
+	st.d	t4, a5, -40
+	st.d	t5, a5, -48
+	st.d	t6, a5, -56
+	st.d	t7, a5, -64
+	addi.d	a5, a5, -64
+	bltu	a4, a3, .Lloop64
 
-	beqz	a2, 3f
-
 	/* copy the remaining bytes */
-2:	ld.b	t0, a1, -1
-	st.b	t0, a0, -1
-	addi.d	a0, a0, -1
-	addi.d	a1, a1, -1
-	addi.d	a2, a2, -1
-	bgt	a2, zero, 2b
+.Llt64:
+	addi.d	a4, a1, 32
+	bgeu	a4, a3, .Llt32
+	ld.d	t0, a3, -8
+	ld.d	t1, a3, -16
+	ld.d	t2, a3, -24
+	ld.d	t3, a3, -32
+	addi.d	a3, a3, -32
+	st.d	t0, a5, -8
+	st.d	t1, a5, -16
+	st.d	t2, a5, -24
+	st.d	t3, a5, -32
+	addi.d	a5, a5, -32
+
+.Llt32:
+	addi.d	a4, a1, 16
+	bgeu	a4, a3, .Llt16
+	ld.d	t0, a3, -8
+	ld.d	t1, a3, -16
+	addi.d	a3, a3, -16
+	st.d	t0, a5, -8
+	st.d	t1, a5, -16
+	addi.d	a5, a5, -16
+
+.Llt16:
+	addi.d	a4, a1, 8
+	bgeu	a4, a3, .Llt8
+	ld.d	t0, a3, -8
+	st.d	t0, a5, -8
+
+.Llt8:
+	st.d	a6, a0, 0
+	st.d	a7, a2, -8
 
 	/* return */
-3:	move	a0, a3
 	jr	ra
 SYM_FUNC_END(__rmemcpy_fast)
 _ASM_NOKPROBE(__rmemcpy_fast)
arch/loongarch/lib/memset.S
@@ -56,39 +56,107 @@ _ASM_NOKPROBE(__memset_generic)
  * a2: n
 */
 SYM_FUNC_START(__memset_fast)
-	move	a3, a0
-	beqz	a2, 3f
-
-	ori	a4, zero, 64
-	blt	a2, a4, 2f
-
 	/* fill a1 to 64 bits */
 	fill_to_64 a1
 
-	/* set 64 bytes at a time */
-1:	st.d	a1, a0, 0
-	st.d	a1, a0, 8
-	st.d	a1, a0, 16
-	st.d	a1, a0, 24
-	st.d	a1, a0, 32
-	st.d	a1, a0, 40
-	st.d	a1, a0, 48
-	st.d	a1, a0, 56
+	sltui	t0, a2, 9
+	bnez	t0, .Lsmall
 
-	addi.d	a0, a0, 64
-	addi.d	a2, a2, -64
-	bge	a2, a4, 1b
+	add.d	a2, a0, a2
+	st.d	a1, a0, 0
 
-	beqz	a2, 3f
+	/* align up address */
+	addi.d	a3, a0, 8
+	bstrins.d	a3, zero, 2, 0
+
+	addi.d	a4, a2, -64
+	bgeu	a3, a4, .Llt64
+
+	/* set 64 bytes at a time */
+.Lloop64:
+	st.d	a1, a3, 0
+	st.d	a1, a3, 8
+	st.d	a1, a3, 16
+	st.d	a1, a3, 24
+	st.d	a1, a3, 32
+	st.d	a1, a3, 40
+	st.d	a1, a3, 48
+	st.d	a1, a3, 56
+	addi.d	a3, a3, 64
+	bltu	a3, a4, .Lloop64
 
 	/* set the remaining bytes */
-2:	st.b	a1, a0, 0
-	addi.d	a0, a0, 1
-	addi.d	a2, a2, -1
-	bgt	a2, zero, 2b
+.Llt64:
+	addi.d	a4, a2, -32
+	bgeu	a3, a4, .Llt32
+	st.d	a1, a3, 0
+	st.d	a1, a3, 8
+	st.d	a1, a3, 16
+	st.d	a1, a3, 24
+	addi.d	a3, a3, 32
+
+.Llt32:
+	addi.d	a4, a2, -16
+	bgeu	a3, a4, .Llt16
+	st.d	a1, a3, 0
+	st.d	a1, a3, 8
+	addi.d	a3, a3, 16
+
+.Llt16:
+	addi.d	a4, a2, -8
+	bgeu	a3, a4, .Llt8
+	st.d	a1, a3, 0
+
+.Llt8:
+	st.d	a1, a2, -8
 
 	/* return */
-3:	move	a0, a3
+	jr	ra
+
+	.align	4
+.Lsmall:
+	pcaddi	t0, 4
+	slli.d	a2, a2, 4
+	add.d	t0, t0, a2
+	jr	t0
+
+	.align	4
+0:	jr	ra
+
+	.align	4
+1:	st.b	a1, a0, 0
+	jr	ra
+
+	.align	4
+2:	st.h	a1, a0, 0
+	jr	ra
+
+	.align	4
+3:	st.h	a1, a0, 0
+	st.b	a1, a0, 2
+	jr	ra
+
+	.align	4
+4:	st.w	a1, a0, 0
+	jr	ra
+
+	.align	4
+5:	st.w	a1, a0, 0
+	st.b	a1, a0, 4
+	jr	ra
+
+	.align	4
+6:	st.w	a1, a0, 0
+	st.h	a1, a0, 4
+	jr	ra
+
+	.align	4
+7:	st.w	a1, a0, 0
+	st.w	a1, a0, 3
+	jr	ra
+
+	.align	4
+8:	st.d	a1, a0, 0
 	jr	ra
 SYM_FUNC_END(__memset_fast)
 _ASM_NOKPROBE(__memset_fast)
crypto/Kconfig
@@ -1395,6 +1395,9 @@ endif
 if ARM64
 source "arch/arm64/crypto/Kconfig"
 endif
+if LOONGARCH
+source "arch/loongarch/crypto/Kconfig"
+endif
 if MIPS
 source "arch/mips/crypto/Kconfig"
 endif
samples/ftrace/ftrace-direct-modify.c
@@ -96,6 +96,40 @@ asm (
 
 #endif /* CONFIG_S390 */
 
+#ifdef CONFIG_LOONGARCH
+
+asm (
+"	.pushsection	.text, \"ax\", @progbits\n"
+"	.type		my_tramp1, @function\n"
+"	.globl		my_tramp1\n"
+"   my_tramp1:\n"
+"	addi.d	$sp, $sp, -16\n"
+"	st.d	$t0, $sp, 0\n"
+"	st.d	$ra, $sp, 8\n"
+"	bl	my_direct_func1\n"
+"	ld.d	$t0, $sp, 0\n"
+"	ld.d	$ra, $sp, 8\n"
+"	addi.d	$sp, $sp, 16\n"
+"	jr	$t0\n"
+"	.size		my_tramp1, .-my_tramp1\n"
+
+"	.type		my_tramp2, @function\n"
+"	.globl		my_tramp2\n"
+"   my_tramp2:\n"
+"	addi.d	$sp, $sp, -16\n"
+"	st.d	$t0, $sp, 0\n"
+"	st.d	$ra, $sp, 8\n"
+"	bl	my_direct_func2\n"
+"	ld.d	$t0, $sp, 0\n"
+"	ld.d	$ra, $sp, 8\n"
+"	addi.d	$sp, $sp, 16\n"
+"	jr	$t0\n"
+"	.size		my_tramp2, .-my_tramp2\n"
+"	.popsection\n"
+);
+
+#endif /* CONFIG_LOONGARCH */
+
 static struct ftrace_ops direct;
 
 static unsigned long my_tramp = (unsigned long)my_tramp1;
...@@ -103,6 +103,47 @@ asm ( ...@@ -103,6 +103,47 @@ asm (
#endif /* CONFIG_S390 */ #endif /* CONFIG_S390 */
#ifdef CONFIG_LOONGARCH
#include <asm/asm.h>
asm (
" .pushsection .text, \"ax\", @progbits\n"
" .type my_tramp1, @function\n"
" .globl my_tramp1\n"
" my_tramp1:\n"
" addi.d $sp, $sp, -32\n"
" st.d $a0, $sp, 0\n"
" st.d $t0, $sp, 8\n"
" st.d $ra, $sp, 16\n"
" move $a0, $t0\n"
" bl my_direct_func1\n"
" ld.d $a0, $sp, 0\n"
" ld.d $t0, $sp, 8\n"
" ld.d $ra, $sp, 16\n"
" addi.d $sp, $sp, 32\n"
" jr $t0\n"
" .size my_tramp1, .-my_tramp1\n"
" .type my_tramp2, @function\n"
" .globl my_tramp2\n"
" my_tramp2:\n"
" addi.d $sp, $sp, -32\n"
" st.d $a0, $sp, 0\n"
" st.d $t0, $sp, 8\n"
" st.d $ra, $sp, 16\n"
" move $a0, $t0\n"
" bl my_direct_func2\n"
" ld.d $a0, $sp, 0\n"
" ld.d $t0, $sp, 8\n"
" ld.d $ra, $sp, 16\n"
" addi.d $sp, $sp, 32\n"
" jr $t0\n"
" .size my_tramp2, .-my_tramp2\n"
" .popsection\n"
);
#endif /* CONFIG_LOONGARCH */
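A note on the calling convention here: with DYNAMIC_FTRACE_WITH_DIRECT_CALLS on LoongArch, the direct trampoline is entered with $t0 holding the address in the traced function to return to, and control goes back via jr $t0. That is why both trampolines save and restore $ra around the call and do move $a0, $t0 first: the C handler receives that address as its first argument. The matching C half, as in the stock samples/ftrace/ftrace-direct-multi-modify.c (trace_printk() just logs to the trace buffer):

#include <linux/ftrace.h>

void my_direct_func1(unsigned long ip)
{
        /* ip arrived in $a0, copied there from $t0 by my_tramp1 */
        trace_printk("my direct func1 ip %lx\n", ip);
}

void my_direct_func2(unsigned long ip)
{
        trace_printk("my direct func2 ip %lx\n", ip);
}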
static unsigned long my_tramp = (unsigned long)my_tramp1;
static unsigned long tramps[2] = {
(unsigned long)my_tramp1,
...
...@@ -66,6 +66,31 @@ asm (
#endif /* CONFIG_S390 */
#ifdef CONFIG_LOONGARCH
#include <asm/asm.h>
asm (
" .pushsection .text, \"ax\", @progbits\n"
" .type my_tramp, @function\n"
" .globl my_tramp\n"
" my_tramp:\n"
" addi.d $sp, $sp, -32\n"
" st.d $a0, $sp, 0\n"
" st.d $t0, $sp, 8\n"
" st.d $ra, $sp, 16\n"
" move $a0, $t0\n"
" bl my_direct_func\n"
" ld.d $a0, $sp, 0\n"
" ld.d $t0, $sp, 8\n"
" ld.d $ra, $sp, 16\n"
" addi.d $sp, $sp, 32\n"
" jr $t0\n"
" .size my_tramp, .-my_tramp\n"
" .popsection\n"
);
#endif /* CONFIG_LOONGARCH */
static struct ftrace_ops direct;
static int __init ftrace_direct_multi_init(void)
...
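For context, registration in these samples follows the renamed 6.4-era API: attach the ftrace_ops to a traced ip with ftrace_set_filter_ip(), then hand the asm trampoline to register_ftrace_direct(). A condensed sketch of the init/exit pair, assuming the signatures used by this tree's samples (older kernels spell these *_ftrace_direct_multi):

#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/sched.h>

extern void my_tramp(void *);
static struct ftrace_ops direct;

static int __init ftrace_direct_multi_init(void)
{
        /* divert wake_up_process() through the asm trampoline above */
        ftrace_set_filter_ip(&direct, (unsigned long)wake_up_process, 0, 0);
        return register_ftrace_direct(&direct, (unsigned long)my_tramp);
}

static void __exit ftrace_direct_multi_exit(void)
{
        unregister_ftrace_direct(&direct, (unsigned long)my_tramp, true);
}

module_init(ftrace_direct_multi_init);
module_exit(ftrace_direct_multi_exit);
MODULE_LICENSE("GPL");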
...@@ -70,6 +70,33 @@ asm (
#endif /* CONFIG_S390 */
#ifdef CONFIG_LOONGARCH
asm (
" .pushsection .text, \"ax\", @progbits\n"
" .type my_tramp, @function\n"
" .globl my_tramp\n"
" my_tramp:\n"
" addi.d $sp, $sp, -48\n"
" st.d $a0, $sp, 0\n"
" st.d $a1, $sp, 8\n"
" st.d $a2, $sp, 16\n"
" st.d $t0, $sp, 24\n"
" st.d $ra, $sp, 32\n"
" bl my_direct_func\n"
" ld.d $a0, $sp, 0\n"
" ld.d $a1, $sp, 8\n"
" ld.d $a2, $sp, 16\n"
" ld.d $t0, $sp, 24\n"
" ld.d $ra, $sp, 32\n"
" addi.d $sp, $sp, 48\n"
" jr $t0\n"
" .size my_tramp, .-my_tramp\n"
" .popsection\n"
);
#endif /* CONFIG_LOONGARCH */
static struct ftrace_ops direct;
static int __init ftrace_direct_init(void)
...
...@@ -63,6 +63,29 @@ asm (
#endif /* CONFIG_S390 */
#ifdef CONFIG_LOONGARCH
asm (
" .pushsection .text, \"ax\", @progbits\n"
" .type my_tramp, @function\n"
" .globl my_tramp\n"
" my_tramp:\n"
" addi.d $sp, $sp, -32\n"
" st.d $a0, $sp, 0\n"
" st.d $t0, $sp, 8\n"
" st.d $ra, $sp, 16\n"
" bl my_direct_func\n"
" ld.d $a0, $sp, 0\n"
" ld.d $t0, $sp, 8\n"
" ld.d $ra, $sp, 16\n"
" addi.d $sp, $sp, 32\n"
" jr $t0\n"
" .size my_tramp, .-my_tramp\n"
" .popsection\n"
);
#endif /* CONFIG_LOONGARCH */
static struct ftrace_ops direct;
static int __init ftrace_direct_init(void)
...
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_LOONGARCH_PERF_REGS_H
#define _ASM_LOONGARCH_PERF_REGS_H
enum perf_event_loongarch_regs {
PERF_REG_LOONGARCH_PC,
PERF_REG_LOONGARCH_R1,
PERF_REG_LOONGARCH_R2,
PERF_REG_LOONGARCH_R3,
PERF_REG_LOONGARCH_R4,
PERF_REG_LOONGARCH_R5,
PERF_REG_LOONGARCH_R6,
PERF_REG_LOONGARCH_R7,
PERF_REG_LOONGARCH_R8,
PERF_REG_LOONGARCH_R9,
PERF_REG_LOONGARCH_R10,
PERF_REG_LOONGARCH_R11,
PERF_REG_LOONGARCH_R12,
PERF_REG_LOONGARCH_R13,
PERF_REG_LOONGARCH_R14,
PERF_REG_LOONGARCH_R15,
PERF_REG_LOONGARCH_R16,
PERF_REG_LOONGARCH_R17,
PERF_REG_LOONGARCH_R18,
PERF_REG_LOONGARCH_R19,
PERF_REG_LOONGARCH_R20,
PERF_REG_LOONGARCH_R21,
PERF_REG_LOONGARCH_R22,
PERF_REG_LOONGARCH_R23,
PERF_REG_LOONGARCH_R24,
PERF_REG_LOONGARCH_R25,
PERF_REG_LOONGARCH_R26,
PERF_REG_LOONGARCH_R27,
PERF_REG_LOONGARCH_R28,
PERF_REG_LOONGARCH_R29,
PERF_REG_LOONGARCH_R30,
PERF_REG_LOONGARCH_R31,
PERF_REG_LOONGARCH_MAX,
};
#endif /* _ASM_LOONGARCH_PERF_REGS_H */
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* Copyright (C) 2020-2023 Loongson Technology Corporation Limited
*/
#define __ARCH_WANT_SYS_CLONE
#define __ARCH_WANT_SYS_CLONE3
#include <asm-generic/unistd.h>
...@@ -38,7 +38,7 @@ ifneq ($(NO_SYSCALL_TABLE),1)
NO_SYSCALL_TABLE := 0
endif
else
ifeq ($(SRCARCH),$(filter $(SRCARCH),powerpc arm64 s390 mips loongarch))
NO_SYSCALL_TABLE := 0
endif
endif
...@@ -80,6 +80,12 @@ ifeq ($(SRCARCH),arm64)
LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
endif
ifeq ($(SRCARCH),loongarch)
NO_PERF_REGS := 0
CFLAGS += -I$(OUTPUT)arch/loongarch/include/generated
LIBUNWIND_LIBS = -lunwind -lunwind-loongarch64
endif
ifeq ($(SRCARCH),riscv)
NO_PERF_REGS := 0
endif
...@@ -107,7 +113,7 @@ endif
# Disable it on all other architectures in case libdw unwind
# support is detected in system. Add supported architectures
# to the check.
ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc s390 csky riscv loongarch))
NO_LIBDW_DWARF_UNWIND := 1
endif
...@@ -129,7 +135,7 @@ endef
ifdef LIBUNWIND_DIR
LIBUNWIND_CFLAGS = -I$(LIBUNWIND_DIR)/include
LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib
LIBUNWIND_ARCHS = x86 x86_64 arm aarch64 debug-frame-arm debug-frame-aarch64 loongarch
$(foreach libunwind_arch,$(LIBUNWIND_ARCHS),$(call libunwind_arch_set_flags,$(libunwind_arch)))
endif
...
# SPDX-License-Identifier: GPL-2.0
ifndef NO_DWARF
PERF_HAVE_DWARF_REGS := 1
endif
PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
PERF_HAVE_JITDUMP := 1
#
# Syscall table generation for perf
#
out := $(OUTPUT)arch/loongarch/include/generated/asm
header := $(out)/syscalls.c
incpath := $(srctree)/tools
sysdef := $(srctree)/tools/arch/loongarch/include/uapi/asm/unistd.h
sysprf := $(srctree)/tools/perf/arch/loongarch/entry/syscalls/
systbl := $(sysprf)/mksyscalltbl
# Create output directory if not already present
_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
$(header): $(sysdef) $(systbl)
$(Q)$(SHELL) '$(systbl)' '$(CC)' '$(HOSTCC)' $(incpath) $(sysdef) > $@
clean::
$(call QUIET_CLEAN, loongarch) $(RM) $(header)
archheaders: $(header)
// SPDX-License-Identifier: GPL-2.0
/*
* Perf annotate functions.
*
* Copyright (C) 2020-2023 Loongson Technology Corporation Limited
*/
static
struct ins_ops *loongarch__associate_ins_ops(struct arch *arch, const char *name)
{
struct ins_ops *ops = NULL;
if (!strncmp(name, "beqz", 4) ||
!strncmp(name, "bnez", 4) ||
!strncmp(name, "beq", 3) ||
!strncmp(name, "bne", 3) ||
!strncmp(name, "blt", 3) ||
!strncmp(name, "bge", 3) ||
!strncmp(name, "bltu", 4) ||
!strncmp(name, "bgeu", 4) ||
!strncmp(name, "bl", 2))
ops = &call_ops;
else if (!strncmp(name, "jirl", 4))
ops = &ret_ops;
else if (name[0] == 'b')
ops = &jump_ops;
else
return NULL;
arch__associate_ins_ops(arch, name, ops);
return ops;
}
static
int loongarch__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
{
if (!arch->initialized) {
arch->associate_instruction_ops = loongarch__associate_ins_ops;
arch->initialized = true;
arch->objdump.comment_char = '#';
}
return 0;
}
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
#
# Generate system call table for perf. Derived from
# powerpc script.
#
# Author(s): Ming Wang <wangming01@loongson.cn>
# Author(s): Huacai Chen <chenhuacai@loongson.cn>
# Copyright (C) 2020-2023 Loongson Technology Corporation Limited
gcc=$1
hostcc=$2
incpath=$3
input=$4
if ! test -r $input; then
echo "Could not read input file" >&2
exit 1
fi
create_table_from_c()
{
local sc nr last_sc
create_table_exe=`mktemp ${TMPDIR:-/tmp}/create-table-XXXXXX`
{
cat <<-_EoHEADER
#include <stdio.h>
#include "$input"
int main(int argc, char *argv[])
{
_EoHEADER
while read sc nr; do
printf "%s\n" " printf(\"\\t[%d] = \\\"$sc\\\",\\n\", $nr);"
last_sc=$nr
done
printf "%s\n" " printf(\"#define SYSCALLTBL_LOONGARCH_MAX_ID %d\\n\", $last_sc);"
printf "}\n"
} | $hostcc -I $incpath/include/uapi -o $create_table_exe -x c -
$create_table_exe
rm -f $create_table_exe
}
create_table()
{
echo "static const char *syscalltbl_loongarch[] = {"
create_table_from_c
echo "};"
}
$gcc -E -dM -x c -I $incpath/include/uapi $input \
|sed -ne 's/^#define __NR_//p' \
|sort -t' ' -k2 -n \
|create_table
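The pipeline above preprocesses unistd.h, extracts the __NR_* macros, sorts them by number, and emits a C table via a throwaway host program. The generated syscalls.c therefore has roughly this shape (the entry names are real asm-generic syscall numbers 0..2, but the max id below is illustrative):

static const char *syscalltbl_loongarch[] = {
        [0] = "io_setup",
        [1] = "io_destroy",
        [2] = "io_submit",
        /* ... one entry per __NR_* value ... */
#define SYSCALLTBL_LOONGARCH_MAX_ID 450
};

Note that the #define is deliberately emitted inside the initializer; a preprocessor line is valid there, and it keeps the table and its max id in a single generated file.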
/* SPDX-License-Identifier: GPL-2.0 */
/*
* dwarf-regs-table.h : Mapping of DWARF debug register numbers into
* register names.
*
* Copyright (C) 2020-2023 Loongson Technology Corporation Limited
*/
#ifdef DEFINE_DWARF_REGSTR_TABLE
static const char * const loongarch_regstr_tbl[] = {
"%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
"%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
"%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
"%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
};
#endif
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef ARCH_PERF_REGS_H
#define ARCH_PERF_REGS_H
#include <stdlib.h>
#include <linux/types.h>
#include <asm/perf_regs.h>
#define PERF_REGS_MAX PERF_REG_LOONGARCH_MAX
#define PERF_REG_IP PERF_REG_LOONGARCH_PC
#define PERF_REG_SP PERF_REG_LOONGARCH_R3
#define PERF_REGS_MASK ((1ULL << PERF_REG_LOONGARCH_MAX) - 1)
#endif /* ARCH_PERF_REGS_H */
perf-y += perf_regs.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
// SPDX-License-Identifier: GPL-2.0
/*
* dwarf-regs.c : Mapping of DWARF debug register numbers into register names.
*
* Copyright (C) 2020-2023 Loongson Technology Corporation Limited
*/
#include <stdio.h>
#include <errno.h> /* for EINVAL */
#include <string.h> /* for strcmp */
#include <dwarf-regs.h>
struct pt_regs_dwarfnum {
const char *name;
unsigned int dwarfnum;
};
static struct pt_regs_dwarfnum loongarch_gpr_table[] = {
{"%r0", 0}, {"%r1", 1}, {"%r2", 2}, {"%r3", 3},
{"%r4", 4}, {"%r5", 5}, {"%r6", 6}, {"%r7", 7},
{"%r8", 8}, {"%r9", 9}, {"%r10", 10}, {"%r11", 11},
{"%r12", 12}, {"%r13", 13}, {"%r14", 14}, {"%r15", 15},
{"%r16", 16}, {"%r17", 17}, {"%r18", 18}, {"%r19", 19},
{"%r20", 20}, {"%r21", 21}, {"%r22", 22}, {"%r23", 23},
{"%r24", 24}, {"%r25", 25}, {"%r26", 26}, {"%r27", 27},
{"%r28", 28}, {"%r29", 29}, {"%r30", 30}, {"%r31", 31},
{NULL, 0}
};
const char *get_arch_regstr(unsigned int n)
{
n %= 32;
return loongarch_gpr_table[n].name;
}
int regs_query_register_offset(const char *name)
{
const struct pt_regs_dwarfnum *roff;
for (roff = loongarch_gpr_table; roff->name != NULL; roff++)
if (!strcmp(roff->name, name))
return roff->dwarfnum;
return -EINVAL;
}
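A quick check of the lookup above, compiled together with this file (the main() harness is not part of perf):

#include <stdio.h>

int main(void)
{
        printf("%d\n", regs_query_register_offset("%r3")); /* 3: $r3 is $sp on LoongArch */
        printf("%d\n", regs_query_register_offset("%f0")); /* -EINVAL: not in the GPR table */
        return 0;
}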
// SPDX-License-Identifier: GPL-2.0
#include "../../../util/perf_regs.h"
const struct sample_reg sample_reg_masks[] = {
SMPL_REG_END
};
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2020-2023 Loongson Technology Corporation Limited */
#include <elfutils/libdwfl.h>
#include "../../util/unwind-libdw.h"
#include "../../util/perf_regs.h"
#include "../../util/sample.h"
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{
struct unwind_info *ui = arg;
struct regs_dump *user_regs = &ui->sample->user_regs;
Dwarf_Word dwarf_regs[PERF_REG_LOONGARCH_MAX];
#define REG(r) ({ \
Dwarf_Word val = 0; \
perf_reg_value(&val, user_regs, PERF_REG_LOONGARCH_##r); \
val; \
})
dwarf_regs[0] = 0;
dwarf_regs[1] = REG(R1);
dwarf_regs[2] = REG(R2);
dwarf_regs[3] = REG(R3);
dwarf_regs[4] = REG(R4);
dwarf_regs[5] = REG(R5);
dwarf_regs[6] = REG(R6);
dwarf_regs[7] = REG(R7);
dwarf_regs[8] = REG(R8);
dwarf_regs[9] = REG(R9);
dwarf_regs[10] = REG(R10);
dwarf_regs[11] = REG(R11);
dwarf_regs[12] = REG(R12);
dwarf_regs[13] = REG(R13);
dwarf_regs[14] = REG(R14);
dwarf_regs[15] = REG(R15);
dwarf_regs[16] = REG(R16);
dwarf_regs[17] = REG(R17);
dwarf_regs[18] = REG(R18);
dwarf_regs[19] = REG(R19);
dwarf_regs[20] = REG(R20);
dwarf_regs[21] = REG(R21);
dwarf_regs[22] = REG(R22);
dwarf_regs[23] = REG(R23);
dwarf_regs[24] = REG(R24);
dwarf_regs[25] = REG(R25);
dwarf_regs[26] = REG(R26);
dwarf_regs[27] = REG(R27);
dwarf_regs[28] = REG(R28);
dwarf_regs[29] = REG(R29);
dwarf_regs[30] = REG(R30);
dwarf_regs[31] = REG(R31);
dwfl_thread_state_register_pc(thread, REG(PC));
return dwfl_thread_state_registers(thread, 0, PERF_REG_LOONGARCH_MAX, dwarf_regs);
}
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <libunwind.h>
#include "perf_regs.h"
#include "../../util/unwind.h"
#include "util/debug.h"
int libunwind__arch_reg_id(int regnum)
{
switch (regnum) {
case UNW_LOONGARCH64_R1:
return PERF_REG_LOONGARCH_R1;
case UNW_LOONGARCH64_R2:
return PERF_REG_LOONGARCH_R2;
case UNW_LOONGARCH64_R3:
return PERF_REG_LOONGARCH_R3;
case UNW_LOONGARCH64_R4:
return PERF_REG_LOONGARCH_R4;
case UNW_LOONGARCH64_R5:
return PERF_REG_LOONGARCH_R5;
case UNW_LOONGARCH64_R6:
return PERF_REG_LOONGARCH_R6;
case UNW_LOONGARCH64_R7:
return PERF_REG_LOONGARCH_R7;
case UNW_LOONGARCH64_R8:
return PERF_REG_LOONGARCH_R8;
case UNW_LOONGARCH64_R9:
return PERF_REG_LOONGARCH_R9;
case UNW_LOONGARCH64_R10:
return PERF_REG_LOONGARCH_R10;
case UNW_LOONGARCH64_R11:
return PERF_REG_LOONGARCH_R11;
case UNW_LOONGARCH64_R12:
return PERF_REG_LOONGARCH_R12;
case UNW_LOONGARCH64_R13:
return PERF_REG_LOONGARCH_R13;
case UNW_LOONGARCH64_R14:
return PERF_REG_LOONGARCH_R14;
case UNW_LOONGARCH64_R15:
return PERF_REG_LOONGARCH_R15;
case UNW_LOONGARCH64_R16:
return PERF_REG_LOONGARCH_R16;
case UNW_LOONGARCH64_R17:
return PERF_REG_LOONGARCH_R17;
case UNW_LOONGARCH64_R18:
return PERF_REG_LOONGARCH_R18;
case UNW_LOONGARCH64_R19:
return PERF_REG_LOONGARCH_R19;
case UNW_LOONGARCH64_R20:
return PERF_REG_LOONGARCH_R20;
case UNW_LOONGARCH64_R21:
return PERF_REG_LOONGARCH_R21;
case UNW_LOONGARCH64_R22:
return PERF_REG_LOONGARCH_R22;
case UNW_LOONGARCH64_R23:
return PERF_REG_LOONGARCH_R23;
case UNW_LOONGARCH64_R24:
return PERF_REG_LOONGARCH_R24;
case UNW_LOONGARCH64_R25:
return PERF_REG_LOONGARCH_R25;
case UNW_LOONGARCH64_R26:
return PERF_REG_LOONGARCH_R26;
case UNW_LOONGARCH64_R27:
return PERF_REG_LOONGARCH_R27;
case UNW_LOONGARCH64_R28:
return PERF_REG_LOONGARCH_R28;
case UNW_LOONGARCH64_R29:
return PERF_REG_LOONGARCH_R29;
case UNW_LOONGARCH64_R30:
return PERF_REG_LOONGARCH_R30;
case UNW_LOONGARCH64_R31:
return PERF_REG_LOONGARCH_R31;
case UNW_LOONGARCH64_PC:
return PERF_REG_LOONGARCH_PC;
default:
pr_err("unwind: invalid reg id %d\n", regnum);
return -EINVAL;
}
return -EINVAL;
}
...@@ -40,6 +40,7 @@ arch/x86/lib/x86-opcode-map.txt
arch/x86/tools/gen-insn-attr-x86.awk
arch/arm/include/uapi/asm/perf_regs.h
arch/arm64/include/uapi/asm/perf_regs.h
arch/loongarch/include/uapi/asm/perf_regs.h
arch/mips/include/uapi/asm/perf_regs.h
arch/powerpc/include/uapi/asm/perf_regs.h
arch/s390/include/uapi/asm/perf_regs.h
...
...@@ -149,6 +149,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i
#include "arch/arm/annotate/instructions.c"
#include "arch/arm64/annotate/instructions.c"
#include "arch/csky/annotate/instructions.c"
#include "arch/loongarch/annotate/instructions.c"
#include "arch/mips/annotate/instructions.c"
#include "arch/x86/annotate/instructions.c"
#include "arch/powerpc/annotate/instructions.c"
...@@ -211,6 +212,13 @@ static struct arch architectures[] = {
.comment_char = '#',
},
},
{
.name = "loongarch",
.init = loongarch__annotate_init,
.objdump = {
.comment_char = '#',
},
},
};
static void ins__delete(struct ins_operands *ops)
...
...@@ -14,6 +14,10 @@
#define EM_AARCH64 183 /* ARM 64 bit */
#endif
#ifndef EM_LOONGARCH
#define EM_LOONGARCH 258 /* LoongArch */
#endif
/* Define const char * {arch}_register_tbl[] */
#define DEFINE_DWARF_REGSTR_TABLE
#include "../arch/x86/include/dwarf-regs-table.h"
...@@ -25,6 +29,7 @@
#include "../arch/sparc/include/dwarf-regs-table.h"
#include "../arch/xtensa/include/dwarf-regs-table.h"
#include "../arch/mips/include/dwarf-regs-table.h"
#include "../arch/loongarch/include/dwarf-regs-table.h"
#define __get_dwarf_regstr(tbl, n) (((n) < ARRAY_SIZE(tbl)) ? (tbl)[(n)] : NULL)
...@@ -56,6 +61,8 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine)
return __get_dwarf_regstr(xtensa_regstr_tbl, n);
case EM_MIPS:
return __get_dwarf_regstr(mips_regstr_tbl, n);
case EM_LOONGARCH:
return __get_dwarf_regstr(loongarch_regstr_tbl, n);
default:
pr_err("ELF MACHINE %x is not supported.\n", machine);
}
...
...@@ -435,6 +435,8 @@ static const char *normalize_arch(char *arch)
return "mips";
if (!strncmp(arch, "sh", 2) && isdigit(arch[2]))
return "sh";
if (!strncmp(arch, "loongarch", 9))
return "loongarch";
return arch;
}
...
...@@ -43,6 +43,9 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent
#elif defined(__riscv) && __riscv_xlen == 64
#define GEN_ELF_ARCH EM_RISCV
#define GEN_ELF_CLASS ELFCLASS64
#elif defined(__loongarch__)
#define GEN_ELF_ARCH EM_LOONGARCH
#define GEN_ELF_CLASS ELFCLASS64
#else
#error "unsupported architecture"
#endif
...
...@@ -28,6 +28,7 @@ uint64_t __weak arch__user_reg_mask(void)
#include "../../arch/arm/include/uapi/asm/perf_regs.h"
#include "../../arch/csky/include/uapi/asm/perf_regs.h"
#include "../../arch/loongarch/include/uapi/asm/perf_regs.h"
#include "../../arch/mips/include/uapi/asm/perf_regs.h"
#include "../../arch/powerpc/include/uapi/asm/perf_regs.h"
#include "../../arch/riscv/include/uapi/asm/perf_regs.h"
...@@ -236,6 +237,79 @@ static const char *__perf_reg_name_csky(int id)
return NULL;
}
static inline const char *__perf_reg_name_loongarch(int id)
{
switch (id) {
case PERF_REG_LOONGARCH_PC:
return "PC";
case PERF_REG_LOONGARCH_R1:
return "%r1";
case PERF_REG_LOONGARCH_R2:
return "%r2";
case PERF_REG_LOONGARCH_R3:
return "%r3";
case PERF_REG_LOONGARCH_R4:
return "%r4";
case PERF_REG_LOONGARCH_R5:
return "%r5";
case PERF_REG_LOONGARCH_R6:
return "%r6";
case PERF_REG_LOONGARCH_R7:
return "%r7";
case PERF_REG_LOONGARCH_R8:
return "%r8";
case PERF_REG_LOONGARCH_R9:
return "%r9";
case PERF_REG_LOONGARCH_R10:
return "%r10";
case PERF_REG_LOONGARCH_R11:
return "%r11";
case PERF_REG_LOONGARCH_R12:
return "%r12";
case PERF_REG_LOONGARCH_R13:
return "%r13";
case PERF_REG_LOONGARCH_R14:
return "%r14";
case PERF_REG_LOONGARCH_R15:
return "%r15";
case PERF_REG_LOONGARCH_R16:
return "%r16";
case PERF_REG_LOONGARCH_R17:
return "%r17";
case PERF_REG_LOONGARCH_R18:
return "%r18";
case PERF_REG_LOONGARCH_R19:
return "%r19";
case PERF_REG_LOONGARCH_R20:
return "%r20";
case PERF_REG_LOONGARCH_R21:
return "%r21";
case PERF_REG_LOONGARCH_R22:
return "%r22";
case PERF_REG_LOONGARCH_R23:
return "%r23";
case PERF_REG_LOONGARCH_R24:
return "%r24";
case PERF_REG_LOONGARCH_R25:
return "%r25";
case PERF_REG_LOONGARCH_R26:
return "%r26";
case PERF_REG_LOONGARCH_R27:
return "%r27";
case PERF_REG_LOONGARCH_R28:
return "%r28";
case PERF_REG_LOONGARCH_R29:
return "%r29";
case PERF_REG_LOONGARCH_R30:
return "%r30";
case PERF_REG_LOONGARCH_R31:
return "%r31";
default:
break;
}
return NULL;
}
static const char *__perf_reg_name_mips(int id)
{
switch (id) {
...@@ -670,6 +744,8 @@ const char *perf_reg_name(int id, const char *arch)
if (!strcmp(arch, "csky"))
reg_name = __perf_reg_name_csky(id);
else if (!strcmp(arch, "loongarch"))
reg_name = __perf_reg_name_loongarch(id);
else if (!strcmp(arch, "mips"))
reg_name = __perf_reg_name_mips(id);
else if (!strcmp(arch, "powerpc"))
...
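Taken together with the PERF_REG_LOONGARCH_* enum, perf_reg_name() yields printable names for every sampled register. A sketch of how a perf-internal caller could walk them (assumes perf's own headers; this harness is not code from this series):

#include <stdio.h>

static void dump_loongarch_reg_names(void)
{
        for (int id = 0; id < PERF_REG_LOONGARCH_MAX; id++)
                printf("reg %2d -> %s\n", id, perf_reg_name(id, "loongarch"));
}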
...@@ -38,6 +38,10 @@ static const char **syscalltbl_native = syscalltbl_arm64;
#include <asm/syscalls_n64.c>
const int syscalltbl_native_max_id = SYSCALLTBL_MIPS_N64_MAX_ID;
static const char **syscalltbl_native = syscalltbl_mips_n64;
#elif defined(__loongarch__)
#include <asm/syscalls.c>
const int syscalltbl_native_max_id = SYSCALLTBL_LOONGARCH_MAX_ID;
static const char **syscalltbl_native = syscalltbl_loongarch;
#endif
struct syscall {
...