Commit cc07aabc authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux into next

Pull arm64 updates from Catalin Marinas:
 - Optimised assembly string/memory routines (based on the AArch64
   Cortex Strings library contributed to glibc but re-licensed under
   GPLv2)
 - Optimised crypto algorithms making use of the ARMv8 crypto extensions
   (together with kernel API for using FPSIMD instructions in interrupt
   context)
 - Ftrace support
 - CPU topology parsing from DT
 - ESR_EL1 (Exception Syndrome Register) exposed to user space signal
   handlers for SIGSEGV/SIGBUS (useful to emulation tools like Qemu)
 - 1GB section linear mapping if applicable
 - Barriers usage clean-up
 - Default pgprot clean-up

Conflicts as per Catalin.

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (57 commits)
  arm64: kernel: initialize broadcast hrtimer based clock event device
  arm64: ftrace: Add system call tracepoint
  arm64: ftrace: Add CALLER_ADDRx macros
  arm64: ftrace: Add dynamic ftrace support
  arm64: Add ftrace support
  ftrace: Add arm64 support to recordmcount
  arm64: Add 'notrace' attribute to unwind_frame() for ftrace
  arm64: add __ASSEMBLY__ in asm/insn.h
  arm64: Fix linker script entry point
  arm64: lib: Implement optimized string length routines
  arm64: lib: Implement optimized string compare routines
  arm64: lib: Implement optimized memcmp routine
  arm64: lib: Implement optimized memset routine
  arm64: lib: Implement optimized memmove routine
  arm64: lib: Implement optimized memcpy routine
  arm64: defconfig: enable a few more common/useful options in defconfig
  ftrace: Make CALLER_ADDRx macros more generic
  arm64: Fix deadlock scenario with smp_send_stop()
  arm64: Fix machine_shutdown() definition
  arm64: Support arch_irq_work_raise() via self IPIs
  ...
parents 9e47aaef 9358d755
...@@ -52,15 +52,7 @@ extern inline void *return_address(unsigned int level) ...@@ -52,15 +52,7 @@ extern inline void *return_address(unsigned int level)
#endif #endif
#define HAVE_ARCH_CALLER_ADDR #define ftrace_return_addr(n) return_address(n)
#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
#define CALLER_ADDR1 ((unsigned long)return_address(1))
#define CALLER_ADDR2 ((unsigned long)return_address(2))
#define CALLER_ADDR3 ((unsigned long)return_address(3))
#define CALLER_ADDR4 ((unsigned long)return_address(4))
#define CALLER_ADDR5 ((unsigned long)return_address(5))
#define CALLER_ADDR6 ((unsigned long)return_address(6))
#endif /* ifndef __ASSEMBLY__ */ #endif /* ifndef __ASSEMBLY__ */
......
...@@ -30,12 +30,17 @@ config ARM64 ...@@ -30,12 +30,17 @@ config ARM64
select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_KGDB select HAVE_ARCH_KGDB
select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRACEHOOK
select HAVE_C_RECORDMCOUNT
select HAVE_DEBUG_BUGVERBOSE select HAVE_DEBUG_BUGVERBOSE
select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_API_DEBUG select HAVE_DMA_API_DEBUG
select HAVE_DMA_ATTRS select HAVE_DMA_ATTRS
select HAVE_DMA_CONTIGUOUS select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_GENERIC_DMA_COHERENT select HAVE_GENERIC_DMA_COHERENT
select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_HW_BREAKPOINT if PERF_EVENTS
select HAVE_MEMBLOCK select HAVE_MEMBLOCK
...@@ -43,6 +48,7 @@ config ARM64 ...@@ -43,6 +48,7 @@ config ARM64
select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS
select HAVE_PERF_REGS select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP select HAVE_PERF_USER_STACK_DUMP
select HAVE_SYSCALL_TRACEPOINTS
select IRQ_DOMAIN select IRQ_DOMAIN
select MODULES_USE_ELF_RELA select MODULES_USE_ELF_RELA
select NO_BOOTMEM select NO_BOOTMEM
...@@ -245,6 +251,9 @@ config ARCH_WANT_HUGE_PMD_SHARE ...@@ -245,6 +251,9 @@ config ARCH_WANT_HUGE_PMD_SHARE
config HAVE_ARCH_TRANSPARENT_HUGEPAGE config HAVE_ARCH_TRANSPARENT_HUGEPAGE
def_bool y def_bool y
config ARCH_HAS_CACHE_LINE_SIZE
def_bool y
source "mm/Kconfig" source "mm/Kconfig"
config XEN_DOM0 config XEN_DOM0
...@@ -359,5 +368,8 @@ source "arch/arm64/Kconfig.debug" ...@@ -359,5 +368,8 @@ source "arch/arm64/Kconfig.debug"
source "security/Kconfig" source "security/Kconfig"
source "crypto/Kconfig" source "crypto/Kconfig"
if CRYPTO
source "arch/arm64/crypto/Kconfig"
endif
source "lib/Kconfig" source "lib/Kconfig"
...@@ -45,6 +45,7 @@ export TEXT_OFFSET GZFLAGS ...@@ -45,6 +45,7 @@ export TEXT_OFFSET GZFLAGS
core-y += arch/arm64/kernel/ arch/arm64/mm/ core-y += arch/arm64/kernel/ arch/arm64/mm/
core-$(CONFIG_KVM) += arch/arm64/kvm/ core-$(CONFIG_KVM) += arch/arm64/kvm/
core-$(CONFIG_XEN) += arch/arm64/xen/ core-$(CONFIG_XEN) += arch/arm64/xen/
core-$(CONFIG_CRYPTO) += arch/arm64/crypto/
libs-y := arch/arm64/lib/ $(libs-y) libs-y := arch/arm64/lib/ $(libs-y)
libs-y += $(LIBGCC) libs-y += $(LIBGCC)
......
# CONFIG_LOCALVERSION_AUTO is not set # CONFIG_LOCALVERSION_AUTO is not set
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y CONFIG_POSIX_MQUEUE=y
CONFIG_AUDIT=y
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT=y
CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_BSD_PROCESS_ACCT_V3=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_IKCONFIG=y CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14 CONFIG_LOG_BUF_SHIFT=14
...@@ -27,6 +27,7 @@ CONFIG_ARCH_VEXPRESS=y ...@@ -27,6 +27,7 @@ CONFIG_ARCH_VEXPRESS=y
CONFIG_ARCH_XGENE=y CONFIG_ARCH_XGENE=y
CONFIG_SMP=y CONFIG_SMP=y
CONFIG_PREEMPT=y CONFIG_PREEMPT=y
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_CMA=y CONFIG_CMA=y
CONFIG_CMDLINE="console=ttyAMA0" CONFIG_CMDLINE="console=ttyAMA0"
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
...@@ -44,7 +45,7 @@ CONFIG_IP_PNP_BOOTP=y ...@@ -44,7 +45,7 @@ CONFIG_IP_PNP_BOOTP=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS=y
CONFIG_DMA_CMA=y CONFIG_DMA_CMA=y
CONFIG_SCSI=y CONFIG_VIRTIO_BLK=y
# CONFIG_SCSI_PROC_FS is not set # CONFIG_SCSI_PROC_FS is not set
CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SD=y
# CONFIG_SCSI_LOWLEVEL is not set # CONFIG_SCSI_LOWLEVEL is not set
...@@ -56,20 +57,18 @@ CONFIG_SMC91X=y ...@@ -56,20 +57,18 @@ CONFIG_SMC91X=y
CONFIG_SMSC911X=y CONFIG_SMSC911X=y
# CONFIG_WLAN is not set # CONFIG_WLAN is not set
CONFIG_INPUT_EVDEV=y CONFIG_INPUT_EVDEV=y
# CONFIG_SERIO_I8042 is not set
# CONFIG_SERIO_SERPORT is not set # CONFIG_SERIO_SERPORT is not set
CONFIG_LEGACY_PTY_COUNT=16 CONFIG_LEGACY_PTY_COUNT=16
CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set # CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set # CONFIG_HWMON is not set
CONFIG_REGULATOR=y CONFIG_REGULATOR=y
CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_REGULATOR_FIXED_VOLTAGE=y
CONFIG_FB=y CONFIG_FB=y
# CONFIG_VGA_CONSOLE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_MONO is not set
...@@ -79,27 +78,38 @@ CONFIG_USB_ISP1760_HCD=y ...@@ -79,27 +78,38 @@ CONFIG_USB_ISP1760_HCD=y
CONFIG_USB_STORAGE=y CONFIG_USB_STORAGE=y
CONFIG_MMC=y CONFIG_MMC=y
CONFIG_MMC_ARMMMCI=y CONFIG_MMC_ARMMMCI=y
CONFIG_VIRTIO_MMIO=y
# CONFIG_IOMMU_SUPPORT is not set # CONFIG_IOMMU_SUPPORT is not set
CONFIG_EXT2_FS=y CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y CONFIG_EXT3_FS=y
CONFIG_EXT4_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
# CONFIG_EXT3_FS_XATTR is not set # CONFIG_EXT3_FS_XATTR is not set
CONFIG_EXT4_FS=y
CONFIG_FUSE_FS=y CONFIG_FUSE_FS=y
CONFIG_CUSE=y CONFIG_CUSE=y
CONFIG_VFAT_FS=y CONFIG_VFAT_FS=y
CONFIG_TMPFS=y CONFIG_TMPFS=y
CONFIG_HUGETLBFS=y
# CONFIG_MISC_FILESYSTEMS is not set # CONFIG_MISC_FILESYSTEMS is not set
CONFIG_NFS_FS=y CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y CONFIG_ROOT_NFS=y
CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y CONFIG_NLS_ISO8859_1=y
CONFIG_MAGIC_SYSRQ=y CONFIG_VIRTUALIZATION=y
CONFIG_KVM=y
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_FS=y CONFIG_DEBUG_FS=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_KERNEL=y
CONFIG_LOCKUP_DETECTOR=y
# CONFIG_SCHED_DEBUG is not set # CONFIG_SCHED_DEBUG is not set
CONFIG_DEBUG_INFO=y
# CONFIG_FTRACE is not set # CONFIG_FTRACE is not set
CONFIG_ATOMIC64_SELFTEST=y CONFIG_CRYPTO_ANSI_CPRNG=y
CONFIG_VIRTIO_MMIO=y CONFIG_ARM64_CRYPTO=y
CONFIG_VIRTIO_BLK=y CONFIG_CRYPTO_SHA1_ARM64_CE=y
CONFIG_CRYPTO_SHA2_ARM64_CE=y
CONFIG_CRYPTO_GHASH_ARM64_CE=y
CONFIG_CRYPTO_AES_ARM64_CE=y
CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
CONFIG_CRYPTO_AES_ARM64_NEON_BLK=y
menuconfig ARM64_CRYPTO
bool "ARM64 Accelerated Cryptographic Algorithms"
depends on ARM64
help
Say Y here to choose from a selection of cryptographic algorithms
implemented using ARM64 specific CPU features or instructions.
if ARM64_CRYPTO
config CRYPTO_SHA1_ARM64_CE
tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
depends on ARM64 && KERNEL_MODE_NEON
select CRYPTO_HASH
config CRYPTO_SHA2_ARM64_CE
tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)"
depends on ARM64 && KERNEL_MODE_NEON
select CRYPTO_HASH
config CRYPTO_GHASH_ARM64_CE
tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions"
depends on ARM64 && KERNEL_MODE_NEON
select CRYPTO_HASH
config CRYPTO_AES_ARM64_CE
tristate "AES core cipher using ARMv8 Crypto Extensions"
depends on ARM64 && KERNEL_MODE_NEON
select CRYPTO_ALGAPI
select CRYPTO_AES
config CRYPTO_AES_ARM64_CE_CCM
tristate "AES in CCM mode using ARMv8 Crypto Extensions"
depends on ARM64 && KERNEL_MODE_NEON
select CRYPTO_ALGAPI
select CRYPTO_AES
select CRYPTO_AEAD
config CRYPTO_AES_ARM64_CE_BLK
tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
depends on ARM64 && KERNEL_MODE_NEON
select CRYPTO_BLKCIPHER
select CRYPTO_AES
select CRYPTO_ABLK_HELPER
config CRYPTO_AES_ARM64_NEON_BLK
tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
depends on ARM64 && KERNEL_MODE_NEON
select CRYPTO_BLKCIPHER
select CRYPTO_AES
select CRYPTO_ABLK_HELPER
endif
#
# linux/arch/arm64/crypto/Makefile
#
# Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o
sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o
obj-$(CONFIG_CRYPTO_SHA2_ARM64_CE) += sha2-ce.o
sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o
obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto
obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o
aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o
obj-$(CONFIG_CRYPTO_AES_ARM64_CE_BLK) += aes-ce-blk.o
aes-ce-blk-y := aes-glue-ce.o aes-ce.o
obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o
aes-neon-blk-y := aes-glue-neon.o aes-neon.o
AFLAGS_aes-ce.o := -DINTERLEAVE=2 -DINTERLEAVE_INLINE
AFLAGS_aes-neon.o := -DINTERLEAVE=4
CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS
$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
$(call if_changed_dep,cc_o_c)
/*
* aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
*
* Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
.text
.arch armv8-a+crypto
/*
* void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
* u32 *macp, u8 const rk[], u32 rounds);
*/
ENTRY(ce_aes_ccm_auth_data)
ldr w8, [x3] /* leftover from prev round? */
ld1 {v0.2d}, [x0] /* load mac */
cbz w8, 1f
sub w8, w8, #16
eor v1.16b, v1.16b, v1.16b
0: ldrb w7, [x1], #1 /* get 1 byte of input */
subs w2, w2, #1
add w8, w8, #1
ins v1.b[0], w7
ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */
beq 8f /* out of input? */
cbnz w8, 0b
eor v0.16b, v0.16b, v1.16b
1: ld1 {v3.2d}, [x4] /* load first round key */
prfm pldl1strm, [x1]
cmp w5, #12 /* which key size? */
add x6, x4, #16
sub w7, w5, #2 /* modified # of rounds */
bmi 2f
bne 5f
mov v5.16b, v3.16b
b 4f
2: mov v4.16b, v3.16b
ld1 {v5.2d}, [x6], #16 /* load 2nd round key */
3: aese v0.16b, v4.16b
aesmc v0.16b, v0.16b
4: ld1 {v3.2d}, [x6], #16 /* load next round key */
aese v0.16b, v5.16b
aesmc v0.16b, v0.16b
5: ld1 {v4.2d}, [x6], #16 /* load next round key */
subs w7, w7, #3
aese v0.16b, v3.16b
aesmc v0.16b, v0.16b
ld1 {v5.2d}, [x6], #16 /* load next round key */
bpl 3b
aese v0.16b, v4.16b
subs w2, w2, #16 /* last data? */
eor v0.16b, v0.16b, v5.16b /* final round */
bmi 6f
ld1 {v1.16b}, [x1], #16 /* load next input block */
eor v0.16b, v0.16b, v1.16b /* xor with mac */
bne 1b
6: st1 {v0.2d}, [x0] /* store mac */
beq 10f
adds w2, w2, #16
beq 10f
mov w8, w2
7: ldrb w7, [x1], #1
umov w6, v0.b[0]
eor w6, w6, w7
strb w6, [x0], #1
subs w2, w2, #1
beq 10f
ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */
b 7b
8: mov w7, w8
add w8, w8, #16
9: ext v1.16b, v1.16b, v1.16b, #1
adds w7, w7, #1
bne 9b
eor v0.16b, v0.16b, v1.16b
st1 {v0.2d}, [x0]
10: str w8, [x3]
ret
ENDPROC(ce_aes_ccm_auth_data)
/*
* void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
* u32 rounds);
*/
ENTRY(ce_aes_ccm_final)
ld1 {v3.2d}, [x2], #16 /* load first round key */
ld1 {v0.2d}, [x0] /* load mac */
cmp w3, #12 /* which key size? */
sub w3, w3, #2 /* modified # of rounds */
ld1 {v1.2d}, [x1] /* load 1st ctriv */
bmi 0f
bne 3f
mov v5.16b, v3.16b
b 2f
0: mov v4.16b, v3.16b
1: ld1 {v5.2d}, [x2], #16 /* load next round key */
aese v0.16b, v4.16b
aese v1.16b, v4.16b
aesmc v0.16b, v0.16b
aesmc v1.16b, v1.16b
2: ld1 {v3.2d}, [x2], #16 /* load next round key */
aese v0.16b, v5.16b
aese v1.16b, v5.16b
aesmc v0.16b, v0.16b
aesmc v1.16b, v1.16b
3: ld1 {v4.2d}, [x2], #16 /* load next round key */
subs w3, w3, #3
aese v0.16b, v3.16b
aese v1.16b, v3.16b
aesmc v0.16b, v0.16b
aesmc v1.16b, v1.16b
bpl 1b
aese v0.16b, v4.16b
aese v1.16b, v4.16b
/* final round key cancels out */
eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */
st1 {v0.2d}, [x0] /* store result */
ret
ENDPROC(ce_aes_ccm_final)
.macro aes_ccm_do_crypt,enc
ldr x8, [x6, #8] /* load lower ctr */
ld1 {v0.2d}, [x5] /* load mac */
rev x8, x8 /* keep swabbed ctr in reg */
0: /* outer loop */
ld1 {v1.1d}, [x6] /* load upper ctr */
prfm pldl1strm, [x1]
add x8, x8, #1
rev x9, x8
cmp w4, #12 /* which key size? */
sub w7, w4, #2 /* get modified # of rounds */
ins v1.d[1], x9 /* no carry in lower ctr */
ld1 {v3.2d}, [x3] /* load first round key */
add x10, x3, #16
bmi 1f
bne 4f
mov v5.16b, v3.16b
b 3f
1: mov v4.16b, v3.16b
ld1 {v5.2d}, [x10], #16 /* load 2nd round key */
2: /* inner loop: 3 rounds, 2x interleaved */
aese v0.16b, v4.16b
aese v1.16b, v4.16b
aesmc v0.16b, v0.16b
aesmc v1.16b, v1.16b
3: ld1 {v3.2d}, [x10], #16 /* load next round key */
aese v0.16b, v5.16b
aese v1.16b, v5.16b
aesmc v0.16b, v0.16b
aesmc v1.16b, v1.16b
4: ld1 {v4.2d}, [x10], #16 /* load next round key */
subs w7, w7, #3
aese v0.16b, v3.16b
aese v1.16b, v3.16b
aesmc v0.16b, v0.16b
aesmc v1.16b, v1.16b
ld1 {v5.2d}, [x10], #16 /* load next round key */
bpl 2b
aese v0.16b, v4.16b
aese v1.16b, v4.16b
subs w2, w2, #16
bmi 6f /* partial block? */
ld1 {v2.16b}, [x1], #16 /* load next input block */
.if \enc == 1
eor v2.16b, v2.16b, v5.16b /* final round enc+mac */
eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */
.else
eor v2.16b, v2.16b, v1.16b /* xor with crypted ctr */
eor v1.16b, v2.16b, v5.16b /* final round enc */
.endif
eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */
st1 {v1.16b}, [x0], #16 /* write output block */
bne 0b
rev x8, x8
st1 {v0.2d}, [x5] /* store mac */
str x8, [x6, #8] /* store lsb end of ctr (BE) */
5: ret
6: eor v0.16b, v0.16b, v5.16b /* final round mac */
eor v1.16b, v1.16b, v5.16b /* final round enc */
st1 {v0.2d}, [x5] /* store mac */
add w2, w2, #16 /* process partial tail block */
7: ldrb w9, [x1], #1 /* get 1 byte of input */
umov w6, v1.b[0] /* get top crypted ctr byte */
umov w7, v0.b[0] /* get top mac byte */
.if \enc == 1
eor w7, w7, w9
eor w9, w9, w6
.else
eor w9, w9, w6
eor w7, w7, w9
.endif
strb w9, [x0], #1 /* store out byte */
strb w7, [x5], #1 /* store mac byte */
subs w2, w2, #1
beq 5b
ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */
ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */
b 7b
.endm
/*
* void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
* u8 const rk[], u32 rounds, u8 mac[],
* u8 ctr[]);
* void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
* u8 const rk[], u32 rounds, u8 mac[],
* u8 ctr[]);
*/
ENTRY(ce_aes_ccm_encrypt)
aes_ccm_do_crypt 1
ENDPROC(ce_aes_ccm_encrypt)
ENTRY(ce_aes_ccm_decrypt)
aes_ccm_do_crypt 0
ENDPROC(ce_aes_ccm_decrypt)
/*
* aes-ccm-glue.c - AES-CCM transform for ARMv8 with Crypto Extensions
*
* Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/neon.h>
#include <asm/unaligned.h>
#include <crypto/aes.h>
#include <crypto/algapi.h>
#include <crypto/scatterwalk.h>
#include <linux/crypto.h>
#include <linux/module.h>
static int num_rounds(struct crypto_aes_ctx *ctx)
{
/*
* # of rounds specified by AES:
* 128 bit key 10 rounds
* 192 bit key 12 rounds
* 256 bit key 14 rounds
* => n byte key => 6 + (n/4) rounds
*/
return 6 + ctx->key_length / 4;
}
asmlinkage void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
u32 *macp, u32 const rk[], u32 rounds);
asmlinkage void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
u32 const rk[], u32 rounds, u8 mac[],
u8 ctr[]);
asmlinkage void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
u32 const rk[], u32 rounds, u8 mac[],
u8 ctr[]);
asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[],
u32 rounds);
static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
unsigned int key_len)
{
struct crypto_aes_ctx *ctx = crypto_aead_ctx(tfm);
int ret;
ret = crypto_aes_expand_key(ctx, in_key, key_len);
if (!ret)
return 0;
tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
return -EINVAL;
}
static int ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
{
if ((authsize & 1) || authsize < 4)
return -EINVAL;
return 0;
}
static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
__be32 *n = (__be32 *)&maciv[AES_BLOCK_SIZE - 8];
u32 l = req->iv[0] + 1;
/* verify that CCM dimension 'L' is set correctly in the IV */
if (l < 2 || l > 8)
return -EINVAL;
/* verify that msglen can in fact be represented in L bytes */
if (l < 4 && msglen >> (8 * l))
return -EOVERFLOW;
/*
* Even if the CCM spec allows L values of up to 8, the Linux cryptoapi
* uses a u32 type to represent msglen so the top 4 bytes are always 0.
*/
n[0] = 0;
n[1] = cpu_to_be32(msglen);
memcpy(maciv, req->iv, AES_BLOCK_SIZE - l);
/*
* Meaning of byte 0 according to CCM spec (RFC 3610/NIST 800-38C)
* - bits 0..2 : max # of bytes required to represent msglen, minus 1
* (already set by caller)
* - bits 3..5 : size of auth tag (1 => 4 bytes, 2 => 6 bytes, etc)
* - bit 6 : indicates presence of authenticate-only data
*/
maciv[0] |= (crypto_aead_authsize(aead) - 2) << 2;
if (req->assoclen)
maciv[0] |= 0x40;
memset(&req->iv[AES_BLOCK_SIZE - l], 0, l);
return 0;
}
static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
struct __packed { __be16 l; __be32 h; u16 len; } ltag;
struct scatter_walk walk;
u32 len = req->assoclen;
u32 macp = 0;
/* prepend the AAD with a length tag */
if (len < 0xff00) {
ltag.l = cpu_to_be16(len);
ltag.len = 2;
} else {
ltag.l = cpu_to_be16(0xfffe);
put_unaligned_be32(len, &ltag.h);
ltag.len = 6;
}
ce_aes_ccm_auth_data(mac, (u8 *)&ltag, ltag.len, &macp, ctx->key_enc,
num_rounds(ctx));
scatterwalk_start(&walk, req->assoc);
do {
u32 n = scatterwalk_clamp(&walk, len);
u8 *p;
if (!n) {
scatterwalk_start(&walk, sg_next(walk.sg));
n = scatterwalk_clamp(&walk, len);
}
p = scatterwalk_map(&walk);
ce_aes_ccm_auth_data(mac, p, n, &macp, ctx->key_enc,
num_rounds(ctx));
len -= n;
scatterwalk_unmap(p);
scatterwalk_advance(&walk, n);
scatterwalk_done(&walk, 0, len);
} while (len);
}
static int ccm_encrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
struct blkcipher_desc desc = { .info = req->iv };
struct blkcipher_walk walk;
u8 __aligned(8) mac[AES_BLOCK_SIZE];
u8 buf[AES_BLOCK_SIZE];
u32 len = req->cryptlen;
int err;
err = ccm_init_mac(req, mac, len);
if (err)
return err;
kernel_neon_begin_partial(6);
if (req->assoclen)
ccm_calculate_auth_mac(req, mac);
/* preserve the original iv for the final round */
memcpy(buf, req->iv, AES_BLOCK_SIZE);
blkcipher_walk_init(&walk, req->dst, req->src, len);
err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
AES_BLOCK_SIZE);
while (walk.nbytes) {
u32 tail = walk.nbytes % AES_BLOCK_SIZE;
if (walk.nbytes == len)
tail = 0;
ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
walk.nbytes - tail, ctx->key_enc,
num_rounds(ctx), mac, walk.iv);
len -= walk.nbytes - tail;
err = blkcipher_walk_done(&desc, &walk, tail);
}
if (!err)
ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
kernel_neon_end();
if (err)
return err;
/* copy authtag to end of dst */
scatterwalk_map_and_copy(mac, req->dst, req->cryptlen,
crypto_aead_authsize(aead), 1);
return 0;
}
static int ccm_decrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
unsigned int authsize = crypto_aead_authsize(aead);
struct blkcipher_desc desc = { .info = req->iv };
struct blkcipher_walk walk;
u8 __aligned(8) mac[AES_BLOCK_SIZE];
u8 buf[AES_BLOCK_SIZE];
u32 len = req->cryptlen - authsize;
int err;
err = ccm_init_mac(req, mac, len);
if (err)
return err;
kernel_neon_begin_partial(6);
if (req->assoclen)
ccm_calculate_auth_mac(req, mac);
/* preserve the original iv for the final round */
memcpy(buf, req->iv, AES_BLOCK_SIZE);
blkcipher_walk_init(&walk, req->dst, req->src, len);
err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
AES_BLOCK_SIZE);
while (walk.nbytes) {
u32 tail = walk.nbytes % AES_BLOCK_SIZE;
if (walk.nbytes == len)
tail = 0;
ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
walk.nbytes - tail, ctx->key_enc,
num_rounds(ctx), mac, walk.iv);
len -= walk.nbytes - tail;
err = blkcipher_walk_done(&desc, &walk, tail);
}
if (!err)
ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
kernel_neon_end();
if (err)
return err;
/* compare calculated auth tag with the stored one */
scatterwalk_map_and_copy(buf, req->src, req->cryptlen - authsize,
authsize, 0);
if (memcmp(mac, buf, authsize))
return -EBADMSG;
return 0;
}
static struct crypto_alg ccm_aes_alg = {
.cra_name = "ccm(aes)",
.cra_driver_name = "ccm-aes-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_AEAD,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_aead_type,
.cra_module = THIS_MODULE,
.cra_aead = {
.ivsize = AES_BLOCK_SIZE,
.maxauthsize = AES_BLOCK_SIZE,
.setkey = ccm_setkey,
.setauthsize = ccm_setauthsize,
.encrypt = ccm_encrypt,
.decrypt = ccm_decrypt,
}
};
static int __init aes_mod_init(void)
{
if (!(elf_hwcap & HWCAP_AES))
return -ENODEV;
return crypto_register_alg(&ccm_aes_alg);
}
static void __exit aes_mod_exit(void)
{
crypto_unregister_alg(&ccm_aes_alg);
}
module_init(aes_mod_init);
module_exit(aes_mod_exit);
MODULE_DESCRIPTION("Synchronous AES in CCM mode using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("ccm(aes)");
/*
* aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions
*
* Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/neon.h>
#include <crypto/aes.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
struct aes_block {
u8 b[AES_BLOCK_SIZE];
};
static int num_rounds(struct crypto_aes_ctx *ctx)
{
/*
* # of rounds specified by AES:
* 128 bit key 10 rounds
* 192 bit key 12 rounds
* 256 bit key 14 rounds
* => n byte key => 6 + (n/4) rounds
*/
return 6 + ctx->key_length / 4;
}
static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
{
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
struct aes_block *out = (struct aes_block *)dst;
struct aes_block const *in = (struct aes_block *)src;
void *dummy0;
int dummy1;
kernel_neon_begin_partial(4);
__asm__(" ld1 {v0.16b}, %[in] ;"
" ld1 {v1.2d}, [%[key]], #16 ;"
" cmp %w[rounds], #10 ;"
" bmi 0f ;"
" bne 3f ;"
" mov v3.16b, v1.16b ;"
" b 2f ;"
"0: mov v2.16b, v1.16b ;"
" ld1 {v3.2d}, [%[key]], #16 ;"
"1: aese v0.16b, v2.16b ;"
" aesmc v0.16b, v0.16b ;"
"2: ld1 {v1.2d}, [%[key]], #16 ;"
" aese v0.16b, v3.16b ;"
" aesmc v0.16b, v0.16b ;"
"3: ld1 {v2.2d}, [%[key]], #16 ;"
" subs %w[rounds], %w[rounds], #3 ;"
" aese v0.16b, v1.16b ;"
" aesmc v0.16b, v0.16b ;"
" ld1 {v3.2d}, [%[key]], #16 ;"
" bpl 1b ;"
" aese v0.16b, v2.16b ;"
" eor v0.16b, v0.16b, v3.16b ;"
" st1 {v0.16b}, %[out] ;"
: [out] "=Q"(*out),
[key] "=r"(dummy0),
[rounds] "=r"(dummy1)
: [in] "Q"(*in),
"1"(ctx->key_enc),
"2"(num_rounds(ctx) - 2)
: "cc");
kernel_neon_end();
}
static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
{
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
struct aes_block *out = (struct aes_block *)dst;
struct aes_block const *in = (struct aes_block *)src;
void *dummy0;
int dummy1;
kernel_neon_begin_partial(4);
__asm__(" ld1 {v0.16b}, %[in] ;"
" ld1 {v1.2d}, [%[key]], #16 ;"
" cmp %w[rounds], #10 ;"
" bmi 0f ;"
" bne 3f ;"
" mov v3.16b, v1.16b ;"
" b 2f ;"
"0: mov v2.16b, v1.16b ;"
" ld1 {v3.2d}, [%[key]], #16 ;"
"1: aesd v0.16b, v2.16b ;"
" aesimc v0.16b, v0.16b ;"
"2: ld1 {v1.2d}, [%[key]], #16 ;"
" aesd v0.16b, v3.16b ;"
" aesimc v0.16b, v0.16b ;"
"3: ld1 {v2.2d}, [%[key]], #16 ;"
" subs %w[rounds], %w[rounds], #3 ;"
" aesd v0.16b, v1.16b ;"
" aesimc v0.16b, v0.16b ;"
" ld1 {v3.2d}, [%[key]], #16 ;"
" bpl 1b ;"
" aesd v0.16b, v2.16b ;"
" eor v0.16b, v0.16b, v3.16b ;"
" st1 {v0.16b}, %[out] ;"
: [out] "=Q"(*out),
[key] "=r"(dummy0),
[rounds] "=r"(dummy1)
: [in] "Q"(*in),
"1"(ctx->key_dec),
"2"(num_rounds(ctx) - 2)
: "cc");
kernel_neon_end();
}
static struct crypto_alg aes_alg = {
.cra_name = "aes",
.cra_driver_name = "aes-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_module = THIS_MODULE,
.cra_cipher = {
.cia_min_keysize = AES_MIN_KEY_SIZE,
.cia_max_keysize = AES_MAX_KEY_SIZE,
.cia_setkey = crypto_aes_set_key,
.cia_encrypt = aes_cipher_encrypt,
.cia_decrypt = aes_cipher_decrypt
}
};
static int __init aes_mod_init(void)
{
return crypto_register_alg(&aes_alg);
}
static void __exit aes_mod_exit(void)
{
crypto_unregister_alg(&aes_alg);
}
module_cpu_feature_match(AES, aes_mod_init);
module_exit(aes_mod_exit);
/*
* linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with
* Crypto Extensions
*
* Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#define AES_ENTRY(func) ENTRY(ce_ ## func)
#define AES_ENDPROC(func) ENDPROC(ce_ ## func)
.arch armv8-a+crypto
/* preload all round keys */
.macro load_round_keys, rounds, rk
cmp \rounds, #12
blo 2222f /* 128 bits */
beq 1111f /* 192 bits */
ld1 {v17.16b-v18.16b}, [\rk], #32
1111: ld1 {v19.16b-v20.16b}, [\rk], #32
2222: ld1 {v21.16b-v24.16b}, [\rk], #64
ld1 {v25.16b-v28.16b}, [\rk], #64
ld1 {v29.16b-v31.16b}, [\rk]
.endm
/* prepare for encryption with key in rk[] */
.macro enc_prepare, rounds, rk, ignore
load_round_keys \rounds, \rk
.endm
/* prepare for encryption (again) but with new key in rk[] */
.macro enc_switch_key, rounds, rk, ignore
load_round_keys \rounds, \rk
.endm
/* prepare for decryption with key in rk[] */
.macro dec_prepare, rounds, rk, ignore
load_round_keys \rounds, \rk
.endm
.macro do_enc_Nx, de, mc, k, i0, i1, i2, i3
aes\de \i0\().16b, \k\().16b
.ifnb \i1
aes\de \i1\().16b, \k\().16b
.ifnb \i3
aes\de \i2\().16b, \k\().16b
aes\de \i3\().16b, \k\().16b
.endif
.endif
aes\mc \i0\().16b, \i0\().16b
.ifnb \i1
aes\mc \i1\().16b, \i1\().16b
.ifnb \i3
aes\mc \i2\().16b, \i2\().16b
aes\mc \i3\().16b, \i3\().16b
.endif
.endif
.endm
/* up to 4 interleaved encryption rounds with the same round key */
.macro round_Nx, enc, k, i0, i1, i2, i3
.ifc \enc, e
do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3
.else
do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3
.endif
.endm
/* up to 4 interleaved final rounds */
.macro fin_round_Nx, de, k, k2, i0, i1, i2, i3
aes\de \i0\().16b, \k\().16b
.ifnb \i1
aes\de \i1\().16b, \k\().16b
.ifnb \i3
aes\de \i2\().16b, \k\().16b
aes\de \i3\().16b, \k\().16b
.endif
.endif
eor \i0\().16b, \i0\().16b, \k2\().16b
.ifnb \i1
eor \i1\().16b, \i1\().16b, \k2\().16b
.ifnb \i3
eor \i2\().16b, \i2\().16b, \k2\().16b
eor \i3\().16b, \i3\().16b, \k2\().16b
.endif
.endif
.endm
/* up to 4 interleaved blocks */
.macro do_block_Nx, enc, rounds, i0, i1, i2, i3
cmp \rounds, #12
blo 2222f /* 128 bits */
beq 1111f /* 192 bits */
round_Nx \enc, v17, \i0, \i1, \i2, \i3
round_Nx \enc, v18, \i0, \i1, \i2, \i3
1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3
round_Nx \enc, v20, \i0, \i1, \i2, \i3
2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29
round_Nx \enc, \key, \i0, \i1, \i2, \i3
.endr
fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3
.endm
.macro encrypt_block, in, rounds, t0, t1, t2
do_block_Nx e, \rounds, \in
.endm
.macro encrypt_block2x, i0, i1, rounds, t0, t1, t2
do_block_Nx e, \rounds, \i0, \i1
.endm
.macro encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
do_block_Nx e, \rounds, \i0, \i1, \i2, \i3
.endm
.macro decrypt_block, in, rounds, t0, t1, t2
do_block_Nx d, \rounds, \in
.endm
.macro decrypt_block2x, i0, i1, rounds, t0, t1, t2
do_block_Nx d, \rounds, \i0, \i1
.endm
.macro decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
do_block_Nx d, \rounds, \i0, \i1, \i2, \i3
.endm
#include "aes-modes.S"
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
/*
* Accelerated GHASH implementation with ARMv8 PMULL instructions.
*
* Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* Based on arch/x86/crypto/ghash-pmullni-intel_asm.S
*
* Copyright (c) 2009 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
* Vinodh Gopal
* Erdinc Ozturk
* Deniz Karakoyunlu
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
DATA .req v0
SHASH .req v1
IN1 .req v2
T1 .req v2
T2 .req v3
T3 .req v4
VZR .req v5
.text
.arch armv8-a+crypto
/*
* void pmull_ghash_update(int blocks, u64 dg[], const char *src,
* struct ghash_key const *k, const char *head)
*/
ENTRY(pmull_ghash_update)
ld1 {DATA.16b}, [x1]
ld1 {SHASH.16b}, [x3]
eor VZR.16b, VZR.16b, VZR.16b
/* do the head block first, if supplied */
cbz x4, 0f
ld1 {IN1.2d}, [x4]
b 1f
0: ld1 {IN1.2d}, [x2], #16
sub w0, w0, #1
1: ext IN1.16b, IN1.16b, IN1.16b, #8
CPU_LE( rev64 IN1.16b, IN1.16b )
eor DATA.16b, DATA.16b, IN1.16b
/* multiply DATA by SHASH in GF(2^128) */
ext T2.16b, DATA.16b, DATA.16b, #8
ext T3.16b, SHASH.16b, SHASH.16b, #8
eor T2.16b, T2.16b, DATA.16b
eor T3.16b, T3.16b, SHASH.16b
pmull2 T1.1q, SHASH.2d, DATA.2d // a1 * b1
pmull DATA.1q, SHASH.1d, DATA.1d // a0 * b0
pmull T2.1q, T2.1d, T3.1d // (a1 + a0)(b1 + b0)
eor T2.16b, T2.16b, T1.16b // (a0 * b1) + (a1 * b0)
eor T2.16b, T2.16b, DATA.16b
ext T3.16b, VZR.16b, T2.16b, #8
ext T2.16b, T2.16b, VZR.16b, #8
eor DATA.16b, DATA.16b, T3.16b
eor T1.16b, T1.16b, T2.16b // <T1:DATA> is result of
// carry-less multiplication
/* first phase of the reduction */
shl T3.2d, DATA.2d, #1
eor T3.16b, T3.16b, DATA.16b
shl T3.2d, T3.2d, #5
eor T3.16b, T3.16b, DATA.16b
shl T3.2d, T3.2d, #57
ext T2.16b, VZR.16b, T3.16b, #8
ext T3.16b, T3.16b, VZR.16b, #8
eor DATA.16b, DATA.16b, T2.16b
eor T1.16b, T1.16b, T3.16b
/* second phase of the reduction */
ushr T2.2d, DATA.2d, #5
eor T2.16b, T2.16b, DATA.16b
ushr T2.2d, T2.2d, #1
eor T2.16b, T2.16b, DATA.16b
ushr T2.2d, T2.2d, #1
eor T1.16b, T1.16b, T2.16b
eor DATA.16b, DATA.16b, T1.16b
cbnz w0, 0b
st1 {DATA.16b}, [x1]
ret
ENDPROC(pmull_ghash_update)
/*
* Accelerated GHASH implementation with ARMv8 PMULL instructions.
*
* Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*/
#include <asm/neon.h>
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
#define GHASH_BLOCK_SIZE 16
#define GHASH_DIGEST_SIZE 16
struct ghash_key {
u64 a;
u64 b;
};
struct ghash_desc_ctx {
u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
u8 buf[GHASH_BLOCK_SIZE];
u32 count;
};
asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
struct ghash_key const *k, const char *head);
static int ghash_init(struct shash_desc *desc)
{
struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
*ctx = (struct ghash_desc_ctx){};
return 0;
}
static int ghash_update(struct shash_desc *desc, const u8 *src,
unsigned int len)
{
struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
ctx->count += len;
if ((partial + len) >= GHASH_BLOCK_SIZE) {
struct ghash_key *key = crypto_shash_ctx(desc->tfm);
int blocks;
if (partial) {
int p = GHASH_BLOCK_SIZE - partial;
memcpy(ctx->buf + partial, src, p);
src += p;
len -= p;
}
blocks = len / GHASH_BLOCK_SIZE;
len %= GHASH_BLOCK_SIZE;
kernel_neon_begin_partial(6);
pmull_ghash_update(blocks, ctx->digest, src, key,
partial ? ctx->buf : NULL);
kernel_neon_end();
src += blocks * GHASH_BLOCK_SIZE;
}
if (len)
memcpy(ctx->buf + partial, src, len);
return 0;
}
static int ghash_final(struct shash_desc *desc, u8 *dst)
{
struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
if (partial) {
struct ghash_key *key = crypto_shash_ctx(desc->tfm);
memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
kernel_neon_begin_partial(6);
pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
kernel_neon_end();
}
put_unaligned_be64(ctx->digest[1], dst);
put_unaligned_be64(ctx->digest[0], dst + 8);
*ctx = (struct ghash_desc_ctx){};
return 0;
}
static int ghash_setkey(struct crypto_shash *tfm,
const u8 *inkey, unsigned int keylen)
{
struct ghash_key *key = crypto_shash_ctx(tfm);
u64 a, b;
if (keylen != GHASH_BLOCK_SIZE) {
crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
/* perform multiplication by 'x' in GF(2^128) */
b = get_unaligned_be64(inkey);
a = get_unaligned_be64(inkey + 8);
key->a = (a << 1) | (b >> 63);
key->b = (b << 1) | (a >> 63);
if (b >> 63)
key->b ^= 0xc200000000000000UL;
return 0;
}
static struct shash_alg ghash_alg = {
.digestsize = GHASH_DIGEST_SIZE,
.init = ghash_init,
.update = ghash_update,
.final = ghash_final,
.setkey = ghash_setkey,
.descsize = sizeof(struct ghash_desc_ctx),
.base = {
.cra_name = "ghash",
.cra_driver_name = "ghash-ce",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = GHASH_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct ghash_key),
.cra_module = THIS_MODULE,
},
};
static int __init ghash_ce_mod_init(void)
{
return crypto_register_shash(&ghash_alg);
}
static void __exit ghash_ce_mod_exit(void)
{
crypto_unregister_shash(&ghash_alg);
}
module_cpu_feature_match(PMULL, ghash_ce_mod_init);
module_exit(ghash_ce_mod_exit);
/*
* sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
*
* Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
.arch armv8-a+crypto
k0 .req v0
k1 .req v1
k2 .req v2
k3 .req v3
t0 .req v4
t1 .req v5
dga .req q6
dgav .req v6
dgb .req s7
dgbv .req v7
dg0q .req q12
dg0s .req s12
dg0v .req v12
dg1s .req s13
dg1v .req v13
dg2s .req s14
.macro add_only, op, ev, rc, s0, dg1
.ifc \ev, ev
add t1.4s, v\s0\().4s, \rc\().4s
sha1h dg2s, dg0s
.ifnb \dg1
sha1\op dg0q, \dg1, t0.4s
.else
sha1\op dg0q, dg1s, t0.4s
.endif
.else
.ifnb \s0
add t0.4s, v\s0\().4s, \rc\().4s
.endif
sha1h dg1s, dg0s
sha1\op dg0q, dg2s, t1.4s
.endif
.endm
.macro add_update, op, ev, rc, s0, s1, s2, s3, dg1
sha1su0 v\s0\().4s, v\s1\().4s, v\s2\().4s
add_only \op, \ev, \rc, \s1, \dg1
sha1su1 v\s0\().4s, v\s3\().4s
.endm
/*
* The SHA1 round constants
*/
.align 4
.Lsha1_rcon:
.word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
/*
* void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
* u8 *head, long bytes)
*/
ENTRY(sha1_ce_transform)
/* load round constants */
adr x6, .Lsha1_rcon
ld1r {k0.4s}, [x6], #4
ld1r {k1.4s}, [x6], #4
ld1r {k2.4s}, [x6], #4
ld1r {k3.4s}, [x6]
/* load state */
ldr dga, [x2]
ldr dgb, [x2, #16]
/* load partial state (if supplied) */
cbz x3, 0f
ld1 {v8.4s-v11.4s}, [x3]
b 1f
/* load input */
0: ld1 {v8.4s-v11.4s}, [x1], #64
sub w0, w0, #1
1:
CPU_LE( rev32 v8.16b, v8.16b )
CPU_LE( rev32 v9.16b, v9.16b )
CPU_LE( rev32 v10.16b, v10.16b )
CPU_LE( rev32 v11.16b, v11.16b )
2: add t0.4s, v8.4s, k0.4s
mov dg0v.16b, dgav.16b
add_update c, ev, k0, 8, 9, 10, 11, dgb
add_update c, od, k0, 9, 10, 11, 8
add_update c, ev, k0, 10, 11, 8, 9
add_update c, od, k0, 11, 8, 9, 10
add_update c, ev, k1, 8, 9, 10, 11
add_update p, od, k1, 9, 10, 11, 8
add_update p, ev, k1, 10, 11, 8, 9
add_update p, od, k1, 11, 8, 9, 10
add_update p, ev, k1, 8, 9, 10, 11
add_update p, od, k2, 9, 10, 11, 8
add_update m, ev, k2, 10, 11, 8, 9
add_update m, od, k2, 11, 8, 9, 10
add_update m, ev, k2, 8, 9, 10, 11
add_update m, od, k2, 9, 10, 11, 8
add_update m, ev, k3, 10, 11, 8, 9
add_update p, od, k3, 11, 8, 9, 10
add_only p, ev, k3, 9
add_only p, od, k3, 10
add_only p, ev, k3, 11
add_only p, od
/* update state */
add dgbv.2s, dgbv.2s, dg1v.2s
add dgav.4s, dgav.4s, dg0v.4s
cbnz w0, 0b
/*
* Final block: add padding and total bit count.
* Skip if we have no total byte count in x4. In that case, the input
* size was not a round multiple of the block size, and the padding is
* handled by the C code.
*/
cbz x4, 3f
movi v9.2d, #0
mov x8, #0x80000000
movi v10.2d, #0
ror x7, x4, #29 // ror(lsl(x4, 3), 32)
fmov d8, x8
mov x4, #0
mov v11.d[0], xzr
mov v11.d[1], x7
b 2b
/* store new state */
3: str dga, [x2]
str dgb, [x2, #16]
ret
ENDPROC(sha1_ce_transform)
/*
* sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
*
* Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/neon.h>
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/sha.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
u8 *head, long bytes);
static int sha1_init(struct shash_desc *desc)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
*sctx = (struct sha1_state){
.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
};
return 0;
}
static int sha1_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
sctx->count += len;
if ((partial + len) >= SHA1_BLOCK_SIZE) {
int blocks;
if (partial) {
int p = SHA1_BLOCK_SIZE - partial;
memcpy(sctx->buffer + partial, data, p);
data += p;
len -= p;
}
blocks = len / SHA1_BLOCK_SIZE;
len %= SHA1_BLOCK_SIZE;
kernel_neon_begin_partial(16);
sha1_ce_transform(blocks, data, sctx->state,
partial ? sctx->buffer : NULL, 0);
kernel_neon_end();
data += blocks * SHA1_BLOCK_SIZE;
partial = 0;
}
if (len)
memcpy(sctx->buffer + partial, data, len);
return 0;
}
static int sha1_final(struct shash_desc *desc, u8 *out)
{
static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
struct sha1_state *sctx = shash_desc_ctx(desc);
__be64 bits = cpu_to_be64(sctx->count << 3);
__be32 *dst = (__be32 *)out;
int i;
u32 padlen = SHA1_BLOCK_SIZE
- ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE);
sha1_update(desc, padding, padlen);
sha1_update(desc, (const u8 *)&bits, sizeof(bits));
for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
put_unaligned_be32(sctx->state[i], dst++);
*sctx = (struct sha1_state){};
return 0;
}
static int sha1_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
__be32 *dst = (__be32 *)out;
int blocks;
int i;
if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) {
sha1_update(desc, data, len);
return sha1_final(desc, out);
}
/*
* Use a fast path if the input is a multiple of 64 bytes. In
* this case, there is no need to copy data around, and we can
* perform the entire digest calculation in a single invocation
* of sha1_ce_transform()
*/
blocks = len / SHA1_BLOCK_SIZE;
kernel_neon_begin_partial(16);
sha1_ce_transform(blocks, data, sctx->state, NULL, len);
kernel_neon_end();
for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
put_unaligned_be32(sctx->state[i], dst++);
*sctx = (struct sha1_state){};
return 0;
}
static int sha1_export(struct shash_desc *desc, void *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
struct sha1_state *dst = out;
*dst = *sctx;
return 0;
}
static int sha1_import(struct shash_desc *desc, const void *in)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
struct sha1_state const *src = in;
*sctx = *src;
return 0;
}
static struct shash_alg alg = {
.init = sha1_init,
.update = sha1_update,
.final = sha1_final,
.finup = sha1_finup,
.export = sha1_export,
.import = sha1_import,
.descsize = sizeof(struct sha1_state),
.digestsize = SHA1_DIGEST_SIZE,
.statesize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1-ce",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static int __init sha1_ce_mod_init(void)
{
return crypto_register_shash(&alg);
}
static void __exit sha1_ce_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
module_cpu_feature_match(SHA1, sha1_ce_mod_init);
module_exit(sha1_ce_mod_fini);
/*
* sha2-ce-core.S - core SHA-224/SHA-256 transform using v8 Crypto Extensions
*
* Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
.arch armv8-a+crypto
dga .req q20
dgav .req v20
dgb .req q21
dgbv .req v21
t0 .req v22
t1 .req v23
dg0q .req q24
dg0v .req v24
dg1q .req q25
dg1v .req v25
dg2q .req q26
dg2v .req v26
.macro add_only, ev, rc, s0
mov dg2v.16b, dg0v.16b
.ifeq \ev
add t1.4s, v\s0\().4s, \rc\().4s
sha256h dg0q, dg1q, t0.4s
sha256h2 dg1q, dg2q, t0.4s
.else
.ifnb \s0
add t0.4s, v\s0\().4s, \rc\().4s
.endif
sha256h dg0q, dg1q, t1.4s
sha256h2 dg1q, dg2q, t1.4s
.endif
.endm
.macro add_update, ev, rc, s0, s1, s2, s3
sha256su0 v\s0\().4s, v\s1\().4s
add_only \ev, \rc, \s1
sha256su1 v\s0\().4s, v\s2\().4s, v\s3\().4s
.endm
/*
* The SHA-256 round constants
*/
.align 4
.Lsha2_rcon:
.word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
.word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
.word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
.word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
.word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
.word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
.word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
.word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
.word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
.word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
.word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
.word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
.word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
.word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
.word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
.word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
/*
* void sha2_ce_transform(int blocks, u8 const *src, u32 *state,
* u8 *head, long bytes)
*/
ENTRY(sha2_ce_transform)
/* load round constants */
adr x8, .Lsha2_rcon
ld1 { v0.4s- v3.4s}, [x8], #64
ld1 { v4.4s- v7.4s}, [x8], #64
ld1 { v8.4s-v11.4s}, [x8], #64
ld1 {v12.4s-v15.4s}, [x8]
/* load state */
ldp dga, dgb, [x2]
/* load partial input (if supplied) */
cbz x3, 0f
ld1 {v16.4s-v19.4s}, [x3]
b 1f
/* load input */
0: ld1 {v16.4s-v19.4s}, [x1], #64
sub w0, w0, #1
1:
CPU_LE( rev32 v16.16b, v16.16b )
CPU_LE( rev32 v17.16b, v17.16b )
CPU_LE( rev32 v18.16b, v18.16b )
CPU_LE( rev32 v19.16b, v19.16b )
2: add t0.4s, v16.4s, v0.4s
mov dg0v.16b, dgav.16b
mov dg1v.16b, dgbv.16b
add_update 0, v1, 16, 17, 18, 19
add_update 1, v2, 17, 18, 19, 16
add_update 0, v3, 18, 19, 16, 17
add_update 1, v4, 19, 16, 17, 18
add_update 0, v5, 16, 17, 18, 19
add_update 1, v6, 17, 18, 19, 16
add_update 0, v7, 18, 19, 16, 17
add_update 1, v8, 19, 16, 17, 18
add_update 0, v9, 16, 17, 18, 19
add_update 1, v10, 17, 18, 19, 16
add_update 0, v11, 18, 19, 16, 17
add_update 1, v12, 19, 16, 17, 18
add_only 0, v13, 17
add_only 1, v14, 18
add_only 0, v15, 19
add_only 1
/* update state */
add dgav.4s, dgav.4s, dg0v.4s
add dgbv.4s, dgbv.4s, dg1v.4s
/* handled all input blocks? */
cbnz w0, 0b
/*
* Final block: add padding and total bit count.
* Skip if we have no total byte count in x4. In that case, the input
* size was not a round multiple of the block size, and the padding is
* handled by the C code.
*/
cbz x4, 3f
movi v17.2d, #0
mov x8, #0x80000000
movi v18.2d, #0
ror x7, x4, #29 // ror(lsl(x4, 3), 32)
fmov d16, x8
mov x4, #0
mov v19.d[0], xzr
mov v19.d[1], x7
b 2b
/* store new state */
3: stp dga, dgb, [x2]
ret
ENDPROC(sha2_ce_transform)
/*
* sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions
*
* Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/neon.h>
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/sha.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state,
u8 *head, long bytes);
static int sha224_init(struct shash_desc *desc)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
*sctx = (struct sha256_state){
.state = {
SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
}
};
return 0;
}
static int sha256_init(struct shash_desc *desc)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
*sctx = (struct sha256_state){
.state = {
SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
}
};
return 0;
}
static int sha2_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
sctx->count += len;
if ((partial + len) >= SHA256_BLOCK_SIZE) {
int blocks;
if (partial) {
int p = SHA256_BLOCK_SIZE - partial;
memcpy(sctx->buf + partial, data, p);
data += p;
len -= p;
}
blocks = len / SHA256_BLOCK_SIZE;
len %= SHA256_BLOCK_SIZE;
kernel_neon_begin_partial(28);
sha2_ce_transform(blocks, data, sctx->state,
partial ? sctx->buf : NULL, 0);
kernel_neon_end();
data += blocks * SHA256_BLOCK_SIZE;
partial = 0;
}
if (len)
memcpy(sctx->buf + partial, data, len);
return 0;
}
static void sha2_final(struct shash_desc *desc)
{
static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
struct sha256_state *sctx = shash_desc_ctx(desc);
__be64 bits = cpu_to_be64(sctx->count << 3);
u32 padlen = SHA256_BLOCK_SIZE
- ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE);
sha2_update(desc, padding, padlen);
sha2_update(desc, (const u8 *)&bits, sizeof(bits));
}
static int sha224_final(struct shash_desc *desc, u8 *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
__be32 *dst = (__be32 *)out;
int i;
sha2_final(desc);
for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
put_unaligned_be32(sctx->state[i], dst++);
*sctx = (struct sha256_state){};
return 0;
}
static int sha256_final(struct shash_desc *desc, u8 *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
__be32 *dst = (__be32 *)out;
int i;
sha2_final(desc);
for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
put_unaligned_be32(sctx->state[i], dst++);
*sctx = (struct sha256_state){};
return 0;
}
static void sha2_finup(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
int blocks;
if (sctx->count || !len || (len % SHA256_BLOCK_SIZE)) {
sha2_update(desc, data, len);
sha2_final(desc);
return;
}
/*
* Use a fast path if the input is a multiple of 64 bytes. In
* this case, there is no need to copy data around, and we can
* perform the entire digest calculation in a single invocation
* of sha2_ce_transform()
*/
blocks = len / SHA256_BLOCK_SIZE;
kernel_neon_begin_partial(28);
sha2_ce_transform(blocks, data, sctx->state, NULL, len);
kernel_neon_end();
data += blocks * SHA256_BLOCK_SIZE;
}
static int sha224_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
__be32 *dst = (__be32 *)out;
int i;
sha2_finup(desc, data, len);
for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
put_unaligned_be32(sctx->state[i], dst++);
*sctx = (struct sha256_state){};
return 0;
}
static int sha256_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
__be32 *dst = (__be32 *)out;
int i;
sha2_finup(desc, data, len);
for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
put_unaligned_be32(sctx->state[i], dst++);
*sctx = (struct sha256_state){};
return 0;
}
static int sha2_export(struct shash_desc *desc, void *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
struct sha256_state *dst = out;
*dst = *sctx;
return 0;
}
static int sha2_import(struct shash_desc *desc, const void *in)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
struct sha256_state const *src = in;
*sctx = *src;
return 0;
}
static struct shash_alg algs[] = { {
.init = sha224_init,
.update = sha2_update,
.final = sha224_final,
.finup = sha224_finup,
.export = sha2_export,
.import = sha2_import,
.descsize = sizeof(struct sha256_state),
.digestsize = SHA224_DIGEST_SIZE,
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",
.cra_driver_name = "sha224-ce",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.init = sha256_init,
.update = sha2_update,
.final = sha256_final,
.finup = sha256_finup,
.export = sha2_export,
.import = sha2_import,
.descsize = sizeof(struct sha256_state),
.digestsize = SHA256_DIGEST_SIZE,
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-ce",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
static int __init sha2_ce_mod_init(void)
{
return crypto_register_shashes(algs, ARRAY_SIZE(algs));
}
static void __exit sha2_ce_mod_fini(void)
{
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}
module_cpu_feature_match(SHA2, sha2_ce_mod_init);
module_exit(sha2_ce_mod_fini);
...@@ -40,6 +40,7 @@ generic-y += segment.h ...@@ -40,6 +40,7 @@ generic-y += segment.h
generic-y += sembuf.h generic-y += sembuf.h
generic-y += serial.h generic-y += serial.h
generic-y += shmbuf.h generic-y += shmbuf.h
generic-y += simd.h
generic-y += sizes.h generic-y += sizes.h
generic-y += socket.h generic-y += socket.h
generic-y += sockios.h generic-y += sockios.h
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#endif #endif
#include <asm/ptrace.h> #include <asm/ptrace.h>
#include <asm/thread_info.h>
/* /*
* Stack pushing/popping (register pairs only). Equivalent to store decrement * Stack pushing/popping (register pairs only). Equivalent to store decrement
...@@ -68,23 +69,31 @@ ...@@ -68,23 +69,31 @@
msr daifclr, #8 msr daifclr, #8
.endm .endm
.macro disable_step, tmp .macro disable_step_tsk, flgs, tmp
tbz \flgs, #TIF_SINGLESTEP, 9990f
mrs \tmp, mdscr_el1 mrs \tmp, mdscr_el1
bic \tmp, \tmp, #1 bic \tmp, \tmp, #1
msr mdscr_el1, \tmp msr mdscr_el1, \tmp
isb // Synchronise with enable_dbg
9990:
.endm .endm
.macro enable_step, tmp .macro enable_step_tsk, flgs, tmp
tbz \flgs, #TIF_SINGLESTEP, 9990f
disable_dbg
mrs \tmp, mdscr_el1 mrs \tmp, mdscr_el1
orr \tmp, \tmp, #1 orr \tmp, \tmp, #1
msr mdscr_el1, \tmp msr mdscr_el1, \tmp
9990:
.endm .endm
.macro enable_dbg_if_not_stepping, tmp /*
mrs \tmp, mdscr_el1 * Enable both debug exceptions and interrupts. This is likely to be
tbnz \tmp, #0, 9990f * faster than two daifclr operations, since writes to this register
enable_dbg * are self-synchronising.
9990: */
.macro enable_dbg_and_irq
msr daifclr, #(8 | 2)
.endm .endm
/* /*
......
...@@ -157,7 +157,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) ...@@ -157,7 +157,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
*/ */
#define ATOMIC64_INIT(i) { (i) } #define ATOMIC64_INIT(i) { (i) }
#define atomic64_read(v) (*(volatile long long *)&(v)->counter) #define atomic64_read(v) (*(volatile long *)&(v)->counter)
#define atomic64_set(v,i) (((v)->counter) = (i)) #define atomic64_set(v,i) (((v)->counter) = (i))
static inline void atomic64_add(u64 i, atomic64_t *v) static inline void atomic64_add(u64 i, atomic64_t *v)
......
...@@ -25,12 +25,12 @@ ...@@ -25,12 +25,12 @@
#define wfi() asm volatile("wfi" : : : "memory") #define wfi() asm volatile("wfi" : : : "memory")
#define isb() asm volatile("isb" : : : "memory") #define isb() asm volatile("isb" : : : "memory")
#define dmb(opt) asm volatile("dmb sy" : : : "memory") #define dmb(opt) asm volatile("dmb " #opt : : : "memory")
#define dsb(opt) asm volatile("dsb sy" : : : "memory") #define dsb(opt) asm volatile("dsb " #opt : : : "memory")
#define mb() dsb() #define mb() dsb(sy)
#define rmb() asm volatile("dsb ld" : : : "memory") #define rmb() dsb(ld)
#define wmb() asm volatile("dsb st" : : : "memory") #define wmb() dsb(st)
#ifndef CONFIG_SMP #ifndef CONFIG_SMP
#define smp_mb() barrier() #define smp_mb() barrier()
...@@ -40,7 +40,7 @@ ...@@ -40,7 +40,7 @@
#define smp_store_release(p, v) \ #define smp_store_release(p, v) \
do { \ do { \
compiletime_assert_atomic_type(*p); \ compiletime_assert_atomic_type(*p); \
smp_mb(); \ barrier(); \
ACCESS_ONCE(*p) = (v); \ ACCESS_ONCE(*p) = (v); \
} while (0) } while (0)
...@@ -48,15 +48,15 @@ do { \ ...@@ -48,15 +48,15 @@ do { \
({ \ ({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \ typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \ compiletime_assert_atomic_type(*p); \
smp_mb(); \ barrier(); \
___p1; \ ___p1; \
}) })
#else #else
#define smp_mb() asm volatile("dmb ish" : : : "memory") #define smp_mb() dmb(ish)
#define smp_rmb() asm volatile("dmb ishld" : : : "memory") #define smp_rmb() dmb(ishld)
#define smp_wmb() asm volatile("dmb ishst" : : : "memory") #define smp_wmb() dmb(ishst)
#define smp_store_release(p, v) \ #define smp_store_release(p, v) \
do { \ do { \
......
...@@ -16,6 +16,8 @@ ...@@ -16,6 +16,8 @@
#ifndef __ASM_CACHE_H #ifndef __ASM_CACHE_H
#define __ASM_CACHE_H #define __ASM_CACHE_H
#include <asm/cachetype.h>
#define L1_CACHE_SHIFT 6 #define L1_CACHE_SHIFT 6
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
...@@ -27,6 +29,15 @@ ...@@ -27,6 +29,15 @@
* the CPU. * the CPU.
*/ */
#define ARCH_DMA_MINALIGN L1_CACHE_BYTES #define ARCH_DMA_MINALIGN L1_CACHE_BYTES
#define ARCH_SLAB_MINALIGN 8
#ifndef __ASSEMBLY__
static inline int cache_line_size(void)
{
u32 cwg = cache_type_cwg();
return cwg ? 4 << cwg : L1_CACHE_BYTES;
}
#endif /* __ASSEMBLY__ */
#endif #endif
...@@ -123,7 +123,7 @@ extern void flush_dcache_page(struct page *); ...@@ -123,7 +123,7 @@ extern void flush_dcache_page(struct page *);
static inline void __flush_icache_all(void) static inline void __flush_icache_all(void)
{ {
asm("ic ialluis"); asm("ic ialluis");
dsb(); dsb(ish);
} }
#define flush_dcache_mmap_lock(mapping) \ #define flush_dcache_mmap_lock(mapping) \
...@@ -150,7 +150,7 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end) ...@@ -150,7 +150,7 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end)
* set_pte_at() called from vmap_pte_range() does not * set_pte_at() called from vmap_pte_range() does not
* have a DSB after cleaning the cache line. * have a DSB after cleaning the cache line.
*/ */
dsb(); dsb(ish);
} }
static inline void flush_cache_vunmap(unsigned long start, unsigned long end) static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
......
...@@ -20,12 +20,16 @@ ...@@ -20,12 +20,16 @@
#define CTR_L1IP_SHIFT 14 #define CTR_L1IP_SHIFT 14
#define CTR_L1IP_MASK 3 #define CTR_L1IP_MASK 3
#define CTR_CWG_SHIFT 24
#define CTR_CWG_MASK 15
#define ICACHE_POLICY_RESERVED 0 #define ICACHE_POLICY_RESERVED 0
#define ICACHE_POLICY_AIVIVT 1 #define ICACHE_POLICY_AIVIVT 1
#define ICACHE_POLICY_VIPT 2 #define ICACHE_POLICY_VIPT 2
#define ICACHE_POLICY_PIPT 3 #define ICACHE_POLICY_PIPT 3
#ifndef __ASSEMBLY__
static inline u32 icache_policy(void) static inline u32 icache_policy(void)
{ {
return (read_cpuid_cachetype() >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK; return (read_cpuid_cachetype() >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK;
...@@ -45,4 +49,11 @@ static inline int icache_is_aivivt(void) ...@@ -45,4 +49,11 @@ static inline int icache_is_aivivt(void)
return icache_policy() == ICACHE_POLICY_AIVIVT; return icache_policy() == ICACHE_POLICY_AIVIVT;
} }
static inline u32 cache_type_cwg(void)
{
return (read_cpuid_cachetype() >> CTR_CWG_SHIFT) & CTR_CWG_MASK;
}
#endif /* __ASSEMBLY__ */
#endif /* __ASM_CACHETYPE_H */ #endif /* __ASM_CACHETYPE_H */
...@@ -72,7 +72,12 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size ...@@ -72,7 +72,12 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
} }
#define xchg(ptr,x) \ #define xchg(ptr,x) \
((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) ({ \
__typeof__(*(ptr)) __ret; \
__ret = (__typeof__(*(ptr))) \
__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))); \
__ret; \
})
static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
unsigned long new, int size) unsigned long new, int size)
......
...@@ -305,11 +305,6 @@ static inline int is_compat_thread(struct thread_info *thread) ...@@ -305,11 +305,6 @@ static inline int is_compat_thread(struct thread_info *thread)
#else /* !CONFIG_COMPAT */ #else /* !CONFIG_COMPAT */
static inline int is_compat_task(void)
{
return 0;
}
static inline int is_compat_thread(struct thread_info *thread) static inline int is_compat_thread(struct thread_info *thread)
{ {
return 0; return 0;
......
...@@ -18,9 +18,11 @@ ...@@ -18,9 +18,11 @@
#ifndef __ASM_ESR_H #ifndef __ASM_ESR_H
#define __ASM_ESR_H #define __ASM_ESR_H
#define ESR_EL1_EC_SHIFT (26) #define ESR_EL1_WRITE (1 << 6)
#define ESR_EL1_IL (1U << 25) #define ESR_EL1_CM (1 << 8)
#define ESR_EL1_IL (1 << 25)
#define ESR_EL1_EC_SHIFT (26)
#define ESR_EL1_EC_UNKNOWN (0x00) #define ESR_EL1_EC_UNKNOWN (0x00)
#define ESR_EL1_EC_WFI (0x01) #define ESR_EL1_EC_WFI (0x01)
#define ESR_EL1_EC_CP15_32 (0x03) #define ESR_EL1_EC_CP15_32 (0x03)
......
...@@ -37,8 +37,21 @@ struct fpsimd_state { ...@@ -37,8 +37,21 @@ struct fpsimd_state {
u32 fpcr; u32 fpcr;
}; };
}; };
/* the id of the last cpu to have restored this state */
unsigned int cpu;
}; };
/*
* Struct for stacking the bottom 'n' FP/SIMD registers.
*/
struct fpsimd_partial_state {
u32 fpsr;
u32 fpcr;
u32 num_regs;
__uint128_t vregs[32];
};
#if defined(__KERNEL__) && defined(CONFIG_COMPAT) #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/* Masks for extracting the FPSR and FPCR from the FPSCR */ /* Masks for extracting the FPSR and FPCR from the FPSCR */
#define VFP_FPSCR_STAT_MASK 0xf800009f #define VFP_FPSCR_STAT_MASK 0xf800009f
...@@ -58,6 +71,16 @@ extern void fpsimd_load_state(struct fpsimd_state *state); ...@@ -58,6 +71,16 @@ extern void fpsimd_load_state(struct fpsimd_state *state);
extern void fpsimd_thread_switch(struct task_struct *next); extern void fpsimd_thread_switch(struct task_struct *next);
extern void fpsimd_flush_thread(void); extern void fpsimd_flush_thread(void);
extern void fpsimd_preserve_current_state(void);
extern void fpsimd_restore_current_state(void);
extern void fpsimd_update_current_state(struct fpsimd_state *state);
extern void fpsimd_flush_task_state(struct task_struct *target);
extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state,
u32 num_regs);
extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state);
#endif #endif
#endif #endif
...@@ -62,3 +62,38 @@ ...@@ -62,3 +62,38 @@
ldr w\tmpnr, [\state, #16 * 2 + 4] ldr w\tmpnr, [\state, #16 * 2 + 4]
msr fpcr, x\tmpnr msr fpcr, x\tmpnr
.endm .endm
.altmacro
.macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2
mrs x\tmpnr1, fpsr
str w\numnr, [\state, #8]
mrs x\tmpnr2, fpcr
stp w\tmpnr1, w\tmpnr2, [\state]
adr x\tmpnr1, 0f
add \state, \state, x\numnr, lsl #4
sub x\tmpnr1, x\tmpnr1, x\numnr, lsl #1
br x\tmpnr1
.irp qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0
.irp qb, %(qa + 1)
stp q\qa, q\qb, [\state, # -16 * \qa - 16]
.endr
.endr
0:
.endm
.macro fpsimd_restore_partial state, tmpnr1, tmpnr2
ldp w\tmpnr1, w\tmpnr2, [\state]
msr fpsr, x\tmpnr1
msr fpcr, x\tmpnr2
adr x\tmpnr1, 0f
ldr w\tmpnr2, [\state, #8]
add \state, \state, x\tmpnr2, lsl #4
sub x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1
br x\tmpnr1
.irp qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0
.irp qb, %(qa + 1)
ldp q\qa, q\qb, [\state, # -16 * \qa - 16]
.endr
.endr
0:
.endm
/*
* arch/arm64/include/asm/ftrace.h
*
* Copyright (C) 2013 Linaro Limited
* Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#ifndef __ASM_FTRACE_H
#define __ASM_FTRACE_H
#include <asm/insn.h>
#define MCOUNT_ADDR ((unsigned long)_mcount)
#define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE
#ifndef __ASSEMBLY__
#include <linux/compat.h>
extern void _mcount(unsigned long);
extern void *return_address(unsigned int);
struct dyn_arch_ftrace {
/* No extra data needed for arm64 */
};
extern unsigned long ftrace_graph_call;
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
/*
* addr is the address of the mcount call instruction.
* recordmcount does the necessary offset calculation.
*/
return addr;
}
#define ftrace_return_address(n) return_address(n)
/*
* Because AArch32 mode does not share the same syscall table with AArch64,
* tracing compat syscalls may result in reporting bogus syscalls or even
* hang-up, so just do not trace them.
* See kernel/trace/trace_syscalls.c
*
* x86 code says:
* If the user realy wants these, then they should use the
* raw syscall tracepoints with filtering.
*/
#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
{
return is_compat_task();
}
#endif /* ifndef __ASSEMBLY__ */
#endif /* __ASM_FTRACE_H */
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
#include <linux/threads.h> #include <linux/threads.h>
#include <asm/irq.h> #include <asm/irq.h>
#define NR_IPI 5 #define NR_IPI 6
typedef struct { typedef struct {
unsigned int __softirq_pending; unsigned int __softirq_pending;
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
/* A64 instructions are always 32 bits. */ /* A64 instructions are always 32 bits. */
#define AARCH64_INSN_SIZE 4 #define AARCH64_INSN_SIZE 4
#ifndef __ASSEMBLY__
/* /*
* ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a
* Section C3.1 "A64 instruction index by encoding": * Section C3.1 "A64 instruction index by encoding":
...@@ -104,5 +105,6 @@ bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn); ...@@ -104,5 +105,6 @@ bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn);
int aarch64_insn_patch_text_nosync(void *addr, u32 insn); int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt); int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt);
int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt); int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
#endif /* __ASSEMBLY__ */
#endif /* __ASM_INSN_H */ #endif /* __ASM_INSN_H */
...@@ -230,19 +230,11 @@ extern void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot ...@@ -230,19 +230,11 @@ extern void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot
extern void __iounmap(volatile void __iomem *addr); extern void __iounmap(volatile void __iomem *addr);
extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size); extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_DIRTY)
#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL_NC))
#define PROT_NORMAL (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
#define ioremap(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) #define ioremap(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
#define ioremap_nocache(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) #define ioremap_nocache(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
#define ioremap_wc(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL_NC)) #define ioremap_wc(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL_NC))
#define iounmap __iounmap #define iounmap __iounmap
#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF)
#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PTE_PXN | PTE_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
#define ARCH_HAS_IOREMAP_WC #define ARCH_HAS_IOREMAP_WC
#include <asm-generic/iomap.h> #include <asm-generic/iomap.h>
......
...@@ -8,7 +8,11 @@ ...@@ -8,7 +8,11 @@
* published by the Free Software Foundation. * published by the Free Software Foundation.
*/ */
#include <linux/types.h>
#define cpu_has_neon() (1) #define cpu_has_neon() (1)
void kernel_neon_begin(void); #define kernel_neon_begin() kernel_neon_begin_partial(32)
void kernel_neon_begin_partial(u32 num_regs);
void kernel_neon_end(void); void kernel_neon_end(void);
...@@ -29,6 +29,8 @@ ...@@ -29,6 +29,8 @@
*/ */
#define PUD_TABLE_BIT (_AT(pgdval_t, 1) << 1) #define PUD_TABLE_BIT (_AT(pgdval_t, 1) << 1)
#define PUD_TYPE_MASK (_AT(pgdval_t, 3) << 0)
#define PUD_TYPE_SECT (_AT(pgdval_t, 1) << 0)
/* /*
* Level 2 descriptor (PMD). * Level 2 descriptor (PMD).
......
...@@ -52,66 +52,59 @@ extern void __pgd_error(const char *file, int line, unsigned long val); ...@@ -52,66 +52,59 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
#endif #endif
#define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) #define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd))
/* #ifdef CONFIG_SMP
* The pgprot_* and protection_map entries will be fixed up at runtime to #define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
* include the cachable and bufferable bits based on memory policy, as well as #define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
* any architecture dependent bits like global/ASID and SMP shared mapping #else
* bits. #define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF)
*/ #define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF)
#define _PAGE_DEFAULT PTE_TYPE_PAGE | PTE_AF #endif
extern pgprot_t pgprot_default; #define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC))
#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL))
#define __pgprot_modify(prot,mask,bits) \ #define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
__pgprot((pgprot_val(prot) & ~(mask)) | (bits)) #define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
#define _MOD_PROT(p, b) __pgprot_modify(p, 0, b) #define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE | PTE_PXN | PTE_UXN) #define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
#define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
#define PAGE_COPY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN)
#define PAGE_READONLY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
#define PAGE_READONLY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN)
#define PAGE_KERNEL _MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_HYP _MOD_PROT(pgprot_default, PTE_HYP) #define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP)
#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) #define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
#define PAGE_S2 __pgprot_modify(pgprot_default, PTE_S2_MEMATTR_MASK, PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) #define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDWR | PTE_UXN) #define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDWR | PTE_UXN)
#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) #define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
#define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) #define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
#define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) #define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
#define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
#define __PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) #define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
#define __PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
#define __PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) #define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
#endif /* __ASSEMBLY__ */ #define __P000 PAGE_NONE
#define __P001 PAGE_READONLY
#define __P000 __PAGE_NONE #define __P010 PAGE_COPY
#define __P001 __PAGE_READONLY #define __P011 PAGE_COPY
#define __P010 __PAGE_COPY #define __P100 PAGE_READONLY_EXEC
#define __P011 __PAGE_COPY #define __P101 PAGE_READONLY_EXEC
#define __P100 __PAGE_READONLY_EXEC #define __P110 PAGE_COPY_EXEC
#define __P101 __PAGE_READONLY_EXEC #define __P111 PAGE_COPY_EXEC
#define __P110 __PAGE_COPY_EXEC
#define __P111 __PAGE_COPY_EXEC #define __S000 PAGE_NONE
#define __S001 PAGE_READONLY
#define __S000 __PAGE_NONE #define __S010 PAGE_SHARED
#define __S001 __PAGE_READONLY #define __S011 PAGE_SHARED
#define __S010 __PAGE_SHARED #define __S100 PAGE_READONLY_EXEC
#define __S011 __PAGE_SHARED #define __S101 PAGE_READONLY_EXEC
#define __S100 __PAGE_READONLY_EXEC #define __S110 PAGE_SHARED_EXEC
#define __S101 __PAGE_READONLY_EXEC #define __S111 PAGE_SHARED_EXEC
#define __S110 __PAGE_SHARED_EXEC
#define __S111 __PAGE_SHARED_EXEC
#ifndef __ASSEMBLY__
/* /*
* ZERO_PAGE is a global shared page that is always zero: used * ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc.. * for zero-mapped memory areas etc..
...@@ -265,6 +258,7 @@ static inline pmd_t pte_pmd(pte_t pte) ...@@ -265,6 +258,7 @@ static inline pmd_t pte_pmd(pte_t pte)
#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot) #define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK)) #define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
#define pud_pfn(pud) (((pud_val(pud) & PUD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
#define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd)) #define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd))
...@@ -273,6 +267,9 @@ static inline int has_transparent_hugepage(void) ...@@ -273,6 +267,9 @@ static inline int has_transparent_hugepage(void)
return 1; return 1;
} }
#define __pgprot_modify(prot,mask,bits) \
__pgprot((pgprot_val(prot) & ~(mask)) | (bits))
/* /*
* Mark the prot value as uncacheable and unbufferable. * Mark the prot value as uncacheable and unbufferable.
*/ */
...@@ -295,11 +292,17 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, ...@@ -295,11 +292,17 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
#define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ #define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
PMD_TYPE_SECT) PMD_TYPE_SECT)
#ifdef ARM64_64K_PAGES
#define pud_sect(pud) (0)
#else
#define pud_sect(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \
PUD_TYPE_SECT)
#endif
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{ {
*pmdp = pmd; *pmdp = pmd;
dsb(); dsb(ishst);
} }
static inline void pmd_clear(pmd_t *pmdp) static inline void pmd_clear(pmd_t *pmdp)
...@@ -329,7 +332,7 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) ...@@ -329,7 +332,7 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
static inline void set_pud(pud_t *pudp, pud_t pud) static inline void set_pud(pud_t *pudp, pud_t pud)
{ {
*pudp = pud; *pudp = pud;
dsb(); dsb(ishst);
} }
static inline void pud_clear(pud_t *pudp) static inline void pud_clear(pud_t *pudp)
......
...@@ -79,6 +79,7 @@ struct thread_struct { ...@@ -79,6 +79,7 @@ struct thread_struct {
unsigned long tp_value; unsigned long tp_value;
struct fpsimd_state fpsimd_state; struct fpsimd_state fpsimd_state;
unsigned long fault_address; /* fault info */ unsigned long fault_address; /* fault info */
unsigned long fault_code; /* ESR_EL1 value */
struct debug_info debug; /* debugging */ struct debug_info debug; /* debugging */
}; };
......
...@@ -135,6 +135,11 @@ struct pt_regs { ...@@ -135,6 +135,11 @@ struct pt_regs {
#define user_stack_pointer(regs) \ #define user_stack_pointer(regs) \
(!compat_user_mode(regs)) ? ((regs)->sp) : ((regs)->compat_sp) (!compat_user_mode(regs)) ? ((regs)->sp) : ((regs)->compat_sp)
static inline unsigned long regs_return_value(struct pt_regs *regs)
{
return regs->regs[0];
}
/* /*
* Are the current registers suitable for user mode? (used to maintain * Are the current registers suitable for user mode? (used to maintain
* security in signal handlers) * security in signal handlers)
......
/*
* Copyright (C) 2012 ARM Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_SIGCONTEXT_H
#define __ASM_SIGCONTEXT_H
#include <uapi/asm/sigcontext.h>
/*
* Auxiliary context saved in the sigcontext.__reserved array. Not exported to
* user space as it will change with the addition of new context. User space
* should check the magic/size information.
*/
struct aux_context {
struct fpsimd_context fpsimd;
/* additional context to be added before "end" */
struct _aarch64_ctx end;
};
#endif
...@@ -22,6 +22,18 @@ extern char *strrchr(const char *, int c); ...@@ -22,6 +22,18 @@ extern char *strrchr(const char *, int c);
#define __HAVE_ARCH_STRCHR #define __HAVE_ARCH_STRCHR
extern char *strchr(const char *, int c); extern char *strchr(const char *, int c);
#define __HAVE_ARCH_STRCMP
extern int strcmp(const char *, const char *);
#define __HAVE_ARCH_STRNCMP
extern int strncmp(const char *, const char *, __kernel_size_t);
#define __HAVE_ARCH_STRLEN
extern __kernel_size_t strlen(const char *);
#define __HAVE_ARCH_STRNLEN
extern __kernel_size_t strnlen(const char *, __kernel_size_t);
#define __HAVE_ARCH_MEMCPY #define __HAVE_ARCH_MEMCPY
extern void *memcpy(void *, const void *, __kernel_size_t); extern void *memcpy(void *, const void *, __kernel_size_t);
...@@ -34,4 +46,7 @@ extern void *memchr(const void *, int, __kernel_size_t); ...@@ -34,4 +46,7 @@ extern void *memchr(const void *, int, __kernel_size_t);
#define __HAVE_ARCH_MEMSET #define __HAVE_ARCH_MEMSET
extern void *memset(void *, int, __kernel_size_t); extern void *memset(void *, int, __kernel_size_t);
#define __HAVE_ARCH_MEMCMP
extern int memcmp(const void *, const void *, size_t);
#endif #endif
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <linux/err.h> #include <linux/err.h>
extern const void *sys_call_table[];
static inline int syscall_get_nr(struct task_struct *task, static inline int syscall_get_nr(struct task_struct *task,
struct pt_regs *regs) struct pt_regs *regs)
......
...@@ -91,6 +91,9 @@ static inline struct thread_info *current_thread_info(void) ...@@ -91,6 +91,9 @@ static inline struct thread_info *current_thread_info(void)
/* /*
* thread information flags: * thread information flags:
* TIF_SYSCALL_TRACE - syscall trace active * TIF_SYSCALL_TRACE - syscall trace active
* TIF_SYSCALL_TRACEPOINT - syscall tracepoint for ftrace
* TIF_SYSCALL_AUDIT - syscall auditing
* TIF_SECOMP - syscall secure computing
* TIF_SIGPENDING - signal pending * TIF_SIGPENDING - signal pending
* TIF_NEED_RESCHED - rescheduling necessary * TIF_NEED_RESCHED - rescheduling necessary
* TIF_NOTIFY_RESUME - callback before returning to user * TIF_NOTIFY_RESUME - callback before returning to user
...@@ -99,7 +102,11 @@ static inline struct thread_info *current_thread_info(void) ...@@ -99,7 +102,11 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_SIGPENDING 0 #define TIF_SIGPENDING 0
#define TIF_NEED_RESCHED 1 #define TIF_NEED_RESCHED 1
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
#define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
#define TIF_SYSCALL_TRACE 8 #define TIF_SYSCALL_TRACE 8
#define TIF_SYSCALL_AUDIT 9
#define TIF_SYSCALL_TRACEPOINT 10
#define TIF_SECCOMP 11
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_MEMDIE 18 /* is terminating due to OOM killer */
#define TIF_FREEZE 19 #define TIF_FREEZE 19
#define TIF_RESTORE_SIGMASK 20 #define TIF_RESTORE_SIGMASK 20
...@@ -110,10 +117,18 @@ static inline struct thread_info *current_thread_info(void) ...@@ -110,10 +117,18 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE)
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_32BIT (1 << TIF_32BIT) #define _TIF_32BIT (1 << TIF_32BIT)
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
_TIF_NOTIFY_RESUME) _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE)
#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP)
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* __ASM_THREAD_INFO_H */ #endif /* __ASM_THREAD_INFO_H */
...@@ -72,9 +72,9 @@ extern struct cpu_tlb_fns cpu_tlb; ...@@ -72,9 +72,9 @@ extern struct cpu_tlb_fns cpu_tlb;
*/ */
static inline void flush_tlb_all(void) static inline void flush_tlb_all(void)
{ {
dsb(); dsb(ishst);
asm("tlbi vmalle1is"); asm("tlbi vmalle1is");
dsb(); dsb(ish);
isb(); isb();
} }
...@@ -82,9 +82,9 @@ static inline void flush_tlb_mm(struct mm_struct *mm) ...@@ -82,9 +82,9 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
{ {
unsigned long asid = (unsigned long)ASID(mm) << 48; unsigned long asid = (unsigned long)ASID(mm) << 48;
dsb(); dsb(ishst);
asm("tlbi aside1is, %0" : : "r" (asid)); asm("tlbi aside1is, %0" : : "r" (asid));
dsb(); dsb(ish);
} }
static inline void flush_tlb_page(struct vm_area_struct *vma, static inline void flush_tlb_page(struct vm_area_struct *vma,
...@@ -93,16 +93,36 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, ...@@ -93,16 +93,36 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
unsigned long addr = uaddr >> 12 | unsigned long addr = uaddr >> 12 |
((unsigned long)ASID(vma->vm_mm) << 48); ((unsigned long)ASID(vma->vm_mm) << 48);
dsb(); dsb(ishst);
asm("tlbi vae1is, %0" : : "r" (addr)); asm("tlbi vae1is, %0" : : "r" (addr));
dsb(); dsb(ish);
} }
/* static inline void flush_tlb_range(struct vm_area_struct *vma,
* Convert calls to our calling convention. unsigned long start, unsigned long end)
*/ {
#define flush_tlb_range(vma,start,end) __cpu_flush_user_tlb_range(start,end,vma) unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48;
#define flush_tlb_kernel_range(s,e) __cpu_flush_kern_tlb_range(s,e) unsigned long addr;
start = asid | (start >> 12);
end = asid | (end >> 12);
dsb(ishst);
for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
asm("tlbi vae1is, %0" : : "r"(addr));
dsb(ish);
}
static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
unsigned long addr;
start >>= 12;
end >>= 12;
dsb(ishst);
for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
asm("tlbi vaae1is, %0" : : "r"(addr));
dsb(ish);
}
/* /*
* On AArch64, the cache coherency is handled via the set_pte_at() function. * On AArch64, the cache coherency is handled via the set_pte_at() function.
...@@ -114,7 +134,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, ...@@ -114,7 +134,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
* set_pte() does not have a DSB, so make sure that the page table * set_pte() does not have a DSB, so make sure that the page table
* write is visible. * write is visible.
*/ */
dsb(); dsb(ishst);
} }
#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
......
...@@ -20,9 +20,6 @@ extern struct cpu_topology cpu_topology[NR_CPUS]; ...@@ -20,9 +20,6 @@ extern struct cpu_topology cpu_topology[NR_CPUS];
#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) #define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling)
#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) #define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling)
#define mc_capable() (cpu_topology[0].cluster_id != -1)
#define smt_capable() (cpu_topology[0].thread_id != -1)
void init_cpu_topology(void); void init_cpu_topology(void);
void store_cpu_topology(unsigned int cpuid); void store_cpu_topology(unsigned int cpuid);
const struct cpumask *cpu_coregroup_mask(int cpu); const struct cpumask *cpu_coregroup_mask(int cpu);
......
...@@ -29,3 +29,5 @@ ...@@ -29,3 +29,5 @@
#endif #endif
#define __ARCH_WANT_SYS_CLONE #define __ARCH_WANT_SYS_CLONE
#include <uapi/asm/unistd.h> #include <uapi/asm/unistd.h>
#define NR_syscalls (__NR_syscalls)
...@@ -53,5 +53,12 @@ struct fpsimd_context { ...@@ -53,5 +53,12 @@ struct fpsimd_context {
__uint128_t vregs[32]; __uint128_t vregs[32];
}; };
/* ESR_EL1 context */
#define ESR_MAGIC 0x45535201
struct esr_context {
struct _aarch64_ctx head;
u64 esr;
};
#endif /* _UAPI__ASM_SIGCONTEXT_H */ #endif /* _UAPI__ASM_SIGCONTEXT_H */
...@@ -7,14 +7,19 @@ AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) ...@@ -7,14 +7,19 @@ AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
CFLAGS_efi-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) \ CFLAGS_efi-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) \
-I$(src)/../../../scripts/dtc/libfdt -I$(src)/../../../scripts/dtc/libfdt
CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_insn.o = -pg
CFLAGS_REMOVE_return_address.o = -pg
# Object file lists. # Object file lists.
arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \
entry-fpsimd.o process.o ptrace.o setup.o signal.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \
sys.o stacktrace.o time.o traps.o io.o vdso.o \ sys.o stacktrace.o time.o traps.o io.o vdso.o \
hyp-stub.o psci.o cpu_ops.o insn.o hyp-stub.o psci.o cpu_ops.o insn.o return_address.o
arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \
sys_compat.o sys_compat.o
arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o
arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o
arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o topology.o arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o topology.o
arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
......
...@@ -44,10 +44,15 @@ EXPORT_SYMBOL(memstart_addr); ...@@ -44,10 +44,15 @@ EXPORT_SYMBOL(memstart_addr);
/* string / mem functions */ /* string / mem functions */
EXPORT_SYMBOL(strchr); EXPORT_SYMBOL(strchr);
EXPORT_SYMBOL(strrchr); EXPORT_SYMBOL(strrchr);
EXPORT_SYMBOL(strcmp);
EXPORT_SYMBOL(strncmp);
EXPORT_SYMBOL(strlen);
EXPORT_SYMBOL(strnlen);
EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(memchr); EXPORT_SYMBOL(memchr);
EXPORT_SYMBOL(memcmp);
/* atomic bitops */ /* atomic bitops */
EXPORT_SYMBOL(set_bit); EXPORT_SYMBOL(set_bit);
...@@ -56,3 +61,7 @@ EXPORT_SYMBOL(clear_bit); ...@@ -56,3 +61,7 @@ EXPORT_SYMBOL(clear_bit);
EXPORT_SYMBOL(test_and_clear_bit); EXPORT_SYMBOL(test_and_clear_bit);
EXPORT_SYMBOL(change_bit); EXPORT_SYMBOL(change_bit);
EXPORT_SYMBOL(test_and_change_bit); EXPORT_SYMBOL(test_and_change_bit);
#ifdef CONFIG_FUNCTION_TRACER
EXPORT_SYMBOL(_mcount);
#endif
...@@ -41,3 +41,27 @@ ENTRY(fpsimd_load_state) ...@@ -41,3 +41,27 @@ ENTRY(fpsimd_load_state)
fpsimd_restore x0, 8 fpsimd_restore x0, 8
ret ret
ENDPROC(fpsimd_load_state) ENDPROC(fpsimd_load_state)
#ifdef CONFIG_KERNEL_MODE_NEON
/*
* Save the bottom n FP registers.
*
* x0 - pointer to struct fpsimd_partial_state
*/
ENTRY(fpsimd_save_partial_state)
fpsimd_save_partial x0, 1, 8, 9
ret
ENDPROC(fpsimd_load_partial_state)
/*
* Load the bottom n FP registers.
*
* x0 - pointer to struct fpsimd_partial_state
*/
ENTRY(fpsimd_load_partial_state)
fpsimd_restore_partial x0, 8, 9
ret
ENDPROC(fpsimd_load_partial_state)
#endif
/*
* arch/arm64/kernel/entry-ftrace.S
*
* Copyright (C) 2013 Linaro Limited
* Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/ftrace.h>
#include <asm/insn.h>
/*
* Gcc with -pg will put the following code in the beginning of each function:
* mov x0, x30
* bl _mcount
* [function's body ...]
* "bl _mcount" may be replaced to "bl ftrace_caller" or NOP if dynamic
* ftrace is enabled.
*
* Please note that x0 as an argument will not be used here because we can
* get lr(x30) of instrumented function at any time by winding up call stack
* as long as the kernel is compiled without -fomit-frame-pointer.
* (or CONFIG_FRAME_POINTER, this is forced on arm64)
*
* stack layout after mcount_enter in _mcount():
*
* current sp/fp => 0:+-----+
* in _mcount() | x29 | -> instrumented function's fp
* +-----+
* | x30 | -> _mcount()'s lr (= instrumented function's pc)
* old sp => +16:+-----+
* when instrumented | |
* function calls | ... |
* _mcount() | |
* | |
* instrumented => +xx:+-----+
* function's fp | x29 | -> parent's fp
* +-----+
* | x30 | -> instrumented function's lr (= parent's pc)
* +-----+
* | ... |
*/
.macro mcount_enter
stp x29, x30, [sp, #-16]!
mov x29, sp
.endm
.macro mcount_exit
ldp x29, x30, [sp], #16
ret
.endm
.macro mcount_adjust_addr rd, rn
sub \rd, \rn, #AARCH64_INSN_SIZE
.endm
/* for instrumented function's parent */
.macro mcount_get_parent_fp reg
ldr \reg, [x29]
ldr \reg, [\reg]
.endm
/* for instrumented function */
.macro mcount_get_pc0 reg
mcount_adjust_addr \reg, x30
.endm
.macro mcount_get_pc reg
ldr \reg, [x29, #8]
mcount_adjust_addr \reg, \reg
.endm
.macro mcount_get_lr reg
ldr \reg, [x29]
ldr \reg, [\reg, #8]
mcount_adjust_addr \reg, \reg
.endm
.macro mcount_get_lr_addr reg
ldr \reg, [x29]
add \reg, \reg, #8
.endm
#ifndef CONFIG_DYNAMIC_FTRACE
/*
* void _mcount(unsigned long return_address)
* @return_address: return address to instrumented function
*
* This function makes calls, if enabled, to:
* - tracer function to probe instrumented function's entry,
* - ftrace_graph_caller to set up an exit hook
*/
ENTRY(_mcount)
#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
ldr x0, =ftrace_trace_stop
ldr x0, [x0] // if ftrace_trace_stop
ret // return;
#endif
mcount_enter
ldr x0, =ftrace_trace_function
ldr x2, [x0]
adr x0, ftrace_stub
cmp x0, x2 // if (ftrace_trace_function
b.eq skip_ftrace_call // != ftrace_stub) {
mcount_get_pc x0 // function's pc
mcount_get_lr x1 // function's lr (= parent's pc)
blr x2 // (*ftrace_trace_function)(pc, lr);
#ifndef CONFIG_FUNCTION_GRAPH_TRACER
skip_ftrace_call: // return;
mcount_exit // }
#else
mcount_exit // return;
// }
skip_ftrace_call:
ldr x1, =ftrace_graph_return
ldr x2, [x1] // if ((ftrace_graph_return
cmp x0, x2 // != ftrace_stub)
b.ne ftrace_graph_caller
ldr x1, =ftrace_graph_entry // || (ftrace_graph_entry
ldr x2, [x1] // != ftrace_graph_entry_stub))
ldr x0, =ftrace_graph_entry_stub
cmp x0, x2
b.ne ftrace_graph_caller // ftrace_graph_caller();
mcount_exit
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
ENDPROC(_mcount)
#else /* CONFIG_DYNAMIC_FTRACE */
/*
* _mcount() is used to build the kernel with -pg option, but all the branch
* instructions to _mcount() are replaced to NOP initially at kernel start up,
* and later on, NOP to branch to ftrace_caller() when enabled or branch to
* NOP when disabled per-function base.
*/
ENTRY(_mcount)
ret
ENDPROC(_mcount)
/*
* void ftrace_caller(unsigned long return_address)
* @return_address: return address to instrumented function
*
* This function is a counterpart of _mcount() in 'static' ftrace, and
* makes calls to:
* - tracer function to probe instrumented function's entry,
* - ftrace_graph_caller to set up an exit hook
*/
ENTRY(ftrace_caller)
mcount_enter
mcount_get_pc0 x0 // function's pc
mcount_get_lr x1 // function's lr
.global ftrace_call
ftrace_call: // tracer(pc, lr);
nop // This will be replaced with "bl xxx"
// where xxx can be any kind of tracer.
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.global ftrace_graph_call
ftrace_graph_call: // ftrace_graph_caller();
nop // If enabled, this will be replaced
// "b ftrace_graph_caller"
#endif
mcount_exit
ENDPROC(ftrace_caller)
#endif /* CONFIG_DYNAMIC_FTRACE */
ENTRY(ftrace_stub)
ret
ENDPROC(ftrace_stub)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
* void ftrace_graph_caller(void)
*
* Called from _mcount() or ftrace_caller() when function_graph tracer is
* selected.
* This function w/ prepare_ftrace_return() fakes link register's value on
* the call stack in order to intercept instrumented function's return path
* and run return_to_handler() later on its exit.
*/
ENTRY(ftrace_graph_caller)
mcount_get_lr_addr x0 // pointer to function's saved lr
mcount_get_pc x1 // function's pc
mcount_get_parent_fp x2 // parent's fp
bl prepare_ftrace_return // prepare_ftrace_return(&lr, pc, fp)
mcount_exit
ENDPROC(ftrace_graph_caller)
/*
* void return_to_handler(void)
*
* Run ftrace_return_to_handler() before going back to parent.
* @fp is checked against the value passed by ftrace_graph_caller()
* only when CONFIG_FUNCTION_GRAPH_FP_TEST is enabled.
*/
ENTRY(return_to_handler)
str x0, [sp, #-16]!
mov x0, x29 // parent's fp
bl ftrace_return_to_handler// addr = ftrace_return_to_hander(fp);
mov x30, x0 // restore the original return address
ldr x0, [sp], #16
ret
END(return_to_handler)
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
...@@ -60,6 +60,9 @@ ...@@ -60,6 +60,9 @@
push x0, x1 push x0, x1
.if \el == 0 .if \el == 0
mrs x21, sp_el0 mrs x21, sp_el0
get_thread_info tsk // Ensure MDSCR_EL1.SS is clear,
ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug
disable_step_tsk x19, x20 // exceptions when scheduling.
.else .else
add x21, sp, #S_FRAME_SIZE add x21, sp, #S_FRAME_SIZE
.endif .endif
...@@ -259,7 +262,7 @@ el1_da: ...@@ -259,7 +262,7 @@ el1_da:
* Data abort handling * Data abort handling
*/ */
mrs x0, far_el1 mrs x0, far_el1
enable_dbg_if_not_stepping x2 enable_dbg
// re-enable interrupts if they were enabled in the aborted context // re-enable interrupts if they were enabled in the aborted context
tbnz x23, #7, 1f // PSR_I_BIT tbnz x23, #7, 1f // PSR_I_BIT
enable_irq enable_irq
...@@ -275,6 +278,7 @@ el1_sp_pc: ...@@ -275,6 +278,7 @@ el1_sp_pc:
* Stack or PC alignment exception handling * Stack or PC alignment exception handling
*/ */
mrs x0, far_el1 mrs x0, far_el1
enable_dbg
mov x1, x25 mov x1, x25
mov x2, sp mov x2, sp
b do_sp_pc_abort b do_sp_pc_abort
...@@ -282,6 +286,7 @@ el1_undef: ...@@ -282,6 +286,7 @@ el1_undef:
/* /*
* Undefined instruction * Undefined instruction
*/ */
enable_dbg
mov x0, sp mov x0, sp
b do_undefinstr b do_undefinstr
el1_dbg: el1_dbg:
...@@ -294,10 +299,11 @@ el1_dbg: ...@@ -294,10 +299,11 @@ el1_dbg:
mrs x0, far_el1 mrs x0, far_el1
mov x2, sp // struct pt_regs mov x2, sp // struct pt_regs
bl do_debug_exception bl do_debug_exception
enable_dbg
kernel_exit 1 kernel_exit 1
el1_inv: el1_inv:
// TODO: add support for undefined instructions in kernel mode // TODO: add support for undefined instructions in kernel mode
enable_dbg
mov x0, sp mov x0, sp
mov x1, #BAD_SYNC mov x1, #BAD_SYNC
mrs x2, esr_el1 mrs x2, esr_el1
...@@ -307,7 +313,7 @@ ENDPROC(el1_sync) ...@@ -307,7 +313,7 @@ ENDPROC(el1_sync)
.align 6 .align 6
el1_irq: el1_irq:
kernel_entry 1 kernel_entry 1
enable_dbg_if_not_stepping x0 enable_dbg
#ifdef CONFIG_TRACE_IRQFLAGS #ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_off bl trace_hardirqs_off
#endif #endif
...@@ -332,8 +338,7 @@ ENDPROC(el1_irq) ...@@ -332,8 +338,7 @@ ENDPROC(el1_irq)
#ifdef CONFIG_PREEMPT #ifdef CONFIG_PREEMPT
el1_preempt: el1_preempt:
mov x24, lr mov x24, lr
1: enable_dbg 1: bl preempt_schedule_irq // irq en/disable is done inside
bl preempt_schedule_irq // irq en/disable is done inside
ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS
tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling? tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling?
ret x24 ret x24
...@@ -349,7 +354,7 @@ el0_sync: ...@@ -349,7 +354,7 @@ el0_sync:
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
cmp x24, #ESR_EL1_EC_SVC64 // SVC in 64-bit state cmp x24, #ESR_EL1_EC_SVC64 // SVC in 64-bit state
b.eq el0_svc b.eq el0_svc
adr lr, ret_from_exception adr lr, ret_to_user
cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0
b.eq el0_da b.eq el0_da
cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0
...@@ -378,7 +383,7 @@ el0_sync_compat: ...@@ -378,7 +383,7 @@ el0_sync_compat:
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
cmp x24, #ESR_EL1_EC_SVC32 // SVC in 32-bit state cmp x24, #ESR_EL1_EC_SVC32 // SVC in 32-bit state
b.eq el0_svc_compat b.eq el0_svc_compat
adr lr, ret_from_exception adr lr, ret_to_user
cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0
b.eq el0_da b.eq el0_da
cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0
...@@ -423,11 +428,8 @@ el0_da: ...@@ -423,11 +428,8 @@ el0_da:
*/ */
mrs x0, far_el1 mrs x0, far_el1
bic x0, x0, #(0xff << 56) bic x0, x0, #(0xff << 56)
disable_step x1
isb
enable_dbg
// enable interrupts before calling the main handler // enable interrupts before calling the main handler
enable_irq enable_dbg_and_irq
mov x1, x25 mov x1, x25
mov x2, sp mov x2, sp
b do_mem_abort b do_mem_abort
...@@ -436,11 +438,8 @@ el0_ia: ...@@ -436,11 +438,8 @@ el0_ia:
* Instruction abort handling * Instruction abort handling
*/ */
mrs x0, far_el1 mrs x0, far_el1
disable_step x1
isb
enable_dbg
// enable interrupts before calling the main handler // enable interrupts before calling the main handler
enable_irq enable_dbg_and_irq
orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts
mov x2, sp mov x2, sp
b do_mem_abort b do_mem_abort
...@@ -448,6 +447,7 @@ el0_fpsimd_acc: ...@@ -448,6 +447,7 @@ el0_fpsimd_acc:
/* /*
* Floating Point or Advanced SIMD access * Floating Point or Advanced SIMD access
*/ */
enable_dbg
mov x0, x25 mov x0, x25
mov x1, sp mov x1, sp
b do_fpsimd_acc b do_fpsimd_acc
...@@ -455,6 +455,7 @@ el0_fpsimd_exc: ...@@ -455,6 +455,7 @@ el0_fpsimd_exc:
/* /*
* Floating Point or Advanced SIMD exception * Floating Point or Advanced SIMD exception
*/ */
enable_dbg
mov x0, x25 mov x0, x25
mov x1, sp mov x1, sp
b do_fpsimd_exc b do_fpsimd_exc
...@@ -463,11 +464,8 @@ el0_sp_pc: ...@@ -463,11 +464,8 @@ el0_sp_pc:
* Stack or PC alignment exception handling * Stack or PC alignment exception handling
*/ */
mrs x0, far_el1 mrs x0, far_el1
disable_step x1
isb
enable_dbg
// enable interrupts before calling the main handler // enable interrupts before calling the main handler
enable_irq enable_dbg_and_irq
mov x1, x25 mov x1, x25
mov x2, sp mov x2, sp
b do_sp_pc_abort b do_sp_pc_abort
...@@ -475,9 +473,9 @@ el0_undef: ...@@ -475,9 +473,9 @@ el0_undef:
/* /*
* Undefined instruction * Undefined instruction
*/ */
mov x0, sp
// enable interrupts before calling the main handler // enable interrupts before calling the main handler
enable_irq enable_dbg_and_irq
mov x0, sp
b do_undefinstr b do_undefinstr
el0_dbg: el0_dbg:
/* /*
...@@ -485,11 +483,13 @@ el0_dbg: ...@@ -485,11 +483,13 @@ el0_dbg:
*/ */
tbnz x24, #0, el0_inv // EL0 only tbnz x24, #0, el0_inv // EL0 only
mrs x0, far_el1 mrs x0, far_el1
disable_step x1
mov x1, x25 mov x1, x25
mov x2, sp mov x2, sp
b do_debug_exception bl do_debug_exception
enable_dbg
b ret_to_user
el0_inv: el0_inv:
enable_dbg
mov x0, sp mov x0, sp
mov x1, #BAD_SYNC mov x1, #BAD_SYNC
mrs x2, esr_el1 mrs x2, esr_el1
...@@ -500,15 +500,12 @@ ENDPROC(el0_sync) ...@@ -500,15 +500,12 @@ ENDPROC(el0_sync)
el0_irq: el0_irq:
kernel_entry 0 kernel_entry 0
el0_irq_naked: el0_irq_naked:
disable_step x1
isb
enable_dbg enable_dbg
#ifdef CONFIG_TRACE_IRQFLAGS #ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_off bl trace_hardirqs_off
#endif #endif
irq_handler irq_handler
get_thread_info tsk
#ifdef CONFIG_TRACE_IRQFLAGS #ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_on bl trace_hardirqs_on
...@@ -516,14 +513,6 @@ el0_irq_naked: ...@@ -516,14 +513,6 @@ el0_irq_naked:
b ret_to_user b ret_to_user
ENDPROC(el0_irq) ENDPROC(el0_irq)
/*
* This is the return code to user mode for abort handlers
*/
ret_from_exception:
get_thread_info tsk
b ret_to_user
ENDPROC(ret_from_exception)
/* /*
* Register switch for AArch64. The callee-saved registers need to be saved * Register switch for AArch64. The callee-saved registers need to be saved
* and restored. On entry: * and restored. On entry:
...@@ -563,10 +552,7 @@ ret_fast_syscall: ...@@ -563,10 +552,7 @@ ret_fast_syscall:
ldr x1, [tsk, #TI_FLAGS] ldr x1, [tsk, #TI_FLAGS]
and x2, x1, #_TIF_WORK_MASK and x2, x1, #_TIF_WORK_MASK
cbnz x2, fast_work_pending cbnz x2, fast_work_pending
tbz x1, #TIF_SINGLESTEP, fast_exit enable_step_tsk x1, x2
disable_dbg
enable_step x2
fast_exit:
kernel_exit 0, ret = 1 kernel_exit 0, ret = 1
/* /*
...@@ -576,7 +562,7 @@ fast_work_pending: ...@@ -576,7 +562,7 @@ fast_work_pending:
str x0, [sp, #S_X0] // returned x0 str x0, [sp, #S_X0] // returned x0
work_pending: work_pending:
tbnz x1, #TIF_NEED_RESCHED, work_resched tbnz x1, #TIF_NEED_RESCHED, work_resched
/* TIF_SIGPENDING or TIF_NOTIFY_RESUME case */ /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */
ldr x2, [sp, #S_PSTATE] ldr x2, [sp, #S_PSTATE]
mov x0, sp // 'regs' mov x0, sp // 'regs'
tst x2, #PSR_MODE_MASK // user mode regs? tst x2, #PSR_MODE_MASK // user mode regs?
...@@ -585,7 +571,6 @@ work_pending: ...@@ -585,7 +571,6 @@ work_pending:
bl do_notify_resume bl do_notify_resume
b ret_to_user b ret_to_user
work_resched: work_resched:
enable_dbg
bl schedule bl schedule
/* /*
...@@ -596,9 +581,7 @@ ret_to_user: ...@@ -596,9 +581,7 @@ ret_to_user:
ldr x1, [tsk, #TI_FLAGS] ldr x1, [tsk, #TI_FLAGS]
and x2, x1, #_TIF_WORK_MASK and x2, x1, #_TIF_WORK_MASK
cbnz x2, work_pending cbnz x2, work_pending
tbz x1, #TIF_SINGLESTEP, no_work_pending enable_step_tsk x1, x2
disable_dbg
enable_step x2
no_work_pending: no_work_pending:
kernel_exit 0, ret = 0 kernel_exit 0, ret = 0
ENDPROC(ret_to_user) ENDPROC(ret_to_user)
...@@ -625,14 +608,11 @@ el0_svc: ...@@ -625,14 +608,11 @@ el0_svc:
mov sc_nr, #__NR_syscalls mov sc_nr, #__NR_syscalls
el0_svc_naked: // compat entry point el0_svc_naked: // compat entry point
stp x0, scno, [sp, #S_ORIG_X0] // save the original x0 and syscall number stp x0, scno, [sp, #S_ORIG_X0] // save the original x0 and syscall number
disable_step x16 enable_dbg_and_irq
isb
enable_dbg
enable_irq
get_thread_info tsk ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks
ldr x16, [tsk, #TI_FLAGS] // check for syscall tracing tst x16, #_TIF_SYSCALL_WORK
tbnz x16, #TIF_SYSCALL_TRACE, __sys_trace // are we tracing syscalls? b.ne __sys_trace
adr lr, ret_fast_syscall // return address adr lr, ret_fast_syscall // return address
cmp scno, sc_nr // check upper syscall limit cmp scno, sc_nr // check upper syscall limit
b.hs ni_sys b.hs ni_sys
...@@ -648,9 +628,8 @@ ENDPROC(el0_svc) ...@@ -648,9 +628,8 @@ ENDPROC(el0_svc)
* switches, and waiting for our parent to respond. * switches, and waiting for our parent to respond.
*/ */
__sys_trace: __sys_trace:
mov x1, sp mov x0, sp
mov w0, #0 // trace entry bl syscall_trace_enter
bl syscall_trace
adr lr, __sys_trace_return // return address adr lr, __sys_trace_return // return address
uxtw scno, w0 // syscall number (possibly new) uxtw scno, w0 // syscall number (possibly new)
mov x1, sp // pointer to regs mov x1, sp // pointer to regs
...@@ -665,9 +644,8 @@ __sys_trace: ...@@ -665,9 +644,8 @@ __sys_trace:
__sys_trace_return: __sys_trace_return:
str x0, [sp] // save returned x0 str x0, [sp] // save returned x0
mov x1, sp mov x0, sp
mov w0, #1 // trace exit bl syscall_trace_exit
bl syscall_trace
b ret_to_user b ret_to_user
/* /*
......
...@@ -34,6 +34,60 @@ ...@@ -34,6 +34,60 @@
#define FPEXC_IXF (1 << 4) #define FPEXC_IXF (1 << 4)
#define FPEXC_IDF (1 << 7) #define FPEXC_IDF (1 << 7)
/*
* In order to reduce the number of times the FPSIMD state is needlessly saved
* and restored, we need to keep track of two things:
* (a) for each task, we need to remember which CPU was the last one to have
* the task's FPSIMD state loaded into its FPSIMD registers;
* (b) for each CPU, we need to remember which task's userland FPSIMD state has
* been loaded into its FPSIMD registers most recently, or whether it has
* been used to perform kernel mode NEON in the meantime.
*
* For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to
* the id of the current CPU everytime the state is loaded onto a CPU. For (b),
* we add the per-cpu variable 'fpsimd_last_state' (below), which contains the
* address of the userland FPSIMD state of the task that was loaded onto the CPU
* the most recently, or NULL if kernel mode NEON has been performed after that.
*
* With this in place, we no longer have to restore the next FPSIMD state right
* when switching between tasks. Instead, we can defer this check to userland
* resume, at which time we verify whether the CPU's fpsimd_last_state and the
* task's fpsimd_state.cpu are still mutually in sync. If this is the case, we
* can omit the FPSIMD restore.
*
* As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to
* indicate whether or not the userland FPSIMD state of the current task is
* present in the registers. The flag is set unless the FPSIMD registers of this
* CPU currently contain the most recent userland FPSIMD state of the current
* task.
*
* For a certain task, the sequence may look something like this:
* - the task gets scheduled in; if both the task's fpsimd_state.cpu field
* contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu
* variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is
* cleared, otherwise it is set;
*
* - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's
* userland FPSIMD state is copied from memory to the registers, the task's
* fpsimd_state.cpu field is set to the id of the current CPU, the current
* CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the
* TIF_FOREIGN_FPSTATE flag is cleared;
*
* - the task executes an ordinary syscall; upon return to userland, the
* TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is
* restored;
*
* - the task executes a syscall which executes some NEON instructions; this is
* preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD
* register contents to memory, clears the fpsimd_last_state per-cpu variable
* and sets the TIF_FOREIGN_FPSTATE flag;
*
* - the task gets preempted after kernel_neon_end() is called; as we have not
* returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so
* whatever is in the FPSIMD registers is not saved to memory, but discarded.
*/
static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
/* /*
* Trapped FP/ASIMD access. * Trapped FP/ASIMD access.
*/ */
...@@ -72,43 +126,137 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) ...@@ -72,43 +126,137 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
void fpsimd_thread_switch(struct task_struct *next) void fpsimd_thread_switch(struct task_struct *next)
{ {
/* check if not kernel threads */ /*
if (current->mm) * Save the current FPSIMD state to memory, but only if whatever is in
* the registers is in fact the most recent userland FPSIMD state of
* 'current'.
*/
if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
fpsimd_save_state(&current->thread.fpsimd_state); fpsimd_save_state(&current->thread.fpsimd_state);
if (next->mm)
fpsimd_load_state(&next->thread.fpsimd_state); if (next->mm) {
/*
* If we are switching to a task whose most recent userland
* FPSIMD state is already in the registers of *this* cpu,
* we can skip loading the state from memory. Otherwise, set
* the TIF_FOREIGN_FPSTATE flag so the state will be loaded
* upon the next return to userland.
*/
struct fpsimd_state *st = &next->thread.fpsimd_state;
if (__this_cpu_read(fpsimd_last_state) == st
&& st->cpu == smp_processor_id())
clear_ti_thread_flag(task_thread_info(next),
TIF_FOREIGN_FPSTATE);
else
set_ti_thread_flag(task_thread_info(next),
TIF_FOREIGN_FPSTATE);
}
} }
void fpsimd_flush_thread(void) void fpsimd_flush_thread(void)
{ {
preempt_disable();
memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
fpsimd_load_state(&current->thread.fpsimd_state); set_thread_flag(TIF_FOREIGN_FPSTATE);
}
/*
* Save the userland FPSIMD state of 'current' to memory, but only if the state
* currently held in the registers does in fact belong to 'current'
*/
void fpsimd_preserve_current_state(void)
{
preempt_disable();
if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
fpsimd_save_state(&current->thread.fpsimd_state);
preempt_enable();
}
/*
* Load the userland FPSIMD state of 'current' from memory, but only if the
* FPSIMD state already held in the registers is /not/ the most recent FPSIMD
* state of 'current'
*/
void fpsimd_restore_current_state(void)
{
preempt_disable();
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
struct fpsimd_state *st = &current->thread.fpsimd_state;
fpsimd_load_state(st);
this_cpu_write(fpsimd_last_state, st);
st->cpu = smp_processor_id();
}
preempt_enable();
}
/*
* Load an updated userland FPSIMD state for 'current' from memory and set the
* flag that indicates that the FPSIMD register contents are the most recent
* FPSIMD state of 'current'
*/
void fpsimd_update_current_state(struct fpsimd_state *state)
{
preempt_disable();
fpsimd_load_state(state);
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
struct fpsimd_state *st = &current->thread.fpsimd_state;
this_cpu_write(fpsimd_last_state, st);
st->cpu = smp_processor_id();
}
preempt_enable(); preempt_enable();
} }
/*
* Invalidate live CPU copies of task t's FPSIMD state
*/
void fpsimd_flush_task_state(struct task_struct *t)
{
t->thread.fpsimd_state.cpu = NR_CPUS;
}
#ifdef CONFIG_KERNEL_MODE_NEON #ifdef CONFIG_KERNEL_MODE_NEON
static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate);
static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
/* /*
* Kernel-side NEON support functions * Kernel-side NEON support functions
*/ */
void kernel_neon_begin(void) void kernel_neon_begin_partial(u32 num_regs)
{ {
/* Avoid using the NEON in interrupt context */ if (in_interrupt()) {
BUG_ON(in_interrupt()); struct fpsimd_partial_state *s = this_cpu_ptr(
preempt_disable(); in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
if (current->mm) BUG_ON(num_regs > 32);
fpsimd_save_state(&current->thread.fpsimd_state); fpsimd_save_partial_state(s, roundup(num_regs, 2));
} else {
/*
* Save the userland FPSIMD state if we have one and if we
* haven't done so already. Clear fpsimd_last_state to indicate
* that there is no longer userland FPSIMD state in the
* registers.
*/
preempt_disable();
if (current->mm &&
!test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
fpsimd_save_state(&current->thread.fpsimd_state);
this_cpu_write(fpsimd_last_state, NULL);
}
} }
EXPORT_SYMBOL(kernel_neon_begin); EXPORT_SYMBOL(kernel_neon_begin_partial);
void kernel_neon_end(void) void kernel_neon_end(void)
{ {
if (current->mm) if (in_interrupt()) {
fpsimd_load_state(&current->thread.fpsimd_state); struct fpsimd_partial_state *s = this_cpu_ptr(
in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
preempt_enable(); fpsimd_load_partial_state(s);
} else {
preempt_enable();
}
} }
EXPORT_SYMBOL(kernel_neon_end); EXPORT_SYMBOL(kernel_neon_end);
...@@ -120,12 +268,12 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self, ...@@ -120,12 +268,12 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
{ {
switch (cmd) { switch (cmd) {
case CPU_PM_ENTER: case CPU_PM_ENTER:
if (current->mm) if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
fpsimd_save_state(&current->thread.fpsimd_state); fpsimd_save_state(&current->thread.fpsimd_state);
break; break;
case CPU_PM_EXIT: case CPU_PM_EXIT:
if (current->mm) if (current->mm)
fpsimd_load_state(&current->thread.fpsimd_state); set_thread_flag(TIF_FOREIGN_FPSTATE);
break; break;
case CPU_PM_ENTER_FAILED: case CPU_PM_ENTER_FAILED:
default: default:
......
This diff is collapsed.
...@@ -342,11 +342,9 @@ ENTRY(set_cpu_boot_mode_flag) ...@@ -342,11 +342,9 @@ ENTRY(set_cpu_boot_mode_flag)
cmp w20, #BOOT_CPU_MODE_EL2 cmp w20, #BOOT_CPU_MODE_EL2
b.ne 1f b.ne 1f
add x1, x1, #4 add x1, x1, #4
1: dc cvac, x1 // Clean potentially dirty cache line 1: str w20, [x1] // This CPU has booted in EL1
dsb sy dmb sy
str w20, [x1] // This CPU has booted in EL1 dc ivac, x1 // Invalidate potentially stale cache line
dc civac, x1 // Clean&invalidate potentially stale cache line
dsb sy
ret ret
ENDPROC(set_cpu_boot_mode_flag) ENDPROC(set_cpu_boot_mode_flag)
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#define pr_fmt(fmt) "hw-breakpoint: " fmt #define pr_fmt(fmt) "hw-breakpoint: " fmt
#include <linux/compat.h>
#include <linux/cpu_pm.h> #include <linux/cpu_pm.h>
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/hw_breakpoint.h> #include <linux/hw_breakpoint.h>
...@@ -27,7 +28,6 @@ ...@@ -27,7 +28,6 @@
#include <linux/ptrace.h> #include <linux/ptrace.h>
#include <linux/smp.h> #include <linux/smp.h>
#include <asm/compat.h>
#include <asm/current.h> #include <asm/current.h>
#include <asm/debug-monitors.h> #include <asm/debug-monitors.h>
#include <asm/hw_breakpoint.h> #include <asm/hw_breakpoint.h>
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -35,7 +35,7 @@ ...@@ -35,7 +35,7 @@
* ldp x29, x30, [sp] * ldp x29, x30, [sp]
* add sp, sp, #0x10 * add sp, sp, #0x10
*/ */
int unwind_frame(struct stackframe *frame) int notrace unwind_frame(struct stackframe *frame)
{ {
unsigned long high, low; unsigned long high, low;
unsigned long fp = frame->fp; unsigned long fp = frame->fp;
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include <linux/clockchips.h>
#include <linux/export.h> #include <linux/export.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/interrupt.h> #include <linux/interrupt.h>
...@@ -69,6 +70,8 @@ void __init time_init(void) ...@@ -69,6 +70,8 @@ void __init time_init(void)
of_clk_init(NULL); of_clk_init(NULL);
clocksource_of_init(); clocksource_of_init();
tick_setup_hrtimer_broadcast();
arch_timer_rate = arch_timer_get_rate(); arch_timer_rate = arch_timer_get_rate();
if (!arch_timer_rate) if (!arch_timer_rate)
panic("Unable to initialise architected timer.\n"); panic("Unable to initialise architected timer.\n");
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment