Commit da7bc9c5 authored by Dan Williams, committed by Ingo Molnar

x86/asm/memcpy_mcsafe: Remove loop unrolling

In preparation for teaching memcpy_mcsafe() to return 'bytes remaining'
rather than pass/fail, simplify the implementation by removing the loop
unrolling. The unrolling complicates the fault handling for negligible
benefit, given that modern CPUs perform loop stream detection. (A C-level
sketch of the resulting copy loop follows the trailers below.)
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: hch@lst.de
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-nvdimm@lists.01.org
Link: http://lkml.kernel.org/r/152539237092.31796.9115692316555638048.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 67b8d5c7
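
To make the resulting shape concrete, here is a user-space C sketch of the
simplified copy: a byte loop to align the source, a single 8-byte word loop,
and a byte loop for the tail. This is only an illustration of the assembly
in the diff below; the function name is hypothetical, and real machine-check
recovery happens via the kernel exception table, which plain C cannot express.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/*
 * Illustrative user-space sketch (not kernel code). It mirrors the
 * structure of the simplified __memcpy_mcsafe in the diff below:
 * byte copy to align the source, an 8-byte word loop, then the tail.
 * The kernel version returns -EFAULT from an exception-table fixup
 * when a load consumes a machine check; only the success path
 * (return 0) is modeled here.
 */
static int sketch_memcpy_mcsafe(void *dst, const void *src, size_t cnt)
{
	unsigned char *d = dst;
	const unsigned char *s = src;

	/* Leading bytes until the source is 8-byte aligned
	 * (.L_copy_leading_bytes in the assembly). */
	while (cnt && ((uintptr_t)s & 7)) {
		*d++ = *s++;
		cnt--;
	}

	/* Whole 8-byte words (.L_copy_words). */
	while (cnt >= 8) {
		uint64_t w;

		memcpy(&w, s, sizeof(w));	/* the guarded movq (%rsi), %r8 */
		memcpy(d, &w, sizeof(w));
		s += sizeof(w);
		d += sizeof(w);
		cnt -= sizeof(w);
	}

	/* Trailing bytes (.L_copy_trailing_bytes). */
	while (cnt) {
		*d++ = *s++;
		cnt--;
	}

	return 0;	/* in the kernel, the fixup path returns -EFAULT */
}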
@@ -116,7 +116,7 @@ int strcmp(const char *cs, const char *ct);
 #endif
 
 #define __HAVE_ARCH_MEMCPY_MCSAFE 1
-__must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt);
+__must_check int __memcpy_mcsafe(void *dst, const void *src, size_t cnt);
 DECLARE_STATIC_KEY_FALSE(mcsafe_key);
 
 /**
@@ -138,7 +138,7 @@ memcpy_mcsafe(void *dst, const void *src, size_t cnt)
 {
 #ifdef CONFIG_X86_MCE
 	if (static_branch_unlikely(&mcsafe_key))
-		return memcpy_mcsafe_unrolled(dst, src, cnt);
+		return __memcpy_mcsafe(dst, src, cnt);
 	else
 #endif
 		memcpy(dst, src, cnt);
...
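
One header-level point worth noting: the declaration stays __must_check, so
callers have to consume the return code. A hedged usage sketch follows, with
an invented caller name, under the current pass/fail contract (0 or -EFAULT):

/* Hypothetical caller (names invented for illustration). Under the
 * contract in this patch, memcpy_mcsafe() returns 0 on success or
 * -EFAULT if reading the source consumed a machine check. */
static int example_read_pmem(void *dst, const void *pmem_src, size_t len)
{
	if (memcpy_mcsafe(dst, pmem_src, len))
		return -EIO;	/* report a media error, not partial data */
	return 0;
}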
@@ -184,11 +184,11 @@ ENDPROC(memcpy_orig)
 
 #ifndef CONFIG_UML
 /*
- * memcpy_mcsafe_unrolled - memory copy with machine check exception handling
+ * __memcpy_mcsafe - memory copy with machine check exception handling
  * Note that we only catch machine checks when reading the source addresses.
  * Writes to target are posted and don't generate machine checks.
  */
-ENTRY(memcpy_mcsafe_unrolled)
+ENTRY(__memcpy_mcsafe)
 	cmpl $8, %edx
 	/* Less than 8 bytes? Go to byte copy loop */
 	jb .L_no_whole_words
@@ -213,49 +213,18 @@ ENTRY(memcpy_mcsafe_unrolled)
 	jnz .L_copy_leading_bytes
 
 .L_8byte_aligned:
-	/* Figure out how many whole cache lines (64-bytes) to copy */
-	movl %edx, %ecx
-	andl $63, %edx
-	shrl $6, %ecx
-	jz .L_no_whole_cache_lines
-
-	/* Loop copying whole cache lines */
-.L_cache_w0: movq (%rsi), %r8
-.L_cache_w1: movq 1*8(%rsi), %r9
-.L_cache_w2: movq 2*8(%rsi), %r10
-.L_cache_w3: movq 3*8(%rsi), %r11
-	movq %r8, (%rdi)
-	movq %r9, 1*8(%rdi)
-	movq %r10, 2*8(%rdi)
-	movq %r11, 3*8(%rdi)
-.L_cache_w4: movq 4*8(%rsi), %r8
-.L_cache_w5: movq 5*8(%rsi), %r9
-.L_cache_w6: movq 6*8(%rsi), %r10
-.L_cache_w7: movq 7*8(%rsi), %r11
-	movq %r8, 4*8(%rdi)
-	movq %r9, 5*8(%rdi)
-	movq %r10, 6*8(%rdi)
-	movq %r11, 7*8(%rdi)
-	leaq 64(%rsi), %rsi
-	leaq 64(%rdi), %rdi
-	decl %ecx
-	jnz .L_cache_w0
-
-	/* Are there any trailing 8-byte words? */
-.L_no_whole_cache_lines:
 	movl %edx, %ecx
 	andl $7, %edx
 	shrl $3, %ecx
 	jz .L_no_whole_words
 
-	/* Copy trailing words */
-.L_copy_trailing_words:
+.L_copy_words:
 	movq (%rsi), %r8
-	mov %r8, (%rdi)
-	leaq 8(%rsi), %rsi
-	leaq 8(%rdi), %rdi
+	movq %r8, (%rdi)
+	addq $8, %rsi
+	addq $8, %rdi
 	decl %ecx
-	jnz .L_copy_trailing_words
+	jnz .L_copy_words
 
 	/* Any trailing bytes? */
 .L_no_whole_words:
@@ -276,8 +245,8 @@ ENTRY(memcpy_mcsafe_unrolled)
 .L_done_memcpy_trap:
 	xorq %rax, %rax
 	ret
-ENDPROC(memcpy_mcsafe_unrolled)
-EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled)
+ENDPROC(__memcpy_mcsafe)
+EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
 
 	.section .fixup, "ax"
 	/* Return -EFAULT for any failure */
@@ -288,14 +257,6 @@ EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled)
 	.previous
 
 	_ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_copy_words, .L_memcpy_mcsafe_fail)
 	_ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail)
 #endif
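
Two things are worth noting about the fixup plumbing above. First, the
exception table shrinks along with the unrolling: the eleven
_ASM_EXTABLE_FAULT entries (one per unrolled load, .L_cache_w0 through
.L_cache_w7, plus the word and byte loops) collapse to three, one per
remaining loop. Second, the commit message anticipates a 'bytes remaining'
return value; under that future contract (an assumption here, not something
this patch implements), a caller could salvage partial progress, roughly:

/* Hypothetical sketch of the anticipated follow-up semantics, where
 * the return value is the number of bytes left uncopied when a
 * machine check interrupts the copy (0 on success). Not this patch. */
size_t rem = memcpy_mcsafe(dst, src, len);

if (rem)
	pr_warn("recovered %zu of %zu bytes before poison\n",
		len - rem, len);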