Commit 68674f94 authored by Linus Torvalds

x86: don't use REP_GOOD or ERMS for small memory copies

The modern target to use is FSRM (Fast Short REP MOVS), and the other
cases should only be used for bigger areas (ie mainly things like page
copying and clearing).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 6a8f57ae
...@@ -10,13 +10,6 @@ ...@@ -10,13 +10,6 @@
.section .noinstr.text, "ax" .section .noinstr.text, "ax"
/*
* We build a jump to memcpy_orig by default which gets NOPped out on
* the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
* have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
* to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
*/
/* /*
* memcpy - Copy a memory block. * memcpy - Copy a memory block.
* *
...@@ -27,17 +20,21 @@ ...@@ -27,17 +20,21 @@
* *
* Output: * Output:
* rax original destination * rax original destination
*
* The FSRM alternative should be done inline (avoiding the call and
* the disgusting return handling), but that would require some help
* from the compiler for better calling conventions.
*
* The 'rep movsb' itself is small enough to replace the call, but the
* two register moves blow up the code. And one of them is "needed"
* only for the return value that is the same as the destination input,
* which the compiler could/should do much better anyway.
*/ */
SYM_TYPED_FUNC_START(__memcpy) SYM_TYPED_FUNC_START(__memcpy)
ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ ALTERNATIVE "jmp memcpy_orig", "", X86_FEATURE_FSRM
"jmp memcpy_erms", X86_FEATURE_ERMS
movq %rdi, %rax movq %rdi, %rax
movq %rdx, %rcx movq %rdx, %rcx
shrq $3, %rcx
andl $7, %edx
rep movsq
movl %edx, %ecx
rep movsb rep movsb
RET RET
SYM_FUNC_END(__memcpy) SYM_FUNC_END(__memcpy)
...@@ -46,17 +43,6 @@ EXPORT_SYMBOL(__memcpy) ...@@ -46,17 +43,6 @@ EXPORT_SYMBOL(__memcpy)
SYM_FUNC_ALIAS(memcpy, __memcpy) SYM_FUNC_ALIAS(memcpy, __memcpy)
EXPORT_SYMBOL(memcpy) EXPORT_SYMBOL(memcpy)
/*
* memcpy_erms() - enhanced fast string memcpy. This is faster and
* simpler than memcpy. Use memcpy_erms when possible.
*/
SYM_FUNC_START_LOCAL(memcpy_erms)
movq %rdi, %rax
movq %rdx, %rcx
rep movsb
RET
SYM_FUNC_END(memcpy_erms)
SYM_FUNC_START_LOCAL(memcpy_orig) SYM_FUNC_START_LOCAL(memcpy_orig)
movq %rdi, %rax movq %rdi, %rax
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment