Commit 2f19e06a authored by Fenghua Yu, committed by H. Peter Anvin

x86, mem: memset_64.S: Optimize memset by enhanced REP MOVSB/STOSB

Support memset() with enhanced rep stosb. On processors supporting enhanced
REP MOVSB/STOSB, the alternative memset_c_e function using enhanced rep stosb
overrides the fast string alternative memset_c and the original function.
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Link: http://lkml.kernel.org/r/1305671358-14478-10-git-send-email-fenghua.yu@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
parent 057e05c1
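For readers outside the kernel tree, here is a minimal user-space sketch of the rep stosb idiom that the replacement body in this patch relies on. The function name memset_erms and the inline-asm wrapper are illustrative, not part of the patch; it assumes x86-64 and a GCC/Clang toolchain.

#include <stddef.h>

/*
 * Illustrative user-space analogue of the .Lmemset_c_e replacement body
 * added below (NOT the kernel's code): with ERMS, a single byte-granular
 * "rep stosb" lets the microcode handle alignment and chunking, so no
 * word-sized setup loop is needed.
 */
static void *memset_erms(void *dest, int c, size_t n)
{
	void *ret = dest;		/* memset() returns the original pointer */

	asm volatile("rep stosb"
		     : "+D" (dest), "+c" (n)	/* rdi = destination, rcx = count */
		     : "a" (c)			/* al = fill byte */
		     : "memory");
	return ret;
}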
arch/x86/lib/memset_64.S
@@ -2,9 +2,13 @@
 
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
 
 /*
- * ISO C memset - set a memory block to a byte value.
+ * ISO C memset - set a memory block to a byte value. This function uses fast
+ * string to get better performance than the original function. The code is
+ * simpler and shorter than the original function as well.
  *
  * rdi   destination
  * rsi   value (char)
@@ -31,6 +35,28 @@
 .Lmemset_e:
 	.previous
 
+/*
+ * ISO C memset - set a memory block to a byte value. This function uses
+ * enhanced rep stosb to override the fast string function.
+ * The code is simpler and shorter than the fast string function as well.
+ *
+ * rdi   destination
+ * rsi   value (char)
+ * rdx   count (bytes)
+ *
+ * rax   original destination
+ */
+	.section .altinstr_replacement, "ax", @progbits
+.Lmemset_c_e:
+	movq %rdi,%r9
+	movb %sil,%al
+	movl %edx,%ecx
+	rep stosb
+	movq %r9,%rax
+	ret
+.Lmemset_e_e:
+	.previous
+
 ENTRY(memset)
 ENTRY(__memset)
 	CFI_STARTPROC
@@ -112,16 +138,20 @@ ENTRY(__memset)
 ENDPROC(memset)
 ENDPROC(__memset)
 
-	/* Some CPUs run faster using the string instructions.
-	   It is also a lot simpler. Use this when possible */
-
-#include <asm/cpufeature.h>
-
+	/* Some CPUs support enhanced REP MOVSB/STOSB feature.
+	 * It is recommended to use this when possible.
+	 *
+	 * If enhanced REP MOVSB/STOSB feature is not available, use fast string
+	 * instructions.
+	 *
+	 * Otherwise, use original memset function.
+	 *
+	 * In .altinstructions section, ERMS feature is placed after REP_GOOD
+	 * feature to implement the right patch order.
+	 */
 	.section .altinstructions,"a"
-	.align 8
-	.quad memset
-	.quad .Lmemset_c
-	.word X86_FEATURE_REP_GOOD
-	.byte .Lfinal - memset
-	.byte .Lmemset_e - .Lmemset_c
+	altinstruction_entry memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
+			     .Lfinal-memset,.Lmemset_e-.Lmemset_c
+	altinstruction_entry memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
+			     .Lfinal-memset,.Lmemset_e_e-.Lmemset_c_e
 	.previous
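The X86_FEATURE_ERMS flag that the second altinstruction_entry keys on is advertised by the CPU in CPUID leaf 7, sub-leaf 0, EBX bit 9 ("Enhanced REP MOVSB/STOSB"). A small stand-alone check, assuming a GCC/Clang toolchain with <cpuid.h>; the program is illustrative and not part of the patch:

#include <cpuid.h>	/* GCC/Clang wrapper for the CPUID instruction */
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* X86_FEATURE_ERMS corresponds to CPUID.(EAX=07H,ECX=0):EBX[bit 9]. */
	if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx) && (ebx & (1u << 9)))
		puts("ERMS: the rep stosb variant would be patched in");
	else
		puts("no ERMS: the fast-string or original memset remains");
	return 0;
}

Because the ERMS entry is emitted after the REP_GOOD entry, a CPU that sets both feature bits gets patched twice in order, so the rep stosb body wins, which is exactly the override the commit message describes.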