Commit 9d8e2277 authored by Jan Beulich, committed by Ingo Molnar

x86-64: Handle byte-wise tail copying in memcpy() without a loop

While hard to measure, reducing the number of possibly/likely
mis-predicted branches can generally be expected to perform
slightly better.

Contrary to what might appear at first glance, this also doesn't
grow the function size (the alignment gap to the next function
just gets smaller).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/4F218584020000780006F422@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 2ab56091
@@ -164,18 +164,19 @@ ENTRY(memcpy)
 	retq
 	.p2align 4
 .Lless_3bytes:
-	cmpl $0, %edx
-	je .Lend
+	subl $1, %edx
+	jb .Lend
 	/*
 	 * Move data from 1 bytes to 3 bytes.
 	 */
-.Lloop_1:
-	movb (%rsi), %r8b
-	movb %r8b, (%rdi)
-	incq %rdi
-	incq %rsi
-	decl %edx
-	jnz .Lloop_1
+	movzbl (%rsi), %ecx
+	jz .Lstore_1byte
+	movzbq 1(%rsi), %r8
+	movzbq (%rsi, %rdx), %r9
+	movb %r8b, 1(%rdi)
+	movb %r9b, (%rdi, %rdx)
 
+.Lstore_1byte:
+	movb %cl, (%rdi)
 .Lend:
 	retq
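How the new code works: after "subl $1, %edx", %rdx holds n-1, so (%rsi,%rdx)
addresses the last byte. When n is 2 or 3, the stores to offset 1 and offset
n-1 overlap harmlessly (for n == 2 they hit the same byte), so both cases
share one branch-free straight-line path; only n == 1 takes a branch. A
minimal C sketch of the same idea follows (the function name copy_tail_1to3
is hypothetical, not from the kernel; the real code is the assembly above):

	#include <stddef.h>
	#include <stdint.h>

	/* Copy n bytes, 1 <= n <= 3, without a loop. */
	static void copy_tail_1to3(uint8_t *dst, const uint8_t *src, size_t n)
	{
		uint8_t first = src[0];            /* movzbl (%rsi), %ecx      */
		if (n >= 2) {                      /* jz .Lstore_1byte if n==1 */
			uint8_t second = src[1];       /* movzbq 1(%rsi), %r8      */
			uint8_t last = src[n - 1];     /* movzbq (%rsi,%rdx), %r9  */
			dst[1] = second;               /* movb %r8b, 1(%rdi)       */
			dst[n - 1] = last;             /* movb %r9b, (%rdi,%rdx)   */
		}
		dst[0] = first;                    /* movb %cl, (%rdi)         */
	}

Note that, as in the assembly, all loads happen before any store, and the old
code's up-to-three loop iterations (each with a conditional branch) collapse
into a single conditional branch.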