Commit 577e6a7f authored by Linus Torvalds

x86: inline the 'rep movs' in user copies for the FSRM case

This does the same thing for the user copies as commit 0db7058e
("x86/clear_user: Make it faster") did for clear_user().  In other
words, it inlines the "rep movs" case when X86_FEATURE_FSRM is set,
avoiding the function call entirely.

In order to do that, it makes the calling convention for the out-of-line
case ("copy_user_generic_unrolled") match the 'rep movs' calling
convention, although it does also end up clobbering a number of
additional registers.
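
For reference, the 'rep movs' register contract that the out-of-line helper now
has to honour is: %rdi = destination, %rsi = source, %rcx = byte count, with
%rcx holding the bytes left uncopied on exit (zero on success). A minimal
sketch of that contract in plain GCC inline asm (illustrative only, with an
invented name; it omits the STAC/CLAC and exception-table handling the real
user copy needs):

    #include <stddef.h>

    /* Bare 'rep movsb' copy: RDI = dst, RSI = src, RCX = count.
     * After the instruction, RCX holds whatever was not copied, so
     * returning it gives the "0 on success" semantics. */
    static inline size_t rep_movsb_copy(void *dst, const void *src, size_t len)
    {
        asm volatile("rep movsb"
                     : "+c" (len), "+D" (dst), "+S" (src)
                     : : "memory");
        return len;
    }

Matching that contract is what lets the alternatives machinery patch the call
site down to a single 'rep movsb' on FSRM hardware.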

Also, to simplify code sharing in the low-level assembly with the
__copy_user_nocache() function (that uses the normal C calling
convention), we end up with a kind of mixed return value for the
low-level asm code: it will return the result in both %rcx (to work as
an alternative for the 'rep movs' case), _and_ in %rax (for the nocache
case).

We could avoid this by wrapping __copy_user_nocache() callers in an
inline asm, but since the cost is just an extra register copy, it's
probably not worth it.
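
For illustration only, that rejected wrapping would have looked roughly like
the sketch below: keep the asm helper's result in %rcx alone, and let a small
inline-asm stub at the __copy_user_nocache() call sites translate it back into
an ordinary C return value. The helper name __copy_user_nocache_asm and the
wrapper itself are invented for this sketch and are not part of the patch
(ASM_CALL_CONSTRAINT is the kernel macro already used in the diff below):

    /* Hypothetical wrapper: call an asm helper that uses the 'rep movs'
     * convention (count in, remainder out, both via %rcx) and hand the
     * result back as a normal C return value. */
    static __always_inline unsigned long
    __copy_user_nocache_wrapped(void *dst, const void *src, unsigned long len)
    {
        asm volatile("call __copy_user_nocache_asm"
                     : "+c" (len), "+D" (dst), "+S" (src), ASM_CALL_CONSTRAINT
                     : : "memory", "rax", "rdx", "r8", "r9", "r10", "r11");
        return len;
    }

Against that, the cost of the dual %rcx/%rax return is just the extra
'movl %edx,%ecx' / 'xor %ecx,%ecx' visible in the tail paths of the diff,
which is the trade-off described above.
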
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 3639a535
@@ -18,29 +18,26 @@
 
 /* Handles exceptions in both to and from, but doesn't do access_ok */
 __must_check unsigned long
-copy_user_fast_string(void *to, const void *from, unsigned len);
-__must_check unsigned long
 copy_user_generic_unrolled(void *to, const void *from, unsigned len);
 
 static __always_inline __must_check unsigned long
-copy_user_generic(void *to, const void *from, unsigned len)
+copy_user_generic(void *to, const void *from, unsigned long len)
 {
-	unsigned ret;
-
 	stac();
 	/*
 	 * If CPU has FSRM feature, use 'rep movs'.
 	 * Otherwise, use copy_user_generic_unrolled.
 	 */
-	alternative_call(copy_user_generic_unrolled,
-			 copy_user_fast_string,
-			 X86_FEATURE_FSRM,
-			 ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
-				     "=d" (len)),
-			 "1" (to), "2" (from), "3" (len)
-			 : "memory", "rcx", "r8", "r9", "r10", "r11");
+	asm volatile(
+		"1:\n\t"
+		ALTERNATIVE("rep movsb",
+			    "call copy_user_generic_unrolled", ALT_NOT(X86_FEATURE_FSRM))
+		"2:\n"
+		_ASM_EXTABLE_UA(1b, 2b)
+		:"+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT
+		: : "memory", "rax", "rdx", "r8", "r9", "r10", "r11");
 	clac();
-	return ret;
+	return len;
 }
 
 static __always_inline __must_check unsigned long
...
@@ -45,13 +45,29 @@
  * Input:
  * rdi destination
  * rsi source
- * rdx count
+ * rcx count
  *
  * Output:
- * eax uncopied bytes or 0 if successful.
+ * rcx uncopied bytes or 0 if successful.
+ *
+ * NOTE! The calling convention is very intentionally the same as
+ * for 'rep movs', so that we can rewrite the function call with
+ * just a plain 'rep movs' on machines that have FSRM.
+ *
+ * HOWEVER! This function ends up having a lot of the code common
+ * with __copy_user_nocache(), which is a normal C function, and
+ * has a similar calling convention, but gets the 'count' in %rdx,
+ * and returns the result in %rax.
+ *
+ * To share as much code as possible, we end up returning the
+ * result in *both* %rcx/%rax, and we also move the initial count
+ * into %rdx.
+ *
+ * We can clobber rdx/rsi/rdi and r8-r11
  */
 SYM_FUNC_START(copy_user_generic_unrolled)
-	cmpl $8,%edx
+	movl %ecx,%edx
+	cmpl $8,%ecx
 	jb .Lcopy_user_short_string_bytes
 	ALIGN_DESTINATION
 	movl %edx,%ecx
@@ -103,37 +119,6 @@ SYM_FUNC_START(copy_user_generic_unrolled)
 SYM_FUNC_END(copy_user_generic_unrolled)
 EXPORT_SYMBOL(copy_user_generic_unrolled)
 
-/*
- * Some CPUs support FSRM for Fast Short REP MOVS.
- *
- * Only 4GB of copy is supported. This shouldn't be a problem
- * because the kernel normally only writes from/to page sized chunks
- * even if user space passed a longer buffer.
- * And more would be dangerous because both Intel and AMD have
- * errata with rep movsq > 4GB. If someone feels the need to fix
- * this please consider this.
- *
- * Input:
- * rdi destination
- * rsi source
- * rdx count
- *
- * Output:
- * eax uncopied bytes or 0 if successful.
- */
-SYM_FUNC_START(copy_user_fast_string)
-	movl %edx,%ecx
-1:	rep movsb
-	xorl %eax,%eax
-	RET
-
-12:	movl %ecx,%eax		/* ecx is zerorest also */
-	RET
-
-	_ASM_EXTABLE_CPY(1b, 12b)
-SYM_FUNC_END(copy_user_fast_string)
-EXPORT_SYMBOL(copy_user_fast_string)
-
 /*
  * Try to copy last bytes and clear the rest if needed.
  * Since protection fault in copy_from/to_user is not a normal situation,
@@ -160,6 +145,7 @@ SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
 
 3:
 	movl %edx,%eax
+	movl %edx,%ecx
 	RET
 
 	_ASM_EXTABLE_CPY(1b, 2b)
@@ -203,6 +189,7 @@ SYM_CODE_START_LOCAL(copy_user_short_string)
 	decl %ecx
 	jnz 21b
 23:	xor %eax,%eax
+	xor %ecx,%ecx
 	RET
 
 40:	leal (%rdx,%rcx,8),%edx
...
@@ -1286,7 +1286,6 @@ static const char *uaccess_safe_builtin[] = {
 	"ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */
 	"clear_user_original",
 	"copy_user_generic_unrolled",
-	"copy_user_fast_string",
 	"__copy_user_nocache",
 	NULL
 };
...