Commit daf52375 authored by Al Viro

amd64: switch csum_partial_copy_generic() to new calling conventions

... and fold handling of misaligned case into it.

Implementation note: we stash the "will we need to rol8 the sum in the end"
flag into the MSB of %rcx (the lower 32 bits are used for length); the rest
is pretty straightforward.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
parent fdf8bee9
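
Editorial aside: below is a minimal C sketch of the %rcx trick described in the implementation note above (illustrative only, with made-up names; it is not part of the patch). When the copy starts on an odd address, the first byte is consumed separately, so every remaining byte lands in the opposite lane of its 16-bit checksum word; the routine therefore records a flag in bit 63 of the length register and rotates the folded sum left by 8 at the very end.

	#include <stdint.h>

	/* Bit 63 of the length register doubles as the "rol8 at the end" flag. */
	#define ROL8_FLAG	(1ULL << 63)

	/*
	 * asm: leaq -1(%rcx,%rcx), %rcx ; rorq $1, %rcx
	 * (2*len - 1) rotated right by one bit: the low 1 lands in bit 63,
	 * the remaining bits become len - 1.  Assumes len >= 1.
	 */
	uint64_t consume_first_odd_byte(uint64_t len)
	{
		uint64_t x = 2 * len - 1;

		return (x >> 1) | (x << 63);	/* == (len - 1) | ROL8_FLAG */
	}

	/* asm: testq %r10, %r10 ; js .Lwas_odd ; ... ; roll $8, %eax */
	uint32_t finish_sum(uint64_t lenflag, uint32_t sum)
	{
		if (lenflag & ROL8_FLAG)		/* copy started on an odd address */
			sum = (sum << 8) | (sum >> 24);	/* rol8 */
		return sum;
	}
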
--- a/arch/x86/include/asm/checksum_64.h
+++ b/arch/x86/include/asm/checksum_64.h
@@ -130,10 +130,7 @@ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
 extern __wsum csum_partial(const void *buff, int len, __wsum sum);
 
 /* Do not call this directly. Use the wrappers below */
-extern __visible __wsum csum_partial_copy_generic(const void *src, const void *dst,
-						  int len, __wsum sum,
-						  int *src_err_ptr, int *dst_err_ptr);
+extern __visible __wsum csum_partial_copy_generic(const void *src, void *dst, int len);
 
 extern __wsum csum_and_copy_from_user(const void __user *src, void *dst, int len);
 extern __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len);
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -18,9 +18,6 @@
  * rdi  source
  * rsi  destination
  * edx  len (32bit)
- * ecx  sum (32bit)
- * r8   src_err_ptr (int)
- * r9   dst_err_ptr (int)
  *
  * Output
  * eax  64bit sum. undefined in case of exception.
@@ -31,44 +28,32 @@
 	.macro source
 10:
-	_ASM_EXTABLE_UA(10b, .Lbad_source)
+	_ASM_EXTABLE_UA(10b, .Lfault)
 	.endm
 
 	.macro dest
 20:
-	_ASM_EXTABLE_UA(20b, .Lbad_dest)
+	_ASM_EXTABLE_UA(20b, .Lfault)
 	.endm
 
-	/*
-	 * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
-	 * potentially unmapped kernel address.
-	 */
-	.macro ignore L=.Lignore
-30:
-	_ASM_EXTABLE(30b, \L)
-	.endm
-
 SYM_FUNC_START(csum_partial_copy_generic)
-	cmpl	$3*64, %edx
-	jle	.Lignore
-
-.Lignore:
-	subq	$7*8, %rsp
-	movq	%rbx, 2*8(%rsp)
-	movq	%r12, 3*8(%rsp)
-	movq	%r14, 4*8(%rsp)
-	movq	%r13, 5*8(%rsp)
-	movq	%r15, 6*8(%rsp)
+	subq	$5*8, %rsp
+	movq	%rbx, 0*8(%rsp)
+	movq	%r12, 1*8(%rsp)
+	movq	%r14, 2*8(%rsp)
+	movq	%r13, 3*8(%rsp)
+	movq	%r15, 4*8(%rsp)
 
-	movq	%r8, (%rsp)
-	movq	%r9, 1*8(%rsp)
-
-	movl	%ecx, %eax
+	movl	$-1, %eax
+	xorl	%r9d, %r9d
 	movl	%edx, %ecx
+	cmpl	$8, %ecx
+	jb	.Lshort
 
-	xorl	%r9d, %r9d
-	movq	%rcx, %r12
+	testb	$7, %sil
+	jne	.Lunaligned
+.Laligned:
+	movl	%ecx, %r12d
 
 	shrq	$6, %r12
 	jz	.Lhandle_tail	/* < 64 */
@@ -99,7 +84,12 @@ SYM_FUNC_START(csum_partial_copy_generic)
 	source
 	movq	56(%rdi), %r13
 
-	ignore 2f
+30:
+	/*
+	 * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
+	 * potentially unmapped kernel address.
+	 */
+	_ASM_EXTABLE(30b, 2f)
 	prefetcht0 5*64(%rdi)
 2:
 	adcq	%rbx, %rax
@@ -131,8 +121,6 @@ SYM_FUNC_START(csum_partial_copy_generic)
 	dest
 	movq	%r13, 56(%rsi)
 
-3:
-
 	leaq	64(%rdi), %rdi
 	leaq	64(%rsi), %rsi
@@ -142,8 +130,8 @@ SYM_FUNC_START(csum_partial_copy_generic)
 
 	/* do last up to 56 bytes */
 .Lhandle_tail:
-	/* ecx:	count */
-	movl	%ecx, %r10d
+	/* ecx:	count, rcx.63: the end result needs to be rol8 */
+	movq	%rcx, %r10
 	andl	$63, %ecx
 	shrl	$3, %ecx
 	jz	.Lfold
@@ -172,6 +160,7 @@ SYM_FUNC_START(csum_partial_copy_generic)
 .Lhandle_7:
 	movl	%r10d, %ecx
 	andl	$7, %ecx
+.L1:	/* .Lshort rejoins the common path here */
 	shrl	$1, %ecx
 	jz	.Lhandle_1
 	movl	$2, %edx
@@ -203,26 +192,65 @@ SYM_FUNC_START(csum_partial_copy_generic)
 	adcl	%r9d, %eax	/* carry */
 
 .Lende:
-	movq	2*8(%rsp), %rbx
-	movq	3*8(%rsp), %r12
-	movq	4*8(%rsp), %r14
-	movq	5*8(%rsp), %r13
-	movq	6*8(%rsp), %r15
-	addq	$7*8, %rsp
+	testq	%r10, %r10
+	js	.Lwas_odd
+.Lout:
+	movq	0*8(%rsp), %rbx
+	movq	1*8(%rsp), %r12
+	movq	2*8(%rsp), %r14
+	movq	3*8(%rsp), %r13
+	movq	4*8(%rsp), %r15
+	addq	$5*8, %rsp
 	ret
+.Lshort:
+	movl	%ecx, %r10d
+	jmp	.L1
+.Lunaligned:
+	xorl	%ebx, %ebx
+	testb	$1, %sil
+	jne	.Lodd
+1:	testb	$2, %sil
+	je	2f
+	source
+	movw	(%rdi), %bx
+	dest
+	movw	%bx, (%rsi)
+	leaq	2(%rdi), %rdi
+	subq	$2, %rcx
+	leaq	2(%rsi), %rsi
+	addq	%rbx, %rax
+2:	testb	$4, %sil
+	je	.Laligned
+	source
+	movl	(%rdi), %ebx
+	dest
+	movl	%ebx, (%rsi)
+	leaq	4(%rdi), %rdi
+	subq	$4, %rcx
+	leaq	4(%rsi), %rsi
+	addq	%rbx, %rax
+	jmp	.Laligned
+.Lodd:
+	source
+	movb	(%rdi), %bl
+	dest
+	movb	%bl, (%rsi)
+	leaq	1(%rdi), %rdi
+	leaq	1(%rsi), %rsi
+	/* decrement, set MSB */
+	leaq	-1(%rcx, %rcx), %rcx
+	rorq	$1, %rcx
+	shll	$8, %ebx
+	addq	%rbx, %rax
+	jmp	1b
+.Lwas_odd:
+	roll	$8, %eax
+	jmp	.Lout
 
-	/* Exception handlers. Very simple, zeroing is done in the wrappers */
-.Lbad_source:
-	movq	(%rsp), %rax
-	testq	%rax, %rax
-	jz	.Lende
-	movl	$-EFAULT, (%rax)
-	jmp	.Lende
-
-.Lbad_dest:
-	movq	8(%rsp), %rax
-	testq	%rax, %rax
-	jz	.Lende
-	movl	$-EFAULT, (%rax)
-	jmp	.Lende
+	/* Exception: just return 0 */
+.Lfault:
+	xorl	%eax, %eax
+	jmp	.Lout
 SYM_FUNC_END(csum_partial_copy_generic)
--- a/arch/x86/lib/csum-wrappers_64.c
+++ b/arch/x86/lib/csum-wrappers_64.c
@@ -21,49 +21,16 @@
  * src and dst are best aligned to 64bits.
  */
 __wsum
-csum_and_copy_from_user(const void __user *src, void *dst,
-			   int len)
+csum_and_copy_from_user(const void __user *src, void *dst, int len)
 {
-	int err = 0;
-	__wsum isum = ~0U;
+	__wsum sum;
 
 	might_sleep();
 	if (!user_access_begin(src, len))
 		return 0;
-
-	/*
-	 * Why 6, not 7? To handle odd addresses aligned we
-	 * would need to do considerable complications to fix the
-	 * checksum which is defined as an 16bit accumulator. The
-	 * fix alignment code is primarily for performance
-	 * compatibility with 32bit and that will handle odd
-	 * addresses slowly too.
-	 */
-	if (unlikely((unsigned long)src & 6)) {
-		while (((unsigned long)src & 6) && len >= 2) {
-			__u16 val16;
-
-			unsafe_get_user(val16, (const __u16 __user *)src, out);
-
-			*(__u16 *)dst = val16;
-			isum = (__force __wsum)add32_with_carry(
-					(__force unsigned)isum, val16);
-			src += 2;
-			dst += 2;
-			len -= 2;
-		}
-	}
-	isum = csum_partial_copy_generic((__force const void *)src,
-				dst, len, isum, &err, NULL);
+	sum = csum_partial_copy_generic((__force const void *)src, dst, len);
 	user_access_end();
-	if (unlikely(err))
-		isum = 0;
-	return isum;
-
-out:
-	user_access_end();
-	return 0;
+	return sum;
 }
 EXPORT_SYMBOL(csum_and_copy_from_user);
@@ -79,37 +46,16 @@ EXPORT_SYMBOL(csum_and_copy_from_user);
  * src and dst are best aligned to 64bits.
  */
 __wsum
-csum_and_copy_to_user(const void *src, void __user *dst,
-			  int len)
+csum_and_copy_to_user(const void *src, void __user *dst, int len)
 {
-	__wsum ret, isum = ~0U;
-	int err = 0;
+	__wsum sum;
 
 	might_sleep();
 	if (!user_access_begin(dst, len))
 		return 0;
-
-	if (unlikely((unsigned long)dst & 6)) {
-		while (((unsigned long)dst & 6) && len >= 2) {
-			__u16 val16 = *(__u16 *)src;
-
-			isum = (__force __wsum)add32_with_carry(
-					(__force unsigned)isum, val16);
-			unsafe_put_user(val16, (__u16 __user *)dst, out);
-			src += 2;
-			dst += 2;
-			len -= 2;
-		}
-	}
-
-	ret = csum_partial_copy_generic(src, (void __force *)dst,
-					len, isum, NULL, &err);
+	sum = csum_partial_copy_generic(src, (void __force *)dst, len);
 	user_access_end();
-	return err ? 0 : ret;
-out:
-	user_access_end();
-	return 0;
+	return sum;
 }
 EXPORT_SYMBOL(csum_and_copy_to_user);
@@ -125,7 +71,7 @@ EXPORT_SYMBOL(csum_and_copy_to_user);
 
 __wsum
 csum_partial_copy_nocheck(const void *src, void *dst, int len)
 {
-	return csum_partial_copy_generic(src, dst, len, 0, NULL, NULL);
+	return csum_partial_copy_generic(src, dst, len);
 }
 EXPORT_SYMBOL(csum_partial_copy_nocheck);
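
One more editorial note on the new calling convention: csum_partial_copy_generic() now reports a fault by returning 0 (see .Lfault above), and the user-copy wrappers pass that value straight through instead of filling *src_err_ptr / *dst_err_ptr. Because the accumulator is seeded with -1, a successfully computed sum should never fold to zero, so 0 can serve as the error value. A hypothetical caller (not from this patch; the helper name is made up) might look roughly like this:

	#include <linux/errno.h>
	#include <linux/uaccess.h>
	#include <net/checksum.h>

	/*
	 * Illustrative sketch only: copy user data and fold its checksum
	 * into *csump, treating a 0 return as "the copy faulted".
	 */
	static int copy_and_add_csum(const void __user *usrc, void *kdst,
				     int len, __wsum *csump)
	{
		__wsum sum = csum_and_copy_from_user(usrc, kdst, len);

		if (!sum)
			return -EFAULT;	/* fault while reading user memory */

		*csump = csum_block_add(*csump, sum, 0);
		return 0;
	}
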