Commit 8691ccd7 authored by Josh Poimboeuf, committed by Ingo Molnar

x86/asm/crypto: Create stack frames in crypto functions

The crypto code has several callable non-leaf functions which don't
honor CONFIG_FRAME_POINTER, which can result in bad stack traces.

Create stack frames for them when CONFIG_FRAME_POINTER is enabled.
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Bernd Petrovitsch <bernd@petrovitsch.priv.at>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Chris J Arges <chris.j.arges@canonical.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michal Marek <mmarek@suse.cz>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Pedro Alves <palves@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: live-patching@vger.kernel.org
Link: http://lkml.kernel.org/r/6c20192bcf1102ae18ae5a242cabf30ce9b29895.1453405861.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 68874ac3
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/inst.h> #include <asm/inst.h>
#include <asm/frame.h>
/* /*
* The following macros are used to move an (un)aligned 16 byte value to/from * The following macros are used to move an (un)aligned 16 byte value to/from
...@@ -1800,11 +1801,12 @@ ENDPROC(_key_expansion_256b) ...@@ -1800,11 +1801,12 @@ ENDPROC(_key_expansion_256b)
* unsigned int key_len) * unsigned int key_len)
*/ */
ENTRY(aesni_set_key) ENTRY(aesni_set_key)
FRAME_BEGIN
#ifndef __x86_64__ #ifndef __x86_64__
pushl KEYP pushl KEYP
movl 8(%esp), KEYP # ctx movl (FRAME_OFFSET+8)(%esp), KEYP # ctx
movl 12(%esp), UKEYP # in_key movl (FRAME_OFFSET+12)(%esp), UKEYP # in_key
movl 16(%esp), %edx # key_len movl (FRAME_OFFSET+16)(%esp), %edx # key_len
#endif #endif
movups (UKEYP), %xmm0 # user key (first 16 bytes) movups (UKEYP), %xmm0 # user key (first 16 bytes)
movaps %xmm0, (KEYP) movaps %xmm0, (KEYP)
...@@ -1905,6 +1907,7 @@ ENTRY(aesni_set_key) ...@@ -1905,6 +1907,7 @@ ENTRY(aesni_set_key)
#ifndef __x86_64__ #ifndef __x86_64__
popl KEYP popl KEYP
#endif #endif
FRAME_END
ret ret
ENDPROC(aesni_set_key) ENDPROC(aesni_set_key)
...@@ -1912,12 +1915,13 @@ ENDPROC(aesni_set_key) ...@@ -1912,12 +1915,13 @@ ENDPROC(aesni_set_key)
* void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
*/ */
ENTRY(aesni_enc) ENTRY(aesni_enc)
FRAME_BEGIN
#ifndef __x86_64__ #ifndef __x86_64__
pushl KEYP pushl KEYP
pushl KLEN pushl KLEN
movl 12(%esp), KEYP movl (FRAME_OFFSET+12)(%esp), KEYP # ctx
movl 16(%esp), OUTP movl (FRAME_OFFSET+16)(%esp), OUTP # dst
movl 20(%esp), INP movl (FRAME_OFFSET+20)(%esp), INP # src
#endif #endif
movl 480(KEYP), KLEN # key length movl 480(KEYP), KLEN # key length
movups (INP), STATE # input movups (INP), STATE # input
...@@ -1927,6 +1931,7 @@ ENTRY(aesni_enc) ...@@ -1927,6 +1931,7 @@ ENTRY(aesni_enc)
popl KLEN popl KLEN
popl KEYP popl KEYP
#endif #endif
FRAME_END
ret ret
ENDPROC(aesni_enc) ENDPROC(aesni_enc)
...@@ -2101,12 +2106,13 @@ ENDPROC(_aesni_enc4) ...@@ -2101,12 +2106,13 @@ ENDPROC(_aesni_enc4)
* void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
*/ */
ENTRY(aesni_dec) ENTRY(aesni_dec)
FRAME_BEGIN
#ifndef __x86_64__ #ifndef __x86_64__
pushl KEYP pushl KEYP
pushl KLEN pushl KLEN
movl 12(%esp), KEYP movl (FRAME_OFFSET+12)(%esp), KEYP # ctx
movl 16(%esp), OUTP movl (FRAME_OFFSET+16)(%esp), OUTP # dst
movl 20(%esp), INP movl (FRAME_OFFSET+20)(%esp), INP # src
#endif #endif
mov 480(KEYP), KLEN # key length mov 480(KEYP), KLEN # key length
add $240, KEYP add $240, KEYP
...@@ -2117,6 +2123,7 @@ ENTRY(aesni_dec) ...@@ -2117,6 +2123,7 @@ ENTRY(aesni_dec)
popl KLEN popl KLEN
popl KEYP popl KEYP
#endif #endif
FRAME_END
ret ret
ENDPROC(aesni_dec) ENDPROC(aesni_dec)
...@@ -2292,14 +2299,15 @@ ENDPROC(_aesni_dec4) ...@@ -2292,14 +2299,15 @@ ENDPROC(_aesni_dec4)
* size_t len) * size_t len)
*/ */
ENTRY(aesni_ecb_enc) ENTRY(aesni_ecb_enc)
FRAME_BEGIN
#ifndef __x86_64__ #ifndef __x86_64__
pushl LEN pushl LEN
pushl KEYP pushl KEYP
pushl KLEN pushl KLEN
movl 16(%esp), KEYP movl (FRAME_OFFSET+16)(%esp), KEYP # ctx
movl 20(%esp), OUTP movl (FRAME_OFFSET+20)(%esp), OUTP # dst
movl 24(%esp), INP movl (FRAME_OFFSET+24)(%esp), INP # src
movl 28(%esp), LEN movl (FRAME_OFFSET+28)(%esp), LEN # len
#endif #endif
test LEN, LEN # check length test LEN, LEN # check length
jz .Lecb_enc_ret jz .Lecb_enc_ret
...@@ -2342,6 +2350,7 @@ ENTRY(aesni_ecb_enc) ...@@ -2342,6 +2350,7 @@ ENTRY(aesni_ecb_enc)
popl KEYP popl KEYP
popl LEN popl LEN
#endif #endif
FRAME_END
ret ret
ENDPROC(aesni_ecb_enc) ENDPROC(aesni_ecb_enc)
...@@ -2350,14 +2359,15 @@ ENDPROC(aesni_ecb_enc) ...@@ -2350,14 +2359,15 @@ ENDPROC(aesni_ecb_enc)
* size_t len); * size_t len);
*/ */
ENTRY(aesni_ecb_dec) ENTRY(aesni_ecb_dec)
FRAME_BEGIN
#ifndef __x86_64__ #ifndef __x86_64__
pushl LEN pushl LEN
pushl KEYP pushl KEYP
pushl KLEN pushl KLEN
movl 16(%esp), KEYP movl (FRAME_OFFSET+16)(%esp), KEYP # ctx
movl 20(%esp), OUTP movl (FRAME_OFFSET+20)(%esp), OUTP # dst
movl 24(%esp), INP movl (FRAME_OFFSET+24)(%esp), INP # src
movl 28(%esp), LEN movl (FRAME_OFFSET+28)(%esp), LEN # len
#endif #endif
test LEN, LEN test LEN, LEN
jz .Lecb_dec_ret jz .Lecb_dec_ret
...@@ -2401,6 +2411,7 @@ ENTRY(aesni_ecb_dec) ...@@ -2401,6 +2411,7 @@ ENTRY(aesni_ecb_dec)
popl KEYP popl KEYP
popl LEN popl LEN
#endif #endif
FRAME_END
ret ret
ENDPROC(aesni_ecb_dec) ENDPROC(aesni_ecb_dec)
...@@ -2409,16 +2420,17 @@ ENDPROC(aesni_ecb_dec) ...@@ -2409,16 +2420,17 @@ ENDPROC(aesni_ecb_dec)
* size_t len, u8 *iv) * size_t len, u8 *iv)
*/ */
ENTRY(aesni_cbc_enc) ENTRY(aesni_cbc_enc)
FRAME_BEGIN
#ifndef __x86_64__ #ifndef __x86_64__
pushl IVP pushl IVP
pushl LEN pushl LEN
pushl KEYP pushl KEYP
pushl KLEN pushl KLEN
movl 20(%esp), KEYP movl (FRAME_OFFSET+20)(%esp), KEYP # ctx
movl 24(%esp), OUTP movl (FRAME_OFFSET+24)(%esp), OUTP # dst
movl 28(%esp), INP movl (FRAME_OFFSET+28)(%esp), INP # src
movl 32(%esp), LEN movl (FRAME_OFFSET+32)(%esp), LEN # len
movl 36(%esp), IVP movl (FRAME_OFFSET+36)(%esp), IVP # iv
#endif #endif
cmp $16, LEN cmp $16, LEN
jb .Lcbc_enc_ret jb .Lcbc_enc_ret
...@@ -2443,6 +2455,7 @@ ENTRY(aesni_cbc_enc) ...@@ -2443,6 +2455,7 @@ ENTRY(aesni_cbc_enc)
popl LEN popl LEN
popl IVP popl IVP
#endif #endif
FRAME_END
ret ret
ENDPROC(aesni_cbc_enc) ENDPROC(aesni_cbc_enc)
...@@ -2451,16 +2464,17 @@ ENDPROC(aesni_cbc_enc) ...@@ -2451,16 +2464,17 @@ ENDPROC(aesni_cbc_enc)
* size_t len, u8 *iv) * size_t len, u8 *iv)
*/ */
ENTRY(aesni_cbc_dec) ENTRY(aesni_cbc_dec)
FRAME_BEGIN
#ifndef __x86_64__ #ifndef __x86_64__
pushl IVP pushl IVP
pushl LEN pushl LEN
pushl KEYP pushl KEYP
pushl KLEN pushl KLEN
movl 20(%esp), KEYP movl (FRAME_OFFSET+20)(%esp), KEYP # ctx
movl 24(%esp), OUTP movl (FRAME_OFFSET+24)(%esp), OUTP # dst
movl 28(%esp), INP movl (FRAME_OFFSET+28)(%esp), INP # src
movl 32(%esp), LEN movl (FRAME_OFFSET+32)(%esp), LEN # len
movl 36(%esp), IVP movl (FRAME_OFFSET+36)(%esp), IVP # iv
#endif #endif
cmp $16, LEN cmp $16, LEN
jb .Lcbc_dec_just_ret jb .Lcbc_dec_just_ret
...@@ -2534,6 +2548,7 @@ ENTRY(aesni_cbc_dec) ...@@ -2534,6 +2548,7 @@ ENTRY(aesni_cbc_dec)
popl LEN popl LEN
popl IVP popl IVP
#endif #endif
FRAME_END
ret ret
ENDPROC(aesni_cbc_dec) ENDPROC(aesni_cbc_dec)
...@@ -2600,6 +2615,7 @@ ENDPROC(_aesni_inc) ...@@ -2600,6 +2615,7 @@ ENDPROC(_aesni_inc)
* size_t len, u8 *iv) * size_t len, u8 *iv)
*/ */
ENTRY(aesni_ctr_enc) ENTRY(aesni_ctr_enc)
FRAME_BEGIN
cmp $16, LEN cmp $16, LEN
jb .Lctr_enc_just_ret jb .Lctr_enc_just_ret
mov 480(KEYP), KLEN mov 480(KEYP), KLEN
...@@ -2653,6 +2669,7 @@ ENTRY(aesni_ctr_enc) ...@@ -2653,6 +2669,7 @@ ENTRY(aesni_ctr_enc)
.Lctr_enc_ret: .Lctr_enc_ret:
movups IV, (IVP) movups IV, (IVP)
.Lctr_enc_just_ret: .Lctr_enc_just_ret:
FRAME_END
ret ret
ENDPROC(aesni_ctr_enc) ENDPROC(aesni_ctr_enc)
...@@ -2679,6 +2696,7 @@ ENDPROC(aesni_ctr_enc) ...@@ -2679,6 +2696,7 @@ ENDPROC(aesni_ctr_enc)
* bool enc, u8 *iv) * bool enc, u8 *iv)
*/ */
ENTRY(aesni_xts_crypt8) ENTRY(aesni_xts_crypt8)
FRAME_BEGIN
cmpb $0, %cl cmpb $0, %cl
movl $0, %ecx movl $0, %ecx
movl $240, %r10d movl $240, %r10d
...@@ -2779,6 +2797,7 @@ ENTRY(aesni_xts_crypt8) ...@@ -2779,6 +2797,7 @@ ENTRY(aesni_xts_crypt8)
pxor INC, STATE4 pxor INC, STATE4
movdqu STATE4, 0x70(OUTP) movdqu STATE4, 0x70(OUTP)
FRAME_END
ret ret
ENDPROC(aesni_xts_crypt8) ENDPROC(aesni_xts_crypt8)
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
*/ */
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/frame.h>
#define CAMELLIA_TABLE_BYTE_LEN 272 #define CAMELLIA_TABLE_BYTE_LEN 272
...@@ -726,6 +727,7 @@ __camellia_enc_blk16: ...@@ -726,6 +727,7 @@ __camellia_enc_blk16:
* %xmm0..%xmm15: 16 encrypted blocks, order swapped: * %xmm0..%xmm15: 16 encrypted blocks, order swapped:
* 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 * 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
*/ */
FRAME_BEGIN
leaq 8 * 16(%rax), %rcx; leaq 8 * 16(%rax), %rcx;
...@@ -780,6 +782,7 @@ __camellia_enc_blk16: ...@@ -780,6 +782,7 @@ __camellia_enc_blk16:
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
%xmm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 16(%rax)); %xmm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 16(%rax));
FRAME_END
ret; ret;
.align 8 .align 8
...@@ -812,6 +815,7 @@ __camellia_dec_blk16: ...@@ -812,6 +815,7 @@ __camellia_dec_blk16:
* %xmm0..%xmm15: 16 plaintext blocks, order swapped: * %xmm0..%xmm15: 16 plaintext blocks, order swapped:
* 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 * 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
*/ */
FRAME_BEGIN
leaq 8 * 16(%rax), %rcx; leaq 8 * 16(%rax), %rcx;
...@@ -865,6 +869,7 @@ __camellia_dec_blk16: ...@@ -865,6 +869,7 @@ __camellia_dec_blk16:
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
%xmm15, (key_table)(CTX), (%rax), 1 * 16(%rax)); %xmm15, (key_table)(CTX), (%rax), 1 * 16(%rax));
FRAME_END
ret; ret;
.align 8 .align 8
...@@ -890,6 +895,7 @@ ENTRY(camellia_ecb_enc_16way) ...@@ -890,6 +895,7 @@ ENTRY(camellia_ecb_enc_16way)
* %rsi: dst (16 blocks) * %rsi: dst (16 blocks)
* %rdx: src (16 blocks) * %rdx: src (16 blocks)
*/ */
FRAME_BEGIN
inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
...@@ -904,6 +910,7 @@ ENTRY(camellia_ecb_enc_16way) ...@@ -904,6 +910,7 @@ ENTRY(camellia_ecb_enc_16way)
%xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9, %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
%xmm8, %rsi); %xmm8, %rsi);
FRAME_END
ret; ret;
ENDPROC(camellia_ecb_enc_16way) ENDPROC(camellia_ecb_enc_16way)
...@@ -913,6 +920,7 @@ ENTRY(camellia_ecb_dec_16way) ...@@ -913,6 +920,7 @@ ENTRY(camellia_ecb_dec_16way)
* %rsi: dst (16 blocks) * %rsi: dst (16 blocks)
* %rdx: src (16 blocks) * %rdx: src (16 blocks)
*/ */
FRAME_BEGIN
cmpl $16, key_length(CTX); cmpl $16, key_length(CTX);
movl $32, %r8d; movl $32, %r8d;
...@@ -932,6 +940,7 @@ ENTRY(camellia_ecb_dec_16way) ...@@ -932,6 +940,7 @@ ENTRY(camellia_ecb_dec_16way)
%xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9, %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
%xmm8, %rsi); %xmm8, %rsi);
FRAME_END
ret; ret;
ENDPROC(camellia_ecb_dec_16way) ENDPROC(camellia_ecb_dec_16way)
...@@ -941,6 +950,7 @@ ENTRY(camellia_cbc_dec_16way) ...@@ -941,6 +950,7 @@ ENTRY(camellia_cbc_dec_16way)
* %rsi: dst (16 blocks) * %rsi: dst (16 blocks)
* %rdx: src (16 blocks) * %rdx: src (16 blocks)
*/ */
FRAME_BEGIN
cmpl $16, key_length(CTX); cmpl $16, key_length(CTX);
movl $32, %r8d; movl $32, %r8d;
...@@ -981,6 +991,7 @@ ENTRY(camellia_cbc_dec_16way) ...@@ -981,6 +991,7 @@ ENTRY(camellia_cbc_dec_16way)
%xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9, %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
%xmm8, %rsi); %xmm8, %rsi);
FRAME_END
ret; ret;
ENDPROC(camellia_cbc_dec_16way) ENDPROC(camellia_cbc_dec_16way)
...@@ -997,6 +1008,7 @@ ENTRY(camellia_ctr_16way) ...@@ -997,6 +1008,7 @@ ENTRY(camellia_ctr_16way)
* %rdx: src (16 blocks) * %rdx: src (16 blocks)
* %rcx: iv (little endian, 128bit) * %rcx: iv (little endian, 128bit)
*/ */
FRAME_BEGIN
subq $(16 * 16), %rsp; subq $(16 * 16), %rsp;
movq %rsp, %rax; movq %rsp, %rax;
...@@ -1092,6 +1104,7 @@ ENTRY(camellia_ctr_16way) ...@@ -1092,6 +1104,7 @@ ENTRY(camellia_ctr_16way)
%xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9, %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
%xmm8, %rsi); %xmm8, %rsi);
FRAME_END
ret; ret;
ENDPROC(camellia_ctr_16way) ENDPROC(camellia_ctr_16way)
...@@ -1112,6 +1125,7 @@ camellia_xts_crypt_16way: ...@@ -1112,6 +1125,7 @@ camellia_xts_crypt_16way:
* %r8: index for input whitening key * %r8: index for input whitening key
* %r9: pointer to __camellia_enc_blk16 or __camellia_dec_blk16 * %r9: pointer to __camellia_enc_blk16 or __camellia_dec_blk16
*/ */
FRAME_BEGIN
subq $(16 * 16), %rsp; subq $(16 * 16), %rsp;
movq %rsp, %rax; movq %rsp, %rax;
...@@ -1234,6 +1248,7 @@ camellia_xts_crypt_16way: ...@@ -1234,6 +1248,7 @@ camellia_xts_crypt_16way:
%xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9, %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
%xmm8, %rsi); %xmm8, %rsi);
FRAME_END
ret; ret;
ENDPROC(camellia_xts_crypt_16way) ENDPROC(camellia_xts_crypt_16way)
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
*/ */
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/frame.h>
#define CAMELLIA_TABLE_BYTE_LEN 272 #define CAMELLIA_TABLE_BYTE_LEN 272
...@@ -766,6 +767,7 @@ __camellia_enc_blk32: ...@@ -766,6 +767,7 @@ __camellia_enc_blk32:
* %ymm0..%ymm15: 32 encrypted blocks, order swapped: * %ymm0..%ymm15: 32 encrypted blocks, order swapped:
* 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 * 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
*/ */
FRAME_BEGIN
leaq 8 * 32(%rax), %rcx; leaq 8 * 32(%rax), %rcx;
...@@ -820,6 +822,7 @@ __camellia_enc_blk32: ...@@ -820,6 +822,7 @@ __camellia_enc_blk32:
%ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
%ymm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 32(%rax)); %ymm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 32(%rax));
FRAME_END
ret; ret;
.align 8 .align 8
...@@ -852,6 +855,7 @@ __camellia_dec_blk32: ...@@ -852,6 +855,7 @@ __camellia_dec_blk32:
* %ymm0..%ymm15: 16 plaintext blocks, order swapped: * %ymm0..%ymm15: 16 plaintext blocks, order swapped:
* 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 * 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
*/ */
FRAME_BEGIN
leaq 8 * 32(%rax), %rcx; leaq 8 * 32(%rax), %rcx;
...@@ -905,6 +909,7 @@ __camellia_dec_blk32: ...@@ -905,6 +909,7 @@ __camellia_dec_blk32:
%ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
%ymm15, (key_table)(CTX), (%rax), 1 * 32(%rax)); %ymm15, (key_table)(CTX), (%rax), 1 * 32(%rax));
FRAME_END
ret; ret;
.align 8 .align 8
...@@ -930,6 +935,7 @@ ENTRY(camellia_ecb_enc_32way) ...@@ -930,6 +935,7 @@ ENTRY(camellia_ecb_enc_32way)
* %rsi: dst (32 blocks) * %rsi: dst (32 blocks)
* %rdx: src (32 blocks) * %rdx: src (32 blocks)
*/ */
FRAME_BEGIN
vzeroupper; vzeroupper;
...@@ -948,6 +954,7 @@ ENTRY(camellia_ecb_enc_32way) ...@@ -948,6 +954,7 @@ ENTRY(camellia_ecb_enc_32way)
vzeroupper; vzeroupper;
FRAME_END
ret; ret;
ENDPROC(camellia_ecb_enc_32way) ENDPROC(camellia_ecb_enc_32way)
...@@ -957,6 +964,7 @@ ENTRY(camellia_ecb_dec_32way) ...@@ -957,6 +964,7 @@ ENTRY(camellia_ecb_dec_32way)
* %rsi: dst (32 blocks) * %rsi: dst (32 blocks)
* %rdx: src (32 blocks) * %rdx: src (32 blocks)
*/ */
FRAME_BEGIN
vzeroupper; vzeroupper;
...@@ -980,6 +988,7 @@ ENTRY(camellia_ecb_dec_32way) ...@@ -980,6 +988,7 @@ ENTRY(camellia_ecb_dec_32way)
vzeroupper; vzeroupper;
FRAME_END
ret; ret;
ENDPROC(camellia_ecb_dec_32way) ENDPROC(camellia_ecb_dec_32way)
...@@ -989,6 +998,7 @@ ENTRY(camellia_cbc_dec_32way) ...@@ -989,6 +998,7 @@ ENTRY(camellia_cbc_dec_32way)
* %rsi: dst (32 blocks) * %rsi: dst (32 blocks)
* %rdx: src (32 blocks) * %rdx: src (32 blocks)
*/ */
FRAME_BEGIN
vzeroupper; vzeroupper;
...@@ -1046,6 +1056,7 @@ ENTRY(camellia_cbc_dec_32way) ...@@ -1046,6 +1056,7 @@ ENTRY(camellia_cbc_dec_32way)
vzeroupper; vzeroupper;
FRAME_END
ret; ret;
ENDPROC(camellia_cbc_dec_32way) ENDPROC(camellia_cbc_dec_32way)
...@@ -1070,6 +1081,7 @@ ENTRY(camellia_ctr_32way) ...@@ -1070,6 +1081,7 @@ ENTRY(camellia_ctr_32way)
* %rdx: src (32 blocks) * %rdx: src (32 blocks)
* %rcx: iv (little endian, 128bit) * %rcx: iv (little endian, 128bit)
*/ */
FRAME_BEGIN
vzeroupper; vzeroupper;
...@@ -1184,6 +1196,7 @@ ENTRY(camellia_ctr_32way) ...@@ -1184,6 +1196,7 @@ ENTRY(camellia_ctr_32way)
vzeroupper; vzeroupper;
FRAME_END
ret; ret;
ENDPROC(camellia_ctr_32way) ENDPROC(camellia_ctr_32way)
...@@ -1216,6 +1229,7 @@ camellia_xts_crypt_32way: ...@@ -1216,6 +1229,7 @@ camellia_xts_crypt_32way:
* %r8: index for input whitening key * %r8: index for input whitening key
* %r9: pointer to __camellia_enc_blk32 or __camellia_dec_blk32 * %r9: pointer to __camellia_enc_blk32 or __camellia_dec_blk32
*/ */
FRAME_BEGIN
vzeroupper; vzeroupper;
...@@ -1349,6 +1363,7 @@ camellia_xts_crypt_32way: ...@@ -1349,6 +1363,7 @@ camellia_xts_crypt_32way:
vzeroupper; vzeroupper;
FRAME_END
ret; ret;
ENDPROC(camellia_xts_crypt_32way) ENDPROC(camellia_xts_crypt_32way)
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
*/ */
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/frame.h>
.file "cast5-avx-x86_64-asm_64.S" .file "cast5-avx-x86_64-asm_64.S"
...@@ -365,6 +366,7 @@ ENTRY(cast5_ecb_enc_16way) ...@@ -365,6 +366,7 @@ ENTRY(cast5_ecb_enc_16way)
* %rsi: dst * %rsi: dst
* %rdx: src * %rdx: src
*/ */
FRAME_BEGIN
movq %rsi, %r11; movq %rsi, %r11;
...@@ -388,6 +390,7 @@ ENTRY(cast5_ecb_enc_16way) ...@@ -388,6 +390,7 @@ ENTRY(cast5_ecb_enc_16way)
vmovdqu RR4, (6*4*4)(%r11); vmovdqu RR4, (6*4*4)(%r11);
vmovdqu RL4, (7*4*4)(%r11); vmovdqu RL4, (7*4*4)(%r11);
FRAME_END
ret; ret;
ENDPROC(cast5_ecb_enc_16way) ENDPROC(cast5_ecb_enc_16way)
...@@ -398,6 +401,7 @@ ENTRY(cast5_ecb_dec_16way) ...@@ -398,6 +401,7 @@ ENTRY(cast5_ecb_dec_16way)
* %rdx: src * %rdx: src
*/ */
FRAME_BEGIN
movq %rsi, %r11; movq %rsi, %r11;
vmovdqu (0*4*4)(%rdx), RL1; vmovdqu (0*4*4)(%rdx), RL1;
...@@ -420,6 +424,7 @@ ENTRY(cast5_ecb_dec_16way) ...@@ -420,6 +424,7 @@ ENTRY(cast5_ecb_dec_16way)
vmovdqu RR4, (6*4*4)(%r11); vmovdqu RR4, (6*4*4)(%r11);
vmovdqu RL4, (7*4*4)(%r11); vmovdqu RL4, (7*4*4)(%r11);
FRAME_END
ret; ret;
ENDPROC(cast5_ecb_dec_16way) ENDPROC(cast5_ecb_dec_16way)
...@@ -429,6 +434,7 @@ ENTRY(cast5_cbc_dec_16way) ...@@ -429,6 +434,7 @@ ENTRY(cast5_cbc_dec_16way)
* %rsi: dst * %rsi: dst
* %rdx: src * %rdx: src
*/ */
FRAME_BEGIN
pushq %r12; pushq %r12;
...@@ -469,6 +475,7 @@ ENTRY(cast5_cbc_dec_16way) ...@@ -469,6 +475,7 @@ ENTRY(cast5_cbc_dec_16way)
popq %r12; popq %r12;
FRAME_END
ret; ret;
ENDPROC(cast5_cbc_dec_16way) ENDPROC(cast5_cbc_dec_16way)
...@@ -479,6 +486,7 @@ ENTRY(cast5_ctr_16way) ...@@ -479,6 +486,7 @@ ENTRY(cast5_ctr_16way)
* %rdx: src * %rdx: src
* %rcx: iv (big endian, 64bit) * %rcx: iv (big endian, 64bit)
*/ */
FRAME_BEGIN
pushq %r12; pushq %r12;
...@@ -542,5 +550,6 @@ ENTRY(cast5_ctr_16way) ...@@ -542,5 +550,6 @@ ENTRY(cast5_ctr_16way)
popq %r12; popq %r12;
FRAME_END
ret; ret;
ENDPROC(cast5_ctr_16way) ENDPROC(cast5_ctr_16way)
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
*/ */
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/frame.h>
#include "glue_helper-asm-avx.S" #include "glue_helper-asm-avx.S"
.file "cast6-avx-x86_64-asm_64.S" .file "cast6-avx-x86_64-asm_64.S"
...@@ -349,6 +350,7 @@ ENTRY(cast6_ecb_enc_8way) ...@@ -349,6 +350,7 @@ ENTRY(cast6_ecb_enc_8way)
* %rsi: dst * %rsi: dst
* %rdx: src * %rdx: src
*/ */
FRAME_BEGIN
movq %rsi, %r11; movq %rsi, %r11;
...@@ -358,6 +360,7 @@ ENTRY(cast6_ecb_enc_8way) ...@@ -358,6 +360,7 @@ ENTRY(cast6_ecb_enc_8way)
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
FRAME_END
ret; ret;
ENDPROC(cast6_ecb_enc_8way) ENDPROC(cast6_ecb_enc_8way)
...@@ -367,6 +370,7 @@ ENTRY(cast6_ecb_dec_8way) ...@@ -367,6 +370,7 @@ ENTRY(cast6_ecb_dec_8way)
* %rsi: dst * %rsi: dst
* %rdx: src * %rdx: src
*/ */
FRAME_BEGIN
movq %rsi, %r11; movq %rsi, %r11;
...@@ -376,6 +380,7 @@ ENTRY(cast6_ecb_dec_8way) ...@@ -376,6 +380,7 @@ ENTRY(cast6_ecb_dec_8way)
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
FRAME_END
ret; ret;
ENDPROC(cast6_ecb_dec_8way) ENDPROC(cast6_ecb_dec_8way)
...@@ -385,6 +390,7 @@ ENTRY(cast6_cbc_dec_8way) ...@@ -385,6 +390,7 @@ ENTRY(cast6_cbc_dec_8way)
* %rsi: dst * %rsi: dst
* %rdx: src * %rdx: src
*/ */
FRAME_BEGIN
pushq %r12; pushq %r12;
...@@ -399,6 +405,7 @@ ENTRY(cast6_cbc_dec_8way) ...@@ -399,6 +405,7 @@ ENTRY(cast6_cbc_dec_8way)
popq %r12; popq %r12;
FRAME_END
ret; ret;
ENDPROC(cast6_cbc_dec_8way) ENDPROC(cast6_cbc_dec_8way)
...@@ -409,6 +416,7 @@ ENTRY(cast6_ctr_8way) ...@@ -409,6 +416,7 @@ ENTRY(cast6_ctr_8way)
* %rdx: src * %rdx: src
* %rcx: iv (little endian, 128bit) * %rcx: iv (little endian, 128bit)
*/ */
FRAME_BEGIN
pushq %r12; pushq %r12;
...@@ -424,6 +432,7 @@ ENTRY(cast6_ctr_8way) ...@@ -424,6 +432,7 @@ ENTRY(cast6_ctr_8way)
popq %r12; popq %r12;
FRAME_END
ret; ret;
ENDPROC(cast6_ctr_8way) ENDPROC(cast6_ctr_8way)
...@@ -434,6 +443,7 @@ ENTRY(cast6_xts_enc_8way) ...@@ -434,6 +443,7 @@ ENTRY(cast6_xts_enc_8way)
* %rdx: src * %rdx: src
* %rcx: iv (t αⁿ GF(2¹²⁸)) * %rcx: iv (t αⁿ GF(2¹²⁸))
*/ */
FRAME_BEGIN
movq %rsi, %r11; movq %rsi, %r11;
...@@ -446,6 +456,7 @@ ENTRY(cast6_xts_enc_8way) ...@@ -446,6 +456,7 @@ ENTRY(cast6_xts_enc_8way)
/* dst <= regs xor IVs(in dst) */ /* dst <= regs xor IVs(in dst) */
store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
FRAME_END
ret; ret;
ENDPROC(cast6_xts_enc_8way) ENDPROC(cast6_xts_enc_8way)
...@@ -456,6 +467,7 @@ ENTRY(cast6_xts_dec_8way) ...@@ -456,6 +467,7 @@ ENTRY(cast6_xts_dec_8way)
* %rdx: src * %rdx: src
* %rcx: iv (t αⁿ GF(2¹²⁸)) * %rcx: iv (t αⁿ GF(2¹²⁸))
*/ */
FRAME_BEGIN
movq %rsi, %r11; movq %rsi, %r11;
...@@ -468,5 +480,6 @@ ENTRY(cast6_xts_dec_8way) ...@@ -468,5 +480,6 @@ ENTRY(cast6_xts_dec_8way)
/* dst <= regs xor IVs(in dst) */ /* dst <= regs xor IVs(in dst) */
store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
FRAME_END
ret; ret;
ENDPROC(cast6_xts_dec_8way) ENDPROC(cast6_xts_dec_8way)
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/inst.h> #include <asm/inst.h>
#include <asm/frame.h>
.data .data
...@@ -94,6 +95,7 @@ ENDPROC(__clmul_gf128mul_ble) ...@@ -94,6 +95,7 @@ ENDPROC(__clmul_gf128mul_ble)
/* void clmul_ghash_mul(char *dst, const u128 *shash) */ /* void clmul_ghash_mul(char *dst, const u128 *shash) */
ENTRY(clmul_ghash_mul) ENTRY(clmul_ghash_mul)
FRAME_BEGIN
movups (%rdi), DATA movups (%rdi), DATA
movups (%rsi), SHASH movups (%rsi), SHASH
movaps .Lbswap_mask, BSWAP movaps .Lbswap_mask, BSWAP
...@@ -101,6 +103,7 @@ ENTRY(clmul_ghash_mul) ...@@ -101,6 +103,7 @@ ENTRY(clmul_ghash_mul)
call __clmul_gf128mul_ble call __clmul_gf128mul_ble
PSHUFB_XMM BSWAP DATA PSHUFB_XMM BSWAP DATA
movups DATA, (%rdi) movups DATA, (%rdi)
FRAME_END
ret ret
ENDPROC(clmul_ghash_mul) ENDPROC(clmul_ghash_mul)
...@@ -109,6 +112,7 @@ ENDPROC(clmul_ghash_mul) ...@@ -109,6 +112,7 @@ ENDPROC(clmul_ghash_mul)
* const u128 *shash); * const u128 *shash);
*/ */
ENTRY(clmul_ghash_update) ENTRY(clmul_ghash_update)
FRAME_BEGIN
cmp $16, %rdx cmp $16, %rdx
jb .Lupdate_just_ret # check length jb .Lupdate_just_ret # check length
movaps .Lbswap_mask, BSWAP movaps .Lbswap_mask, BSWAP
...@@ -128,5 +132,6 @@ ENTRY(clmul_ghash_update) ...@@ -128,5 +132,6 @@ ENTRY(clmul_ghash_update)
PSHUFB_XMM BSWAP DATA PSHUFB_XMM BSWAP DATA
movups DATA, (%rdi) movups DATA, (%rdi)
.Lupdate_just_ret: .Lupdate_just_ret:
FRAME_END
ret ret
ENDPROC(clmul_ghash_update) ENDPROC(clmul_ghash_update)
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
*/ */
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/frame.h>
#include "glue_helper-asm-avx.S" #include "glue_helper-asm-avx.S"
.file "serpent-avx-x86_64-asm_64.S" .file "serpent-avx-x86_64-asm_64.S"
...@@ -681,6 +682,7 @@ ENTRY(serpent_ecb_enc_8way_avx) ...@@ -681,6 +682,7 @@ ENTRY(serpent_ecb_enc_8way_avx)
* %rsi: dst * %rsi: dst
* %rdx: src * %rdx: src
*/ */
FRAME_BEGIN
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
...@@ -688,6 +690,7 @@ ENTRY(serpent_ecb_enc_8way_avx) ...@@ -688,6 +690,7 @@ ENTRY(serpent_ecb_enc_8way_avx)
store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
FRAME_END
ret; ret;
ENDPROC(serpent_ecb_enc_8way_avx) ENDPROC(serpent_ecb_enc_8way_avx)
...@@ -697,6 +700,7 @@ ENTRY(serpent_ecb_dec_8way_avx) ...@@ -697,6 +700,7 @@ ENTRY(serpent_ecb_dec_8way_avx)
* %rsi: dst * %rsi: dst
* %rdx: src * %rdx: src
*/ */
FRAME_BEGIN
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
...@@ -704,6 +708,7 @@ ENTRY(serpent_ecb_dec_8way_avx) ...@@ -704,6 +708,7 @@ ENTRY(serpent_ecb_dec_8way_avx)
store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
FRAME_END
ret; ret;
ENDPROC(serpent_ecb_dec_8way_avx) ENDPROC(serpent_ecb_dec_8way_avx)
...@@ -713,6 +718,7 @@ ENTRY(serpent_cbc_dec_8way_avx) ...@@ -713,6 +718,7 @@ ENTRY(serpent_cbc_dec_8way_avx)
* %rsi: dst * %rsi: dst
* %rdx: src * %rdx: src
*/ */
FRAME_BEGIN
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
...@@ -720,6 +726,7 @@ ENTRY(serpent_cbc_dec_8way_avx) ...@@ -720,6 +726,7 @@ ENTRY(serpent_cbc_dec_8way_avx)
store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
FRAME_END
ret; ret;
ENDPROC(serpent_cbc_dec_8way_avx) ENDPROC(serpent_cbc_dec_8way_avx)
...@@ -730,6 +737,7 @@ ENTRY(serpent_ctr_8way_avx) ...@@ -730,6 +737,7 @@ ENTRY(serpent_ctr_8way_avx)
* %rdx: src * %rdx: src
* %rcx: iv (little endian, 128bit) * %rcx: iv (little endian, 128bit)
*/ */
FRAME_BEGIN
load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2, load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
RD2, RK0, RK1, RK2); RD2, RK0, RK1, RK2);
...@@ -738,6 +746,7 @@ ENTRY(serpent_ctr_8way_avx) ...@@ -738,6 +746,7 @@ ENTRY(serpent_ctr_8way_avx)
store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
FRAME_END
ret; ret;
ENDPROC(serpent_ctr_8way_avx) ENDPROC(serpent_ctr_8way_avx)
...@@ -748,6 +757,7 @@ ENTRY(serpent_xts_enc_8way_avx) ...@@ -748,6 +757,7 @@ ENTRY(serpent_xts_enc_8way_avx)
* %rdx: src * %rdx: src
* %rcx: iv (t αⁿ GF(2¹²⁸)) * %rcx: iv (t αⁿ GF(2¹²⁸))
*/ */
FRAME_BEGIN
/* regs <= src, dst <= IVs, regs <= regs xor IVs */ /* regs <= src, dst <= IVs, regs <= regs xor IVs */
load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2, load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
...@@ -758,6 +768,7 @@ ENTRY(serpent_xts_enc_8way_avx) ...@@ -758,6 +768,7 @@ ENTRY(serpent_xts_enc_8way_avx)
/* dst <= regs xor IVs(in dst) */ /* dst <= regs xor IVs(in dst) */
store_xts_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); store_xts_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
FRAME_END
ret; ret;
ENDPROC(serpent_xts_enc_8way_avx) ENDPROC(serpent_xts_enc_8way_avx)
...@@ -768,6 +779,7 @@ ENTRY(serpent_xts_dec_8way_avx) ...@@ -768,6 +779,7 @@ ENTRY(serpent_xts_dec_8way_avx)
* %rdx: src * %rdx: src
* %rcx: iv (t αⁿ GF(2¹²⁸)) * %rcx: iv (t αⁿ GF(2¹²⁸))
*/ */
FRAME_BEGIN
/* regs <= src, dst <= IVs, regs <= regs xor IVs */ /* regs <= src, dst <= IVs, regs <= regs xor IVs */
load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2, load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
...@@ -778,5 +790,6 @@ ENTRY(serpent_xts_dec_8way_avx) ...@@ -778,5 +790,6 @@ ENTRY(serpent_xts_dec_8way_avx)
/* dst <= regs xor IVs(in dst) */ /* dst <= regs xor IVs(in dst) */
store_xts_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); store_xts_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
FRAME_END
ret; ret;
ENDPROC(serpent_xts_dec_8way_avx) ENDPROC(serpent_xts_dec_8way_avx)
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
*/ */
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/frame.h>
#include "glue_helper-asm-avx2.S" #include "glue_helper-asm-avx2.S"
.file "serpent-avx2-asm_64.S" .file "serpent-avx2-asm_64.S"
...@@ -673,6 +674,7 @@ ENTRY(serpent_ecb_enc_16way) ...@@ -673,6 +674,7 @@ ENTRY(serpent_ecb_enc_16way)
* %rsi: dst * %rsi: dst
* %rdx: src * %rdx: src
*/ */
FRAME_BEGIN
vzeroupper; vzeroupper;
...@@ -684,6 +686,7 @@ ENTRY(serpent_ecb_enc_16way) ...@@ -684,6 +686,7 @@ ENTRY(serpent_ecb_enc_16way)
vzeroupper; vzeroupper;
FRAME_END
ret; ret;
ENDPROC(serpent_ecb_enc_16way) ENDPROC(serpent_ecb_enc_16way)
...@@ -693,6 +696,7 @@ ENTRY(serpent_ecb_dec_16way) ...@@ -693,6 +696,7 @@ ENTRY(serpent_ecb_dec_16way)
* %rsi: dst * %rsi: dst
* %rdx: src * %rdx: src
*/ */
FRAME_BEGIN
vzeroupper; vzeroupper;
...@@ -704,6 +708,7 @@ ENTRY(serpent_ecb_dec_16way) ...@@ -704,6 +708,7 @@ ENTRY(serpent_ecb_dec_16way)
vzeroupper; vzeroupper;
FRAME_END
ret; ret;
ENDPROC(serpent_ecb_dec_16way) ENDPROC(serpent_ecb_dec_16way)
...@@ -713,6 +718,7 @@ ENTRY(serpent_cbc_dec_16way) ...@@ -713,6 +718,7 @@ ENTRY(serpent_cbc_dec_16way)
* %rsi: dst * %rsi: dst
* %rdx: src * %rdx: src
*/ */
FRAME_BEGIN
vzeroupper; vzeroupper;
...@@ -725,6 +731,7 @@ ENTRY(serpent_cbc_dec_16way) ...@@ -725,6 +731,7 @@ ENTRY(serpent_cbc_dec_16way)
vzeroupper; vzeroupper;
FRAME_END
ret; ret;
ENDPROC(serpent_cbc_dec_16way) ENDPROC(serpent_cbc_dec_16way)
...@@ -735,6 +742,7 @@ ENTRY(serpent_ctr_16way) ...@@ -735,6 +742,7 @@ ENTRY(serpent_ctr_16way)
* %rdx: src (16 blocks) * %rdx: src (16 blocks)
* %rcx: iv (little endian, 128bit) * %rcx: iv (little endian, 128bit)
*/ */
FRAME_BEGIN
vzeroupper; vzeroupper;
...@@ -748,6 +756,7 @@ ENTRY(serpent_ctr_16way) ...@@ -748,6 +756,7 @@ ENTRY(serpent_ctr_16way)
vzeroupper; vzeroupper;
FRAME_END
ret; ret;
ENDPROC(serpent_ctr_16way) ENDPROC(serpent_ctr_16way)
...@@ -758,6 +767,7 @@ ENTRY(serpent_xts_enc_16way) ...@@ -758,6 +767,7 @@ ENTRY(serpent_xts_enc_16way)
* %rdx: src (16 blocks) * %rdx: src (16 blocks)
* %rcx: iv (t αⁿ GF(2¹²⁸)) * %rcx: iv (t αⁿ GF(2¹²⁸))
*/ */
FRAME_BEGIN
vzeroupper; vzeroupper;
...@@ -772,6 +782,7 @@ ENTRY(serpent_xts_enc_16way) ...@@ -772,6 +782,7 @@ ENTRY(serpent_xts_enc_16way)
vzeroupper; vzeroupper;
FRAME_END
ret; ret;
ENDPROC(serpent_xts_enc_16way) ENDPROC(serpent_xts_enc_16way)
...@@ -782,6 +793,7 @@ ENTRY(serpent_xts_dec_16way) ...@@ -782,6 +793,7 @@ ENTRY(serpent_xts_dec_16way)
* %rdx: src (16 blocks) * %rdx: src (16 blocks)
* %rcx: iv (t αⁿ GF(2¹²⁸)) * %rcx: iv (t αⁿ GF(2¹²⁸))
*/ */
FRAME_BEGIN
vzeroupper; vzeroupper;
...@@ -796,5 +808,6 @@ ENTRY(serpent_xts_dec_16way) ...@@ -796,5 +808,6 @@ ENTRY(serpent_xts_dec_16way)
vzeroupper; vzeroupper;
FRAME_END
ret; ret;
ENDPROC(serpent_xts_dec_16way) ENDPROC(serpent_xts_dec_16way)
...@@ -52,6 +52,7 @@ ...@@ -52,6 +52,7 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/frame.h>
#include "sha1_mb_mgr_datastruct.S" #include "sha1_mb_mgr_datastruct.S"
...@@ -103,6 +104,7 @@ offset = \_offset ...@@ -103,6 +104,7 @@ offset = \_offset
# JOB* sha1_mb_mgr_flush_avx2(MB_MGR *state) # JOB* sha1_mb_mgr_flush_avx2(MB_MGR *state)
# arg 1 : rcx : state # arg 1 : rcx : state
ENTRY(sha1_mb_mgr_flush_avx2) ENTRY(sha1_mb_mgr_flush_avx2)
FRAME_BEGIN
push %rbx push %rbx
# If bit (32+3) is set, then all lanes are empty # If bit (32+3) is set, then all lanes are empty
...@@ -212,6 +214,7 @@ len_is_0: ...@@ -212,6 +214,7 @@ len_is_0:
return: return:
pop %rbx pop %rbx
FRAME_END
ret ret
return_null: return_null:
......
...@@ -53,6 +53,7 @@ ...@@ -53,6 +53,7 @@
*/ */
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/frame.h>
#include "sha1_mb_mgr_datastruct.S" #include "sha1_mb_mgr_datastruct.S"
...@@ -98,6 +99,7 @@ lane_data = %r10 ...@@ -98,6 +99,7 @@ lane_data = %r10
# arg 1 : rcx : state # arg 1 : rcx : state
# arg 2 : rdx : job # arg 2 : rdx : job
ENTRY(sha1_mb_mgr_submit_avx2) ENTRY(sha1_mb_mgr_submit_avx2)
FRAME_BEGIN
push %rbx push %rbx
push %r12 push %r12
...@@ -192,6 +194,7 @@ len_is_0: ...@@ -192,6 +194,7 @@ len_is_0:
return: return:
pop %r12 pop %r12
pop %rbx pop %rbx
FRAME_END
ret ret
return_null: return_null:
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
*/ */
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/frame.h>
#include "glue_helper-asm-avx.S" #include "glue_helper-asm-avx.S"
.file "twofish-avx-x86_64-asm_64.S" .file "twofish-avx-x86_64-asm_64.S"
...@@ -333,6 +334,7 @@ ENTRY(twofish_ecb_enc_8way) ...@@ -333,6 +334,7 @@ ENTRY(twofish_ecb_enc_8way)
* %rsi: dst * %rsi: dst
* %rdx: src * %rdx: src
*/ */
FRAME_BEGIN
movq %rsi, %r11; movq %rsi, %r11;
...@@ -342,6 +344,7 @@ ENTRY(twofish_ecb_enc_8way) ...@@ -342,6 +344,7 @@ ENTRY(twofish_ecb_enc_8way)
store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
FRAME_END
ret; ret;
ENDPROC(twofish_ecb_enc_8way) ENDPROC(twofish_ecb_enc_8way)
...@@ -351,6 +354,7 @@ ENTRY(twofish_ecb_dec_8way) ...@@ -351,6 +354,7 @@ ENTRY(twofish_ecb_dec_8way)
* %rsi: dst * %rsi: dst
* %rdx: src * %rdx: src
*/ */
FRAME_BEGIN
movq %rsi, %r11; movq %rsi, %r11;
...@@ -360,6 +364,7 @@ ENTRY(twofish_ecb_dec_8way) ...@@ -360,6 +364,7 @@ ENTRY(twofish_ecb_dec_8way)
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
FRAME_END
ret; ret;
ENDPROC(twofish_ecb_dec_8way) ENDPROC(twofish_ecb_dec_8way)
...@@ -369,6 +374,7 @@ ENTRY(twofish_cbc_dec_8way) ...@@ -369,6 +374,7 @@ ENTRY(twofish_cbc_dec_8way)
* %rsi: dst * %rsi: dst
* %rdx: src * %rdx: src
*/ */
FRAME_BEGIN
pushq %r12; pushq %r12;
...@@ -383,6 +389,7 @@ ENTRY(twofish_cbc_dec_8way) ...@@ -383,6 +389,7 @@ ENTRY(twofish_cbc_dec_8way)
popq %r12; popq %r12;
FRAME_END
ret; ret;
ENDPROC(twofish_cbc_dec_8way) ENDPROC(twofish_cbc_dec_8way)
...@@ -393,6 +400,7 @@ ENTRY(twofish_ctr_8way) ...@@ -393,6 +400,7 @@ ENTRY(twofish_ctr_8way)
* %rdx: src * %rdx: src
* %rcx: iv (little endian, 128bit) * %rcx: iv (little endian, 128bit)
*/ */
FRAME_BEGIN
pushq %r12; pushq %r12;
...@@ -408,6 +416,7 @@ ENTRY(twofish_ctr_8way) ...@@ -408,6 +416,7 @@ ENTRY(twofish_ctr_8way)
popq %r12; popq %r12;
FRAME_END
ret; ret;
ENDPROC(twofish_ctr_8way) ENDPROC(twofish_ctr_8way)
...@@ -418,6 +427,7 @@ ENTRY(twofish_xts_enc_8way) ...@@ -418,6 +427,7 @@ ENTRY(twofish_xts_enc_8way)
* %rdx: src * %rdx: src
* %rcx: iv (t αⁿ GF(2¹²⁸)) * %rcx: iv (t αⁿ GF(2¹²⁸))
*/ */
FRAME_BEGIN
movq %rsi, %r11; movq %rsi, %r11;
...@@ -430,6 +440,7 @@ ENTRY(twofish_xts_enc_8way) ...@@ -430,6 +440,7 @@ ENTRY(twofish_xts_enc_8way)
/* dst <= regs xor IVs(in dst) */ /* dst <= regs xor IVs(in dst) */
store_xts_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); store_xts_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
FRAME_END
ret; ret;
ENDPROC(twofish_xts_enc_8way) ENDPROC(twofish_xts_enc_8way)
...@@ -440,6 +451,7 @@ ENTRY(twofish_xts_dec_8way) ...@@ -440,6 +451,7 @@ ENTRY(twofish_xts_dec_8way)
* %rdx: src * %rdx: src
* %rcx: iv (t αⁿ GF(2¹²⁸)) * %rcx: iv (t αⁿ GF(2¹²⁸))
*/ */
FRAME_BEGIN
movq %rsi, %r11; movq %rsi, %r11;
...@@ -452,5 +464,6 @@ ENTRY(twofish_xts_dec_8way) ...@@ -452,5 +464,6 @@ ENTRY(twofish_xts_dec_8way)
/* dst <= regs xor IVs(in dst) */ /* dst <= regs xor IVs(in dst) */
store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
FRAME_END
ret; ret;
ENDPROC(twofish_xts_dec_8way) ENDPROC(twofish_xts_dec_8way)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment