Commit 7a6d0071 authored by Linus Torvalds

Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto fixes from Herbert Xu:
 - Fix compiler warnings in inside-secure
 - Fix LS1021A support in caam
 - Avoid using RBP in x86 crypto code (see the background note after the shortlog)
 - Fix bug in talitos that prevents hashing with algif
 - Fix bugs in talitos hashing code that cause incorrect hash results
 - Fix memory freeing path bug in drbg
 - Fix af_alg crash when two SG lists are chained

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6:
  crypto: af_alg - update correct dst SGL entry
  crypto: caam - fix LS1021A support on ARMv7 multiplatform kernel
  crypto: inside-secure - fix gcc-4.9 warnings
  crypto: talitos - Don't provide setkey for non hmac hashing algs
  crypto: talitos - fix hashing
  crypto: talitos - fix sha224
  crypto: x86/twofish - Fix RBP usage
  crypto: sha512-avx2 - Fix RBP usage
  crypto: x86/sha256-ssse3 - Fix RBP usage
  crypto: x86/sha256-avx2 - Fix RBP usage
  crypto: x86/sha256-avx - Fix RBP usage
  crypto: x86/sha1-ssse3 - Fix RBP usage
  crypto: x86/sha1-avx2 - Fix RBP usage
  crypto: x86/des3_ede - Fix RBP usage
  crypto: x86/cast6 - Fix RBP usage
  crypto: x86/cast5 - Fix RBP usage
  crypto: x86/camellia - Fix RBP usage
  crypto: x86/blowfish - Fix RBP usage
  crypto: drbg - fix freeing of resources
parents 6e80ecdd e117765a
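
Background on the long run of "Fix RBP usage" patches below: when the kernel is built with frame pointers, the x86-64 unwinder expects %rbp to hold the head of the saved-frame chain at every instruction, so assembly that borrows %rbp as a scratch or round-key register (as all of these crypto routines did) yields corrupt stack traces whenever an interrupt or NMI lands inside it. The recurring fix is to move the value into a free callee-saved register (%r12, %r13, %r15) or, when every register is taken, to spill it to the stack. A minimal userspace sketch of frame-pointer unwinding, assuming the standard x86-64 frame layout (illustrative C, not the kernel's unwinder):

#include <stdio.h>

/* With -fno-omit-frame-pointer, each x86-64 frame starts with the
 * caller's saved %rbp, followed by the return address. */
struct frame {
	struct frame *next;	/* saved caller %rbp */
	void *ret;		/* return address */
};

static void backtrace_fp(void)
{
	struct frame *f = __builtin_frame_address(0);

	/* Walk at most a few frames; the chain ends where the C runtime
	 * zeroed %rbp at process entry. */
	for (int depth = 0; f && f->ret && depth < 8; depth++) {
		printf("frame %d: return address %p\n", depth, f->ret);
		f = f->next;
	}
}

static void leaf(void)
{
	backtrace_fp();
}

int main(void)
{
	leaf();		/* build with: gcc -O0 -fno-omit-frame-pointer */
	return 0;
}

If leaf() clobbered %rbp the way the old crypto assembly did, f->next would point at arbitrary data, which is exactly the failure mode these patches remove.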
@@ -33,7 +33,7 @@
 #define s3 ((16 + 2 + (3 * 256)) * 4)
 /* register macros */
-#define CTX %rdi
+#define CTX %r12
 #define RIO %rsi
 #define RX0 %rax
@@ -56,12 +56,12 @@
 #define RX2bh %ch
 #define RX3bh %dh
-#define RT0 %rbp
+#define RT0 %rdi
 #define RT1 %rsi
 #define RT2 %r8
 #define RT3 %r9
-#define RT0d %ebp
+#define RT0d %edi
 #define RT1d %esi
 #define RT2d %r8d
 #define RT3d %r9d
@@ -120,13 +120,14 @@
 ENTRY(__blowfish_enc_blk)
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * %rsi: dst
 	 * %rdx: src
 	 * %rcx: bool, if true: xor output
 	 */
-	movq %rbp, %r11;
+	movq %r12, %r11;
+	movq %rdi, CTX;
 	movq %rsi, %r10;
 	movq %rdx, RIO;
@@ -142,7 +143,7 @@ ENTRY(__blowfish_enc_blk)
 	round_enc(14);
 	add_roundkey_enc(16);
-	movq %r11, %rbp;
+	movq %r11, %r12;
 	movq %r10, RIO;
 	test %cl, %cl;
@@ -157,12 +158,13 @@ ENDPROC(__blowfish_enc_blk)
 ENTRY(blowfish_dec_blk)
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * %rsi: dst
 	 * %rdx: src
 	 */
-	movq %rbp, %r11;
+	movq %r12, %r11;
+	movq %rdi, CTX;
 	movq %rsi, %r10;
 	movq %rdx, RIO;
@@ -181,7 +183,7 @@ ENTRY(blowfish_dec_blk)
 	movq %r10, RIO;
 	write_block();
-	movq %r11, %rbp;
+	movq %r11, %r12;
 	ret;
 ENDPROC(blowfish_dec_blk)
@@ -298,20 +300,21 @@ ENDPROC(blowfish_dec_blk)
 ENTRY(__blowfish_enc_blk_4way)
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * %rsi: dst
 	 * %rdx: src
 	 * %rcx: bool, if true: xor output
 	 */
-	pushq %rbp;
+	pushq %r12;
 	pushq %rbx;
 	pushq %rcx;
-	preload_roundkey_enc(0);
+	movq %rdi, CTX
 	movq %rsi, %r11;
 	movq %rdx, RIO;
+	preload_roundkey_enc(0);
 	read_block4();
 	round_enc4(0);
@@ -324,39 +327,40 @@ ENTRY(__blowfish_enc_blk_4way)
 	round_enc4(14);
 	add_preloaded_roundkey4();
-	popq %rbp;
+	popq %r12;
 	movq %r11, RIO;
-	test %bpl, %bpl;
+	test %r12b, %r12b;
 	jnz .L__enc_xor4;
 	write_block4();
 	popq %rbx;
-	popq %rbp;
+	popq %r12;
 	ret;
 .L__enc_xor4:
 	xor_block4();
 	popq %rbx;
-	popq %rbp;
+	popq %r12;
 	ret;
 ENDPROC(__blowfish_enc_blk_4way)
 ENTRY(blowfish_dec_blk_4way)
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * %rsi: dst
 	 * %rdx: src
 	 */
-	pushq %rbp;
+	pushq %r12;
 	pushq %rbx;
-	preload_roundkey_dec(17);
-	movq %rsi, %r11;
+	movq %rdi, CTX;
+	movq %rsi, %r11
 	movq %rdx, RIO;
+	preload_roundkey_dec(17);
 	read_block4();
 	round_dec4(17);
@@ -373,7 +377,7 @@ ENTRY(blowfish_dec_blk_4way)
 	write_block4();
 	popq %rbx;
-	popq %rbp;
+	popq %r12;
 	ret;
 ENDPROC(blowfish_dec_blk_4way)
@@ -75,17 +75,17 @@
 #define RCD1bh %dh
 #define RT0 %rsi
-#define RT1 %rbp
+#define RT1 %r12
 #define RT2 %r8
 #define RT0d %esi
-#define RT1d %ebp
+#define RT1d %r12d
 #define RT2d %r8d
 #define RT2bl %r8b
 #define RXOR %r9
-#define RRBP %r10
+#define RR12 %r10
 #define RDST %r11
 #define RXORd %r9d
@@ -197,7 +197,7 @@ ENTRY(__camellia_enc_blk)
 	 * %rdx: src
 	 * %rcx: bool xor
 	 */
-	movq %rbp, RRBP;
+	movq %r12, RR12;
 	movq %rcx, RXOR;
 	movq %rsi, RDST;
@@ -227,13 +227,13 @@ ENTRY(__camellia_enc_blk)
 	enc_outunpack(mov, RT1);
-	movq RRBP, %rbp;
+	movq RR12, %r12;
 	ret;
 .L__enc_xor:
 	enc_outunpack(xor, RT1);
-	movq RRBP, %rbp;
+	movq RR12, %r12;
 	ret;
 ENDPROC(__camellia_enc_blk)
@@ -248,7 +248,7 @@ ENTRY(camellia_dec_blk)
 	movl $24, RXORd;
 	cmovel RXORd, RT2d; /* max */
-	movq %rbp, RRBP;
+	movq %r12, RR12;
 	movq %rsi, RDST;
 	movq %rdx, RIO;
@@ -271,7 +271,7 @@ ENTRY(camellia_dec_blk)
 	dec_outunpack();
-	movq RRBP, %rbp;
+	movq RR12, %r12;
 	ret;
 ENDPROC(camellia_dec_blk)
@@ -433,7 +433,7 @@ ENTRY(__camellia_enc_blk_2way)
 	 */
 	pushq %rbx;
-	movq %rbp, RRBP;
+	movq %r12, RR12;
 	movq %rcx, RXOR;
 	movq %rsi, RDST;
 	movq %rdx, RIO;
@@ -461,14 +461,14 @@ ENTRY(__camellia_enc_blk_2way)
 	enc_outunpack2(mov, RT2);
-	movq RRBP, %rbp;
+	movq RR12, %r12;
 	popq %rbx;
 	ret;
 .L__enc2_xor:
 	enc_outunpack2(xor, RT2);
-	movq RRBP, %rbp;
+	movq RR12, %r12;
 	popq %rbx;
 	ret;
 ENDPROC(__camellia_enc_blk_2way)
@@ -485,7 +485,7 @@ ENTRY(camellia_dec_blk_2way)
 	cmovel RXORd, RT2d; /* max */
 	movq %rbx, RXOR;
-	movq %rbp, RRBP;
+	movq %r12, RR12;
 	movq %rsi, RDST;
 	movq %rdx, RIO;
@@ -508,7 +508,7 @@ ENTRY(camellia_dec_blk_2way)
 	dec_outunpack2();
-	movq RRBP, %rbp;
+	movq RR12, %r12;
 	movq RXOR, %rbx;
 	ret;
 ENDPROC(camellia_dec_blk_2way)
@@ -47,7 +47,7 @@
 /**********************************************************************
   16-way AVX cast5
  **********************************************************************/
-#define CTX %rdi
+#define CTX %r15
 #define RL1 %xmm0
 #define RR1 %xmm1
@@ -70,8 +70,8 @@
 #define RTMP %xmm15
-#define RID1 %rbp
-#define RID1d %ebp
+#define RID1 %rdi
+#define RID1d %edi
 #define RID2 %rsi
 #define RID2d %esi
@@ -226,7 +226,7 @@
 .align 16
 __cast5_enc_blk16:
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * RL1: blocks 1 and 2
 	 * RR1: blocks 3 and 4
 	 * RL2: blocks 5 and 6
@@ -246,9 +246,11 @@ __cast5_enc_blk16:
 	 * RR4: encrypted blocks 15 and 16
 	 */
-	pushq %rbp;
+	pushq %r15;
 	pushq %rbx;
+	movq %rdi, CTX;
 	vmovdqa .Lbswap_mask, RKM;
 	vmovd .Lfirst_mask, R1ST;
 	vmovd .L32_mask, R32;
@@ -283,7 +285,7 @@ __cast5_enc_blk16:
 .L__skip_enc:
 	popq %rbx;
-	popq %rbp;
+	popq %r15;
 	vmovdqa .Lbswap_mask, RKM;
@@ -298,7 +300,7 @@ ENDPROC(__cast5_enc_blk16)
 .align 16
 __cast5_dec_blk16:
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * RL1: encrypted blocks 1 and 2
 	 * RR1: encrypted blocks 3 and 4
 	 * RL2: encrypted blocks 5 and 6
@@ -318,9 +320,11 @@ __cast5_dec_blk16:
 	 * RR4: decrypted blocks 15 and 16
 	 */
-	pushq %rbp;
+	pushq %r15;
 	pushq %rbx;
+	movq %rdi, CTX;
 	vmovdqa .Lbswap_mask, RKM;
 	vmovd .Lfirst_mask, R1ST;
 	vmovd .L32_mask, R32;
@@ -356,7 +360,7 @@ __cast5_dec_blk16:
 	vmovdqa .Lbswap_mask, RKM;
 	popq %rbx;
-	popq %rbp;
+	popq %r15;
 	outunpack_blocks(RR1, RL1, RTMP, RX, RKM);
 	outunpack_blocks(RR2, RL2, RTMP, RX, RKM);
@@ -372,12 +376,14 @@ ENDPROC(__cast5_dec_blk16)
 ENTRY(cast5_ecb_enc_16way)
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * %rsi: dst
 	 * %rdx: src
 	 */
 	FRAME_BEGIN
+	pushq %r15;
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	vmovdqu (0*4*4)(%rdx), RL1;
@@ -400,18 +406,22 @@ ENTRY(cast5_ecb_enc_16way)
 	vmovdqu RR4, (6*4*4)(%r11);
 	vmovdqu RL4, (7*4*4)(%r11);
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast5_ecb_enc_16way)
 ENTRY(cast5_ecb_dec_16way)
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * %rsi: dst
 	 * %rdx: src
 	 */
 	FRAME_BEGIN
+	pushq %r15;
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	vmovdqu (0*4*4)(%rdx), RL1;
@@ -434,20 +444,22 @@ ENTRY(cast5_ecb_dec_16way)
 	vmovdqu RR4, (6*4*4)(%r11);
 	vmovdqu RL4, (7*4*4)(%r11);
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast5_ecb_dec_16way)
 ENTRY(cast5_cbc_dec_16way)
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * %rsi: dst
 	 * %rdx: src
 	 */
 	FRAME_BEGIN
 	pushq %r12;
+	pushq %r15;
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	movq %rdx, %r12;
@@ -483,23 +495,24 @@ ENTRY(cast5_cbc_dec_16way)
 	vmovdqu RR4, (6*16)(%r11);
 	vmovdqu RL4, (7*16)(%r11);
+	popq %r15;
 	popq %r12;
 	FRAME_END
 	ret;
 ENDPROC(cast5_cbc_dec_16way)
 ENTRY(cast5_ctr_16way)
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * %rsi: dst
 	 * %rdx: src
 	 * %rcx: iv (big endian, 64bit)
 	 */
 	FRAME_BEGIN
 	pushq %r12;
+	pushq %r15;
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	movq %rdx, %r12;
@@ -558,8 +571,8 @@ ENTRY(cast5_ctr_16way)
 	vmovdqu RR4, (6*16)(%r11);
 	vmovdqu RL4, (7*16)(%r11);
+	popq %r15;
 	popq %r12;
 	FRAME_END
 	ret;
 ENDPROC(cast5_ctr_16way)
@@ -47,7 +47,7 @@
 /**********************************************************************
   8-way AVX cast6
  **********************************************************************/
-#define CTX %rdi
+#define CTX %r15
 #define RA1 %xmm0
 #define RB1 %xmm1
@@ -70,8 +70,8 @@
 #define RTMP %xmm15
-#define RID1 %rbp
-#define RID1d %ebp
+#define RID1 %rdi
+#define RID1d %edi
 #define RID2 %rsi
 #define RID2d %esi
@@ -264,15 +264,17 @@
 .align 8
 __cast6_enc_blk8:
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
 	 * output:
 	 * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
 	 */
-	pushq %rbp;
+	pushq %r15;
 	pushq %rbx;
+	movq %rdi, CTX;
 	vmovdqa .Lbswap_mask, RKM;
 	vmovd .Lfirst_mask, R1ST;
 	vmovd .L32_mask, R32;
@@ -297,7 +299,7 @@ __cast6_enc_blk8:
 	QBAR(11);
 	popq %rbx;
-	popq %rbp;
+	popq %r15;
 	vmovdqa .Lbswap_mask, RKM;
@@ -310,15 +312,17 @@ ENDPROC(__cast6_enc_blk8)
 .align 8
 __cast6_dec_blk8:
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
 	 * output:
 	 * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks
 	 */
-	pushq %rbp;
+	pushq %r15;
 	pushq %rbx;
+	movq %rdi, CTX;
 	vmovdqa .Lbswap_mask, RKM;
 	vmovd .Lfirst_mask, R1ST;
 	vmovd .L32_mask, R32;
@@ -343,7 +347,7 @@ __cast6_dec_blk8:
 	QBAR(0);
 	popq %rbx;
-	popq %rbp;
+	popq %r15;
 	vmovdqa .Lbswap_mask, RKM;
 	outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
@@ -354,12 +358,14 @@ ENDPROC(__cast6_dec_blk8)
 ENTRY(cast6_ecb_enc_8way)
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * %rsi: dst
 	 * %rdx: src
 	 */
 	FRAME_BEGIN
+	pushq %r15;
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
@@ -368,18 +374,21 @@ ENTRY(cast6_ecb_enc_8way)
 	store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast6_ecb_enc_8way)
 ENTRY(cast6_ecb_dec_8way)
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * %rsi: dst
 	 * %rdx: src
 	 */
 	FRAME_BEGIN
+	pushq %r15;
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
@@ -388,20 +397,22 @@ ENTRY(cast6_ecb_dec_8way)
 	store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast6_ecb_dec_8way)
 ENTRY(cast6_cbc_dec_8way)
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * %rsi: dst
 	 * %rdx: src
 	 */
 	FRAME_BEGIN
 	pushq %r12;
+	pushq %r15;
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	movq %rdx, %r12;
@@ -411,8 +422,8 @@ ENTRY(cast6_cbc_dec_8way)
 	store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+	popq %r15;
 	popq %r12;
 	FRAME_END
 	ret;
 ENDPROC(cast6_cbc_dec_8way)
@@ -425,9 +436,10 @@ ENTRY(cast6_ctr_8way)
 	 * %rcx: iv (little endian, 128bit)
 	 */
 	FRAME_BEGIN
 	pushq %r12;
+	pushq %r15
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	movq %rdx, %r12;
@@ -438,8 +450,8 @@ ENTRY(cast6_ctr_8way)
 	store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+	popq %r15;
 	popq %r12;
 	FRAME_END
 	ret;
 ENDPROC(cast6_ctr_8way)
@@ -452,7 +464,9 @@ ENTRY(cast6_xts_enc_8way)
 	 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
 	 */
 	FRAME_BEGIN
+	pushq %r15;
+	movq %rdi, CTX
 	movq %rsi, %r11;
 	/* regs <= src, dst <= IVs, regs <= regs xor IVs */
@@ -464,6 +478,7 @@ ENTRY(cast6_xts_enc_8way)
 	/* dst <= regs xor IVs(in dst) */
 	store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast6_xts_enc_8way)
@@ -476,7 +491,9 @@ ENTRY(cast6_xts_dec_8way)
 	 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
 	 */
 	FRAME_BEGIN
+	pushq %r15;
+	movq %rdi, CTX
 	movq %rsi, %r11;
 	/* regs <= src, dst <= IVs, regs <= regs xor IVs */
@@ -488,6 +505,7 @@ ENTRY(cast6_xts_dec_8way)
 	/* dst <= regs xor IVs(in dst) */
 	store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast6_xts_dec_8way)
@@ -64,12 +64,12 @@
 #define RW2bh %ch
 #define RT0 %r15
-#define RT1 %rbp
+#define RT1 %rsi
 #define RT2 %r14
 #define RT3 %rdx
 #define RT0d %r15d
-#define RT1d %ebp
+#define RT1d %esi
 #define RT2d %r14d
 #define RT3d %edx
@@ -177,13 +177,14 @@ ENTRY(des3_ede_x86_64_crypt_blk)
 	 * %rsi: dst
 	 * %rdx: src
 	 */
-	pushq %rbp;
 	pushq %rbx;
 	pushq %r12;
 	pushq %r13;
 	pushq %r14;
 	pushq %r15;
+	pushq %rsi; /* dst */
 	read_block(%rdx, RL0, RR0);
 	initial_permutation(RL0, RR0);
@@ -241,6 +242,8 @@ ENTRY(des3_ede_x86_64_crypt_blk)
 	round1(32+15, RL0, RR0, dummy2);
 	final_permutation(RR0, RL0);
+	popq %rsi /* dst */
 	write_block(%rsi, RR0, RL0);
 	popq %r15;
@@ -248,7 +251,6 @@ ENTRY(des3_ede_x86_64_crypt_blk)
 	popq %r13;
 	popq %r12;
 	popq %rbx;
-	popq %rbp;
 	ret;
 ENDPROC(des3_ede_x86_64_crypt_blk)
@@ -432,13 +434,14 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
 	 * %rdx: src (3 blocks)
 	 */
-	pushq %rbp;
 	pushq %rbx;
 	pushq %r12;
 	pushq %r13;
 	pushq %r14;
 	pushq %r15;
+	pushq %rsi /* dst */
 	/* load input */
 	movl 0 * 4(%rdx), RL0d;
 	movl 1 * 4(%rdx), RR0d;
@@ -520,6 +523,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
 	bswapl RR2d;
 	bswapl RL2d;
+	popq %rsi /* dst */
 	movl RR0d, 0 * 4(%rsi);
 	movl RL0d, 1 * 4(%rsi);
 	movl RR1d, 2 * 4(%rsi);
@@ -532,7 +536,6 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
 	popq %r13;
 	popq %r12;
 	popq %rbx;
-	popq %rbp;
 	ret;
 ENDPROC(des3_ede_x86_64_crypt_blk_3way)
...
@@ -89,7 +89,7 @@
 #define REG_RE %rdx
 #define REG_RTA %r12
 #define REG_RTB %rbx
-#define REG_T1 %ebp
+#define REG_T1 %r11d
 #define xmm_mov vmovups
 #define avx2_zeroupper vzeroupper
 #define RND_F1 1
@@ -637,7 +637,6 @@ _loop3:
 ENTRY(\name)
 	push %rbx
-	push %rbp
 	push %r12
 	push %r13
 	push %r14
@@ -673,7 +672,6 @@ _loop3:
 	pop %r14
 	pop %r13
 	pop %r12
-	pop %rbp
 	pop %rbx
 	ret
...
@@ -37,7 +37,7 @@
 #define REG_A %ecx
 #define REG_B %esi
 #define REG_C %edi
-#define REG_D %ebp
+#define REG_D %r12d
 #define REG_E %edx
 #define REG_T1 %eax
@@ -74,10 +74,10 @@
 ENTRY(\name)
 	push %rbx
-	push %rbp
 	push %r12
-	mov %rsp, %r12
+	push %rbp
+	mov %rsp, %rbp
 	sub $64, %rsp # allocate workspace
 	and $~15, %rsp # align stack
@@ -99,10 +99,9 @@
 	xor %rax, %rax
 	rep stosq
-	mov %r12, %rsp # deallocate workspace
-	pop %r12
+	mov %rbp, %rsp # deallocate workspace
 	pop %rbp
+	pop %r12
 	pop %rbx
 	ret
...
@@ -103,7 +103,7 @@ SRND = %rsi # clobbers INP
 c = %ecx
 d = %r8d
 e = %edx
-TBL = %rbp
+TBL = %r12
 a = %eax
 b = %ebx
@@ -350,13 +350,13 @@ a = TMP_
 ENTRY(sha256_transform_avx)
 .align 32
 	pushq %rbx
-	pushq %rbp
+	pushq %r12
 	pushq %r13
 	pushq %r14
 	pushq %r15
-	pushq %r12
-	mov %rsp, %r12
+	pushq %rbp
+	movq %rsp, %rbp
 	subq $STACK_SIZE, %rsp # allocate stack space
 	and $~15, %rsp # align stack pointer
@@ -452,13 +452,12 @@ loop2:
 done_hash:
-	mov %r12, %rsp
-	popq %r12
+	mov %rbp, %rsp
+	popq %rbp
 	popq %r15
 	popq %r14
 	popq %r13
-	popq %rbp
+	popq %r12
 	popq %rbx
 	ret
 ENDPROC(sha256_transform_avx)
...
@@ -98,8 +98,6 @@ d = %r8d
 e = %edx # clobbers NUM_BLKS
 y3 = %esi # clobbers INP
-TBL = %rbp
 SRND = CTX # SRND is same register as CTX
 a = %eax
@@ -531,7 +529,6 @@ STACK_SIZE = _RSP + _RSP_SIZE
 ENTRY(sha256_transform_rorx)
 .align 32
 	pushq %rbx
-	pushq %rbp
 	pushq %r12
 	pushq %r13
 	pushq %r14
@@ -568,8 +565,6 @@ ENTRY(sha256_transform_rorx)
 	mov CTX, _CTX(%rsp)
 loop0:
-	lea K256(%rip), TBL
 	## Load first 16 dwords from two blocks
 	VMOVDQ 0*32(INP),XTMP0
 	VMOVDQ 1*32(INP),XTMP1
@@ -597,19 +592,19 @@ last_block_enter:
 .align 16
 loop1:
-	vpaddd 0*32(TBL, SRND), X0, XFER
+	vpaddd K256+0*32(SRND), X0, XFER
 	vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
 	FOUR_ROUNDS_AND_SCHED _XFER + 0*32
-	vpaddd 1*32(TBL, SRND), X0, XFER
+	vpaddd K256+1*32(SRND), X0, XFER
 	vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
 	FOUR_ROUNDS_AND_SCHED _XFER + 1*32
-	vpaddd 2*32(TBL, SRND), X0, XFER
+	vpaddd K256+2*32(SRND), X0, XFER
 	vmovdqa XFER, 2*32+_XFER(%rsp, SRND)
 	FOUR_ROUNDS_AND_SCHED _XFER + 2*32
-	vpaddd 3*32(TBL, SRND), X0, XFER
+	vpaddd K256+3*32(SRND), X0, XFER
 	vmovdqa XFER, 3*32+_XFER(%rsp, SRND)
 	FOUR_ROUNDS_AND_SCHED _XFER + 3*32
@@ -619,10 +614,11 @@ loop1:
 loop2:
 	## Do last 16 rounds with no scheduling
-	vpaddd 0*32(TBL, SRND), X0, XFER
+	vpaddd K256+0*32(SRND), X0, XFER
 	vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
 	DO_4ROUNDS _XFER + 0*32
-	vpaddd 1*32(TBL, SRND), X1, XFER
+	vpaddd K256+1*32(SRND), X1, XFER
 	vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
 	DO_4ROUNDS _XFER + 1*32
 	add $2*32, SRND
@@ -676,9 +672,6 @@ loop3:
 	ja done_hash
 do_last_block:
-	#### do last block
-	lea K256(%rip), TBL
 	VMOVDQ 0*16(INP),XWORD0
 	VMOVDQ 1*16(INP),XWORD1
 	VMOVDQ 2*16(INP),XWORD2
@@ -718,7 +711,6 @@ done_hash:
 	popq %r14
 	popq %r13
 	popq %r12
-	popq %rbp
 	popq %rbx
 	ret
 ENDPROC(sha256_transform_rorx)
...
@@ -95,7 +95,7 @@ SRND = %rsi # clobbers INP
 c = %ecx
 d = %r8d
 e = %edx
-TBL = %rbp
+TBL = %r12
 a = %eax
 b = %ebx
@@ -356,13 +356,13 @@ a = TMP_
 ENTRY(sha256_transform_ssse3)
 .align 32
 	pushq %rbx
-	pushq %rbp
+	pushq %r12
 	pushq %r13
 	pushq %r14
 	pushq %r15
-	pushq %r12
-	mov %rsp, %r12
+	pushq %rbp
+	mov %rsp, %rbp
 	subq $STACK_SIZE, %rsp
 	and $~15, %rsp
@@ -462,13 +462,12 @@ loop2:
 done_hash:
-	mov %r12, %rsp
-	popq %r12
+	mov %rbp, %rsp
+	popq %rbp
 	popq %r15
 	popq %r14
 	popq %r13
-	popq %rbp
+	popq %r12
 	popq %rbx
 	ret
...
@@ -69,8 +69,9 @@ XFER = YTMP0
 BYTE_FLIP_MASK = %ymm9
-# 1st arg
-CTX = %rdi
+# 1st arg is %rdi, which is saved to the stack and accessed later via %r12
+CTX1 = %rdi
+CTX2 = %r12
 # 2nd arg
 INP = %rsi
 # 3rd arg
@@ -81,7 +82,7 @@ d = %r8
 e = %rdx
 y3 = %rsi
-TBL = %rbp
+TBL = %rdi # clobbers CTX1
 a = %rax
 b = %rbx
@@ -91,26 +92,26 @@ g = %r10
 h = %r11
 old_h = %r11
-T1 = %r12
+T1 = %r12 # clobbers CTX2
 y0 = %r13
 y1 = %r14
 y2 = %r15
-y4 = %r12
 # Local variables (stack frame)
 XFER_SIZE = 4*8
 SRND_SIZE = 1*8
 INP_SIZE = 1*8
 INPEND_SIZE = 1*8
+CTX_SIZE = 1*8
 RSPSAVE_SIZE = 1*8
-GPRSAVE_SIZE = 6*8
+GPRSAVE_SIZE = 5*8
 frame_XFER = 0
 frame_SRND = frame_XFER + XFER_SIZE
 frame_INP = frame_SRND + SRND_SIZE
 frame_INPEND = frame_INP + INP_SIZE
-frame_RSPSAVE = frame_INPEND + INPEND_SIZE
+frame_CTX = frame_INPEND + INPEND_SIZE
+frame_RSPSAVE = frame_CTX + CTX_SIZE
 frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE
 frame_size = frame_GPRSAVE + GPRSAVE_SIZE
@@ -576,12 +577,11 @@ ENTRY(sha512_transform_rorx)
 	mov %rax, frame_RSPSAVE(%rsp)
 	# Save GPRs
-	mov %rbp, frame_GPRSAVE(%rsp)
-	mov %rbx, 8*1+frame_GPRSAVE(%rsp)
-	mov %r12, 8*2+frame_GPRSAVE(%rsp)
-	mov %r13, 8*3+frame_GPRSAVE(%rsp)
-	mov %r14, 8*4+frame_GPRSAVE(%rsp)
-	mov %r15, 8*5+frame_GPRSAVE(%rsp)
+	mov %rbx, 8*0+frame_GPRSAVE(%rsp)
+	mov %r12, 8*1+frame_GPRSAVE(%rsp)
+	mov %r13, 8*2+frame_GPRSAVE(%rsp)
+	mov %r14, 8*3+frame_GPRSAVE(%rsp)
+	mov %r15, 8*4+frame_GPRSAVE(%rsp)
 	shl $7, NUM_BLKS # convert to bytes
 	jz done_hash
@@ -589,14 +589,17 @@ ENTRY(sha512_transform_rorx)
 	mov NUM_BLKS, frame_INPEND(%rsp)
 	## load initial digest
-	mov 8*0(CTX),a
-	mov 8*1(CTX),b
-	mov 8*2(CTX),c
-	mov 8*3(CTX),d
-	mov 8*4(CTX),e
-	mov 8*5(CTX),f
-	mov 8*6(CTX),g
-	mov 8*7(CTX),h
+	mov 8*0(CTX1), a
+	mov 8*1(CTX1), b
+	mov 8*2(CTX1), c
+	mov 8*3(CTX1), d
+	mov 8*4(CTX1), e
+	mov 8*5(CTX1), f
+	mov 8*6(CTX1), g
+	mov 8*7(CTX1), h
+	# save %rdi (CTX) before it gets clobbered
+	mov %rdi, frame_CTX(%rsp)
 	vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK
@@ -652,14 +655,15 @@ loop2:
 	subq $1, frame_SRND(%rsp)
 	jne loop2
-	addm 8*0(CTX),a
-	addm 8*1(CTX),b
-	addm 8*2(CTX),c
-	addm 8*3(CTX),d
-	addm 8*4(CTX),e
-	addm 8*5(CTX),f
-	addm 8*6(CTX),g
-	addm 8*7(CTX),h
+	mov frame_CTX(%rsp), CTX2
+	addm 8*0(CTX2), a
+	addm 8*1(CTX2), b
+	addm 8*2(CTX2), c
+	addm 8*3(CTX2), d
+	addm 8*4(CTX2), e
+	addm 8*5(CTX2), f
+	addm 8*6(CTX2), g
+	addm 8*7(CTX2), h
 	mov frame_INP(%rsp), INP
 	add $128, INP
@@ -669,12 +673,11 @@ loop2:
 done_hash:
 	# Restore GPRs
-	mov frame_GPRSAVE(%rsp) ,%rbp
-	mov 8*1+frame_GPRSAVE(%rsp) ,%rbx
-	mov 8*2+frame_GPRSAVE(%rsp) ,%r12
-	mov 8*3+frame_GPRSAVE(%rsp) ,%r13
-	mov 8*4+frame_GPRSAVE(%rsp) ,%r14
-	mov 8*5+frame_GPRSAVE(%rsp) ,%r15
+	mov 8*0+frame_GPRSAVE(%rsp), %rbx
+	mov 8*1+frame_GPRSAVE(%rsp), %r12
+	mov 8*2+frame_GPRSAVE(%rsp), %r13
+	mov 8*3+frame_GPRSAVE(%rsp), %r14
+	mov 8*4+frame_GPRSAVE(%rsp), %r15
 	# Restore Stack Pointer
 	mov frame_RSPSAVE(%rsp), %rsp
...
@@ -76,8 +76,8 @@
 #define RT %xmm14
 #define RR %xmm15
-#define RID1 %rbp
-#define RID1d %ebp
+#define RID1 %r13
+#define RID1d %r13d
 #define RID2 %rsi
 #define RID2d %esi
@@ -259,7 +259,7 @@ __twofish_enc_blk8:
 	vmovdqu w(CTX), RK1;
-	pushq %rbp;
+	pushq %r13;
 	pushq %rbx;
 	pushq %rcx;
@@ -282,7 +282,7 @@ __twofish_enc_blk8:
 	popq %rcx;
 	popq %rbx;
-	popq %rbp;
+	popq %r13;
 	outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
 	outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
@@ -301,7 +301,7 @@ __twofish_dec_blk8:
 	vmovdqu (w+4*4)(CTX), RK1;
-	pushq %rbp;
+	pushq %r13;
 	pushq %rbx;
 	inpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
@@ -322,7 +322,7 @@ __twofish_dec_blk8:
 	vmovdqu (w)(CTX), RK1;
 	popq %rbx;
-	popq %rbp;
+	popq %r13;
 	outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
 	outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
...
@@ -619,14 +619,14 @@ void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst,
 	struct af_alg_ctx *ctx = ask->private;
 	struct af_alg_tsgl *sgl;
 	struct scatterlist *sg;
-	unsigned int i, j;
+	unsigned int i, j = 0;
 	while (!list_empty(&ctx->tsgl_list)) {
 		sgl = list_first_entry(&ctx->tsgl_list, struct af_alg_tsgl,
 				       list);
 		sg = sgl->sg;
-		for (i = 0, j = 0; i < sgl->cur; i++) {
+		for (i = 0; i < sgl->cur; i++) {
 			size_t plen = min_t(size_t, used, sg[i].length);
 			struct page *page = sg_page(sg + i);
...
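
The af_alg hunk above is an indexing fix: af_alg_pull_tsgl() fills one destination SG list while walking a chain of source lists, so the destination index j has to be carried across lists rather than reset for each one. A userspace toy of the same pattern (names invented for illustration, not the kernel API):

#include <stdio.h>

#define LISTS 2
#define PER_LIST 3

int main(void)
{
	const char *src[LISTS][PER_LIST] = {
		{ "a", "b", "c" },
		{ "d", "e", "f" },
	};
	const char *dst[LISTS * PER_LIST] = { 0 };
	unsigned int i, j = 0;			/* j set once, as in the fix */

	for (unsigned int l = 0; l < LISTS; l++)
		for (i = 0; i < PER_LIST; i++)	/* the bug reset j here too */
			dst[j++] = src[l][i];

	/* prints "abcdef"; with j reset per list, the second list would
	 * overwrite the first and dst[3..5] would stay NULL */
	for (i = 0; i < LISTS * PER_LIST; i++)
		printf("%s", dst[i]);
	printf("\n");
	return 0;
}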
@@ -1133,10 +1133,10 @@ static inline void drbg_dealloc_state(struct drbg_state *drbg)
 {
 	if (!drbg)
 		return;
-	kzfree(drbg->V);
-	drbg->Vbuf = NULL;
-	kzfree(drbg->C);
-	drbg->Cbuf = NULL;
+	kzfree(drbg->Vbuf);
+	drbg->V = NULL;
+	kzfree(drbg->Cbuf);
+	drbg->C = NULL;
 	kzfree(drbg->scratchpadbuf);
 	drbg->scratchpadbuf = NULL;
 	drbg->reseed_ctr = 0;
...
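
The drbg fix restores the rule that the free routine must get the pointer the allocator returned: V and C are aligned views into the Vbuf/Cbuf allocations, so passing V or C to kzfree() hands the allocator an offset pointer. A userspace sketch of the same aligned-buffer pattern (size and alignment are illustrative, not the kernel's):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define STATELEN 64
#define ALIGNMENT 16

struct toy_drbg {
	unsigned char *Vbuf;	/* raw allocation */
	unsigned char *V;	/* aligned view used as working state */
};

static int toy_alloc(struct toy_drbg *s)
{
	s->Vbuf = malloc(STATELEN + ALIGNMENT - 1);
	if (!s->Vbuf)
		return -1;
	/* round up to the next ALIGNMENT boundary inside the allocation */
	s->V = (unsigned char *)
		(((uintptr_t)s->Vbuf + ALIGNMENT - 1) & ~(uintptr_t)(ALIGNMENT - 1));
	memset(s->V, 0, STATELEN);
	return 0;
}

static void toy_dealloc(struct toy_drbg *s)
{
	free(s->Vbuf);		/* the fix: free the allocation, not s->V */
	s->V = NULL;
	s->Vbuf = NULL;
}

int main(void)
{
	struct toy_drbg s;

	if (toy_alloc(&s))
		return 1;
	toy_dealloc(&s);
	return 0;
}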
 config CRYPTO_DEV_FSL_CAAM
 	tristate "Freescale CAAM-Multicore driver backend"
 	depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE
+	select SOC_BUS
 	help
 	  Enables the driver module for Freescale's Cryptographic Accelerator
 	  and Assurance Module (CAAM), also known as the SEC version 4 (SEC4).
@@ -141,10 +142,6 @@ config CRYPTO_DEV_FSL_CAAM_RNG_API
 	  To compile this as a module, choose M here: the module
 	  will be called caamrng.
-config CRYPTO_DEV_FSL_CAAM_IMX
-	def_bool SOC_IMX6 || SOC_IMX7D
-	depends on CRYPTO_DEV_FSL_CAAM
 config CRYPTO_DEV_FSL_CAAM_DEBUG
 	bool "Enable debug output in CAAM driver"
 	depends on CRYPTO_DEV_FSL_CAAM
...
@@ -7,6 +7,7 @@
 #include <linux/device.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/sys_soc.h>
 #include "compat.h"
 #include "regs.h"
@@ -19,6 +20,8 @@ bool caam_little_end;
 EXPORT_SYMBOL(caam_little_end);
 bool caam_dpaa2;
 EXPORT_SYMBOL(caam_dpaa2);
+bool caam_imx;
+EXPORT_SYMBOL(caam_imx);
 #ifdef CONFIG_CAAM_QI
 #include "qi.h"
@@ -28,19 +31,11 @@ EXPORT_SYMBOL(caam_dpaa2);
  * i.MX targets tend to have clock control subsystems that can
  * enable/disable clocking to our device.
  */
-#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
 static inline struct clk *caam_drv_identify_clk(struct device *dev,
 						char *clk_name)
 {
-	return devm_clk_get(dev, clk_name);
+	return caam_imx ? devm_clk_get(dev, clk_name) : NULL;
 }
-#else
-static inline struct clk *caam_drv_identify_clk(struct device *dev,
-						char *clk_name)
-{
-	return NULL;
-}
-#endif
 /*
  * Descriptor to instantiate RNG State Handle 0 in normal mode and
@@ -430,6 +425,10 @@ static int caam_probe(struct platform_device *pdev)
 {
 	int ret, ring, gen_sk, ent_delay = RTSDCTL_ENT_DLY_MIN;
 	u64 caam_id;
+	static const struct soc_device_attribute imx_soc[] = {
+		{.family = "Freescale i.MX"},
+		{},
+	};
 	struct device *dev;
 	struct device_node *nprop, *np;
 	struct caam_ctrl __iomem *ctrl;
@@ -451,6 +450,8 @@ static int caam_probe(struct platform_device *pdev)
 	dev_set_drvdata(dev, ctrlpriv);
 	nprop = pdev->dev.of_node;
+	caam_imx = (bool)soc_device_match(imx_soc);
 	/* Enable clocking */
 	clk = caam_drv_identify_clk(&pdev->dev, "ipg");
 	if (IS_ERR(clk)) {
...
@@ -67,6 +67,7 @@
  */
 extern bool caam_little_end;
+extern bool caam_imx;
 #define caam_to_cpu(len) \
 static inline u##len caam##len ## _to_cpu(u##len val) \
@@ -154,13 +155,10 @@ static inline u64 rd_reg64(void __iomem *reg)
 #else /* CONFIG_64BIT */
 static inline void wr_reg64(void __iomem *reg, u64 data)
 {
-#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
-	if (caam_little_end) {
+	if (!caam_imx && caam_little_end) {
 		wr_reg32((u32 __iomem *)(reg) + 1, data >> 32);
 		wr_reg32((u32 __iomem *)(reg), data);
-	} else
-#endif
-	{
+	} else {
 		wr_reg32((u32 __iomem *)(reg), data >> 32);
 		wr_reg32((u32 __iomem *)(reg) + 1, data);
 	}
@@ -168,42 +166,41 @@ static inline void wr_reg64(void __iomem *reg, u64 data)
 static inline u64 rd_reg64(void __iomem *reg)
 {
-#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
-	if (caam_little_end)
+	if (!caam_imx && caam_little_end)
 		return ((u64)rd_reg32((u32 __iomem *)(reg) + 1) << 32 |
 			(u64)rd_reg32((u32 __iomem *)(reg)));
-	else
-#endif
 	return ((u64)rd_reg32((u32 __iomem *)(reg)) << 32 |
 		(u64)rd_reg32((u32 __iomem *)(reg) + 1));
 }
 #endif /* CONFIG_64BIT */
+static inline u64 cpu_to_caam_dma64(dma_addr_t value)
+{
+	if (caam_imx)
+		return (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) |
+			(u64)cpu_to_caam32(upper_32_bits(value)));
+	return cpu_to_caam64(value);
+}
+static inline u64 caam_dma64_to_cpu(u64 value)
+{
+	if (caam_imx)
+		return (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) |
+			(u64)caam32_to_cpu(upper_32_bits(value)));
+	return caam64_to_cpu(value);
+}
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-#ifdef CONFIG_SOC_IMX7D
-#define cpu_to_caam_dma(value) \
-		(((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \
-		 (u64)cpu_to_caam32(upper_32_bits(value)))
-#define caam_dma_to_cpu(value) \
-		(((u64)caam32_to_cpu(lower_32_bits(value)) << 32) | \
-		 (u64)caam32_to_cpu(upper_32_bits(value)))
-#else
-#define cpu_to_caam_dma(value) cpu_to_caam64(value)
-#define caam_dma_to_cpu(value) caam64_to_cpu(value)
-#endif /* CONFIG_SOC_IMX7D */
+#define cpu_to_caam_dma(value) cpu_to_caam_dma64(value)
+#define caam_dma_to_cpu(value) caam_dma64_to_cpu(value)
 #else
 #define cpu_to_caam_dma(value) cpu_to_caam32(value)
 #define caam_dma_to_cpu(value) caam32_to_cpu(value)
 #endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT */
-#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
-#define cpu_to_caam_dma64(value) \
-	(((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \
-	 (u64)cpu_to_caam32(upper_32_bits(value)))
-#else
-#define cpu_to_caam_dma64(value) cpu_to_caam64(value)
-#endif
 /*
  * jr_outentry
  * Represents each entry in a JobR output ring
...
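
The caam hunks replace build-time #ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX selection with the runtime caam_imx flag set once from soc_device_match(), so a single multiplatform ARMv7 kernel can drive both LS1021A and i.MX parts. A userspace sketch of the pattern, with a made-up helper name; it only models the half-swap, whereas the real cpu_to_caam_dma64() also applies per-half endian conversion via cpu_to_caam32():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* set once at probe time; the kernel derives it from soc_device_match() */
static bool quirk_imx;

static uint64_t toy_cpu_to_dev_dma64(uint64_t v)
{
	if (quirk_imx)	/* i.MX stores the two 32-bit halves swapped */
		return ((uint64_t)(uint32_t)v << 32) | (v >> 32);
	return v;	/* native layout everywhere else */
}

int main(void)
{
	uint64_t addr = 0x1122334455667788ULL;

	quirk_imx = false;
	printf("native: %016llx\n", (unsigned long long)toy_cpu_to_dev_dma64(addr));
	quirk_imx = true;
	printf("i.MX:   %016llx\n", (unsigned long long)toy_cpu_to_dev_dma64(addr));
	return 0;
}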
@@ -386,7 +386,7 @@ static int safexcel_cipher_exit_inv(struct crypto_tfm *tfm)
 	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
 	struct safexcel_crypto_priv *priv = ctx->priv;
 	struct skcipher_request req;
-	struct safexcel_inv_result result = { 0 };
+	struct safexcel_inv_result result = {};
 	int ring = ctx->base.ring;
 	memset(&req, 0, sizeof(struct skcipher_request));
...
@@ -419,7 +419,7 @@ static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm)
 	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm);
 	struct safexcel_crypto_priv *priv = ctx->priv;
 	struct ahash_request req;
-	struct safexcel_inv_result result = { 0 };
+	struct safexcel_inv_result result = {};
 	int ring = ctx->base.ring;
 	memset(&req, 0, sizeof(struct ahash_request));
...
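
The two inside-secure hunks silence gcc 4.9's -Wmissing-braces: when a struct's first member is itself an aggregate, initializing it with "{ 0 }" draws the warning, while the empty initializer list zeroes the whole object quietly (a GNU extension, standardized only in C23). A standalone illustration with made-up struct names:

struct inner {
	int a;
	int b;
};

struct outer {
	struct inner first;	/* aggregate first member provokes the warning */
	int error;
};

int main(void)
{
	struct outer warns = { 0 };	/* gcc 4.9 -Wall: "missing braces around initializer" */
	struct outer quiet = {};	/* zeroes everything, no warning */

	(void)warns;
	(void)quiet;
	return 0;
}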
@@ -1756,9 +1756,9 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
 		req_ctx->swinit = 0;
 	} else {
 		desc->ptr[1] = zero_entry;
-	}
 	/* Indicate next op is not the first. */
 	req_ctx->first = 0;
+	}
 	/* HMAC key */
 	if (ctx->keylen)
@@ -1769,7 +1769,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
 	sg_count = edesc->src_nents ?: 1;
 	if (is_sec1 && sg_count > 1)
-		sg_copy_to_buffer(areq->src, sg_count, edesc->buf, length);
+		sg_copy_to_buffer(req_ctx->psrc, sg_count, edesc->buf, length);
 	else
 		sg_count = dma_map_sg(dev, req_ctx->psrc, sg_count,
 				      DMA_TO_DEVICE);
@@ -3057,6 +3057,7 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev,
 	t_alg->algt.alg.hash.final = ahash_final;
 	t_alg->algt.alg.hash.finup = ahash_finup;
 	t_alg->algt.alg.hash.digest = ahash_digest;
+	if (!strncmp(alg->cra_name, "hmac", 4))
 	t_alg->algt.alg.hash.setkey = ahash_setkey;
 	t_alg->algt.alg.hash.import = ahash_import;
 	t_alg->algt.alg.hash.export = ahash_export;
...