Commit 59990684, authored by Jussi Kivilinna, committed by Herbert Xu

crypto: camellia-x86_64/aes-ni: use ENTRY()/ENDPROC() for assembler functions...

crypto: camellia-x86_64/aes-ni: use ENTRY()/ENDPROC() for assembler functions and localize jump targets
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 5186e395
...@@ -15,6 +15,8 @@ ...@@ -15,6 +15,8 @@
* http://koti.mbnet.fi/axh/crypto/camellia-BSD-1.2.0-aesni1.tar.xz * http://koti.mbnet.fi/axh/crypto/camellia-BSD-1.2.0-aesni1.tar.xz
*/ */
#include <linux/linkage.h>
#define CAMELLIA_TABLE_BYTE_LEN 272 #define CAMELLIA_TABLE_BYTE_LEN 272
/* struct camellia_ctx: */ /* struct camellia_ctx: */
...@@ -190,6 +192,7 @@ roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd: ...@@ -190,6 +192,7 @@ roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd:
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15,
%rcx, (%r9)); %rcx, (%r9));
ret; ret;
ENDPROC(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
.align 8 .align 8
roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
...@@ -197,6 +200,7 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: ...@@ -197,6 +200,7 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
%xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11,
%rax, (%r9)); %rax, (%r9));
ret; ret;
ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
/* /*
* IN/OUT: * IN/OUT:
...@@ -709,8 +713,6 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: ...@@ -709,8 +713,6 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
.text .text
.align 8 .align 8
.type __camellia_enc_blk16,@function;
__camellia_enc_blk16: __camellia_enc_blk16:
/* input: /* input:
* %rdi: ctx, CTX * %rdi: ctx, CTX
...@@ -793,10 +795,9 @@ __camellia_enc_blk16: ...@@ -793,10 +795,9 @@ __camellia_enc_blk16:
%xmm15, %rax, %rcx, 24); %xmm15, %rax, %rcx, 24);
jmp .Lenc_done; jmp .Lenc_done;
ENDPROC(__camellia_enc_blk16)
.align 8 .align 8
.type __camellia_dec_blk16,@function;
__camellia_dec_blk16: __camellia_dec_blk16:
/* input: /* input:
* %rdi: ctx, CTX * %rdi: ctx, CTX
...@@ -877,12 +878,9 @@ __camellia_dec_blk16: ...@@ -877,12 +878,9 @@ __camellia_dec_blk16:
((key_table + (24) * 8) + 4)(CTX)); ((key_table + (24) * 8) + 4)(CTX));
jmp .Ldec_max24; jmp .Ldec_max24;
ENDPROC(__camellia_dec_blk16)
.align 8 ENTRY(camellia_ecb_enc_16way)
.global camellia_ecb_enc_16way
.type camellia_ecb_enc_16way,@function;
camellia_ecb_enc_16way:
/* input: /* input:
* %rdi: ctx, CTX * %rdi: ctx, CTX
* %rsi: dst (16 blocks) * %rsi: dst (16 blocks)
...@@ -903,12 +901,9 @@ camellia_ecb_enc_16way: ...@@ -903,12 +901,9 @@ camellia_ecb_enc_16way:
%xmm8, %rsi); %xmm8, %rsi);
ret; ret;
ENDPROC(camellia_ecb_enc_16way)
.align 8 ENTRY(camellia_ecb_dec_16way)
.global camellia_ecb_dec_16way
.type camellia_ecb_dec_16way,@function;
camellia_ecb_dec_16way:
/* input: /* input:
* %rdi: ctx, CTX * %rdi: ctx, CTX
* %rsi: dst (16 blocks) * %rsi: dst (16 blocks)
...@@ -934,12 +929,9 @@ camellia_ecb_dec_16way: ...@@ -934,12 +929,9 @@ camellia_ecb_dec_16way:
%xmm8, %rsi); %xmm8, %rsi);
ret; ret;
ENDPROC(camellia_ecb_dec_16way)
.align 8 ENTRY(camellia_cbc_dec_16way)
.global camellia_cbc_dec_16way
.type camellia_cbc_dec_16way,@function;
camellia_cbc_dec_16way:
/* input: /* input:
* %rdi: ctx, CTX * %rdi: ctx, CTX
* %rsi: dst (16 blocks) * %rsi: dst (16 blocks)
...@@ -986,6 +978,7 @@ camellia_cbc_dec_16way: ...@@ -986,6 +978,7 @@ camellia_cbc_dec_16way:
%xmm8, %rsi); %xmm8, %rsi);
ret; ret;
ENDPROC(camellia_cbc_dec_16way)
#define inc_le128(x, minus_one, tmp) \ #define inc_le128(x, minus_one, tmp) \
vpcmpeqq minus_one, x, tmp; \ vpcmpeqq minus_one, x, tmp; \
...@@ -993,11 +986,7 @@ camellia_cbc_dec_16way: ...@@ -993,11 +986,7 @@ camellia_cbc_dec_16way:
vpslldq $8, tmp, tmp; \ vpslldq $8, tmp, tmp; \
vpsubq tmp, x, x; vpsubq tmp, x, x;
.align 8 ENTRY(camellia_ctr_16way)
.global camellia_ctr_16way
.type camellia_ctr_16way,@function;
camellia_ctr_16way:
/* input: /* input:
* %rdi: ctx, CTX * %rdi: ctx, CTX
* %rsi: dst (16 blocks) * %rsi: dst (16 blocks)
...@@ -1100,3 +1089,4 @@ camellia_ctr_16way: ...@@ -1100,3 +1089,4 @@ camellia_ctr_16way:
%xmm8, %rsi); %xmm8, %rsi);
ret; ret;
ENDPROC(camellia_ctr_16way)
...@@ -20,6 +20,8 @@ ...@@ -20,6 +20,8 @@
* *
*/ */
#include <linux/linkage.h>
.file "camellia-x86_64-asm_64.S" .file "camellia-x86_64-asm_64.S"
.text .text
...@@ -188,10 +190,7 @@ ...@@ -188,10 +190,7 @@
bswapq RAB0; \ bswapq RAB0; \
movq RAB0, 4*2(RIO); movq RAB0, 4*2(RIO);
.global __camellia_enc_blk; ENTRY(__camellia_enc_blk)
.type __camellia_enc_blk,@function;
__camellia_enc_blk:
/* input: /* input:
* %rdi: ctx, CTX * %rdi: ctx, CTX
* %rsi: dst * %rsi: dst
...@@ -214,33 +213,31 @@ __camellia_enc_blk: ...@@ -214,33 +213,31 @@ __camellia_enc_blk:
movl $24, RT1d; /* max */ movl $24, RT1d; /* max */
cmpb $16, key_length(CTX); cmpb $16, key_length(CTX);
je __enc_done; je .L__enc_done;
enc_fls(24); enc_fls(24);
enc_rounds(24); enc_rounds(24);
movl $32, RT1d; /* max */ movl $32, RT1d; /* max */
__enc_done: .L__enc_done:
testb RXORbl, RXORbl; testb RXORbl, RXORbl;
movq RDST, RIO; movq RDST, RIO;
jnz __enc_xor; jnz .L__enc_xor;
enc_outunpack(mov, RT1); enc_outunpack(mov, RT1);
movq RRBP, %rbp; movq RRBP, %rbp;
ret; ret;
__enc_xor: .L__enc_xor:
enc_outunpack(xor, RT1); enc_outunpack(xor, RT1);
movq RRBP, %rbp; movq RRBP, %rbp;
ret; ret;
ENDPROC(__camellia_enc_blk)
.global camellia_dec_blk; ENTRY(camellia_dec_blk)
.type camellia_dec_blk,@function;
camellia_dec_blk:
/* input: /* input:
* %rdi: ctx, CTX * %rdi: ctx, CTX
* %rsi: dst * %rsi: dst
...@@ -258,12 +255,12 @@ camellia_dec_blk: ...@@ -258,12 +255,12 @@ camellia_dec_blk:
dec_inpack(RT2); dec_inpack(RT2);
cmpb $24, RT2bl; cmpb $24, RT2bl;
je __dec_rounds16; je .L__dec_rounds16;
dec_rounds(24); dec_rounds(24);
dec_fls(24); dec_fls(24);
__dec_rounds16: .L__dec_rounds16:
dec_rounds(16); dec_rounds(16);
dec_fls(16); dec_fls(16);
dec_rounds(8); dec_rounds(8);
...@@ -276,6 +273,7 @@ __dec_rounds16: ...@@ -276,6 +273,7 @@ __dec_rounds16:
movq RRBP, %rbp; movq RRBP, %rbp;
ret; ret;
ENDPROC(camellia_dec_blk)
/********************************************************************** /**********************************************************************
2-way camellia 2-way camellia
...@@ -426,10 +424,7 @@ __dec_rounds16: ...@@ -426,10 +424,7 @@ __dec_rounds16:
bswapq RAB1; \ bswapq RAB1; \
movq RAB1, 12*2(RIO); movq RAB1, 12*2(RIO);
.global __camellia_enc_blk_2way; ENTRY(__camellia_enc_blk_2way)
.type __camellia_enc_blk_2way,@function;
__camellia_enc_blk_2way:
/* input: /* input:
* %rdi: ctx, CTX * %rdi: ctx, CTX
* %rsi: dst * %rsi: dst
...@@ -453,16 +448,16 @@ __camellia_enc_blk_2way: ...@@ -453,16 +448,16 @@ __camellia_enc_blk_2way:
movl $24, RT2d; /* max */ movl $24, RT2d; /* max */
cmpb $16, key_length(CTX); cmpb $16, key_length(CTX);
je __enc2_done; je .L__enc2_done;
enc_fls2(24); enc_fls2(24);
enc_rounds2(24); enc_rounds2(24);
movl $32, RT2d; /* max */ movl $32, RT2d; /* max */
__enc2_done: .L__enc2_done:
test RXORbl, RXORbl; test RXORbl, RXORbl;
movq RDST, RIO; movq RDST, RIO;
jnz __enc2_xor; jnz .L__enc2_xor;
enc_outunpack2(mov, RT2); enc_outunpack2(mov, RT2);
...@@ -470,17 +465,15 @@ __enc2_done: ...@@ -470,17 +465,15 @@ __enc2_done:
popq %rbx; popq %rbx;
ret; ret;
__enc2_xor: .L__enc2_xor:
enc_outunpack2(xor, RT2); enc_outunpack2(xor, RT2);
movq RRBP, %rbp; movq RRBP, %rbp;
popq %rbx; popq %rbx;
ret; ret;
ENDPROC(__camellia_enc_blk_2way)
.global camellia_dec_blk_2way; ENTRY(camellia_dec_blk_2way)
.type camellia_dec_blk_2way,@function;
camellia_dec_blk_2way:
/* input: /* input:
* %rdi: ctx, CTX * %rdi: ctx, CTX
* %rsi: dst * %rsi: dst
...@@ -499,12 +492,12 @@ camellia_dec_blk_2way: ...@@ -499,12 +492,12 @@ camellia_dec_blk_2way:
dec_inpack2(RT2); dec_inpack2(RT2);
cmpb $24, RT2bl; cmpb $24, RT2bl;
je __dec2_rounds16; je .L__dec2_rounds16;
dec_rounds2(24); dec_rounds2(24);
dec_fls2(24); dec_fls2(24);
__dec2_rounds16: .L__dec2_rounds16:
dec_rounds2(16); dec_rounds2(16);
dec_fls2(16); dec_fls2(16);
dec_rounds2(8); dec_rounds2(8);
...@@ -518,3 +511,4 @@ __dec2_rounds16: ...@@ -518,3 +511,4 @@ __dec2_rounds16:
movq RRBP, %rbp; movq RRBP, %rbp;
movq RXOR, %rbx; movq RXOR, %rbx;
ret; ret;
ENDPROC(camellia_dec_blk_2way)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment