Commit 55a7e88f authored by Ard Biesheuvel's avatar Ard Biesheuvel Committed by Herbert Xu

crypto: x86/camellia - switch to XTS template

Now that the XTS template can wrap accelerated ECB modes, it can be
used to implement Camellia in XTS mode as well, which turns out to
be at least as fast, and sometimes even faster.
Acked-by: default avatarEric Biggers <ebiggers@google.com>
Signed-off-by: default avatarArd Biesheuvel <ardb@kernel.org>
Signed-off-by: default avatarHerbert Xu <herbert@gondor.apana.org.au>
parent 4d6a5a4b
...@@ -17,7 +17,6 @@ ...@@ -17,7 +17,6 @@
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/frame.h> #include <asm/frame.h>
#include <asm/nospec-branch.h>
#define CAMELLIA_TABLE_BYTE_LEN 272 #define CAMELLIA_TABLE_BYTE_LEN 272
...@@ -593,10 +592,6 @@ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) ...@@ -593,10 +592,6 @@ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
.Lbswap128_mask: .Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
/* For XTS mode IV generation */
.Lxts_gf128mul_and_shl1_mask:
.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
/* /*
* pre-SubByte transform * pre-SubByte transform
* *
...@@ -1111,179 +1106,3 @@ SYM_FUNC_START(camellia_ctr_16way) ...@@ -1111,179 +1106,3 @@ SYM_FUNC_START(camellia_ctr_16way)
FRAME_END FRAME_END
ret; ret;
SYM_FUNC_END(camellia_ctr_16way) SYM_FUNC_END(camellia_ctr_16way)
#define gf128mul_x_ble(iv, mask, tmp) \
vpsrad $31, iv, tmp; \
vpaddq iv, iv, iv; \
vpshufd $0x13, tmp, tmp; \
vpand mask, tmp, tmp; \
vpxor tmp, iv, iv;
.align 8
SYM_FUNC_START_LOCAL(camellia_xts_crypt_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
* %rdx: src (16 blocks)
* %rcx: iv (t αⁿ GF(2¹²⁸))
* %r8: index for input whitening key
* %r9: pointer to __camellia_enc_blk16 or __camellia_dec_blk16
*/
FRAME_BEGIN
subq $(16 * 16), %rsp;
movq %rsp, %rax;
vmovdqa .Lxts_gf128mul_and_shl1_mask, %xmm14;
/* load IV */
vmovdqu (%rcx), %xmm0;
vpxor 0 * 16(%rdx), %xmm0, %xmm15;
vmovdqu %xmm15, 15 * 16(%rax);
vmovdqu %xmm0, 0 * 16(%rsi);
/* construct IVs */
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 1 * 16(%rdx), %xmm0, %xmm15;
vmovdqu %xmm15, 14 * 16(%rax);
vmovdqu %xmm0, 1 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 2 * 16(%rdx), %xmm0, %xmm13;
vmovdqu %xmm0, 2 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 3 * 16(%rdx), %xmm0, %xmm12;
vmovdqu %xmm0, 3 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 4 * 16(%rdx), %xmm0, %xmm11;
vmovdqu %xmm0, 4 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 5 * 16(%rdx), %xmm0, %xmm10;
vmovdqu %xmm0, 5 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 6 * 16(%rdx), %xmm0, %xmm9;
vmovdqu %xmm0, 6 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 7 * 16(%rdx), %xmm0, %xmm8;
vmovdqu %xmm0, 7 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 8 * 16(%rdx), %xmm0, %xmm7;
vmovdqu %xmm0, 8 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 9 * 16(%rdx), %xmm0, %xmm6;
vmovdqu %xmm0, 9 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 10 * 16(%rdx), %xmm0, %xmm5;
vmovdqu %xmm0, 10 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 11 * 16(%rdx), %xmm0, %xmm4;
vmovdqu %xmm0, 11 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 12 * 16(%rdx), %xmm0, %xmm3;
vmovdqu %xmm0, 12 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 13 * 16(%rdx), %xmm0, %xmm2;
vmovdqu %xmm0, 13 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 14 * 16(%rdx), %xmm0, %xmm1;
vmovdqu %xmm0, 14 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vpxor 15 * 16(%rdx), %xmm0, %xmm15;
vmovdqu %xmm15, 0 * 16(%rax);
vmovdqu %xmm0, 15 * 16(%rsi);
gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
vmovdqu %xmm0, (%rcx);
/* inpack16_pre: */
vmovq (key_table)(CTX, %r8, 8), %xmm15;
vpshufb .Lpack_bswap, %xmm15, %xmm15;
vpxor 0 * 16(%rax), %xmm15, %xmm0;
vpxor %xmm1, %xmm15, %xmm1;
vpxor %xmm2, %xmm15, %xmm2;
vpxor %xmm3, %xmm15, %xmm3;
vpxor %xmm4, %xmm15, %xmm4;
vpxor %xmm5, %xmm15, %xmm5;
vpxor %xmm6, %xmm15, %xmm6;
vpxor %xmm7, %xmm15, %xmm7;
vpxor %xmm8, %xmm15, %xmm8;
vpxor %xmm9, %xmm15, %xmm9;
vpxor %xmm10, %xmm15, %xmm10;
vpxor %xmm11, %xmm15, %xmm11;
vpxor %xmm12, %xmm15, %xmm12;
vpxor %xmm13, %xmm15, %xmm13;
vpxor 14 * 16(%rax), %xmm15, %xmm14;
vpxor 15 * 16(%rax), %xmm15, %xmm15;
CALL_NOSPEC r9;
addq $(16 * 16), %rsp;
vpxor 0 * 16(%rsi), %xmm7, %xmm7;
vpxor 1 * 16(%rsi), %xmm6, %xmm6;
vpxor 2 * 16(%rsi), %xmm5, %xmm5;
vpxor 3 * 16(%rsi), %xmm4, %xmm4;
vpxor 4 * 16(%rsi), %xmm3, %xmm3;
vpxor 5 * 16(%rsi), %xmm2, %xmm2;
vpxor 6 * 16(%rsi), %xmm1, %xmm1;
vpxor 7 * 16(%rsi), %xmm0, %xmm0;
vpxor 8 * 16(%rsi), %xmm15, %xmm15;
vpxor 9 * 16(%rsi), %xmm14, %xmm14;
vpxor 10 * 16(%rsi), %xmm13, %xmm13;
vpxor 11 * 16(%rsi), %xmm12, %xmm12;
vpxor 12 * 16(%rsi), %xmm11, %xmm11;
vpxor 13 * 16(%rsi), %xmm10, %xmm10;
vpxor 14 * 16(%rsi), %xmm9, %xmm9;
vpxor 15 * 16(%rsi), %xmm8, %xmm8;
write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
%xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
%xmm8, %rsi);
FRAME_END
ret;
SYM_FUNC_END(camellia_xts_crypt_16way)
SYM_FUNC_START(camellia_xts_enc_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
* %rdx: src (16 blocks)
* %rcx: iv (t αⁿ GF(2¹²⁸))
*/
xorl %r8d, %r8d; /* input whitening key, 0 for enc */
leaq __camellia_enc_blk16, %r9;
jmp camellia_xts_crypt_16way;
SYM_FUNC_END(camellia_xts_enc_16way)
SYM_FUNC_START(camellia_xts_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
* %rdx: src (16 blocks)
* %rcx: iv (t αⁿ GF(2¹²⁸))
*/
cmpl $16, key_length(CTX);
movl $32, %r8d;
movl $24, %eax;
cmovel %eax, %r8d; /* input whitening key, last for dec */
leaq __camellia_dec_blk16, %r9;
jmp camellia_xts_crypt_16way;
SYM_FUNC_END(camellia_xts_dec_16way)
...@@ -7,7 +7,6 @@ ...@@ -7,7 +7,6 @@
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/frame.h> #include <asm/frame.h>
#include <asm/nospec-branch.h>
#define CAMELLIA_TABLE_BYTE_LEN 272 #define CAMELLIA_TABLE_BYTE_LEN 272
...@@ -629,12 +628,6 @@ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) ...@@ -629,12 +628,6 @@ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
.Lbswap128_mask: .Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
/* For XTS mode */
.Lxts_gf128mul_and_shl1_mask_0:
.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
.Lxts_gf128mul_and_shl1_mask_1:
.byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0
/* /*
* pre-SubByte transform * pre-SubByte transform
* *
...@@ -1201,203 +1194,3 @@ SYM_FUNC_START(camellia_ctr_32way) ...@@ -1201,203 +1194,3 @@ SYM_FUNC_START(camellia_ctr_32way)
FRAME_END FRAME_END
ret; ret;
SYM_FUNC_END(camellia_ctr_32way) SYM_FUNC_END(camellia_ctr_32way)
#define gf128mul_x_ble(iv, mask, tmp) \
vpsrad $31, iv, tmp; \
vpaddq iv, iv, iv; \
vpshufd $0x13, tmp, tmp; \
vpand mask, tmp, tmp; \
vpxor tmp, iv, iv;
#define gf128mul_x2_ble(iv, mask1, mask2, tmp0, tmp1) \
vpsrad $31, iv, tmp0; \
vpaddq iv, iv, tmp1; \
vpsllq $2, iv, iv; \
vpshufd $0x13, tmp0, tmp0; \
vpsrad $31, tmp1, tmp1; \
vpand mask2, tmp0, tmp0; \
vpshufd $0x13, tmp1, tmp1; \
vpxor tmp0, iv, iv; \
vpand mask1, tmp1, tmp1; \
vpxor tmp1, iv, iv;
.align 8
SYM_FUNC_START_LOCAL(camellia_xts_crypt_32way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (32 blocks)
* %rdx: src (32 blocks)
* %rcx: iv (t αⁿ GF(2¹²⁸))
* %r8: index for input whitening key
* %r9: pointer to __camellia_enc_blk32 or __camellia_dec_blk32
*/
FRAME_BEGIN
vzeroupper;
subq $(16 * 32), %rsp;
movq %rsp, %rax;
vbroadcasti128 .Lxts_gf128mul_and_shl1_mask_0, %ymm12;
/* load IV and construct second IV */
vmovdqu (%rcx), %xmm0;
vmovdqa %xmm0, %xmm15;
gf128mul_x_ble(%xmm0, %xmm12, %xmm13);
vbroadcasti128 .Lxts_gf128mul_and_shl1_mask_1, %ymm13;
vinserti128 $1, %xmm0, %ymm15, %ymm0;
vpxor 0 * 32(%rdx), %ymm0, %ymm15;
vmovdqu %ymm15, 15 * 32(%rax);
vmovdqu %ymm0, 0 * 32(%rsi);
/* construct IVs */
gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
vpxor 1 * 32(%rdx), %ymm0, %ymm15;
vmovdqu %ymm15, 14 * 32(%rax);
vmovdqu %ymm0, 1 * 32(%rsi);
gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
vpxor 2 * 32(%rdx), %ymm0, %ymm15;
vmovdqu %ymm15, 13 * 32(%rax);
vmovdqu %ymm0, 2 * 32(%rsi);
gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
vpxor 3 * 32(%rdx), %ymm0, %ymm15;
vmovdqu %ymm15, 12 * 32(%rax);
vmovdqu %ymm0, 3 * 32(%rsi);
gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
vpxor 4 * 32(%rdx), %ymm0, %ymm11;
vmovdqu %ymm0, 4 * 32(%rsi);
gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
vpxor 5 * 32(%rdx), %ymm0, %ymm10;
vmovdqu %ymm0, 5 * 32(%rsi);
gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
vpxor 6 * 32(%rdx), %ymm0, %ymm9;
vmovdqu %ymm0, 6 * 32(%rsi);
gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
vpxor 7 * 32(%rdx), %ymm0, %ymm8;
vmovdqu %ymm0, 7 * 32(%rsi);
gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
vpxor 8 * 32(%rdx), %ymm0, %ymm7;
vmovdqu %ymm0, 8 * 32(%rsi);
gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
vpxor 9 * 32(%rdx), %ymm0, %ymm6;
vmovdqu %ymm0, 9 * 32(%rsi);
gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
vpxor 10 * 32(%rdx), %ymm0, %ymm5;
vmovdqu %ymm0, 10 * 32(%rsi);
gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
vpxor 11 * 32(%rdx), %ymm0, %ymm4;
vmovdqu %ymm0, 11 * 32(%rsi);
gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
vpxor 12 * 32(%rdx), %ymm0, %ymm3;
vmovdqu %ymm0, 12 * 32(%rsi);
gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
vpxor 13 * 32(%rdx), %ymm0, %ymm2;
vmovdqu %ymm0, 13 * 32(%rsi);
gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
vpxor 14 * 32(%rdx), %ymm0, %ymm1;
vmovdqu %ymm0, 14 * 32(%rsi);
gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
vpxor 15 * 32(%rdx), %ymm0, %ymm15;
vmovdqu %ymm15, 0 * 32(%rax);
vmovdqu %ymm0, 15 * 32(%rsi);
vextracti128 $1, %ymm0, %xmm0;
gf128mul_x_ble(%xmm0, %xmm12, %xmm15);
vmovdqu %xmm0, (%rcx);
/* inpack32_pre: */
vpbroadcastq (key_table)(CTX, %r8, 8), %ymm15;
vpshufb .Lpack_bswap, %ymm15, %ymm15;
vpxor 0 * 32(%rax), %ymm15, %ymm0;
vpxor %ymm1, %ymm15, %ymm1;
vpxor %ymm2, %ymm15, %ymm2;
vpxor %ymm3, %ymm15, %ymm3;
vpxor %ymm4, %ymm15, %ymm4;
vpxor %ymm5, %ymm15, %ymm5;
vpxor %ymm6, %ymm15, %ymm6;
vpxor %ymm7, %ymm15, %ymm7;
vpxor %ymm8, %ymm15, %ymm8;
vpxor %ymm9, %ymm15, %ymm9;
vpxor %ymm10, %ymm15, %ymm10;
vpxor %ymm11, %ymm15, %ymm11;
vpxor 12 * 32(%rax), %ymm15, %ymm12;
vpxor 13 * 32(%rax), %ymm15, %ymm13;
vpxor 14 * 32(%rax), %ymm15, %ymm14;
vpxor 15 * 32(%rax), %ymm15, %ymm15;
CALL_NOSPEC r9;
addq $(16 * 32), %rsp;
vpxor 0 * 32(%rsi), %ymm7, %ymm7;
vpxor 1 * 32(%rsi), %ymm6, %ymm6;
vpxor 2 * 32(%rsi), %ymm5, %ymm5;
vpxor 3 * 32(%rsi), %ymm4, %ymm4;
vpxor 4 * 32(%rsi), %ymm3, %ymm3;
vpxor 5 * 32(%rsi), %ymm2, %ymm2;
vpxor 6 * 32(%rsi), %ymm1, %ymm1;
vpxor 7 * 32(%rsi), %ymm0, %ymm0;
vpxor 8 * 32(%rsi), %ymm15, %ymm15;
vpxor 9 * 32(%rsi), %ymm14, %ymm14;
vpxor 10 * 32(%rsi), %ymm13, %ymm13;
vpxor 11 * 32(%rsi), %ymm12, %ymm12;
vpxor 12 * 32(%rsi), %ymm11, %ymm11;
vpxor 13 * 32(%rsi), %ymm10, %ymm10;
vpxor 14 * 32(%rsi), %ymm9, %ymm9;
vpxor 15 * 32(%rsi), %ymm8, %ymm8;
write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
%ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
%ymm8, %rsi);
vzeroupper;
FRAME_END
ret;
SYM_FUNC_END(camellia_xts_crypt_32way)
SYM_FUNC_START(camellia_xts_enc_32way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (32 blocks)
* %rdx: src (32 blocks)
* %rcx: iv (t αⁿ GF(2¹²⁸))
*/
xorl %r8d, %r8d; /* input whitening key, 0 for enc */
leaq __camellia_enc_blk32, %r9;
jmp camellia_xts_crypt_32way;
SYM_FUNC_END(camellia_xts_enc_32way)
SYM_FUNC_START(camellia_xts_dec_32way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (32 blocks)
* %rdx: src (32 blocks)
* %rcx: iv (t αⁿ GF(2¹²⁸))
*/
cmpl $16, key_length(CTX);
movl $32, %r8d;
movl $24, %eax;
cmovel %eax, %r8d; /* input whitening key, last for dec */
leaq __camellia_dec_blk32, %r9;
jmp camellia_xts_crypt_32way;
SYM_FUNC_END(camellia_xts_dec_32way)
...@@ -9,7 +9,6 @@ ...@@ -9,7 +9,6 @@
#include <asm/crypto/glue_helper.h> #include <asm/crypto/glue_helper.h>
#include <crypto/algapi.h> #include <crypto/algapi.h>
#include <crypto/internal/simd.h> #include <crypto/internal/simd.h>
#include <crypto/xts.h>
#include <linux/crypto.h> #include <linux/crypto.h>
#include <linux/err.h> #include <linux/err.h>
#include <linux/module.h> #include <linux/module.h>
...@@ -26,11 +25,6 @@ asmlinkage void camellia_cbc_dec_32way(const void *ctx, u8 *dst, const u8 *src); ...@@ -26,11 +25,6 @@ asmlinkage void camellia_cbc_dec_32way(const void *ctx, u8 *dst, const u8 *src);
asmlinkage void camellia_ctr_32way(const void *ctx, u8 *dst, const u8 *src, asmlinkage void camellia_ctr_32way(const void *ctx, u8 *dst, const u8 *src,
le128 *iv); le128 *iv);
asmlinkage void camellia_xts_enc_32way(const void *ctx, u8 *dst, const u8 *src,
le128 *iv);
asmlinkage void camellia_xts_dec_32way(const void *ctx, u8 *dst, const u8 *src,
le128 *iv);
static const struct common_glue_ctx camellia_enc = { static const struct common_glue_ctx camellia_enc = {
.num_funcs = 4, .num_funcs = 4,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
...@@ -69,22 +63,6 @@ static const struct common_glue_ctx camellia_ctr = { ...@@ -69,22 +63,6 @@ static const struct common_glue_ctx camellia_ctr = {
} } } }
}; };
static const struct common_glue_ctx camellia_enc_xts = {
.num_funcs = 3,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
.fn_u = { .xts = camellia_xts_enc_32way }
}, {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .xts = camellia_xts_enc_16way }
}, {
.num_blocks = 1,
.fn_u = { .xts = camellia_xts_enc }
} }
};
static const struct common_glue_ctx camellia_dec = { static const struct common_glue_ctx camellia_dec = {
.num_funcs = 4, .num_funcs = 4,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
...@@ -123,22 +101,6 @@ static const struct common_glue_ctx camellia_dec_cbc = { ...@@ -123,22 +101,6 @@ static const struct common_glue_ctx camellia_dec_cbc = {
} } } }
}; };
static const struct common_glue_ctx camellia_dec_xts = {
.num_funcs = 3,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
.fn_u = { .xts = camellia_xts_dec_32way }
}, {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .xts = camellia_xts_dec_16way }
}, {
.num_blocks = 1,
.fn_u = { .xts = camellia_xts_dec }
} }
};
static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen) unsigned int keylen)
{ {
...@@ -170,24 +132,6 @@ static int ctr_crypt(struct skcipher_request *req) ...@@ -170,24 +132,6 @@ static int ctr_crypt(struct skcipher_request *req)
return glue_ctr_req_128bit(&camellia_ctr, req); return glue_ctr_req_128bit(&camellia_ctr, req);
} }
static int xts_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
return glue_xts_req_128bit(&camellia_enc_xts, req, camellia_enc_blk,
&ctx->tweak_ctx, &ctx->crypt_ctx, false);
}
static int xts_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
return glue_xts_req_128bit(&camellia_dec_xts, req, camellia_enc_blk,
&ctx->tweak_ctx, &ctx->crypt_ctx, true);
}
static struct skcipher_alg camellia_algs[] = { static struct skcipher_alg camellia_algs[] = {
{ {
.base.cra_name = "__ecb(camellia)", .base.cra_name = "__ecb(camellia)",
...@@ -231,20 +175,6 @@ static struct skcipher_alg camellia_algs[] = { ...@@ -231,20 +175,6 @@ static struct skcipher_alg camellia_algs[] = {
.setkey = camellia_setkey, .setkey = camellia_setkey,
.encrypt = ctr_crypt, .encrypt = ctr_crypt,
.decrypt = ctr_crypt, .decrypt = ctr_crypt,
}, {
.base.cra_name = "__xts(camellia)",
.base.cra_driver_name = "__xts-camellia-aesni-avx2",
.base.cra_priority = 500,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct camellia_xts_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = 2 * CAMELLIA_MIN_KEY_SIZE,
.max_keysize = 2 * CAMELLIA_MAX_KEY_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = xts_camellia_setkey,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
}, },
}; };
......
...@@ -9,7 +9,6 @@ ...@@ -9,7 +9,6 @@
#include <asm/crypto/glue_helper.h> #include <asm/crypto/glue_helper.h>
#include <crypto/algapi.h> #include <crypto/algapi.h>
#include <crypto/internal/simd.h> #include <crypto/internal/simd.h>
#include <crypto/xts.h>
#include <linux/crypto.h> #include <linux/crypto.h>
#include <linux/err.h> #include <linux/err.h>
#include <linux/module.h> #include <linux/module.h>
...@@ -31,26 +30,6 @@ asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src, ...@@ -31,26 +30,6 @@ asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src,
le128 *iv); le128 *iv);
EXPORT_SYMBOL_GPL(camellia_ctr_16way); EXPORT_SYMBOL_GPL(camellia_ctr_16way);
asmlinkage void camellia_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src,
le128 *iv);
EXPORT_SYMBOL_GPL(camellia_xts_enc_16way);
asmlinkage void camellia_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src,
le128 *iv);
EXPORT_SYMBOL_GPL(camellia_xts_dec_16way);
void camellia_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv, camellia_enc_blk);
}
EXPORT_SYMBOL_GPL(camellia_xts_enc);
void camellia_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv, camellia_dec_blk);
}
EXPORT_SYMBOL_GPL(camellia_xts_dec);
static const struct common_glue_ctx camellia_enc = { static const struct common_glue_ctx camellia_enc = {
.num_funcs = 3, .num_funcs = 3,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
...@@ -83,19 +62,6 @@ static const struct common_glue_ctx camellia_ctr = { ...@@ -83,19 +62,6 @@ static const struct common_glue_ctx camellia_ctr = {
} } } }
}; };
static const struct common_glue_ctx camellia_enc_xts = {
.num_funcs = 2,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .xts = camellia_xts_enc_16way }
}, {
.num_blocks = 1,
.fn_u = { .xts = camellia_xts_enc }
} }
};
static const struct common_glue_ctx camellia_dec = { static const struct common_glue_ctx camellia_dec = {
.num_funcs = 3, .num_funcs = 3,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
...@@ -128,19 +94,6 @@ static const struct common_glue_ctx camellia_dec_cbc = { ...@@ -128,19 +94,6 @@ static const struct common_glue_ctx camellia_dec_cbc = {
} } } }
}; };
static const struct common_glue_ctx camellia_dec_xts = {
.num_funcs = 2,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .xts = camellia_xts_dec_16way }
}, {
.num_blocks = 1,
.fn_u = { .xts = camellia_xts_dec }
} }
};
static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen) unsigned int keylen)
{ {
...@@ -172,44 +125,6 @@ static int ctr_crypt(struct skcipher_request *req) ...@@ -172,44 +125,6 @@ static int ctr_crypt(struct skcipher_request *req)
return glue_ctr_req_128bit(&camellia_ctr, req); return glue_ctr_req_128bit(&camellia_ctr, req);
} }
int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen)
{
struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int err;
err = xts_verify_key(tfm, key, keylen);
if (err)
return err;
/* first half of xts-key is for crypt */
err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2);
if (err)
return err;
/* second half of xts-key is for tweak */
return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
}
EXPORT_SYMBOL_GPL(xts_camellia_setkey);
static int xts_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
return glue_xts_req_128bit(&camellia_enc_xts, req, camellia_enc_blk,
&ctx->tweak_ctx, &ctx->crypt_ctx, false);
}
static int xts_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
return glue_xts_req_128bit(&camellia_dec_xts, req, camellia_enc_blk,
&ctx->tweak_ctx, &ctx->crypt_ctx, true);
}
static struct skcipher_alg camellia_algs[] = { static struct skcipher_alg camellia_algs[] = {
{ {
.base.cra_name = "__ecb(camellia)", .base.cra_name = "__ecb(camellia)",
...@@ -253,21 +168,7 @@ static struct skcipher_alg camellia_algs[] = { ...@@ -253,21 +168,7 @@ static struct skcipher_alg camellia_algs[] = {
.setkey = camellia_setkey, .setkey = camellia_setkey,
.encrypt = ctr_crypt, .encrypt = ctr_crypt,
.decrypt = ctr_crypt, .decrypt = ctr_crypt,
}, { }
.base.cra_name = "__xts(camellia)",
.base.cra_driver_name = "__xts-camellia-aesni",
.base.cra_priority = 400,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct camellia_xts_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = 2 * CAMELLIA_MIN_KEY_SIZE,
.max_keysize = 2 * CAMELLIA_MAX_KEY_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = xts_camellia_setkey,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
},
}; };
static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)]; static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)];
......
...@@ -19,18 +19,10 @@ struct camellia_ctx { ...@@ -19,18 +19,10 @@ struct camellia_ctx {
u32 key_length; u32 key_length;
}; };
struct camellia_xts_ctx {
struct camellia_ctx tweak_ctx;
struct camellia_ctx crypt_ctx;
};
extern int __camellia_setkey(struct camellia_ctx *cctx, extern int __camellia_setkey(struct camellia_ctx *cctx,
const unsigned char *key, const unsigned char *key,
unsigned int key_len); unsigned int key_len);
extern int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen);
/* regular block cipher functions */ /* regular block cipher functions */
asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src, asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src,
bool xor); bool xor);
...@@ -49,11 +41,6 @@ asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src); ...@@ -49,11 +41,6 @@ asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src);
asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src, asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src,
le128 *iv); le128 *iv);
asmlinkage void camellia_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src,
le128 *iv);
asmlinkage void camellia_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src,
le128 *iv);
static inline void camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src) static inline void camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src)
{ {
__camellia_enc_blk(ctx, dst, src, false); __camellia_enc_blk(ctx, dst, src, false);
...@@ -83,9 +70,4 @@ extern void camellia_crypt_ctr(const void *ctx, u8 *dst, const u8 *src, ...@@ -83,9 +70,4 @@ extern void camellia_crypt_ctr(const void *ctx, u8 *dst, const u8 *src,
extern void camellia_crypt_ctr_2way(const void *ctx, u8 *dst, const u8 *src, extern void camellia_crypt_ctr_2way(const void *ctx, u8 *dst, const u8 *src,
le128 *iv); le128 *iv);
extern void camellia_xts_enc(const void *ctx, u8 *dst, const u8 *src,
le128 *iv);
extern void camellia_xts_dec(const void *ctx, u8 *dst, const u8 *src,
le128 *iv);
#endif /* ASM_X86_CAMELLIA_H */ #endif /* ASM_X86_CAMELLIA_H */
...@@ -1305,7 +1305,7 @@ config CRYPTO_CAMELLIA_AESNI_AVX_X86_64 ...@@ -1305,7 +1305,7 @@ config CRYPTO_CAMELLIA_AESNI_AVX_X86_64
select CRYPTO_CAMELLIA_X86_64 select CRYPTO_CAMELLIA_X86_64
select CRYPTO_GLUE_HELPER_X86 select CRYPTO_GLUE_HELPER_X86
select CRYPTO_SIMD select CRYPTO_SIMD
select CRYPTO_XTS imply CRYPTO_XTS
help help
Camellia cipher algorithm module (x86_64/AES-NI/AVX). Camellia cipher algorithm module (x86_64/AES-NI/AVX).
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment