Commit ea9459ef authored by Eric Biggers, committed by Herbert Xu

crypto: x86/aesni-xts - deduplicate aesni_xts_enc() and aesni_xts_dec()

Since aesni_xts_enc() and aesni_xts_dec() are very similar, generate
them from a macro that's passed an argument enc=1 or enc=0.  This
reduces the length of aesni-intel_asm.S by 112 lines while still
producing the exact same object file in both 32-bit and 64-bit mode.
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 1d27e1f5
...@@ -2825,28 +2825,24 @@ SYM_FUNC_END(aesni_ctr_enc) ...@@ -2825,28 +2825,24 @@ SYM_FUNC_END(aesni_ctr_enc)
.previous .previous
/* /*
* _aesni_gf128mul_x_ble: internal ABI * _aesni_gf128mul_x_ble: Multiply in GF(2^128) for XTS IVs
* Multiply in GF(2^128) for XTS IVs
* input: * input:
* IV: current IV * IV: current IV
* GF128MUL_MASK == mask with 0x87 and 0x01 * GF128MUL_MASK == mask with 0x87 and 0x01
* output: * output:
* IV: next IV * IV: next IV
* changed: * changed:
* CTR: == temporary value * KEY: == temporary value
*/ */
#define _aesni_gf128mul_x_ble() \ .macro _aesni_gf128mul_x_ble
pshufd $0x13, IV, KEY; \ pshufd $0x13, IV, KEY
paddq IV, IV; \ paddq IV, IV
psrad $31, KEY; \ psrad $31, KEY
pand GF128MUL_MASK, KEY; \ pand GF128MUL_MASK, KEY
pxor KEY, IV; pxor KEY, IV
.endm
/* .macro _aesni_xts_crypt enc
* void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst,
* const u8 *src, unsigned int len, le128 *iv)
*/
SYM_FUNC_START(aesni_xts_enc)
FRAME_BEGIN FRAME_BEGIN
#ifndef __x86_64__ #ifndef __x86_64__
pushl IVP pushl IVP
...@@ -2865,35 +2861,46 @@ SYM_FUNC_START(aesni_xts_enc) ...@@ -2865,35 +2861,46 @@ SYM_FUNC_START(aesni_xts_enc)
movups (IVP), IV movups (IVP), IV
mov 480(KEYP), KLEN mov 480(KEYP), KLEN
.if !\enc
add $240, KEYP
test $15, LEN
jz .Lxts_loop4\@
sub $16, LEN
.endif
.Lxts_enc_loop4: .Lxts_loop4\@:
sub $64, LEN sub $64, LEN
jl .Lxts_enc_1x jl .Lxts_1x\@
movdqa IV, STATE1 movdqa IV, STATE1
movdqu 0x00(INP), IN movdqu 0x00(INP), IN
pxor IN, STATE1 pxor IN, STATE1
movdqu IV, 0x00(OUTP) movdqu IV, 0x00(OUTP)
_aesni_gf128mul_x_ble() _aesni_gf128mul_x_ble
movdqa IV, STATE2 movdqa IV, STATE2
movdqu 0x10(INP), IN movdqu 0x10(INP), IN
pxor IN, STATE2 pxor IN, STATE2
movdqu IV, 0x10(OUTP) movdqu IV, 0x10(OUTP)
_aesni_gf128mul_x_ble() _aesni_gf128mul_x_ble
movdqa IV, STATE3 movdqa IV, STATE3
movdqu 0x20(INP), IN movdqu 0x20(INP), IN
pxor IN, STATE3 pxor IN, STATE3
movdqu IV, 0x20(OUTP) movdqu IV, 0x20(OUTP)
_aesni_gf128mul_x_ble() _aesni_gf128mul_x_ble
movdqa IV, STATE4 movdqa IV, STATE4
movdqu 0x30(INP), IN movdqu 0x30(INP), IN
pxor IN, STATE4 pxor IN, STATE4
movdqu IV, 0x30(OUTP) movdqu IV, 0x30(OUTP)
.if \enc
call _aesni_enc4 call _aesni_enc4
.else
call _aesni_dec4
.endif
movdqu 0x00(OUTP), IN movdqu 0x00(OUTP), IN
pxor IN, STATE1 pxor IN, STATE1
...@@ -2911,17 +2918,17 @@ SYM_FUNC_START(aesni_xts_enc) ...@@ -2911,17 +2918,17 @@ SYM_FUNC_START(aesni_xts_enc)
pxor IN, STATE4 pxor IN, STATE4
movdqu STATE4, 0x30(OUTP) movdqu STATE4, 0x30(OUTP)
_aesni_gf128mul_x_ble() _aesni_gf128mul_x_ble
add $64, INP add $64, INP
add $64, OUTP add $64, OUTP
test LEN, LEN test LEN, LEN
jnz .Lxts_enc_loop4 jnz .Lxts_loop4\@
.Lxts_enc_ret_iv: .Lxts_ret_iv\@:
movups IV, (IVP) movups IV, (IVP)
.Lxts_enc_ret: .Lxts_ret\@:
#ifndef __x86_64__ #ifndef __x86_64__
popl KLEN popl KLEN
popl KEYP popl KEYP
...@@ -2931,39 +2938,60 @@ SYM_FUNC_START(aesni_xts_enc) ...@@ -2931,39 +2938,60 @@ SYM_FUNC_START(aesni_xts_enc)
FRAME_END FRAME_END
RET RET
.Lxts_enc_1x: .Lxts_1x\@:
add $64, LEN add $64, LEN
jz .Lxts_enc_ret_iv jz .Lxts_ret_iv\@
.if \enc
sub $16, LEN sub $16, LEN
jl .Lxts_enc_cts4 jl .Lxts_cts4\@
.endif
.Lxts_enc_loop1: .Lxts_loop1\@:
movdqu (INP), STATE movdqu (INP), STATE
.if \enc
pxor IV, STATE pxor IV, STATE
call _aesni_enc1 call _aesni_enc1
.else
add $16, INP
sub $16, LEN
jl .Lxts_cts1\@
pxor IV, STATE pxor IV, STATE
_aesni_gf128mul_x_ble() call _aesni_dec1
.endif
pxor IV, STATE
_aesni_gf128mul_x_ble
test LEN, LEN test LEN, LEN
jz .Lxts_enc_out jz .Lxts_out\@
.if \enc
add $16, INP add $16, INP
sub $16, LEN sub $16, LEN
jl .Lxts_enc_cts1 jl .Lxts_cts1\@
.endif
movdqu STATE, (OUTP) movdqu STATE, (OUTP)
add $16, OUTP add $16, OUTP
jmp .Lxts_enc_loop1 jmp .Lxts_loop1\@
.Lxts_enc_out: .Lxts_out\@:
movdqu STATE, (OUTP) movdqu STATE, (OUTP)
jmp .Lxts_enc_ret_iv jmp .Lxts_ret_iv\@
.Lxts_enc_cts4: .if \enc
.Lxts_cts4\@:
movdqa STATE4, STATE movdqa STATE4, STATE
sub $16, OUTP sub $16, OUTP
.Lxts_cts1\@:
.else
.Lxts_cts1\@:
movdqa IV, STATE4
_aesni_gf128mul_x_ble
.Lxts_enc_cts1: pxor IV, STATE
call _aesni_dec1
pxor IV, STATE
.endif
#ifndef __x86_64__ #ifndef __x86_64__
lea .Lcts_permute_table, T1 lea .Lcts_permute_table, T1
#else #else
...@@ -2989,12 +3017,26 @@ SYM_FUNC_START(aesni_xts_enc) ...@@ -2989,12 +3017,26 @@ SYM_FUNC_START(aesni_xts_enc)
pblendvb IN2, IN1 pblendvb IN2, IN1
movaps IN1, STATE movaps IN1, STATE
.if \enc
pxor IV, STATE pxor IV, STATE
call _aesni_enc1 call _aesni_enc1
pxor IV, STATE pxor IV, STATE
.else
pxor STATE4, STATE
call _aesni_dec1
pxor STATE4, STATE
.endif
movups STATE, (OUTP) movups STATE, (OUTP)
jmp .Lxts_enc_ret jmp .Lxts_ret\@
.endm
/*
* void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst,
* const u8 *src, unsigned int len, le128 *iv)
*/
SYM_FUNC_START(aesni_xts_enc)
_aesni_xts_crypt 1
SYM_FUNC_END(aesni_xts_enc) SYM_FUNC_END(aesni_xts_enc)
/* /*
...@@ -3002,159 +3044,5 @@ SYM_FUNC_END(aesni_xts_enc) ...@@ -3002,159 +3044,5 @@ SYM_FUNC_END(aesni_xts_enc)
* const u8 *src, unsigned int len, le128 *iv) * const u8 *src, unsigned int len, le128 *iv)
*/ */
SYM_FUNC_START(aesni_xts_dec) SYM_FUNC_START(aesni_xts_dec)
FRAME_BEGIN _aesni_xts_crypt 0
#ifndef __x86_64__
pushl IVP
pushl LEN
pushl KEYP
pushl KLEN
movl (FRAME_OFFSET+20)(%esp), KEYP # ctx
movl (FRAME_OFFSET+24)(%esp), OUTP # dst
movl (FRAME_OFFSET+28)(%esp), INP # src
movl (FRAME_OFFSET+32)(%esp), LEN # len
movl (FRAME_OFFSET+36)(%esp), IVP # iv
movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
#else
movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK
#endif
movups (IVP), IV
mov 480(KEYP), KLEN
add $240, KEYP
test $15, LEN
jz .Lxts_dec_loop4
sub $16, LEN
.Lxts_dec_loop4:
sub $64, LEN
jl .Lxts_dec_1x
movdqa IV, STATE1
movdqu 0x00(INP), IN
pxor IN, STATE1
movdqu IV, 0x00(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE2
movdqu 0x10(INP), IN
pxor IN, STATE2
movdqu IV, 0x10(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE3
movdqu 0x20(INP), IN
pxor IN, STATE3
movdqu IV, 0x20(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE4
movdqu 0x30(INP), IN
pxor IN, STATE4
movdqu IV, 0x30(OUTP)
call _aesni_dec4
movdqu 0x00(OUTP), IN
pxor IN, STATE1
movdqu STATE1, 0x00(OUTP)
movdqu 0x10(OUTP), IN
pxor IN, STATE2
movdqu STATE2, 0x10(OUTP)
movdqu 0x20(OUTP), IN
pxor IN, STATE3
movdqu STATE3, 0x20(OUTP)
movdqu 0x30(OUTP), IN
pxor IN, STATE4
movdqu STATE4, 0x30(OUTP)
_aesni_gf128mul_x_ble()
add $64, INP
add $64, OUTP
test LEN, LEN
jnz .Lxts_dec_loop4
.Lxts_dec_ret_iv:
movups IV, (IVP)
.Lxts_dec_ret:
#ifndef __x86_64__
popl KLEN
popl KEYP
popl LEN
popl IVP
#endif
FRAME_END
RET
.Lxts_dec_1x:
add $64, LEN
jz .Lxts_dec_ret_iv
.Lxts_dec_loop1:
movdqu (INP), STATE
add $16, INP
sub $16, LEN
jl .Lxts_dec_cts1
pxor IV, STATE
call _aesni_dec1
pxor IV, STATE
_aesni_gf128mul_x_ble()
test LEN, LEN
jz .Lxts_dec_out
movdqu STATE, (OUTP)
add $16, OUTP
jmp .Lxts_dec_loop1
.Lxts_dec_out:
movdqu STATE, (OUTP)
jmp .Lxts_dec_ret_iv
.Lxts_dec_cts1:
movdqa IV, STATE4
_aesni_gf128mul_x_ble()
pxor IV, STATE
call _aesni_dec1
pxor IV, STATE
#ifndef __x86_64__
lea .Lcts_permute_table, T1
#else
lea .Lcts_permute_table(%rip), T1
#endif
add LEN, INP /* rewind input pointer */
add $16, LEN /* # bytes in final block */
movups (INP), IN1
mov T1, IVP
add $32, IVP
add LEN, T1
sub LEN, IVP
add OUTP, LEN
movups (T1), %xmm4
movaps STATE, IN2
pshufb %xmm4, STATE
movups STATE, (LEN)
movups (IVP), %xmm0
pshufb %xmm0, IN1
pblendvb IN2, IN1
movaps IN1, STATE
pxor STATE4, STATE
call _aesni_dec1
pxor STATE4, STATE
movups STATE, (OUTP)
jmp .Lxts_dec_ret
SYM_FUNC_END(aesni_xts_dec) SYM_FUNC_END(aesni_xts_dec)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment