Commit ec8c02d9, authored by Dave Watson, committed by Herbert Xu

crypto: aesni - Introduce READ_PARTIAL_BLOCK macro

Introduce the READ_PARTIAL_BLOCK macro, and use it in the two existing
partial block cases: AAD and the end of ENC_DEC. In particular,
the ENC_DEC case should be faster, since we read by 8/4 bytes if
possible.

This macro will also be used to read partial blocks between
enc_update and dec_update calls.
Signed-off-by: Dave Watson <davejwatson@fb.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 517a448e
......@@ -415,15 +415,13 @@ _zero_cipher_left\@:
vmovdqu %xmm14, AadHash(arg2)
vmovdqu %xmm9, CurCount(arg2)
cmp $16, arg5
jl _only_less_than_16\@
# check for 0 length
mov arg5, %r13
and $15, %r13 # r13 = (arg5 mod 16)
je _multiple_of_16_bytes\@
# handle the last <16 Byte block seperately
# handle the last <16 Byte block separately
mov %r13, PBlockLen(arg2)
......@@ -434,49 +432,39 @@ _zero_cipher_left\@:
ENCRYPT_SINGLE_BLOCK \REP, %xmm9 # E(K, Yn)
vmovdqu %xmm9, PBlockEncKey(arg2)
sub $16, %r11
add %r13, %r11
vmovdqu (arg4, %r11), %xmm1 # receive the last <16 Byte block
cmp $16, arg5
jge _large_enough_update\@
lea (arg4,%r11,1), %r10
mov %r13, %r12
READ_PARTIAL_BLOCK %r10 %r12 %xmm1
lea SHIFT_MASK+16(%rip), %r12
sub %r13, %r12 # adjust the shuffle mask pointer to be
# able to shift 16-r13 bytes (r13 is the
# number of bytes in plaintext mod 16)
vmovdqu (%r12), %xmm2 # get the appropriate shuffle mask
vpshufb %xmm2, %xmm1, %xmm1 # shift right 16-r13 bytes
jmp _final_ghash_mul\@
_only_less_than_16\@:
# check for 0 length
mov arg5, %r13
and $15, %r13 # r13 = (arg5 mod 16)
je _multiple_of_16_bytes\@
# handle the last <16 Byte block separately
jmp _final_ghash_mul\@
_large_enough_update\@:
sub $16, %r11
add %r13, %r11
vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn
vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
ENCRYPT_SINGLE_BLOCK \REP, %xmm9 # E(K, Yn)
# receive the last <16 Byte block
vmovdqu (arg4, %r11, 1), %xmm1
vmovdqu %xmm9, PBlockEncKey(arg2)
sub %r13, %r11
add $16, %r11
lea SHIFT_MASK+16(%rip), %r12
sub %r13, %r12 # adjust the shuffle mask pointer to be
# able to shift 16-r13 bytes (r13 is the
# number of bytes in plaintext mod 16)
_get_last_16_byte_loop\@:
movb (arg4, %r11), %al
movb %al, TMP1 (%rsp , %r11)
add $1, %r11
cmp %r13, %r11
jne _get_last_16_byte_loop\@
vmovdqu TMP1(%rsp), %xmm1
sub $16, %r11
# adjust the shuffle mask pointer to be able to shift 16-r13 bytes
# (r13 is the number of bytes in plaintext mod 16)
sub %r13, %r12
# get the appropriate shuffle mask
vmovdqu (%r12), %xmm2
# shift right 16-r13 bytes
vpshufb %xmm2, %xmm1, %xmm1
_final_ghash_mul\@:
.if \ENC_DEC == DEC
......@@ -490,8 +478,6 @@ _final_ghash_mul\@:
vpxor %xmm2, %xmm14, %xmm14
vmovdqu %xmm14, AadHash(arg2)
sub %r13, %r11
add $16, %r11
.else
vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn)
vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to
......@@ -501,8 +487,6 @@ _final_ghash_mul\@:
vpxor %xmm9, %xmm14, %xmm14
vmovdqu %xmm14, AadHash(arg2)
sub %r13, %r11
add $16, %r11
vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext
.endif
......@@ -721,6 +705,38 @@ _get_AAD_done\@:
\PRECOMPUTE %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5
.endm
# READ_PARTIAL_BLOCK DPTR DLEN XMMDst
#
# Loads a partial block of DLEN bytes (0 < DLEN < 16) from the address in
# DPTR into XMMDst. Byte i of the source lands in byte i of XMMDst and
# every byte of XMMDst at or above DLEN is zero. No memory past
# DPTR + DLEN - 1 is touched.
#
# Clobbers: %rax, DLEN (left at zero) and the flags.
.macro READ_PARTIAL_BLOCK DPTR DLEN XMMDst
	vpxor	\XMMDst, \XMMDst, \XMMDst	# begin with an all-zero block
	cmp	$8, \DLEN
	jge	_rpb_qword_first_\@		# 8..15 bytes: low half is one load

	# Short case, 1..7 bytes: build the low quadword one byte at a time.
	# Walking from the last byte down to the first while shifting left
	# leaves the first source byte in the least significant position.
	xor	%eax, %eax			# also clears the upper half of rax
_rpb_gather_low_\@:
	shl	$8, %rax
	movb	-1(\DPTR, \DLEN), %al		# byte at index DLEN - 1
	dec	\DLEN
	jnz	_rpb_gather_low_\@
	vpinsrq	$0, %rax, \XMMDst, \XMMDst
	jmp	_rpb_finished_\@

_rpb_qword_first_\@:
	# Long case, 8..15 bytes: the first eight bytes come from one load.
	movq	(\DPTR), %rax
	vpinsrq	$0, %rax, \XMMDst, \XMMDst
	sub	$8, \DLEN			# DLEN = bytes left for the high half
	jz	_rpb_finished_\@		# exactly eight bytes, nothing more

	# Remaining 1..7 bytes go into the high quadword, gathered the same
	# way as the short case but starting eight bytes further on.
	xor	%eax, %eax
_rpb_gather_high_\@:
	shl	$8, %rax
	movb	7(\DPTR, \DLEN), %al		# byte at index 8 + (DLEN - 1)
	dec	\DLEN
	jnz	_rpb_gather_high_\@
	vpinsrq	$1, %rax, \XMMDst, \XMMDst
_rpb_finished_\@:
.endm
#ifdef CONFIG_AS_AVX
###############################################################################
# GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment