Commit 8861249c authored by megha.dey@linux.intel.com, committed by Herbert Xu

crypto: x86/sha1 - Fix reads beyond the number of blocks passed

It was reported that the sha1 AVX2 function (sha1_transform_avx2) is
reading ahead beyond its intended data, causing a crash if the next
block is beyond a page boundary:
http://marc.info/?l=linux-crypto-vger&m=149373371023377

This patch makes sure that there is no overflow for any buffer length.

It passes the tests written by Jan Stancek that revealed this problem:
https://github.com/jstancek/sha1-avx2-crash

I have re-enabled sha1-avx2 by reverting commit
b82ce244

Cc: <stable@vger.kernel.org>
Fixes: b82ce244 ("crypto: sha1-ssse3 - Disable avx2")
Originally-by: Ilya Albrekht <ilya.albrekht@intel.com>
Tested-by: Jan Stancek <jstancek@redhat.com>
Signed-off-by: Megha Dey <megha.dey@linux.intel.com>
Reported-by: Jan Stancek <jstancek@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 28389575
...@@ -117,11 +117,10 @@ ...@@ -117,11 +117,10 @@
.set T1, REG_T1 .set T1, REG_T1
.endm .endm
#define K_BASE %r8
#define HASH_PTR %r9 #define HASH_PTR %r9
#define BLOCKS_CTR %r8
#define BUFFER_PTR %r10 #define BUFFER_PTR %r10
#define BUFFER_PTR2 %r13 #define BUFFER_PTR2 %r13
#define BUFFER_END %r11
#define PRECALC_BUF %r14 #define PRECALC_BUF %r14
#define WK_BUF %r15 #define WK_BUF %r15
...@@ -205,14 +204,14 @@ ...@@ -205,14 +204,14 @@
* blended AVX2 and ALU instruction scheduling * blended AVX2 and ALU instruction scheduling
* 1 vector iteration per 8 rounds * 1 vector iteration per 8 rounds
*/ */
vmovdqu ((i * 2) + PRECALC_OFFSET)(BUFFER_PTR), W_TMP vmovdqu (i * 2)(BUFFER_PTR), W_TMP
.elseif ((i & 7) == 1) .elseif ((i & 7) == 1)
vinsertf128 $1, (((i-1) * 2)+PRECALC_OFFSET)(BUFFER_PTR2),\ vinsertf128 $1, ((i-1) * 2)(BUFFER_PTR2),\
WY_TMP, WY_TMP WY_TMP, WY_TMP
.elseif ((i & 7) == 2) .elseif ((i & 7) == 2)
vpshufb YMM_SHUFB_BSWAP, WY_TMP, WY vpshufb YMM_SHUFB_BSWAP, WY_TMP, WY
.elseif ((i & 7) == 4) .elseif ((i & 7) == 4)
vpaddd K_XMM(K_BASE), WY, WY_TMP vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
.elseif ((i & 7) == 7) .elseif ((i & 7) == 7)
vmovdqu WY_TMP, PRECALC_WK(i&~7) vmovdqu WY_TMP, PRECALC_WK(i&~7)
...@@ -255,7 +254,7 @@ ...@@ -255,7 +254,7 @@
vpxor WY, WY_TMP, WY_TMP vpxor WY, WY_TMP, WY_TMP
.elseif ((i & 7) == 7) .elseif ((i & 7) == 7)
vpxor WY_TMP2, WY_TMP, WY vpxor WY_TMP2, WY_TMP, WY
vpaddd K_XMM(K_BASE), WY, WY_TMP vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
vmovdqu WY_TMP, PRECALC_WK(i&~7) vmovdqu WY_TMP, PRECALC_WK(i&~7)
PRECALC_ROTATE_WY PRECALC_ROTATE_WY
...@@ -291,7 +290,7 @@ ...@@ -291,7 +290,7 @@
vpsrld $30, WY, WY vpsrld $30, WY, WY
vpor WY, WY_TMP, WY vpor WY, WY_TMP, WY
.elseif ((i & 7) == 7) .elseif ((i & 7) == 7)
vpaddd K_XMM(K_BASE), WY, WY_TMP vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
vmovdqu WY_TMP, PRECALC_WK(i&~7) vmovdqu WY_TMP, PRECALC_WK(i&~7)
PRECALC_ROTATE_WY PRECALC_ROTATE_WY
...@@ -446,6 +445,16 @@ ...@@ -446,6 +445,16 @@
.endm .endm
/*
 * ADD_IF_GE a, b, c, d - conditionally advance \a by a constant:
 *
 *	\a = (\b >= \c) ? \a + \d : \a
 *
 * \c and \d are emitted as immediates ($\c, $\d). The comparison is
 * signed (cmovge). \a is left unchanged when \b < \c.
 * Clobbers RTA (used as a temporary) and the flags.
 */
.macro ADD_IF_GE a, b, c, d
mov \a, RTA		/* RTA = \a */
add $\d, RTA		/* RTA = \a + \d (candidate new value) */
cmp $\c, \b		/* set flags from \b - \c */
cmovge RTA, \a		/* commit the sum only if \b >= \c */
.endm
/* /*
* macro implements 80 rounds of SHA-1, for multiple blocks with s/w pipelining * macro implements 80 rounds of SHA-1, for multiple blocks with s/w pipelining
*/ */
...@@ -463,13 +472,16 @@ ...@@ -463,13 +472,16 @@
lea (2*4*80+32)(%rsp), WK_BUF lea (2*4*80+32)(%rsp), WK_BUF
# Precalc WK for first 2 blocks # Precalc WK for first 2 blocks
PRECALC_OFFSET = 0 ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 2, 64
.set i, 0 .set i, 0
.rept 160 .rept 160
PRECALC i PRECALC i
.set i, i + 1 .set i, i + 1
.endr .endr
PRECALC_OFFSET = 128
/* Go to next block if needed */
ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 3, 128
ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128
xchg WK_BUF, PRECALC_BUF xchg WK_BUF, PRECALC_BUF
.align 32 .align 32
...@@ -479,8 +491,8 @@ _loop: ...@@ -479,8 +491,8 @@ _loop:
* we use K_BASE value as a signal of a last block, * we use K_BASE value as a signal of a last block,
* it is set below by: cmovae BUFFER_PTR, K_BASE * it is set below by: cmovae BUFFER_PTR, K_BASE
*/ */
cmp K_BASE, BUFFER_PTR test BLOCKS_CTR, BLOCKS_CTR
jne _begin jnz _begin
.align 32 .align 32
jmp _end jmp _end
.align 32 .align 32
...@@ -512,10 +524,10 @@ _loop0: ...@@ -512,10 +524,10 @@ _loop0:
.set j, j+2 .set j, j+2
.endr .endr
add $(2*64), BUFFER_PTR /* move to next odd-64-byte block */ /* Update Counter */
cmp BUFFER_END, BUFFER_PTR /* is current block the last one? */ sub $1, BLOCKS_CTR
cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */ /* Move to the next block only if needed*/
ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 4, 128
/* /*
* rounds * rounds
* 60,62,64,66,68 * 60,62,64,66,68
...@@ -532,8 +544,8 @@ _loop0: ...@@ -532,8 +544,8 @@ _loop0:
UPDATE_HASH 12(HASH_PTR), D UPDATE_HASH 12(HASH_PTR), D
UPDATE_HASH 16(HASH_PTR), E UPDATE_HASH 16(HASH_PTR), E
cmp K_BASE, BUFFER_PTR /* is current block the last one? */ test BLOCKS_CTR, BLOCKS_CTR
je _loop jz _loop
mov TB, B mov TB, B
...@@ -575,10 +587,10 @@ _loop2: ...@@ -575,10 +587,10 @@ _loop2:
.set j, j+2 .set j, j+2
.endr .endr
add $(2*64), BUFFER_PTR2 /* move to next even-64-byte block */ /* update counter */
sub $1, BLOCKS_CTR
cmp BUFFER_END, BUFFER_PTR2 /* is current block the last one */ /* Move to the next block only if needed*/
cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */ ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128
jmp _loop3 jmp _loop3
_loop3: _loop3:
...@@ -641,19 +653,12 @@ _loop3: ...@@ -641,19 +653,12 @@ _loop3:
avx2_zeroupper avx2_zeroupper
lea K_XMM_AR(%rip), K_BASE /* Setup initial values */
mov CTX, HASH_PTR mov CTX, HASH_PTR
mov BUF, BUFFER_PTR mov BUF, BUFFER_PTR
lea 64(BUF), BUFFER_PTR2
shl $6, CNT /* mul by 64 */
add BUF, CNT
add $64, CNT
mov CNT, BUFFER_END
cmp BUFFER_END, BUFFER_PTR2 mov BUF, BUFFER_PTR2
cmovae K_BASE, BUFFER_PTR2 mov CNT, BLOCKS_CTR
xmm_mov BSWAP_SHUFB_CTL(%rip), YMM_SHUFB_BSWAP xmm_mov BSWAP_SHUFB_CTL(%rip), YMM_SHUFB_BSWAP
......
...@@ -201,7 +201,7 @@ asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, ...@@ -201,7 +201,7 @@ asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,
static bool avx2_usable(void) static bool avx2_usable(void)
{ {
if (false && avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2)
&& boot_cpu_has(X86_FEATURE_BMI1) && boot_cpu_has(X86_FEATURE_BMI1)
&& boot_cpu_has(X86_FEATURE_BMI2)) && boot_cpu_has(X86_FEATURE_BMI2))
return true; return true;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment