Commit d3ce3b18 authored by Linus Torvalds

Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto fix from Herbert Xu:
 "Fix a bug in the implementation of the x86 accelerated version of
  poly1305"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6:
  crypto: x86/poly1305 - fix overflow during partial reduction
parents 95ea5529 678cce40
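
The accelerated Poly1305 code keeps the accumulator h as five 26-bit limbs (radix 2^26). After the multiply/accumulate step the unreduced limb sums d0..d4 are 64-bit values and, as the comment added in the diff below notes, when more than one block is folded in per iteration the carries 'd0 >> 26' ... '(d4 >> 26) * 5' can no longer be assumed to fit in 32 bits; the buggy assembly propagated them through 32-bit registers. A minimal C sketch of the partial reduction with full-width carries, roughly what the corrected assembly computes (illustrative only, not the kernel's code; the function name and layout are made up):

#include <stdint.h>

/*
 * Partial reduction mod 2^130 - 5 for a radix-2^26 accumulator.
 * d_in[0..4] are the unreduced 64-bit limb sums from the multiply step;
 * on return h[0], h[2], h[3], h[4] < 2^26 and h[1] < 2^26 + a small
 * amount.  Illustrative sketch only -- not the kernel's implementation.
 */
static void poly1305_partial_reduce(uint32_t h[5], const uint64_t d_in[5])
{
	uint64_t d0 = d_in[0], d1 = d_in[1], d2 = d_in[2];
	uint64_t d3 = d_in[3], d4 = d_in[4];
	uint64_t c;

	/* Carry h0 -> h1 -> h2 -> h3 -> h4; the carries must stay 64-bit. */
	d1 += d0 >> 26;  d0 &= 0x3ffffff;
	d2 += d1 >> 26;  d1 &= 0x3ffffff;
	d3 += d2 >> 26;  d2 &= 0x3ffffff;
	d4 += d3 >> 26;  d3 &= 0x3ffffff;

	/*
	 * 2^130 == 5 (mod 2^130 - 5), so the carry out of d4 wraps around
	 * multiplied by 5.  This product, too, can exceed 32 bits.
	 */
	c = (d4 >> 26) * 5;
	d4 &= 0x3ffffff;
	d0 += c;

	/* Final carry h0 -> h1 leaves h1 < 2^26 + a small amount. */
	d1 += d0 >> 26;  d0 &= 0x3ffffff;

	h[0] = (uint32_t)d0;
	h[1] = (uint32_t)d1;
	h[2] = (uint32_t)d2;
	h[3] = (uint32_t)d3;
	h[4] = (uint32_t)d4;
}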
@@ -323,6 +323,12 @@ ENTRY(poly1305_4block_avx2)
 	vpaddq		t2,t1,t1
 	vmovq		t1x,d4
 
+	# Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 ->
+	# h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small
+	# amount. Careful: we must not assume the carry bits 'd0 >> 26',
+	# 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit
+	# integers. It's true in a single-block implementation, but not here.
+
 	# d1 += d0 >> 26
 	mov		d0,%rax
 	shr		$26,%rax
@@ -361,16 +367,16 @@ ENTRY(poly1305_4block_avx2)
 	# h0 += (d4 >> 26) * 5
 	mov		d4,%rax
 	shr		$26,%rax
-	lea		(%eax,%eax,4),%eax
-	add		%eax,%ebx
+	lea		(%rax,%rax,4),%rax
+	add		%rax,%rbx
 	# h4 = d4 & 0x3ffffff
 	mov		d4,%rax
 	and		$0x3ffffff,%eax
 	mov		%eax,h4
 
 	# h1 += h0 >> 26
-	mov		%ebx,%eax
-	shr		$26,%eax
+	mov		%rbx,%rax
+	shr		$26,%rax
 	add		%eax,h1
 	# h0 = h0 & 0x3ffffff
 	andl		$0x3ffffff,%ebx
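
The register-width change above (repeated below in the SSE2 routines) is the core of the fix: the carry out of d4 and the following h0 carry were computed in 32-bit registers, so 'lea (%eax,%eax,4),%eax' multiplied only the low 32 bits of 'd4 >> 26' by 5 and 'add %eax,%ebx' added only the low 32 bits into h0. A small C illustration of that truncation, using a hypothetical oversized limb value (for illustration only):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical unreduced limb whose carry needs more than 32 bits. */
	uint64_t d4 = (uint64_t)1 << 60;
	uint64_t carry = d4 >> 26;              /* 2^34: does not fit in %eax */

	/* Old code: lea (%eax,%eax,4),%eax -- works on the low 32 bits only. */
	uint32_t old_term = (uint32_t)carry * 5;
	/* New code: lea (%rax,%rax,4),%rax -- keeps the full 64-bit carry.   */
	uint64_t new_term = carry * 5;

	printf("32-bit term added to h0: %u\n", old_term);          /* prints 0 */
	printf("64-bit term added to h0: %llu\n",
	       (unsigned long long)new_term);       /* prints 5 * 2^34 = 85899345920 */
	return 0;
}

The follow-on change from 'mov %ebx,%eax; shr $26,%eax' to the 64-bit 'mov %rbx,%rax; shr $26,%rax' is needed for the same reason: once the full-width term has been added, h0 itself can exceed 32 bits until the final carry and mask bring it back below 2^26.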
@@ -253,16 +253,16 @@ ENTRY(poly1305_block_sse2)
 	# h0 += (d4 >> 26) * 5
 	mov		d4,%rax
 	shr		$26,%rax
-	lea		(%eax,%eax,4),%eax
-	add		%eax,%ebx
+	lea		(%rax,%rax,4),%rax
+	add		%rax,%rbx
 	# h4 = d4 & 0x3ffffff
 	mov		d4,%rax
 	and		$0x3ffffff,%eax
 	mov		%eax,h4
 
 	# h1 += h0 >> 26
-	mov		%ebx,%eax
-	shr		$26,%eax
+	mov		%rbx,%rax
+	shr		$26,%rax
 	add		%eax,h1
 	# h0 = h0 & 0x3ffffff
 	andl		$0x3ffffff,%ebx
@@ -524,6 +524,12 @@ ENTRY(poly1305_2block_sse2)
 	paddq		t2,t1
 	movq		t1,d4
 
+	# Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 ->
+	# h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small
+	# amount. Careful: we must not assume the carry bits 'd0 >> 26',
+	# 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit
+	# integers. It's true in a single-block implementation, but not here.
+
 	# d1 += d0 >> 26
 	mov		d0,%rax
 	shr		$26,%rax
@@ -562,16 +568,16 @@ ENTRY(poly1305_2block_sse2)
 	# h0 += (d4 >> 26) * 5
 	mov		d4,%rax
 	shr		$26,%rax
-	lea		(%eax,%eax,4),%eax
-	add		%eax,%ebx
+	lea		(%rax,%rax,4),%rax
+	add		%rax,%rbx
 	# h4 = d4 & 0x3ffffff
 	mov		d4,%rax
 	and		$0x3ffffff,%eax
 	mov		%eax,h4
 
 	# h1 += h0 >> 26
-	mov		%ebx,%eax
-	shr		$26,%eax
+	mov		%rbx,%rax
+	shr		$26,%rax
 	add		%eax,h1
 	# h0 = h0 & 0x3ffffff
 	andl		$0x3ffffff,%ebx
@@ -5634,7 +5634,49 @@ static const struct hash_testvec poly1305_tv_template[] = {
 		.psize		= 80,
 		.digest		= "\x13\x00\x00\x00\x00\x00\x00\x00"
 				  "\x00\x00\x00\x00\x00\x00\x00\x00",
-	},
+	}, { /* Regression test for overflow in AVX2 implementation */
+		.plaintext	= "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff",
+		.psize		= 300,
+		.digest		= "\xfb\x5e\x96\xd8\x61\xd5\xc7\xc8"
+				  "\x78\xe5\x87\xcc\x2d\x5a\x22\xe1",
+	}
 };
 
 /* NHPoly1305 test vectors from https://github.com/google/adiantum */
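
The new testmgr.h entry is the regression test: 300 bytes of 0xff. The kernel's Poly1305 shash has no setkey, so the first 32 bytes of .plaintext serve as the one-time key and the remaining 268 bytes are the message, enough blocks to reach the multi-block SSE2 and AVX2 paths with large limb values. If AF_ALG is available, the vector can also be checked from user space with a sketch like the following (hypothetical reproduction, error handling omitted; assumes CONFIG_CRYPTO_POLY1305 and CONFIG_CRYPTO_USER_API_HASH; the printed digest should match the .digest bytes above):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_alg.h>

int main(void)
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "hash",
		.salg_name   = "poly1305",
	};
	unsigned char buf[300], digest[16];
	int tfm, op, i;

	memset(buf, 0xff, sizeof(buf));   /* 32-byte key + 268-byte message */

	tfm = socket(AF_ALG, SOCK_SEQPACKET, 0);
	bind(tfm, (struct sockaddr *)&sa, sizeof(sa));
	op = accept(tfm, NULL, NULL);

	write(op, buf, sizeof(buf));
	read(op, digest, sizeof(digest));

	for (i = 0; i < 16; i++)
		printf("%02x", digest[i]);    /* expect fb5e96d861d5c7c878e587cc2d5a22e1 */
	printf("\n");

	close(op);
	close(tfm);
	return 0;
}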