Commit e0abaeba, authored by Andrew Morton, committed by David S. Miller

[CRYPTO]: reduce sha512_transform() stack usage, speedup

This patch moves the large temporary u64 W[80] array from the stack into the ctx struct:

* reduces stack usage by 640 bytes
* saves one 640-byte memset() per sha512_transform() call
  (the memset is still done once, after *all* iterations are done)
* quite unexpectedly saves 1.6k of code on i386,
  because stack offsets now fit into 8 bits
  and many stack-addressing instructions got 3 bytes smaller:

# size sha512.o.org sha512.o
text       data     bss     dec     hex filename
8281        372       0    8653    21cd sha512.o.org
6649        372       0    7021    1b6d sha512.o

# objdump -d sha512.o.org | cut -b9- >sha512.d.org
# objdump -d sha512.o | cut -b9- >sha512.d
# diff -u sha512.d.org sha512.d
[snip]
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent bc30e521
...@@ -30,6 +30,7 @@ struct sha512_ctx { ...@@ -30,6 +30,7 @@ struct sha512_ctx {
u64 state[8]; u64 state[8];
u32 count[4]; u32 count[4];
u8 buf[128]; u8 buf[128];
u64 W[80];
}; };
static inline u64 Ch(u64 x, u64 y, u64 z) static inline u64 Ch(u64 x, u64 y, u64 z)
...@@ -113,10 +114,9 @@ static inline void BLEND_OP(int I, u64 *W) ...@@ -113,10 +114,9 @@ static inline void BLEND_OP(int I, u64 *W)
} }
static void static void
sha512_transform(u64 *state, const u8 *input) sha512_transform(u64 *state, u64 *W, const u8 *input)
{ {
u64 a, b, c, d, e, f, g, h, t1, t2; u64 a, b, c, d, e, f, g, h, t1, t2;
u64 W[80];
int i; int i;
...@@ -157,7 +157,6 @@ sha512_transform(u64 *state, const u8 *input) ...@@ -157,7 +157,6 @@ sha512_transform(u64 *state, const u8 *input)
/* erase our data */ /* erase our data */
a = b = c = d = e = f = g = h = t1 = t2 = 0; a = b = c = d = e = f = g = h = t1 = t2 = 0;
memset(W, 0, 80 * sizeof(u64));
} }
static void static void
...@@ -215,10 +214,10 @@ sha512_update(void *ctx, const u8 *data, unsigned int len) ...@@ -215,10 +214,10 @@ sha512_update(void *ctx, const u8 *data, unsigned int len)
/* Transform as many times as possible. */ /* Transform as many times as possible. */
if (len >= part_len) { if (len >= part_len) {
memcpy(&sctx->buf[index], data, part_len); memcpy(&sctx->buf[index], data, part_len);
sha512_transform(sctx->state, sctx->buf); sha512_transform(sctx->state, sctx->W, sctx->buf);
for (i = part_len; i + 127 < len; i+=128) for (i = part_len; i + 127 < len; i+=128)
sha512_transform(sctx->state, &data[i]); sha512_transform(sctx->state, sctx->W, &data[i]);
index = 0; index = 0;
} else { } else {
...@@ -227,6 +226,9 @@ sha512_update(void *ctx, const u8 *data, unsigned int len) ...@@ -227,6 +226,9 @@ sha512_update(void *ctx, const u8 *data, unsigned int len)
/* Buffer remaining input */ /* Buffer remaining input */
memcpy(&sctx->buf[index], &data[i], len - i); memcpy(&sctx->buf[index], &data[i], len - i);
/* erase our data */
memset(sctx->W, 0, sizeof(sctx->W));
} }
static void static void
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment