Commit 178e834c authored by Linus Torvalds

Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto fixes from Herbert Xu:
 "This fixes the following issues:

   - oversize stack frames on mn10300 in sha3-generic

   - warning on old compilers in sha3-generic

   - API error in sun4i_ss_prng

   - potential dead-lock in sun4i_ss_prng

   - null-pointer dereference in sha512-mb

   - endless loop when DECO acquire fails in caam

   - kernel oops when hashing empty message in talitos"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6:
  crypto: sun4i_ss_prng - convert lock to _bh in sun4i_ss_prng_generate
  crypto: sun4i_ss_prng - fix return value of sun4i_ss_prng_generate
  crypto: caam - fix endless loop when DECO acquire fails
  crypto: sha3-generic - Use __optimize to support old compilers
  compiler-gcc.h: __nostackprotector needs gcc-4.4 and up
  compiler-gcc.h: Introduce __optimize function attribute
  crypto: sha3-generic - deal with oversize stack frames
  crypto: talitos - fix Kernel Oops on hashing an empty file
  crypto: sha512-mb - initialize pending lengths correctly
parents 7928b2cb 2e7d1d61
...@@ -57,10 +57,12 @@ void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state) ...@@ -57,10 +57,12 @@ void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state)
{ {
unsigned int j; unsigned int j;
state->lens[0] = 0; /* initially all lanes are unused */
state->lens[1] = 1; state->lens[0] = 0xFFFFFFFF00000000;
state->lens[2] = 2; state->lens[1] = 0xFFFFFFFF00000001;
state->lens[3] = 3; state->lens[2] = 0xFFFFFFFF00000002;
state->lens[3] = 0xFFFFFFFF00000003;
state->unused_lanes = 0xFF03020100; state->unused_lanes = 0xFF03020100;
for (j = 0; j < 4; j++) for (j = 0; j < 4; j++)
state->ldata[j].job_in_lane = NULL; state->ldata[j].job_in_lane = NULL;
......
...@@ -20,6 +20,20 @@ ...@@ -20,6 +20,20 @@
#include <crypto/sha3.h> #include <crypto/sha3.h>
#include <asm/unaligned.h> #include <asm/unaligned.h>
/*
* On some 32-bit architectures (mn10300 and h8300), GCC ends up using
* over 1 KB of stack if we inline the round calculation into the loop
* in keccakf(). On the other hand, on 64-bit architectures with plenty
* of [64-bit wide] general purpose registers, not inlining it severely
* hurts performance. So let's use 64-bitness as a heuristic to decide
* whether to inline or not.
*/
#ifdef CONFIG_64BIT
#define SHA3_INLINE inline
#else
#define SHA3_INLINE noinline
#endif
#define KECCAK_ROUNDS 24 #define KECCAK_ROUNDS 24
static const u64 keccakf_rndc[24] = { static const u64 keccakf_rndc[24] = {
...@@ -35,12 +49,9 @@ static const u64 keccakf_rndc[24] = { ...@@ -35,12 +49,9 @@ static const u64 keccakf_rndc[24] = {
/* update the state with given number of rounds */ /* update the state with given number of rounds */
static void __attribute__((__optimize__("O3"))) keccakf(u64 st[25]) static SHA3_INLINE void keccakf_round(u64 st[25])
{ {
u64 t[5], tt, bc[5]; u64 t[5], tt, bc[5];
int round;
for (round = 0; round < KECCAK_ROUNDS; round++) {
/* Theta */ /* Theta */
bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20]; bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
...@@ -139,7 +150,14 @@ static void __attribute__((__optimize__("O3"))) keccakf(u64 st[25]) ...@@ -139,7 +150,14 @@ static void __attribute__((__optimize__("O3"))) keccakf(u64 st[25])
st[22] ^= bc[ 2]; st[22] ^= bc[ 2];
st[23] ^= bc[ 3]; st[23] ^= bc[ 3];
st[24] ^= bc[ 4]; st[24] ^= bc[ 4];
}
static void __optimize("O3") keccakf(u64 st[25])
{
int round;
for (round = 0; round < KECCAK_ROUNDS; round++) {
keccakf_round(st);
/* Iota */ /* Iota */
st[0] ^= keccakf_rndc[round]; st[0] ^= keccakf_rndc[round];
} }
......
...@@ -228,12 +228,16 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask, ...@@ -228,12 +228,16 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask,
* without any error (HW optimizations for later * without any error (HW optimizations for later
* CAAM eras), then try again. * CAAM eras), then try again.
*/ */
if (ret)
break;
rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK; rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK;
if ((status && status != JRSTA_SSRC_JUMP_HALT_CC) || if ((status && status != JRSTA_SSRC_JUMP_HALT_CC) ||
!(rdsta_val & (1 << sh_idx))) !(rdsta_val & (1 << sh_idx))) {
ret = -EAGAIN; ret = -EAGAIN;
if (ret)
break; break;
}
dev_info(ctrldev, "Instantiated RNG4 SH%d\n", sh_idx); dev_info(ctrldev, "Instantiated RNG4 SH%d\n", sh_idx);
/* Clear the contents before recreating the descriptor */ /* Clear the contents before recreating the descriptor */
memset(desc, 0x00, CAAM_CMD_SZ * 7); memset(desc, 0x00, CAAM_CMD_SZ * 7);
......
...@@ -28,7 +28,7 @@ int sun4i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src, ...@@ -28,7 +28,7 @@ int sun4i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src,
algt = container_of(alg, struct sun4i_ss_alg_template, alg.rng); algt = container_of(alg, struct sun4i_ss_alg_template, alg.rng);
ss = algt->ss; ss = algt->ss;
spin_lock(&ss->slock); spin_lock_bh(&ss->slock);
writel(mode, ss->base + SS_CTL); writel(mode, ss->base + SS_CTL);
...@@ -51,6 +51,6 @@ int sun4i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src, ...@@ -51,6 +51,6 @@ int sun4i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src,
} }
writel(0, ss->base + SS_CTL); writel(0, ss->base + SS_CTL);
spin_unlock(&ss->slock); spin_unlock_bh(&ss->slock);
return dlen; return 0;
} }
...@@ -1138,6 +1138,10 @@ static int talitos_sg_map(struct device *dev, struct scatterlist *src, ...@@ -1138,6 +1138,10 @@ static int talitos_sg_map(struct device *dev, struct scatterlist *src,
struct talitos_private *priv = dev_get_drvdata(dev); struct talitos_private *priv = dev_get_drvdata(dev);
bool is_sec1 = has_ftr_sec1(priv); bool is_sec1 = has_ftr_sec1(priv);
if (!src) {
to_talitos_ptr(ptr, 0, 0, is_sec1);
return 1;
}
if (sg_count == 1) { if (sg_count == 1) {
to_talitos_ptr(ptr, sg_dma_address(src) + offset, len, is_sec1); to_talitos_ptr(ptr, sg_dma_address(src) + offset, len, is_sec1);
return sg_count; return sg_count;
......
...@@ -167,8 +167,6 @@ ...@@ -167,8 +167,6 @@
#if GCC_VERSION >= 40100 #if GCC_VERSION >= 40100
# define __compiletime_object_size(obj) __builtin_object_size(obj, 0) # define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
#define __nostackprotector __attribute__((__optimize__("no-stack-protector")))
#endif #endif
#if GCC_VERSION >= 40300 #if GCC_VERSION >= 40300
...@@ -196,6 +194,11 @@ ...@@ -196,6 +194,11 @@
#endif /* __CHECKER__ */ #endif /* __CHECKER__ */
#endif /* GCC_VERSION >= 40300 */ #endif /* GCC_VERSION >= 40300 */
#if GCC_VERSION >= 40400
#define __optimize(level) __attribute__((__optimize__(level)))
#define __nostackprotector __optimize("no-stack-protector")
#endif /* GCC_VERSION >= 40400 */
#if GCC_VERSION >= 40500 #if GCC_VERSION >= 40500
#ifndef __CHECKER__ #ifndef __CHECKER__
......
...@@ -277,6 +277,10 @@ unsigned long read_word_at_a_time(const void *addr) ...@@ -277,6 +277,10 @@ unsigned long read_word_at_a_time(const void *addr)
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
#ifndef __optimize
# define __optimize(level)
#endif
/* Compile time object size, -1 for unknown */ /* Compile time object size, -1 for unknown */
#ifndef __compiletime_object_size #ifndef __compiletime_object_size
# define __compiletime_object_size(obj) -1 # define __compiletime_object_size(obj) -1
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment