Commit 60468255 authored by Jussi Kivilinna, committed by Russell King

ARM: 8119/1: crypto: sha1: add ARM NEON implementation

This patch adds an ARM NEON assembly implementation of the SHA-1 algorithm.

tcrypt benchmark results on Cortex-A8, sha1-arm-asm vs sha1-neon-asm:

block-size      bytes/update    old-vs-new
16              16              1.04x
64              16              1.02x
64              64              1.05x
256             16              1.03x
256             64              1.04x
256             256             1.30x
1024            16              1.03x
1024            256             1.36x
1024            1024            1.52x
2048            16              1.03x
2048            256             1.39x
2048            1024            1.55x
2048            2048            1.59x
4096            16              1.03x
4096            256             1.40x
4096            1024            1.57x
4096            4096            1.62x
8192            16              1.03x
8192            256             1.40x
8192            1024            1.58x
8192            4096            1.63x
8192            8192            1.63x
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
parent 1f8673d3
...@@ -5,10 +5,12 @@ ...@@ -5,10 +5,12 @@
obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
aes-arm-y := aes-armv4.o aes_glue.o aes-arm-y := aes-armv4.o aes_glue.o
aes-arm-bs-y := aesbs-core.o aesbs-glue.o aes-arm-bs-y := aesbs-core.o aesbs-glue.o
sha1-arm-y := sha1-armv4-large.o sha1_glue.o sha1-arm-y := sha1-armv4-large.o sha1_glue.o
sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o
quiet_cmd_perl = PERL $@ quiet_cmd_perl = PERL $@
cmd_perl = $(PERL) $(<) > $(@) cmd_perl = $(PERL) $(<) > $(@)
......
This diff is collapsed.
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include <linux/types.h> #include <linux/types.h>
#include <crypto/sha.h> #include <crypto/sha.h>
#include <asm/byteorder.h> #include <asm/byteorder.h>
#include <asm/crypto/sha1.h>
asmlinkage void sha1_block_data_order(u32 *digest, asmlinkage void sha1_block_data_order(u32 *digest,
...@@ -65,7 +66,7 @@ static int __sha1_update(struct sha1_state *sctx, const u8 *data, ...@@ -65,7 +66,7 @@ static int __sha1_update(struct sha1_state *sctx, const u8 *data,
} }
static int sha1_update(struct shash_desc *desc, const u8 *data, int sha1_update_arm(struct shash_desc *desc, const u8 *data,
unsigned int len) unsigned int len)
{ {
struct sha1_state *sctx = shash_desc_ctx(desc); struct sha1_state *sctx = shash_desc_ctx(desc);
...@@ -81,6 +82,7 @@ static int sha1_update(struct shash_desc *desc, const u8 *data, ...@@ -81,6 +82,7 @@ static int sha1_update(struct shash_desc *desc, const u8 *data,
res = __sha1_update(sctx, data, len, partial); res = __sha1_update(sctx, data, len, partial);
return res; return res;
} }
EXPORT_SYMBOL_GPL(sha1_update_arm);
/* Add padding and return the message digest. */ /* Add padding and return the message digest. */
...@@ -135,7 +137,7 @@ static int sha1_import(struct shash_desc *desc, const void *in) ...@@ -135,7 +137,7 @@ static int sha1_import(struct shash_desc *desc, const void *in)
static struct shash_alg alg = { static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE, .digestsize = SHA1_DIGEST_SIZE,
.init = sha1_init, .init = sha1_init,
.update = sha1_update, .update = sha1_update_arm,
.final = sha1_final, .final = sha1_final,
.export = sha1_export, .export = sha1_export,
.import = sha1_import, .import = sha1_import,
......
/*
* Glue code for the SHA1 Secure Hash Algorithm assembler implementation using
* ARM NEON instructions.
*
* Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This file is based on sha1_generic.c and sha1_ssse3_glue.c:
* Copyright (c) Alan Smithee.
* Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
* Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
* Copyright (c) Mathias Krause <minipli@googlemail.com>
* Copyright (c) Chandramouli Narayanan <mouli@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/crypto/sha1.h>
/*
 * SHA-1 block transform implemented outside this file (asmlinkage).
 *
 * @state_h: the five-word running hash state (callers pass sctx->state)
 * @data:    input to hash, read in units of SHA1_BLOCK_SIZE bytes
 * @rounds:  number of SHA1_BLOCK_SIZE blocks to process from @data
 *
 * Every call in this file is made between kernel_neon_begin() and
 * kernel_neon_end().
 */
asmlinkage void sha1_transform_neon(void *state_h, const char *data,
unsigned int rounds);
static int sha1_neon_init(struct shash_desc *desc)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
*sctx = (struct sha1_state){
.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
};
return 0;
}
/*
 * Feed @len bytes from @data through the NEON transform.
 *
 * @partial is the number of bytes already waiting in sctx->buffer.  The
 * callers in this file only invoke this helper when @partial + @len reaches
 * at least one full block, and always between kernel_neon_begin() and
 * kernel_neon_end().
 *
 * Steps: top up and hash the buffered block (if any), hash every complete
 * block directly from @data, then stash the tail in sctx->buffer for the
 * next call.  Always returns 0.
 */
static int __sha1_neon_update(struct shash_desc *desc, const u8 *data,
			      unsigned int len, unsigned int partial)
{
	struct sha1_state *ctx = shash_desc_ctx(desc);
	const u8 *src = data;
	unsigned int remaining = len;
	unsigned int blocks;

	ctx->count += len;

	/* Complete the block that earlier updates left half filled. */
	if (partial) {
		const unsigned int fill = SHA1_BLOCK_SIZE - partial;

		memcpy(ctx->buffer + partial, src, fill);
		sha1_transform_neon(ctx->state, ctx->buffer, 1);
		src += fill;
		remaining -= fill;
	}

	/* Hash all whole blocks straight from the caller's buffer. */
	blocks = remaining / SHA1_BLOCK_SIZE;
	if (blocks) {
		sha1_transform_neon(ctx->state, src, blocks);
		src += blocks * SHA1_BLOCK_SIZE;
		remaining -= blocks * SHA1_BLOCK_SIZE;
	}

	/* Keep the sub-block tail for the next update/final. */
	memcpy(ctx->buffer, src, remaining);

	return 0;
}
/*
 * shash .update handler: absorb @len bytes from @data into the SHA-1 state.
 *
 * Input that does not complete a block is only copied into sctx->buffer.
 * Otherwise the data is hashed by the NEON transform when may_use_simd()
 * allows it, and handed to sha1_update_arm() when it does not.
 *
 * Returns 0 from the buffering path, otherwise the result of the routine
 * that consumed the data.
 */
static int sha1_neon_update(struct shash_desc *desc, const u8 *data,
			    unsigned int len)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);
	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
	int res;

	/*
	 * Handle the fast case right here.
	 *
	 * partial is always below SHA1_BLOCK_SIZE, so the subtraction cannot
	 * underflow.  The equivalent-looking "partial + len" would wrap for a
	 * len close to UINT_MAX and let an oversized copy run past
	 * sctx->buffer.
	 */
	if (len < SHA1_BLOCK_SIZE - partial) {
		sctx->count += len;
		memcpy(sctx->buffer + partial, data, len);

		return 0;
	}

	if (!may_use_simd()) {
		/* NEON is off limits in this context; use the scalar code. */
		res = sha1_update_arm(desc, data, len);
	} else {
		kernel_neon_begin();
		res = __sha1_neon_update(desc, data, len, partial);
		kernel_neon_end();
	}

	return res;
}
/*
 * shash .final handler: add padding and return the message digest.
 *
 * The message is padded with 0x80 followed by zero bytes up to 56 mod 64,
 * then the message length in bits is appended as a big-endian 64-bit value.
 * The five state words are written to @out in big-endian order and the
 * context is cleared afterwards.  Always returns 0.
 */
static int sha1_neon_final(struct shash_desc *desc, u8 *out)
{
	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
	struct sha1_state *ctx = shash_desc_ctx(desc);
	__be32 *digest = (__be32 *)out;
	/* Capture the bit length before the padding bumps ctx->count. */
	__be64 bits = cpu_to_be64(ctx->count << 3);
	unsigned int used = ctx->count % SHA1_BLOCK_SIZE;
	unsigned int padlen;
	unsigned int word;

	/* Pad out to 56 mod 64, spilling into a second block if needed. */
	if (used < 56)
		padlen = 56 - used;
	else
		padlen = (SHA1_BLOCK_SIZE + 56) - used;

	if (may_use_simd()) {
		kernel_neon_begin();
		/*
		 * __sha1_neon_update() needs a whole block to work on, so
		 * padding that stays inside the current block is just
		 * buffered by hand.
		 */
		if (padlen > 56) {
			__sha1_neon_update(desc, padding, padlen, used);
		} else {
			ctx->count += padlen;
			memcpy(ctx->buffer + used, padding, padlen);
		}
		/* 56 buffered bytes plus the 8 length bytes complete a block. */
		__sha1_neon_update(desc, (const u8 *)&bits, sizeof(bits), 56);
		kernel_neon_end();
	} else {
		sha1_update_arm(desc, padding, padlen);
		sha1_update_arm(desc, (const u8 *)&bits, sizeof(bits));
	}

	/* Store state in digest */
	for (word = 0; word < 5; word++)
		digest[word] = cpu_to_be32(ctx->state[word]);

	/* Wipe context */
	memset(ctx, 0, sizeof(*ctx));

	return 0;
}
/*
 * shash .export handler: copy the whole SHA-1 context of @desc into @out.
 *
 * sizeof(struct sha1_state) bytes are written to @out.  Always returns 0.
 */
static int sha1_neon_export(struct shash_desc *desc, void *out)
{
	const struct sha1_state *state = shash_desc_ctx(desc);

	memcpy(out, state, sizeof(struct sha1_state));

	return 0;
}
/*
 * shash .import handler: overwrite the SHA-1 context of @desc with the
 * state stored at @in.
 *
 * sizeof(struct sha1_state) bytes are read from @in.  Always returns 0.
 */
static int sha1_neon_import(struct shash_desc *desc, const void *in)
{
	memcpy(shash_desc_ctx(desc), in, sizeof(struct sha1_state));

	return 0;
}
/*
 * shash algorithm descriptor for the NEON SHA-1 implementation.
 *
 * It is registered in sha1_neon_mod_init() and removed again in
 * sha1_neon_mod_fini().
 */
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_neon_init,
.update = sha1_neon_update,
.final = sha1_neon_final,
.export = sha1_neon_export,
.import = sha1_neon_import,
/* Per-request context and exported state are both a struct sha1_state. */
.descsize = sizeof(struct sha1_state),
.statesize = sizeof(struct sha1_state),
.base = {
/* Generic algorithm name vs. the name of this specific driver. */
.cra_name = "sha1",
.cra_driver_name = "sha1-neon",
/*
 * NOTE(review): presumably chosen to rank above the non-NEON ARM
 * "sha1" driver - confirm against that driver's priority.
 */
.cra_priority = 250,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
/*
 * Module entry point: register the NEON SHA-1 shash algorithm.
 *
 * Returns -ENODEV when cpu_has_neon() reports no NEON support, otherwise
 * whatever crypto_register_shash() returns.
 */
static int __init sha1_neon_mod_init(void)
{
	return cpu_has_neon() ? crypto_register_shash(&alg) : -ENODEV;
}
/* Module exit point: unregister the shash algorithm added at init time. */
static void __exit sha1_neon_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
/* Hook the init/exit routines into module load and unload. */
module_init(sha1_neon_mod_init);
module_exit(sha1_neon_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, NEON accelerated");
/*
 * Alias under the generic algorithm name.
 * NOTE(review): presumably so that a request for "sha1" can load this
 * module - confirm how crypto modules are autoloaded in this tree.
 */
MODULE_ALIAS("sha1");
/*
 * Shared declaration of the ARM SHA-1 update routine, so that other ARM
 * SHA-1 glue code can call it.
 */
#ifndef ASM_ARM_CRYPTO_SHA1_H
#define ASM_ARM_CRYPTO_SHA1_H
#include <linux/crypto.h>
#include <crypto/sha.h>
/*
 * Hash @len bytes from @data into the SHA-1 context of @desc.
 * The NEON glue code calls this when may_use_simd() reports that SIMD
 * cannot be used.
 */
extern int sha1_update_arm(struct shash_desc *desc, const u8 *data,
unsigned int len);
#endif /* ASM_ARM_CRYPTO_SHA1_H */
...@@ -540,6 +540,17 @@ config CRYPTO_SHA1_ARM ...@@ -540,6 +540,17 @@ config CRYPTO_SHA1_ARM
SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
using optimized ARM assembler. using optimized ARM assembler.
config CRYPTO_SHA1_ARM_NEON
tristate "SHA1 digest algorithm (ARM NEON)"
depends on ARM && KERNEL_MODE_NEON && !CPU_BIG_ENDIAN
select CRYPTO_SHA1_ARM
select CRYPTO_SHA1
select CRYPTO_HASH
help
SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
using optimized ARM NEON assembly, when NEON instructions are
available.
config CRYPTO_SHA1_PPC config CRYPTO_SHA1_PPC
tristate "SHA1 digest algorithm (powerpc)" tristate "SHA1 digest algorithm (powerpc)"
depends on PPC depends on PPC
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment