Commit f2f770d7 authored by Sami Tolvanen's avatar Sami Tolvanen Committed by Herbert Xu

crypto: arm/sha256 - Add optimized SHA-256/224

Add Andy Polyakov's optimized assembly and NEON implementations for
SHA-256/224.

The sha256-armv4.pl script for generating the assembly code is from
OpenSSL commit 51f8d095562f36cdaa6893597b5c609e943b0565.

Compared to sha256-generic these implementations have the following
tcrypt speed improvements on Motorola Nexus 6 (Snapdragon 805):

  bs    b/u      sha256-neon  sha256-asm
  16    16       x1.32        x1.19
  64    16       x1.27        x1.15
  64    64       x1.36        x1.20
  256   16       x1.22        x1.11
  256   64       x1.36        x1.19
  256   256      x1.59        x1.23
  1024  16       x1.21        x1.10
  1024  256      x1.65        x1.23
  1024  1024     x1.76        x1.25
  2048  16       x1.21        x1.10
  2048  256      x1.66        x1.23
  2048  1024     x1.78        x1.25
  2048  2048     x1.79        x1.25
  4096  16       x1.20        x1.09
  4096  256      x1.66        x1.23
  4096  1024     x1.79        x1.26
  4096  4096     x1.82        x1.26
  8192  16       x1.20        x1.09
  8192  256      x1.67        x1.23
  8192  1024     x1.80        x1.26
  8192  4096     x1.85        x1.28
  8192  8192     x1.85        x1.27

Where bs refers to block size and b/u to bytes per update.
Signed-off-by: default avatarSami Tolvanen <samitolvanen@google.com>
Cc: Andy Polyakov <appro@openssl.org>
Signed-off-by: default avatarHerbert Xu <herbert@gondor.apana.org.au>
parent 87b16756
...@@ -46,6 +46,13 @@ config CRYPTO_SHA2_ARM_CE ...@@ -46,6 +46,13 @@ config CRYPTO_SHA2_ARM_CE
SHA-256 secure hash standard (DFIPS 180-2) implemented SHA-256 secure hash standard (DFIPS 180-2) implemented
using special ARMv8 Crypto Extensions. using special ARMv8 Crypto Extensions.
config CRYPTO_SHA256_ARM
tristate "SHA-224/256 digest algorithm (ARM-asm and NEON)"
select CRYPTO_HASH
help
SHA-256 secure hash standard (DFIPS 180-2) implemented
using optimized ARM assembler and NEON, when available.
config CRYPTO_SHA512_ARM_NEON config CRYPTO_SHA512_ARM_NEON
tristate "SHA384 and SHA512 digest algorithm (ARM NEON)" tristate "SHA384 and SHA512 digest algorithm (ARM NEON)"
depends on KERNEL_MODE_NEON depends on KERNEL_MODE_NEON
......
...@@ -7,6 +7,7 @@ obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o ...@@ -7,6 +7,7 @@ obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o
obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o
...@@ -16,6 +17,8 @@ aes-arm-y := aes-armv4.o aes_glue.o ...@@ -16,6 +17,8 @@ aes-arm-y := aes-armv4.o aes_glue.o
aes-arm-bs-y := aesbs-core.o aesbs-glue.o aes-arm-bs-y := aesbs-core.o aesbs-glue.o
sha1-arm-y := sha1-armv4-large.o sha1_glue.o sha1-arm-y := sha1-armv4-large.o sha1_glue.o
sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o
sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y)
sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o
sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o
sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o
...@@ -28,4 +31,7 @@ quiet_cmd_perl = PERL $@ ...@@ -28,4 +31,7 @@ quiet_cmd_perl = PERL $@
$(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl $(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl
$(call cmd,perl) $(call cmd,perl)
.PRECIOUS: $(obj)/aesbs-core.S $(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
$(call cmd,perl)
.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S
...@@ -163,7 +163,7 @@ static struct shash_alg algs[] = { { ...@@ -163,7 +163,7 @@ static struct shash_alg algs[] = { {
.base = { .base = {
.cra_name = "sha224", .cra_name = "sha224",
.cra_driver_name = "sha224-ce", .cra_driver_name = "sha224-ce",
.cra_priority = 200, .cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE, .cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE, .cra_module = THIS_MODULE,
...@@ -180,7 +180,7 @@ static struct shash_alg algs[] = { { ...@@ -180,7 +180,7 @@ static struct shash_alg algs[] = { {
.base = { .base = {
.cra_name = "sha256", .cra_name = "sha256",
.cra_driver_name = "sha256-ce", .cra_driver_name = "sha256-ce",
.cra_priority = 200, .cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE, .cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE, .cra_module = THIS_MODULE,
......
This diff is collapsed.
This diff is collapsed.
/*
* Glue code for the SHA256 Secure Hash Algorithm assembly implementation
* using optimized ARM assembler and NEON instructions.
*
* Copyright 2015 Google Inc.
*
* This file is based on sha256_ssse3_glue.c:
* Copyright (C) 2013 Intel Corporation
* Author: Tim Chen <tim.c.chen@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <crypto/internal/hash.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <linux/string.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
#include <asm/simd.h>
#include <asm/neon.h>
#include "sha256_glue.h"
asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
unsigned int num_blks);
int sha256_init(struct shash_desc *desc)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA256_H0;
sctx->state[1] = SHA256_H1;
sctx->state[2] = SHA256_H2;
sctx->state[3] = SHA256_H3;
sctx->state[4] = SHA256_H4;
sctx->state[5] = SHA256_H5;
sctx->state[6] = SHA256_H6;
sctx->state[7] = SHA256_H7;
sctx->count = 0;
return 0;
}
int sha224_init(struct shash_desc *desc)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA224_H0;
sctx->state[1] = SHA224_H1;
sctx->state[2] = SHA224_H2;
sctx->state[3] = SHA224_H3;
sctx->state[4] = SHA224_H4;
sctx->state[5] = SHA224_H5;
sctx->state[6] = SHA224_H6;
sctx->state[7] = SHA224_H7;
sctx->count = 0;
return 0;
}
int __sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len,
unsigned int partial)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
unsigned int done = 0;
sctx->count += len;
if (partial) {
done = SHA256_BLOCK_SIZE - partial;
memcpy(sctx->buf + partial, data, done);
sha256_block_data_order(sctx->state, sctx->buf, 1);
}
if (len - done >= SHA256_BLOCK_SIZE) {
const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
sha256_block_data_order(sctx->state, data + done, rounds);
done += rounds * SHA256_BLOCK_SIZE;
}
memcpy(sctx->buf, data + done, len - done);
return 0;
}
int sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
/* Handle the fast case right here */
if (partial + len < SHA256_BLOCK_SIZE) {
sctx->count += len;
memcpy(sctx->buf + partial, data, len);
return 0;
}
return __sha256_update(desc, data, len, partial);
}
/* Add padding and return the message digest. */
static int sha256_final(struct shash_desc *desc, u8 *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
unsigned int i, index, padlen;
__be32 *dst = (__be32 *)out;
__be64 bits;
static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
/* save number of bits */
bits = cpu_to_be64(sctx->count << 3);
/* Pad out to 56 mod 64 and append length */
index = sctx->count % SHA256_BLOCK_SIZE;
padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index);
/* We need to fill a whole block for __sha256_update */
if (padlen <= 56) {
sctx->count += padlen;
memcpy(sctx->buf + index, padding, padlen);
} else {
__sha256_update(desc, padding, padlen, index);
}
__sha256_update(desc, (const u8 *)&bits, sizeof(bits), 56);
/* Store state in digest */
for (i = 0; i < 8; i++)
dst[i] = cpu_to_be32(sctx->state[i]);
/* Wipe context */
memset(sctx, 0, sizeof(*sctx));
return 0;
}
static int sha224_final(struct shash_desc *desc, u8 *out)
{
u8 D[SHA256_DIGEST_SIZE];
sha256_final(desc, D);
memcpy(out, D, SHA224_DIGEST_SIZE);
memzero_explicit(D, SHA256_DIGEST_SIZE);
return 0;
}
int sha256_export(struct shash_desc *desc, void *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
int sha256_import(struct shash_desc *desc, const void *in)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
}
static struct shash_alg algs[] = { {
.digestsize = SHA256_DIGEST_SIZE,
.init = sha256_init,
.update = sha256_update,
.final = sha256_final,
.export = sha256_export,
.import = sha256_import,
.descsize = sizeof(struct sha256_state),
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-asm",
.cra_priority = 150,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.digestsize = SHA224_DIGEST_SIZE,
.init = sha224_init,
.update = sha256_update,
.final = sha224_final,
.export = sha256_export,
.import = sha256_import,
.descsize = sizeof(struct sha256_state),
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",
.cra_driver_name = "sha224-asm",
.cra_priority = 150,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA224_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
static int __init sha256_mod_init(void)
{
int res = crypto_register_shashes(algs, ARRAY_SIZE(algs));
if (res < 0)
return res;
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) {
res = crypto_register_shashes(sha256_neon_algs,
ARRAY_SIZE(sha256_neon_algs));
if (res < 0)
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}
return res;
}
static void __exit sha256_mod_fini(void)
{
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon())
crypto_unregister_shashes(sha256_neon_algs,
ARRAY_SIZE(sha256_neon_algs));
}
module_init(sha256_mod_init);
module_exit(sha256_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm (ARM), including NEON");
MODULE_ALIAS_CRYPTO("sha256");
#ifndef _CRYPTO_SHA256_GLUE_H
#define _CRYPTO_SHA256_GLUE_H
#include <linux/crypto.h>
#include <crypto/sha.h>
extern struct shash_alg sha256_neon_algs[2];
extern int sha256_init(struct shash_desc *desc);
extern int sha224_init(struct shash_desc *desc);
extern int __sha256_update(struct shash_desc *desc, const u8 *data,
unsigned int len, unsigned int partial);
extern int sha256_update(struct shash_desc *desc, const u8 *data,
unsigned int len);
extern int sha256_export(struct shash_desc *desc, void *out);
extern int sha256_import(struct shash_desc *desc, const void *in);
#endif /* _CRYPTO_SHA256_GLUE_H */
/*
* Glue code for the SHA256 Secure Hash Algorithm assembly implementation
* using NEON instructions.
*
* Copyright 2015 Google Inc.
*
* This file is based on sha512_neon_glue.c:
* Copyright 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <crypto/internal/hash.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <linux/string.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
#include <asm/simd.h>
#include <asm/neon.h>
#include "sha256_glue.h"
asmlinkage void sha256_block_data_order_neon(u32 *digest, const void *data,
unsigned int num_blks);
static int __sha256_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len, unsigned int partial)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
unsigned int done = 0;
sctx->count += len;
if (partial) {
done = SHA256_BLOCK_SIZE - partial;
memcpy(sctx->buf + partial, data, done);
sha256_block_data_order_neon(sctx->state, sctx->buf, 1);
}
if (len - done >= SHA256_BLOCK_SIZE) {
const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
sha256_block_data_order_neon(sctx->state, data + done, rounds);
done += rounds * SHA256_BLOCK_SIZE;
}
memcpy(sctx->buf, data + done, len - done);
return 0;
}
static int sha256_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
int res;
/* Handle the fast case right here */
if (partial + len < SHA256_BLOCK_SIZE) {
sctx->count += len;
memcpy(sctx->buf + partial, data, len);
return 0;
}
if (!may_use_simd()) {
res = __sha256_update(desc, data, len, partial);
} else {
kernel_neon_begin();
res = __sha256_neon_update(desc, data, len, partial);
kernel_neon_end();
}
return res;
}
/* Add padding and return the message digest. */
static int sha256_neon_final(struct shash_desc *desc, u8 *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
unsigned int i, index, padlen;
__be32 *dst = (__be32 *)out;
__be64 bits;
static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
/* save number of bits */
bits = cpu_to_be64(sctx->count << 3);
/* Pad out to 56 mod 64 and append length */
index = sctx->count % SHA256_BLOCK_SIZE;
padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index);
if (!may_use_simd()) {
sha256_update(desc, padding, padlen);
sha256_update(desc, (const u8 *)&bits, sizeof(bits));
} else {
kernel_neon_begin();
/* We need to fill a whole block for __sha256_neon_update() */
if (padlen <= 56) {
sctx->count += padlen;
memcpy(sctx->buf + index, padding, padlen);
} else {
__sha256_neon_update(desc, padding, padlen, index);
}
__sha256_neon_update(desc, (const u8 *)&bits,
sizeof(bits), 56);
kernel_neon_end();
}
/* Store state in digest */
for (i = 0; i < 8; i++)
dst[i] = cpu_to_be32(sctx->state[i]);
/* Wipe context */
memzero_explicit(sctx, sizeof(*sctx));
return 0;
}
static int sha224_neon_final(struct shash_desc *desc, u8 *out)
{
u8 D[SHA256_DIGEST_SIZE];
sha256_neon_final(desc, D);
memcpy(out, D, SHA224_DIGEST_SIZE);
memzero_explicit(D, SHA256_DIGEST_SIZE);
return 0;
}
struct shash_alg sha256_neon_algs[] = { {
.digestsize = SHA256_DIGEST_SIZE,
.init = sha256_init,
.update = sha256_neon_update,
.final = sha256_neon_final,
.export = sha256_export,
.import = sha256_import,
.descsize = sizeof(struct sha256_state),
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-neon",
.cra_priority = 250,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.digestsize = SHA224_DIGEST_SIZE,
.init = sha224_init,
.update = sha256_neon_update,
.final = sha224_neon_final,
.export = sha256_export,
.import = sha256_import,
.descsize = sizeof(struct sha256_state),
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",
.cra_driver_name = "sha224-neon",
.cra_priority = 250,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA224_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment