Commit b01df1c1 authored by Daniel Axtens's avatar Daniel Axtens Committed by Herbert Xu

crypto: powerpc - Add CRC-T10DIF acceleration

T10DIF is a CRC16 used heavily in NVMe.

It turns out we can accelerate it with a CRC32 library and a few
little tricks.

Provide the accelerator based the refactored CRC32 code.

Cc: Anton Blanchard <anton@samba.org>
Thanks-to: Hong Bo Peng <penghb@cn.ibm.com>
Signed-off-by: default avatarDaniel Axtens <dja@axtens.net>
Signed-off-by: default avatarHerbert Xu <herbert@gondor.apana.org.au>
parent 08c7dd1b
...@@ -10,6 +10,7 @@ obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o ...@@ -10,6 +10,7 @@ obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o
obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += sha1-ppc-spe.o obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += sha1-ppc-spe.o
obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o
obj-$(CONFIG_CRYPTO_CRC32C_VPMSUM) += crc32c-vpmsum.o obj-$(CONFIG_CRYPTO_CRC32C_VPMSUM) += crc32c-vpmsum.o
obj-$(CONFIG_CRYPTO_CRCT10DIF_VPMSUM) += crct10dif-vpmsum.o
aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
md5-ppc-y := md5-asm.o md5-glue.o md5-ppc-y := md5-asm.o md5-glue.o
...@@ -17,3 +18,4 @@ sha1-powerpc-y := sha1-powerpc-asm.o sha1.o ...@@ -17,3 +18,4 @@ sha1-powerpc-y := sha1-powerpc-asm.o sha1.o
sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o
sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o
crc32c-vpmsum-y := crc32c-vpmsum_asm.o crc32c-vpmsum_glue.o crc32c-vpmsum-y := crc32c-vpmsum_asm.o crc32c-vpmsum_glue.o
crct10dif-vpmsum-y := crct10dif-vpmsum_asm.o crct10dif-vpmsum_glue.o
This diff is collapsed.
/*
* Calculate a CRC T10-DIF with vpmsum acceleration
*
* Copyright 2017, Daniel Axtens, IBM Corporation.
* [based on crc32c-vpmsum_glue.c]
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*/
#include <linux/crc-t10dif.h>
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/cpufeature.h>
#include <asm/switch_to.h>
#define VMX_ALIGN 16
#define VMX_ALIGN_MASK (VMX_ALIGN-1)
#define VECTOR_BREAKPOINT 64
u32 __crct10dif_vpmsum(u32 crc, unsigned char const *p, size_t len);
static u16 crct10dif_vpmsum(u16 crci, unsigned char const *p, size_t len)
{
unsigned int prealign;
unsigned int tail;
u32 crc = crci;
if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || in_interrupt())
return crc_t10dif_generic(crc, p, len);
if ((unsigned long)p & VMX_ALIGN_MASK) {
prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK);
crc = crc_t10dif_generic(crc, p, prealign);
len -= prealign;
p += prealign;
}
if (len & ~VMX_ALIGN_MASK) {
crc <<= 16;
pagefault_disable();
enable_kernel_altivec();
crc = __crct10dif_vpmsum(crc, p, len & ~VMX_ALIGN_MASK);
pagefault_enable();
crc >>= 16;
}
tail = len & VMX_ALIGN_MASK;
if (tail) {
p += len & ~VMX_ALIGN_MASK;
crc = crc_t10dif_generic(crc, p, tail);
}
return crc & 0xffff;
}
static int crct10dif_vpmsum_init(struct shash_desc *desc)
{
u16 *crc = shash_desc_ctx(desc);
*crc = 0;
return 0;
}
static int crct10dif_vpmsum_update(struct shash_desc *desc, const u8 *data,
unsigned int length)
{
u16 *crc = shash_desc_ctx(desc);
*crc = crct10dif_vpmsum(*crc, data, length);
return 0;
}
static int crct10dif_vpmsum_final(struct shash_desc *desc, u8 *out)
{
u16 *crcp = shash_desc_ctx(desc);
*(u16 *)out = *crcp;
return 0;
}
static struct shash_alg alg = {
.init = crct10dif_vpmsum_init,
.update = crct10dif_vpmsum_update,
.final = crct10dif_vpmsum_final,
.descsize = CRC_T10DIF_DIGEST_SIZE,
.digestsize = CRC_T10DIF_DIGEST_SIZE,
.base = {
.cra_name = "crct10dif",
.cra_driver_name = "crct10dif-vpmsum",
.cra_priority = 200,
.cra_blocksize = CRC_T10DIF_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static int __init crct10dif_vpmsum_mod_init(void)
{
if (!cpu_has_feature(CPU_FTR_ARCH_207S))
return -ENODEV;
return crypto_register_shash(&alg);
}
static void __exit crct10dif_vpmsum_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, crct10dif_vpmsum_mod_init);
module_exit(crct10dif_vpmsum_mod_fini);
MODULE_AUTHOR("Daniel Axtens <dja@axtens.net>");
MODULE_DESCRIPTION("CRCT10DIF using vector polynomial multiply-sum instructions");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("crct10dif");
MODULE_ALIAS_CRYPTO("crct10dif-vpmsum");
...@@ -513,6 +513,15 @@ config CRYPTO_CRCT10DIF_PCLMUL ...@@ -513,6 +513,15 @@ config CRYPTO_CRCT10DIF_PCLMUL
'crct10dif-plcmul' module, which is faster when computing the 'crct10dif-plcmul' module, which is faster when computing the
crct10dif checksum as compared with the generic table implementation. crct10dif checksum as compared with the generic table implementation.
config CRYPTO_CRCT10DIF_VPMSUM
tristate "CRC32T10DIF powerpc64 hardware acceleration"
depends on PPC64 && ALTIVEC && CRC_T10DIF
select CRYPTO_HASH
help
CRC10T10DIF algorithm implemented using vector polynomial
multiply-sum (vpmsum) instructions, introduced in POWER8. Enable on
POWER8 and newer processors for improved performance.
config CRYPTO_GHASH config CRYPTO_GHASH
tristate "GHASH digest algorithm" tristate "GHASH digest algorithm"
select CRYPTO_GF128MUL select CRYPTO_GF128MUL
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment