Commit 151fc0ed authored by Yuqi Gu, committed by Marko Mäkelä

MDEV-23495: Refine Arm64 PMULL runtime check in MariaDB

Raspberry Pi 4 supports crc32 but doesn't support pmull (MDEV-23030).

PR #1645 offers a solution to fix this issue, but it does not consider the case
where the target platform supports crc32 but not PMULL.

In that case, the code should still leverage the Arm64 crc32 instruction (__crc32c) and
only skip the parallel computation (pmull/vmull), rather than skipping hardware
crc32 computation entirely.

This patch also removes the unnecessary CRC32_ZERO branch in 'crc32c_aarch64' for MariaDB
and cleans up the indentation and coding style.

Change-Id: I76371a6bd767b4985600e8cca10983d71b7e9459
Signed-off-by: Yuqi Gu <yuqi.gu@arm.com>
parent 0b4ed0b7
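For context, the detection logic this patch implements boils down to reading AT_HWCAP on Linux/AArch64. The following stand-alone sketch is illustrative only (the function name probe_crc32c and the returned strings are not part of MariaDB):

#include <stddef.h>     /* NULL */
#include <sys/auxv.h>   /* getauxval(), AT_HWCAP */
#include <asm/hwcap.h>  /* HWCAP_CRC32, HWCAP_PMULL */

#ifndef HWCAP_CRC32
# define HWCAP_CRC32 (1 << 7)
#endif
#ifndef HWCAP_PMULL
# define HWCAP_PMULL (1 << 4)
#endif

/* Illustrative sketch: report which CRC-32C code path the running CPU allows.
   NULL      -> no hardware crc32 at all (software fallback)
   otherwise -> hardware crc32, with or without pmull-based parallelism */
static const char *probe_crc32c(void)
{
  unsigned long hwcap= getauxval(AT_HWCAP);
  if (!(hwcap & HWCAP_CRC32))
    return NULL;
  if (hwcap & HWCAP_PMULL)
    return "crc32 + pmull";   /* full parallel path */
  return "crc32 only";        /* e.g. Raspberry Pi 4, MDEV-23030 */
}

The patched crc32c_aarch64_available() in the diff below performs the same checks, caches the PMULL result in pmull_supported, and returns a human-readable description for the caller.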
......@@ -911,9 +911,7 @@ extern MYSQL_PLUGIN_IMPORT my_crc32_t my_checksum;
#if defined(__GNUC__) && defined(HAVE_ARMV8_CRC)
int crc32_aarch64_available(void);
#if defined(HAVE_ARMV8_CRYPTO)
int crc32c_aarch64_available(void);
#endif
const char *crc32c_aarch64_available(void);
#endif
#ifdef DBUG_ASSERT_EXISTS
......
......@@ -8,40 +8,49 @@
#include <asm/hwcap.h>
#ifndef HWCAP_CRC32
#define HWCAP_CRC32 (1 << 7)
# define HWCAP_CRC32 (1 << 7)
#endif
#ifndef HWCAP_PMULL
# define HWCAP_PMULL (1 << 4)
#endif
static int pmull_supported;
/* ARM made crc32 default from ARMv8.1 but optional in ARMv8A
so the runtime check. */
* Runtime check API.
*/
int crc32_aarch64_available(void)
{
unsigned long auxv= getauxval(AT_HWCAP);
return (auxv & HWCAP_CRC32) != 0;
}
#if defined(HAVE_ARMV8_CRYPTO)
const char *crc32c_aarch64_available(void)
{
unsigned long auxv= getauxval(AT_HWCAP);
#ifndef HWCAP_PMULL
#define HWCAP_PMULL (1 << 4)
#endif
if (!(auxv & HWCAP_CRC32))
return NULL;
/* Check if target ARM machine support crc32 + pmull for computing crc32c */
int crc32c_aarch64_available(void)
{
return !(~getauxval(AT_HWCAP) & (HWCAP_CRC32 | HWCAP_PMULL));
pmull_supported= (auxv & HWCAP_PMULL) != 0;
if (pmull_supported)
return "Using ARMv8 crc32 + pmull instructions";
else
return "Using ARMv8 crc32 instructions";
}
#endif /* HAVE_ARMV8_CRYPTO */
#endif /* HAVE_ARMV8_CRC */
#endif /* __GNUC__ && HAVE_ARMV8_CRC */
#ifndef HAVE_ARMV8_CRC_CRYPTO_INTRINSICS
/* Request crc extension capabilities from the assembler */
asm(".arch_extension crc");
#ifdef HAVE_ARMV8_CRYPTO
# ifdef HAVE_ARMV8_CRYPTO
/* crypto extension */
asm(".arch_extension crypto");
#endif
# endif
#define CRC32CX(crc, value) __asm__("crc32cx %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32CW(crc, value) __asm__("crc32cw %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
......@@ -49,12 +58,9 @@ asm(".arch_extension crypto");
#define CRC32CB(crc, value) __asm__("crc32cb %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32C3X8(buffer, ITR) \
__asm__("crc32cx %w[c1], %w[c1], %x[v]":[c1]"+r"(crc1):[v]"r"(*((const uint64_t *)buffer + 42*1 + (ITR))));\
__asm__("crc32cx %w[c2], %w[c2], %x[v]":[c2]"+r"(crc2):[v]"r"(*((const uint64_t *)buffer + 42*2 + (ITR))));\
__asm__("crc32cx %w[c0], %w[c0], %x[v]":[c0]"+r"(crc0):[v]"r"(*((const uint64_t *)buffer + 42*0 + (ITR))));
#define CRC32C3X8_ZERO \
__asm__("crc32cx %w[c0], %w[c0], xzr":[c0]"+r"(crc0));
__asm__("crc32cx %w[c1], %w[c1], %x[v]":[c1]"+r"(crc1):[v]"r"(*((const uint64_t *)buffer + 42*1 + (ITR))));\
__asm__("crc32cx %w[c2], %w[c2], %x[v]":[c2]"+r"(crc2):[v]"r"(*((const uint64_t *)buffer + 42*2 + (ITR))));\
__asm__("crc32cx %w[c0], %w[c0], %x[v]":[c0]"+r"(crc0):[v]"r"(*((const uint64_t *)buffer + 42*0 + (ITR))));
#else /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */
......@@ -68,250 +74,194 @@ asm(".arch_extension crypto");
#define CRC32CB(crc, value) (crc) = __crc32cb((crc), (value))
#define CRC32C3X8(buffer, ITR) \
crc1 = __crc32cd(crc1, *((const uint64_t *)buffer + 42*1 + (ITR)));\
crc2 = __crc32cd(crc2, *((const uint64_t *)buffer + 42*2 + (ITR)));\
crc0 = __crc32cd(crc0, *((const uint64_t *)buffer + 42*0 + (ITR)));
#define CRC32C3X8_ZERO \
crc0 = __crc32cd(crc0, (const uint64_t)0);
crc1 = __crc32cd(crc1, *((const uint64_t *)buffer + 42*1 + (ITR)));\
crc2 = __crc32cd(crc2, *((const uint64_t *)buffer + 42*2 + (ITR)));\
crc0 = __crc32cd(crc0, *((const uint64_t *)buffer + 42*0 + (ITR)));
#endif /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */
#define CRC32C7X3X8(buffer, ITR) do {\
CRC32C3X8(buffer, ((ITR) * 7 + 0)) \
CRC32C3X8(buffer, ((ITR) * 7 + 1)) \
CRC32C3X8(buffer, ((ITR) * 7 + 2)) \
CRC32C3X8(buffer, ((ITR) * 7 + 3)) \
CRC32C3X8(buffer, ((ITR) * 7 + 4)) \
CRC32C3X8(buffer, ((ITR) * 7 + 5)) \
CRC32C3X8(buffer, ((ITR) * 7 + 6)) \
} while(0)
#define CRC32C7X3X8_ZERO do {\
CRC32C3X8_ZERO \
CRC32C3X8_ZERO \
CRC32C3X8_ZERO \
CRC32C3X8_ZERO \
CRC32C3X8_ZERO \
CRC32C3X8_ZERO \
CRC32C3X8_ZERO \
} while(0)
CRC32C3X8(buffer, ((ITR) * 7 + 0)) \
CRC32C3X8(buffer, ((ITR) * 7 + 1)) \
CRC32C3X8(buffer, ((ITR) * 7 + 2)) \
CRC32C3X8(buffer, ((ITR) * 7 + 3)) \
CRC32C3X8(buffer, ((ITR) * 7 + 4)) \
CRC32C3X8(buffer, ((ITR) * 7 + 5)) \
CRC32C3X8(buffer, ((ITR) * 7 + 6)) \
} while(0)
#define PREF4X64L1(buffer, PREF_OFFSET, ITR) \
__asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 0)*64));\
__asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 1)*64));\
__asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 2)*64));\
__asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 3)*64));
__asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 0)*64));\
__asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 1)*64));\
__asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 2)*64));\
__asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 3)*64));
#define PREF1KL1(buffer, PREF_OFFSET) \
PREF4X64L1(buffer,(PREF_OFFSET), 0) \
PREF4X64L1(buffer,(PREF_OFFSET), 4) \
PREF4X64L1(buffer,(PREF_OFFSET), 8) \
PREF4X64L1(buffer,(PREF_OFFSET), 12)
PREF4X64L1(buffer,(PREF_OFFSET), 0) \
PREF4X64L1(buffer,(PREF_OFFSET), 4) \
PREF4X64L1(buffer,(PREF_OFFSET), 8) \
PREF4X64L1(buffer,(PREF_OFFSET), 12)
#define PREF4X64L2(buffer, PREF_OFFSET, ITR) \
__asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 0)*64));\
__asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 1)*64));\
__asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 2)*64));\
__asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 3)*64));
__asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 0)*64));\
__asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 1)*64));\
__asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 2)*64));\
__asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 3)*64));
#define PREF1KL2(buffer, PREF_OFFSET) \
PREF4X64L2(buffer,(PREF_OFFSET), 0) \
PREF4X64L2(buffer,(PREF_OFFSET), 4) \
PREF4X64L2(buffer,(PREF_OFFSET), 8) \
PREF4X64L2(buffer,(PREF_OFFSET), 12)
PREF4X64L2(buffer,(PREF_OFFSET), 0) \
PREF4X64L2(buffer,(PREF_OFFSET), 4) \
PREF4X64L2(buffer,(PREF_OFFSET), 8) \
PREF4X64L2(buffer,(PREF_OFFSET), 12)
uint32_t crc32c_aarch64(uint32_t crc, const unsigned char *buffer, uint64_t len)
{
uint32_t crc0, crc1, crc2;
int64_t length = (int64_t)len;
crc = 0xFFFFFFFFU;
if (buffer) {
uint32_t crc0, crc1, crc2;
int64_t length= (int64_t)len;
crc= 0xFFFFFFFFU;
/* Pmull runtime check here.
* Raspberry Pi 4 supports crc32 but doesn't support pmull (MDEV-23030).
*
* If the target platform supports hardware crc32 but not PMULL, we should
* still use the aarch64 crc32 instruction (__crc32c) and only skip the
* parallel computation (pmull/vmull), rather than skipping hardware crc32
* computation entirely.
*/
if (pmull_supported)
{
/* HAVE_ARMV8_CRYPTO is the compile-time check for the crypto extension */
#ifdef HAVE_ARMV8_CRYPTO
/* Crypto extension Support
* Process 1024 Bytes (per block)
* Parallel computation with 1024 Bytes (per block)
* Intrinsics Support
*/
#ifdef HAVE_ARMV8_CRYPTO
/* Intrinsics Support */
#ifdef HAVE_ARMV8_CRC_CRYPTO_INTRINSICS
const poly64_t k1 = 0xe417f38a, k2 = 0x8f158014;
uint64_t t0, t1;
/* Process per block size of 1024 Bytes
* A block size = 8 + 42*3*sizeof(uint64_t) + 8
*/
while ((length -= 1024) >= 0) {
/* Prefetch 3*1024 data for avoiding L2 cache miss */
PREF1KL2(buffer, 1024*3);
/* Do first 8 bytes here for better pipelining */
crc0 = __crc32cd(crc, *(const uint64_t *)buffer);
crc1 = 0;
crc2 = 0;
buffer += sizeof(uint64_t);
/* Process block inline
* Process crc0 last to avoid dependency with above
*/
CRC32C7X3X8(buffer, 0);
CRC32C7X3X8(buffer, 1);
CRC32C7X3X8(buffer, 2);
CRC32C7X3X8(buffer, 3);
CRC32C7X3X8(buffer, 4);
CRC32C7X3X8(buffer, 5);
buffer += 42*3*sizeof(uint64_t);
/* Prefetch data for following block to avoid L1 cache miss */
PREF1KL1(buffer, 1024);
/* Last 8 bytes
* Merge crc0 and crc1 into crc2
* crc1 multiply by K2
* crc0 multiply by K1
*/
t1 = (uint64_t)vmull_p64(crc1, k2);
t0 = (uint64_t)vmull_p64(crc0, k1);
crc = __crc32cd(crc2, *(const uint64_t *)buffer);
crc1 = __crc32cd(0, t1);
crc ^= crc1;
crc0 = __crc32cd(0, t0);
crc ^= crc0;
buffer += sizeof(uint64_t);
}
#else /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */
/*No intrinsics*/
__asm__("mov x16, #0xf38a \n\t"
"movk x16, #0xe417, lsl 16 \n\t"
"mov v1.2d[0], x16 \n\t"
"mov x16, #0x8014 \n\t"
"movk x16, #0x8f15, lsl 16 \n\t"
"mov v0.2d[0], x16 \n\t"
:::"x16");
while ((length -= 1024) >= 0) {
PREF1KL2(buffer, 1024*3);
__asm__("crc32cx %w[c0], %w[c], %x[v]\n\t"
:[c0]"=r"(crc0):[c]"r"(crc), [v]"r"(*(const uint64_t *)buffer):);
crc1 = 0;
crc2 = 0;
buffer += sizeof(uint64_t);
CRC32C7X3X8(buffer, 0);
CRC32C7X3X8(buffer, 1);
CRC32C7X3X8(buffer, 2);
CRC32C7X3X8(buffer, 3);
CRC32C7X3X8(buffer, 4);
CRC32C7X3X8(buffer, 5);
buffer += 42*3*sizeof(uint64_t);
PREF1KL1(buffer, 1024);
__asm__("mov v2.2d[0], %x[c1] \n\t"
"pmull v2.1q, v2.1d, v0.1d \n\t"
"mov v3.2d[0], %x[c0] \n\t"
"pmull v3.1q, v3.1d, v1.1d \n\t"
"crc32cx %w[c], %w[c2], %x[v] \n\t"
"mov %x[c1], v2.2d[0] \n\t"
"crc32cx %w[c1], wzr, %x[c1] \n\t"
"eor %w[c], %w[c], %w[c1] \n\t"
"mov %x[c0], v3.2d[0] \n\t"
"crc32cx %w[c0], wzr, %x[c0] \n\t"
"eor %w[c], %w[c], %w[c0] \n\t"
:[c1]"+r"(crc1), [c0]"+r"(crc0), [c2]"+r"(crc2), [c]"+r"(crc)
:[v]"r"(*((const uint64_t *)buffer)));
buffer += sizeof(uint64_t);
}
#endif /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */
/* Done if Input data size is aligned with 1024 */
if(!(length += 1024))
return (~crc);
# ifdef HAVE_ARMV8_CRC_CRYPTO_INTRINSICS
const poly64_t k1= 0xe417f38a, k2= 0x8f158014;
uint64_t t0, t1;
/* Process per block size of 1024 Bytes
* A block size = 8 + 42*3*sizeof(uint64_t) + 8
*/
while ((length-= 1024) >= 0)
{
/* Prefetch 3*1024 data for avoiding L2 cache miss */
PREF1KL2(buffer, 1024*3);
/* Do first 8 bytes here for better pipelining */
crc0= __crc32cd(crc, *(const uint64_t *)buffer);
crc1= 0;
crc2= 0;
buffer+= sizeof(uint64_t);
/* Process block inline
* Process crc0 last to avoid dependency with above
*/
CRC32C7X3X8(buffer, 0);
CRC32C7X3X8(buffer, 1);
CRC32C7X3X8(buffer, 2);
CRC32C7X3X8(buffer, 3);
CRC32C7X3X8(buffer, 4);
CRC32C7X3X8(buffer, 5);
buffer+= 42*3*sizeof(uint64_t);
/* Prefetch data for following block to avoid L1 cache miss */
PREF1KL1(buffer, 1024);
/* Last 8 bytes
* Merge crc0 and crc1 into crc2
* crc1 multiply by K2
* crc0 multiply by K1
*/
t1= (uint64_t)vmull_p64(crc1, k2);
t0= (uint64_t)vmull_p64(crc0, k1);
crc= __crc32cd(crc2, *(const uint64_t *)buffer);
crc1= __crc32cd(0, t1);
crc^= crc1;
crc0= __crc32cd(0, t0);
crc^= crc0;
buffer+= sizeof(uint64_t);
}
# else /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */
/*No intrinsics*/
__asm__("mov x16, #0xf38a \n\t"
"movk x16, #0xe417, lsl 16 \n\t"
"mov v1.2d[0], x16 \n\t"
"mov x16, #0x8014 \n\t"
"movk x16, #0x8f15, lsl 16 \n\t"
"mov v0.2d[0], x16 \n\t"
:::"x16");
while ((length-= 1024) >= 0)
{
PREF1KL2(buffer, 1024*3);
__asm__("crc32cx %w[c0], %w[c], %x[v]\n\t"
:[c0]"=r"(crc0):[c]"r"(crc), [v]"r"(*(const uint64_t *)buffer):);
crc1= 0;
crc2= 0;
buffer+= sizeof(uint64_t);
CRC32C7X3X8(buffer, 0);
CRC32C7X3X8(buffer, 1);
CRC32C7X3X8(buffer, 2);
CRC32C7X3X8(buffer, 3);
CRC32C7X3X8(buffer, 4);
CRC32C7X3X8(buffer, 5);
buffer+= 42*3*sizeof(uint64_t);
PREF1KL1(buffer, 1024);
__asm__("mov v2.2d[0], %x[c1] \n\t"
"pmull v2.1q, v2.1d, v0.1d \n\t"
"mov v3.2d[0], %x[c0] \n\t"
"pmull v3.1q, v3.1d, v1.1d \n\t"
"crc32cx %w[c], %w[c2], %x[v] \n\t"
"mov %x[c1], v2.2d[0] \n\t"
"crc32cx %w[c1], wzr, %x[c1] \n\t"
"eor %w[c], %w[c], %w[c1] \n\t"
"mov %x[c0], v3.2d[0] \n\t"
"crc32cx %w[c0], wzr, %x[c0] \n\t"
"eor %w[c], %w[c], %w[c0] \n\t"
:[c1]"+r"(crc1), [c0]"+r"(crc0), [c2]"+r"(crc2), [c]"+r"(crc)
:[v]"r"(*((const uint64_t *)buffer)));
buffer+= sizeof(uint64_t);
}
# endif /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */
/* Done if Input data size is aligned with 1024 */
if (!(length+= 1024))
return ~crc;
#endif /* HAVE_ARMV8_CRYPTO */
while ((length -= sizeof(uint64_t)) >= 0) {
CRC32CX(crc, *(uint64_t *)buffer);
buffer += sizeof(uint64_t);
}
/* The following is more efficient than the straight loop */
if (length & sizeof(uint32_t)) {
CRC32CW(crc, *(uint32_t *)buffer);
buffer += sizeof(uint32_t);
}
if (length & sizeof(uint16_t)) {
CRC32CH(crc, *(uint16_t *)buffer);
buffer += sizeof(uint16_t);
}
if (length & sizeof(uint8_t))
CRC32CB(crc, *buffer);
} else {
#ifdef HAVE_ARMV8_CRYPTO
#ifdef HAVE_ARMV8_CRC_CRYPTO_INTRINSICS
const poly64_t k1 = 0xe417f38a;
uint64_t t0;
while ((length -= 1024) >= 0) {
crc0 = __crc32cd(crc, 0);
CRC32C7X3X8_ZERO;
CRC32C7X3X8_ZERO;
CRC32C7X3X8_ZERO;
CRC32C7X3X8_ZERO;
CRC32C7X3X8_ZERO;
CRC32C7X3X8_ZERO;
/* Merge crc0 into crc: crc0 multiply by K1 */
t0 = (uint64_t)vmull_p64(crc0, k1);
crc = __crc32cd(0, t0);
}
#else /* !HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */
__asm__("mov x16, #0xf38a \n\t"
"movk x16, #0xe417, lsl 16 \n\t"
"mov v1.2d[0], x16 \n\t"
:::"x16");
while ((length -= 1024) >= 0) {
__asm__("crc32cx %w[c0], %w[c], xzr\n\t"
:[c0]"=r"(crc0):[c]"r"(crc));
CRC32C7X3X8_ZERO;
CRC32C7X3X8_ZERO;
CRC32C7X3X8_ZERO;
CRC32C7X3X8_ZERO;
CRC32C7X3X8_ZERO;
CRC32C7X3X8_ZERO;
__asm__("mov v3.2d[0], %x[c0] \n\t"
"pmull v3.1q, v3.1d, v1.1d \n\t"
"mov %x[c0], v3.2d[0] \n\t"
"crc32cx %w[c], wzr, %x[c0] \n\t"
:[c]"=r"(crc)
:[c0]"r"(crc0));
}
#endif /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */
if(!(length += 1024))
return (~crc);
#endif /* HAVE_ARMV8_CRYPTO */
while ((length -= sizeof(uint64_t)) >= 0)
CRC32CX(crc, 0);
} // end if pmull_supported
/* The following is more efficient than the straight loop */
if (length & sizeof(uint32_t))
CRC32CW(crc, 0);
while ((length-= sizeof(uint64_t)) >= 0)
{
CRC32CX(crc, *(uint64_t *)buffer);
buffer+= sizeof(uint64_t);
}
if (length & sizeof(uint16_t))
CRC32CH(crc, 0);
/* The following is more efficient than the straight loop */
if (length & sizeof(uint32_t))
{
CRC32CW(crc, *(uint32_t *)buffer);
buffer+= sizeof(uint32_t);
}
if (length & sizeof(uint8_t))
CRC32CB(crc, 0);
}
if (length & sizeof(uint16_t))
{
CRC32CH(crc, *(uint16_t *)buffer);
buffer+= sizeof(uint16_t);
}
return (~crc);
if (length & sizeof(uint8_t))
CRC32CB(crc, *buffer);
return ~crc;
}
/* There are multiple approaches to calculate crc.
......
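As a reading aid for the hunk above: when pmull_supported is false, the patched crc32c_aarch64() skips the 1024-byte parallel blocks and degenerates to plain hardware CRC32C, 8 bytes at a time plus a 4/2/1-byte tail. Below is a minimal sketch of that serial path, assuming GCC/Clang targeting AArch64 with the crc extension (e.g. -march=armv8-a+crc) so that <arm_acle.h> provides the __crc32c* intrinsics; the name crc32c_serial and the memcpy-based loads are illustrative, not MariaDB code:

#include <stdint.h>
#include <string.h>     /* memcpy */
#include <arm_acle.h>   /* __crc32cd, __crc32cw, __crc32ch, __crc32cb */

static uint32_t crc32c_serial(const unsigned char *buffer, uint64_t len)
{
  uint32_t crc= 0xFFFFFFFFU;          /* same seeding as crc32c_aarch64() */
  while (len >= sizeof(uint64_t))
  {
    uint64_t v;
    memcpy(&v, buffer, sizeof v);     /* tolerate unaligned input */
    crc= __crc32cd(crc, v);
    buffer+= sizeof v;
    len-= sizeof v;
  }
  if (len & sizeof(uint32_t))         /* remaining length is < 8 here */
  {
    uint32_t v;
    memcpy(&v, buffer, sizeof v);
    crc= __crc32cw(crc, v);
    buffer+= sizeof v;
  }
  if (len & sizeof(uint16_t))
  {
    uint16_t v;
    memcpy(&v, buffer, sizeof v);
    crc= __crc32ch(crc, v);
    buffer+= sizeof v;
  }
  if (len & sizeof(uint8_t))
    crc= __crc32cb(crc, *buffer);
  return ~crc;
}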
......@@ -342,11 +342,11 @@ allocations, would not hurt if called twice, but would be pointless. */
void ut_crc32_init()
{
#ifndef HAVE_CRC32_VPMSUM
# if defined(__GNUC__) && defined(HAVE_ARMV8_CRC) && defined(HAVE_ARMV8_CRYPTO)
if (crc32c_aarch64_available())
# if defined(__GNUC__) && defined(HAVE_ARMV8_CRC)
if (const char *crc32c_implementation= crc32c_aarch64_available())
{
ut_crc32_low= crc32c_aarch64;
ut_crc32_implementation= "Using ARMv8 crc32 + pmull instructions";
ut_crc32_implementation= crc32c_implementation;
return;
}
# elif defined(TRY_SSE4_2)
......
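A short sketch of how a caller can consume the new const char * return value at init time; the names crc32c_impl, crc32c_impl_name, crc32c_software and crc32c_init are hypothetical, but the shape mirrors what ut_crc32_init() does in the hunk above with ut_crc32_low and ut_crc32_implementation:

#include <stdint.h>

typedef uint32_t (*crc32c_func)(uint32_t crc, const unsigned char *buf, uint64_t len);

/* Assumed portable fallback plus the hardware routine from the patch. */
extern uint32_t crc32c_software(uint32_t crc, const unsigned char *buf, uint64_t len);
extern uint32_t crc32c_aarch64(uint32_t crc, const unsigned char *buf, uint64_t len);
extern const char *crc32c_aarch64_available(void);

static crc32c_func  crc32c_impl=      crc32c_software;
static const char  *crc32c_impl_name= "Using generic crc32 routine";

static void crc32c_init(void)
{
#if defined(__GNUC__) && defined(HAVE_ARMV8_CRC)
  const char *name= crc32c_aarch64_available();
  if (name)                        /* NULL means no hardware crc32 at all */
  {
    crc32c_impl= crc32c_aarch64;   /* works with or without pmull */
    crc32c_impl_name= name;        /* reports which variant was picked */
  }
#endif
}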