Commit dec3f8ca authored by mysqlonarm's avatar mysqlonarm Committed by GitHub

MDEV-22641: Provide SIMD optimized wrapper for zlib crc32() (#1558)

Existing implementation used my_checksum (from mysys)
for calculating table checksum and binlog checksum.

This implementation was optimized for powerpc only and lacked
SIMD implementations for x86 (using clmul) and ARM
(using ACLE); on those platforms it used zlib crc32 instead.

mariabackup had its own copy of the crc32 implementation,
which was hardware-optimized only for x86 and lacked a
hardware-based implementation for powerpc and ARM.

This patch unifies all such calls and aggregates them
behind a single interface, my_checksum().

Said unification also enables hardware-optimized calls for all
architectures, viz. x86, ARM and POWERPC.
The default always falls back to zlib crc32.

Thanks to Daniel Black for reviewing, fixing and testing
PowerPC changes. Thanks to Marko and Daniel for early code feedback.
parent 6a6aa1c0
...@@ -170,7 +170,6 @@ INCLUDE(systemd) ...@@ -170,7 +170,6 @@ INCLUDE(systemd)
INCLUDE(mysql_add_executable) INCLUDE(mysql_add_executable)
INCLUDE(symlinks) INCLUDE(symlinks)
INCLUDE(compile_flags) INCLUDE(compile_flags)
INCLUDE(crc32)
INCLUDE(pmem) INCLUDE(pmem)
# Handle options # Handle options
......
# ARMv8 (aarch64) only: probe the toolchain for CRC32 and crypto (PMULL)
# instruction support and select the crc32_armv8_neon helper library.
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
# Only attempted when the C compiler is GNU and the C++ compiler version is
# at least 5.1.
IF(CMAKE_COMPILER_IS_GNUCC AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.1)
include(CheckCXXSourceCompiles)
# HAVE_ARMV8_CRC: set when inline assembly using the crc32cx instruction
# (with the "crc" architecture extension enabled) compiles.
CHECK_CXX_SOURCE_COMPILES("
#define CRC32CX(crc, value) __asm__(\"crc32cx %w[c], %w[c], %x[v]\":[c]\"+r\"(crc):[v]\"r\"(value))
asm(\".arch_extension crc\");
unsigned int foo(unsigned int ret) {
CRC32CX(ret, 0);
return ret;
}
int main() { foo(0); }" HAVE_ARMV8_CRC)
# HAVE_ARMV8_CRYPTO: set when inline assembly using the pmull instruction
# (with the "crypto" architecture extension enabled) compiles.
CHECK_CXX_SOURCE_COMPILES("
asm(\".arch_extension crypto\");
unsigned int foo(unsigned int ret) {
__asm__(\"pmull v2.1q, v2.1d, v1.1d\");
return ret;
}
int main() { foo(0); }" HAVE_ARMV8_CRYPTO)
# HAVE_ARMV8_CRC_CRYPTO_INTRINSICS: set when the compiler accepts the
# -march=armv8-a+crc+crypto flag; the flag is then appended to
# ARMV8_CRC_COMPILE_FLAGS.
CHECK_C_COMPILER_FLAG(-march=armv8-a+crc+crypto HAVE_ARMV8_CRC_CRYPTO_INTRINSICS)
IF(HAVE_ARMV8_CRC_CRYPTO_INTRINSICS)
SET(ARMV8_CRC_COMPILE_FLAGS "${ARMV8_CRC_COMPILE_FLAGS} -march=armv8-a+crc+crypto")
ENDIF()
# NOTE(review): the library is selected and its subdirectory added whenever
# the compiler condition above holds, independently of the outcome of the
# three checks.
SET(CRC32_LIBRARY crc32_armv8_neon)
ADD_SUBDIRECTORY(extra/crc32_armv8_neon)
ENDIF()
ENDIF()
# ppc64 targets: unconditionally flag HAVE_CRC32_VPMSUM, select the
# crc32-vpmsum library as CRC32_LIBRARY and add its subdirectory to the build.
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64")
SET(HAVE_CRC32_VPMSUM 1)
SET(CRC32_LIBRARY crc32-vpmsum)
ADD_SUBDIRECTORY(extra/crc32-vpmsum)
ENDIF()
# Build ${CRC32_LIBRARY} from two object libraries. Both are compiled from the
# same source file, vec_crc32.c, and differ only in their compile definitions.
ADD_CONVENIENCE_LIBRARY(${CRC32_LIBRARY} $<TARGET_OBJECTS:crc32c> $<TARGET_OBJECTS:crc32ieee>)
ADD_LIBRARY(crc32c OBJECT vec_crc32.c)
ADD_LIBRARY(crc32ieee OBJECT vec_crc32.c)
# Start from the COMPILE_FLAGS of the convenience library and add the POWER
# vector/crypto options to both object libraries.
# NOTE(review): -mpower8-vector appears twice in the flag string.
GET_PROPERTY(CFLAGS_CRC32_VPMSUM TARGET ${CRC32_LIBRARY} PROPERTY COMPILE_FLAGS)
SET_TARGET_PROPERTIES(crc32c crc32ieee PROPERTIES COMPILE_FLAGS "${CFLAGS_CRC32_VPMSUM} -maltivec -mvsx -mpower8-vector -mcrypto -mpower8-vector")
# CRC32_FUNCTION names the function emitted by each compilation of
# vec_crc32.c and CRC32_CONSTANTS_HEADER the constants header it includes.
SET_TARGET_PROPERTIES(crc32ieee PROPERTIES COMPILE_DEFINITIONS "CRC32_FUNCTION=crc32ieee_vpmsum;CRC32_CONSTANTS_HEADER=\"crc32ieee_constants.h\"")
SET_TARGET_PROPERTIES(crc32c PROPERTIES COMPILE_DEFINITIONS "CRC32_FUNCTION=crc32c_vpmsum;CRC32_CONSTANTS_HEADER=\"crc32c_constants.h\"")
# Make the source-tree and build-tree include directories visible to the
# targets defined below.
INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include)
INCLUDE_DIRECTORIES(${CMAKE_BINARY_DIR}/include)
# Build ${CRC32_LIBRARY} from a single object library compiled from
# crc32_armv8.c using the flags collected in ARMV8_CRC_COMPILE_FLAGS.
ADD_CONVENIENCE_LIBRARY(${CRC32_LIBRARY} $<TARGET_OBJECTS:common_crc32c_armv8>)
ADD_LIBRARY(common_crc32c_armv8 OBJECT crc32_armv8.c)
SET_TARGET_PROPERTIES(common_crc32c_armv8 PROPERTIES COMPILE_FLAGS "${ARMV8_CRC_COMPILE_FLAGS}")
...@@ -33,7 +33,6 @@ INCLUDE_DIRECTORIES( ...@@ -33,7 +33,6 @@ INCLUDE_DIRECTORIES(
${CMAKE_SOURCE_DIR}/sql ${CMAKE_SOURCE_DIR}/sql
${CMAKE_CURRENT_SOURCE_DIR}/quicklz ${CMAKE_CURRENT_SOURCE_DIR}/quicklz
${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/crc
) )
IF(NOT HAVE_SYSTEM_REGEX) IF(NOT HAVE_SYSTEM_REGEX)
...@@ -84,9 +83,8 @@ MYSQL_ADD_EXECUTABLE(mariadb-backup ...@@ -84,9 +83,8 @@ MYSQL_ADD_EXECUTABLE(mariadb-backup
# Export all symbols on Unix, for better crash callstacks # Export all symbols on Unix, for better crash callstacks
SET_TARGET_PROPERTIES(mariadb-backup PROPERTIES ENABLE_EXPORTS TRUE) SET_TARGET_PROPERTIES(mariadb-backup PROPERTIES ENABLE_EXPORTS TRUE)
ADD_SUBDIRECTORY(crc)
TARGET_LINK_LIBRARIES(mariadb-backup sql sql_builtins crc) TARGET_LINK_LIBRARIES(mariadb-backup sql sql_builtins)
IF(NOT HAVE_SYSTEM_REGEX) IF(NOT HAVE_SYSTEM_REGEX)
TARGET_LINK_LIBRARIES(mariadb-backup pcre2-posix) TARGET_LINK_LIBRARIES(mariadb-backup pcre2-posix)
ENDIF() ENDIF()
...@@ -109,7 +107,6 @@ MYSQL_ADD_EXECUTABLE(mbstream ...@@ -109,7 +107,6 @@ MYSQL_ADD_EXECUTABLE(mbstream
TARGET_LINK_LIBRARIES(mbstream TARGET_LINK_LIBRARIES(mbstream
mysys mysys
crc
) )
ADD_DEPENDENCIES(mbstream GenError) ADD_DEPENDENCIES(mbstream GenError)
......
# Copyright (c) 2017 Percona LLC and/or its affiliates.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
PROJECT(crc C)

# Decide whether the PCLMUL based CRC-32 implementation can be built.
#
# Only the ability of the toolchain to assemble the pclmulqdq instruction is
# tested here. Whether the CPU the program runs on actually supports the
# instruction is decided at run time by crc_init() in crc_glue.c, so the
# result must not depend on the CPU of the build host.
IF(NOT CMAKE_CROSSCOMPILING AND NOT MSVC)
  # Quoted: an empty CMAKE_SYSTEM_PROCESSOR would otherwise leave
  # STRING(TOLOWER) with too few arguments and abort the configuration.
  STRING(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" processor)
  IF(processor MATCHES "86" OR processor MATCHES "amd64" OR processor MATCHES "x64")
    INCLUDE(CheckCSourceCompiles)
    # Check for PCLMUL instruction
    CHECK_C_SOURCE_COMPILES("
int main()
{
asm volatile (\"pclmulqdq \\$0x00, %%xmm1, %%xmm0\":::\"cc\");
return 0;
}" HAVE_CLMUL_INSTRUCTION)
  ENDIF()
ENDIF()

# Expose the result to the C sources of this directory.
IF(HAVE_CLMUL_INSTRUCTION)
  ADD_DEFINITIONS(-DHAVE_CLMUL_INSTRUCTION)
ENDIF()

ADD_LIBRARY(crc STATIC crc_glue.c crc-intel-pclmul.c)
/******************************************************
Copyright (c) 2017 Percona LLC and/or its affiliates.
Zlib compatible CRC-32 implementation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
*******************************************************/
#cmakedefine HAVE_CLMUL_INSTRUCTION 1
/******************************************************
Copyright (c) 2017 Percona LLC and/or its affiliates.
CRC32 using Intel's PCLMUL instruction.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
*******************************************************/
#include <stdint.h>
#include <stddef.h>
/* CRC32 over inlen bytes starting at inbuf, using Intel's PCLMUL instruction.
   The running CRC is passed in and returned through *pcrc.
   NOTE(review): the caller in crc_glue.c passes the bit-inverted CRC and
   inverts the result again; confirm against the implementation. */
void
crc32_intel_pclmul(uint32_t *pcrc, const uint8_t *inbuf, size_t inlen);
/******************************************************
Copyright (c) 2017 Percona LLC and/or its affiliates.
Zlib compatible CRC-32 implementation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
*******************************************************/
#include "my_config.h"
#include "crc_glue.h"
#include "crc-intel-pclmul.h"
#include <stdint.h>
#include <string.h>
#include <zlib.h>
#if defined(__GNUC__) && defined(__x86_64__)
/* Non-zero once crc_init() has found the CPU features required for the
   PCLMUL code path; stays 0 (zlib fallback) until crc_init() is called. */
static int pclmul_enabled = 0;
#endif
#if defined(__GNUC__) && defined(__x86_64__)
/* Query the CPUID instruction.

   Leaf 0 is read first to learn the highest supported basic leaf. When leaf 1
   is available, its ECX and EDX feature words are stored in *ecx and *edx;
   otherwise both are set to 0 so that callers never read indeterminate
   values.

   @param[out] ecx  feature bits returned in ECX by CPUID leaf 1
   @param[out] edx  feature bits returned in EDX by CPUID leaf 1
   @return the highest supported basic CPUID leaf (EAX of leaf 0) */
static
uint32_t
cpuid(uint32_t* ecx, uint32_t* edx)
{
	uint32_t level;
	uint32_t eax;

	asm("cpuid" : "=a" (level) : "a" (0) : "ebx", "ecx", "edx");

	if (level < 1) {
		*ecx = 0;
		*edx = 0;
		return level;
	}

	/* CPUID overwrites EAX as well, so it has to be declared as an
	output; passing it as a plain input would let the compiler assume
	the register still holds 1 afterwards. */
	asm("cpuid" : "=a" (eax), "=c" (*ecx), "=d" (*edx)
	    : "0" (1)
	    : "ebx");
	(void) eax;

	return level;
}
#endif
/* Detect once, at start-up, whether the PCLMUL code path may be used.
   Both CPUID leaf 1 ECX feature bits 19 and 1 have to be set. On other
   compilers or architectures this function does nothing. */
void crc_init() {
#if defined(__GNUC__) && defined(__x86_64__)
	const uint32_t required = ((uint32_t) 1 << 19) | ((uint32_t) 1 << 1);
	uint32_t ecx, edx;

	if (cpuid(&ecx, &edx) > 0) {
		pclmul_enabled = (ecx & required) == required;
	}
#endif
}
/* zlib compatible CRC-32 of len bytes at buf, continuing from crc.
   Uses the PCLMUL implementation when it was compiled in and enabled by
   crc_init(); otherwise defers to zlib's crc32(). */
unsigned long crc32_iso3309(unsigned long crc, const unsigned char *buf, unsigned int len)
{
#if __GNUC__ >= 4 && defined(__x86_64__) && defined(HAVE_CLMUL_INSTRUCTION)
	if (!pclmul_enabled) {
		return crc32(crc, buf, len);
	} else {
		/* The PCLMUL routine works on the inverted CRC value. */
		uint32_t state = ~(uint32_t) crc;

		crc32_intel_pclmul(&state, buf, len);
		return ~state;
	}
#else
	return crc32(crc, buf, len);
#endif
}
/******************************************************
Copyright (c) 2017 Percona LLC and/or its affiliates.
Zlib compatible CRC-32 implementation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
*******************************************************/
#ifdef __cplusplus
extern "C" {
#endif
/* Run-time detection of the CPU features needed by the PCLMUL code path.
   Until it has been called, crc32_iso3309() uses zlib's crc32(). */
void crc_init();
/* zlib compatible CRC-32: returns the checksum of the len bytes at buf,
   continuing from the running value crc. */
unsigned long crc32_iso3309(unsigned long crc, const unsigned char *buf, unsigned int len);
#ifdef __cplusplus
}
#endif
...@@ -26,7 +26,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA ...@@ -26,7 +26,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
#include "common.h" #include "common.h"
#include "xbstream.h" #include "xbstream.h"
#include "datasink.h" #include "datasink.h"
#include "crc_glue.h"
#define XBSTREAM_VERSION "1.0" #define XBSTREAM_VERSION "1.0"
#define XBSTREAM_BUFFER_SIZE (10 * 1024 * 1024UL) #define XBSTREAM_BUFFER_SIZE (10 * 1024 * 1024UL)
...@@ -98,7 +97,7 @@ main(int argc, char **argv) ...@@ -98,7 +97,7 @@ main(int argc, char **argv)
{ {
MY_INIT(argv[0]); MY_INIT(argv[0]);
crc_init(); my_checksum_init();
if (get_options(&argc, &argv)) { if (get_options(&argc, &argv)) {
goto err; goto err;
......
...@@ -23,7 +23,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA ...@@ -23,7 +23,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
#include <zlib.h> #include <zlib.h>
#include "common.h" #include "common.h"
#include "xbstream.h" #include "xbstream.h"
#include "crc_glue.h"
/* Allocate 1 MB for the payload buffer initially */ /* Allocate 1 MB for the payload buffer initially */
#define INIT_BUFFER_LEN (1024 * 1024) #define INIT_BUFFER_LEN (1024 * 1024)
...@@ -71,8 +70,7 @@ xb_rstream_result_t ...@@ -71,8 +70,7 @@ xb_rstream_result_t
xb_stream_validate_checksum(xb_rstream_chunk_t *chunk) xb_stream_validate_checksum(xb_rstream_chunk_t *chunk)
{ {
ulong checksum; ulong checksum;
checksum = my_checksum(0, chunk->data, chunk->length);
checksum = crc32_iso3309(0, (unsigned char *)chunk->data, (uint)chunk->length);
if (checksum != chunk->checksum) { if (checksum != chunk->checksum) {
msg("xb_stream_read_chunk(): invalid checksum at offset " msg("xb_stream_read_chunk(): invalid checksum at offset "
"0x%llx: expected 0x%lx, read 0x%lx.", "0x%llx: expected 0x%lx, read 0x%lx.",
......
...@@ -23,7 +23,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA ...@@ -23,7 +23,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
#include <zlib.h> #include <zlib.h>
#include "common.h" #include "common.h"
#include "xbstream.h" #include "xbstream.h"
#include "crc_glue.h"
/* Group writes smaller than this into a single chunk */ /* Group writes smaller than this into a single chunk */
#define XB_STREAM_MIN_CHUNK_SIZE (10 * 1024 * 1024) #define XB_STREAM_MIN_CHUNK_SIZE (10 * 1024 * 1024)
...@@ -216,7 +215,7 @@ xb_stream_write_chunk(xb_wstream_file_t *file, const void *buf, size_t len) ...@@ -216,7 +215,7 @@ xb_stream_write_chunk(xb_wstream_file_t *file, const void *buf, size_t len)
int8store(ptr, len); /* Payload length */ int8store(ptr, len); /* Payload length */
ptr += 8; ptr += 8;
checksum = crc32_iso3309(0, (const uchar *)buf, (uint)len); /* checksum */ checksum = my_checksum(0, buf, len);
pthread_mutex_lock(&stream->mutex); pthread_mutex_lock(&stream->mutex);
......
...@@ -101,7 +101,6 @@ Street, Fifth Floor, Boston, MA 02110-1335 USA ...@@ -101,7 +101,6 @@ Street, Fifth Floor, Boston, MA 02110-1335 USA
#include "encryption_plugin.h" #include "encryption_plugin.h"
#include <sql_plugin.h> #include <sql_plugin.h>
#include <srv0srv.h> #include <srv0srv.h>
#include <crc_glue.h>
#include <log.h> #include <log.h>
#include <derror.h> #include <derror.h>
#include <thr_timer.h> #include <thr_timer.h>
...@@ -4034,7 +4033,7 @@ static bool xtrabackup_backup_func() ...@@ -4034,7 +4033,7 @@ static bool xtrabackup_backup_func()
trx_pool_init(); trx_pool_init();
ut_crc32_init(); ut_crc32_init();
crc_init(); my_checksum_init();
recv_sys.create(); recv_sys.create();
#ifdef WITH_INNODB_DISALLOW_WRITES #ifdef WITH_INNODB_DISALLOW_WRITES
......
...@@ -921,8 +921,18 @@ extern int my_compress_buffer(uchar *dest, size_t *destLen, ...@@ -921,8 +921,18 @@ extern int my_compress_buffer(uchar *dest, size_t *destLen,
extern int packfrm(const uchar *, size_t, uchar **, size_t *); extern int packfrm(const uchar *, size_t, uchar **, size_t *);
extern int unpackfrm(uchar **, size_t *, const uchar *); extern int unpackfrm(uchar **, size_t *, const uchar *);
extern ha_checksum my_checksum(ha_checksum crc, const uchar *mem, void my_checksum_init(void);
size_t count); #ifdef HAVE_CRC32_VPMSUM
extern my_checksum(ha_checksum, const void *, size_t);
#else
typedef ha_checksum (*my_crc32_t)(ha_checksum, const void *, size_t);
extern my_crc32_t my_checksum;
#endif
#if defined(__GNUC__) && defined(HAVE_ARMV8_CRC)
int crc32_aarch64_available(void);
#endif
#ifdef DBUG_ASSERT_EXISTS #ifdef DBUG_ASSERT_EXISTS
extern void my_debug_put_break_here(void); extern void my_debug_put_break_here(void);
#else #else
...@@ -930,7 +940,6 @@ extern void my_debug_put_break_here(void); ...@@ -930,7 +940,6 @@ extern void my_debug_put_break_here(void);
#endif #endif
extern void my_sleep(ulong m_seconds); extern void my_sleep(ulong m_seconds);
extern ulong crc32(ulong crc, const uchar *buf, uint len);
extern uint my_set_max_open_files(uint files); extern uint my_set_max_open_files(uint files);
void my_free_open_file_info(void); void my_free_open_file_info(void);
......
...@@ -58,6 +58,59 @@ IF (WIN32) ...@@ -58,6 +58,59 @@ IF (WIN32)
my_win_popen.cc) my_win_popen.cc)
ENDIF() ENDIF()
# Select the hardware accelerated crc32 implementation that is compiled into
# mysys, depending on the target processor:
#   x86_64 / amd64 : crc32/crc32_x86.c   (PCLMUL, if the assembler accepts it)
#   aarch64        : crc32/crc32_arm64.c (ARMv8 crc/crypto extensions)
#   ppc64          : crc32/crc32_ppc64.c (vpmsum, built twice as object libs)
# Other targets add no extra source here.
IF(NOT MSVC AND CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|AMD64")
  # The same architecture is reported as "amd64"/"AMD64" on some systems, so
  # all spellings have to be matched; otherwise crc32_x86.c is never built.
  INCLUDE(CheckCSourceCompiles)
  # Check for PCLMUL instruction (x86)
  CHECK_C_SOURCE_COMPILES("
int main()
{
asm volatile (\"pclmulqdq \\$0x00, %%xmm1, %%xmm0\":::\"cc\");
return 0;
}" HAVE_CLMUL_INSTRUCTION)

  IF(HAVE_CLMUL_INSTRUCTION)
    SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_x86.c)
  ENDIF()
ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
  IF(CMAKE_COMPILER_IS_GNUCC AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.1)
    # Load the modules providing the CHECK_* macros used below instead of
    # relying on some other file having included them already.
    INCLUDE(CheckCXXSourceCompiles)
    INCLUDE(CheckCCompilerFlag)

    # crc32cx instruction plus getauxval(), which is needed for the run-time
    # availability check.
    CHECK_CXX_SOURCE_COMPILES("
#define CRC32CX(crc, value) __asm__(\"crc32cx %w[c], %w[c], %x[v]\":[c]\"+r\"(crc):[v]\"r\"(value))
asm(\".arch_extension crc\");
unsigned int foo(unsigned int ret) {
CRC32CX(ret, 0);
return ret;
}
#include <sys/auxv.h>
int main() { foo(0); getauxval(AT_HWCAP); }" HAVE_ARMV8_CRC)

    # pmull instruction (crypto extension)
    CHECK_CXX_SOURCE_COMPILES("
asm(\".arch_extension crypto\");
unsigned int foo(unsigned int ret) {
__asm__(\"pmull v2.1q, v2.1d, v1.1d\");
return ret;
}
int main() { foo(0); }" HAVE_ARMV8_CRYPTO)

    CHECK_C_COMPILER_FLAG(-march=armv8-a+crc+crypto HAVE_ARMV8_CRC_CRYPTO_INTRINSICS)
    IF(HAVE_ARMV8_CRC_CRYPTO_INTRINSICS)
      SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_arm64.c)
      # Only this one file is built with the extended instruction set.
      SET_SOURCE_FILES_PROPERTIES(crc32/crc32_arm64.c PROPERTIES
        COMPILE_FLAGS "-march=armv8-a+crc+crypto")
    ENDIF()
  ENDIF()
ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64")
  SET(HAVE_CRC32_VPMSUM 1)
  # The same source is compiled twice with different CRC32_FUNCTION and
  # CRC32_CONSTANTS_HEADER definitions; the IEEE variant provides
  # my_checksum() directly.
  SET(MYSYS_SOURCES ${MYSYS_SOURCES} $<TARGET_OBJECTS:crc32c> $<TARGET_OBJECTS:crc32ieee>)

  ADD_LIBRARY(crc32c OBJECT crc32/crc32_ppc64.c)
  ADD_LIBRARY(crc32ieee OBJECT crc32/crc32_ppc64.c)

  # NOTE(review): COMPILE_FLAGS is not set in the visible part of this file;
  # confirm that expanding it here is intended.
  SET_TARGET_PROPERTIES(crc32c crc32ieee PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} -maltivec -mvsx -mpower8-vector -mcrypto")
  SET_TARGET_PROPERTIES(crc32ieee PROPERTIES COMPILE_DEFINITIONS "CRC32_FUNCTION=my_checksum;CRC32_CONSTANTS_HEADER=\"pcc_crc32_constants.h\"")
  SET_TARGET_PROPERTIES(crc32c PROPERTIES COMPILE_DEFINITIONS "CRC32_FUNCTION=crc32c_vpmsum;CRC32_CONSTANTS_HEADER=\"pcc_crc32c_constants.h\"")
ENDIF()
IF(UNIX) IF(UNIX)
SET (MYSYS_SOURCES ${MYSYS_SOURCES} my_addr_resolve.c my_setuser.c) SET (MYSYS_SOURCES ${MYSYS_SOURCES} my_addr_resolve.c my_setuser.c)
ENDIF() ENDIF()
...@@ -73,7 +126,7 @@ ENDIF() ...@@ -73,7 +126,7 @@ ENDIF()
ADD_CONVENIENCE_LIBRARY(mysys ${MYSYS_SOURCES}) ADD_CONVENIENCE_LIBRARY(mysys ${MYSYS_SOURCES})
MAYBE_DISABLE_IPO(mysys) MAYBE_DISABLE_IPO(mysys)
TARGET_LINK_LIBRARIES(mysys dbug strings ${ZLIB_LIBRARY} TARGET_LINK_LIBRARIES(mysys dbug strings ${ZLIB_LIBRARY}
${LIBNSL} ${LIBM} ${LIBRT} ${LIBDL} ${LIBSOCKET} ${LIBEXECINFO} ${CRC32_LIBRARY}) ${LIBNSL} ${LIBM} ${LIBRT} ${LIBDL} ${LIBSOCKET} ${LIBEXECINFO})
DTRACE_INSTRUMENT(mysys) DTRACE_INSTRUMENT(mysys)
IF(HAVE_BFD_H) IF(HAVE_BFD_H)
......
...@@ -18,25 +18,41 @@ ...@@ -18,25 +18,41 @@
#include <my_sys.h> #include <my_sys.h>
#include <zlib.h> #include <zlib.h>
/* /* TODO: remove this once zlib adds inherent support for hardware accelerated
Calculate a long checksum for a memoryblock. crc32 for all architectures. */
static unsigned int my_crc32_zlib(unsigned int crc, const void *data,
size_t len)
{
return (unsigned int) crc32(crc, data, (unsigned int) len);
}
#if !defined(HAVE_CRC32_VPMSUM)
my_crc32_t my_checksum= my_crc32_zlib;
#endif
#if __GNUC__ >= 4 && defined(__x86_64__)
SYNOPSIS extern int crc32_pclmul_enabled();
my_checksum() extern unsigned int crc32_pclmul(unsigned int, const void *, size_t);
crc start value for crc
pos pointer to memory block
length length of the block
*/
ha_checksum my_checksum(ha_checksum crc, const uchar *pos, size_t length) /*----------------------------- x86_64 ---------------------------------*/
void my_checksum_init(void)
{ {
#ifdef HAVE_CRC32_VPMSUM if (crc32_pclmul_enabled())
extern unsigned int crc32ieee_vpmsum(unsigned int crc, const unsigned char *p, my_checksum= crc32_pclmul;
unsigned long len); }
crc= (ha_checksum) crc32ieee_vpmsum((uint) crc, pos, (uint) length); #elif defined(__GNUC__) && defined(HAVE_ARMV8_CRC)
/*----------------------------- aarch64 --------------------------------*/
extern unsigned int crc32_aarch64(unsigned int, const void *, size_t);
/* Ideally all 64-bit ARM processors should support crc32, but in case some
model doesn't, it is better to find that out through the auxiliary vector. */
void my_checksum_init(void)
{
if (crc32_aarch64_available())
my_checksum= crc32_aarch64;
}
#else #else
crc= (ha_checksum) crc32((uint)crc, pos, (uint) length); void my_checksum_init(void) {}
#endif #endif
DBUG_PRINT("info", ("crc: %lu", (ulong) crc));
return crc;
}
#include <my_global.h> #include <my_global.h>
#include <string.h> #include <string.h>
#include <stdint.h>
#if defined(__GNUC__) && defined(HAVE_ARMV8_CRC)
#if defined(__GNUC__) && defined(__linux__) && defined(HAVE_ARMV8_CRC)
#include <sys/auxv.h> #include <sys/auxv.h>
#include <asm/hwcap.h> #include <asm/hwcap.h>
...@@ -11,12 +11,13 @@ ...@@ -11,12 +11,13 @@
#define HWCAP_CRC32 (1 << 7) #define HWCAP_CRC32 (1 << 7)
#endif #endif
unsigned int crc32c_aarch64_available(void) /* ARM made crc32 default from ARMv8.1 but optional in ARMv8A
so the runtime check. */
int crc32_aarch64_available(void)
{ {
unsigned long auxv = getauxval(AT_HWCAP); unsigned long auxv= getauxval(AT_HWCAP);
return (auxv & HWCAP_CRC32) != 0; return (auxv & HWCAP_CRC32) != 0;
} }
#endif #endif
#ifndef HAVE_ARMV8_CRC_CRYPTO_INTRINSICS #ifndef HAVE_ARMV8_CRC_CRYPTO_INTRINSICS
...@@ -299,3 +300,35 @@ uint32_t crc32c_aarch64(uint32_t crc, const unsigned char *buffer, uint64_t len) ...@@ -299,3 +300,35 @@ uint32_t crc32c_aarch64(uint32_t crc, const unsigned char *buffer, uint64_t len)
return (~crc); return (~crc);
} }
/* CRC-32 using the ARMv8 crc32 instructions.

   Several strategies were considered:
   Approach-1: process 8 bytes, then 4 bytes, then 2 bytes, then 1 byte
   Approach-2: process 8 bytes at a time and the remainder byte by byte
   Approach-3: process 64 bytes at once by issuing 8 crc calls, and the
               remainder using the 8/1 combination
   Micro-benchmarks showed Approach-2 to work best, especially for small
   chunks of variable-length data, so that is what is implemented here. */
unsigned int crc32_aarch64(unsigned int crc, const void *buf, size_t len)
{
  const uint8_t *bytes= buf;
  const uint64_t *words;
  /* number of leading bytes before the next 8-byte boundary */
  size_t head= (size_t) (-(uintptr_t) buf & 7);

  if (head > len)
    head= len;
  len-= head;

  crc= ~crc;

  /* unaligned prefix, one byte at a time */
  for (; head; head--)
    crc= __crc32b(crc, *bytes++);

  /* aligned middle part, eight bytes at a time */
  words= (const uint64_t *) bytes;
  for (; len >= 8; len-= 8)
    crc= __crc32d(crc, *words++);

  /* remaining tail of fewer than eight bytes */
  bytes= (const uint8_t *) words;
  for (; len; len--)
    crc= __crc32b(crc, *bytes++);

  return ~crc;
}
...@@ -151,6 +151,7 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) { ...@@ -151,6 +151,7 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
0xffffffffffffffffUL}; 0xffffffffffffffffUL};
#ifdef REFLECT #ifdef REFLECT
__vector unsigned char vsht_splat;
const __vector unsigned long long vmask_32bit = const __vector unsigned long long vmask_32bit =
(__vector unsigned long long)vec_sld((__vector unsigned char)vzero, (__vector unsigned long long)vec_sld((__vector unsigned char)vzero,
(__vector unsigned char)vones, 4); (__vector unsigned char)vones, 4);
...@@ -598,7 +599,7 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) { ...@@ -598,7 +599,7 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) {
#ifdef REFLECT #ifdef REFLECT
/* shift left one bit */ /* shift left one bit */
__vector unsigned char vsht_splat = vec_splat_u8 (1); vsht_splat = vec_splat_u8 (1);
v0 = (__vector unsigned long long)vec_sll ((__vector unsigned char)v0, v0 = (__vector unsigned long long)vec_sll ((__vector unsigned char)v0,
vsht_splat); vsht_splat);
#endif #endif
......
...@@ -39,6 +39,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA ...@@ -39,6 +39,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
* *
*/ */
#include <my_global.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
...@@ -55,7 +57,7 @@ typedef uint8_t byte; ...@@ -55,7 +57,7 @@ typedef uint8_t byte;
# define _gcry_bswap32 __builtin_bswap32 # define _gcry_bswap32 __builtin_bswap32
#if __GNUC__ >= 4 && defined(__x86_64__) && defined(HAVE_CLMUL_INSTRUCTION) #if __GNUC__ >= 4 && defined(__x86_64__)
#if defined(_GCRY_GCC_VERSION) && _GCRY_GCC_VERSION >= 40400 /* 4.4 */ #if defined(_GCRY_GCC_VERSION) && _GCRY_GCC_VERSION >= 40400 /* 4.4 */
/* Prevent compiler from issuing SSE instructions between asm blocks. */ /* Prevent compiler from issuing SSE instructions between asm blocks. */
...@@ -508,4 +510,36 @@ crc32_intel_pclmul (u32 *pcrc, const byte *inbuf, size_t inlen) ...@@ -508,4 +510,36 @@ crc32_intel_pclmul (u32 *pcrc, const byte *inbuf, size_t inlen)
#endif #endif
} }
/* Report whether the CPU offers what crc32_pclmul() needs: both CPUID leaf 1
   ECX feature bits 1 and 19 must be set. Returns 1 if so, 0 otherwise. */
#ifdef __GNUC__
int crc32_pclmul_enabled(void)
{
  const int required= (1 << 19) | (1 << 1);
  int eax, ecx;
  /* The CPUID instruction and its leaf 1 are assumed to exist; precursors
  of the Intel 80486 are not supported. */
  asm("cpuid" : "=a"(eax), "=c"(ecx) : "0"(1) : "ebx", "edx");
  return (ecx & required) == required;
}
#elif 0 /* defined _MSC_VER */ /* FIXME: implement the pclmul interface */
#include <intrin.h>
int crc32_pclmul_enabled(void)
{
  const int required= (1 << 19) | (1 << 1);
  /* The CPUID instruction and its leaf 1 are assumed to exist; precursors
  of the Intel 80486 are not supported. */
  int regs[4];
  __cpuid(regs, 1);
  return (regs[2] & required) == required;
}
#else
/* No way to query the CPU with this compiler: never use the PCLMUL path. */
int crc32_pclmul_enabled(void)
{
  return 0;
}
#endif
/* Adapter giving crc32_intel_pclmul() the my_checksum() calling convention:
   the running CRC is inverted before and after the call. */
unsigned int crc32_pclmul(unsigned int crc32, const void *buf, size_t len)
{
  unsigned int state= ~crc32;
  crc32_intel_pclmul(&state, buf, len);
  return ~state;
}
#endif #endif
...@@ -59,7 +59,6 @@ static ulong atoi_octal(const char *str) ...@@ -59,7 +59,6 @@ static ulong atoi_octal(const char *str)
MYSQL_FILE *mysql_stdin= NULL; MYSQL_FILE *mysql_stdin= NULL;
static MYSQL_FILE instrumented_stdin; static MYSQL_FILE instrumented_stdin;
/** /**
Initialize my_sys functions, resources and variables Initialize my_sys functions, resources and variables
...@@ -101,6 +100,9 @@ my_bool my_init(void) ...@@ -101,6 +100,9 @@ my_bool my_init(void)
/* Initialize our mutex handling */ /* Initialize our mutex handling */
my_mutex_init(); my_mutex_init();
/* Initialize CPU architecture specific hardware based crc32 optimization */
my_checksum_init();
if (my_thread_global_init()) if (my_thread_global_init())
return 1; return 1;
......
...@@ -97,11 +97,9 @@ unsigned int crc32c_vpmsum(unsigned int crc, const unsigned char *p, unsigned lo ...@@ -97,11 +97,9 @@ unsigned int crc32c_vpmsum(unsigned int crc, const unsigned char *p, unsigned lo
ut_crc32_func_t ut_crc32_low= crc32c_vpmsum; ut_crc32_func_t ut_crc32_low= crc32c_vpmsum;
const char* ut_crc32_implementation = "Using POWER8 crc32 instructions"; const char* ut_crc32_implementation = "Using POWER8 crc32 instructions";
#else #else
# if defined(__GNUC__) && defined(__linux__) && defined(HAVE_ARMV8_CRC) # if defined(__GNUC__) && defined(HAVE_ARMV8_CRC)
extern "C" { extern "C" {
uint32_t crc32c_aarch64(uint32_t crc, const unsigned char *buffer, uint64_t len); uint32_t crc32c_aarch64(uint32_t crc, const unsigned char *buffer, uint64_t len);
/* For runtime check */
unsigned int crc32c_aarch64_available(void);
}; };
# elif defined(_MSC_VER) # elif defined(_MSC_VER)
# define TRY_SSE4_2 # define TRY_SSE4_2
...@@ -343,8 +341,8 @@ allocations, would not hurt if called twice, but would be pointless. */ ...@@ -343,8 +341,8 @@ allocations, would not hurt if called twice, but would be pointless. */
void ut_crc32_init() void ut_crc32_init()
{ {
#ifndef HAVE_CRC32_VPMSUM #ifndef HAVE_CRC32_VPMSUM
# if defined(__GNUC__) && defined(__linux__) && defined(HAVE_ARMV8_CRC) # if defined(__GNUC__) && defined(HAVE_ARMV8_CRC)
if (crc32c_aarch64_available()) if (crc32_aarch64_available())
{ {
ut_crc32_low= crc32c_aarch64; ut_crc32_low= crc32c_aarch64;
ut_crc32_implementation= "Using ARMv8 crc32 instructions"; ut_crc32_implementation= "Using ARMv8 crc32 instructions";
......
...@@ -1909,7 +1909,7 @@ static void translog_put_sector_protection(uchar *page, ...@@ -1909,7 +1909,7 @@ static void translog_put_sector_protection(uchar *page,
static uint32 translog_crc(uchar *area, uint length) static uint32 translog_crc(uchar *area, uint length)
{ {
DBUG_ENTER("translog_crc"); DBUG_ENTER("translog_crc");
DBUG_RETURN(crc32(0L, (unsigned char*) area, length)); DBUG_RETURN(my_checksum(0L, area, length));
} }
......
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
static uint32 maria_page_crc(uint32 start, uchar *data, uint length) static uint32 maria_page_crc(uint32 start, uchar *data, uint length)
{ {
uint32 crc= crc32(start, data, length); uint32 crc= my_checksum(start, data, length);
/* we need this assert to get following comparison working */ /* we need this assert to get following comparison working */
compile_time_assert(MARIA_NO_CRC_BITMAP_PAGE == compile_time_assert(MARIA_NO_CRC_BITMAP_PAGE ==
......
...@@ -646,9 +646,9 @@ int Rdb_converter::verify_row_debug_checksum( ...@@ -646,9 +646,9 @@ int Rdb_converter::verify_row_debug_checksum(
rdb_netbuf_to_uint32((const uchar *)reader->read(RDB_CHECKSUM_SIZE)); rdb_netbuf_to_uint32((const uchar *)reader->read(RDB_CHECKSUM_SIZE));
const uint32_t computed_key_chksum = const uint32_t computed_key_chksum =
my_core::crc32(0, rdb_slice_to_uchar_ptr(key), key->size()); my_core::my_checksum(0, rdb_slice_to_uchar_ptr(key), key->size());
const uint32_t computed_val_chksum = const uint32_t computed_val_chksum =
my_core::crc32(0, rdb_slice_to_uchar_ptr(value), my_core::my_checksum(0, rdb_slice_to_uchar_ptr(value),
value->size() - RDB_CHECKSUM_CHUNK_SIZE); value->size() - RDB_CHECKSUM_CHUNK_SIZE);
DBUG_EXECUTE_IF("myrocks_simulate_bad_pk_checksum1", stored_key_chksum++;); DBUG_EXECUTE_IF("myrocks_simulate_bad_pk_checksum1", stored_key_chksum++;);
...@@ -816,10 +816,10 @@ int Rdb_converter::encode_value_slice( ...@@ -816,10 +816,10 @@ int Rdb_converter::encode_value_slice(
} }
if (store_row_debug_checksums) { if (store_row_debug_checksums) {
const uint32_t key_crc32 = my_core::crc32( const uint32_t key_crc32 = my_core::my_checksum(
0, rdb_slice_to_uchar_ptr(&pk_packed_slice), pk_packed_slice.size()); 0, rdb_slice_to_uchar_ptr(&pk_packed_slice), pk_packed_slice.size());
const uint32_t val_crc32 = const uint32_t val_crc32 =
my_core::crc32(0, rdb_mysql_str_to_uchar_str(&m_storage_record), my_core::my_checksum(0, rdb_mysql_str_to_uchar_str(&m_storage_record),
m_storage_record.length()); m_storage_record.length());
uchar key_crc_buf[RDB_CHECKSUM_SIZE]; uchar key_crc_buf[RDB_CHECKSUM_SIZE];
uchar val_crc_buf[RDB_CHECKSUM_SIZE]; uchar val_crc_buf[RDB_CHECKSUM_SIZE];
......
...@@ -1432,9 +1432,10 @@ uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer, ...@@ -1432,9 +1432,10 @@ uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer,
// ha_rocksdb::convert_record_to_storage_format // ha_rocksdb::convert_record_to_storage_format
// //
if (should_store_row_debug_checksums) { if (should_store_row_debug_checksums) {
const uint32_t key_crc32 = crc32(0, packed_tuple, tuple - packed_tuple); const uint32_t key_crc32 =
my_checksum(0, packed_tuple, tuple - packed_tuple);
const uint32_t val_crc32 = const uint32_t val_crc32 =
crc32(0, unpack_info->ptr(), unpack_info->get_current_pos()); my_checksum(0, unpack_info->ptr(), unpack_info->get_current_pos());
unpack_info->write_uint8(RDB_CHECKSUM_DATA_TAG); unpack_info->write_uint8(RDB_CHECKSUM_DATA_TAG);
unpack_info->write_uint32(key_crc32); unpack_info->write_uint32(key_crc32);
...@@ -1690,9 +1691,9 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf, ...@@ -1690,9 +1691,9 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf,
(const uchar *)unp_reader.read(RDB_CHECKSUM_SIZE)); (const uchar *)unp_reader.read(RDB_CHECKSUM_SIZE));
const uint32_t computed_key_chksum = const uint32_t computed_key_chksum =
crc32(0, (const uchar *)packed_key->data(), packed_key->size()); my_checksum(0, packed_key->data(), packed_key->size());
const uint32_t computed_val_chksum = const uint32_t computed_val_chksum =
crc32(0, (const uchar *)unpack_info->data(), my_checksum(0, unpack_info->data(),
unpack_info->size() - RDB_CHECKSUM_CHUNK_SIZE); unpack_info->size() - RDB_CHECKSUM_CHUNK_SIZE);
DBUG_EXECUTE_IF("myrocks_simulate_bad_key_checksum1", DBUG_EXECUTE_IF("myrocks_simulate_bad_key_checksum1",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment