Commit 2aa14b1a authored by Nick Terrell

zstd: import upstream v1.5.2

Updates the kernel's zstd library to v1.5.2, the latest zstd release.
It is updated to the upstream tag `v1.5.2-kernel`, which contains
several commits cherry-picked on top of the v1.5.2 release that are
required for the kernel update. I will create this tag once the PR is
ready to merge; until then, reference the temporary upstream branch
`v1.5.2-kernel-cherrypicks`.

I plan to submit this patch as part of the v6.2 merge window.

I've done basic build testing & testing on x86-64, i386, and aarch64.
I'm merging these patches into my `zstd-next` branch, which is pulled
into `linux-next` for further testing.

I've benchmarked BtrFS with zstd compression on an x86-64 machine and
saw the results below. Decompression speed is a small win across the
board. The lower compression levels 1-4 see wins in both compression
speed and compression ratio. The higher compression levels see a small
compression speed loss and a roughly neutral ratio. I expect the lower
compression levels to be used much more heavily than the high
compression levels, so this should be a net win.

Level	CTime	DTime	Ratio
1	-2.95%	-1.1%	-0.7%
3	-3.5%	-1.2%	-0.5%
5	+3.7%	-1.0%	+0.0%
7	+3.2%	-0.9%	+0.0%
9	-4.3%	-0.8%	+0.1%
Signed-off-by: Nick Terrell <terrelln@fb.com>
parent 4782c725
@@ -313,7 +313,16 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 c
     U32 const regMask = sizeof(bitContainer)*8 - 1;
     /* if start > regMask, bitstream is corrupted, and result is undefined */
     assert(nbBits < BIT_MASK_SIZE);
+    /* x86 transforms & ((1 << nbBits) - 1) into the bzhi instruction, which
+     * is better than accessing memory. When the bmi2 instruction is not
+     * present, we consider such CPUs old (pre-Haswell, 2013) and their
+     * performance is not of that importance.
+     */
+#if defined(__x86_64__) || defined(_M_X86)
+    return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
+#else
     return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
+#endif
 }

 MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
...
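For readers unfamiliar with the trick above: on x86-64 with BMI2, compilers
lower `x & ((1 << n) - 1)` to a single bzhi instruction, avoiding the memory
load of a mask table. A minimal standalone sketch of the two equivalent
extraction strategies (hypothetical helper names, not from the patch):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Extract nbBits bits of 'v' starting at bit 'start', two equivalent ways. */
static inline size_t mid_bits_arith(size_t v, unsigned start, unsigned nbBits)
{
    unsigned const regMask = sizeof(v)*8 - 1;
    /* On x86-64 with BMI2 this mask computation compiles to bzhi. */
    return (v >> (start & regMask)) & ((((uint64_t)1) << nbBits) - 1);
}

static const uint32_t mask_table[9] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF };

static inline size_t mid_bits_table(size_t v, unsigned start, unsigned nbBits)
{
    unsigned const regMask = sizeof(v)*8 - 1;
    return (v >> (start & regMask)) & mask_table[nbBits];   /* memory load */
}

int main(void)
{
    assert(mid_bits_arith(0xABCD, 4, 8) == 0xBC);
    assert(mid_bits_table(0xABCD, 4, 8) == 0xBC);
    return 0;
}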
@@ -11,6 +11,8 @@
 #ifndef ZSTD_COMPILER_H
 #define ZSTD_COMPILER_H

+#include "portability_macros.h"
+
 /*-*******************************************************
 *  Compiler specifics
 *********************************************************/
@@ -34,7 +36,7 @@
 /*
   On MSVC qsort requires that functions passed into it use the __cdecl calling convention (CC).
-  This explictly marks such functions as __cdecl so that the code will still compile
+  This explicitly marks such functions as __cdecl so that the code will still compile
   if a CC other than __cdecl has been made the default.
 */
 #define WIN_CDECL
@@ -70,25 +72,13 @@
 /* target attribute */
-#ifndef __has_attribute
-  #define __has_attribute(x) 0  /* Compatibility with non-clang compilers. */
-#endif
 #define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))

-/* Enable runtime BMI2 dispatch based on the CPU.
- * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
- * We test for bmi1 & bmi2. lzcnt is included in bmi1.
+/* Target attribute for BMI2 dynamic dispatch.
+ * Enable lzcnt, bmi, and bmi2.
  */
-#ifndef DYNAMIC_BMI2
-  #if ((defined(__clang__) && __has_attribute(__target__)) \
-      || (defined(__GNUC__) \
-          && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
-      && (defined(__x86_64__) || defined(_M_X86)) \
-      && !defined(__BMI2__)
-  #  define DYNAMIC_BMI2 1
-  #else
-  #  define DYNAMIC_BMI2 0
-  #endif
-#endif
+#define BMI2_TARGET_ATTRIBUTE TARGET_ATTRIBUTE("lzcnt,bmi,bmi2")

 /* prefetch
  * can be disabled, by declaring NO_PREFETCH build macro */
@@ -115,8 +105,9 @@
 }

 /* vectorization
- * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
-#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__)
+ * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax,
+ * and some compilers, like Intel ICC and MCST LCC, do not support it at all. */
+#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) && !defined(__LCC__)
 # if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
 #   define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
 # else
@@ -134,20 +125,18 @@
 #define LIKELY(x)   (__builtin_expect((x), 1))
 #define UNLIKELY(x) (__builtin_expect((x), 0))

+#if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
+#  define ZSTD_UNREACHABLE { assert(0), __builtin_unreachable(); }
+#else
+#  define ZSTD_UNREACHABLE { assert(0); }
+#endif
+
 /* disable warnings */

 /*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/
-/* compat. with non-clang compilers */
-#ifndef __has_builtin
-#  define __has_builtin(x) 0
-#endif
-
-/* compat. with non-clang compilers */
-#ifndef __has_feature
-#  define __has_feature(x) 0
-#endif
+/* compile time determination of SIMD support */

 /* C-language Attributes are added in C23. */
 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
@@ -168,10 +157,28 @@
  */
 #define ZSTD_FALLTHROUGH fallthrough

-/* detects whether we are being compiled under msan */
+/*-**************************************************************
+*  Alignment check
+*****************************************************************/
+
+/* this test was initially positioned in mem.h,
+ * but this file is removed (or replaced) for linux kernel
+ * so it's now hosted in compiler.h,
+ * which remains valid for both user & kernel spaces.
+ */
+
+#ifndef ZSTD_ALIGNOF
+/* covers gcc, clang & MSVC */
+/* note : this section must come first, before C11,
+ * due to a limitation in the kernel source generator */
+#  define ZSTD_ALIGNOF(T) __alignof(T)
+#endif /* ZSTD_ALIGNOF */
+
+/*-**************************************************************
+*  Sanitizer
+*****************************************************************/

 /* detects whether we are being compiled under asan */

 #endif /* ZSTD_COMPILER_H */
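As a quick illustration of what ZSTD_ALIGNOF is for (a sketch, not from the
patch): the library wants a portable alignof that works in both user and
kernel space without relying on C11 <stdalign.h>, because alignment
requirements drive how workspace buffers are padded.

#include <stdio.h>

#define ZSTD_ALIGNOF(T) __alignof(T)   /* covers gcc, clang & MSVC */

int main(void)
{
    /* Alignment requirements like these drive workspace padding decisions. */
    printf("alignof(short)=%u alignof(double)=%u\n",
           (unsigned)ZSTD_ALIGNOF(short), (unsigned)ZSTD_ALIGNOF(double));
    return 0;
}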
@@ -212,7 +212,7 @@ static size_t FSE_readNCount_body_default(
 }

 #if DYNAMIC_BMI2
-TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2(
+BMI2_TARGET_ATTRIBUTE static size_t FSE_readNCount_body_bmi2(
         short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
         const void* headerBuffer, size_t hbSize)
 {
@@ -240,6 +240,7 @@ size_t FSE_readNCount(
     return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0);
 }
 /*! HUF_readStats() :
     Read compact Huffman tree, saved by HUF_writeCTable().
     `huffWeight` is destination buffer.
@@ -293,7 +294,7 @@ HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
     ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
     weightTotal = 0;
     {   U32 n; for (n=0; n<oSize; n++) {
-            if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
+            if (huffWeight[n] > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
             rankStats[huffWeight[n]]++;
             weightTotal += (1 << huffWeight[n]) >> 1;
     }   }
@@ -331,7 +332,7 @@ static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* r
 }

 #if DYNAMIC_BMI2
-static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+static BMI2_TARGET_ATTRIBUTE size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                                  U32* nbSymbolsPtr, U32* tableLogPtr,
                                  const void* src, size_t srcSize,
                                  void* workSpace, size_t wkspSize)
...
@@ -18,8 +18,10 @@
 /* ****************************************
 *  Dependencies
 ******************************************/
-#include "zstd_deps.h"          /* size_t */
 #include <linux/zstd_errors.h>  /* enum list */
+#include "compiler.h"
+#include "debug.h"
+#include "zstd_deps.h"          /* size_t */

 /* ****************************************
@@ -62,5 +64,82 @@ ERR_STATIC const char* ERR_getErrorName(size_t code)
     return ERR_getErrorString(ERR_getErrorCode(code));
 }
+/*
+ * Ignore: this is an internal helper.
+ *
+ * This is a helper function to help force C99-correctness during compilation.
+ * Under strict compilation modes, variadic macro arguments can't be empty.
+ * However, variadic function arguments can be. Using a function therefore lets
+ * us statically check that at least one (string) argument was passed,
+ * independent of the compilation flags.
+ */
+static INLINE_KEYWORD UNUSED_ATTR
+void _force_has_format_string(const char *format, ...) {
+  (void)format;
+}
+
+/*
+ * Ignore: this is an internal helper.
+ *
+ * We want to force this function invocation to be syntactically correct, but
+ * we don't want to force runtime evaluation of its arguments.
+ */
+#define _FORCE_HAS_FORMAT_STRING(...) \
+  if (0) { \
+    _force_has_format_string(__VA_ARGS__); \
+  }
+
+#define ERR_QUOTE(str) #str
+
+/*
+ * Return the specified error if the condition evaluates to true.
+ *
+ * In debug modes, prints additional information.
+ * In order to do that (particularly, printing the conditional that failed),
+ * this can't just wrap RETURN_ERROR().
+ */
+#define RETURN_ERROR_IF(cond, err, ...) \
+  if (cond) { \
+    RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
+           __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
+    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+    RAWLOG(3, ": " __VA_ARGS__); \
+    RAWLOG(3, "\n"); \
+    return ERROR(err); \
+  }
+
+/*
+ * Unconditionally return the specified error.
+ *
+ * In debug modes, prints additional information.
+ */
+#define RETURN_ERROR(err, ...) \
+  do { \
+    RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
+           __FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \
+    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+    RAWLOG(3, ": " __VA_ARGS__); \
+    RAWLOG(3, "\n"); \
+    return ERROR(err); \
+  } while(0);
+
+/*
+ * If the provided expression evaluates to an error code, returns that error code.
+ *
+ * In debug modes, prints additional information.
+ */
+#define FORWARD_IF_ERROR(err, ...) \
+  do { \
+    size_t const err_code = (err); \
+    if (ERR_isError(err_code)) { \
+      RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
+             __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
+      _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+      RAWLOG(3, ": " __VA_ARGS__); \
+      RAWLOG(3, "\n"); \
+      return err_code; \
+    } \
+  } while(0);
 #endif /* ERROR_H_MODULE */
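To make the control flow of these macros concrete, here is a sketch of how
they compose in zstd-style code (hypothetical functions; ERROR(),
ERR_isError(), and ZSTD_memcpy() as used elsewhere in the library):

/* Hypothetical decoder step showing the intended macro usage. */
static size_t copy_header(void* dst, size_t dstCapacity,
                          const void* src, size_t srcSize)
{
    RETURN_ERROR_IF(srcSize < 4, srcSize_wrong, "need %u bytes", 4U);
    RETURN_ERROR_IF(dstCapacity < srcSize, dstSize_tooSmall, "dst too small");
    ZSTD_memcpy(dst, src, srcSize);
    return srcSize;
}

static size_t decode(void* dst, size_t dstCapacity,
                     const void* src, size_t srcSize)
{
    /* Propagates any error code from the helper unchanged. */
    FORWARD_IF_ERROR(copy_header(dst, dstCapacity, src, srcSize), "header failed");
    return 0;
}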
@@ -333,8 +333,9 @@ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
 /* FSE_buildCTable_wksp() :
  * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
  * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
+ * See FSE_buildCTable_wksp() for breakdown of workspace usage.
  */
-#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2)))
+#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (((maxSymbolValue + 2) + (1ull << (tableLog)))/2 + sizeof(U64)/sizeof(U32) /* additional 8 bytes for potential table overwrite */)
 #define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
 size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
...
@@ -365,7 +365,7 @@ static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, co
 }

 #if DYNAMIC_BMI2
-TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
+BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
 {
     return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
 }
...
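The _default/_bmi2 pairs above all follow the same runtime-dispatch idiom,
which this release routes through the new BMI2_TARGET_ATTRIBUTE macro. A
minimal standalone sketch of the pattern (simplified, hypothetical names;
the fallback defines stand in for portability_macros.h / compiler.h):

#include <stddef.h>

#ifndef DYNAMIC_BMI2
#  define DYNAMIC_BMI2 0
#endif
#ifndef BMI2_TARGET_ATTRIBUTE
#  define BMI2_TARGET_ATTRIBUTE __attribute__((__target__("lzcnt,bmi,bmi2")))
#endif

/* One body, compiled twice: once generic, once with BMI2 codegen enabled. */
static size_t work_body(const void* src, size_t srcSize, int bmi2)
{
    (void)src; (void)bmi2;
    return srcSize;   /* stand-in for the real transform */
}

static size_t work_default(const void* src, size_t srcSize)
{
    return work_body(src, srcSize, /* bmi2 */ 0);
}

#if DYNAMIC_BMI2
BMI2_TARGET_ATTRIBUTE static size_t work_bmi2(const void* src, size_t srcSize)
{
    return work_body(src, srcSize, /* bmi2 */ 1);
}
#endif

/* Callers pass the cached CPU-detection result, as zstd does. */
size_t work(const void* src, size_t srcSize, int bmi2)
{
#if DYNAMIC_BMI2
    if (bmi2) return work_bmi2(src, srcSize);
#endif
    (void)bmi2;
    return work_default(src, srcSize);
}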
@@ -86,9 +86,9 @@ HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
 /* HUF_compress4X_wksp() :
  *  Same as HUF_compress2(), but uses externally allocated `workSpace`.
- *  `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */
-#define HUF_WORKSPACE_SIZE ((6 << 10) + 256)
-#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
+ *  `workspace` must be at least as large as HUF_WORKSPACE_SIZE */
+#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)
+#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))
 HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
                                            const void* src, size_t srcSize,
                                            unsigned maxSymbolValue, unsigned tableLog,
@@ -113,11 +113,11 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
 /* *** Constants *** */
-#define HUF_TABLELOG_MAX      12   /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_TABLELOG_MAX      12   /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */
 #define HUF_TABLELOG_DEFAULT  11   /* default tableLog value when none specified */
 #define HUF_SYMBOLVALUE_MAX  255
-#define HUF_TABLELOG_ABSOLUTEMAX  15   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#define HUF_TABLELOG_ABSOLUTEMAX  12   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
 #if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
 #  error "HUF_TABLELOG_MAX is too large !"
 #endif
@@ -133,15 +133,11 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
 /* static allocation of HUF's Compression Table */
 /* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */
-struct HUF_CElt_s {
-  U16  val;
-  BYTE nbBits;
-};   /* typedef'd to HUF_CElt */
-typedef struct HUF_CElt_s HUF_CElt;   /* consider it an incomplete type */
-#define HUF_CTABLE_SIZE_U32(maxSymbolValue)   ((maxSymbolValue)+1)   /* Use tables of U32, for proper alignment */
-#define HUF_CTABLE_SIZE(maxSymbolValue)       (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
+typedef size_t HUF_CElt;   /* consider it an incomplete type */
+#define HUF_CTABLE_SIZE_ST(maxSymbolValue)   ((maxSymbolValue)+2)   /* Use tables of size_t, for proper alignment */
+#define HUF_CTABLE_SIZE(maxSymbolValue)      (HUF_CTABLE_SIZE_ST(maxSymbolValue) * sizeof(size_t))
 #define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
-    HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */
+    HUF_CElt name[HUF_CTABLE_SIZE_ST(maxSymbolValue)] /* no final ; */

 /* static allocation of HUF's DTable */
 typedef U32 HUF_DTable;
@@ -191,6 +187,7 @@ size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSym
 size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
 size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
 size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
 int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
@@ -203,12 +200,13 @@ typedef enum {
  *  Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
  *  If it uses hufTable it does not modify hufTable or repeat.
  *  If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
- *  If preferRepeat then the old table will always be used if valid. */
+ *  If preferRepeat then the old table will always be used if valid.
+ *  If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
 size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
                        const void* src, size_t srcSize,
                        unsigned maxSymbolValue, unsigned tableLog,
                        void* workSpace, size_t wkspSize,   /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
-                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
+                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible);
 /* HUF_buildCTable_wksp() :
  *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
@@ -246,11 +244,10 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
  *  Loading a CTable saved with HUF_writeCTable() */
 size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);

-/* HUF_getNbBits() :
+/* HUF_getNbBitsFromCTable() :
  *  Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
- *  Note 1 : is not inlined, as HUF_CElt definition is private
- *  Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */
-U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue);
+ *  Note 1 : is not inlined, as HUF_CElt definition is private */
+U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue);
 /*
  * HUF_decompress() does the following:
@@ -302,18 +299,20 @@ size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* c
 /* ====================== */

 size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
-size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);   /*< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
+size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);   /*< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U64 U64 */
 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
 /* HUF_compress1X_repeat() :
  *  Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
  *  If it uses hufTable it does not modify hufTable or repeat.
  *  If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
- *  If preferRepeat then the old table will always be used if valid. */
+ *  If preferRepeat then the old table will always be used if valid.
+ *  If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
 size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
                        const void* src, size_t srcSize,
                        unsigned maxSymbolValue, unsigned tableLog,
                        void* workSpace, size_t wkspSize,   /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
-                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
+                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible);
 size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
 #ifndef HUF_FORCE_DECOMPRESS_X1
@@ -351,6 +350,9 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds
 #ifndef HUF_FORCE_DECOMPRESS_X2
 size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
 #endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
+#endif

 #endif /* HUF_STATIC_LINKING_ONLY */
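Since HUF_CElt is now a size_t and the workspace is sized in U64 units,
callers size their buffers differently than before. A sketch of a caller
under the new definitions (hypothetical wrapper, assuming huf.h is included
with HUF_STATIC_LINKING_ONLY so U64 and the macros below are visible):

/* Compress with an on-stack workspace sized by the new U64-based macro. */
static size_t compress4X(void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize)
{
    U64 wksp[HUF_WORKSPACE_SIZE_U64];   /* replaces the old U32 sizing */
    return HUF_compress4X_wksp(dst, dstCapacity, src, srcSize,
                               HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT,
                               wksp, sizeof(wksp));
}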
@@ -30,6 +30,8 @@
 *  Basic Types
 *****************************************************************/
 typedef uint8_t  BYTE;
+typedef uint8_t  U8;
+typedef int8_t   S8;
 typedef uint16_t U16;
 typedef int16_t  S16;
 typedef uint32_t U32;
...
/*
* Copyright (c) Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_PORTABILITY_MACROS_H
#define ZSTD_PORTABILITY_MACROS_H
/*
 * This header file contains macro definitions to support portability.
* This header is shared between C and ASM code, so it MUST only
* contain macro definitions. It MUST not contain any C code.
*
* This header ONLY defines macros to detect platforms/feature support.
*
*/
/* compat. with non-clang compilers */
#ifndef __has_attribute
#define __has_attribute(x) 0
#endif
/* compat. with non-clang compilers */
#ifndef __has_builtin
# define __has_builtin(x) 0
#endif
/* compat. with non-clang compilers */
#ifndef __has_feature
# define __has_feature(x) 0
#endif
/* detects whether we are being compiled under msan */
/* detects whether we are being compiled under asan */
/* detects whether we are being compiled under dfsan */
/* Mark the internal assembly functions as hidden */
#ifdef __ELF__
# define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func
#else
# define ZSTD_HIDE_ASM_FUNCTION(func)
#endif
/* Enable runtime BMI2 dispatch based on the CPU.
* Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
*/
#ifndef DYNAMIC_BMI2
#if ((defined(__clang__) && __has_attribute(__target__)) \
|| (defined(__GNUC__) \
&& (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
&& (defined(__x86_64__) || defined(_M_X64)) \
&& !defined(__BMI2__)
# define DYNAMIC_BMI2 1
#else
# define DYNAMIC_BMI2 0
#endif
#endif
/*
 * Only enable assembly for GNUC compatible compilers,
* because other platforms may not support GAS assembly syntax.
*
* Only enable assembly for Linux / MacOS, other platforms may
* work, but they haven't been tested. This could likely be
* extended to BSD systems.
*
* Disable assembly when MSAN is enabled, because MSAN requires
* 100% of code to be instrumented to work.
*/
#define ZSTD_ASM_SUPPORTED 1
/*
* Determines whether we should enable assembly for x86-64
* with BMI2.
*
* Enable if all of the following conditions hold:
* - ASM hasn't been explicitly disabled by defining ZSTD_DISABLE_ASM
* - Assembly is supported
* - We are compiling for x86-64 and either:
* - DYNAMIC_BMI2 is enabled
* - BMI2 is supported at compile time
*/
#define ZSTD_ENABLE_ASM_X86_64_BMI2 0
#endif /* ZSTD_PORTABILITY_MACROS_H */
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_CLEVELS_H
#define ZSTD_CLEVELS_H
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */
#include <linux/zstd.h>
/*-===== Pre-defined compression levels =====-*/
#define ZSTD_MAX_CLEVEL 22
__attribute__((__unused__))
static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
{ /* "default" - for any srcSize > 256 KB */
/* W, C, H, S, L, TL, strat */
{ 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */
{ 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */
{ 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */
{ 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */
{ 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */
{ 21, 18, 19, 3, 5, 2, ZSTD_greedy }, /* level 5 */
{ 21, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6 */
{ 21, 19, 20, 4, 5, 8, ZSTD_lazy }, /* level 7 */
{ 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 8 */
{ 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */
{ 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 10 */
{ 22, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 11 */
{ 22, 22, 23, 6, 5, 32, ZSTD_lazy2 }, /* level 12 */
{ 22, 22, 22, 4, 5, 32, ZSTD_btlazy2 }, /* level 13 */
{ 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */
{ 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */
{ 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */
{ 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */
{ 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */
{ 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */
{ 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */
{ 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */
{ 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */
},
{ /* for srcSize <= 256 KB */
/* W, C, H, S, L, T, strat */
{ 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
{ 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */
{ 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */
{ 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */
{ 18, 16, 17, 3, 5, 2, ZSTD_greedy }, /* level 4.*/
{ 18, 17, 18, 5, 5, 2, ZSTD_greedy }, /* level 5.*/
{ 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/
{ 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */
{ 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
{ 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
{ 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
{ 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/
{ 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/
{ 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */
{ 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
{ 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/
{ 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/
{ 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/
{ 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/
{ 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
{ 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/
{ 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/
{ 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/
},
{ /* for srcSize <= 128 KB */
/* W, C, H, S, L, T, strat */
{ 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
{ 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */
{ 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */
{ 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */
{ 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */
{ 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */
{ 17, 16, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */
{ 17, 16, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */
{ 17, 16, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
{ 17, 16, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
{ 17, 16, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
{ 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */
{ 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */
{ 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/
{ 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
{ 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/
{ 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/
{ 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/
{ 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/
{ 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/
{ 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/
{ 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
{ 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/
},
{ /* for srcSize <= 16 KB */
/* W, C, H, S, L, T, strat */
{ 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
{ 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */
{ 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */
{ 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */
{ 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */
{ 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/
{ 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */
{ 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */
{ 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/
{ 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/
{ 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/
{ 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/
{ 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/
{ 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/
{ 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/
{ 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/
{ 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/
{ 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/
{ 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/
{ 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
{ 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/
{ 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
{ 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/
},
};
#endif /* ZSTD_CLEVELS_H */
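Each row above is a ZSTD_compressionParameters value: windowLog, chainLog,
hashLog, searchLog, minMatch, targetLength, and strategy, selected per
source-size class. A sketch of looking a row up (hypothetical helper, not
part of the patch):

/* tableId: 0 for >256 KB sources, 1 for <=256 KB, 2 for <=128 KB, 3 for <=16 KB */
static ZSTD_compressionParameters
clevel_params(int tableId, int level)
{
    if (level <= 0) return ZSTD_defaultCParameters[tableId][0];  /* base row for negative levels */
    if (level > ZSTD_MAX_CLEVEL) level = ZSTD_MAX_CLEVEL;
    return ZSTD_defaultCParameters[tableId][level];
}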
@@ -75,13 +75,14 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
     void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
     FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
     U32 const step = FSE_TABLESTEP(tableSize);
+    U32 const maxSV1 = maxSymbolValue+1;

-    U32* cumul = (U32*)workSpace;
-    FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2));
+    U16* cumul = (U16*)workSpace;   /* size = maxSV1 */
+    FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1));   /* size = tableSize */

     U32 highThreshold = tableSize-1;

-    if ((size_t)workSpace & 3) return ERROR(GENERIC);   /* Must be 4 byte aligned */
+    assert(((size_t)workSpace & 1) == 0);   /* Must be 2 bytes-aligned */
     if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);

     /* CTable header */
     tableU16[-2] = (U16) tableLog;
@@ -98,20 +99,61 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
     /* symbol start positions */
     {   U32 u;
         cumul[0] = 0;
-        for (u=1; u <= maxSymbolValue+1; u++) {
+        for (u=1; u <= maxSV1; u++) {
             if (normalizedCounter[u-1]==-1) {   /* Low proba symbol */
                 cumul[u] = cumul[u-1] + 1;
                 tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
             } else {
-                cumul[u] = cumul[u-1] + normalizedCounter[u-1];
+                assert(normalizedCounter[u-1] >= 0);
+                cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];
+                assert(cumul[u] >= cumul[u-1]);   /* no overflow */
         }   }
-        cumul[maxSymbolValue+1] = tableSize+1;
+        cumul[maxSV1] = (U16)(tableSize+1);
     }
     /* Spread symbols */
-    {   U32 position = 0;
+    if (highThreshold == tableSize - 1) {
+        /* Case for no low prob count symbols. Lay down 8 bytes at a time
+         * to reduce branch misses since we are operating on a small block
+         */
+        BYTE* const spread = tableSymbol + tableSize;   /* size = tableSize + 8 (may write beyond tableSize) */
+        {   U64 const add = 0x0101010101010101ull;
+            size_t pos = 0;
+            U64 sv = 0;
+            U32 s;
+            for (s=0; s<maxSV1; ++s, sv += add) {
+                int i;
+                int const n = normalizedCounter[s];
+                MEM_write64(spread + pos, sv);
+                for (i = 8; i < n; i += 8) {
+                    MEM_write64(spread + pos + i, sv);
+                }
+                assert(n>=0);
+                pos += (size_t)n;
+            }
+        }
+        /* Spread symbols across the table. Lack of lowprob symbols means that
+         * we don't need variable sized inner loop, so we can unroll the loop and
+         * reduce branch misses.
+         */
+        {   size_t position = 0;
+            size_t s;
+            size_t const unroll = 2;   /* Experimentally determined optimal unroll */
+            assert(tableSize % unroll == 0);   /* FSE_MIN_TABLELOG is 5 */
+            for (s = 0; s < (size_t)tableSize; s += unroll) {
+                size_t u;
+                for (u = 0; u < unroll; ++u) {
+                    size_t const uPosition = (position + (u * step)) & tableMask;
+                    tableSymbol[uPosition] = spread[s + u];
+                }
+                position = (position + (unroll * step)) & tableMask;
+            }
+            assert(position == 0);   /* Must have initialized all positions */
+        }
+    } else {
+        U32 position = 0;
         U32 symbol;
-        for (symbol=0; symbol<=maxSymbolValue; symbol++) {
+        for (symbol=0; symbol<maxSV1; symbol++) {
             int nbOccurrences;
             int const freq = normalizedCounter[symbol];
             for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
@@ -120,7 +162,6 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
             while (position > highThreshold)
                 position = (position + step) & tableMask;   /* Low proba area */
     }   }
-
         assert(position==0);   /* Must have initialized all positions */
     }
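The fast path above lays each symbol's run down 8 bytes at a time. A tiny
standalone illustration of the same trick (simplified, hypothetical names;
the real code uses MEM_write64 and the FSE table layout):

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Write byte value 's' n times, 8 bytes per store. May write up to 7 bytes
 * past n, so the destination needs 8 bytes of slack, as in the diff above. */
static void spread_run(uint8_t* dst, uint8_t s, int n)
{
    uint64_t const sv = 0x0101010101010101ull * s;   /* s replicated into all 8 lanes */
    int i;
    for (i = 0; i < n; i += 8)
        memcpy(dst + i, &sv, 8);                     /* stands in for MEM_write64 */
}

int main(void)
{
    uint8_t buf[16 + 8] = {0};
    spread_run(buf, 7, 10);
    assert(buf[0] == 7 && buf[9] == 7);   /* bytes 10..15 are slack writes */
    return 0;
}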
@@ -144,16 +185,17 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
         case -1:
         case  1:
             symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
-            symbolTT[s].deltaFindState = total - 1;
+            assert(total <= INT_MAX);
+            symbolTT[s].deltaFindState = (int)(total - 1);
             total ++;
             break;
         default :
-            {
-                U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
-                U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
+            assert(normalizedCounter[s] > 1);
+            {   U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1);
+                U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
                 symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
-                symbolTT[s].deltaFindState = total - normalizedCounter[s];
-                total +=  normalizedCounter[s];
+                symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
+                total += (unsigned)normalizedCounter[s];
     }   }   }   }
 #if 0  /* debug : symbol costs */
@@ -164,8 +206,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
                     symbol, normalizedCounter[symbol],
                     FSE_getMaxNbBits(symbolTT, symbol),
                     (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
-        }
-    }
+    }   }
 #endif

     return 0;
@@ -173,16 +214,18 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
 #ifndef FSE_COMMONDEFS_ONLY

 /*-**************************************************************
 *  FSE NCount encoding
 ****************************************************************/
 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
 {
-    size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
+    size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
+                                   + 4 /* bitCount initialized at 4 */
+                                   + 2 /* first two symbols may use one additional bit each */) / 8)
+                                    + 1 /* round up to whole nb bytes */
+                                    + 2 /* additional two bytes for bitstream flush */;
     return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;   /* maxSymbolValue==0 ? use default */
 }
...
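As a worked check of the new bound (my arithmetic, not from the patch): for
maxSymbolValue=255 and tableLog=11, the old formula gave (256*11)>>3 + 3 = 355
bytes, and the new one gives (256*11 + 4 + 2)/8 + 1 + 2 = 352 + 3 = 355 bytes
as well; the rewrite makes the accounting of the initial 4-bit count, the two
possible extra bits, rounding up, and the bitstream flush explicit rather than
folded into a magic "+3".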
@@ -73,7 +73,8 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                               void* entropyWorkspace, size_t entropyWorkspaceSize,
-                        const int bmi2)
+                        const int bmi2,
+                        unsigned suspectUncompressible)
 {
     size_t const minGain = ZSTD_minGain(srcSize, strategy);
     size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
@@ -105,11 +106,11 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
                 HUF_compress1X_repeat(
                     ostart+lhSize, dstCapacity-lhSize, src, srcSize,
                     HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
-                    (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
+                    (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible) :
                 HUF_compress4X_repeat(
                     ostart+lhSize, dstCapacity-lhSize, src, srcSize,
                     HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
-                    (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
+                    (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible);
     if (repeat != HUF_repeat_none) {
         /* reused the existing table */
         DEBUGLOG(5, "Reusing previous huffman table");
@@ -117,7 +118,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
     }

-    if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
+    if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
         ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
         return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
     }
...
@@ -18,12 +18,14 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
 size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);

+/* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
 size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
                               ZSTD_hufCTables_t* nextHuf,
                               ZSTD_strategy strategy, int disableLiteralCompression,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                               void* entropyWorkspace, size_t entropyWorkspaceSize,
-                        const int bmi2);
+                        const int bmi2,
+                        unsigned suspectUncompressible);

 #endif /* ZSTD_COMPRESS_LITERALS_H */
@@ -85,6 +85,8 @@ static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t
 {
     unsigned cost = 0;
     unsigned s;
+
+    assert(total > 0);
     for (s = 0; s <= max; ++s) {
         unsigned norm = (unsigned)((256 * count[s]) / total);
         if (count[s] != 0 && norm == 0)
@@ -273,10 +275,11 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
     assert(nbSeq_1 > 1);
     assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
     (void)entropyWorkspaceSize;
-    FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
-    {   size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog);   /* overflow protected */
+    FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "FSE_normalizeCount failed");
+    assert(oend >= op);
+    {   size_t const NCountSize = FSE_writeNCount(op, (size_t)(oend - op), wksp->norm, max, tableLog);   /* overflow protected */
         FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
-        FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "");
+        FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "FSE_buildCTable_wksp failed");
         return NCountSize;
     }
 }
@@ -310,19 +313,19 @@ ZSTD_encodeSequences_body(
     FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
     BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
     if (MEM_32bits()) BIT_flushBits(&blockStream);
-    BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]);
     if (MEM_32bits()) BIT_flushBits(&blockStream);
     if (longOffsets) {
         U32 const ofBits = ofCodeTable[nbSeq-1];
         unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
         if (extraBits) {
-            BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
+            BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, extraBits);
             BIT_flushBits(&blockStream);
         }
-        BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offBase >> extraBits,
                     ofBits - extraBits);
     } else {
-        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]);
     }
     BIT_flushBits(&blockStream);

@@ -336,8 +339,8 @@ ZSTD_encodeSequences_body(
             U32 const mlBits = ML_bits[mlCode];
             DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
                         (unsigned)sequences[n].litLength,
-                        (unsigned)sequences[n].matchLength + MINMATCH,
-                        (unsigned)sequences[n].offset);
+                        (unsigned)sequences[n].mlBase + MINMATCH,
+                        (unsigned)sequences[n].offBase);
                                                                             /* 32b*/  /* 64b*/
                                                                             /* (7)*/  /* (7)*/
             FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
@@ -348,18 +351,18 @@ ZSTD_encodeSequences_body(
                 BIT_flushBits(&blockStream);                                /* (7)*/
             BIT_addBits(&blockStream, sequences[n].litLength, llBits);
             if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
-            BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
+            BIT_addBits(&blockStream, sequences[n].mlBase, mlBits);
             if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
             if (longOffsets) {
                 unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
                 if (extraBits) {
-                    BIT_addBits(&blockStream, sequences[n].offset, extraBits);
+                    BIT_addBits(&blockStream, sequences[n].offBase, extraBits);
                     BIT_flushBits(&blockStream);                            /* (7)*/
                 }
-                BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
+                BIT_addBits(&blockStream, sequences[n].offBase >> extraBits,
                             ofBits - extraBits);                            /* 31 */
             } else {
-                BIT_addBits(&blockStream, sequences[n].offset, ofBits);     /* 31 */
+                BIT_addBits(&blockStream, sequences[n].offBase, ofBits);    /* 31 */
             }
             BIT_flushBits(&blockStream);                                    /* (7)*/
             DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
@@ -396,7 +399,7 @@ ZSTD_encodeSequences_default(

 #if DYNAMIC_BMI2

-static TARGET_ATTRIBUTE("bmi2") size_t
+static BMI2_TARGET_ATTRIBUTE size_t
 ZSTD_encodeSequences_bmi2(
     void* dst, size_t dstCapacity,
     FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
...
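The matchLength/offset fields touched above were renamed to mlBase/offBase in
this release to make the stored bias explicit. A sketch of the relationship
(field names from the diff; the struct layout is assumed, not quoted from the
patch):

/* Sketch of the renamed sequence fields. */
typedef struct {
    U32 offBase;     /* biased offset code (was 'offset') */
    U16 litLength;
    U16 mlBase;      /* matchLength - MINMATCH (was 'matchLength') */
} seqDef_sketch;

/* Recovering the true match length, exactly as the DEBUGLOG call above does:
 *     matchLength = seq.mlBase + MINMATCH;
 */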
@@ -63,6 +63,7 @@ size_t ZSTD_ldm_generateSequences(
  */
 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
             ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+            ZSTD_paramSwitch_e useRowMatchFinder,
             void const* src, size_t srcSize);

 /*
...