Commit 82d00ec4 authored by Sergei Golubchik

AVX-512 support

parent 92878e97
......@@ -169,6 +169,9 @@ struct PatternedSimdBloomFilter
uint8_t res_bits = static_cast<uint8_t>(_mm256_movemask_epi8(_mm256_set1_epi64x(res_bytes)) & 0xff);
return res_bits;
}
/* An AVX-512 version is possible (and was implemented), but the speedup
is practically unnoticeable, well below the noise level */
#endif
/********************************************************
......
......@@ -127,6 +127,37 @@ struct FVector
{
bzero(dims + vec_len, (MY_ALIGN(vec_len, AVX2_dims) - vec_len)*2);
}
/************* AVX512 ****************************************************/
/* Size in bytes of one 512-bit AVX-512 register. */
static constexpr size_t AVX512_bytes= 512/8;
/* Number of int16_t elements that fit in one 512-bit register. */
static constexpr size_t AVX512_dims= AVX512_bytes/sizeof(int16_t);
/* Compile-time check: subdist_part must be a whole multiple of AVX512_dims. */
static_assert(subdist_part % AVX512_dims == 0);
/*
  AVX-512 dot product of two int16_t arrays, returned as a float.

  The inputs are consumed in whole 512-bit blocks: len is rounded up to the
  next multiple of AVX512_dims, so elements between len and that boundary
  are read and take part in the sum. Both pointers are dereferenced as
  __m512i, i.e. they are treated as suitably aligned for that type.
  NOTE(review): presumably callers zero the padding and align the data
  beforehand - confirm against the call sites.
*/
__attribute__ ((target ("avx512f,avx512bw")))
static float dot_product(const int16_t *v1, const int16_t *v2, size_t len)
{
  __m512i *lhs= (__m512i*)v1;
  __m512i *rhs= (__m512i*)v2;
  /* number of 512-bit blocks needed to cover len elements, rounded up */
  const size_t blocks= (len + AVX512_dims-1)/AVX512_dims;
  __m512 acc= _mm512_setzero_ps();
  for (size_t n= 0; n < blocks; n++)
  {
    /* multiply int16 pairs and add neighbours into int32 lanes */
    __m512i pair_sums= _mm512_madd_epi16(lhs[n], rhs[n]);
    /* accumulate in float lanes */
    acc= _mm512_add_ps(acc, _mm512_cvtepi32_ps(pair_sums));
  }
  /* horizontal sum of the 16 float lanes */
  return _mm512_reduce_add_ps(acc);
}
/*
  Number of bytes to allocate for a vector of n dimensions (AVX-512 variant):
  alloc_header, plus n 2-byte values rounded up to a whole number of
  AVX512_bytes, plus AVX512_bytes-1 spare bytes.
  NOTE(review): the spare bytes presumably leave room for align_ptr() to
  move the start of the object - confirm.
*/
__attribute__ ((target ("avx512f,avx512bw")))
static size_t alloc_size(size_t n)
{
  const size_t payload= MY_ALIGN(n*2, AVX512_bytes);
  const size_t slack= AVX512_bytes - 1;
  return alloc_header + payload + slack;
}
/*
  Returns the FVector address derived from ptr such that the address
  alloc_header bytes past it is a multiple of AVX512_bytes: ptr+alloc_header
  is rounded up to that boundary and alloc_header is subtracted again.
*/
__attribute__ ((target ("avx512f,avx512bw")))
static FVector *align_ptr(void *ptr)
{
  /* address alloc_header bytes past the raw pointer */
  const auto past_header= ((intptr)ptr) + alloc_header;
  /* the same address rounded up to the AVX-512 boundary */
  const auto aligned= MY_ALIGN(past_header, AVX512_bytes);
  return (FVector*)(aligned - alloc_header);
}
/*
  Zeroes the elements of dims from index vec_len up to the next multiple
  of AVX512_dims (2 bytes per element). Nothing is written when vec_len is
  already such a multiple.
*/
__attribute__ ((target ("avx512f,avx512bw")))
void fix_tail(size_t vec_len)
{
  const size_t padded_len= MY_ALIGN(vec_len, AVX512_dims);
  const size_t pad_bytes= (padded_len - vec_len)*2;
  bzero(dims + vec_len, pad_bytes);
}
#endif
/************* no-SIMD default ******************************************/
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment