Commit 82c2fbd8 authored by Sergei Golubchik's avatar Sergei Golubchik

non-SIMD fallback

parent ea0ee329
...@@ -30,6 +30,13 @@ SOFTWARE. ...@@ -30,6 +30,13 @@ SOFTWARE.
#include <algorithm> #include <algorithm>
#ifdef HAVE_IMMINTRIN_H #ifdef HAVE_IMMINTRIN_H
#include <immintrin.h> #include <immintrin.h>
#if __GNUC__ > 7
#define DEFAULT_IMPLEMENTATION __attribute__ ((target ("default")))
#define AVX2_IMPLEMENTATION __attribute__ ((target ("avx2,avx,fma")))
#endif
#endif
#ifndef DEFAULT_IMPLEMENTATION
#define DEFAULT_IMPLEMENTATION
#endif #endif
template <typename T> template <typename T>
...@@ -49,8 +56,8 @@ struct PatternedSimdBloomFilter ...@@ -49,8 +56,8 @@ struct PatternedSimdBloomFilter
return std::max<uint32_t>(512, static_cast<uint32_t>(bits_per_val * n + 0.5)); return std::max<uint32_t>(512, static_cast<uint32_t>(bits_per_val * n + 0.5));
} }
#if __GNUC__ > 7 && defined(HAVE_IMMINTRIN_H) #ifdef AVX2_IMPLEMENTATION
__attribute__ ((target ("avx2,avx,fma"))) AVX2_IMPLEMENTATION
__m256i CalcHash(__m256i vecData) __m256i CalcHash(__m256i vecData)
{ {
// (almost) xxHash parallel version, 64bit input, 64bit output, seed=0 // (almost) xxHash parallel version, 64bit input, 64bit output, seed=0
...@@ -76,7 +83,7 @@ struct PatternedSimdBloomFilter ...@@ -76,7 +83,7 @@ struct PatternedSimdBloomFilter
return _mm256_xor_si256(step9, _mm256_srli_epi64(step9, 28)); return _mm256_xor_si256(step9, _mm256_srli_epi64(step9, 28));
} }
__attribute__ ((target ("avx2,avx,fma"))) AVX2_IMPLEMENTATION
__m256i GetBlockIdx(__m256i vecHash) __m256i GetBlockIdx(__m256i vecHash)
{ {
__m256i vecNumBlocksMask = _mm256_set1_epi64x(num_blocks - 1); __m256i vecNumBlocksMask = _mm256_set1_epi64x(num_blocks - 1);
...@@ -84,7 +91,7 @@ struct PatternedSimdBloomFilter ...@@ -84,7 +91,7 @@ struct PatternedSimdBloomFilter
return _mm256_and_si256(vecBlockIdx, vecNumBlocksMask); return _mm256_and_si256(vecBlockIdx, vecNumBlocksMask);
} }
__attribute__ ((target ("avx2,avx,fma"))) AVX2_IMPLEMENTATION
__m256i ConstructMask(__m256i vecHash) __m256i ConstructMask(__m256i vecHash)
{ {
__m256i vecMaskIdxMask = _mm256_set1_epi64x((1 << mask_idx_bits) - 1); __m256i vecMaskIdxMask = _mm256_set1_epi64x((1 << mask_idx_bits) - 1);
...@@ -103,7 +110,7 @@ struct PatternedSimdBloomFilter ...@@ -103,7 +110,7 @@ struct PatternedSimdBloomFilter
return _mm256_or_si256(vecShiftDown, vecShiftUp); return _mm256_or_si256(vecShiftDown, vecShiftUp);
} }
__attribute__ ((target ("avx2,avx,fma"))) AVX2_IMPLEMENTATION
void Insert(const T **data) void Insert(const T **data)
{ {
__m256i vecDataA = _mm256_loadu_si256(reinterpret_cast<__m256i *>(data + 0)); __m256i vecDataA = _mm256_loadu_si256(reinterpret_cast<__m256i *>(data + 0));
...@@ -137,7 +144,7 @@ struct PatternedSimdBloomFilter ...@@ -137,7 +144,7 @@ struct PatternedSimdBloomFilter
bv[block7] |= _mm256_extract_epi64(vecMaskB, 3); bv[block7] |= _mm256_extract_epi64(vecMaskB, 3);
} }
__attribute__ ((target ("avx2,avx,fma"))) AVX2_IMPLEMENTATION
uint8_t Query(T **data) uint8_t Query(T **data)
{ {
__m256i vecDataA = _mm256_loadu_si256(reinterpret_cast<__m256i *>(data + 0)); __m256i vecDataA = _mm256_loadu_si256(reinterpret_cast<__m256i *>(data + 0));
...@@ -164,6 +171,70 @@ struct PatternedSimdBloomFilter ...@@ -164,6 +171,70 @@ struct PatternedSimdBloomFilter
} }
#endif #endif
/********************************************************
********* non-SIMD fallback version ********************/
uint64_t CalcHash_1(const T* data)
{
static constexpr uint64_t prime_mx2= 0x9FB21C651E98DF25ULL;
static constexpr uint64_t bitflip= 0xC73AB174C5ECD5A2ULL;
uint64_t step1= ((intptr)data) ^ bitflip;
uint64_t step2= (step1 >> 48) ^ (step1 << 16);
uint64_t step3= (step1 >> 24) ^ (step1 << 40);
uint64_t step4= step1 ^ step2 ^ step3;
uint64_t step5= step4 * prime_mx2;
uint64_t step6= step5 >> 35;
uint64_t step7= step6 + 8;
uint64_t step8= step5 ^ step7;
uint64_t step9= step8 * prime_mx2;
return step9 ^ (step9 >> 28);
}
uint64_t GetBlockIdx_1(uint64_t hash)
{
uint64_t blockIdx = hash >> (mask_idx_bits + rotate_bits);
return blockIdx & (num_blocks - 1);
}
uint64_t ConstructMask_1(uint64_t hash)
{
uint64_t maskIdxMask = (1 << mask_idx_bits) - 1;
uint64_t maskMask = (1ULL << bits_per_mask) - 1;
uint64_t maskIdx = hash & maskIdxMask;
uint64_t maskByteIdx = maskIdx >> 3;
uint64_t maskBitIdx = maskIdx & 7;
uint64_t rawMask = *(uint64_t *)(masks + maskByteIdx);
uint64_t unrotated = (rawMask >> maskBitIdx) & maskMask;
uint64_t rotation = (hash >> mask_idx_bits) & ((1 << rotate_bits) - 1);
return rotation ? (unrotated << rotation) | (unrotated >> (64 - rotation))
: unrotated;
}
DEFAULT_IMPLEMENTATION
void Insert(const T **data)
{
for (size_t i = 0; i < 8; i++)
{
uint64_t hash = CalcHash_1(data[i]);
uint64_t mask = ConstructMask_1(hash);
bv[GetBlockIdx_1(hash)] |= mask;
}
}
DEFAULT_IMPLEMENTATION
uint8_t Query(T **data)
{
uint8_t res_bits = 0;
for (size_t i = 0; i < 8; i++)
{
uint64_t hash = CalcHash_1(data[i]);
uint64_t mask = ConstructMask_1(hash);
if ((bv[GetBlockIdx_1(hash)] & mask) == mask)
res_bits |= 1 << i;
}
return res_bits;
}
int n; int n;
float epsilon; float epsilon;
......
...@@ -85,6 +85,30 @@ struct Neighborhood: public Sql_alloc ...@@ -85,6 +85,30 @@ struct Neighborhood: public Sql_alloc
}; };
#ifdef AVX2_IMPLEMENTATION
AVX2_IMPLEMENTATION
float vec_distance(float *v1, float *v2, size_t len)
{
typedef float v8f __attribute__((vector_size(SIMD_word)));
v8f *p1= (v8f*)v1;
v8f *p2= (v8f*)v2;
v8f d= {0};
for (size_t i= 0; i < len/SIMD_floats; p1++, p2++, i++)
{
v8f dist= *p1 - *p2;
d+= dist * dist;
}
return d[0] + d[1] + d[2] + d[3] + d[4] + d[5] + d[6] + d[7];
}
#endif
DEFAULT_IMPLEMENTATION
float vec_distance(float *v1, float *v2, size_t len)
{
return euclidean_vec_distance(v1, v2, len);
}
/* /*
One node in a graph = one row in the graph table One node in a graph = one row in the graph table
...@@ -526,20 +550,7 @@ FVectorNode::FVectorNode(MHNSW_Context *ctx_, const void *tref_, uint8_t layer, ...@@ -526,20 +550,7 @@ FVectorNode::FVectorNode(MHNSW_Context *ctx_, const void *tref_, uint8_t layer,
float FVectorNode::distance_to(const FVector &other) const float FVectorNode::distance_to(const FVector &other) const
{ {
#if __GNUC__ > 7 return vec_distance(vec, other.vec, ctx->vec_len);
typedef float v8f __attribute__((vector_size(SIMD_word)));
v8f *p1= (v8f*)vec;
v8f *p2= (v8f*)other.vec;
v8f d= {0,0,0,0,0,0,0,0};
for (size_t i= 0; i < ctx->vec_len/SIMD_floats; p1++, p2++, i++)
{
v8f dist= *p1 - *p2;
d+= dist * dist;
}
return d[0] + d[1] + d[2] + d[3] + d[4] + d[5] + d[6] + d[7];
#else
return euclidean_vec_distance(vec, other.vec, ctx->vec_len);
#endif
} }
int FVectorNode::alloc_neighborhood(uint8_t layer) int FVectorNode::alloc_neighborhood(uint8_t layer)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment