AVX-512 support

82d00ec4 · Sergei Golubchik · 92878e97 · 82d00ec4 · 82d00ec4
Commit 82d00ec4 authored Aug 15, 2024 by Sergei Golubchik
Hide whitespace changes
Inline Side-by-side

Showing with 34 additions and 0 deletions

sql/bloom_filters.h sql/bloom_filters.h +3 -0

sql/vector_mhnsw.cc sql/vector_mhnsw.cc +31 -0

No files found.
--- a/sql/bloom_filters.h
+++ b/sql/bloom_filters.h
@@ -169,6 +169,9 @@ struct PatternedSimdBloomFilter
    uint8_t res_bits = static_cast<uint8_t>(_mm256_movemask_epi8(_mm256_set1_epi64x(res_bytes)) & 0xff);
    return res_bits;
  }
+
+  /* An AVX-512 version can be (and was) implemented, but the speedup is
+     negligible, well below the noise level */
 #endif

  /********************************************************

--- a/sql/vector_mhnsw.cc
+++ b/sql/vector_mhnsw.cc
@@ -127,6 +127,37 @@ struct FVector
  {
    bzero(dims + vec_len, (MY_ALIGN(vec_len, AVX2_dims) - vec_len)*2);
  }
+
+  /************* AVX512 ****************************************************/
+  static constexpr size_t AVX512_bytes= 512/8; /* size of one 512-bit vector in bytes (64) */
+  static constexpr size_t AVX512_dims= AVX512_bytes/sizeof(int16_t); /* number of int16_t elements in one 512-bit vector (32) */
+  static_assert(subdist_part % AVX512_dims == 0); /* subdist_part must be a multiple of AVX512_dims */
+
+  __attribute__ ((target ("avx512f,avx512bw"))) /* built with AVX-512F and AVX-512BW instructions enabled */
+  static float dot_product(const int16_t *v1, const int16_t *v2, size_t len) /* sum of element-wise products of v1 and v2, over len rounded up to whole 512-bit blocks */
+  {
+    __m512i *p1= (__m512i*)v1; /* view the inputs as arrays of 512-bit blocks; they are dereferenced directly below, */
+    __m512i *p2= (__m512i*)v2; /* so both are presumably expected to be AVX512_bytes-aligned - TODO confirm against callers */
+    __m512 d= _mm512_setzero_ps(); /* 16 float partial sums, all starting at zero */
+    for (size_t i= 0; i < (len + AVX512_dims-1)/AVX512_dims; p1++, p2++, i++) /* ceil(len/AVX512_dims) iterations: reads past len up to the block boundary, so that tail must be readable and contribute 0 */
+      d= _mm512_add_ps(d, _mm512_cvtepi32_ps(_mm512_madd_epi16(*p1, *p2))); /* multiply int16 pairs, add adjacent products into int32 lanes, convert to float and accumulate */
+    return _mm512_reduce_add_ps(d); /* horizontal sum of the 16 float lanes */
+  }
+
+  __attribute__ ((target ("avx512f,avx512bw"))) /* built with AVX-512F and AVX-512BW instructions enabled */
+  static size_t alloc_size(size_t n) /* presumably n is the number of int16_t dims (hence n*2 bytes) - TODO confirm */
+  { return alloc_header + MY_ALIGN(n*2, AVX512_bytes) + AVX512_bytes - 1; } /* alloc_header + n*2 aligned to AVX512_bytes + AVX512_bytes-1 extra bytes (presumably slack for align_ptr to shift the start) */
+
+  __attribute__ ((target ("avx512f,avx512bw"))) /* built with AVX-512F and AVX-512BW instructions enabled */
+  static FVector *align_ptr(void *ptr) /* returns an address p such that p + alloc_header is AVX512_bytes-aligned */
+  { return (FVector*)(MY_ALIGN(((intptr)ptr) + alloc_header, AVX512_bytes) /* align the address alloc_header bytes past ptr (assuming MY_ALIGN rounds up to a multiple) */
+                      - alloc_header); } /* then step back by alloc_header to get the start of the object */
+
+  __attribute__ ((target ("avx512f,avx512bw"))) /* built with AVX-512F and AVX-512BW instructions enabled */
+  void fix_tail(size_t vec_len) /* zero-fills dims from index vec_len up to vec_len aligned to AVX512_dims */
+  {
+    bzero(dims + vec_len, (MY_ALIGN(vec_len, AVX512_dims) - vec_len)*2); /* *2 converts the element count to bytes, presumably sizeof(int16_t) per element */
+  }
 #endif

  /************* no-SIMD default ******************************************/