AVX-512 support

f9307eaa · Sergei Golubchik · 8547d088 · f9307eaa · f9307eaa
Commit f9307eaa authored Aug 15, 2024 by Sergei Golubchik
Show whitespace changes
Inline Side-by-side

Showing with 39 additions and 0 deletions

sql/bloom_filters.h sql/bloom_filters.h +6 -0

sql/vector_mhnsw.cc sql/vector_mhnsw.cc +33 -0

No files found.
--- a/sql/bloom_filters.h
+++ b/sql/bloom_filters.h
@@ -33,6 +33,9 @@ SOFTWARE.
 #if __GNUC__ > 7
 #define DEFAULT_IMPLEMENTATION    __attribute__ ((target ("default")))
 #define AVX2_IMPLEMENTATION __attribute__ ((target ("avx2,avx,fma")))
+#if __GNUC__ > 9 /* AVX-512 variant is only offered when __GNUC__ is 10 or higher */
+#define AVX512_IMPLEMENTATION __attribute__ ((target ("avx512f,avx512bw"))) /* build the annotated function for AVX-512F + AVX-512BW */
+#endif
 #endif
 #endif
 #ifndef DEFAULT_IMPLEMENTATION
@@ -169,6 +172,9 @@ struct PatternedSimdBloomFilter
    uint8_t res_bits = static_cast<uint8_t>(_mm256_movemask_epi8(_mm256_set1_epi64x(res_bytes)) & 0xff);
    return res_bits;
  }
+  /* An AVX-512 version can be (and was) implemented, but the speedup is
+     basically unnoticeable, well below the noise level */
 #endif
  /********************************************************

--- a/sql/vector_mhnsw.cc
+++ b/sql/vector_mhnsw.cc
@@ -156,6 +156,39 @@ struct FVector
  }
 #endif
+#ifdef AVX512_IMPLEMENTATION
+  /************* AVX512 ****************************************************/
+  static constexpr size_t AVX512_bytes= 512/8; /* 512 bits expressed in bytes */
+  static constexpr size_t AVX512_dims= AVX512_bytes/sizeof(int16_t); /* int16_t elements per 512-bit chunk */
+  static_assert(subdist_part % AVX512_dims == 0); /* subdist_part must be a whole number of 512-bit chunks */
+  AVX512_IMPLEMENTATION /* AVX-512 dot product of two int16_t arrays of len elements, returned as float */
+  static float dot_product(const int16_t *v1, const int16_t *v2, size_t len)
+  {
+    __m512i *p1= (__m512i*)v1; /* walk both inputs in 512-bit chunks; NOTE(review): presumably requires 64-byte aligned data, see align_ptr() - confirm */
+    __m512i *p2= (__m512i*)v2;
+    __m512 d= _mm512_setzero_ps(); /* float partial sums, start at zero */
+    for (size_t i= 0; i < (len + AVX512_dims-1)/AVX512_dims; p1++, p2++, i++) /* ceil(len/AVX512_dims) chunks: the last one may read past len, so the tail must hold zeros (see fix_tail()) */
+      d= _mm512_add_ps(d, _mm512_cvtepi32_ps(_mm512_madd_epi16(*p1, *p2))); /* int16 products, adjacent pairs summed to int32, converted to float, accumulated */
+    return _mm512_reduce_add_ps(d); /* horizontal sum of all partial sums */
+  }
+  AVX512_IMPLEMENTATION /* allocation size in bytes: header + padded data + alignment slack */
+  static size_t alloc_size(size_t n)
+  { return alloc_header + MY_ALIGN(n*2, AVX512_bytes) + AVX512_bytes - 1; } /* n*2: presumably n int16_t values in bytes; the extra AVX512_bytes-1 leaves room for align_ptr() to shift the start */
+  AVX512_IMPLEMENTATION /* picks an FVector address inside ptr so that (address + alloc_header) is aligned to AVX512_bytes */
+  static FVector *align_ptr(void *ptr)
+  { return (FVector*)(MY_ALIGN(((intptr)ptr) + alloc_header, AVX512_bytes) /* align the address just past the header; assumes MY_ALIGN rounds up - confirm */
+                      - alloc_header); } /* then step back over the header */
+  AVX512_IMPLEMENTATION /* zero the dims slots from vec_len up to the next multiple of AVX512_dims */
+  void fix_tail(size_t vec_len)
+  {
+    bzero(dims + vec_len, (MY_ALIGN(vec_len, AVX512_dims) - vec_len)*2); /* *2 converts elements to bytes; presumably dims holds int16_t - confirm */
+  }
+#endif
  /************* no-SIMD default ******************************************/
  DEFAULT_IMPLEMENTATION
  static float dot_product(const int16_t *v1, const int16_t *v2, size_t len)