diff --git a/sql/vector_mhnsw.cc b/sql/vector_mhnsw.cc index d6cd012c54552ecb6f9fff686763a649f31ba3a4..2fb21635a87f2f9c8327a37cc4e564b4483e3201 100644 --- a/sql/vector_mhnsw.cc +++ b/sql/vector_mhnsw.cc @@ -38,8 +38,8 @@ static const double alpha = 1.1; static const uint clo_nei_threshold= 10000; // SIMD definitions -#define SIMD_word (256/8) -#define SIMD_floats (SIMD_word/sizeof(float)) +#define SIMD_bytes (256/8) +#define SIMD_dims (SIMD_bytes/sizeof(int16_t)) enum Graph_table_fields { FIELD_LAYER, FIELD_TREF, FIELD_VEC, FIELD_NEIGHBORS @@ -51,17 +51,26 @@ enum Graph_table_indices { class MHNSW_Context; class FVectorNode; -/* - One vector, an array of ctx->vec_len floats +#pragma pack(push, 1) +struct vector +{ + float abs2, scale; + int16_t dims[4]; + static constexpr size_t alloc_size(size_t n) + { return sizeof(float)*2+MY_ALIGN(n*2, SIMD_bytes) + SIMD_bytes - 1; } + static constexpr size_t data_size(size_t n) + { return sizeof(float)*2+n*2; } +}; +#pragma pack(pop) - Aligned on 32-byte (SIMD_word) boundary for SIMD, vector lenght - is zero-padded to multiples of 8, for the same reason. +/* + One vector, an array of coordinates in ctx->vec_len dimensions */ class FVector { public: FVector(MHNSW_Context *ctx_, MEM_ROOT *root, const void *vec_); - float *vec; + vector *vec; protected: FVector() : vec(nullptr) {} }; @@ -98,26 +107,30 @@ struct Neighborhood: public Sql_alloc #if __GNUC__ > 7 -__attribute__ ((target ("avx2,avx,fma"))) -float vec_distance(float *v1, float *v2, size_t len) +__attribute__ ((target ("avx2,avx"))) +float dot_product(int16_t *v1, int16_t *v2, size_t len) { - typedef float v8f __attribute__((vector_size(SIMD_word))); - v8f *p1= (v8f*)v1; - v8f *p2= (v8f*)v2; + typedef float v8f __attribute__((vector_size(SIMD_bytes))); + union { v8f v; __m256 i; } tmp; + __m256i *p1= (__m256i*)v1; + __m256i *p2= (__m256i*)v2; v8f d= {0}; - for (size_t i= 0; i < len/SIMD_floats; p1++, p2++, i++) + for (size_t i= 0; i < len/SIMD_dims; p1++, p2++, i++) { - v8f dist= *p1 - *p2; - d+= dist * dist; + tmp.i= _mm256_cvtepi32_ps(_mm256_madd_epi16(*p1, *p2)); + d+= tmp.v; } return d[0] + d[1] + d[2] + d[3] + d[4] + d[5] + d[6] + d[7]; } #endif __attribute__ ((target ("default"))) -float vec_distance(float *v1, float *v2, size_t len) +float dot_product(int16_t *v1, int16_t *v2, size_t len) { - return euclidean_vec_distance(v1, v2, len); + int64_t d= 0; + for (size_t i= 0; i < len; i++) + d+= int32_t(v1[i]) * int32_t(v2[i]); + return d; } @@ -148,7 +161,7 @@ class FVectorNode: public FVector private: MHNSW_Context *ctx; - float *make_vec(const void *v); + vector *make_vec(const void *v); int alloc_neighborhood(size_t layer); public: Neighborhood *neighbors= nullptr; @@ -204,7 +217,7 @@ class MHNSW_Context : public Sql_alloc void *alloc_node_internal() { return alloc_root(&root, sizeof(FVectorNode) + gref_len + tref_len - + vec_len * sizeof(float) + SIMD_word - 1); + + vector::alloc_size(vec_len)); } protected: @@ -276,7 +289,7 @@ class MHNSW_Context : public Sql_alloc void set_lengths(size_t len) { byte_len= len; - vec_len= MY_ALIGN(byte_len/sizeof(float), SIMD_floats); + vec_len= len / sizeof(float); } static int acquire(MHNSW_Context **ctx, TABLE *table, bool for_update); @@ -522,29 +535,39 @@ int MHNSW_Context::acquire(MHNSW_Context **ctx, TABLE *table, bool for_update) return err; graph->file->position(graph->record[0]); - (*ctx)->set_lengths(graph->field[FIELD_VEC]->value_length()); + (*ctx)->set_lengths((graph->field[FIELD_VEC]->value_length()-8)*2); (*ctx)->start= (*ctx)->get_node(graph->file->ref); return (*ctx)->start->load_from_record(graph); } -/* copy the vector, aligned and padded for SIMD */ -static float *make_vec(void *mem, const void *src, size_t src_len) +/* copy the vector, preprocessed as needed */ +static vector *make_vec(void *mem, const void *src, size_t src_len) { - auto dst= (float*)MY_ALIGN((intptr)mem, SIMD_word); - memcpy(dst, src, src_len); - const size_t start= src_len/sizeof(float); - for (size_t i= start; i < MY_ALIGN(start, SIMD_floats); i++) - dst[i]=0.0f; - return dst; + auto vec= (vector*)(MY_ALIGN(((intptr)mem)+8, SIMD_bytes) - 8); + float abs2= 0, scale=0, *v= (float *)src; + size_t vec_len= src_len / sizeof(float); + for (size_t i= 0; i < vec_len; i++) + { + abs2+= v[i]*v[i]; + if (std::abs(scale) < std::abs(v[i])) + scale= v[i]; + } + vec->abs2= abs2/2; + vec->scale= scale ? scale/32767 : 1; + for (size_t i= 0; i < vec_len; i++) + vec->dims[i] = std::round(v[i] / vec->scale); + bzero(vec->dims + vec_len, (MY_ALIGN(vec_len, SIMD_dims) - vec_len)*2); + + return vec; } FVector::FVector(MHNSW_Context *ctx, MEM_ROOT *root, const void *vec_) { - vec= make_vec(alloc_root(root, ctx->vec_len * sizeof(float) + SIMD_word - 1), + vec= make_vec(alloc_root(root, vector::alloc_size(ctx->vec_len)), vec_, ctx->byte_len); } -float *FVectorNode::make_vec(const void *v) +vector *FVectorNode::make_vec(const void *v) { return ::make_vec(tref() + tref_len(), v, ctx->byte_len); } @@ -569,7 +592,8 @@ FVectorNode::FVectorNode(MHNSW_Context *ctx_, const void *tref_, size_t layer, float FVectorNode::distance_to(const FVector &other) const { - return vec_distance(vec, other.vec, ctx->vec_len); + return vec->abs2 + other.vec->abs2 - vec->scale * other.vec->scale * + dot_product(vec->dims, other.vec->dims, MY_ALIGN(ctx->vec_len, SIMD_dims)); } int FVectorNode::alloc_neighborhood(size_t layer) @@ -620,9 +644,11 @@ int FVectorNode::load_from_record(TABLE *graph) if (unlikely(!v)) return my_errno= HA_ERR_CRASHED; - if (v->length() != ctx->byte_len) + if (v->length() != vector::data_size(ctx->vec_len)) return my_errno= HA_ERR_CRASHED; - float *vec_ptr= make_vec(v->ptr()); + auto vec_ptr= (vector*)(MY_ALIGN(((intptr)tref() + tref_len())+8, SIMD_bytes) - 8); + memcpy(vec_ptr, v->ptr(), v->length()); + bzero(vec_ptr->dims + ctx->vec_len, 2*(MY_ALIGN(ctx->vec_len, SIMD_dims) - ctx->vec_len)); size_t layer= graph->field[FIELD_LAYER]->val_int(); if (layer > 100) // 10e30 nodes at M=2, more at larger M's @@ -803,7 +829,7 @@ int FVectorNode::save(TABLE *graph) graph->field[FIELD_TREF]->set_notnull(); graph->field[FIELD_TREF]->store_binary(tref(), tref_len()); } - graph->field[FIELD_VEC]->store_binary((uchar*)vec, ctx->byte_len); + graph->field[FIELD_VEC]->store_binary((uchar*)vec, vector::data_size(ctx->vec_len)); size_t total_size= 0; for (size_t i=0; i <= max_layer; i++) @@ -1095,10 +1121,15 @@ int mhnsw_first(TABLE *table, KEY *keyinfo, Item *dist, ulonglong limit) /* if the query vector is NULL or invalid, VEC_DISTANCE will return NULL, so the result is basically unsorted, we can return rows - in any order. For simplicity let's sort by the start_node. + in any order. Let's use some hardcoded value here */ if (!res || ctx->byte_len != res->length()) - (res= &buf)->set((char*)start_nodes.links[0]->vec, ctx->byte_len, &my_charset_bin); + { + res= &buf; + buf.alloc(ctx->byte_len); + for (size_t i=0; i < ctx->vec_len; i++) + ((float*)buf.ptr())[i]= i == 0; + } const longlong max_layer= start_nodes.links[0]->max_layer; FVector target(ctx, thd->mem_root, res->ptr());