Commit b2b8c86c authored by Sergei Golubchik's avatar Sergei Golubchik

cleanups

parent 9b427691
...@@ -17,15 +17,8 @@ ...@@ -17,15 +17,8 @@
#include <my_global.h> #include <my_global.h>
#include "vector_mhnsw.h" #include "vector_mhnsw.h"
#include "field.h"
#include "hash.h"
#include "item.h"
#include "item_vectorfunc.h" #include "item_vectorfunc.h"
#include "key.h" #include "key.h"
#include "my_base.h"
#include "mysql/psi/psi_base.h"
#include "sql_queue.h"
#include <scope.h> #include <scope.h>
const LEX_CSTRING mhnsw_hlindex_table={STRING_WITH_LEN("\ const LEX_CSTRING mhnsw_hlindex_table={STRING_WITH_LEN("\
...@@ -215,11 +208,6 @@ static int select_neighbors(MHNSW_Context *ctx, size_t layer, ...@@ -215,11 +208,6 @@ static int select_neighbors(MHNSW_Context *ctx, size_t layer,
const List<FVectorNode> &candidates_unsafe, const List<FVectorNode> &candidates_unsafe,
size_t max_neighbor_connections) size_t max_neighbor_connections)
{ {
/*
TODO: If the input neighbors list is already sorted in search_layer, then
no need to do additional queue build steps here.
*/
Hash_set<FVectorNode> visited(PSI_INSTRUMENT_MEM, FVectorNode::get_key); Hash_set<FVectorNode> visited(PSI_INSTRUMENT_MEM, FVectorNode::get_key);
Queue<FVectorNode, const FVector> pq; // working queue Queue<FVectorNode, const FVector> pq; // working queue
Queue<FVectorNode, const FVector> pq_discard; // queue for discarded candidates Queue<FVectorNode, const FVector> pq_discard; // queue for discarded candidates
...@@ -231,8 +219,6 @@ static int select_neighbors(MHNSW_Context *ctx, size_t layer, ...@@ -231,8 +219,6 @@ static int select_neighbors(MHNSW_Context *ctx, size_t layer,
List<FVectorNode> &neighbors= target.get_neighbors(layer); List<FVectorNode> &neighbors= target.get_neighbors(layer);
neighbors.empty(); neighbors.empty();
// TODO(cvicentiu) this 1000 here is a hardcoded value for max queue size.
// This should not be fixed.
if (pq.init(10000, 0, cmp_vec, &target) || if (pq.init(10000, 0, cmp_vec, &target) ||
pq_discard.init(10000, 0, cmp_vec, &target)) pq_discard.init(10000, 0, cmp_vec, &target))
return HA_ERR_OUT_OF_MEM; return HA_ERR_OUT_OF_MEM;
...@@ -272,45 +258,6 @@ static int select_neighbors(MHNSW_Context *ctx, size_t layer, ...@@ -272,45 +258,6 @@ static int select_neighbors(MHNSW_Context *ctx, size_t layer,
} }
static void dbug_print_vec_ref(const char *prefix, uint layer,
const FVectorNode &ref)
{
#ifndef DBUG_OFF
// TODO(cvicentiu) disable this in release build.
char *ref_str= static_cast<char *>(alloca(ref.get_ref_len() * 2 + 1));
DBUG_ASSERT(ref_str);
char *ptr= ref_str;
for (size_t i= 0; i < ref.get_ref_len(); ptr += 2, i++)
{
snprintf(ptr, 3, "%02x", ref.get_ref()[i]);
}
DBUG_PRINT("VECTOR", ("%s %u %s", prefix, layer, ref_str));
#endif
}
static void dbug_print_vec_neigh(uint layer, const List<FVectorNode> &neighbors)
{
#ifndef DBUG_OFF
DBUG_PRINT("VECTOR", ("NEIGH: NUM: %d", neighbors.elements));
for (const FVectorNode& ref : neighbors)
{
dbug_print_vec_ref("NEIGH: ", layer, ref);
}
#endif
}
static void dbug_print_hash_vec(Hash_set<FVectorNode> &h)
{
#ifndef DBUG_OFF
for (FVectorNode &ptr : h)
{
DBUG_PRINT("VECTOR", ("HASH elem: %p", &ptr));
dbug_print_vec_ref("VISITED: ", 0, ptr);
}
#endif
}
static int write_neighbors(MHNSW_Context *ctx, size_t layer, static int write_neighbors(MHNSW_Context *ctx, size_t layer,
const FVectorNode &source_node) const FVectorNode &source_node)
{ {
...@@ -336,15 +283,9 @@ static int write_neighbors(MHNSW_Context *ctx, size_t layer, ...@@ -336,15 +283,9 @@ static int write_neighbors(MHNSW_Context *ctx, size_t layer,
graph->field[2]->store_binary(neighbor_array_bytes, total_size); graph->field[2]->store_binary(neighbor_array_bytes, total_size);
if (source_node.is_new()) if (source_node.is_new())
{
dbug_print_vec_ref("INSERT ", layer, source_node);
err= graph->file->ha_write_row(graph->record[0]); err= graph->file->ha_write_row(graph->record[0]);
}
else else
{ {
dbug_print_vec_ref("UPDATE ", layer, source_node);
dbug_print_vec_neigh(layer, new_neighbors);
uchar *key= static_cast<uchar*>(alloca(graph->key_info->key_length)); uchar *key= static_cast<uchar*>(alloca(graph->key_info->key_length));
key_copy(key, graph->record[0], graph->key_info, graph->key_info->key_length); key_copy(key, graph->record[0], graph->key_info, graph->key_info->key_length);
...@@ -415,7 +356,6 @@ static int search_layer(MHNSW_Context *ctx, ...@@ -415,7 +356,6 @@ static int search_layer(MHNSW_Context *ctx,
else if (node.distance_to(target) > best.top()->distance_to(target)) else if (node.distance_to(target) > best.top()->distance_to(target))
best.replace_top(&node); best.replace_top(&node);
visited.insert(&node); visited.insert(&node);
dbug_print_vec_ref("INSERTING node in visited: ", layer, node);
} }
float furthest_best= best.top()->distance_to(target); float furthest_best= best.top()->distance_to(target);
...@@ -431,7 +371,6 @@ static int search_layer(MHNSW_Context *ctx, ...@@ -431,7 +371,6 @@ static int search_layer(MHNSW_Context *ctx,
for (const FVectorNode &neigh: cur_vec.get_neighbors(layer)) for (const FVectorNode &neigh: cur_vec.get_neighbors(layer))
{ {
dbug_print_hash_vec(visited);
if (visited.find(&neigh)) if (visited.find(&neigh))
continue; continue;
...@@ -450,13 +389,9 @@ static int search_layer(MHNSW_Context *ctx, ...@@ -450,13 +389,9 @@ static int search_layer(MHNSW_Context *ctx,
} }
} }
} }
DBUG_PRINT("VECTOR", ("SEARCH_LAYER_END %d best", best.elements()));
while (best.elements()) while (best.elements())
{
// TODO(cvicentiu) this is n*log(n), we need a queue iterator.
result->push_front(best.pop(), &ctx->root); result->push_front(best.pop(), &ctx->root);
}
return 0; return 0;
} }
...@@ -530,8 +465,6 @@ int mhnsw_insert(TABLE *table, KEY *keyinfo) ...@@ -530,8 +465,6 @@ int mhnsw_insert(TABLE *table, KEY *keyinfo)
ref_ptr= graph->field[1]->val_str(&ref_str); ref_ptr= graph->field[1]->val_str(&ref_str);
FVectorNode start_node(&ctx, ref_ptr->ptr()); FVectorNode start_node(&ctx, ref_ptr->ptr());
// XXX may be *all* nodes in the last layer? there should be few
// xxx could boost recall, if needed
if (start_nodes.push_back(&start_node, &ctx.root)) if (start_nodes.push_back(&start_node, &ctx.root))
return HA_ERR_OUT_OF_MEM; return HA_ERR_OUT_OF_MEM;
...@@ -621,8 +554,8 @@ int mhnsw_first(TABLE *table, KEY *keyinfo, Item *dist, ulonglong limit) ...@@ -621,8 +554,8 @@ int mhnsw_first(TABLE *table, KEY *keyinfo, Item *dist, ulonglong limit)
FVectorNode start_node(&ctx, ref_ptr->ptr()); FVectorNode start_node(&ctx, ref_ptr->ptr());
// XXX or may be *all* nodes in the last layer? there should be few // one could put all max_layer nodes in start_nodes
// xxx could boost recall, if needed // but it has no effect of the recall or speed
if (start_nodes.push_back(&start_node, &ctx.root)) if (start_nodes.push_back(&start_node, &ctx.root))
return HA_ERR_OUT_OF_MEM; return HA_ERR_OUT_OF_MEM;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment