Commit 1cd6bdb4 authored by Sergei Golubchik

mhnsw: fix the heuristic neighbor selection algorithm

parent f51ff23b
...@@ -213,9 +213,9 @@ static int cmp_vec(const FVector *target, const FVectorNode *a, const FVectorNod ...@@ -213,9 +213,9 @@ static int cmp_vec(const FVector *target, const FVectorNode *a, const FVectorNod
const bool KEEP_PRUNED_CONNECTIONS=true; // XXX why? const bool KEEP_PRUNED_CONNECTIONS=true; // XXX why?
const bool EXTEND_CANDIDATES=true; // XXX or false? const bool EXTEND_CANDIDATES=true; // XXX or false?
static int select_neighbors(MHNSW_Context *ctx, static int select_neighbors(MHNSW_Context *ctx, size_t layer,
size_t layer, const FVectorNode &target, const FVectorNode &target,
const List<FVectorNode> &candidates, const List<FVectorNode> &candidates_unsafe,
size_t max_neighbor_connections) size_t max_neighbor_connections)
{ {
/* /*
...@@ -224,16 +224,20 @@ static int select_neighbors(MHNSW_Context *ctx, ...@@ -224,16 +224,20 @@ static int select_neighbors(MHNSW_Context *ctx,
*/ */
Hash_set<FVectorNode> visited(PSI_INSTRUMENT_MEM, FVectorNode::get_key); Hash_set<FVectorNode> visited(PSI_INSTRUMENT_MEM, FVectorNode::get_key);
Queue<FVectorNode, const FVector> pq; // working queue Queue<FVectorNode, const FVector> pq; // working queue
Queue<FVectorNode, const FVector> pq_discard; // queue for discarded candidates Queue<FVectorNode, const FVector> pq_discard; // queue for discarded candidates
Queue<FVectorNode, const FVector> best; // neighbors to return /*
make a copy of candidates in case it's target.get_neighbors(layer).
because we're going to modify the latter below
*/
List<FVectorNode> candidates= candidates_unsafe;
List<FVectorNode> &neighbors= target.get_neighbors(layer);
neighbors.empty();
// TODO(cvicentiu) this 1000 here is a hardcoded value for max queue size. // TODO(cvicentiu) this 1000 here is a hardcoded value for max queue size.
// This should not be fixed. // This should not be fixed.
if (pq.init(10000, 0, cmp_vec, &target) || if (pq.init(10000, 0, cmp_vec, &target) ||
pq_discard.init(10000, 0, cmp_vec, &target) || pq_discard.init(10000, 0, cmp_vec, &target))
best.init(max_neighbor_connections, true, cmp_vec, &target))
return HA_ERR_OUT_OF_MEM; return HA_ERR_OUT_OF_MEM;
for (const FVectorNode &candidate : candidates) for (const FVectorNode &candidate : candidates)
...@@ -257,38 +261,33 @@ static int select_neighbors(MHNSW_Context *ctx, ...@@ -257,38 +261,33 @@ static int select_neighbors(MHNSW_Context *ctx,
} }
DBUG_ASSERT(pq.elements()); DBUG_ASSERT(pq.elements());
best.push(pq.pop()); neighbors.push_back(pq.pop(), &ctx->root);
float best_top= best.top()->distance_to(target); while (pq.elements() && neighbors.elements < max_neighbor_connections)
while (pq.elements() && best.elements() < max_neighbor_connections)
{ {
const FVectorNode *vec= pq.pop(); const FVectorNode *vec= pq.pop();
const float cur_dist= vec->distance_to(target); const float target_dist= vec->distance_to(target);
if (cur_dist < best_top) bool discard= false;
for (const FVectorNode &neigh : neighbors)
{ {
DBUG_ASSERT(0); // impossible. XXX redo the loop if ((discard= vec->distance_to(neigh) < target_dist))
best.push(vec); break;
best_top= cur_dist;
} }
else if (discard)
pq_discard.push(vec); pq_discard.push(vec);
else
neighbors.push_back(vec, &ctx->root);
} }
if (KEEP_PRUNED_CONNECTIONS) if (KEEP_PRUNED_CONNECTIONS)
{ {
while (pq_discard.elements() && while (pq_discard.elements() &&
best.elements() < max_neighbor_connections) neighbors.elements < max_neighbor_connections)
{ {
best.push(pq_discard.pop()); neighbors.push_back(pq_discard.pop(), &ctx->root);
} }
} }
DBUG_ASSERT(best.elements() <= max_neighbor_connections);
List<FVectorNode> &neighbors= target.get_neighbors(layer);
neighbors.empty();
while (best.elements())
neighbors.push_front(best.pop(), &ctx->root);
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment