Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
2dc1e635
Commit
2dc1e635
authored
9 months ago
by
Sergei Golubchik
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
mhnsw: fix the heuristic neighbor selection algorithm
parent
f629c96c
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
23 additions
and
24 deletions
+23
-24
sql/vector_mhnsw.cc
sql/vector_mhnsw.cc
+23
-24
No files found.
sql/vector_mhnsw.cc
View file @
2dc1e635
...
@@ -213,9 +213,9 @@ static int cmp_vec(const FVector *target, const FVectorNode *a, const FVectorNod
...
@@ -213,9 +213,9 @@ static int cmp_vec(const FVector *target, const FVectorNode *a, const FVectorNod
const
bool
KEEP_PRUNED_CONNECTIONS
=
true
;
// XXX why?
const
bool
KEEP_PRUNED_CONNECTIONS
=
true
;
// XXX why?
const
bool
EXTEND_CANDIDATES
=
true
;
// XXX or false?
const
bool
EXTEND_CANDIDATES
=
true
;
// XXX or false?
static
int
select_neighbors
(
MHNSW_Context
*
ctx
,
static
int
select_neighbors
(
MHNSW_Context
*
ctx
,
size_t
layer
,
size_t
layer
,
const
FVectorNode
&
target
,
const
FVectorNode
&
target
,
const
List
<
FVectorNode
>
&
candidates
,
const
List
<
FVectorNode
>
&
candidates
_unsafe
,
size_t
max_neighbor_connections
)
size_t
max_neighbor_connections
)
{
{
/*
/*
...
@@ -224,16 +224,20 @@ static int select_neighbors(MHNSW_Context *ctx,
...
@@ -224,16 +224,20 @@ static int select_neighbors(MHNSW_Context *ctx,
*/
*/
Hash_set
<
FVectorNode
>
visited
(
PSI_INSTRUMENT_MEM
,
FVectorNode
::
get_key
);
Hash_set
<
FVectorNode
>
visited
(
PSI_INSTRUMENT_MEM
,
FVectorNode
::
get_key
);
Queue
<
FVectorNode
,
const
FVector
>
pq
;
// working queue
Queue
<
FVectorNode
,
const
FVector
>
pq
;
// working queue
Queue
<
FVectorNode
,
const
FVector
>
pq_discard
;
// queue for discarded candidates
Queue
<
FVectorNode
,
const
FVector
>
pq_discard
;
// queue for discarded candidates
Queue
<
FVectorNode
,
const
FVector
>
best
;
// neighbors to return
/*
make a copy of candidates in case it's target.get_neighbors(layer).
because we're going to modify the latter below
*/
List
<
FVectorNode
>
candidates
=
candidates_unsafe
;
List
<
FVectorNode
>
&
neighbors
=
target
.
get_neighbors
(
layer
);
neighbors
.
empty
();
// TODO(cvicentiu) this 1000 here is a hardcoded value for max queue size.
// TODO(cvicentiu) this 1000 here is a hardcoded value for max queue size.
// This should not be fixed.
// This should not be fixed.
if
(
pq
.
init
(
10000
,
0
,
cmp_vec
,
&
target
)
||
if
(
pq
.
init
(
10000
,
0
,
cmp_vec
,
&
target
)
||
pq_discard
.
init
(
10000
,
0
,
cmp_vec
,
&
target
)
||
pq_discard
.
init
(
10000
,
0
,
cmp_vec
,
&
target
))
best
.
init
(
max_neighbor_connections
,
true
,
cmp_vec
,
&
target
))
return
HA_ERR_OUT_OF_MEM
;
return
HA_ERR_OUT_OF_MEM
;
for
(
const
FVectorNode
&
candidate
:
candidates
)
for
(
const
FVectorNode
&
candidate
:
candidates
)
...
@@ -257,38 +261,33 @@ static int select_neighbors(MHNSW_Context *ctx,
...
@@ -257,38 +261,33 @@ static int select_neighbors(MHNSW_Context *ctx,
}
}
DBUG_ASSERT
(
pq
.
elements
());
DBUG_ASSERT
(
pq
.
elements
());
best
.
push
(
pq
.
pop
()
);
neighbors
.
push_back
(
pq
.
pop
(),
&
ctx
->
root
);
float
best_top
=
best
.
top
()
->
distance_to
(
target
);
while
(
pq
.
elements
()
&&
neighbors
.
elements
<
max_neighbor_connections
)
while
(
pq
.
elements
()
&&
best
.
elements
()
<
max_neighbor_connections
)
{
{
const
FVectorNode
*
vec
=
pq
.
pop
();
const
FVectorNode
*
vec
=
pq
.
pop
();
const
float
cur_dist
=
vec
->
distance_to
(
target
);
const
float
target_dist
=
vec
->
distance_to
(
target
);
if
(
cur_dist
<
best_top
)
bool
discard
=
false
;
for
(
const
FVectorNode
&
neigh
:
neighbors
)
{
{
DBUG_ASSERT
(
0
);
// impossible. XXX redo the loop
if
((
discard
=
vec
->
distance_to
(
neigh
)
<
target_dist
))
best
.
push
(
vec
);
break
;
best_top
=
cur_dist
;
}
}
else
if
(
discard
)
pq_discard
.
push
(
vec
);
pq_discard
.
push
(
vec
);
else
neighbors
.
push_back
(
vec
,
&
ctx
->
root
);
}
}
if
(
KEEP_PRUNED_CONNECTIONS
)
if
(
KEEP_PRUNED_CONNECTIONS
)
{
{
while
(
pq_discard
.
elements
()
&&
while
(
pq_discard
.
elements
()
&&
best
.
elements
()
<
max_neighbor_connections
)
neighbors
.
elements
<
max_neighbor_connections
)
{
{
best
.
push
(
pq_discard
.
pop
()
);
neighbors
.
push_back
(
pq_discard
.
pop
(),
&
ctx
->
root
);
}
}
}
}
DBUG_ASSERT
(
best
.
elements
()
<=
max_neighbor_connections
);
List
<
FVectorNode
>
&
neighbors
=
target
.
get_neighbors
(
layer
);
neighbors
.
empty
();
while
(
best
.
elements
())
neighbors
.
push_front
(
best
.
pop
(),
&
ctx
->
root
);
return
0
;
return
0
;
}
}
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment