Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
b2b8c86c
Commit
b2b8c86c
authored
Jun 07, 2024
by
Sergei Golubchik
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
cleanups
parent
9b427691
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
2 additions
and
69 deletions
+2
-69
sql/vector_mhnsw.cc
sql/vector_mhnsw.cc
+2
-69
No files found.
sql/vector_mhnsw.cc
View file @
b2b8c86c
...
@@ -17,15 +17,8 @@
...
@@ -17,15 +17,8 @@
#include <my_global.h>
#include <my_global.h>
#include "vector_mhnsw.h"
#include "vector_mhnsw.h"
#include "field.h"
#include "hash.h"
#include "item.h"
#include "item_vectorfunc.h"
#include "item_vectorfunc.h"
#include "key.h"
#include "key.h"
#include "my_base.h"
#include "mysql/psi/psi_base.h"
#include "sql_queue.h"
#include <scope.h>
#include <scope.h>
const
LEX_CSTRING
mhnsw_hlindex_table
=
{
STRING_WITH_LEN
(
"\
const
LEX_CSTRING
mhnsw_hlindex_table
=
{
STRING_WITH_LEN
(
"\
...
@@ -215,11 +208,6 @@ static int select_neighbors(MHNSW_Context *ctx, size_t layer,
...
@@ -215,11 +208,6 @@ static int select_neighbors(MHNSW_Context *ctx, size_t layer,
const
List
<
FVectorNode
>
&
candidates_unsafe
,
const
List
<
FVectorNode
>
&
candidates_unsafe
,
size_t
max_neighbor_connections
)
size_t
max_neighbor_connections
)
{
{
/*
TODO: If the input neighbors list is already sorted in search_layer, then
no need to do additional queue build steps here.
*/
Hash_set
<
FVectorNode
>
visited
(
PSI_INSTRUMENT_MEM
,
FVectorNode
::
get_key
);
Hash_set
<
FVectorNode
>
visited
(
PSI_INSTRUMENT_MEM
,
FVectorNode
::
get_key
);
Queue
<
FVectorNode
,
const
FVector
>
pq
;
// working queue
Queue
<
FVectorNode
,
const
FVector
>
pq
;
// working queue
Queue
<
FVectorNode
,
const
FVector
>
pq_discard
;
// queue for discarded candidates
Queue
<
FVectorNode
,
const
FVector
>
pq_discard
;
// queue for discarded candidates
...
@@ -231,8 +219,6 @@ static int select_neighbors(MHNSW_Context *ctx, size_t layer,
...
@@ -231,8 +219,6 @@ static int select_neighbors(MHNSW_Context *ctx, size_t layer,
List
<
FVectorNode
>
&
neighbors
=
target
.
get_neighbors
(
layer
);
List
<
FVectorNode
>
&
neighbors
=
target
.
get_neighbors
(
layer
);
neighbors
.
empty
();
neighbors
.
empty
();
// TODO(cvicentiu) this 1000 here is a hardcoded value for max queue size.
// This should not be fixed.
if
(
pq
.
init
(
10000
,
0
,
cmp_vec
,
&
target
)
||
if
(
pq
.
init
(
10000
,
0
,
cmp_vec
,
&
target
)
||
pq_discard
.
init
(
10000
,
0
,
cmp_vec
,
&
target
))
pq_discard
.
init
(
10000
,
0
,
cmp_vec
,
&
target
))
return
HA_ERR_OUT_OF_MEM
;
return
HA_ERR_OUT_OF_MEM
;
...
@@ -272,45 +258,6 @@ static int select_neighbors(MHNSW_Context *ctx, size_t layer,
...
@@ -272,45 +258,6 @@ static int select_neighbors(MHNSW_Context *ctx, size_t layer,
}
}
static
void
dbug_print_vec_ref
(
const
char
*
prefix
,
uint
layer
,
const
FVectorNode
&
ref
)
{
#ifndef DBUG_OFF
// TODO(cvicentiu) disable this in release build.
char
*
ref_str
=
static_cast
<
char
*>
(
alloca
(
ref
.
get_ref_len
()
*
2
+
1
));
DBUG_ASSERT
(
ref_str
);
char
*
ptr
=
ref_str
;
for
(
size_t
i
=
0
;
i
<
ref
.
get_ref_len
();
ptr
+=
2
,
i
++
)
{
snprintf
(
ptr
,
3
,
"%02x"
,
ref
.
get_ref
()[
i
]);
}
DBUG_PRINT
(
"VECTOR"
,
(
"%s %u %s"
,
prefix
,
layer
,
ref_str
));
#endif
}
static
void
dbug_print_vec_neigh
(
uint
layer
,
const
List
<
FVectorNode
>
&
neighbors
)
{
#ifndef DBUG_OFF
DBUG_PRINT
(
"VECTOR"
,
(
"NEIGH: NUM: %d"
,
neighbors
.
elements
));
for
(
const
FVectorNode
&
ref
:
neighbors
)
{
dbug_print_vec_ref
(
"NEIGH: "
,
layer
,
ref
);
}
#endif
}
static
void
dbug_print_hash_vec
(
Hash_set
<
FVectorNode
>
&
h
)
{
#ifndef DBUG_OFF
for
(
FVectorNode
&
ptr
:
h
)
{
DBUG_PRINT
(
"VECTOR"
,
(
"HASH elem: %p"
,
&
ptr
));
dbug_print_vec_ref
(
"VISITED: "
,
0
,
ptr
);
}
#endif
}
static
int
write_neighbors
(
MHNSW_Context
*
ctx
,
size_t
layer
,
static
int
write_neighbors
(
MHNSW_Context
*
ctx
,
size_t
layer
,
const
FVectorNode
&
source_node
)
const
FVectorNode
&
source_node
)
{
{
...
@@ -336,15 +283,9 @@ static int write_neighbors(MHNSW_Context *ctx, size_t layer,
...
@@ -336,15 +283,9 @@ static int write_neighbors(MHNSW_Context *ctx, size_t layer,
graph
->
field
[
2
]
->
store_binary
(
neighbor_array_bytes
,
total_size
);
graph
->
field
[
2
]
->
store_binary
(
neighbor_array_bytes
,
total_size
);
if
(
source_node
.
is_new
())
if
(
source_node
.
is_new
())
{
dbug_print_vec_ref
(
"INSERT "
,
layer
,
source_node
);
err
=
graph
->
file
->
ha_write_row
(
graph
->
record
[
0
]);
err
=
graph
->
file
->
ha_write_row
(
graph
->
record
[
0
]);
}
else
else
{
{
dbug_print_vec_ref
(
"UPDATE "
,
layer
,
source_node
);
dbug_print_vec_neigh
(
layer
,
new_neighbors
);
uchar
*
key
=
static_cast
<
uchar
*>
(
alloca
(
graph
->
key_info
->
key_length
));
uchar
*
key
=
static_cast
<
uchar
*>
(
alloca
(
graph
->
key_info
->
key_length
));
key_copy
(
key
,
graph
->
record
[
0
],
graph
->
key_info
,
graph
->
key_info
->
key_length
);
key_copy
(
key
,
graph
->
record
[
0
],
graph
->
key_info
,
graph
->
key_info
->
key_length
);
...
@@ -415,7 +356,6 @@ static int search_layer(MHNSW_Context *ctx,
...
@@ -415,7 +356,6 @@ static int search_layer(MHNSW_Context *ctx,
else
if
(
node
.
distance_to
(
target
)
>
best
.
top
()
->
distance_to
(
target
))
else
if
(
node
.
distance_to
(
target
)
>
best
.
top
()
->
distance_to
(
target
))
best
.
replace_top
(
&
node
);
best
.
replace_top
(
&
node
);
visited
.
insert
(
&
node
);
visited
.
insert
(
&
node
);
dbug_print_vec_ref
(
"INSERTING node in visited: "
,
layer
,
node
);
}
}
float
furthest_best
=
best
.
top
()
->
distance_to
(
target
);
float
furthest_best
=
best
.
top
()
->
distance_to
(
target
);
...
@@ -431,7 +371,6 @@ static int search_layer(MHNSW_Context *ctx,
...
@@ -431,7 +371,6 @@ static int search_layer(MHNSW_Context *ctx,
for
(
const
FVectorNode
&
neigh
:
cur_vec
.
get_neighbors
(
layer
))
for
(
const
FVectorNode
&
neigh
:
cur_vec
.
get_neighbors
(
layer
))
{
{
dbug_print_hash_vec
(
visited
);
if
(
visited
.
find
(
&
neigh
))
if
(
visited
.
find
(
&
neigh
))
continue
;
continue
;
...
@@ -450,13 +389,9 @@ static int search_layer(MHNSW_Context *ctx,
...
@@ -450,13 +389,9 @@ static int search_layer(MHNSW_Context *ctx,
}
}
}
}
}
}
DBUG_PRINT
(
"VECTOR"
,
(
"SEARCH_LAYER_END %d best"
,
best
.
elements
()));
while
(
best
.
elements
())
while
(
best
.
elements
())
{
// TODO(cvicentiu) this is n*log(n), we need a queue iterator.
result
->
push_front
(
best
.
pop
(),
&
ctx
->
root
);
result
->
push_front
(
best
.
pop
(),
&
ctx
->
root
);
}
return
0
;
return
0
;
}
}
...
@@ -530,8 +465,6 @@ int mhnsw_insert(TABLE *table, KEY *keyinfo)
...
@@ -530,8 +465,6 @@ int mhnsw_insert(TABLE *table, KEY *keyinfo)
ref_ptr
=
graph
->
field
[
1
]
->
val_str
(
&
ref_str
);
ref_ptr
=
graph
->
field
[
1
]
->
val_str
(
&
ref_str
);
FVectorNode
start_node
(
&
ctx
,
ref_ptr
->
ptr
());
FVectorNode
start_node
(
&
ctx
,
ref_ptr
->
ptr
());
// XXX may be *all* nodes in the last layer? there should be few
// xxx could boost recall, if needed
if
(
start_nodes
.
push_back
(
&
start_node
,
&
ctx
.
root
))
if
(
start_nodes
.
push_back
(
&
start_node
,
&
ctx
.
root
))
return
HA_ERR_OUT_OF_MEM
;
return
HA_ERR_OUT_OF_MEM
;
...
@@ -621,8 +554,8 @@ int mhnsw_first(TABLE *table, KEY *keyinfo, Item *dist, ulonglong limit)
...
@@ -621,8 +554,8 @@ int mhnsw_first(TABLE *table, KEY *keyinfo, Item *dist, ulonglong limit)
FVectorNode
start_node
(
&
ctx
,
ref_ptr
->
ptr
());
FVectorNode
start_node
(
&
ctx
,
ref_ptr
->
ptr
());
//
XXX or may be *all* nodes in the last layer? there should be few
//
one could put all max_layer nodes in start_nodes
//
xxx could boost recall, if need
ed
//
but it has no effect of the recall or spe
ed
if
(
start_nodes
.
push_back
(
&
start_node
,
&
ctx
.
root
))
if
(
start_nodes
.
push_back
(
&
start_node
,
&
ctx
.
root
))
return
HA_ERR_OUT_OF_MEM
;
return
HA_ERR_OUT_OF_MEM
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment