Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
4dc4950d
Commit
4dc4950d
authored
Jul 26, 2024
by
Sergei Golubchik
Committed by
Vicențiu Ciorbaru
Jul 26, 2024
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
windows ugh
parent
883c6769
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
27 additions
and
29 deletions
+27
-29
include/my_bit.h
include/my_bit.h
+1
-1
sql/bloom_filters.h
sql/bloom_filters.h
+4
-4
sql/item_vectorfunc.cc
sql/item_vectorfunc.cc
+1
-1
sql/sql_string.cc
sql/sql_string.cc
+3
-5
sql/vector_mhnsw.cc
sql/vector_mhnsw.cc
+18
-18
No files found.
include/my_bit.h
View file @
4dc4950d
...
@@ -37,7 +37,7 @@ extern const uchar _my_bits_reverse_table[256];
...
@@ -37,7 +37,7 @@ extern const uchar _my_bits_reverse_table[256];
- returns 0 for (1<<0)
- returns 0 for (1<<0)
- returns 1 for (1<<1)
- returns 1 for (1<<1)
- returns 2 for (1<<2)
- returns 2 for (1<<2)
- returns
2 for 3, which has (1<<2
) as the highest bit set.
- returns
1 for 3, which has (1<<1
) as the highest bit set.
Note, the behaviour of log2(0) is not defined.
Note, the behaviour of log2(0) is not defined.
Let's return 0 for the input 0, for the code simplicity.
Let's return 0 for the input 0, for the code simplicity.
...
...
sql/bloom_filters.h
View file @
4dc4950d
...
@@ -50,10 +50,10 @@ struct PatternedSimdBloomFilter
...
@@ -50,10 +50,10 @@ struct PatternedSimdBloomFilter
bv
.
resize
(
num_blocks
);
bv
.
resize
(
num_blocks
);
}
}
uint
64
_t
ComputeNumBits
()
uint
32
_t
ComputeNumBits
()
{
{
double
bits_per_val
=
-
1.44
*
std
::
log2
(
epsilon
);
double
bits_per_val
=
-
1.44
*
std
::
log2
(
epsilon
);
return
std
::
max
<
uint
64_t
>
(
512
,
bits_per_val
*
n
+
0.5
);
return
std
::
max
<
uint
32_t
>
(
512
,
static_cast
<
uint32_t
>
(
bits_per_val
*
n
+
0.5
)
);
}
}
#ifdef INTEL_SIMD_IMPLEMENTATION
#ifdef INTEL_SIMD_IMPLEMENTATION
...
@@ -190,9 +190,9 @@ struct PatternedSimdBloomFilter
...
@@ -190,9 +190,9 @@ struct PatternedSimdBloomFilter
return
step9
^
(
step9
>>
28
);
return
step9
^
(
step9
>>
28
);
}
}
uint
GetBlockIdx_1
(
uint64_t
hash
)
uint
64_t
GetBlockIdx_1
(
uint64_t
hash
)
{
{
uint64
blockIdx
=
hash
>>
(
mask_idx_bits
+
rotate_bits
);
uint64
_t
blockIdx
=
hash
>>
(
mask_idx_bits
+
rotate_bits
);
return
blockIdx
&
(
num_blocks
-
1
);
return
blockIdx
&
(
num_blocks
-
1
);
}
}
...
...
sql/item_vectorfunc.cc
View file @
4dc4950d
...
@@ -193,7 +193,7 @@ String *Item_func_vec_fromtext::val_str(String *buf)
...
@@ -193,7 +193,7 @@ String *Item_func_vec_fromtext::val_str(String *buf)
int
error
;
int
error
;
char
*
start
=
(
char
*
)
je
.
value_begin
,
*
end
;
char
*
start
=
(
char
*
)
je
.
value_begin
,
*
end
;
float
f
=
cs
->
strntod
(
start
,
je
.
value_len
,
&
end
,
&
error
);
float
f
=
(
float
)
cs
->
strntod
(
start
,
je
.
value_len
,
&
end
,
&
error
);
if
(
unlikely
(
error
))
if
(
unlikely
(
error
))
goto
error_format
;
goto
error_format
;
...
...
sql/sql_string.cc
View file @
4dc4950d
...
@@ -118,15 +118,13 @@ bool Binary_string::realloc_raw(size_t alloc_length)
...
@@ -118,15 +118,13 @@ bool Binary_string::realloc_raw(size_t alloc_length)
}
}
static
size_t
write_float_str_to_buff
(
char
*
buff
,
size_
t
buff_len
,
static
uint32
write_float_str_to_buff
(
char
*
buff
,
in
t
buff_len
,
float
num
,
uint
decimals
)
float
num
,
uint
decimals
)
{
{
if
(
decimals
>=
FLOATING_POINT_DECIMALS
)
if
(
decimals
>=
FLOATING_POINT_DECIMALS
)
{
return
(
uint32
)
my_gcvt
(
num
,
MY_GCVT_ARG_FLOAT
,
buff_len
-
1
,
buff
,
NULL
);
return
my_gcvt
(
num
,
MY_GCVT_ARG_FLOAT
,
buff_len
-
1
,
buff
,
NULL
);
}
else
else
return
my_fcvt
(
num
,
decimals
,
buff
,
NULL
);
return
(
uint32
)
my_fcvt
(
num
,
decimals
,
buff
,
NULL
);
}
}
bool
String
::
append_float
(
float
num
,
uint
decimals
)
bool
String
::
append_float
(
float
num
,
uint
decimals
)
...
...
sql/vector_mhnsw.cc
View file @
4dc4950d
...
@@ -29,8 +29,8 @@ ulonglong mhnsw_cache_size;
...
@@ -29,8 +29,8 @@ ulonglong mhnsw_cache_size;
#define clo_nei_read float4get
#define clo_nei_read float4get
// Algorithm parameters
// Algorithm parameters
static
constexpr
double
alpha
=
1.1
;
static
constexpr
float
alpha
=
1.1
f
;
static
constexpr
double
generosity
=
1.2
;
static
constexpr
float
generosity
=
1.2
f
;
static
constexpr
double
stiffness
=
0.002
;
static
constexpr
double
stiffness
=
0.002
;
static
constexpr
uint
ef_construction_max_factor
=
16
;
static
constexpr
uint
ef_construction_max_factor
=
16
;
static
constexpr
uint
clo_nei_threshold
=
10000
;
static
constexpr
uint
clo_nei_threshold
=
10000
;
...
@@ -84,7 +84,7 @@ struct FVector
...
@@ -84,7 +84,7 @@ struct FVector
vec
->
abs2
=
abs2
/
2
;
vec
->
abs2
=
abs2
/
2
;
vec
->
scale
=
scale
?
scale
/
32767
:
1
;
vec
->
scale
=
scale
?
scale
/
32767
:
1
;
for
(
size_t
i
=
0
;
i
<
vec_len
;
i
++
)
for
(
size_t
i
=
0
;
i
<
vec_len
;
i
++
)
vec
->
dims
[
i
]
=
st
d
::
round
(
v
[
i
]
/
vec
->
scale
);
vec
->
dims
[
i
]
=
st
atic_cast
<
uint16_t
>
(
std
::
round
(
v
[
i
]
/
vec
->
scale
)
);
vec
->
fix_tail
(
vec_len
);
vec
->
fix_tail
(
vec_len
);
return
vec
;
return
vec
;
}
}
...
@@ -113,7 +113,7 @@ struct FVector
...
@@ -113,7 +113,7 @@ struct FVector
int64_t
d
=
0
;
int64_t
d
=
0
;
for
(
size_t
i
=
0
;
i
<
len
;
i
++
)
for
(
size_t
i
=
0
;
i
<
len
;
i
++
)
d
+=
int32_t
(
v1
[
i
])
*
int32_t
(
v2
[
i
]);
d
+=
int32_t
(
v1
[
i
])
*
int32_t
(
v2
[
i
]);
return
d
;
return
static_cast
<
float
>
(
d
)
;
}
}
float
distance_to
(
const
FVector
*
other
,
size_t
vec_len
)
const
float
distance_to
(
const
FVector
*
other
,
size_t
vec_len
)
const
...
@@ -183,7 +183,7 @@ class FVectorNode
...
@@ -183,7 +183,7 @@ class FVectorNode
MHNSW_Context
*
ctx
;
MHNSW_Context
*
ctx
;
const
FVector
*
make_vec
(
const
void
*
v
);
const
FVector
*
make_vec
(
const
void
*
v
);
int
alloc_neighborhood
(
size
_t
layer
);
int
alloc_neighborhood
(
uint8
_t
layer
);
public:
public:
const
FVector
*
vec
=
nullptr
;
const
FVector
*
vec
=
nullptr
;
Neighborhood
*
neighbors
=
nullptr
;
Neighborhood
*
neighbors
=
nullptr
;
...
@@ -191,7 +191,7 @@ class FVectorNode
...
@@ -191,7 +191,7 @@ class FVectorNode
bool
stored
:
1
,
deleted
:
1
;
bool
stored
:
1
,
deleted
:
1
;
FVectorNode
(
MHNSW_Context
*
ctx_
,
const
void
*
gref_
);
FVectorNode
(
MHNSW_Context
*
ctx_
,
const
void
*
gref_
);
FVectorNode
(
MHNSW_Context
*
ctx_
,
const
void
*
tref_
,
size
_t
layer
,
FVectorNode
(
MHNSW_Context
*
ctx_
,
const
void
*
tref_
,
uint8
_t
layer
,
const
void
*
vec_
);
const
void
*
vec_
);
float
distance_to
(
const
FVector
*
other
)
const
;
float
distance_to
(
const
FVector
*
other
)
const
;
int
load
(
TABLE
*
graph
);
int
load
(
TABLE
*
graph
);
...
@@ -596,7 +596,7 @@ FVectorNode::FVectorNode(MHNSW_Context *ctx_, const void *gref_)
...
@@ -596,7 +596,7 @@ FVectorNode::FVectorNode(MHNSW_Context *ctx_, const void *gref_)
memcpy
(
gref
(),
gref_
,
gref_len
());
memcpy
(
gref
(),
gref_
,
gref_len
());
}
}
FVectorNode
::
FVectorNode
(
MHNSW_Context
*
ctx_
,
const
void
*
tref_
,
size
_t
layer
,
FVectorNode
::
FVectorNode
(
MHNSW_Context
*
ctx_
,
const
void
*
tref_
,
uint8
_t
layer
,
const
void
*
vec_
)
const
void
*
vec_
)
:
ctx
(
ctx_
),
stored
(
false
),
deleted
(
false
)
:
ctx
(
ctx_
),
stored
(
false
),
deleted
(
false
)
{
{
...
@@ -613,7 +613,7 @@ float FVectorNode::distance_to(const FVector *other) const
...
@@ -613,7 +613,7 @@ float FVectorNode::distance_to(const FVector *other) const
return
vec
->
distance_to
(
other
,
ctx
->
vec_len
);
return
vec
->
distance_to
(
other
,
ctx
->
vec_len
);
}
}
int
FVectorNode
::
alloc_neighborhood
(
size
_t
layer
)
int
FVectorNode
::
alloc_neighborhood
(
uint8
_t
layer
)
{
{
if
(
neighbors
)
if
(
neighbors
)
return
0
;
return
0
;
...
@@ -667,11 +667,11 @@ int FVectorNode::load_from_record(TABLE *graph)
...
@@ -667,11 +667,11 @@ int FVectorNode::load_from_record(TABLE *graph)
memcpy
(
vec_ptr
,
v
->
ptr
(),
v
->
length
());
memcpy
(
vec_ptr
,
v
->
ptr
(),
v
->
length
());
vec_ptr
->
fix_tail
(
ctx
->
vec_len
);
vec_ptr
->
fix_tail
(
ctx
->
vec_len
);
size_t
layer
=
graph
->
field
[
FIELD_LAYER
]
->
val_int
();
longlong
layer
=
graph
->
field
[
FIELD_LAYER
]
->
val_int
();
if
(
layer
>
100
)
// 10e30 nodes at M=2, more at larger M's
if
(
layer
>
100
)
// 10e30 nodes at M=2, more at larger M's
return
my_errno
=
HA_ERR_CRASHED
;
return
my_errno
=
HA_ERR_CRASHED
;
if
(
int
err
=
alloc_neighborhood
(
layer
))
if
(
int
err
=
alloc_neighborhood
(
static_cast
<
uint8_t
>
(
layer
)
))
return
err
;
return
err
;
v
=
graph
->
field
[
FIELD_NEIGHBORS
]
->
val_str
(
&
buf
);
v
=
graph
->
field
[
FIELD_NEIGHBORS
]
->
val_str
(
&
buf
);
...
@@ -751,7 +751,7 @@ class VisitedSet
...
@@ -751,7 +751,7 @@ class VisitedSet
public:
public:
uint
count
=
0
;
uint
count
=
0
;
VisitedSet
(
MEM_ROOT
*
root
,
const
FVector
*
target
,
uint
size
)
:
VisitedSet
(
MEM_ROOT
*
root
,
const
FVector
*
target
,
uint
size
)
:
root
(
root
),
target
(
target
),
map
(
size
,
0.01
)
{}
root
(
root
),
target
(
target
),
map
(
size
,
0.01
f
)
{}
Visited
*
create
(
FVectorNode
*
node
,
bool
e
=
false
)
Visited
*
create
(
FVectorNode
*
node
,
bool
e
=
false
)
{
{
auto
*
v
=
new
(
root
)
Visited
(
node
,
node
->
distance_to
(
target
),
e
);
auto
*
v
=
new
(
root
)
Visited
(
node
,
node
->
distance_to
(
target
),
e
);
...
@@ -911,7 +911,7 @@ static int update_second_degree_neighbors(MHNSW_Context *ctx, TABLE *graph,
...
@@ -911,7 +911,7 @@ static int update_second_degree_neighbors(MHNSW_Context *ctx, TABLE *graph,
}
}
static
int
search_layer
(
MHNSW_Context
*
ctx
,
TABLE
*
graph
,
const
FVector
*
target
,
static
int
search_layer
(
MHNSW_Context
*
ctx
,
TABLE
*
graph
,
const
FVector
*
target
,
Neighborhood
*
start_nodes
,
u
int
result_size
,
Neighborhood
*
start_nodes
,
u
longlong
result_size
,
size_t
layer
,
Neighborhood
*
result
,
bool
construction
)
size_t
layer
,
Neighborhood
*
result
,
bool
construction
)
{
{
DBUG_ASSERT
(
start_nodes
->
num
>
0
);
DBUG_ASSERT
(
start_nodes
->
num
>
0
);
...
@@ -920,7 +920,7 @@ static int search_layer(MHNSW_Context *ctx, TABLE *graph, const FVector *target,
...
@@ -920,7 +920,7 @@ static int search_layer(MHNSW_Context *ctx, TABLE *graph, const FVector *target,
MEM_ROOT
*
const
root
=
graph
->
in_use
->
mem_root
;
MEM_ROOT
*
const
root
=
graph
->
in_use
->
mem_root
;
Queue
<
Visited
>
candidates
,
best
;
Queue
<
Visited
>
candidates
,
best
;
bool
skip_deleted
;
bool
skip_deleted
;
uint
ef
=
result_size
,
expand_size
=
0
;
uint
ef
=
static_cast
<
uint
>
(
result_size
)
,
expand_size
=
0
;
if
(
construction
)
if
(
construction
)
{
{
...
@@ -937,12 +937,12 @@ static int search_layer(MHNSW_Context *ctx, TABLE *graph, const FVector *target,
...
@@ -937,12 +937,12 @@ static int search_layer(MHNSW_Context *ctx, TABLE *graph, const FVector *target,
{
{
skip_deleted
=
layer
==
0
;
skip_deleted
=
layer
==
0
;
if
(
ef
>
1
||
layer
==
0
)
if
(
ef
>
1
||
layer
==
0
)
ef
=
ef
*
graph
->
in_use
->
variables
.
mhnsw_limit_multiplier
;
ef
=
static_cast
<
uint
>
(
ef
*
graph
->
in_use
->
variables
.
mhnsw_limit_multiplier
)
;
}
}
// WARNING! heuristic here
// WARNING! heuristic here
const
double
est_heuristic
=
8
*
std
::
sqrt
(
ctx
->
max_neighbors
(
layer
));
const
double
est_heuristic
=
8
*
std
::
sqrt
(
ctx
->
max_neighbors
(
layer
));
const
uint
est_size
=
est_heuristic
*
std
::
pow
(
ef
,
ctx
->
get_ef_power
(
));
const
uint
est_size
=
static_cast
<
uint
>
(
est_heuristic
*
std
::
pow
(
ef
,
ctx
->
get_ef_power
()
));
VisitedSet
visited
(
root
,
target
,
est_size
);
VisitedSet
visited
(
root
,
target
,
est_size
);
candidates
.
init
(
10000
,
false
,
Visited
::
cmp
);
candidates
.
init
(
10000
,
false
,
Visited
::
cmp
);
...
@@ -1090,9 +1090,9 @@ int mhnsw_insert(TABLE *table, KEY *keyinfo)
...
@@ -1090,9 +1090,9 @@ int mhnsw_insert(TABLE *table, KEY *keyinfo)
const
double
NORMALIZATION_FACTOR
=
1
/
std
::
log
(
ctx
->
M
);
const
double
NORMALIZATION_FACTOR
=
1
/
std
::
log
(
ctx
->
M
);
double
log
=
-
std
::
log
(
my_rnd
(
&
thd
->
rand
))
*
NORMALIZATION_FACTOR
;
double
log
=
-
std
::
log
(
my_rnd
(
&
thd
->
rand
))
*
NORMALIZATION_FACTOR
;
const
longlong
max_layer
=
start_nodes
.
links
[
0
]
->
max_layer
;
const
uint8_t
max_layer
=
start_nodes
.
links
[
0
]
->
max_layer
;
longlong
target_layer
=
std
::
min
<
longlong
>
(
std
::
floor
(
log
),
max_layer
+
1
);
uint8_t
target_layer
=
std
::
min
<
uint8_t
>
(
static_cast
<
uint8_t
>
(
std
::
floor
(
log
)
),
max_layer
+
1
);
longlong
cur_layer
;
int
cur_layer
;
FVectorNode
*
target
=
new
(
ctx
->
alloc_node
())
FVectorNode
*
target
=
new
(
ctx
->
alloc_node
())
FVectorNode
(
ctx
,
table
->
file
->
ref
,
target_layer
,
res
->
ptr
());
FVectorNode
(
ctx
,
table
->
file
->
ref
,
target_layer
,
res
->
ptr
());
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment