Commit 0fa882cb authored by Sergei Golubchik

misc changes

* sysvars should be REQUIRED_ARG
* fix a mix of US and UK spelling (use US)
* use consistent naming
* work if VEC_DISTANCE arguments are in the swapped order (const, col)
* work if VEC_DISTANCE argument is NULL/invalid or wrong length
* abort INSERT if the value is invalid or wrong length
* store the "number of neighbors" in a blob in endianness-independent way
* use field->store(longlong, bool) not field->store(double)
* a lot more error checking everywhere
* cleanup after errors
* simplify calling conventions, remove reinterpret_casts
* todo/XXX comments
* whitespace cleanup
* use float consistently

memory management is still totally PoC quality

Initial HNSW implementation
parent 09989347
......@@ -402,10 +402,10 @@ The following specify which files/extra groups are read (specified before remain
height-balanced, DOUBLE_PREC_HB - double precision
height-balanced, JSON_HB - height-balanced, stored as
JSON
--hnsw-ef-constructor
--hnsw-ef-constructor=#
hnsw_ef_constructor
--hnsw-ef-search hnsw_ef_search
--hnsw-max-connection-per-layer
--hnsw-ef-search=# hnsw_ef_search
--hnsw-max-connection-per-layer=#
hnsw_max_connection_per_layer
--host-cache-size=# How many host names should be cached to avoid resolving
(Automatically configured unless set explicitly)
......
......@@ -80,6 +80,21 @@ id d
9 0.4719976290006591
10 0.5069011044450041
3 0.5865673124650332
select id,vec_distance(x'b047263c9f87233fcfd27e3eae493e3f0329f43e', v) d from t1 order by d limit 3;
id d
9 0.4719976290006591
10 0.5069011044450041
3 0.5865673124650332
select id>0,vec_distance(v, NULL) d from t1 order by d limit 3;
id>0 d
1 NULL
1 NULL
1 NULL
select id>0,vec_distance(v, x'123456') d from t1 order by d limit 3;
id>0 d
1 NULL
1 NULL
1 NULL
select t1.id as id1, t2.id as id2, vec_distance(t1.v, t2.v) from t1, t1 as t2 order by 3,1,2;
id1 id2 vec_distance(t1.v, t2.v)
1 1 0
......@@ -182,5 +197,11 @@ id1 id2 vec_distance(t1.v, t2.v)
9 8 1.2575258643523053
7 8 1.288239696195716
8 7 1.288239696195716
insert t1 (v) values ('');
ERROR 22007: Incorrect vector value: '...' for column `test`.`t1`.`v` at row 1
insert t1 (v) values (x'1234');
ERROR 22007: Incorrect vector value: '...' for column `test`.`t1`.`v` at row 1
insert t1 (v) values (x'12345678');
ERROR 22007: Incorrect vector value: '...' for column `test`.`t1`.`v` at row 1
drop table t1;
db.opt
......@@ -10,7 +10,7 @@ create table t1 (id int auto_increment primary key, v blob not null, vector inde
show create table t1;
show keys from t1;
query_vertical select * from information_schema.statistics where table_name='t1';
# print unpack(H40,pack(f5,map{rand}1..5))
# print unpack("H*",pack("f*",map{rand}1..5))
insert t1 (v) values (x'e360d63ebe554f3fcdbc523f4522193f5236083d'),
(x'f511303f72224a3fdd05fe3eb22a133ffae86a3f'),
(x'f09baa3ea172763f123def3e0c7fe53e288bf33e'),
......@@ -24,8 +24,23 @@ insert t1 (v) values (x'e360d63ebe554f3fcdbc523f4522193f5236083d'),
select id, hex(v) from t1;
flush tables;
# test with a valid query vector
select id,vec_distance(v, x'b047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
# swapped arguments
select id,vec_distance(x'b047263c9f87233fcfd27e3eae493e3f0329f43e', v) d from t1 order by d limit 3;
# test with NULL (id is unpredictable)
select id>0,vec_distance(v, NULL) d from t1 order by d limit 3;
# test with invalid query vector (id is unpredictable)
select id>0,vec_distance(v, x'123456') d from t1 order by d limit 3;
select t1.id as id1, t2.id as id2, vec_distance(t1.v, t2.v) from t1, t1 as t2 order by 3,1,2;
--error ER_TRUNCATED_WRONG_VALUE_FOR_FIELD
insert t1 (v) values ('');
--error ER_TRUNCATED_WRONG_VALUE_FOR_FIELD
insert t1 (v) values (x'1234');
--error ER_TRUNCATED_WRONG_VALUE_FOR_FIELD
insert t1 (v) values (x'12345678');
drop table t1;
let $datadir=`select @@datadir`;
list_files $datadir/test;
......@@ -1431,7 +1431,7 @@ NUMERIC_MAX_VALUE 4294967295
NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT NONE
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME HNSW_EF_SEARCH
VARIABLE_SCOPE SESSION
VARIABLE_TYPE INT UNSIGNED
......@@ -1441,7 +1441,7 @@ NUMERIC_MAX_VALUE 4294967295
NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT NONE
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME HNSW_MAX_CONNECTION_PER_LAYER
VARIABLE_SCOPE SESSION
VARIABLE_TYPE INT UNSIGNED
......@@ -1451,7 +1451,7 @@ NUMERIC_MAX_VALUE 4294967295
NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT NONE
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME HOSTNAME
VARIABLE_SCOPE GLOBAL
VARIABLE_TYPE VARCHAR
......
......@@ -965,6 +965,10 @@ class Field: public Value_source
{
return store(to, length, &my_charset_bin);
}
/*
  Convenience overload for callers that hold the value as unsigned bytes:
  reinterprets the buffer as char data and forwards it, together with its
  length, to the (const char *, size_t) overload of store_binary().
  Returns whatever that overload returns.
*/
int store_binary(const uchar *to, size_t length)
{
  const char *bytes= (const char*) to;
  return store_binary(bytes, length);
}
virtual int store_hex_hybrid(const char *str, size_t length);
virtual int store(double nr)=0;
virtual int store(longlong nr, bool unsigned_val)=0;
......
......@@ -51,10 +51,17 @@ class Item_func_vec_distance: public Item_real_func
static LEX_CSTRING name= {STRING_WITH_LEN("vec_distance") };
return name;
}
/*
  Return the constant argument of the function when exactly the pattern
  "one argument is a field, the other one is a constant item" holds,
  in either order: (field, const) is tried first, then (const, field).
  Returns NULL when neither ordering matches.
*/
Item *get_const_arg() const
{
  for (int field_pos= 0; field_pos < 2; field_pos++)
  {
    Item *candidate= args[1 - field_pos];
    if (args[field_pos]->type() == Item::FIELD_ITEM &&
        candidate->const_item())
      return candidate;
  }
  return NULL;
}
key_map part_of_sortkey() const override;
Item *do_get_copy(THD *thd) const override
{ return get_item_copy<Item_func_vec_distance>(thd, this); }
virtual ~Item_func_vec_distance() {};
};
......
......@@ -7388,18 +7388,18 @@ static Sys_var_enum Sys_block_encryption_mode(
/*
  Session-scoped unsigned integer system variable "hnsw_ef_search".
  Accepted range is 0..UINT_MAX, default 10, block size 1.
  The second string repeats the variable name; presumably it is the
  help text shown for the option -- TODO confirm and write a real description.
*/
static Sys_var_uint Sys_hnsw_ef_search(
"hnsw_ef_search",
"hnsw_ef_search",
/*
  NOTE(review): both a CMD_LINE(NO_ARG) and a CMD_LINE(REQUIRED_ARG) line
  are present here. This looks like diff residue (old line followed by its
  replacement); only one of them can remain in the real source -- confirm.
*/
SESSION_VAR(hnsw_ef_search), CMD_LINE(NO_ARG),
SESSION_VAR(hnsw_ef_search), CMD_LINE(REQUIRED_ARG),
VALID_RANGE(0, UINT_MAX), DEFAULT(10),
BLOCK_SIZE(1));
/*
  Session-scoped unsigned integer system variable "hnsw_ef_constructor".
  Accepted range is 0..UINT_MAX, default 10, block size 1.
  The second string repeats the variable name; presumably it is the
  help text shown for the option -- TODO confirm and write a real description.
*/
static Sys_var_uint Sys_hnsw_ef_constructor(
"hnsw_ef_constructor",
"hnsw_ef_constructor",
/*
  NOTE(review): both a CMD_LINE(NO_ARG) and a CMD_LINE(REQUIRED_ARG) line
  are present here. This looks like diff residue (old line followed by its
  replacement); only one of them can remain in the real source -- confirm.
*/
SESSION_VAR(hnsw_ef_constructor), CMD_LINE(NO_ARG),
SESSION_VAR(hnsw_ef_constructor), CMD_LINE(REQUIRED_ARG),
VALID_RANGE(0, UINT_MAX), DEFAULT(10),
BLOCK_SIZE(1));
/*
  Session-scoped unsigned integer system variable
  "hnsw_max_connection_per_layer".
  Accepted range is 0..UINT_MAX, default 50, block size 1.
  The second string repeats the variable name; presumably it is the
  help text shown for the option -- TODO confirm and write a real description.
*/
static Sys_var_uint Sys_hnsw_max_connection_per_layer(
"hnsw_max_connection_per_layer",
"hnsw_max_connection_per_layer",
/*
  NOTE(review): both a CMD_LINE(NO_ARG) and a CMD_LINE(REQUIRED_ARG) line
  are present here. This looks like diff residue (old line followed by its
  replacement); only one of them can remain in the real source -- confirm.
*/
SESSION_VAR(hnsw_max_connection_per_layer), CMD_LINE(NO_ARG),
SESSION_VAR(hnsw_max_connection_per_layer), CMD_LINE(REQUIRED_ARG),
VALID_RANGE(0, UINT_MAX), DEFAULT(50),
BLOCK_SIZE(1));
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment