Commit 518f9cb7 authored by Sergei Golubchik's avatar Sergei Golubchik

if we require Eigen, we might as well use it everywhere

it's measurably faster even in items
parent 78bd693a
...@@ -108,14 +108,14 @@ id hex(v) ...@@ -108,14 +108,14 @@ id hex(v)
flush tables; flush tables;
select id,vec_distance_euclidean(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3; select id,vec_distance_euclidean(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
id d id d
9 0.4719976290006591 9 0.47199761867523193
10 0.5069011044450041 10 0.5069010853767395
3 0.5865673124650332 3 0.5865673422813416
select id,vec_distance_euclidean(x'b047263C9f87233fcfd27e3eae493e3f0329f43e', v) d from t1 order by d limit 3; select id,vec_distance_euclidean(x'b047263C9f87233fcfd27e3eae493e3f0329f43e', v) d from t1 order by d limit 3;
id d id d
9 0.4719976290006591 9 0.47199761867523193
10 0.5069011044450041 10 0.5069010853767395
3 0.5865673124650332 3 0.5865673422813416
select id>0,vec_distance_euclidean(v, NULL) d from t1 order by d limit 3; select id>0,vec_distance_euclidean(v, NULL) d from t1 order by d limit 3;
id>0 d id>0 d
1 NULL 1 NULL
...@@ -138,149 +138,149 @@ id1 id2 vec_distance_euclidean(t1.v, t2.v) ...@@ -138,149 +138,149 @@ id1 id2 vec_distance_euclidean(t1.v, t2.v)
8 8 0 8 8 0
9 9 0 9 9 0
10 10 0 10 10 0
7 10 0.35209010323904116 7 10 0.35209009051322937
10 7 0.35209010323904116 10 7 0.35209009051322937
1 7 0.557267332724855 1 7 0.5572673082351685
7 1 0.557267332724855 7 1 0.5572673082351685
2 3 0.6065128837978769 2 3 0.6065129041671753
3 2 0.6065128837978769 3 2 0.6065129041671753
1 3 0.6128238020507096 1 3 0.6128237843513489
3 1 0.6128238020507096 3 1 0.6128237843513489
5 8 0.6219995745138945 5 8 0.6219995617866516
8 5 0.6219995745138945 8 5 0.6219995617866516
3 10 0.6523185662547816 3 10 0.65231853723526
10 3 0.6523185662547816 10 3 0.65231853723526
9 10 0.6732681362788765 9 10 0.6732681393623352
10 9 0.6732681362788765 10 9 0.6732681393623352
3 7 0.6799892416547949 3 7 0.679989218711853
7 3 0.6799892416547949 7 3 0.679989218711853
3 9 0.6820752294088018 3 9 0.6820752024650574
9 3 0.6820752294088018 9 3 0.6820752024650574
2 10 0.6916305331777215 2 10 0.6916305422782898
10 2 0.6916305331777215 10 2 0.6916305422782898
2 9 0.6966650510789955 2 9 0.6966650485992432
9 2 0.6966650510789955 9 2 0.6966650485992432
3 6 0.7102823580937639 3 6 0.7102823853492737
6 3 0.7102823580937639 6 3 0.7102823853492737
2 7 0.7120217580666971 2 7 0.7120217680931091
7 2 0.7120217580666971 7 2 0.7120217680931091
2 6 0.7351618106552689 2 6 0.7351617813110352
6 2 0.7351618106552689 6 2 0.7351617813110352
1 10 0.7386864491588024 1 10 0.7386864423751831
10 1 0.7386864491588024 10 1 0.7386864423751831
4 6 0.7784357824370262 4 6 0.7784357666969299
6 4 0.7784357824370262 6 4 0.7784357666969299
4 8 0.7795837407361241 4 8 0.779583752155304
8 4 0.7795837407361241 8 4 0.779583752155304
4 5 0.8132007346697969 4 5 0.8132007122039795
5 4 0.8132007346697969 5 4 0.8132007122039795
2 4 0.8260925223296488 2 4 0.826092541217804
4 2 0.8260925223296488 4 2 0.826092541217804
5 10 0.8286488932765299 5 10 0.8286488652229309
10 5 0.8286488932765299 10 5 0.8286488652229309
5 9 0.8769351333060768 5 9 0.8769351243972778
9 5 0.8769351333060768 9 5 0.8769351243972778
1 6 0.8861410875047832 1 6 0.8861410617828369
6 1 0.8861410875047832 6 1 0.8861410617828369
3 5 0.9224201772876247 3 5 0.9224202036857605
5 3 0.9224201772876247 5 3 0.9224202036857605
4 7 0.9347916246876117 4 7 0.934791624546051
7 4 0.9347916246876117 7 4 0.934791624546051
7 9 0.9364253407685257 7 9 0.9364253282546997
9 7 0.9364253407685257 9 7 0.9364253282546997
3 4 0.9757105842688992 3 4 0.9757105708122253
4 3 0.9757105842688992 4 3 0.9757105708122253
1 2 0.9810272439433514 1 2 0.9810272455215454
2 1 0.9810272439433514 2 1 0.9810272455215454
1 4 0.9965475544626712 1 4 0.9965475797653198
4 1 0.9965475544626712 4 1 0.9965475797653198
5 7 0.9976863778073342 5 7 0.9976863861083984
7 5 0.9976863778073342 7 5 0.9976863861083984
4 10 1.0109345944029724 4 10 1.010934591293335
10 4 1.0109345944029724 10 4 1.010934591293335
1 5 1.0208359400987237 1 5 1.0208359956741333
5 1 1.0208359400987237 5 1 1.0208359956741333
6 7 1.0221332668982412 6 7 1.022133231163025
7 6 1.0221332668982412 7 6 1.022133231163025
2 5 1.050769316594881 2 5 1.050769329071045
5 2 1.050769316594881 5 2 1.050769329071045
6 8 1.103420381318026 6 8 1.103420376777649
8 6 1.103420381318026 8 6 1.103420376777649
3 8 1.1170300826294572 3 8 1.1170300245285034
8 3 1.1170300826294572 8 3 1.1170300245285034
6 10 1.1523451990991307 6 10 1.1523451805114746
10 6 1.1523451990991307 10 6 1.1523451805114746
1 9 1.1637750565139302 1 9 1.163775086402893
9 1 1.1637750565139302 9 1 1.163775086402893
2 8 1.1736571017573874 2 8 1.1736570596694946
8 2 1.1736571017573874 8 2 1.1736570596694946
4 9 1.1746893942711878 4 9 1.1746894121170044
9 4 1.1746893942711878 9 4 1.1746894121170044
1 8 1.1909959973982214 1 8 1.1909960508346558
8 1 1.1909959973982214 8 1 1.1909960508346558
8 10 1.209359617652948 8 10 1.2093596458435059
10 8 1.209359617652948 10 8 1.2093596458435059
6 9 1.214529873940304 6 9 1.2145298719406128
9 6 1.214529873940304 9 6 1.2145298719406128
5 6 1.227278506501395 5 6 1.2272785902023315
6 5 1.227278506501395 6 5 1.2272785902023315
8 9 1.2575258643523053 8 9 1.2575258016586304
9 8 1.2575258643523053 9 8 1.2575258016586304
7 8 1.288239696195716 7 8 1.288239598274231
8 7 1.288239696195716 8 7 1.288239598274231
flush session status; flush session status;
select id,vec_distance_euclidean(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3; select id,vec_distance_euclidean(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
id d id d
9 0.4719976290006591 9 0.47199761867523193
10 0.5069011044450041 10 0.5069010853767395
3 0.5865673124650332 3 0.5865673422813416
show status like 'handler_read_rnd_next'; show status like 'handler_read_rnd_next';
Variable_name Value Variable_name Value
Handler_read_rnd_next 0 Handler_read_rnd_next 0
select id,vec_distance_euclidean(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 use index () order by d limit 3; select id,vec_distance_euclidean(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 use index () order by d limit 3;
id d id d
9 0.4719976290006591 9 0.47199761867523193
10 0.5069011044450041 10 0.5069010853767395
3 0.5865673124650332 3 0.5865673422813416
show status like 'handler_read_rnd_next'; show status like 'handler_read_rnd_next';
Variable_name Value Variable_name Value
Handler_read_rnd_next 11 Handler_read_rnd_next 11
flush session status; flush session status;
select id,vec_distance_cosine(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3; select id,vec_distance_cosine(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
id d id d
10 0.05905546376032378 10 0.059055447578430176
9 0.06546887818344715 9 0.06546902656555176
3 0.10750282439505232 3 0.10750287771224976
show status like 'handler_read_rnd_next'; show status like 'handler_read_rnd_next';
Variable_name Value Variable_name Value
Handler_read_rnd_next 11 Handler_read_rnd_next 11
delete from t1 where v = x'7b713f3e5258323f80d1113d673b2b3f66e3583f'; delete from t1 where v = x'7b713f3e5258323f80d1113d673b2b3f66e3583f';
select id,vec_distance_euclidean(v, x'B047263C9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3; select id,vec_distance_euclidean(v, x'B047263C9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
id d id d
10 0.5069011044450041 10 0.5069010853767395
3 0.5865673124650332 3 0.5865673422813416
7 0.7344464697214867 7 0.7344464659690857
insert t1 (v) values (x'7b713f3e5258323f80d1113d673b2b3f66e3583f'); insert t1 (v) values (x'7b713f3e5258323f80d1113d673b2b3f66e3583f');
select id,vec_distance_euclidean(v, x'b047263c9F87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3; select id,vec_distance_euclidean(v, x'b047263c9F87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
id d id d
11 0.4719976290006591 11 0.47199761867523193
10 0.5069011044450041 10 0.5069010853767395
3 0.5865673124650332 3 0.5865673422813416
select id,vec_distance_euclidean(v, x'B047263c9F87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 5; select id,vec_distance_euclidean(v, x'B047263c9F87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 5;
id d id d
11 0.4719976290006591 11 0.47199761867523193
10 0.5069011044450041 10 0.5069010853767395
3 0.5865673124650332 3 0.5865673422813416
7 0.7344464697214867 7 0.7344464659690857
5 0.7671033529042712 5 0.7671033143997192
update t1 set v=x'76EDFC3E4B57243F10F8423FB158713F020BAA3E' where v=x'6CA1D43E9DF91B3FE580DA3E1C247D3F147CF33E'; update t1 set v=x'76EDFC3E4B57243F10F8423FB158713F020BAA3E' where v=x'6CA1D43E9DF91B3FE580DA3E1C247D3F147CF33E';
select id,vec_distance_euclidean(v, x'B047263C9F87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 5; select id,vec_distance_euclidean(v, x'B047263C9F87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 5;
id d id d
11 0.4719976290006591 11 0.47199761867523193
3 0.5865673124650332 3 0.5865673422813416
7 0.7344464697214867 7 0.7344464659690857
10 0.746836719209219 10 0.7468367218971252
5 0.7671033529042712 5 0.7671033143997192
delete from t1; delete from t1;
insert t1 (v) values (x'e360d63ebe554f3fcdbc523f4522193f5236083d'), insert t1 (v) values (x'e360d63ebe554f3fcdbc523f4522193f5236083d'),
(x'f511303f72224a3fdd05fe3eb22a133ffae86a3f'), (x'f511303f72224a3fdd05fe3eb22a133ffae86a3f'),
...@@ -294,11 +294,11 @@ insert t1 (v) values (x'e360d63ebe554f3fcdbc523f4522193f5236083d'), ...@@ -294,11 +294,11 @@ insert t1 (v) values (x'e360d63ebe554f3fcdbc523f4522193f5236083d'),
(x'6ca1d43e9df91b3fe580da3e1c247d3f147cf33e'); (x'6ca1d43e9df91b3fe580da3e1c247d3f147cf33e');
select id,vec_distance_euclidean(v, x'b047263c9f87233Fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 5; select id,vec_distance_euclidean(v, x'b047263c9f87233Fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 5;
id d id d
20 0.4719976290006591 20 0.47199761867523193
21 0.5069011044450041 21 0.5069010853767395
14 0.5865673124650332 14 0.5865673422813416
18 0.7344464697214867 18 0.7344464659690857
16 0.7671033529042712 16 0.7671033143997192
insert t1 (v) values (''); insert t1 (v) values ('');
ERROR 22007: Incorrect vector value: '...' for column `test`.`t1`.`v` at row 1 ERROR 22007: Incorrect vector value: '...' for column `test`.`t1`.`v` at row 1
insert t1 (v) values (x'1234'); insert t1 (v) values (x'1234');
...@@ -426,32 +426,32 @@ insert t1 (v) values (x'e360d63ebe554f3fcdbc523f4522193f5236083d'), ...@@ -426,32 +426,32 @@ insert t1 (v) values (x'e360d63ebe554f3fcdbc523f4522193f5236083d'),
(x'6ca1d43e9df91b3fe580da3e1c247d3f147cf33e'); (x'6ca1d43e9df91b3fe580da3e1c247d3f147cf33e');
select id,vec_distance_cosine(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3; select id,vec_distance_cosine(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
id d id d
10 0.05905546376032378 10 0.059055447578430176
9 0.06546887818344715 9 0.06546902656555176
3 0.10750282439505232 3 0.10750287771224976
flush session status; flush session status;
select id,vec_distance_cosine(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3; select id,vec_distance_cosine(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
id d id d
10 0.05905546376032378 10 0.059055447578430176
9 0.06546887818344715 9 0.06546902656555176
3 0.10750282439505232 3 0.10750287771224976
show status like 'handler_read_rnd_next'; show status like 'handler_read_rnd_next';
Variable_name Value Variable_name Value
Handler_read_rnd_next 0 Handler_read_rnd_next 0
select id,vec_distance_cosine(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 use index () order by d limit 3; select id,vec_distance_cosine(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 use index () order by d limit 3;
id d id d
10 0.05905546376032378 10 0.059055447578430176
9 0.06546887818344715 9 0.06546902656555176
3 0.10750282439505232 3 0.10750287771224976
show status like 'handler_read_rnd_next'; show status like 'handler_read_rnd_next';
Variable_name Value Variable_name Value
Handler_read_rnd_next 11 Handler_read_rnd_next 11
flush session status; flush session status;
select id,vec_distance_euclidean(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3; select id,vec_distance_euclidean(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
id d id d
9 0.4719976290006591 9 0.47199761867523193
10 0.5069011044450041 10 0.5069010853767395
3 0.5865673124650332 3 0.5865673422813416
show status like 'handler_read_rnd_next'; show status like 'handler_read_rnd_next';
Variable_name Value Variable_name Value
Handler_read_rnd_next 11 Handler_read_rnd_next 11
......
...@@ -40,18 +40,18 @@ delete from t1 where id=7; ...@@ -40,18 +40,18 @@ delete from t1 where id=7;
rollback; rollback;
select id,vec_distance_euclidean(v, x'c923e33dc0da313fe7c7983e526b3d3fde63963e6eaf3a3f27fa133fe27a583f') d from t1 order by d limit 5; select id,vec_distance_euclidean(v, x'c923e33dc0da313fe7c7983e526b3d3fde63963e6eaf3a3f27fa133fe27a583f') d from t1 order by d limit 5;
id d id d
2 0.8781474260354732 2 0.8781474828720093
10 0.8856208347761952 10 0.8856208324432373
30 1.0162643974895857 30 1.0162643194198608
7 1.026397313888122 7 1.0263972282409668
5 1.0308161006949719 5 1.0308160781860352
select id,vec_distance_euclidean(v, x'754b5f3ea2312b3fc169f43e4604883e1d20173e8dd7443f421b703fb11e0d3e') d from t1 order by d limit 5; select id,vec_distance_euclidean(v, x'754b5f3ea2312b3fc169f43e4604883e1d20173e8dd7443f421b703fb11e0d3e') d from t1 order by d limit 5;
id d id d
2 0.9426904171992334 2 0.9426904320716858
33 0.9477554826856 33 0.9477554559707642
30 1.111405427702547 30 1.1114054918289185
10 1.118630286292343 10 1.118630290031433
8 1.1405733350751739 8 1.140573263168335
create table t2 (id int auto_increment primary key, v blob not null, vector index (v)) engine=innodb; create table t2 (id int auto_increment primary key, v blob not null, vector index (v)) engine=innodb;
insert t2 (v) values insert t2 (v) values
(x'45cf153f830a313f7a0a113fb1ff533f47a1533fcf9e6e3f'), (x'45cf153f830a313f7a0a113fb1ff533f47a1533fcf9e6e3f'),
...@@ -73,16 +73,16 @@ insert t2 values ...@@ -73,16 +73,16 @@ insert t2 values
commit; commit;
select id,vec_distance_euclidean(v, x'1f4d053f7056493f937da03dd8c97a3f220cbb3c926c1c3facca213ec0618a3e') d from t1 order by d limit 5; select id,vec_distance_euclidean(v, x'1f4d053f7056493f937da03dd8c97a3f220cbb3c926c1c3facca213ec0618a3e') d from t1 order by d limit 5;
id d id d
6 0.9309383181777582 6 0.9309383034706116
5 0.9706304662574956 5 0.9706304669380188
30 0.98144492002831 30 0.9814448952674866
50 1.079862635421575 50 1.0798625946044922
2 1.0907138991979892 2 1.0907139778137207
select id,vec_distance_euclidean(v, x'f618663f256be73e62cd453f8bcdbf3e16ae503c3858313f') d from t2 order by d limit 5; select id,vec_distance_euclidean(v, x'f618663f256be73e62cd453f8bcdbf3e16ae503c3858313f') d from t2 order by d limit 5;
id d id d
21 0.43559180321379337 21 0.43559178709983826
20 0.6435053022072372 20 0.643505334854126
6 0.6942000623336242 6 0.6942000389099121
2 0.7971622099055623 2 0.7971622347831726
9 0.8298589136476077 9 0.8298588991165161
drop table t1, t2; drop table t1, t2;
...@@ -215,3 +215,23 @@ String *Item_func_vec_fromtext::val_str(String *buf) ...@@ -215,3 +215,23 @@ String *Item_func_vec_fromtext::val_str(String *buf)
null_value= true; null_value= true;
return nullptr; return nullptr;
} }
#include <eigen3/Eigen/Dense>
using namespace Eigen;
double Item_func_vec_distance_euclidean::
calc_distance(float *data1, float *data2, size_t d_len)
{
Map<VectorXf> v1(data1, d_len);
Map<VectorXf> v2(data2, d_len);
return (v1-v2).norm();
}
double Item_func_vec_distance_cosine::
calc_distance(float *data1, float *data2, size_t d_len)
{
Map<VectorXf> v1(data1, d_len);
Map<VectorXf> v2(data2, d_len);
return 1.0f - v1.dot(v2)/v1.norm()/v2.norm();
}
...@@ -61,16 +61,7 @@ class Item_func_vec_distance_common: public Item_real_func ...@@ -61,16 +61,7 @@ class Item_func_vec_distance_common: public Item_real_func
class Item_func_vec_distance_euclidean: public Item_func_vec_distance_common class Item_func_vec_distance_euclidean: public Item_func_vec_distance_common
{ {
double calc_distance(float *v1, float *v2, size_t v_len) override double calc_distance(float *v1, float *v2, size_t v_len) override;
{
double d= 0;
for (size_t i= 0; i < v_len; i++, v1++, v2++)
{
float dist= *v1 - *v2;
d+= dist * dist;
}
return sqrt(d);
}
public: public:
Item_func_vec_distance_euclidean(THD *thd, Item *a, Item *b) Item_func_vec_distance_euclidean(THD *thd, Item *a, Item *b)
...@@ -87,17 +78,7 @@ class Item_func_vec_distance_euclidean: public Item_func_vec_distance_common ...@@ -87,17 +78,7 @@ class Item_func_vec_distance_euclidean: public Item_func_vec_distance_common
class Item_func_vec_distance_cosine: public Item_func_vec_distance_common class Item_func_vec_distance_cosine: public Item_func_vec_distance_common
{ {
double calc_distance(float *v1, float *v2, size_t v_len) override double calc_distance(float *v1, float *v2, size_t v_len) override;
{
double dotp=0, abs1=0, abs2=0;
for (size_t i= 0; i < v_len; i++, v1++, v2++)
{
abs1+= *v1 * *v1;
abs2+= *v2 * *v2;
dotp+= *v1 * *v2;
}
return 1 - dotp/sqrt(abs1*abs2);
}
public: public:
Item_func_vec_distance_cosine(THD *thd, Item *a, Item *b) Item_func_vec_distance_cosine(THD *thd, Item *a, Item *b)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment