Commit 518f9cb7 authored by Sergei Golubchik's avatar Sergei Golubchik

if we require Eigen, we might as well use it everywhere

it's measurably faster even in items
parent 78bd693a
......@@ -108,14 +108,14 @@ id hex(v)
flush tables;
select id,vec_distance_euclidean(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
id d
9 0.4719976290006591
10 0.5069011044450041
3 0.5865673124650332
9 0.47199761867523193
10 0.5069010853767395
3 0.5865673422813416
select id,vec_distance_euclidean(x'b047263C9f87233fcfd27e3eae493e3f0329f43e', v) d from t1 order by d limit 3;
id d
9 0.4719976290006591
10 0.5069011044450041
3 0.5865673124650332
9 0.47199761867523193
10 0.5069010853767395
3 0.5865673422813416
select id>0,vec_distance_euclidean(v, NULL) d from t1 order by d limit 3;
id>0 d
1 NULL
......@@ -138,149 +138,149 @@ id1 id2 vec_distance_euclidean(t1.v, t2.v)
8 8 0
9 9 0
10 10 0
7 10 0.35209010323904116
10 7 0.35209010323904116
1 7 0.557267332724855
7 1 0.557267332724855
2 3 0.6065128837978769
3 2 0.6065128837978769
1 3 0.6128238020507096
3 1 0.6128238020507096
5 8 0.6219995745138945
8 5 0.6219995745138945
3 10 0.6523185662547816
10 3 0.6523185662547816
9 10 0.6732681362788765
10 9 0.6732681362788765
3 7 0.6799892416547949
7 3 0.6799892416547949
3 9 0.6820752294088018
9 3 0.6820752294088018
2 10 0.6916305331777215
10 2 0.6916305331777215
2 9 0.6966650510789955
9 2 0.6966650510789955
3 6 0.7102823580937639
6 3 0.7102823580937639
2 7 0.7120217580666971
7 2 0.7120217580666971
2 6 0.7351618106552689
6 2 0.7351618106552689
1 10 0.7386864491588024
10 1 0.7386864491588024
4 6 0.7784357824370262
6 4 0.7784357824370262
4 8 0.7795837407361241
8 4 0.7795837407361241
4 5 0.8132007346697969
5 4 0.8132007346697969
2 4 0.8260925223296488
4 2 0.8260925223296488
5 10 0.8286488932765299
10 5 0.8286488932765299
5 9 0.8769351333060768
9 5 0.8769351333060768
1 6 0.8861410875047832
6 1 0.8861410875047832
3 5 0.9224201772876247
5 3 0.9224201772876247
4 7 0.9347916246876117
7 4 0.9347916246876117
7 9 0.9364253407685257
9 7 0.9364253407685257
3 4 0.9757105842688992
4 3 0.9757105842688992
1 2 0.9810272439433514
2 1 0.9810272439433514
1 4 0.9965475544626712
4 1 0.9965475544626712
5 7 0.9976863778073342
7 5 0.9976863778073342
4 10 1.0109345944029724
10 4 1.0109345944029724
1 5 1.0208359400987237
5 1 1.0208359400987237
6 7 1.0221332668982412
7 6 1.0221332668982412
2 5 1.050769316594881
5 2 1.050769316594881
6 8 1.103420381318026
8 6 1.103420381318026
3 8 1.1170300826294572
8 3 1.1170300826294572
6 10 1.1523451990991307
10 6 1.1523451990991307
1 9 1.1637750565139302
9 1 1.1637750565139302
2 8 1.1736571017573874
8 2 1.1736571017573874
4 9 1.1746893942711878
9 4 1.1746893942711878
1 8 1.1909959973982214
8 1 1.1909959973982214
8 10 1.209359617652948
10 8 1.209359617652948
6 9 1.214529873940304
9 6 1.214529873940304
5 6 1.227278506501395
6 5 1.227278506501395
8 9 1.2575258643523053
9 8 1.2575258643523053
7 8 1.288239696195716
8 7 1.288239696195716
7 10 0.35209009051322937
10 7 0.35209009051322937
1 7 0.5572673082351685
7 1 0.5572673082351685
2 3 0.6065129041671753
3 2 0.6065129041671753
1 3 0.6128237843513489
3 1 0.6128237843513489
5 8 0.6219995617866516
8 5 0.6219995617866516
3 10 0.65231853723526
10 3 0.65231853723526
9 10 0.6732681393623352
10 9 0.6732681393623352
3 7 0.679989218711853
7 3 0.679989218711853
3 9 0.6820752024650574
9 3 0.6820752024650574
2 10 0.6916305422782898
10 2 0.6916305422782898
2 9 0.6966650485992432
9 2 0.6966650485992432
3 6 0.7102823853492737
6 3 0.7102823853492737
2 7 0.7120217680931091
7 2 0.7120217680931091
2 6 0.7351617813110352
6 2 0.7351617813110352
1 10 0.7386864423751831
10 1 0.7386864423751831
4 6 0.7784357666969299
6 4 0.7784357666969299
4 8 0.779583752155304
8 4 0.779583752155304
4 5 0.8132007122039795
5 4 0.8132007122039795
2 4 0.826092541217804
4 2 0.826092541217804
5 10 0.8286488652229309
10 5 0.8286488652229309
5 9 0.8769351243972778
9 5 0.8769351243972778
1 6 0.8861410617828369
6 1 0.8861410617828369
3 5 0.9224202036857605
5 3 0.9224202036857605
4 7 0.934791624546051
7 4 0.934791624546051
7 9 0.9364253282546997
9 7 0.9364253282546997
3 4 0.9757105708122253
4 3 0.9757105708122253
1 2 0.9810272455215454
2 1 0.9810272455215454
1 4 0.9965475797653198
4 1 0.9965475797653198
5 7 0.9976863861083984
7 5 0.9976863861083984
4 10 1.010934591293335
10 4 1.010934591293335
1 5 1.0208359956741333
5 1 1.0208359956741333
6 7 1.022133231163025
7 6 1.022133231163025
2 5 1.050769329071045
5 2 1.050769329071045
6 8 1.103420376777649
8 6 1.103420376777649
3 8 1.1170300245285034
8 3 1.1170300245285034
6 10 1.1523451805114746
10 6 1.1523451805114746
1 9 1.163775086402893
9 1 1.163775086402893
2 8 1.1736570596694946
8 2 1.1736570596694946
4 9 1.1746894121170044
9 4 1.1746894121170044
1 8 1.1909960508346558
8 1 1.1909960508346558
8 10 1.2093596458435059
10 8 1.2093596458435059
6 9 1.2145298719406128
9 6 1.2145298719406128
5 6 1.2272785902023315
6 5 1.2272785902023315
8 9 1.2575258016586304
9 8 1.2575258016586304
7 8 1.288239598274231
8 7 1.288239598274231
flush session status;
select id,vec_distance_euclidean(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
id d
9 0.4719976290006591
10 0.5069011044450041
3 0.5865673124650332
9 0.47199761867523193
10 0.5069010853767395
3 0.5865673422813416
show status like 'handler_read_rnd_next';
Variable_name Value
Handler_read_rnd_next 0
select id,vec_distance_euclidean(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 use index () order by d limit 3;
id d
9 0.4719976290006591
10 0.5069011044450041
3 0.5865673124650332
9 0.47199761867523193
10 0.5069010853767395
3 0.5865673422813416
show status like 'handler_read_rnd_next';
Variable_name Value
Handler_read_rnd_next 11
flush session status;
select id,vec_distance_cosine(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
id d
10 0.05905546376032378
9 0.06546887818344715
3 0.10750282439505232
10 0.059055447578430176
9 0.06546902656555176
3 0.10750287771224976
show status like 'handler_read_rnd_next';
Variable_name Value
Handler_read_rnd_next 11
delete from t1 where v = x'7b713f3e5258323f80d1113d673b2b3f66e3583f';
select id,vec_distance_euclidean(v, x'B047263C9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
id d
10 0.5069011044450041
3 0.5865673124650332
7 0.7344464697214867
10 0.5069010853767395
3 0.5865673422813416
7 0.7344464659690857
insert t1 (v) values (x'7b713f3e5258323f80d1113d673b2b3f66e3583f');
select id,vec_distance_euclidean(v, x'b047263c9F87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
id d
11 0.4719976290006591
10 0.5069011044450041
3 0.5865673124650332
11 0.47199761867523193
10 0.5069010853767395
3 0.5865673422813416
select id,vec_distance_euclidean(v, x'B047263c9F87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 5;
id d
11 0.4719976290006591
10 0.5069011044450041
3 0.5865673124650332
7 0.7344464697214867
5 0.7671033529042712
11 0.47199761867523193
10 0.5069010853767395
3 0.5865673422813416
7 0.7344464659690857
5 0.7671033143997192
update t1 set v=x'76EDFC3E4B57243F10F8423FB158713F020BAA3E' where v=x'6CA1D43E9DF91B3FE580DA3E1C247D3F147CF33E';
select id,vec_distance_euclidean(v, x'B047263C9F87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 5;
id d
11 0.4719976290006591
3 0.5865673124650332
7 0.7344464697214867
10 0.746836719209219
5 0.7671033529042712
11 0.47199761867523193
3 0.5865673422813416
7 0.7344464659690857
10 0.7468367218971252
5 0.7671033143997192
delete from t1;
insert t1 (v) values (x'e360d63ebe554f3fcdbc523f4522193f5236083d'),
(x'f511303f72224a3fdd05fe3eb22a133ffae86a3f'),
......@@ -294,11 +294,11 @@ insert t1 (v) values (x'e360d63ebe554f3fcdbc523f4522193f5236083d'),
(x'6ca1d43e9df91b3fe580da3e1c247d3f147cf33e');
select id,vec_distance_euclidean(v, x'b047263c9f87233Fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 5;
id d
20 0.4719976290006591
21 0.5069011044450041
14 0.5865673124650332
18 0.7344464697214867
16 0.7671033529042712
20 0.47199761867523193
21 0.5069010853767395
14 0.5865673422813416
18 0.7344464659690857
16 0.7671033143997192
insert t1 (v) values ('');
ERROR 22007: Incorrect vector value: '...' for column `test`.`t1`.`v` at row 1
insert t1 (v) values (x'1234');
......@@ -426,32 +426,32 @@ insert t1 (v) values (x'e360d63ebe554f3fcdbc523f4522193f5236083d'),
(x'6ca1d43e9df91b3fe580da3e1c247d3f147cf33e');
select id,vec_distance_cosine(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
id d
10 0.05905546376032378
9 0.06546887818344715
3 0.10750282439505232
10 0.059055447578430176
9 0.06546902656555176
3 0.10750287771224976
flush session status;
select id,vec_distance_cosine(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
id d
10 0.05905546376032378
9 0.06546887818344715
3 0.10750282439505232
10 0.059055447578430176
9 0.06546902656555176
3 0.10750287771224976
show status like 'handler_read_rnd_next';
Variable_name Value
Handler_read_rnd_next 0
select id,vec_distance_cosine(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 use index () order by d limit 3;
id d
10 0.05905546376032378
9 0.06546887818344715
3 0.10750282439505232
10 0.059055447578430176
9 0.06546902656555176
3 0.10750287771224976
show status like 'handler_read_rnd_next';
Variable_name Value
Handler_read_rnd_next 11
flush session status;
select id,vec_distance_euclidean(v, x'B047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
id d
9 0.4719976290006591
10 0.5069011044450041
3 0.5865673124650332
9 0.47199761867523193
10 0.5069010853767395
3 0.5865673422813416
show status like 'handler_read_rnd_next';
Variable_name Value
Handler_read_rnd_next 11
......
......@@ -40,18 +40,18 @@ delete from t1 where id=7;
rollback;
select id,vec_distance_euclidean(v, x'c923e33dc0da313fe7c7983e526b3d3fde63963e6eaf3a3f27fa133fe27a583f') d from t1 order by d limit 5;
id d
2 0.8781474260354732
10 0.8856208347761952
30 1.0162643974895857
7 1.026397313888122
5 1.0308161006949719
2 0.8781474828720093
10 0.8856208324432373
30 1.0162643194198608
7 1.0263972282409668
5 1.0308160781860352
select id,vec_distance_euclidean(v, x'754b5f3ea2312b3fc169f43e4604883e1d20173e8dd7443f421b703fb11e0d3e') d from t1 order by d limit 5;
id d
2 0.9426904171992334
33 0.9477554826856
30 1.111405427702547
10 1.118630286292343
8 1.1405733350751739
2 0.9426904320716858
33 0.9477554559707642
30 1.1114054918289185
10 1.118630290031433
8 1.140573263168335
create table t2 (id int auto_increment primary key, v blob not null, vector index (v)) engine=innodb;
insert t2 (v) values
(x'45cf153f830a313f7a0a113fb1ff533f47a1533fcf9e6e3f'),
......@@ -73,16 +73,16 @@ insert t2 values
commit;
select id,vec_distance_euclidean(v, x'1f4d053f7056493f937da03dd8c97a3f220cbb3c926c1c3facca213ec0618a3e') d from t1 order by d limit 5;
id d
6 0.9309383181777582
5 0.9706304662574956
30 0.98144492002831
50 1.079862635421575
2 1.0907138991979892
6 0.9309383034706116
5 0.9706304669380188
30 0.9814448952674866
50 1.0798625946044922
2 1.0907139778137207
select id,vec_distance_euclidean(v, x'f618663f256be73e62cd453f8bcdbf3e16ae503c3858313f') d from t2 order by d limit 5;
id d
21 0.43559180321379337
20 0.6435053022072372
6 0.6942000623336242
2 0.7971622099055623
9 0.8298589136476077
21 0.43559178709983826
20 0.643505334854126
6 0.6942000389099121
2 0.7971622347831726
9 0.8298588991165161
drop table t1, t2;
......@@ -215,3 +215,23 @@ String *Item_func_vec_fromtext::val_str(String *buf)
null_value= true;
return nullptr;
}
#include <eigen3/Eigen/Dense>
using namespace Eigen;
double Item_func_vec_distance_euclidean::
calc_distance(float *data1, float *data2, size_t d_len)
{
Map<VectorXf> v1(data1, d_len);
Map<VectorXf> v2(data2, d_len);
return (v1-v2).norm();
}
double Item_func_vec_distance_cosine::
calc_distance(float *data1, float *data2, size_t d_len)
{
Map<VectorXf> v1(data1, d_len);
Map<VectorXf> v2(data2, d_len);
return 1.0f - v1.dot(v2)/v1.norm()/v2.norm();
}
......@@ -61,16 +61,7 @@ class Item_func_vec_distance_common: public Item_real_func
class Item_func_vec_distance_euclidean: public Item_func_vec_distance_common
{
double calc_distance(float *v1, float *v2, size_t v_len) override
{
double d= 0;
for (size_t i= 0; i < v_len; i++, v1++, v2++)
{
float dist= *v1 - *v2;
d+= dist * dist;
}
return sqrt(d);
}
double calc_distance(float *v1, float *v2, size_t v_len) override;
public:
Item_func_vec_distance_euclidean(THD *thd, Item *a, Item *b)
......@@ -87,17 +78,7 @@ class Item_func_vec_distance_euclidean: public Item_func_vec_distance_common
class Item_func_vec_distance_cosine: public Item_func_vec_distance_common
{
double calc_distance(float *v1, float *v2, size_t v_len) override
{
double dotp=0, abs1=0, abs2=0;
for (size_t i= 0; i < v_len; i++, v1++, v2++)
{
abs1+= *v1 * *v1;
abs2+= *v2 * *v2;
dotp+= *v1 * *v2;
}
return 1 - dotp/sqrt(abs1*abs2);
}
double calc_distance(float *v1, float *v2, size_t v_len) override;
public:
Item_func_vec_distance_cosine(THD *thd, Item *a, Item *b)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment