Commit e3661b9f authored by Sergei Petrunia's avatar Sergei Petrunia

Cherry-picked from MyRocks upstream: Issue #809: Wrong query result with bloom filters

  In reverse-ordered column families, if one wants to start reading at the
  logical end of the index, they should Seek() to a key value that is not
  covered by the index. This may (and typically does) prevent use of a bloom
  filter.
  The calls to setup_scan_iterator() that are made for index and table scan
  didn't take this into account and passed eq_cond_len=INDEX_NUMBER_SIZE.
  Fixed them to compute and pass correct eq_cond_len.

  Also, removed an incorrect assert in ha_rocksdb::setup_iterator_bounds.
parent 39fbafbc
...@@ -8526,7 +8526,7 @@ int ha_rocksdb::index_first_intern(uchar *const buf) { ...@@ -8526,7 +8526,7 @@ int ha_rocksdb::index_first_intern(uchar *const buf) {
DBUG_ASSERT(key != nullptr); DBUG_ASSERT(key != nullptr);
const Rdb_key_def &kd = *m_key_descr_arr[active_index]; const Rdb_key_def &kd = *m_key_descr_arr[active_index];
kd.get_first_key(key, &key_size); int key_start_matching_bytes = kd.get_first_key(key, &key_size);
rocksdb::Slice index_key((const char *)key, key_size); rocksdb::Slice index_key((const char *)key, key_size);
...@@ -8537,7 +8537,7 @@ int ha_rocksdb::index_first_intern(uchar *const buf) { ...@@ -8537,7 +8537,7 @@ int ha_rocksdb::index_first_intern(uchar *const buf) {
// Loop as long as we get a deadlock error AND we end up creating the // Loop as long as we get a deadlock error AND we end up creating the
// snapshot here (i.e. it did not exist prior to this) // snapshot here (i.e. it did not exist prior to this)
for (;;) { for (;;) {
setup_scan_iterator(kd, &index_key, false, Rdb_key_def::INDEX_NUMBER_SIZE); setup_scan_iterator(kd, &index_key, false, key_start_matching_bytes);
m_scan_it->Seek(index_key); m_scan_it->Seek(index_key);
m_skip_scan_it_next_call = true; m_skip_scan_it_next_call = true;
...@@ -9539,7 +9539,6 @@ int ha_rocksdb::update_write_row(const uchar *const old_data, ...@@ -9539,7 +9539,6 @@ int ha_rocksdb::update_write_row(const uchar *const old_data,
void ha_rocksdb::setup_iterator_bounds(const Rdb_key_def &kd, void ha_rocksdb::setup_iterator_bounds(const Rdb_key_def &kd,
const rocksdb::Slice &eq_cond) { const rocksdb::Slice &eq_cond) {
uint eq_cond_len = eq_cond.size(); uint eq_cond_len = eq_cond.size();
DBUG_ASSERT(eq_cond_len >= Rdb_key_def::INDEX_NUMBER_SIZE);
memcpy(m_eq_cond_upper_bound, eq_cond.data(), eq_cond_len); memcpy(m_eq_cond_upper_bound, eq_cond.data(), eq_cond_len);
kd.successor(m_eq_cond_upper_bound, eq_cond_len); kd.successor(m_eq_cond_upper_bound, eq_cond_len);
memcpy(m_eq_cond_lower_bound, eq_cond.data(), eq_cond_len); memcpy(m_eq_cond_lower_bound, eq_cond.data(), eq_cond_len);
...@@ -9635,12 +9634,12 @@ void ha_rocksdb::release_scan_iterator() { ...@@ -9635,12 +9634,12 @@ void ha_rocksdb::release_scan_iterator() {
void ha_rocksdb::setup_iterator_for_rnd_scan() { void ha_rocksdb::setup_iterator_for_rnd_scan() {
uint key_size; uint key_size;
m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size); int key_start_matching_bytes = m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size);
rocksdb::Slice table_key((const char *)m_pk_packed_tuple, key_size); rocksdb::Slice table_key((const char *)m_pk_packed_tuple, key_size);
setup_scan_iterator(*m_pk_descr, &table_key, false, setup_scan_iterator(*m_pk_descr, &table_key, false,
Rdb_key_def::INDEX_NUMBER_SIZE); key_start_matching_bytes);
m_scan_it->Seek(table_key); m_scan_it->Seek(table_key);
m_skip_scan_it_next_call = true; m_skip_scan_it_next_call = true;
} }
......
#
# Issue #809: Wrong query result with bloom filters
#
create table t1 (
id1 bigint not null,
id2 bigint not null,
id3 varchar(100) not null,
id4 int not null,
id5 int not null,
value bigint,
value2 varchar(100),
primary key (id1, id2, id3, id4) COMMENT 'rev:bf5_1'
) engine=ROCKSDB;
create table t2(a int);
insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t3(seq int);
insert into t3
select
1+ A.a + B.a* 10 + C.a * 100 + D.a * 1000
from t2 A, t2 B, t2 C, t2 D;
insert t1
select
(seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc"
from t3;
set global rocksdb_force_flush_memtable_now=1;
# Full table scan
explain
select * from t1 limit 10;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000
select * from t1 limit 10;
id1 id2 id3 id4 id5 value value2
1000 2000 2000 10000 10000 1000 aaabbbccc
1000 2000 2000 9999 9999 1000 aaabbbccc
1000 2000 2000 9998 9998 1000 aaabbbccc
1000 2000 2000 9997 9997 1000 aaabbbccc
1000 2000 2000 9996 9996 1000 aaabbbccc
1000 1999 1999 9995 9995 1000 aaabbbccc
1000 1999 1999 9994 9994 1000 aaabbbccc
1000 1999 1999 9993 9993 1000 aaabbbccc
1000 1999 1999 9992 9992 1000 aaabbbccc
1000 1999 1999 9991 9991 1000 aaabbbccc
# An index scan starting from the end of the table:
explain
select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index NULL PRIMARY 122 NULL 1
select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
id1 id2 id3 id4 id5 value value2
1000 2000 2000 10000 10000 1000 aaabbbccc
create table t4 (
pk int unsigned not null primary key,
kp1 int unsigned not null,
kp2 int unsigned not null,
col1 int unsigned,
key(kp1, kp2) comment 'rev:bf5_2'
) engine=rocksdb;
insert into t4 values (1, 0xFFFF, 0xFFF, 12345);
# This must not fail an assert:
select * from t4 force index(kp1) where kp1=0xFFFFFFFF and kp2<=0xFFFFFFFF order by kp2 desc;
pk kp1 kp2 col1
drop table t1,t2,t3,t4;
--rocksdb_override_cf_options=rev:bf5_1={prefix_extractor=capped:4;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;}};
--echo #
--echo # Issue #809: Wrong query result with bloom filters
--echo #
create table t1 (
id1 bigint not null,
id2 bigint not null,
id3 varchar(100) not null,
id4 int not null,
id5 int not null,
value bigint,
value2 varchar(100),
primary key (id1, id2, id3, id4) COMMENT 'rev:bf5_1'
) engine=ROCKSDB;
create table t2(a int);
insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t3(seq int);
insert into t3
select
1+ A.a + B.a* 10 + C.a * 100 + D.a * 1000
from t2 A, t2 B, t2 C, t2 D;
insert t1
select
(seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc"
from t3;
set global rocksdb_force_flush_memtable_now=1;
--echo # Full table scan
explain
select * from t1 limit 10;
select * from t1 limit 10;
--echo # An index scan starting from the end of the table:
explain
select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
# A testcase for an assertion that the fix is removing
# The only requirement for the used column family is that it is reverse-ordered
create table t4 (
pk int unsigned not null primary key,
kp1 int unsigned not null,
kp2 int unsigned not null,
col1 int unsigned,
key(kp1, kp2) comment 'rev:bf5_2'
) engine=rocksdb;
insert into t4 values (1, 0xFFFF, 0xFFF, 12345);
--echo # This must not fail an assert:
select * from t4 force index(kp1) where kp1=0xFFFFFFFF and kp2<=0xFFFFFFFF order by kp2 desc;
drop table t1,t2,t3,t4;
...@@ -238,12 +238,28 @@ class Rdb_key_def { ...@@ -238,12 +238,28 @@ class Rdb_key_def {
*size = INDEX_NUMBER_SIZE; *size = INDEX_NUMBER_SIZE;
} }
/* Get the first key that you need to position at to start iterating. /*
Returns a "supremum" or "infimum" for this index based on collation order Get the first key that you need to position at to start iterating.
Stores into *key a "supremum" or "infimum" key value for the index.
@return Number of bytes in the key that are usable for bloom filter use.
*/ */
inline void get_first_key(uchar *const key, uint *const size) const { inline int get_first_key(uchar *const key, uint *const size) const {
return m_is_reverse_cf ? get_supremum_key(key, size) if (m_is_reverse_cf)
: get_infimum_key(key, size); get_supremum_key(key, size);
else
get_infimum_key(key, size);
/* Find out how many bytes of infimum are the same as m_index_number */
uchar unmodified_key[INDEX_NUMBER_SIZE];
rdb_netbuf_store_index(unmodified_key, m_index_number);
int i;
for (i = 0; i < INDEX_NUMBER_SIZE; i++) {
if (key[i] != unmodified_key[i])
break;
}
return i;
} }
/* Make a key that is right after the given key. */ /* Make a key that is right after the given key. */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment