Commit c943dfd8 authored by Sergey Petrunya

MDEV-494, part #1: phantom row for big full-scan selects

- A full table scan internally reads rows in batches (LIMIT n) and restarts
  each subsequent batch from the last rowkey seen. Rowkey ranges are
  inclusive, so the restarted batch returns that same rowkey again. The
  repeated row must be skipped, otherwise it shows up as a phantom row.
parent 869826d7
......@@ -166,3 +166,20 @@ truncate table t1;
select * from t1;
rowkey a
drop table t1;
#
# MDEV-494, part #1: phantom row for big full-scan selects
#
create table t0 (a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
CREATE TABLE t1 (rowkey BIGINT PRIMARY KEY, a BIGINT) ENGINE=CASSANDRA
thrift_host='localhost' keyspace='mariadbtest2' column_family = 'cf2';
insert into t1 select A.a + 10 * B.a + 100*C.a, 12345 from t0 A, t0 B, t0 C;
select count(*) from t1;
count(*)
1000
select count(*) from t1 where a=12345;
count(*)
1000
delete from t1;
drop table t1;
drop table t0;
......@@ -172,6 +172,23 @@ truncate table t1;
select * from t1;
drop table t1;
--echo #
--echo # MDEV-494, part #1: phantom row for big full-scan selects
--echo #
create table t0 (a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
CREATE TABLE t1 (rowkey BIGINT PRIMARY KEY, a BIGINT) ENGINE=CASSANDRA
thrift_host='localhost' keyspace='mariadbtest2' column_family = 'cf2';
insert into t1 select A.a + 10 * B.a + 100*C.a, 12345 from t0 A, t0 B, t0 C;
select count(*) from t1;
select count(*) from t1 where a=12345;
delete from t1;
drop table t1;
drop table t0;
############################################################################
## Cassandra cleanup
############################################################################
......
......@@ -93,6 +93,10 @@ class Cassandra_se_impl: public Cassandra_se_interface
void get_read_rowkey(char **value, int *value_len);
/* Reads, multi-row scans */
private:
bool have_rowkey_to_skip;
std::string rowkey_to_skip;
public:
bool get_range_slices(bool last_key_as_start_key);
void finish_reading_range_slices();
bool get_next_range_slice_row(bool *eof);
......@@ -106,15 +110,17 @@ class Cassandra_se_impl: public Cassandra_se_interface
int add_lookup_key(const char *key, size_t key_len);
bool multiget_slice();
private:
std::vector<std::string> mrr_keys; /* TODO: can we use allocator to put them onto MRR buffer? */
std::map<std::string, std::vector<ColumnOrSuperColumn> > mrr_result;
std::map<std::string, std::vector<ColumnOrSuperColumn> >::iterator mrr_result_it;
public:
bool get_next_multiget_row();
bool truncate();
bool remove_row();
private:
/* Non-inherited utility functions: */
int64_t get_i64_timestamp();
};
......@@ -407,9 +413,17 @@ bool Cassandra_se_impl::get_range_slices(bool last_key_as_start_key)
key_range.__isset.end_key= true;
if (last_key_as_start_key)
{
key_range.start_key= rowkey;
have_rowkey_to_skip= true;
rowkey_to_skip= rowkey;
}
else
{
have_rowkey_to_skip= false;
key_range.start_key.assign("", 0);
}
key_range.end_key.assign("", 0);
key_range.count= read_batch_size;
......@@ -441,6 +455,7 @@ bool Cassandra_se_impl::get_range_slices(bool last_key_as_start_key)
/* Switch to next row. This may produce an error */
bool Cassandra_se_impl::get_next_range_slice_row(bool *eof)
{
restart:
if (key_slice_it == key_slice_vec.end())
{
if (get_slices_returned_less)
......@@ -462,7 +477,13 @@ bool Cassandra_se_impl::get_next_range_slice_row(bool *eof)
return false;
}
}
if (have_rowkey_to_skip && !rowkey_to_skip.compare(key_slice_it->key))
{
key_slice_it++;
goto restart;
}
*eof= false;
column_data_vec= key_slice_it->columns;
rowkey= key_slice_it->key;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment