Commit 62084feb authored by Marko Mäkelä

Bug#54358: READ UNCOMMITTED access failure of off-page DYNAMIC or COMPRESSED columns

When the server crashes after a record stub has been inserted and
before all its off-page columns have been written, the record will
contain incomplete off-page columns after crash recovery. Such records
may only be accessed at the READ UNCOMMITTED isolation level or when
rolling back a recovered transaction in recv_recovery_rollback_active().
Skip these records at the READ UNCOMMITTED isolation level.

TODO: Add assertions to check that the above assumptions hold when an
incomplete BLOB is encountered.

btr_rec_copy_externally_stored_field(): Return NULL if the field is
incomplete.

row_prebuilt_t::templ_contains_blob: Clarify what "BLOB" means in this
context. Hint: MySQL BLOBs are not the same as InnoDB BLOBs.

row_sel_store_mysql_rec(): Return FALSE if not all columns could be
retrieved. Previously this function always returned TRUE.  Assert that
the record is not delete-marked.

row_sel_push_cache_row_for_mysql(): Return FALSE if not all columns
could be retrieved.

row_search_for_mysql(): Skip records containing incomplete off-page
columns. Assert that the transaction isolation level is READ
UNCOMMITTED.

rb://380 approved by Jimmy Yang
parent 1b31b3a3
...@@ -4814,7 +4814,7 @@ btr_copy_externally_stored_field( ...@@ -4814,7 +4814,7 @@ btr_copy_externally_stored_field(
/*******************************************************************//** /*******************************************************************//**
Copies an externally stored field of a record to mem heap. Copies an externally stored field of a record to mem heap.
@return the field copied to heap */ @return the field copied to heap, or NULL if the field is incomplete */
UNIV_INTERN UNIV_INTERN
byte* byte*
btr_rec_copy_externally_stored_field( btr_rec_copy_externally_stored_field(
...@@ -4844,6 +4844,28 @@ btr_rec_copy_externally_stored_field( ...@@ -4844,6 +4844,28 @@ btr_rec_copy_externally_stored_field(
data = rec_get_nth_field(rec, offsets, no, &local_len); data = rec_get_nth_field(rec, offsets, no, &local_len);
ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
if (UNIV_UNLIKELY
(!memcmp(data + local_len - BTR_EXTERN_FIELD_REF_SIZE,
field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) {
/* The externally stored field was not written
yet. This is only a valid condition when the server
crashed after the time a record stub was freshly
inserted but before all its columns were written. This
record should only be seen by
recv_recovery_rollback_active() or any
TRX_ISO_READ_UNCOMMITTED transactions. */
/* TODO: assert that there is an owner_trx with
owner_trx->id == DB_TRX_ID and owner_trx->is_recovered */
/* TODO: assert that for the current transaction trx,
either (trx == owner_trx && trx_is_recv(trx)) or
trx->isolation_level == TRX_ISO_READ_UNCOMMITTED. */
return(NULL);
}
return(btr_copy_externally_stored_field(len, data, return(btr_copy_externally_stored_field(len, data,
zip_size, local_len, heap)); zip_size, local_len, heap));
} }
......
...@@ -570,7 +570,7 @@ btr_copy_externally_stored_field_prefix( ...@@ -570,7 +570,7 @@ btr_copy_externally_stored_field_prefix(
ulint local_len);/*!< in: length of data, in bytes */ ulint local_len);/*!< in: length of data, in bytes */
/*******************************************************************//** /*******************************************************************//**
Copies an externally stored field of a record to mem heap. Copies an externally stored field of a record to mem heap.
@return the field copied to heap */ @return the field copied to heap, or NULL if the field is incomplete */
UNIV_INTERN UNIV_INTERN
byte* byte*
btr_rec_copy_externally_stored_field( btr_rec_copy_externally_stored_field(
......
...@@ -622,7 +622,11 @@ struct row_prebuilt_struct { ...@@ -622,7 +622,11 @@ struct row_prebuilt_struct {
the secondary index, then this is the secondary index, then this is
set to TRUE */ set to TRUE */
unsigned templ_contains_blob:1;/*!< TRUE if the template contains unsigned templ_contains_blob:1;/*!< TRUE if the template contains
BLOB column(s) */ a column with DATA_BLOB ==
get_innobase_type_from_mysql_type();
not to be confused with InnoDB
externally stored columns
(VARCHAR can be off-page too) */
mysql_row_templ_t* mysql_template;/*!< template used to transform mysql_row_templ_t* mysql_template;/*!< template used to transform
rows fast between MySQL and Innobase rows fast between MySQL and Innobase
formats; memory for this template formats; memory for this template
......
...@@ -1780,6 +1780,11 @@ row_merge_copy_blobs( ...@@ -1780,6 +1780,11 @@ row_merge_copy_blobs(
(below). */ (below). */
data = btr_rec_copy_externally_stored_field( data = btr_rec_copy_externally_stored_field(
mrec, offsets, zip_size, i, &len, heap); mrec, offsets, zip_size, i, &len, heap);
/* Because we have locked the table, any records
written by incomplete transactions must have been
rolled back already. There must not be any incomplete
BLOB columns. */
ut_a(data);
dfield_set_data(field, data, len); dfield_set_data(field, data, len);
} }
......
...@@ -416,7 +416,7 @@ row_sel_fetch_columns( ...@@ -416,7 +416,7 @@ row_sel_fetch_columns(
field_no))) { field_no))) {
/* Copy an externally stored field to the /* Copy an externally stored field to the
temporary heap */ temporary heap, if possible. */
heap = mem_heap_create(1); heap = mem_heap_create(1);
...@@ -425,6 +425,21 @@ row_sel_fetch_columns( ...@@ -425,6 +425,21 @@ row_sel_fetch_columns(
dict_table_zip_size(index->table), dict_table_zip_size(index->table),
field_no, &len, heap); field_no, &len, heap);
/* data == NULL means that the
externally stored field was not
written yet. This is only a valid
condition when the server crashed
after the time a record stub was
freshly inserted but before all its
columns were written. This record
should only be seen by
recv_recovery_rollback_active() or any
TRX_ISO_READ_UNCOMMITTED
transactions. The InnoDB SQL parser
(the sole caller of this function)
does not implement READ UNCOMMITTED,
and it is not involved during rollback. */
ut_a(data);
ut_a(len != UNIV_SQL_NULL); ut_a(len != UNIV_SQL_NULL);
needs_copy = TRUE; needs_copy = TRUE;
...@@ -926,6 +941,7 @@ row_sel_get_clust_rec( ...@@ -926,6 +941,7 @@ row_sel_get_clust_rec(
when plan->clust_pcur was positioned. The latch will not be when plan->clust_pcur was positioned. The latch will not be
released until mtr_commit(mtr). */ released until mtr_commit(mtr). */
ut_ad(!rec_get_deleted_flag(clust_rec, rec_offs_comp(offsets)));
row_sel_fetch_columns(index, clust_rec, offsets, row_sel_fetch_columns(index, clust_rec, offsets,
UT_LIST_GET_FIRST(plan->columns)); UT_LIST_GET_FIRST(plan->columns));
*out_rec = clust_rec; *out_rec = clust_rec;
...@@ -1628,6 +1644,13 @@ row_sel( ...@@ -1628,6 +1644,13 @@ row_sel(
} }
if (old_vers == NULL) { if (old_vers == NULL) {
/* The record does not exist
in our read view. Skip it, but
first attempt to determine
whether the index segment we
are searching through has been
exhausted. */
offsets = rec_get_offsets( offsets = rec_get_offsets(
rec, index, offsets, rec, index, offsets,
ULINT_UNDEFINED, &heap); ULINT_UNDEFINED, &heap);
...@@ -2647,9 +2670,8 @@ Convert a row in the Innobase format to a row in the MySQL format. ...@@ -2647,9 +2670,8 @@ Convert a row in the Innobase format to a row in the MySQL format.
Note that the template in prebuilt may advise us to copy only a few Note that the template in prebuilt may advise us to copy only a few
columns to mysql_rec, other columns are left blank. All columns may not columns to mysql_rec, other columns are left blank. All columns may not
be needed in the query. be needed in the query.
@return TRUE if success, FALSE if could not allocate memory for a BLOB @return TRUE on success, FALSE if not all columns could be retrieved */
(though we may also assert in that case) */ static __attribute__((warn_unused_result))
static
ibool ibool
row_sel_store_mysql_rec( row_sel_store_mysql_rec(
/*====================*/ /*====================*/
...@@ -2672,6 +2694,7 @@ row_sel_store_mysql_rec( ...@@ -2672,6 +2694,7 @@ row_sel_store_mysql_rec(
ut_ad(prebuilt->mysql_template); ut_ad(prebuilt->mysql_template);
ut_ad(prebuilt->default_rec); ut_ad(prebuilt->default_rec);
ut_ad(rec_offs_validate(rec, NULL, offsets)); ut_ad(rec_offs_validate(rec, NULL, offsets));
ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) { if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) {
mem_heap_free(prebuilt->blob_heap); mem_heap_free(prebuilt->blob_heap);
...@@ -2719,6 +2742,26 @@ row_sel_store_mysql_rec( ...@@ -2719,6 +2742,26 @@ row_sel_store_mysql_rec(
dict_table_zip_size(prebuilt->table), dict_table_zip_size(prebuilt->table),
templ->rec_field_no, &len, heap); templ->rec_field_no, &len, heap);
if (UNIV_UNLIKELY(!data)) {
/* The externally stored field
was not written yet. This is
only a valid condition when
the server crashed after the
time a record stub was freshly
inserted but before all its
columns were written. This
record should only be seen by
recv_recovery_rollback_active()
or any TRX_ISO_READ_UNCOMMITTED
transactions. */
if (extern_field_heap) {
mem_heap_free(extern_field_heap);
}
return(FALSE);
}
ut_a(len != UNIV_SQL_NULL); ut_a(len != UNIV_SQL_NULL);
} else { } else {
/* Field is stored in the row. */ /* Field is stored in the row. */
...@@ -3136,9 +3179,10 @@ row_sel_pop_cached_row_for_mysql( ...@@ -3136,9 +3179,10 @@ row_sel_pop_cached_row_for_mysql(
} }
/********************************************************************//** /********************************************************************//**
Pushes a row for MySQL to the fetch cache. */ Pushes a row for MySQL to the fetch cache.
UNIV_INLINE @return TRUE on success, FALSE if the record contains incomplete BLOBs */
void UNIV_INLINE __attribute__((warn_unused_result))
ibool
row_sel_push_cache_row_for_mysql( row_sel_push_cache_row_for_mysql(
/*=============================*/ /*=============================*/
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */ row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */
...@@ -3180,10 +3224,11 @@ row_sel_push_cache_row_for_mysql( ...@@ -3180,10 +3224,11 @@ row_sel_push_cache_row_for_mysql(
prebuilt->fetch_cache[ prebuilt->fetch_cache[
prebuilt->n_fetch_cached], prebuilt->n_fetch_cached],
prebuilt, rec, offsets))) { prebuilt, rec, offsets))) {
ut_error; return(FALSE);
} }
prebuilt->n_fetch_cached++; prebuilt->n_fetch_cached++;
return(TRUE);
} }
/*********************************************************************//** /*********************************************************************//**
...@@ -3578,6 +3623,24 @@ row_search_for_mysql( ...@@ -3578,6 +3623,24 @@ row_search_for_mysql(
if (!row_sel_store_mysql_rec(buf, prebuilt, if (!row_sel_store_mysql_rec(buf, prebuilt,
rec, offsets)) { rec, offsets)) {
/* Only fresh inserts at
server crash time may contain
incomplete externally stored
columns. Pretend that such
records do not exist. Such
records may only be accessed
at the READ UNCOMMITTED
isolation level or when
rolling back a recovered
transaction. Rollback happens
at a lower level, not here. */
ut_a(trx->isolation_level
== TRX_ISO_READ_UNCOMMITTED);
/* TODO: assert that there is
an owner_trx with
owner_trx->id == DB_TRX_ID and
owner_trx->is_recovered */
err = DB_TOO_BIG_RECORD; err = DB_TOO_BIG_RECORD;
/* We let the main loop to do the /* We let the main loop to do the
...@@ -4357,9 +4420,20 @@ row_search_for_mysql( ...@@ -4357,9 +4420,20 @@ row_search_for_mysql(
not cache rows because there the cursor is a scrollable not cache rows because there the cursor is a scrollable
cursor. */ cursor. */
row_sel_push_cache_row_for_mysql(prebuilt, result_rec, if (!row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
offsets); offsets)) {
if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) { /* Only fresh inserts at server crash time may contain
incomplete externally stored columns. Pretend that
such records do not exist. Such records may only be
accessed at the READ UNCOMMITTED isolation level or
when rolling back a recovered transaction. Rollback
happens at a lower level, not here. */
ut_a(trx->isolation_level == TRX_ISO_READ_UNCOMMITTED);
/* TODO: assert that there is an owner_trx
with owner_trx->id == DB_TRX_ID and
owner_trx->is_recovered */
} else if (prebuilt->n_fetch_cached
== MYSQL_FETCH_CACHE_SIZE) {
goto got_row; goto got_row;
} }
...@@ -4375,9 +4449,20 @@ row_search_for_mysql( ...@@ -4375,9 +4449,20 @@ row_search_for_mysql(
} else { } else {
if (!row_sel_store_mysql_rec(buf, prebuilt, if (!row_sel_store_mysql_rec(buf, prebuilt,
result_rec, offsets)) { result_rec, offsets)) {
err = DB_TOO_BIG_RECORD; /* Only fresh inserts at server crash
time may contain incomplete externally
goto lock_wait_or_error; stored columns. Pretend that such
records do not exist. Such records may
only be accessed at the READ UNCOMMITTED
isolation level or when rolling back a
recovered transaction. Rollback happens
at a lower level, not here. */
ut_a(trx->isolation_level
== TRX_ISO_READ_UNCOMMITTED);
/* TODO: assert that there is an owner_trx
with owner_trx->id == DB_TRX_ID and
owner_trx->is_recovered */
goto next_rec;
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment