Commit 138cbec5 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-21724: Optimize page_cur_insert_rec_low() redo logging

Inserting a record into an index page involves updating multiple
fields in the page header as well as updating the next-record links
and potentially updating fields related to the sparse page directory.

Let us cover the insert operations by higher-level log records, to avoid
'redundant' logging about the writes.

The code for applying the high-level log records will check the
consistency of the page thoroughly, to avoid crashes during recovery.
We will refuse to replay the inserts if any inconsistency is detected.
With innodb_force_recovery=1, recovery will continue, but the affected
pages may be more inconsistent if some changes were omitted.

mrec_ext_t: Introduce the EXTENDED record subtypes
INSERT_HEAP_REDUNDANT, INSERT_REUSE_REDUNDANT,
INSERT_HEAP_DYNAMIC, INSERT_REUSE_DYNAMIC.
The record will explicitly identify the page type and whether
the space will be allocated from PAGE_HEAP_TOP or reused from
the PAGE_FREE list. It will also tell how many bytes to copy
from the preceding record header and payload, and how to
initialize the rest of the record header and payload.

mtr_t::page_insert(): Write the high-level log records.

log_phys_t::apply(): Parse the high-level log records.

page_apply_insert_redundant(), page_apply_insert_dynamic():
Apply the high-level log records.

page_dir_split_slot(): Introduce a variant that does not write log
nor deal with ROW_FORMAT=COMPRESSED pages.

page_mem_alloc_heap(): Remove the mtr_t parameter.

page_cur_insert_rec_low(): Write log only via mtr_t::page_insert().
parent dee6fb35
......@@ -494,6 +494,45 @@ struct mtr_t {
@param block B-tree or R-tree page
@param comp false=ROW_FORMAT=REDUNDANT, true=COMPACT or DYNAMIC */
inline void page_create(const buf_block_t &block, bool comp);
/** Write a high-level log record for inserting a B-tree or R-tree record
on a ROW_FORMAT=REDUNDANT page (EXTENDED record subtype
INSERT_HEAP_REDUNDANT or INSERT_REUSE_REDUNDANT).
@param block B-tree or R-tree page
@param reuse false=allocate from PAGE_HEAP_TOP; true=reuse PAGE_FREE
@param prev_rec byte offset of the predecessor of the record to insert,
starting from PAGE_OLD_INFIMUM
@param info_bits info_bits of the record
@param n_fields_s (number of fields << 1) | rec_get_1byte_offs_flag()
@param hdr_c number of record header bytes that are common with prev_rec
(not copied to the log)
@param data_c number of data bytes that are common with prev_rec
(not copied to the log)
@param hdr record header bytes to copy to the log
@param hdr_l number of copied record header bytes
@param data record payload bytes to copy to the log
@param data_l number of copied record data bytes */
inline void page_insert(const buf_block_t &block, bool reuse,
ulint prev_rec, byte info_bits,
ulint n_fields_s, size_t hdr_c, size_t data_c,
const byte *hdr, size_t hdr_l,
const byte *data, size_t data_l);
/** Write a high-level log record for inserting a B-tree or R-tree record
on a ROW_FORMAT=COMPACT or ROW_FORMAT=DYNAMIC page (EXTENDED record subtype
INSERT_HEAP_DYNAMIC or INSERT_REUSE_DYNAMIC).
@param block B-tree or R-tree page
@param reuse false=allocate from PAGE_HEAP_TOP; true=reuse PAGE_FREE
@param prev_rec byte offset of the predecessor of the record to insert,
starting from PAGE_NEW_INFIMUM
@param info_status rec_get_info_and_status_bits()
@param shift if reuse: number of bytes by which PAGE_FREE is moving
(only meaningful when reuse=true)
@param hdr_c number of record header bytes that are common with prev_rec
(not copied to the log)
@param data_c number of data bytes that are common with prev_rec
(not copied to the log)
@param hdr record header bytes to copy to the log
@param hdr_l number of copied record header bytes
@param data record payload bytes to copy to the log
@param data_l number of copied record data bytes */
inline void page_insert(const buf_block_t &block, bool reuse,
ulint prev_rec, byte info_status,
ssize_t shift, size_t hdr_c, size_t data_c,
const byte *hdr, size_t hdr_l,
const byte *data, size_t data_l);
/** Write log for deleting a B-tree or R-tree record in ROW_FORMAT=REDUNDANT.
@param block B-tree or R-tree page
@param prev_rec byte offset of the predecessor of the record to delete,
......
......@@ -263,6 +263,18 @@ enum mrec_ext_t
This is equivalent to the old MLOG_UNDO_INSERT record.
The current byte offset will be reset to FIL_PAGE_TYPE. */
UNDO_APPEND= 3,
/** Insert a ROW_FORMAT=REDUNDANT record, extending PAGE_HEAP_TOP.
The current byte offset will be reset to FIL_PAGE_TYPE. */
INSERT_HEAP_REDUNDANT= 4,
/** Insert a ROW_FORMAT=REDUNDANT record, reusing PAGE_FREE.
The current byte offset will be reset to FIL_PAGE_TYPE. */
INSERT_REUSE_REDUNDANT= 5,
/** Insert a ROW_FORMAT=COMPACT or DYNAMIC record, extending PAGE_HEAP_TOP.
The current byte offset will be reset to FIL_PAGE_TYPE. */
INSERT_HEAP_DYNAMIC= 6,
/** Insert a ROW_FORMAT=COMPACT or DYNAMIC record, reusing PAGE_FREE.
The current byte offset will be reset to FIL_PAGE_TYPE. */
INSERT_REUSE_DYNAMIC= 7,
/** Delete a record on a ROW_FORMAT=REDUNDANT page.
We point to the predecessor of the record to be deleted.
The current byte offset will be reset to FIL_PAGE_TYPE.
......
......@@ -201,6 +201,39 @@ page_cur_delete_rec(
mtr_t* mtr) /*!< in/out: mini-transaction */
MY_ATTRIBUTE((nonnull));
/** Apply an INSERT_HEAP_REDUNDANT or INSERT_REUSE_REDUNDANT record that was
written by page_cur_insert_rec_low() for a ROW_FORMAT=REDUNDANT page.
@param block B-tree or R-tree page in ROW_FORMAT=REDUNDANT
@param reuse false=allocate from PAGE_HEAP_TOP; true=reuse PAGE_FREE
@param prev byte offset of the predecessor, relative to PAGE_OLD_INFIMUM
@param enc_hdr encoded fixed-size header bits
@param hdr_c number of common record header bytes with prev
@param data_c number of common data bytes with prev
@param data literal header and data bytes
@param data_len length of the literal data, in bytes
@return whether the operation failed (inconsistency was noticed) */
bool page_apply_insert_redundant(const buf_block_t &block, bool reuse,
ulint prev, ulint enc_hdr,
size_t hdr_c, size_t data_c,
const void *data, size_t data_len);
/** Apply an INSERT_HEAP_DYNAMIC or INSERT_REUSE_DYNAMIC record that was
written by page_cur_insert_rec_low() for a ROW_FORMAT=COMPACT or DYNAMIC page.
@param block B-tree or R-tree page in ROW_FORMAT=COMPACT or DYNAMIC
@param reuse false=allocate from PAGE_HEAP_TOP; true=reuse PAGE_FREE
@param prev byte offset of the predecessor, relative to PAGE_NEW_INFIMUM
@param shift if reuse: number of bytes by which PAGE_FREE is moving
(NOTE(review): the writer mtr_t::page_insert() declares shift as ssize_t,
while it is ulint here; confirm how a negative shift is encoded)
@param enc_hdr_l number of copied record header bytes, plus record type bits
@param hdr_c number of common record header bytes with prev
@param data_c number of common data bytes with prev
@param data literal header and data bytes
@param data_len length of the literal data, in bytes
@return whether the operation failed (inconsistency was noticed) */
bool page_apply_insert_dynamic(const buf_block_t &block, bool reuse,
ulint prev, ulint shift, ulint enc_hdr_l,
size_t hdr_c, size_t data_c,
const void *data, size_t data_len);
/** Apply a DELETE_ROW_FORMAT_REDUNDANT record that was written by
page_cur_delete_rec() for a ROW_FORMAT=REDUNDANT page.
@param block B-tree or R-tree page in ROW_FORMAT=REDUNDANT
......
......@@ -291,8 +291,9 @@ struct log_phys_t : public log_rec_t
static_assert(INIT_ROW_FORMAT_DYNAMIC == 1, "compatibility");
if (UNIV_UNLIKELY(!rlen))
goto record_corrupted;
switch (*l) {
switch (const byte subtype= *l) {
uint8_t ll;
size_t prev_rec, hdr_size;
default:
goto record_corrupted;
case INIT_ROW_FORMAT_REDUNDANT:
......@@ -317,6 +318,90 @@ struct log_phys_t : public log_rec_t
return applied;
}
break;
case INSERT_HEAP_REDUNDANT:
case INSERT_REUSE_REDUNDANT:
case INSERT_HEAP_DYNAMIC:
case INSERT_REUSE_DYNAMIC:
if (UNIV_UNLIKELY(rlen < 2))
goto record_corrupted;
rlen--;
ll= mlog_decode_varint_length(*++l);
if (UNIV_UNLIKELY(ll > 3 || ll >= rlen))
goto record_corrupted;
prev_rec= mlog_decode_varint(l);
ut_ad(prev_rec != MLOG_DECODE_ERROR);
rlen-= ll;
l+= ll;
ll= mlog_decode_varint_length(*l);
static_assert(INSERT_HEAP_REDUNDANT == 4, "compatibility");
static_assert(INSERT_REUSE_REDUNDANT == 5, "compatibility");
static_assert(INSERT_HEAP_DYNAMIC == 6, "compatibility");
static_assert(INSERT_REUSE_DYNAMIC == 7, "compatibility");
if (subtype & 2)
{
size_t shift= 0;
if (subtype & 1)
{
if (UNIV_UNLIKELY(ll > 3 || ll >= rlen))
goto record_corrupted;
shift= mlog_decode_varint(l);
ut_ad(shift != MLOG_DECODE_ERROR);
rlen-= ll;
l+= ll;
ll= mlog_decode_varint_length(*l);
}
if (UNIV_UNLIKELY(ll > 3 || ll >= rlen))
goto record_corrupted;
size_t enc_hdr_l= mlog_decode_varint(l);
ut_ad(enc_hdr_l != MLOG_DECODE_ERROR);
rlen-= ll;
l+= ll;
ll= mlog_decode_varint_length(*l);
if (UNIV_UNLIKELY(ll > 2 || ll >= rlen))
goto record_corrupted;
size_t hdr_c= mlog_decode_varint(l);
ut_ad(hdr_c != MLOG_DECODE_ERROR);
rlen-= ll;
l+= ll;
ll= mlog_decode_varint_length(*l);
if (UNIV_UNLIKELY(ll > 3 || ll >= rlen))
goto record_corrupted;
size_t data_c= mlog_decode_varint(l);
ut_ad(data_c != MLOG_DECODE_ERROR);
rlen-= ll;
l+= ll;
if (page_apply_insert_dynamic(block, subtype & 1, prev_rec,
shift, enc_hdr_l, hdr_c, data_c,
l, rlen) && !srv_force_recovery)
goto page_corrupted;
}
else
{
if (UNIV_UNLIKELY(ll > 2 || ll >= rlen))
goto record_corrupted;
size_t header= mlog_decode_varint(l);
ut_ad(header != MLOG_DECODE_ERROR);
rlen-= ll;
l+= ll;
ll= mlog_decode_varint_length(*l);
if (UNIV_UNLIKELY(ll > 2 || ll >= rlen))
goto record_corrupted;
size_t hdr_c= mlog_decode_varint(l);
ut_ad(hdr_c != MLOG_DECODE_ERROR);
rlen-= ll;
l+= ll;
ll= mlog_decode_varint_length(*l);
if (UNIV_UNLIKELY(ll > 2 || ll >= rlen))
goto record_corrupted;
size_t data_c= mlog_decode_varint(l);
rlen-= ll;
l+= ll;
if (page_apply_insert_redundant(block, subtype & 1, prev_rec,
header, hdr_c, data_c,
l, rlen) && !srv_force_recovery)
goto page_corrupted;
}
break;
case DELETE_ROW_FORMAT_REDUNDANT:
if (UNIV_UNLIKELY(rlen < 2 || rlen > 4))
goto record_corrupted;
......@@ -335,14 +420,14 @@ struct log_phys_t : public log_rec_t
ll= mlog_decode_varint_length(*++l);
if (UNIV_UNLIKELY(ll > 3 || ll >= rlen))
goto record_corrupted;
size_t prev_rec= mlog_decode_varint(l);
prev_rec= mlog_decode_varint(l);
ut_ad(prev_rec != MLOG_DECODE_ERROR);
rlen-= ll;
l+= ll;
ll= mlog_decode_varint_length(*l);
if (UNIV_UNLIKELY(ll > 2 || ll >= rlen))
goto record_corrupted;
size_t hdr_size= mlog_decode_varint(l);
hdr_size= mlog_decode_varint(l);
ut_ad(hdr_size != MLOG_DECODE_ERROR);
rlen-= ll;
l+= ll;
......@@ -350,7 +435,7 @@ struct log_phys_t : public log_rec_t
if (UNIV_UNLIKELY(ll > 3 || ll != rlen))
goto record_corrupted;
if (page_apply_delete_dynamic(block, prev_rec, hdr_size,
mlog_decode_varint(l)) &&
mlog_decode_varint(l)) &&
!srv_force_recovery)
goto page_corrupted;
break;
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment