Commit 312569e2 authored by Marko Mäkelä

MDEV-21132 Remove buf_page_t::newest_modification

At each mini-transaction commit, the log sequence number of the
mini-transaction must be written to each modified page, so that
it will be available in the FIL_PAGE_LSN field when the page is
being read in crash recovery.

InnoDB was unnecessarily allocating redundant storage for the
field, in buf_page_t::newest_modification. Let us access
FIL_PAGE_LSN directly.

Furthermore, on ALTER TABLE...IMPORT TABLESPACE, let us write
0 to FIL_PAGE_LSN instead of using log_sys.lsn.

buf_flush_init_for_writing(), buf_flush_update_zip_checksum(),
fil_encrypt_buf_for_full_crc32(), fil_encrypt_buf(),
fil_space_encrypt(): Remove the parameter lsn.

buf_page_get_newest_modification(): Merge with the only caller.

buf_tmp_reserve_compression_buf(), buf_tmp_page_encrypt(),
buf_page_encrypt(): Define static in the same compilation unit
with the only caller.

PageConverter::m_current_lsn: Remove. Write 0 to FIL_PAGE_LSN
on ALTER TABLE...IMPORT TABLESPACE.
parent 777b3996
......@@ -4585,7 +4585,7 @@ xb_space_create_file(
if (!zip_size) {
buf_flush_init_for_writing(
NULL, page, NULL, 0,
NULL, page, NULL,
fil_space_t::full_crc32(flags));
ret = os_file_write(IORequestWrite, path, *file, page, 0,
......@@ -4602,7 +4602,7 @@ xb_space_create_file(
page_zip.m_end = page_zip.m_nonempty =
page_zip.n_blobs = 0;
buf_flush_init_for_writing(NULL, page, &page_zip, 0, false);
buf_flush_init_for_writing(NULL, page, &page_zip, false);
ret = os_file_write(IORequestWrite, path, *file,
page_zip.data, 0, zip_size);
......
This diff is collapsed.
This diff is collapsed.
......@@ -678,26 +678,25 @@ static byte* fil_encrypt_buf_for_full_crc32(
@param[in,out] crypt_data Crypt data
@param[in] space space_id
@param[in] offset Page offset
@param[in] lsn Log sequence number
@param[in] src_frame Page to encrypt
@param[in] zip_size ROW_FORMAT=COMPRESSED
page size, or 0
@param[in,out] dst_frame Output buffer
@param[in] use_full_checksum full crc32 algo is used
@return encrypted buffer or NULL */
UNIV_INTERN
byte*
fil_encrypt_buf(
byte* fil_encrypt_buf(
fil_space_crypt_t* crypt_data,
ulint space,
ulint offset,
lsn_t lsn,
const byte* src_frame,
ulint zip_size,
byte* dst_frame,
bool use_full_checksum)
{
const lsn_t lsn = mach_read_from_8(src_frame + FIL_PAGE_LSN);
if (use_full_checksum) {
ut_ad(!zip_size);
return fil_encrypt_buf_for_full_crc32(
crypt_data, space, offset,
lsn, src_frame, dst_frame);
......@@ -732,16 +731,12 @@ Encrypt a page
@param[in] space Tablespace
@param[in] offset Page offset
@param[in] lsn Log sequence number
@param[in] src_frame Page to encrypt
@param[in,out] dst_frame Output buffer
@return encrypted buffer or NULL */
UNIV_INTERN
byte*
fil_space_encrypt(
byte* fil_space_encrypt(
const fil_space_t* space,
ulint offset,
lsn_t lsn,
byte* src_frame,
byte* dst_frame)
{
......@@ -759,7 +754,7 @@ fil_space_encrypt(
const bool full_crc32 = space->full_crc32();
byte* tmp = fil_encrypt_buf(crypt_data, space->id, offset, lsn,
byte* tmp = fil_encrypt_buf(crypt_data, space->id, offset,
src_frame, zip_size, dst_frame,
full_crc32);
......@@ -1994,8 +1989,8 @@ fil_crypt_rotate_page(
&sleeptime_ms)) {
bool modified = false;
int needs_scrubbing = BTR_SCRUB_SKIP_PAGE;
lsn_t block_lsn = block->page.newest_modification;
byte* frame = buf_block_get_frame(block);
const lsn_t block_lsn = mach_read_from_8(FIL_PAGE_LSN + frame);
uint kv = buf_page_get_key_version(frame, space->flags);
if (space->is_stopping()) {
......
......@@ -3036,12 +3036,12 @@ fil_ibd_create(
page_zip.m_end = page_zip.m_nonempty =
page_zip.n_blobs = 0;
buf_flush_init_for_writing(NULL, page, &page_zip, 0, false);
buf_flush_init_for_writing(NULL, page, &page_zip, false);
*err = os_file_write(
IORequestWrite, path, file, page_zip.data, 0, zip_size);
} else {
buf_flush_init_for_writing(NULL, page, NULL, 0,
buf_flush_init_for_writing(NULL, page, NULL,
fil_space_t::full_crc32(flags));
*err = os_file_write(
......
......@@ -4255,8 +4255,6 @@ i_s_innodb_buffer_page_get_info(
page_info->fix_count = bpage->buf_fix_count;
page_info->newest_mod = bpage->newest_modification;
page_info->oldest_mod = bpage->oldest_modification;
page_info->access_time = bpage->access_time;
......@@ -4276,6 +4274,7 @@ i_s_innodb_buffer_page_get_info(
break;
case BUF_IO_READ:
page_info->page_type = I_S_PAGE_TYPE_UNKNOWN;
page_info->newest_mod = 0;
return;
}
......@@ -4296,6 +4295,7 @@ i_s_innodb_buffer_page_get_info(
frame = bpage->zip.data;
}
page_info->newest_mod = mach_read_from_8(FIL_PAGE_LSN + frame);
i_s_innodb_set_page_type(page_info, frame);
} else {
page_info->page_type = I_S_PAGE_TYPE_UNKNOWN;
......
......@@ -540,16 +540,6 @@ inline void buf_page_make_young_if_needed(buf_pool_t* buf_pool,
}
}
/********************************************************************//**
Gets the youngest modification log sequence number for a frame.
Returns zero if not file page or no modification occurred yet.
@return newest modification to page */
UNIV_INLINE
lsn_t
buf_page_get_newest_modification(
/*=============================*/
const buf_page_t* bpage); /*!< in: block containing the
page frame */
/********************************************************************//**
Increments the modify clock of a frame by 1. The caller must (1) own the
buf_pool->mutex and block bufferfix count has to be zero, (2) or own an x-lock
......@@ -708,6 +698,34 @@ inline uint buf_page_full_crc32_size(const byte* buf, bool* comp, bool* cr)
}
#ifndef UNIV_INNOCHECKSUM
# ifdef UNIV_LINUX
# include <stdlib.h>
# endif
/** Allocate memory, requesting the given alignment where the build
offers an aligned allocator. Release the result with aligned_free().
@param[in]	size	number of bytes to allocate
@param[in]	align	requested alignment in bytes
@return the allocated memory
@retval NULL if posix_memalign() reported a failure */
inline void* aligned_malloc(size_t size, size_t align)
{
#if defined(_MSC_VER)
	return _aligned_malloc(size, align);
#elif defined(HAVE_POSIX_MEMALIGN)
	void*	ptr = NULL;
	return posix_memalign(&ptr, align, size) ? NULL : ptr;
#else
	/* Neither aligned allocator is available in this build: the
	alignment request is ignored and plain malloc() is used. */
	return malloc(size);
#endif
}
/** Release memory that was obtained from aligned_malloc().
@param[in]	ptr	memory to release */
inline void aligned_free(void *ptr)
{
#ifndef _MSC_VER
	free(ptr);
#else
	/* Memory from _aligned_malloc() needs the matching deallocator. */
	_aligned_free(ptr);
#endif
}
/**********************************************************************//**
Gets the space id, page offset, and byte offset within page of a
pointer pointing to a buffer frame containing a file page. */
......@@ -1363,30 +1381,10 @@ bool buf_page_verify_crypt_checksum(
const byte* page,
ulint fsp_flags);
/** Calculate the checksum of a page from compressed table and update the
page.
@param[in,out] page page to update
@param[in] size compressed page size
@param[in] lsn LSN to stamp on the page */
void
buf_flush_update_zip_checksum(
buf_frame_t* page,
ulint size,
lsn_t lsn);
/** Encryption and page_compression hook that is called just before
a page is written to disk.
@param[in,out] space tablespace
@param[in,out] bpage buffer page
@param[in] src_frame physical page frame that is being encrypted
@return page frame to be written to file
(may be src_frame or an encrypted/compressed copy of it) */
UNIV_INTERN
byte*
buf_page_encrypt(
fil_space_t* space,
buf_page_t* bpage,
byte* src_frame);
/** Calculate a ROW_FORMAT=COMPRESSED page checksum and update the page.
@param[in,out] page page to update
@param[in] size compressed page size */
void buf_flush_update_zip_checksum(buf_frame_t* page, ulint size);
/** @brief The temporary memory structure.
......@@ -1420,6 +1418,15 @@ class buf_tmp_buffer_t {
{
return !reserved.exchange(true, std::memory_order_relaxed);
}
/** Make sure that crypt_buf is available, allocating it if it is
not yet set. The buffer is srv_page_size bytes, requested with
srv_page_size alignment, and is meant for encryption, decryption
or decompression. */
void allocate()
{
	if (crypt_buf) {
		/* A buffer is already attached; keep it. */
		return;
	}

	void*	buf = aligned_malloc(srv_page_size, srv_page_size);
	crypt_buf = static_cast<byte*>(buf);
}
};
/** The common buffer control block structure
......@@ -1548,12 +1555,6 @@ class buf_page_t {
FlushObserver* flush_observer; /*!< flush observer */
lsn_t newest_modification;
/*!< log sequence number of
the youngest modification to
this block, zero if not
modified. Protected by block
mutex */
lsn_t oldest_modification;
/*!< log sequence number of
the START of the log entry
......@@ -2233,7 +2234,15 @@ struct buf_pool_t{
memset((void*) slots, 0, n_slots * sizeof *slots);
}
~io_buf_t();
/** Release the per-slot crypt_buf and comp_buf buffers of every
slot, and then the slots array itself. */
~io_buf_t()
{
	buf_tmp_buffer_t* const	end = slots + n_slots;

	for (buf_tmp_buffer_t* slot = slots; slot != end; ++slot) {
		aligned_free(slot->crypt_buf);
		aligned_free(slot->comp_buf);
	}

	ut_free(slots);
}
/** Reserve a buffer */
buf_tmp_buffer_t* reserve()
......
......@@ -864,33 +864,6 @@ buf_frame_copy(
return(buf);
}
/********************************************************************//**
Reads the LSN of the youngest modification of a block while holding
the mutex that buf_page_get_mutex() returns for it.
@return bpage->newest_modification if the block is a file page,
otherwise 0 */
UNIV_INLINE
lsn_t
buf_page_get_newest_modification(
/*=============================*/
	const buf_page_t*	bpage)	/*!< in: block containing the
					page frame */
{
	BPageMutex*	mutex = buf_page_get_mutex(bpage);

	mutex_enter(mutex);
	/* The field is only consulted for blocks that are file pages. */
	const lsn_t	newest = buf_page_in_file(bpage)
		? bpage->newest_modification
		: 0;
	mutex_exit(mutex);

	return(newest);
}
/********************************************************************//**
Increments the modify clock of a frame by 1. The caller must (1) own the
buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
......
......@@ -85,14 +85,12 @@ void buf_flush_assign_full_crc32_checksum(byte* page);
@param[in] block buffer block; NULL if bypassing the buffer pool
@param[in,out] page page frame
@param[in,out] page_zip_ compressed page, or NULL if uncompressed
@param[in] newest_lsn newest modification LSN to the page
@param[in] use_full_checksum whether tablespace uses full checksum */
void
buf_flush_init_for_writing(
const buf_block_t* block,
byte* page,
void* page_zip_,
lsn_t newest_lsn,
bool use_full_checksum);
# if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
......
......@@ -57,8 +57,15 @@ buf_flush_note_modification(
|| fsp_is_system_temporary(block->page.id.space()));
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->page.buf_fix_count > 0);
ut_ad(block->page.newest_modification <= end_lsn);
block->page.newest_modification = end_lsn;
ut_ad(mach_read_from_8(block->frame + FIL_PAGE_LSN) <= end_lsn);
mach_write_to_8(block->frame + FIL_PAGE_LSN, end_lsn);
if (UNIV_LIKELY_NULL(block->page.zip.data)) {
compile_time_assert(FIL_PAGE_LSN % 8 == 0);
*reinterpret_cast<uint64_t*>(FIL_PAGE_LSN
+ block->page.zip.data)
= *reinterpret_cast<const uint64_t*>(FIL_PAGE_LSN
+ block->frame);
}
/* Don't allow to set flush observer from non-null to null,
or from one observer to another. */
......
......@@ -312,7 +312,6 @@ fil_parse_write_crypt_data(
@param[in,out] crypt_data Crypt data
@param[in] space space_id
@param[in] offset Page offset
@param[in] lsn Log sequence number
@param[in] src_frame Page to encrypt
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in,out] dst_frame Output buffer
......@@ -324,7 +323,6 @@ fil_encrypt_buf(
fil_space_crypt_t* crypt_data,
ulint space,
ulint offset,
lsn_t lsn,
const byte* src_frame,
ulint zip_size,
byte* dst_frame,
......@@ -336,16 +334,12 @@ Encrypt a page.
@param[in] space Tablespace
@param[in] offset Page offset
@param[in] lsn Log sequence number
@param[in] src_frame Page to encrypt
@param[in,out] dst_frame Output buffer
@return encrypted buffer or NULL */
UNIV_INTERN
byte*
fil_space_encrypt(
byte* fil_space_encrypt(
const fil_space_t* space,
ulint offset,
lsn_t lsn,
byte* src_frame,
byte* dst_frame)
MY_ATTRIBUTE((warn_unused_result));
......
......@@ -1876,13 +1876,7 @@ static void recv_recover_page(buf_block_t* block, mtr_t& mtr,
page = block->frame;
page_zip = buf_block_get_page_zip(block);
/* The page may have been modified in the buffer pool.
FIL_PAGE_LSN would only be updated right before flushing. */
lsn_t page_lsn = buf_page_get_newest_modification(&block->page);
if (!page_lsn) {
page_lsn = mach_read_from_8(page + FIL_PAGE_LSN);
}
const lsn_t page_lsn = mach_read_from_8(page + FIL_PAGE_LSN);
bool free_page = false;
lsn_t start_lsn = 0, end_lsn = 0;
ut_d(lsn_t recv_start_lsn = 0);
......
......@@ -796,14 +796,12 @@ class PageConverter : public AbstractCallback {
AbstractCallback(trx, space_id),
m_cfg(cfg),
m_index(cfg->m_indexes),
m_current_lsn(log_get_lsn()),
m_page_zip_ptr(0),
m_rec_iter(),
m_offsets_(), m_offsets(m_offsets_),
m_heap(0),
m_cluster_index(dict_table_get_first_index(cfg->m_table))
{
ut_ad(m_current_lsn);
rec_offs_init(m_offsets_);
}
......@@ -906,9 +904,6 @@ class PageConverter : public AbstractCallback {
/** Current index whose pages are being imported */
row_index_t* m_index;
/** Current system LSN */
lsn_t m_current_lsn;
/** Alias for m_page_zip, only set for compressed pages. */
page_zip_des_t* m_page_zip_ptr;
......@@ -1921,9 +1916,7 @@ PageConverter::update_header(
ib::warn() << "Space id check in the header failed: ignored";
}
mach_write_to_8(
get_frame(block) + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
m_current_lsn);
memset(get_frame(block) + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,0,8);
/* Write back the adjusted flags. */
mach_write_to_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS
......@@ -2036,20 +2029,22 @@ dberr_t PageConverter::operator()(buf_block_t* block) UNIV_NOTHROW
}
const bool full_crc32 = fil_space_t::full_crc32(get_space_flags());
byte* frame = get_frame(block);
compile_time_assert(FIL_PAGE_LSN % 8 == 0);
*reinterpret_cast<uint64_t*>(frame + FIL_PAGE_LSN)= 0;
if (!block->page.zip.data) {
buf_flush_init_for_writing(
NULL, block->frame, NULL, m_current_lsn, full_crc32);
NULL, block->frame, NULL, full_crc32);
} else if (fil_page_type_is_index(page_type)) {
buf_flush_init_for_writing(
NULL, block->page.zip.data, &block->page.zip,
m_current_lsn, full_crc32);
full_crc32);
} else {
/* Calculate and update the checksum of non-index
pages for ROW_FORMAT=COMPRESSED tables. */
buf_flush_update_zip_checksum(
block->page.zip.data, block->zip_size(),
m_current_lsn);
block->page.zip.data, block->zip_size());
}
return DB_SUCCESS;
......@@ -3554,7 +3549,6 @@ fil_iterate(
iter.crypt_data,
block->page.id.space(),
block->page.id.page_no(),
mach_read_from_8(src + FIL_PAGE_LSN),
src, block->zip_size(), dest,
full_crc32);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment