Commit 312569e2 authored by Marko Mäkelä

MDEV-21132 Remove buf_page_t::newest_modification

At each mini-transaction commit, the log sequence number of the
mini-transaction must be written to each modified page, so that
it will be available in the FIL_PAGE_LSN field when the page is
being read in crash recovery.

InnoDB was unnecessarily allocating redundant storage for this
value in buf_page_t::newest_modification. Let us access
FIL_PAGE_LSN directly.

Furthermore, on ALTER TABLE...IMPORT TABLESPACE, let us write
0 to FIL_PAGE_LSN instead of using log_sys.lsn.

buf_flush_init_for_writing(), buf_flush_update_zip_checksum(),
fil_encrypt_buf_for_full_crc32(), fil_encrypt_buf(),
fil_space_encrypt(): Remove the parameter lsn.

buf_page_get_newest_modification(): Merge with the only caller.

buf_tmp_reserve_compression_buf(), buf_tmp_page_encrypt(),
buf_page_encrypt(): Define as static in the same compilation unit
as the only caller.

PageConverter::m_current_lsn: Remove. Write 0 to FIL_PAGE_LSN
on ALTER TABLE...IMPORT TABLESPACE.
parent 777b3996
......@@ -4585,7 +4585,7 @@ xb_space_create_file(
if (!zip_size) {
buf_flush_init_for_writing(
NULL, page, NULL, 0,
NULL, page, NULL,
fil_space_t::full_crc32(flags));
ret = os_file_write(IORequestWrite, path, *file, page, 0,
......@@ -4602,7 +4602,7 @@ xb_space_create_file(
page_zip.m_end = page_zip.m_nonempty =
page_zip.n_blobs = 0;
buf_flush_init_for_writing(NULL, page, &page_zip, 0, false);
buf_flush_init_for_writing(NULL, page, &page_zip, false);
ret = os_file_write(IORequestWrite, path, *file,
page_zip.data, 0, zip_size);
......
......@@ -67,14 +67,6 @@ Created 11/5/1995 Heikki Tuuri
#include <map>
#include <sstream>
#ifdef UNIV_LINUX
#include <stdlib.h>
#endif
#ifdef HAVE_LZO
#include "lzo/lzo1x.h"
#endif
#ifdef HAVE_LIBNUMA
#include <numa.h>
#include <numaif.h>
......@@ -118,44 +110,6 @@ struct set_numa_interleave_t
#define NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE
#endif /* HAVE_LIBNUMA */
#ifdef HAVE_SNAPPY
#include "snappy-c.h"
#endif
#ifndef UNIV_INNOCHECKSUM
/** Allocate a block of memory with the requested alignment.
@param[in]	size	number of bytes to allocate
@param[in]	align	requested alignment in bytes
@return the allocated block; NULL when posix_memalign() reports an error
(the results of _aligned_malloc() and malloc() are passed through as is) */
inline void* aligned_malloc(size_t size, size_t align) {
#if defined(_MSC_VER)
	return _aligned_malloc(size, align);
#elif defined(HAVE_POSIX_MEMALIGN)
	void*	block;
	return posix_memalign(&block, align, size) ? 0 : block;
#else
	/* No aligned allocator was detected: fall back to malloc(),
	which does not take the alignment into account. */
	return malloc(size);
#endif
}
/** Release a block that was obtained from aligned_malloc().
@param[in]	ptr	block to release */
inline void aligned_free(void *ptr) {
#if !defined(_MSC_VER)
	free(ptr);
#else
	/* Counterpart of _aligned_malloc() */
	_aligned_free(ptr);
#endif
}
/** Release crypt_buf and comp_buf of every slot, and then the slot
array itself. */
buf_pool_t::io_buf_t::~io_buf_t()
{
	buf_tmp_buffer_t* const	end = slots + n_slots;

	for (buf_tmp_buffer_t* slot = slots; slot != end; ++slot) {
		aligned_free(slot->crypt_buf);
		aligned_free(slot->comp_buf);
	}

	ut_free(slots);
}
#endif /* !UNIV_INNOCHECKSUM */
/*
IMPLEMENTATION OF THE BUFFER POOL
=================================
......@@ -423,45 +377,6 @@ on the io_type */
: (counter##_WRITTEN))
/** Reserve a temporary buffer slot of a buffer pool, for use in
encryption, decryption or page compression.
The result of io_buf.reserve() is checked with ut_a().
@param[in,out]	buf_pool	buffer pool
@return reserved buffer slot */
static buf_tmp_buffer_t* buf_pool_reserve_tmp_slot(buf_pool_t* buf_pool)
{
	buf_tmp_buffer_t* const	reserved = buf_pool->io_buf.reserve();

	ut_a(reserved);

	return reserved;
}
/** Make sure that a reserved slot owns a buffer of srv_page_size bytes
for encryption, decryption or decompression.
An already allocated buffer is kept as is.
@param[in,out]	slot	reserved slot */
static void buf_tmp_reserve_crypt_buf(buf_tmp_buffer_t* slot)
{
	if (slot->crypt_buf) {
		return;
	}

	void*	buf = aligned_malloc(srv_page_size, srv_page_size);

	slot->crypt_buf = static_cast<byte*>(buf);
}
/** Make sure that a reserved slot owns a buffer for page compression.
An already allocated buffer is kept as is.
@param[in,out]	slot	reserved slot */
static void buf_tmp_reserve_compression_buf(buf_tmp_buffer_t* slot)
{
	if (slot->comp_buf) {
		return;
	}

	/* The snappy and lzo compression methods need an output
	buffer that is larger than the input buffer, so the size is
	adjusted when one of them is compiled in. */
	ulint	alloc_size = srv_page_size;
#ifdef HAVE_LZO
	alloc_size += LZO1X_1_15_MEM_COMPRESS;
#elif defined HAVE_SNAPPY
	alloc_size = snappy_max_compressed_length(alloc_size);
#endif

	void*	buf = aligned_malloc(alloc_size, srv_page_size);

	slot->comp_buf = static_cast<byte*>(buf);
}
/** Registers a chunk to buf_pool_chunk_map
@param[in] chunk chunk of buffers */
static
......@@ -534,8 +449,9 @@ static bool buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space)
if (space->purpose == FIL_TYPE_TEMPORARY
&& innodb_encrypt_temporary_tables) {
buf_tmp_buffer_t* slot = buf_pool_reserve_tmp_slot(buf_pool);
buf_tmp_reserve_crypt_buf(slot);
buf_tmp_buffer_t* slot = buf_pool->io_buf.reserve();
ut_a(slot);
slot->allocate();
if (!buf_tmp_page_decrypt(slot->crypt_buf, dst_frame)) {
slot->release();
......@@ -564,9 +480,9 @@ static bool buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space)
return false;
}
slot = buf_pool_reserve_tmp_slot(buf_pool);
/* For decompression, use crypt_buf. */
buf_tmp_reserve_crypt_buf(slot);
slot = buf_pool->io_buf.reserve();
ut_a(slot);
slot->allocate();
decompress_with_slot:
ut_d(fil_page_type_validate(space, dst_frame));
......@@ -595,10 +511,9 @@ static bool buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space)
return false;
}
/* Find free slot from temporary memory array */
slot = buf_pool_reserve_tmp_slot(buf_pool);
buf_tmp_reserve_crypt_buf(slot);
slot = buf_pool->io_buf.reserve();
ut_a(slot);
slot->allocate();
ut_d(fil_page_type_validate(space, dst_frame));
/* decrypt using crypt_buf to dst_frame */
......@@ -1816,8 +1731,6 @@ buf_chunk_not_freed(
/* The page cleaner is disabled in
read-only mode. No pages can be
dirtied, so all of them must be clean. */
ut_ad(block->page.oldest_modification
== block->page.newest_modification);
ut_ad(block->page.oldest_modification == 0
|| block->page.oldest_modification
== recv_sys.recovered_lsn
......@@ -3544,7 +3457,6 @@ buf_pool_watch_set(
bpage = &buf_pool->watch[i];
ut_ad(bpage->access_time == 0);
ut_ad(bpage->newest_modification == 0);
ut_ad(bpage->oldest_modification == 0);
ut_ad(bpage->zip.data == NULL);
ut_ad(!bpage->in_zip_hash);
......@@ -5123,7 +5035,6 @@ buf_page_init_low(
bpage->old = 0;
bpage->freed_page_clock = 0;
bpage->access_time = 0;
bpage->newest_modification = 0;
bpage->oldest_modification = 0;
bpage->write_size = 0;
bpage->real_size = 0;
......@@ -7254,197 +7165,6 @@ operator<<(
return(out);
}
/** Encrypt a page of the temporary tablespace.
The first FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION bytes are copied as is,
the bytes between them and the last FIL_PAGE_FCRC32_CHECKSUM bytes are
passed to log_tmp_block_encrypt(), and a CRC-32 of everything before the
last FIL_PAGE_FCRC32_CHECKSUM bytes is stored at the end of the output.
@param[in]	offset		Page offset
@param[in]	src_frame	Page to encrypt
@param[in,out]	dst_frame	Output buffer
@return encrypted buffer or NULL */
static byte* buf_tmp_page_encrypt(
	ulint	offset,
	byte*	src_frame,
	byte*	dst_frame)
{
	/* Copy the unencrypted prefix first */
	memcpy(dst_frame, src_frame, FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);

	/* Describe the range that is going to be encrypted */
	uint		enc_len = srv_page_size
		- (FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION
		   + FIL_PAGE_FCRC32_CHECKSUM);
	const byte*	enc_src = src_frame
		+ FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION;
	byte*		enc_dst = dst_frame
		+ FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION;

	if (!log_tmp_block_encrypt(enc_src, enc_len, enc_dst,
				   (offset * srv_page_size), true)) {
		return NULL;
	}

	/* Store the checksum of the output in its last bytes */
	const ulint	crc_pos = srv_page_size - FIL_PAGE_FCRC32_CHECKSUM;
	mach_write_to_4(dst_frame + crc_pos, ut_crc32(dst_frame, crc_pos));

	srv_stats.pages_encrypted.inc();
	srv_stats.n_temp_blocks_encrypted.inc();

	return dst_frame;
}
/** Encryption and page_compression hook that is called just before
a page is written to disk.
The page is returned unchanged for page 0 of any tablespace and for
the TRX_SYS page of the system tablespace. When neither encryption nor
page compression applies, only the key version bytes of src_frame are
cleared. Otherwise a temporary buffer slot is reserved, attached to
bpage->slot, and the output is produced in one of the slot buffers.
@param[in,out] space tablespace
@param[in,out] bpage buffer page; real_size and slot are updated
@param[in] src_frame physical page frame that is being encrypted
@return page frame to be written to file
(may be src_frame or an encrypted/compressed copy of it) */
UNIV_INTERN
byte*
buf_page_encrypt(
fil_space_t* space,
buf_page_t* bpage,
byte* src_frame)
{
ut_ad(space->id == bpage->id.space());
/* Default; overwritten below if page compression succeeds */
bpage->real_size = srv_page_size;
ut_d(fil_page_type_validate(space, src_frame));
switch (bpage->id.page_no()) {
case 0:
/* Page 0 of a tablespace is not encrypted/compressed */
return src_frame;
case TRX_SYS_PAGE_NO:
if (bpage->id.space() == TRX_SYS_SPACE) {
/* Do not encrypt/compress the page, because it contains
the address of the doublewrite buffer */
return src_frame;
}
}
fil_space_crypt_t* crypt_data = space->crypt_data;
bool encrypted, page_compressed;
if (space->purpose == FIL_TYPE_TEMPORARY) {
/* Temporary tablespace: governed only by
innodb_encrypt_temporary_tables; never page_compressed */
ut_ad(!crypt_data);
encrypted = innodb_encrypt_temporary_tables;
page_compressed = false;
} else {
encrypted = crypt_data
&& !crypt_data->not_encrypted()
&& crypt_data->type != CRYPT_SCHEME_UNENCRYPTED
&& (!crypt_data->is_default_encryption()
|| srv_encrypt_tables);
page_compressed = space->is_compressed();
}
if (!encrypted && !page_compressed) {
/* No need to encrypt or page compress the page.
Clear key-version & crypt-checksum. */
if (space->full_crc32()) {
memset(src_frame + FIL_PAGE_FCRC32_KEY_VERSION, 0, 4);
} else {
memset(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
0, 8);
}
return src_frame;
}
ut_ad(!bpage->zip_size() || !page_compressed);
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
/* Find free slot from temporary memory array */
buf_tmp_buffer_t* slot = buf_pool_reserve_tmp_slot(buf_pool);
slot->out_buf = NULL;
bpage->slot = slot;
buf_tmp_reserve_crypt_buf(slot);
byte *dst_frame = slot->crypt_buf;
const bool full_crc32 = space->full_crc32();
if (full_crc32) {
/* Write the LSN for the full crc32 checksum before
encryption, because the LSN is one of the inputs of encryption. */
mach_write_to_8(src_frame + FIL_PAGE_LSN,
bpage->newest_modification);
if (!page_compressed) {
/* Also stamp the 4-byte LSN field at the end of the page */
mach_write_to_4(
src_frame + srv_page_size - FIL_PAGE_FCRC32_END_LSN,
(ulint) bpage->newest_modification);
}
}
if (!page_compressed) {
/* This label is also the target of the goto in the else branch,
which is taken when fil_page_compress() returns 0. */
not_compressed:
byte* tmp;
if (space->purpose == FIL_TYPE_TEMPORARY) {
/* Encrypt temporary tablespace page content */
tmp = buf_tmp_page_encrypt(bpage->id.page_no(),
src_frame, dst_frame);
} else {
/* Encrypt page content */
tmp = fil_space_encrypt(
space, bpage->id.page_no(),
bpage->newest_modification,
src_frame, dst_frame);
}
bpage->real_size = srv_page_size;
slot->out_buf = dst_frame = tmp;
ut_d(fil_page_type_validate(space, tmp));
} else {
ut_ad(space->purpose != FIL_TYPE_TEMPORARY);
/* First we compress the page content */
buf_tmp_reserve_compression_buf(slot);
byte* tmp = slot->comp_buf;
ulint out_len = fil_page_compress(
src_frame, tmp, space->flags,
fil_space_get_block_size(space, bpage->id.page_no()),
encrypted);
if (!out_len) {
/* Nothing was compressed: handle the page as not compressed */
goto not_compressed;
}
bpage->real_size = out_len;
if (full_crc32) {
/* Replace out_len with the size reported by
buf_page_full_crc32_size() for the compressed page */
ut_d(bool compressed = false);
out_len = buf_page_full_crc32_size(tmp,
#ifdef UNIV_DEBUG
&compressed,
#else
NULL,
#endif
NULL);
ut_ad(compressed);
}
/* Workaround for MDEV-15527: zero-fill the rest of the buffer. */
memset(tmp + out_len, 0 , srv_page_size - out_len);
ut_d(fil_page_type_validate(space, tmp));
if (encrypted) {
/* And then we encrypt the page content */
tmp = fil_space_encrypt(space,
bpage->id.page_no(),
bpage->newest_modification,
tmp,
dst_frame);
}
if (full_crc32) {
/* Store the CRC-32 of the first out_len - 4 bytes
in the last 4 bytes of the out_len bytes */
compile_time_assert(FIL_PAGE_FCRC32_CHECKSUM == 4);
mach_write_to_4(tmp + out_len - 4,
ut_crc32(tmp, out_len - 4));
ut_ad(!buf_page_is_corrupted(true, tmp, space->flags));
}
slot->out_buf = dst_frame = tmp;
}
ut_d(fil_page_type_validate(space, dst_frame));
// return dst_frame which will be written
return dst_frame;
}
/**
Should we punch hole to deallocate unused portion of the page.
@param[in] bpage Page control block
......
......@@ -42,6 +42,7 @@ Created 11/11/1995 Heikki Tuuri
#include "buf0rea.h"
#include "ibuf0ibuf.h"
#include "log0log.h"
#include "log0crypt.h"
#include "os0file.h"
#include "trx0sys.h"
#include "srv0mon.h"
......@@ -55,6 +56,13 @@ Created 11/11/1995 Heikki Tuuri
#include <sys/resource.h>
static const int buf_flush_page_cleaner_priority = -20;
#endif /* UNIV_LINUX */
#ifdef HAVE_LZO
#include "lzo/lzo1x.h"
#endif
#ifdef HAVE_SNAPPY
#include "snappy-c.h"
#endif
/** Sleep time in microseconds for loop waiting for the oldest
modification lsn */
......@@ -732,25 +740,16 @@ void buf_flush_write_complete(buf_page_t* bpage, bool dblwr)
}
}
/** Calculate the checksum of a page from compressed table and update
the page.
/** Calculate a ROW_FORMAT=COMPRESSED page checksum and update the page.
@param[in,out] page page to update
@param[in] size compressed page size
@param[in] lsn LSN to stamp on the page */
void
buf_flush_update_zip_checksum(
buf_frame_t* page,
ulint size,
lsn_t lsn)
@param[in] size compressed page size */
void buf_flush_update_zip_checksum(buf_frame_t *page, ulint size)
{
ut_a(size > 0);
const uint32_t checksum = page_zip_calc_checksum(
page, size,
static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm));
mach_write_to_8(page + FIL_PAGE_LSN, lsn);
mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum);
ut_ad(size > 0);
mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
page_zip_calc_checksum(page, size,
static_cast<srv_checksum_algorithm_t>
(srv_checksum_algorithm)));
}
/** Assign the full crc32 checksum for non-compressed page.
......@@ -774,14 +773,12 @@ void buf_flush_assign_full_crc32_checksum(byte* page)
@param[in,out] page page frame
@param[in,out] page_zip_ compressed page, or NULL if
uncompressed
@param[in] newest_lsn newest modification LSN to the page
@param[in] use_full_checksum whether tablespace uses full checksum */
void
buf_flush_init_for_writing(
const buf_block_t* block,
byte* page,
void* page_zip_,
lsn_t newest_lsn,
bool use_full_checksum)
{
if (block != NULL && block->frame != page) {
......@@ -794,9 +791,7 @@ buf_flush_init_for_writing(
ut_ad(block == NULL || block->frame == page);
ut_ad(block == NULL || page_zip_ == NULL
|| &block->page.zip == page_zip_);
ut_ad(!block || newest_lsn);
ut_ad(page);
ut_ad(!newest_lsn || fil_page_get_type(page));
if (page_zip_) {
page_zip_des_t* page_zip;
......@@ -822,10 +817,7 @@ buf_flush_init_for_writing(
case FIL_PAGE_TYPE_ZBLOB2:
case FIL_PAGE_INDEX:
case FIL_PAGE_RTREE:
buf_flush_update_zip_checksum(
page_zip->data, size, newest_lsn);
buf_flush_update_zip_checksum(page_zip->data, size);
return;
}
......@@ -838,18 +830,15 @@ buf_flush_init_for_writing(
ut_error;
}
/* Write the newest modification lsn to the page header and trailer */
mach_write_to_8(page + FIL_PAGE_LSN, newest_lsn);
if (use_full_checksum) {
mach_write_to_4(page + srv_page_size - FIL_PAGE_FCRC32_END_LSN,
static_cast<uint32_t>(newest_lsn));
memcpy(page + srv_page_size - FIL_PAGE_FCRC32_END_LSN,
FIL_PAGE_LSN + 4 + page, 4);
} else {
mach_write_to_8(page + srv_page_size - FIL_PAGE_END_LSN_OLD_CHKSUM,
newest_lsn);
memcpy(page + srv_page_size - FIL_PAGE_END_LSN_OLD_CHKSUM,
FIL_PAGE_LSN + page, 8);
}
if (block && srv_page_size == 16384) {
if (block && !use_full_checksum && srv_page_size == 16384) {
/* The page type could be garbage in old files
created before MySQL 5.5. Such files always
had a page size of 16 kilobytes. */
......@@ -951,6 +940,179 @@ buf_flush_init_for_writing(
checksum);
}
/** Make sure that a reserved slot owns a buffer for page compression.
Nothing is done if slot->comp_buf has been allocated already.
@param[in,out]	slot	reserved slot */
static void buf_tmp_reserve_compression_buf(buf_tmp_buffer_t* slot)
{
  if (!slot->comp_buf)
  {
    /* With Snappy or LZO, the output buffer has to be larger than the
    input buffer; the allocation size is adjusted accordingly. */
    ulint alloc_size= srv_page_size;
#ifdef HAVE_LZO
    alloc_size+= LZO1X_1_15_MEM_COMPRESS;
#elif defined HAVE_SNAPPY
    alloc_size= snappy_max_compressed_length(alloc_size);
#endif
    void *buf= aligned_malloc(alloc_size, srv_page_size);
    slot->comp_buf= static_cast<byte*>(buf);
  }
}
/** Encrypt a page of the temporary tablespace.
The first FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION bytes are copied as is,
the bytes between them and the last FIL_PAGE_FCRC32_CHECKSUM bytes are
passed to log_tmp_block_encrypt(), and a CRC-32 of everything before the
last FIL_PAGE_FCRC32_CHECKSUM bytes is stored at the end of the output.
@param[in]	offset	Page offset
@param[in]	s	Page to encrypt
@param[in,out]	d	Output buffer
@return encrypted buffer or NULL */
static byte* buf_tmp_page_encrypt(ulint offset, const byte* s, byte* d)
{
  /* Copy the unencrypted prefix first */
  memcpy(d, s, FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);

  /* Describe the range that is going to be encrypted */
  uint enc_len= srv_page_size - (FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION +
                                 FIL_PAGE_FCRC32_CHECKSUM);
  const byte* enc_src= s + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION;
  byte* enc_dst= d + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION;

  if (!log_tmp_block_encrypt(enc_src, enc_len, enc_dst,
                             (offset * srv_page_size), true))
  {
    return NULL;
  }

  /* Store the checksum of the output in its last bytes */
  const ulint crc_pos= srv_page_size - FIL_PAGE_FCRC32_CHECKSUM;
  mach_write_to_4(d + crc_pos, ut_crc32(d, crc_pos));

  srv_stats.pages_encrypted.inc();
  srv_stats.n_temp_blocks_encrypted.inc();
  return d;
}
/** Encryption and page_compression hook that is called just before
a page is written to disk.
The page is returned unchanged for page 0 of any tablespace and for
the TRX_SYS page of the system tablespace. When neither encryption nor
page compression applies, only the key version bytes of s are cleared.
Otherwise a temporary buffer slot is reserved, attached to bpage->slot,
and the output is produced in one of the slot buffers.
@param[in,out] space tablespace
@param[in,out] bpage buffer page; real_size and slot are updated
@param[in] s physical page frame that is being encrypted
@return page frame to be written to file
(may be s or an encrypted/compressed copy of it) */
static byte* buf_page_encrypt(fil_space_t* space, buf_page_t* bpage, byte* s)
{
ut_ad(space->id == bpage->id.space());
/* Default; overwritten below if page compression succeeds */
bpage->real_size = srv_page_size;
ut_d(fil_page_type_validate(space, s));
switch (bpage->id.page_no()) {
case TRX_SYS_PAGE_NO:
if (bpage->id.space() != TRX_SYS_SPACE)
break;
/* The TRX_SYS page is neither encrypted nor compressed, because
it contains the address of the doublewrite buffer. */
/* fall through */
case 0:
/* Page 0 of a tablespace is not encrypted/compressed */
return s;
}
fil_space_crypt_t *crypt_data= space->crypt_data;
bool encrypted, page_compressed;
if (space->purpose == FIL_TYPE_TEMPORARY)
{
/* Temporary tablespace: governed only by
innodb_encrypt_temporary_tables; never page_compressed */
ut_ad(!crypt_data);
encrypted= innodb_encrypt_temporary_tables;
page_compressed= false;
}
else
{
encrypted= crypt_data && !crypt_data->not_encrypted() &&
crypt_data->type != CRYPT_SCHEME_UNENCRYPTED &&
(!crypt_data->is_default_encryption() || srv_encrypt_tables);
page_compressed= space->is_compressed();
}
const bool full_crc32= space->full_crc32();
if (!encrypted && !page_compressed)
{
/* No need to encrypt or compress. Clear key-version & crypt-checksum. */
if (full_crc32)
memset(s + FIL_PAGE_FCRC32_KEY_VERSION, 0, 4);
else
memset(s + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8);
return s;
}
/* In the full_crc32 format, copy the last 4 bytes of the FIL_PAGE_LSN
field to the 4-byte LSN field at the end of the page, before the page
contents are passed on. */
if (full_crc32)
memcpy(s + srv_page_size - FIL_PAGE_FCRC32_END_LSN,
FIL_PAGE_LSN + 4 + s, 4);
ut_ad(!bpage->zip_size() || !page_compressed);
buf_pool_t *buf_pool= buf_pool_from_bpage(bpage);
/* Find free slot from temporary memory array */
buf_tmp_buffer_t *slot= buf_pool->io_buf.reserve();
ut_a(slot);
/* Make sure that slot->crypt_buf exists */
slot->allocate();
slot->out_buf= NULL;
bpage->slot= slot;
byte *d= slot->crypt_buf;
if (!page_compressed)
{
/* This label is also the target of the goto in the else branch,
which is taken when fil_page_compress() returns 0. */
not_compressed:
byte *tmp= space->purpose == FIL_TYPE_TEMPORARY
? buf_tmp_page_encrypt(bpage->id.page_no(), s, d)
: fil_space_encrypt(space, bpage->id.page_no(), s, d);
slot->out_buf= d= tmp;
ut_d(fil_page_type_validate(space, tmp));
}
else
{
ut_ad(space->purpose != FIL_TYPE_TEMPORARY);
/* First we compress the page content */
buf_tmp_reserve_compression_buf(slot);
byte *tmp= slot->comp_buf;
ulint len= fil_page_compress(s, tmp, space->flags,
fil_space_get_block_size(space,
bpage->id.page_no()),
encrypted);
if (!len)
goto not_compressed;
bpage->real_size= len;
if (full_crc32)
{
/* Replace len with the size reported by
buf_page_full_crc32_size() for the compressed page */
ut_d(bool compressed = false);
len= buf_page_full_crc32_size(tmp,
#ifdef UNIV_DEBUG
&compressed,
#else
NULL,
#endif
NULL);
ut_ad(compressed);
}
/* Workaround for MDEV-15527: zero-fill the rest of the buffer. */
memset(tmp + len, 0 , srv_page_size - len);
ut_d(fil_page_type_validate(space, tmp));
/* Then encrypt the compressed page, if encryption is enabled */
if (encrypted)
tmp = fil_space_encrypt(space, bpage->id.page_no(), tmp, d);
if (full_crc32)
{
/* Store the CRC-32 of the first len - 4 bytes
in the last 4 bytes of the len bytes */
compile_time_assert(FIL_PAGE_FCRC32_CHECKSUM == 4);
mach_write_to_4(tmp + len - 4, ut_crc32(tmp, len - 4));
ut_ad(!buf_page_is_corrupted(true, tmp, space->flags));
}
slot->out_buf= d= tmp;
}
ut_d(fil_page_type_validate(space, d));
return d;
}
/********************************************************************//**
Does an asynchronous write of a buffer page. NOTE: when the
doublewrite buffer is used, we must call
......@@ -998,12 +1160,6 @@ buf_flush_write_block_low(
ut_ad(!buf_page_get_mutex(bpage)->is_owned());
ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
ut_ad(bpage->oldest_modification != 0);
ut_ad(bpage->newest_modification != 0);
/* Force the log to the disk before writing the modified block */
if (!srv_read_only_mode) {
log_write_up_to(bpage->newest_modification, true);
}
switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_POOL_WATCH:
......@@ -1016,10 +1172,6 @@ buf_flush_write_block_low(
break;
case BUF_BLOCK_ZIP_DIRTY:
frame = bpage->zip.data;
mach_write_to_8(frame + FIL_PAGE_LSN,
bpage->newest_modification);
ut_a(page_zip_verify_checksum(frame, bpage->zip_size()));
break;
case BUF_BLOCK_FILE_PAGE:
......@@ -1037,8 +1189,7 @@ buf_flush_write_block_low(
buf_flush_init_for_writing(
reinterpret_cast<const buf_block_t*>(bpage), page,
bpage->zip.data ? &bpage->zip : NULL,
bpage->newest_modification, full_crc32);
bpage->zip.data ? &bpage->zip : NULL, full_crc32);
break;
}
......@@ -1048,6 +1199,11 @@ buf_flush_write_block_low(
ut_ad(space->purpose == FIL_TYPE_TABLESPACE
|| space->atomic_write_supported);
if (space->purpose == FIL_TYPE_TABLESPACE) {
log_write_up_to(mach_read_from_8(frame + FIL_PAGE_LSN), true);
}
const bool use_doublewrite = !bpage->init_on_flush
&& space->use_doublewrite();
......
......@@ -678,26 +678,25 @@ static byte* fil_encrypt_buf_for_full_crc32(
@param[in,out] crypt_data Crypt data
@param[in] space space_id
@param[in] offset Page offset
@param[in] lsn Log sequence number
@param[in] src_frame Page to encrypt
@param[in] zip_size ROW_FORMAT=COMPRESSED
page size, or 0
@param[in,out] dst_frame Output buffer
@param[in] use_full_checksum full crc32 algo is used
@return encrypted buffer or NULL */
UNIV_INTERN
byte*
fil_encrypt_buf(
byte* fil_encrypt_buf(
fil_space_crypt_t* crypt_data,
ulint space,
ulint offset,
lsn_t lsn,
const byte* src_frame,
ulint zip_size,
byte* dst_frame,
bool use_full_checksum)
{
const lsn_t lsn = mach_read_from_8(src_frame + FIL_PAGE_LSN);
if (use_full_checksum) {
ut_ad(!zip_size);
return fil_encrypt_buf_for_full_crc32(
crypt_data, space, offset,
lsn, src_frame, dst_frame);
......@@ -732,16 +731,12 @@ Encrypt a page
@param[in] space Tablespace
@param[in] offset Page offset
@param[in] lsn Log sequence number
@param[in] src_frame Page to encrypt
@param[in,out] dst_frame Output buffer
@return encrypted buffer or NULL */
UNIV_INTERN
byte*
fil_space_encrypt(
byte* fil_space_encrypt(
const fil_space_t* space,
ulint offset,
lsn_t lsn,
byte* src_frame,
byte* dst_frame)
{
......@@ -759,7 +754,7 @@ fil_space_encrypt(
const bool full_crc32 = space->full_crc32();
byte* tmp = fil_encrypt_buf(crypt_data, space->id, offset, lsn,
byte* tmp = fil_encrypt_buf(crypt_data, space->id, offset,
src_frame, zip_size, dst_frame,
full_crc32);
......@@ -1994,8 +1989,8 @@ fil_crypt_rotate_page(
&sleeptime_ms)) {
bool modified = false;
int needs_scrubbing = BTR_SCRUB_SKIP_PAGE;
lsn_t block_lsn = block->page.newest_modification;
byte* frame = buf_block_get_frame(block);
const lsn_t block_lsn = mach_read_from_8(FIL_PAGE_LSN + frame);
uint kv = buf_page_get_key_version(frame, space->flags);
if (space->is_stopping()) {
......
......@@ -3036,12 +3036,12 @@ fil_ibd_create(
page_zip.m_end = page_zip.m_nonempty =
page_zip.n_blobs = 0;
buf_flush_init_for_writing(NULL, page, &page_zip, 0, false);
buf_flush_init_for_writing(NULL, page, &page_zip, false);
*err = os_file_write(
IORequestWrite, path, file, page_zip.data, 0, zip_size);
} else {
buf_flush_init_for_writing(NULL, page, NULL, 0,
buf_flush_init_for_writing(NULL, page, NULL,
fil_space_t::full_crc32(flags));
*err = os_file_write(
......
......@@ -4255,8 +4255,6 @@ i_s_innodb_buffer_page_get_info(
page_info->fix_count = bpage->buf_fix_count;
page_info->newest_mod = bpage->newest_modification;
page_info->oldest_mod = bpage->oldest_modification;
page_info->access_time = bpage->access_time;
......@@ -4276,6 +4274,7 @@ i_s_innodb_buffer_page_get_info(
break;
case BUF_IO_READ:
page_info->page_type = I_S_PAGE_TYPE_UNKNOWN;
page_info->newest_mod = 0;
return;
}
......@@ -4296,6 +4295,7 @@ i_s_innodb_buffer_page_get_info(
frame = bpage->zip.data;
}
page_info->newest_mod = mach_read_from_8(FIL_PAGE_LSN + frame);
i_s_innodb_set_page_type(page_info, frame);
} else {
page_info->page_type = I_S_PAGE_TYPE_UNKNOWN;
......
......@@ -540,16 +540,6 @@ inline void buf_page_make_young_if_needed(buf_pool_t* buf_pool,
}
}
/********************************************************************//**
Gets the youngest modification log sequence number for a frame.
Returns zero if not file page or no modification occurred yet.
@return newest modification to page */
UNIV_INLINE
lsn_t
buf_page_get_newest_modification(
/*=============================*/
const buf_page_t* bpage); /*!< in: block containing the
page frame */
/********************************************************************//**
Increments the modify clock of a frame by 1. The caller must (1) own the
buf_pool->mutex and block bufferfix count has to be zero, (2) or own an x-lock
......@@ -708,6 +698,34 @@ inline uint buf_page_full_crc32_size(const byte* buf, bool* comp, bool* cr)
}
#ifndef UNIV_INNOCHECKSUM
# ifdef UNIV_LINUX
# include <stdlib.h>
# endif
/** Allocate a block of memory with the requested alignment.
@param[in]	size	number of bytes to allocate
@param[in]	align	requested alignment in bytes
@return the allocated block; NULL when posix_memalign() reports an error
(the results of _aligned_malloc() and malloc() are passed through as is) */
inline void* aligned_malloc(size_t size, size_t align)
{
  void *block;
#if defined _MSC_VER
  block= _aligned_malloc(size, align);
#elif defined HAVE_POSIX_MEMALIGN
  if (posix_memalign(&block, align, size) != 0)
    block= NULL;
#else
  /* No aligned allocator was detected: fall back to malloc(),
  which does not take the alignment into account. */
  block= malloc(size);
#endif
  return block;
}
/** Release a block that was obtained from aligned_malloc().
@param[in]	ptr	block to release */
inline void aligned_free(void *ptr)
{
#ifndef _MSC_VER
  free(ptr);
#else
  /* Counterpart of _aligned_malloc() */
  _aligned_free(ptr);
#endif
}
/**********************************************************************//**
Gets the space id, page offset, and byte offset within page of a
pointer pointing to a buffer frame containing a file page. */
......@@ -1363,30 +1381,10 @@ bool buf_page_verify_crypt_checksum(
const byte* page,
ulint fsp_flags);
/** Calculate the checksum of a page from compressed table and update the
page.
@param[in,out] page page to update
@param[in] size compressed page size
@param[in] lsn LSN to stamp on the page */
void
buf_flush_update_zip_checksum(
buf_frame_t* page,
ulint size,
lsn_t lsn);
/** Encryption and page_compression hook that is called just before
a page is written to disk.
@param[in,out] space tablespace
@param[in,out] bpage buffer page
@param[in] src_frame physical page frame that is being encrypted
@return page frame to be written to file
(may be src_frame or an encrypted/compressed copy of it) */
UNIV_INTERN
byte*
buf_page_encrypt(
fil_space_t* space,
buf_page_t* bpage,
byte* src_frame);
/** Calculate a ROW_FORMAT=COMPRESSED page checksum and update the page.
@param[in,out] page page to update
@param[in] size compressed page size */
void buf_flush_update_zip_checksum(buf_frame_t* page, ulint size);
/** @brief The temporary memory structure.
......@@ -1420,6 +1418,15 @@ class buf_tmp_buffer_t {
{
return !reserved.exchange(true, std::memory_order_relaxed);
}
/** Make sure that crypt_buf, the buffer for encryption, decryption or
decompression, has been allocated (srv_page_size bytes).
An existing buffer is kept as is. */
void allocate()
{
  if (crypt_buf)
    return;
  void *buf= aligned_malloc(srv_page_size, srv_page_size);
  crypt_buf= static_cast<byte*>(buf);
}
};
/** The common buffer control block structure
......@@ -1548,12 +1555,6 @@ class buf_page_t {
FlushObserver* flush_observer; /*!< flush observer */
lsn_t newest_modification;
/*!< log sequence number of
the youngest modification to
this block, zero if not
modified. Protected by block
mutex */
lsn_t oldest_modification;
/*!< log sequence number of
the START of the log entry
......@@ -2233,7 +2234,15 @@ struct buf_pool_t{
memset((void*) slots, 0, n_slots * sizeof *slots);
}
~io_buf_t();
/** Release crypt_buf and comp_buf of every slot, and then the slot
array itself. */
~io_buf_t()
{
  buf_tmp_buffer_t *slot= slots;
  buf_tmp_buffer_t *const end= slots + n_slots;
  while (slot != end) {
    aligned_free(slot->crypt_buf);
    aligned_free(slot->comp_buf);
    ++slot;
  }
  ut_free(slots);
}
/** Reserve a buffer */
buf_tmp_buffer_t* reserve()
......
......@@ -864,33 +864,6 @@ buf_frame_copy(
return(buf);
}
/** Read the log sequence number of the youngest modification of a
block while holding the mutex returned by buf_page_get_mutex().
@param[in]	bpage	block containing the page frame
@return bpage->newest_modification
@retval 0 if buf_page_in_file(bpage) does not hold */
UNIV_INLINE
lsn_t
buf_page_get_newest_modification(
	const buf_page_t*	bpage)
{
	BPageMutex* const	mtx = buf_page_get_mutex(bpage);

	mutex_enter(mtx);

	const lsn_t	newest = buf_page_in_file(bpage)
		? bpage->newest_modification
		: 0;

	mutex_exit(mtx);

	return(newest);
}
/********************************************************************//**
Increments the modify clock of a frame by 1. The caller must (1) own the
buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
......
......@@ -85,14 +85,12 @@ void buf_flush_assign_full_crc32_checksum(byte* page);
@param[in] block buffer block; NULL if bypassing the buffer pool
@param[in,out] page page frame
@param[in,out] page_zip_ compressed page, or NULL if uncompressed
@param[in] newest_lsn newest modification LSN to the page
@param[in] use_full_checksum whether tablespace uses full checksum */
void
buf_flush_init_for_writing(
const buf_block_t* block,
byte* page,
void* page_zip_,
lsn_t newest_lsn,
bool use_full_checksum);
# if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
......
......@@ -57,8 +57,15 @@ buf_flush_note_modification(
|| fsp_is_system_temporary(block->page.id.space()));
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->page.buf_fix_count > 0);
ut_ad(block->page.newest_modification <= end_lsn);
block->page.newest_modification = end_lsn;
ut_ad(mach_read_from_8(block->frame + FIL_PAGE_LSN) <= end_lsn);
mach_write_to_8(block->frame + FIL_PAGE_LSN, end_lsn);
if (UNIV_LIKELY_NULL(block->page.zip.data)) {
compile_time_assert(FIL_PAGE_LSN % 8 == 0);
*reinterpret_cast<uint64_t*>(FIL_PAGE_LSN
+ block->page.zip.data)
= *reinterpret_cast<const uint64_t*>(FIL_PAGE_LSN
+ block->frame);
}
/* Don't allow to set flush observer from non-null to null,
or from one observer to another. */
......
......@@ -312,7 +312,6 @@ fil_parse_write_crypt_data(
@param[in,out] crypt_data Crypt data
@param[in] space space_id
@param[in] offset Page offset
@param[in] lsn Log sequence number
@param[in] src_frame Page to encrypt
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in,out] dst_frame Output buffer
......@@ -324,7 +323,6 @@ fil_encrypt_buf(
fil_space_crypt_t* crypt_data,
ulint space,
ulint offset,
lsn_t lsn,
const byte* src_frame,
ulint zip_size,
byte* dst_frame,
......@@ -336,16 +334,12 @@ Encrypt a page.
@param[in] space Tablespace
@param[in] offset Page offset
@param[in] lsn Log sequence number
@param[in] src_frame Page to encrypt
@param[in,out] dst_frame Output buffer
@return encrypted buffer or NULL */
UNIV_INTERN
byte*
fil_space_encrypt(
byte* fil_space_encrypt(
const fil_space_t* space,
ulint offset,
lsn_t lsn,
byte* src_frame,
byte* dst_frame)
MY_ATTRIBUTE((warn_unused_result));
......
......@@ -1876,13 +1876,7 @@ static void recv_recover_page(buf_block_t* block, mtr_t& mtr,
page = block->frame;
page_zip = buf_block_get_page_zip(block);
/* The page may have been modified in the buffer pool.
FIL_PAGE_LSN would only be updated right before flushing. */
lsn_t page_lsn = buf_page_get_newest_modification(&block->page);
if (!page_lsn) {
page_lsn = mach_read_from_8(page + FIL_PAGE_LSN);
}
const lsn_t page_lsn = mach_read_from_8(page + FIL_PAGE_LSN);
bool free_page = false;
lsn_t start_lsn = 0, end_lsn = 0;
ut_d(lsn_t recv_start_lsn = 0);
......
......@@ -796,14 +796,12 @@ class PageConverter : public AbstractCallback {
AbstractCallback(trx, space_id),
m_cfg(cfg),
m_index(cfg->m_indexes),
m_current_lsn(log_get_lsn()),
m_page_zip_ptr(0),
m_rec_iter(),
m_offsets_(), m_offsets(m_offsets_),
m_heap(0),
m_cluster_index(dict_table_get_first_index(cfg->m_table))
{
ut_ad(m_current_lsn);
rec_offs_init(m_offsets_);
}
......@@ -906,9 +904,6 @@ class PageConverter : public AbstractCallback {
/** Current index whose pages are being imported */
row_index_t* m_index;
/** Current system LSN */
lsn_t m_current_lsn;
/** Alias for m_page_zip, only set for compressed pages. */
page_zip_des_t* m_page_zip_ptr;
......@@ -1921,9 +1916,7 @@ PageConverter::update_header(
ib::warn() << "Space id check in the header failed: ignored";
}
mach_write_to_8(
get_frame(block) + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
m_current_lsn);
memset(get_frame(block) + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,0,8);
/* Write back the adjusted flags. */
mach_write_to_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS
......@@ -2036,20 +2029,22 @@ dberr_t PageConverter::operator()(buf_block_t* block) UNIV_NOTHROW
}
const bool full_crc32 = fil_space_t::full_crc32(get_space_flags());
byte* frame = get_frame(block);
compile_time_assert(FIL_PAGE_LSN % 8 == 0);
*reinterpret_cast<uint64_t*>(frame + FIL_PAGE_LSN)= 0;
if (!block->page.zip.data) {
buf_flush_init_for_writing(
NULL, block->frame, NULL, m_current_lsn, full_crc32);
NULL, block->frame, NULL, full_crc32);
} else if (fil_page_type_is_index(page_type)) {
buf_flush_init_for_writing(
NULL, block->page.zip.data, &block->page.zip,
m_current_lsn, full_crc32);
full_crc32);
} else {
/* Calculate and update the checksum of non-index
pages for ROW_FORMAT=COMPRESSED tables. */
buf_flush_update_zip_checksum(
block->page.zip.data, block->zip_size(),
m_current_lsn);
block->page.zip.data, block->zip_size());
}
return DB_SUCCESS;
......@@ -3554,7 +3549,6 @@ fil_iterate(
iter.crypt_data,
block->page.id.space(),
block->page.id.page_no(),
mach_read_from_8(src + FIL_PAGE_LSN),
src, block->zip_size(), dest,
full_crc32);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment