Commit 82d59945 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-26110: Do not rely on alignment on static allocation

It is implementation-defined whether alignment requirements
that are larger than std::max_align_t (typically 8 or 16 bytes)
will be honored by the compiler and linker.

It turns out that on IBM AIX, both alignas() and MY_ALIGNED()
only guarantees alignment up to 16 bytes.

For some data structures, specifying alignment to the CPU
cache line size (typically 64 or 128 bytes) is a mere performance
optimization, and we do not really care whether the requested
alignment is guaranteed.

But, for the correct operation of direct I/O, we do require that
the buffers be aligned at a block size boundary.

field_ref_zero: Define as a pointer, not an array.
For innochecksum, we can make this point to unaligned memory;
for anything else, we will allocate an aligned buffer from the heap.
This buffer will be used for overwriting freed data pages when
innodb_immediate_scrub_data_uncompressed=ON. And exactly that code
hit an assertion failure on AIX, in the test innodb.innodb_scrub.

log_sys.checkpoint_buf: Define as a pointer to aligned memory
that is allocated from heap.

log_t::file::write_header_durable(): Reuse log_sys.checkpoint_buf
instead of trying to allocate an aligned buffer from the stack.
parent bf435a3f
......@@ -97,6 +97,9 @@ FILE* log_file = NULL;
/* Enabled for log write option. */
static bool is_log_enabled = false;
static byte field_ref_zero_buf[UNIV_PAGE_SIZE_MAX];
const byte *field_ref_zero = field_ref_zero_buf;
#ifndef _WIN32
/* advisory lock for non-window system. */
struct flock lk;
......
......@@ -4501,6 +4501,14 @@ static bool xtrabackup_backup_func()
goto fail;
}
if (auto b = aligned_malloc(UNIV_PAGE_SIZE_MAX, 4096)) {
field_ref_zero = static_cast<byte*>(
memset_aligned<4096>(b, 0, UNIV_PAGE_SIZE_MAX));
} else {
goto fail;
}
{
/* definition from recv_recovery_from_checkpoint_start() */
ulint max_cp_field;
......@@ -4515,14 +4523,17 @@ static bool xtrabackup_backup_func()
if (err != DB_SUCCESS) {
msg("Error: cannot read redo log header");
unlock_and_fail:
mysql_mutex_unlock(&log_sys.mutex);
free_and_fail:
aligned_free(const_cast<byte*>(field_ref_zero));
field_ref_zero = nullptr;
goto fail;
}
if (log_sys.log.format == 0) {
msg("Error: cannot process redo log before MariaDB 10.2.2");
mysql_mutex_unlock(&log_sys.mutex);
goto fail;
goto unlock_and_fail;
}
byte* buf = log_sys.checkpoint_buf;
......@@ -4543,7 +4554,7 @@ static bool xtrabackup_backup_func()
xtrabackup_init_datasinks();
if (!select_history()) {
goto fail;
goto free_and_fail;
}
/* open the log file */
......@@ -4552,12 +4563,13 @@ static bool xtrabackup_backup_func()
if (dst_log_file == NULL) {
msg("Error: failed to open the target stream for '%s'.",
LOG_FILE_NAME);
goto fail;
goto free_and_fail;
}
/* label it */
byte MY_ALIGNED(OS_FILE_LOG_BLOCK_SIZE) log_hdr_buf[LOG_FILE_HDR_SIZE];
memset(log_hdr_buf, 0, sizeof log_hdr_buf);
byte* log_hdr_buf = static_cast<byte*>(
aligned_malloc(LOG_FILE_HDR_SIZE, OS_FILE_LOG_BLOCK_SIZE));
memset(log_hdr_buf, 0, LOG_FILE_HDR_SIZE);
byte *log_hdr_field = log_hdr_buf;
mach_write_to_4(LOG_HEADER_FORMAT + log_hdr_field, log_sys.log.format);
......@@ -4586,11 +4598,13 @@ static bool xtrabackup_backup_func()
log_block_calc_checksum_crc32(log_hdr_field));
/* Write log header*/
if (ds_write(dst_log_file, log_hdr_buf, sizeof(log_hdr_buf))) {
if (ds_write(dst_log_file, log_hdr_buf, LOG_FILE_HDR_SIZE)) {
msg("error: write to logfile failed");
goto fail;
aligned_free(log_hdr_buf);
goto free_and_fail;
}
aligned_free(log_hdr_buf);
log_copying_running = true;
/* start io throttle */
if(xtrabackup_throttle) {
......@@ -4608,7 +4622,7 @@ static bool xtrabackup_backup_func()
" error %s.", ut_strerr(err));
fail_before_log_copying_thread_start:
log_copying_running = false;
goto fail;
goto free_and_fail;
}
/* copy log file by current position */
......@@ -4625,7 +4639,7 @@ static bool xtrabackup_backup_func()
/* FLUSH CHANGED_PAGE_BITMAPS call */
if (!flush_changed_page_bitmaps()) {
goto fail;
goto free_and_fail;
}
ut_a(xtrabackup_parallel > 0);
......@@ -4647,7 +4661,7 @@ static bool xtrabackup_backup_func()
datafiles_iter_t *it = datafiles_iter_new();
if (it == NULL) {
msg("mariabackup: Error: datafiles_iter_new() failed.");
goto fail;
goto free_and_fail;
}
/* Create data copying threads */
......@@ -4702,6 +4716,9 @@ static bool xtrabackup_backup_func()
if (opt_log_innodb_page_corruption)
ok = corrupted_pages.print_to_file(MB_CORRUPTED_PAGES_FILE);
aligned_free(const_cast<byte*>(field_ref_zero));
field_ref_zero = nullptr;
if (!ok) {
goto fail;
}
......
......@@ -321,6 +321,12 @@ constexpr ulint BUF_PAGE_READ_MAX_RETRIES= 100;
read-ahead buffer. (Divide buf_pool size by this amount) */
constexpr uint32_t BUF_READ_AHEAD_PORTION= 32;
/** A 64KiB buffer of NUL bytes, for use in assertions and checks,
and dummy default values of instantly dropped columns.
Initially, BLOB field references are set to NUL bytes, in
dtuple_convert_big_rec(). */
const byte *field_ref_zero;
/** The InnoDB buffer pool */
buf_pool_t buf_pool;
buf_pool_t::chunk_t::map *buf_pool_t::chunk_t::map_reg;
......@@ -698,7 +704,7 @@ static void buf_page_check_lsn(bool check_lsn, const byte* read_buf)
@return whether the buffer is all zeroes */
bool buf_is_zeroes(span<const byte> buf)
{
ut_ad(buf.size() <= sizeof field_ref_zero);
ut_ad(buf.size() <= UNIV_PAGE_SIZE_MAX);
return memcmp(buf.data(), field_ref_zero, buf.size()) == 0;
}
......@@ -1391,11 +1397,17 @@ bool buf_pool_t::create()
ut_ad(srv_buf_pool_size % srv_buf_pool_chunk_unit == 0);
ut_ad(!is_initialised());
ut_ad(srv_buf_pool_size > 0);
ut_ad(!resizing);
ut_ad(!chunks_old);
ut_ad(!field_ref_zero);
NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE;
ut_ad(!resizing);
ut_ad(!chunks_old);
if (auto b= aligned_malloc(UNIV_PAGE_SIZE_MAX, 4096))
field_ref_zero= static_cast<const byte*>
(memset_aligned<4096>(b, 0, UNIV_PAGE_SIZE_MAX));
else
return true;
chunk_t::map_reg= UT_NEW_NOKEY(chunk_t::map());
......@@ -1426,6 +1438,8 @@ bool buf_pool_t::create()
chunks= nullptr;
UT_DELETE(chunk_t::map_reg);
chunk_t::map_reg= nullptr;
aligned_free(const_cast<byte*>(field_ref_zero));
field_ref_zero= nullptr;
ut_ad(!is_initialised());
return true;
}
......@@ -1541,6 +1555,8 @@ void buf_pool_t::close()
io_buf.close();
UT_DELETE(chunk_t::map_reg);
chunk_t::map_reg= chunk_t::map_ref= nullptr;
aligned_free(const_cast<byte*>(field_ref_zero));
field_ref_zero= nullptr;
}
/** Try to reallocate a control block.
......
......@@ -526,7 +526,7 @@ inline bool dict_table_t::instant_column(const dict_table_t& table,
}
DBUG_ASSERT(c.is_added());
if (c.def_val.len <= sizeof field_ref_zero
if (c.def_val.len <= UNIV_PAGE_SIZE_MAX
&& (!c.def_val.len
|| !memcmp(c.def_val.data, field_ref_zero,
c.def_val.len))) {
......
/*****************************************************************************
Copyright (c) 1995, 2015, Oracle and/or its affiliates. All rights reserved.
Copyright (c) 2019, 2020, MariaDB Corporation.
Copyright (c) 2019, 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -177,11 +177,11 @@ class page_id_t
uint64_t m_id;
};
/** A field reference full of zero, for use in assertions and checks,
/** A 64KiB buffer of NUL bytes, for use in assertions and checks,
and dummy default values of instantly dropped columns.
Initially, BLOB field references are set to zero, in
Initially, BLOB field references are set to NUL bytes, in
dtuple_convert_big_rec(). */
extern const byte field_ref_zero[UNIV_PAGE_SIZE_MAX];
extern const byte *field_ref_zero;
#ifndef UNIV_INNOCHECKSUM
......
......@@ -618,8 +618,7 @@ struct log_t{
checkpoint writes */
/** buffer for checkpoint header */
MY_ALIGNED(OS_FILE_LOG_BLOCK_SIZE)
byte checkpoint_buf[OS_FILE_LOG_BLOCK_SIZE];
byte *checkpoint_buf;
/* @} */
private:
......
......@@ -219,6 +219,8 @@ void log_t::create()
log_block_set_first_rec_group(buf, LOG_BLOCK_HDR_SIZE);
buf_free= LOG_BLOCK_HDR_SIZE;
checkpoint_buf= static_cast<byte*>
(aligned_malloc(OS_FILE_LOG_BLOCK_SIZE, OS_FILE_LOG_BLOCK_SIZE));
}
mapped_file_t::~mapped_file_t() noexcept
......@@ -461,8 +463,8 @@ void log_t::file::write_header_durable(lsn_t lsn)
ut_ad(log_sys.log.format == log_t::FORMAT_10_5 ||
log_sys.log.format == log_t::FORMAT_ENC_10_5);
// man 2 open suggests this buffer to be aligned by 512 for O_DIRECT
MY_ALIGNED(OS_FILE_LOG_BLOCK_SIZE) byte buf[OS_FILE_LOG_BLOCK_SIZE] = {0};
byte *buf= log_sys.checkpoint_buf;
memset_aligned<OS_FILE_LOG_BLOCK_SIZE>(buf, 0, OS_FILE_LOG_BLOCK_SIZE);
mach_write_to_4(buf + LOG_HEADER_FORMAT, log_sys.log.format);
mach_write_to_4(buf + LOG_HEADER_SUBFORMAT, log_sys.log.subformat);
......@@ -475,7 +477,7 @@ void log_t::file::write_header_durable(lsn_t lsn)
DBUG_PRINT("ib_log", ("write " LSN_PF, lsn));
log_sys.log.write(0, buf);
log_sys.log.write(0, {buf, OS_FILE_LOG_BLOCK_SIZE});
if (!log_sys.log.writes_are_durable())
log_sys.log.flush();
}
......@@ -880,7 +882,7 @@ ATTRIBUTE_COLD void log_write_checkpoint_info(lsn_t end_lsn)
log_sys.next_checkpoint_lsn));
byte* buf = log_sys.checkpoint_buf;
memset(buf, 0, OS_FILE_LOG_BLOCK_SIZE);
memset_aligned<OS_FILE_LOG_BLOCK_SIZE>(buf, 0, OS_FILE_LOG_BLOCK_SIZE);
mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys.next_checkpoint_no);
mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys.next_checkpoint_lsn);
......@@ -1282,18 +1284,21 @@ void log_t::close()
{
ut_ad(this == &log_sys);
if (!is_initialised()) return;
m_initialised = false;
m_initialised= false;
log.close();
ut_free_dodump(buf, srv_log_buffer_size);
buf = NULL;
buf= nullptr;
ut_free_dodump(flush_buf, srv_log_buffer_size);
flush_buf = NULL;
flush_buf= nullptr;
mysql_mutex_destroy(&mutex);
mysql_mutex_destroy(&flush_order_mutex);
recv_sys.close();
aligned_free(checkpoint_buf);
checkpoint_buf= nullptr;
}
std::string get_log_file_path(const char *filename)
......
......@@ -35,12 +35,6 @@ Created June 2005 by Marko Makela
using st_::span;
/** A BLOB field reference full of zero, for use in assertions and tests.
Initially, BLOB field references are set to zero, in
dtuple_convert_big_rec(). */
alignas(UNIV_PAGE_SIZE_MIN)
const byte field_ref_zero[UNIV_PAGE_SIZE_MAX] = { 0, };
#ifndef UNIV_INNOCHECKSUM
#include "mtr0log.h"
#include "dict0dict.h"
......@@ -92,16 +86,12 @@ static const byte supremum_extra_data alignas(4) [] = {
};
/** Assert that a block of memory is filled with zero bytes.
Compare at most sizeof(field_ref_zero) bytes.
@param b in: memory block
@param s in: size of the memory block, in bytes */
#define ASSERT_ZERO(b, s) \
ut_ad(!memcmp(b, field_ref_zero, \
std::min<size_t>(s, sizeof field_ref_zero)));
#define ASSERT_ZERO(b, s) ut_ad(!memcmp(b, field_ref_zero, s))
/** Assert that a BLOB pointer is filled with zero bytes.
@param b in: BLOB pointer */
#define ASSERT_ZERO_BLOB(b) \
ut_ad(!memcmp(b, field_ref_zero, FIELD_REF_SIZE))
#define ASSERT_ZERO_BLOB(b) ASSERT_ZERO(b, FIELD_REF_SIZE)
/* Enable some extra debugging output. This code can be enabled
independently of any UNIV_ debugging conditions. */
......
......@@ -3424,7 +3424,7 @@ row_ins_index_entry_set_vals(
field->len = UNIV_SQL_NULL;
field->type.prtype = DATA_BINARY_TYPE;
} else {
ut_ad(col->len <= sizeof field_ref_zero);
ut_ad(col->len <= UNIV_PAGE_SIZE_MAX);
ut_ad(ind_field->fixed_len <= col->len);
dfield_set_data(field, field_ref_zero,
ind_field->fixed_len);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment