Commit ce64a65f authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-14310 Possible corruption by table-rebuilding or index-creating ALTER TABLE…ALGORITHM=INPLACE

Also, MDEV-14317 When ALTER TABLE is aborted, do not write garbage pages to data files

As pointed out by Shaohua Wang, the merge of MDEV-13328 from
MariaDB 10.1 (based on MySQL 5.6) to 10.2 (based on 5.7)
was performed incorrectly.

Let us always pass a non-NULL FlushObserver* when writing
to data files is desired.

FlushObserver::is_partial_flush(): Check if this is a bulk-load
(partial flush of the tablespace).

FlushObserver::is_interrupted(): Check for interrupt status.

buf_LRU_flush_or_remove_pages(): Instead of trx_t*, take
FlushObserver* as a parameter.

buf_flush_or_remove_pages(): Remove the parameters flush, trx.
If observer!=NULL, write out the data pages. Use the new predicate
observer->is_partial() to distinguish a partial tablespace flush
(after bulk-loading) from a full tablespace flush (export).
Return a bool (whether all pages were removed from the flush_list).

buf_flush_dirty_pages(): Remove the parameter trx.
parent a20c1217
......@@ -3821,16 +3821,14 @@ FlushObserver::notify_remove(
void
FlushObserver::flush()
{
ut_ad(m_trx);
if (!m_interrupted && m_stage) {
m_stage->begin_phase_flush(buf_flush_get_dirty_pages_count(
m_space_id, this));
}
/* MDEV-14317 FIXME: Discard all changes to only those pages
that will be freed by the clean-up of the ALTER operation.
(Maybe, instead of buf_pool->flush_list, use a dedicated list
for pages on which redo logging has been disabled.) */
buf_LRU_flush_or_remove_pages(m_space_id, m_trx);
buf_LRU_flush_or_remove_pages(m_space_id, this);
/* Wait for all dirty pages were flushed. */
for (ulint i = 0; i < srv_buf_pool_instances; i++) {
......
......@@ -542,27 +542,21 @@ buf_flush_or_remove_page(
return(processed);
}
/******************************************************************//**
Remove all dirty pages belonging to a given tablespace inside a specific
/** Remove all dirty pages belonging to a given tablespace inside a specific
buffer pool instance when we are deleting the data file(s) of that
tablespace. The pages still remain a part of LRU and are evicted from
the list as they age towards the tail of the LRU.
@retval DB_SUCCESS if all freed
@retval DB_FAIL if not all freed
@retval DB_INTERRUPTED if the transaction was interrupted */
@param[in,out] buf_pool buffer pool
@param[in] id tablespace identifier
@param[in] observer flush observer (to check for interrupt),
or NULL if the files should not be written to
@return whether all dirty pages were freed */
static MY_ATTRIBUTE((warn_unused_result))
dberr_t
bool
buf_flush_or_remove_pages(
/*======================*/
buf_pool_t* buf_pool, /*!< buffer pool instance */
ulint id, /*!< in: target space id for which
to remove or flush pages */
FlushObserver* observer, /*!< in: flush observer */
bool flush, /*!< in: flush to disk if true but
don't remove else remove without
flushing to disk */
const trx_t* trx) /*!< to check if the operation must
be interrupted, can be 0 */
buf_pool_t* buf_pool,
ulint id,
FlushObserver* observer)
{
buf_page_t* prev;
buf_page_t* bpage;
......@@ -584,15 +578,27 @@ buf_flush_or_remove_pages(
prev = UT_LIST_GET_PREV(list, bpage);
/* If flush observer is NULL, flush page for space id,
or flush page for flush observer. */
if (observer ? (observer != bpage->flush_observer)
: (id != bpage->id.space())) {
/* Skip this block, as it does not belong to
the target space. */
} else if (!buf_flush_or_remove_page(buf_pool, bpage, flush)) {
/* Flush the pages matching space id,
or pages matching the flush observer. */
if (observer && observer->is_partial_flush()) {
if (observer != bpage->flush_observer) {
/* Skip this block. */
} else if (!buf_flush_or_remove_page(
buf_pool, bpage,
!observer->is_interrupted())) {
all_freed = false;
} else if (!observer->is_interrupted()) {
/* The processing was successful. And during the
processing we have released the buf_pool mutex
when calling buf_page_flush(). We cannot trust
prev pointer. */
goto rescan;
}
} else if (id != bpage->id.space()) {
/* Skip this block, because it is for a
different tablespace. */
} else if (!buf_flush_or_remove_page(
buf_pool, bpage, observer != NULL)) {
/* Remove was unsuccessful, we have to try again
by scanning the entire list from the end.
......@@ -615,7 +621,7 @@ buf_flush_or_remove_pages(
iteration. */
all_freed = false;
} else if (flush) {
} else if (observer) {
/* The processing was successful. And during the
processing we have released the buf_pool mutex
......@@ -636,25 +642,14 @@ buf_flush_or_remove_pages(
/* The check for trx is interrupted is expensive, we want
to check every N iterations. */
if (!processed && trx && trx_is_interrupted(trx)) {
if (trx->flush_observer != NULL) {
if (flush) {
trx->flush_observer->interrupted();
} else {
/* We should remove all pages with the
the flush observer. */
continue;
}
}
buf_flush_list_mutex_exit(buf_pool);
return(DB_INTERRUPTED);
if (!processed && observer) {
observer->check_interrupted();
}
}
buf_flush_list_mutex_exit(buf_pool);
return(all_freed ? DB_SUCCESS : DB_FAIL);
return(all_freed);
}
/** Remove or flush all the dirty pages that belong to a given tablespace
......@@ -665,73 +660,58 @@ the tail of the LRU list.
@param[in] id tablespace identifier
@param[in] observer flush observer,
or NULL if the files should not be written to
@param[in] trx transaction (to check for interrupt),
or NULL if the files should not be written to
*/
static
void
buf_flush_dirty_pages(
buf_pool_t* buf_pool,
ulint id,
FlushObserver* observer,
const trx_t* trx)
FlushObserver* observer)
{
dberr_t err;
bool flush = trx != NULL;
do {
for (;;) {
buf_pool_mutex_enter(buf_pool);
err = buf_flush_or_remove_pages(
buf_pool, id, observer, flush, trx);
bool freed = buf_flush_or_remove_pages(buf_pool, id, observer);
buf_pool_mutex_exit(buf_pool);
ut_ad(buf_flush_validate(buf_pool));
if (err == DB_FAIL) {
os_thread_sleep(2000);
}
if (err == DB_INTERRUPTED && observer != NULL) {
ut_a(flush);
flush = false;
err = DB_FAIL;
if (freed) {
break;
}
/* DB_FAIL is a soft error, it means that the task wasn't
completed, needs to be retried. */
os_thread_sleep(2000);
ut_ad(buf_flush_validate(buf_pool));
}
} while (err == DB_FAIL);
ut_ad(err == DB_INTERRUPTED
ut_ad((observer && observer->is_interrupted())
|| buf_pool_get_dirty_pages_count(buf_pool, id, observer) == 0);
}
/** Empty the flush list for all pages belonging to a tablespace.
@param[in] id tablespace identifier
@param[in] trx transaction, for checking for user interrupt;
@param[in] observer flush observer,
or NULL if nothing is to be written
@param[in] drop_ahi whether to drop the adaptive hash index */
void
buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx, bool drop_ahi)
buf_LRU_flush_or_remove_pages(
ulint id,
FlushObserver* observer,
bool drop_ahi)
{
FlushObserver* observer = (trx == NULL) ? NULL : trx->flush_observer;
/* Pages in the system tablespace must never be discarded. */
ut_ad(id || trx);
ut_ad(id || observer);
for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool = buf_pool_from_array(i);
if (drop_ahi) {
buf_LRU_drop_page_hash_for_tablespace(buf_pool, id);
}
buf_flush_dirty_pages(buf_pool, id, observer, trx);
buf_flush_dirty_pages(buf_pool, id, observer);
}
if (trx && !observer && !trx_is_interrupted(trx)) {
if (observer && !observer->is_interrupted()) {
/* Ensure that all asynchronous IO is completed. */
os_aio_wait_until_no_pending_writes();
fil_flush(id);
......
......@@ -2907,7 +2907,10 @@ fil_close_tablespace(
completely and permanently. The flag stop_new_ops also prevents
fil_flush() from being applied to this tablespace. */
buf_LRU_flush_or_remove_pages(id, trx);
{
FlushObserver observer(id, trx, NULL);
buf_LRU_flush_or_remove_pages(id, &observer);
}
/* If the free is successful, the X lock will be released before
the space memory data structure is freed. */
......
......@@ -363,6 +363,12 @@ class FlushObserver {
|| m_interrupted);
}
/** @return whether to flush only some pages of the tablespace */
bool is_partial_flush() const { return m_stage != NULL; }
/** @return whether the operation was interrupted */
bool is_interrupted() const { return m_interrupted; }
/** Interrupt observer not to wait. */
void interrupted()
{
......@@ -375,7 +381,6 @@ class FlushObserver {
/** Flush dirty pages. */
void flush();
/** Notify observer of flushing a page
@param[in] buf_pool buffer pool instance
@param[in] bpage buffer page to flush */
......@@ -391,10 +396,10 @@ class FlushObserver {
buf_page_t* bpage);
private:
/** Table space id */
ulint m_space_id;
const ulint m_space_id;
/** Trx instance */
trx_t* m_trx;
trx_t* const m_trx;
/** Performance schema accounting object, used by ALTER TABLE.
If not NULL, then stage->begin_phase_flush() will be called initially,
......
......@@ -52,12 +52,14 @@ These are low-level functions
/** Empty the flush list for all pages belonging to a tablespace.
@param[in] id tablespace identifier
@param[in] trx transaction, for checking for user interrupt;
@param[in,out] observer flush observer,
or NULL if nothing is to be written
@param[in] drop_ahi whether to drop the adaptive hash index */
UNIV_INTERN
void
buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx, bool drop_ahi=false);
buf_LRU_flush_or_remove_pages(
ulint id,
FlushObserver* observer,
bool drop_ahi = false);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/********************************************************************//**
......
......@@ -3653,11 +3653,16 @@ row_import_for_mysql(
The only dirty pages generated should be from the pessimistic purge
of delete marked records that couldn't be purged in Phase I. */
buf_LRU_flush_or_remove_pages(prebuilt->table->space, trx);
{
FlushObserver observer(prebuilt->table->space, trx, NULL);
buf_LRU_flush_or_remove_pages(prebuilt->table->space,
&observer);
if (trx_is_interrupted(trx)) {
if (observer.is_interrupted()) {
ib::info() << "Phase III - Flush interrupted";
return(row_import_error(prebuilt, trx, DB_INTERRUPTED));
return(row_import_error(prebuilt, trx,
DB_INTERRUPTED));
}
}
ib::info() << "Phase IV - Flush complete";
......
......@@ -535,7 +535,10 @@ row_quiesce_table_start(
}
if (!trx_is_interrupted(trx)) {
buf_LRU_flush_or_remove_pages(table->space, trx);
{
FlushObserver observer(table->space, trx, NULL);
buf_LRU_flush_or_remove_pages(table->space, &observer);
}
if (trx_is_interrupted(trx)) {
......
......@@ -1100,22 +1100,19 @@ srv_undo_tablespaces_init(bool create_new_db)
mtr_commit(&mtr);
/* Step-2: Flush the dirty pages from the buffer pool. */
trx_t* trx = trx_allocate_for_background();
for (undo::undo_spaces_t::const_iterator it
= undo::Truncate::s_fix_up_spaces.begin();
it != undo::Truncate::s_fix_up_spaces.end();
++it) {
buf_LRU_flush_or_remove_pages(TRX_SYS_SPACE, trx);
buf_LRU_flush_or_remove_pages(*it, trx);
FlushObserver dummy(TRX_SYS_SPACE, NULL, NULL);
buf_LRU_flush_or_remove_pages(TRX_SYS_SPACE, &dummy);
FlushObserver dummy2(*it, NULL, NULL);
buf_LRU_flush_or_remove_pages(*it, &dummy2);
/* Remove the truncate redo log file. */
undo::Truncate undo_trunc;
undo_trunc.done_logging(*it);
}
trx_free_for_background(trx);
}
return(DB_SUCCESS);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment