Commit ce64a65f authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-14310 Possible corruption by table-rebuilding or index-creating ALTER TABLE…ALGORITHM=INPLACE

Also, MDEV-14317 When ALTER TABLE is aborted, do not write garbage pages to data files

As pointed out by Shaohua Wang, the merge of MDEV-13328 from
MariaDB 10.1 (based on MySQL 5.6) to 10.2 (based on 5.7)
was performed incorrectly.

Let us always pass a non-NULL FlushObserver* when writing
to data files is desired.

FlushObserver::is_partial_flush(): Check if this is a bulk-load
(partial flush of the tablespace).

FlushObserver::is_interrupted(): Check for interrupt status.

buf_LRU_flush_or_remove_pages(): Instead of trx_t*, take
FlushObserver* as a parameter.

buf_flush_or_remove_pages(): Remove the parameters flush, trx.
If observer!=NULL, write out the data pages. Use the new predicate
observer->is_partial() to distinguish a partial tablespace flush
(after bulk-loading) from a full tablespace flush (export).
Return a bool (whether all pages were removed from the flush_list).

buf_flush_dirty_pages(): Remove the parameter trx.
parent a20c1217
...@@ -3821,16 +3821,14 @@ FlushObserver::notify_remove( ...@@ -3821,16 +3821,14 @@ FlushObserver::notify_remove(
void void
FlushObserver::flush() FlushObserver::flush()
{ {
ut_ad(m_trx);
if (!m_interrupted && m_stage) { if (!m_interrupted && m_stage) {
m_stage->begin_phase_flush(buf_flush_get_dirty_pages_count( m_stage->begin_phase_flush(buf_flush_get_dirty_pages_count(
m_space_id, this)); m_space_id, this));
} }
/* MDEV-14317 FIXME: Discard all changes to only those pages buf_LRU_flush_or_remove_pages(m_space_id, this);
that will be freed by the clean-up of the ALTER operation.
(Maybe, instead of buf_pool->flush_list, use a dedicated list
for pages on which redo logging has been disabled.) */
buf_LRU_flush_or_remove_pages(m_space_id, m_trx);
/* Wait for all dirty pages were flushed. */ /* Wait for all dirty pages were flushed. */
for (ulint i = 0; i < srv_buf_pool_instances; i++) { for (ulint i = 0; i < srv_buf_pool_instances; i++) {
......
...@@ -542,27 +542,21 @@ buf_flush_or_remove_page( ...@@ -542,27 +542,21 @@ buf_flush_or_remove_page(
return(processed); return(processed);
} }
/******************************************************************//** /** Remove all dirty pages belonging to a given tablespace inside a specific
Remove all dirty pages belonging to a given tablespace inside a specific
buffer pool instance when we are deleting the data file(s) of that buffer pool instance when we are deleting the data file(s) of that
tablespace. The pages still remain a part of LRU and are evicted from tablespace. The pages still remain a part of LRU and are evicted from
the list as they age towards the tail of the LRU. the list as they age towards the tail of the LRU.
@retval DB_SUCCESS if all freed @param[in,out] buf_pool buffer pool
@retval DB_FAIL if not all freed @param[in] id tablespace identifier
@retval DB_INTERRUPTED if the transaction was interrupted */ @param[in] observer flush observer (to check for interrupt),
or NULL if the files should not be written to
@return whether all dirty pages were freed */
static MY_ATTRIBUTE((warn_unused_result)) static MY_ATTRIBUTE((warn_unused_result))
dberr_t bool
buf_flush_or_remove_pages( buf_flush_or_remove_pages(
/*======================*/ buf_pool_t* buf_pool,
buf_pool_t* buf_pool, /*!< buffer pool instance */ ulint id,
ulint id, /*!< in: target space id for which FlushObserver* observer)
to remove or flush pages */
FlushObserver* observer, /*!< in: flush observer */
bool flush, /*!< in: flush to disk if true but
don't remove else remove without
flushing to disk */
const trx_t* trx) /*!< to check if the operation must
be interrupted, can be 0 */
{ {
buf_page_t* prev; buf_page_t* prev;
buf_page_t* bpage; buf_page_t* bpage;
...@@ -584,15 +578,27 @@ buf_flush_or_remove_pages( ...@@ -584,15 +578,27 @@ buf_flush_or_remove_pages(
prev = UT_LIST_GET_PREV(list, bpage); prev = UT_LIST_GET_PREV(list, bpage);
/* If flush observer is NULL, flush page for space id, /* Flush the pages matching space id,
or flush page for flush observer. */ or pages matching the flush observer. */
if (observer ? (observer != bpage->flush_observer) if (observer && observer->is_partial_flush()) {
: (id != bpage->id.space())) { if (observer != bpage->flush_observer) {
/* Skip this block. */
/* Skip this block, as it does not belong to } else if (!buf_flush_or_remove_page(
the target space. */ buf_pool, bpage,
!observer->is_interrupted())) {
} else if (!buf_flush_or_remove_page(buf_pool, bpage, flush)) { all_freed = false;
} else if (!observer->is_interrupted()) {
/* The processing was successful. And during the
processing we have released the buf_pool mutex
when calling buf_page_flush(). We cannot trust
prev pointer. */
goto rescan;
}
} else if (id != bpage->id.space()) {
/* Skip this block, because it is for a
different tablespace. */
} else if (!buf_flush_or_remove_page(
buf_pool, bpage, observer != NULL)) {
/* Remove was unsuccessful, we have to try again /* Remove was unsuccessful, we have to try again
by scanning the entire list from the end. by scanning the entire list from the end.
...@@ -615,7 +621,7 @@ buf_flush_or_remove_pages( ...@@ -615,7 +621,7 @@ buf_flush_or_remove_pages(
iteration. */ iteration. */
all_freed = false; all_freed = false;
} else if (flush) { } else if (observer) {
/* The processing was successful. And during the /* The processing was successful. And during the
processing we have released the buf_pool mutex processing we have released the buf_pool mutex
...@@ -636,25 +642,14 @@ buf_flush_or_remove_pages( ...@@ -636,25 +642,14 @@ buf_flush_or_remove_pages(
/* The check for trx is interrupted is expensive, we want /* The check for trx is interrupted is expensive, we want
to check every N iterations. */ to check every N iterations. */
if (!processed && trx && trx_is_interrupted(trx)) { if (!processed && observer) {
if (trx->flush_observer != NULL) { observer->check_interrupted();
if (flush) {
trx->flush_observer->interrupted();
} else {
/* We should remove all pages with the
the flush observer. */
continue;
}
}
buf_flush_list_mutex_exit(buf_pool);
return(DB_INTERRUPTED);
} }
} }
buf_flush_list_mutex_exit(buf_pool); buf_flush_list_mutex_exit(buf_pool);
return(all_freed ? DB_SUCCESS : DB_FAIL); return(all_freed);
} }
/** Remove or flush all the dirty pages that belong to a given tablespace /** Remove or flush all the dirty pages that belong to a given tablespace
...@@ -665,73 +660,58 @@ the tail of the LRU list. ...@@ -665,73 +660,58 @@ the tail of the LRU list.
@param[in] id tablespace identifier @param[in] id tablespace identifier
@param[in] observer flush observer, @param[in] observer flush observer,
or NULL if the files should not be written to or NULL if the files should not be written to
@param[in] trx transaction (to check for interrupt),
or NULL if the files should not be written to
*/ */
static static
void void
buf_flush_dirty_pages( buf_flush_dirty_pages(
buf_pool_t* buf_pool, buf_pool_t* buf_pool,
ulint id, ulint id,
FlushObserver* observer, FlushObserver* observer)
const trx_t* trx)
{ {
dberr_t err; for (;;) {
bool flush = trx != NULL;
do {
buf_pool_mutex_enter(buf_pool); buf_pool_mutex_enter(buf_pool);
err = buf_flush_or_remove_pages( bool freed = buf_flush_or_remove_pages(buf_pool, id, observer);
buf_pool, id, observer, flush, trx);
buf_pool_mutex_exit(buf_pool); buf_pool_mutex_exit(buf_pool);
ut_ad(buf_flush_validate(buf_pool)); ut_ad(buf_flush_validate(buf_pool));
if (err == DB_FAIL) { if (freed) {
os_thread_sleep(2000); break;
}
if (err == DB_INTERRUPTED && observer != NULL) {
ut_a(flush);
flush = false;
err = DB_FAIL;
} }
/* DB_FAIL is a soft error, it means that the task wasn't os_thread_sleep(2000);
completed, needs to be retried. */
ut_ad(buf_flush_validate(buf_pool)); ut_ad(buf_flush_validate(buf_pool));
}
} while (err == DB_FAIL); ut_ad((observer && observer->is_interrupted())
ut_ad(err == DB_INTERRUPTED
|| buf_pool_get_dirty_pages_count(buf_pool, id, observer) == 0); || buf_pool_get_dirty_pages_count(buf_pool, id, observer) == 0);
} }
/** Empty the flush list for all pages belonging to a tablespace. /** Empty the flush list for all pages belonging to a tablespace.
@param[in] id tablespace identifier @param[in] id tablespace identifier
@param[in] trx transaction, for checking for user interrupt; @param[in] observer flush observer,
or NULL if nothing is to be written or NULL if nothing is to be written
@param[in] drop_ahi whether to drop the adaptive hash index */ @param[in] drop_ahi whether to drop the adaptive hash index */
void void
buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx, bool drop_ahi) buf_LRU_flush_or_remove_pages(
ulint id,
FlushObserver* observer,
bool drop_ahi)
{ {
FlushObserver* observer = (trx == NULL) ? NULL : trx->flush_observer;
/* Pages in the system tablespace must never be discarded. */ /* Pages in the system tablespace must never be discarded. */
ut_ad(id || trx); ut_ad(id || observer);
for (ulint i = 0; i < srv_buf_pool_instances; i++) { for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool = buf_pool_from_array(i); buf_pool_t* buf_pool = buf_pool_from_array(i);
if (drop_ahi) { if (drop_ahi) {
buf_LRU_drop_page_hash_for_tablespace(buf_pool, id); buf_LRU_drop_page_hash_for_tablespace(buf_pool, id);
} }
buf_flush_dirty_pages(buf_pool, id, observer, trx); buf_flush_dirty_pages(buf_pool, id, observer);
} }
if (trx && !observer && !trx_is_interrupted(trx)) { if (observer && !observer->is_interrupted()) {
/* Ensure that all asynchronous IO is completed. */ /* Ensure that all asynchronous IO is completed. */
os_aio_wait_until_no_pending_writes(); os_aio_wait_until_no_pending_writes();
fil_flush(id); fil_flush(id);
......
...@@ -2907,7 +2907,10 @@ fil_close_tablespace( ...@@ -2907,7 +2907,10 @@ fil_close_tablespace(
completely and permanently. The flag stop_new_ops also prevents completely and permanently. The flag stop_new_ops also prevents
fil_flush() from being applied to this tablespace. */ fil_flush() from being applied to this tablespace. */
buf_LRU_flush_or_remove_pages(id, trx); {
FlushObserver observer(id, trx, NULL);
buf_LRU_flush_or_remove_pages(id, &observer);
}
/* If the free is successful, the X lock will be released before /* If the free is successful, the X lock will be released before
the space memory data structure is freed. */ the space memory data structure is freed. */
......
...@@ -363,6 +363,12 @@ class FlushObserver { ...@@ -363,6 +363,12 @@ class FlushObserver {
|| m_interrupted); || m_interrupted);
} }
/** @return whether to flush only some pages of the tablespace */
bool is_partial_flush() const { return m_stage != NULL; }
/** @return whether the operation was interrupted */
bool is_interrupted() const { return m_interrupted; }
/** Interrupt observer not to wait. */ /** Interrupt observer not to wait. */
void interrupted() void interrupted()
{ {
...@@ -375,7 +381,6 @@ class FlushObserver { ...@@ -375,7 +381,6 @@ class FlushObserver {
/** Flush dirty pages. */ /** Flush dirty pages. */
void flush(); void flush();
/** Notify observer of flushing a page /** Notify observer of flushing a page
@param[in] buf_pool buffer pool instance @param[in] buf_pool buffer pool instance
@param[in] bpage buffer page to flush */ @param[in] bpage buffer page to flush */
...@@ -391,10 +396,10 @@ class FlushObserver { ...@@ -391,10 +396,10 @@ class FlushObserver {
buf_page_t* bpage); buf_page_t* bpage);
private: private:
/** Table space id */ /** Table space id */
ulint m_space_id; const ulint m_space_id;
/** Trx instance */ /** Trx instance */
trx_t* m_trx; trx_t* const m_trx;
/** Performance schema accounting object, used by ALTER TABLE. /** Performance schema accounting object, used by ALTER TABLE.
If not NULL, then stage->begin_phase_flush() will be called initially, If not NULL, then stage->begin_phase_flush() will be called initially,
......
...@@ -52,12 +52,14 @@ These are low-level functions ...@@ -52,12 +52,14 @@ These are low-level functions
/** Empty the flush list for all pages belonging to a tablespace. /** Empty the flush list for all pages belonging to a tablespace.
@param[in] id tablespace identifier @param[in] id tablespace identifier
@param[in] trx transaction, for checking for user interrupt; @param[in,out] observer flush observer,
or NULL if nothing is to be written or NULL if nothing is to be written
@param[in] drop_ahi whether to drop the adaptive hash index */ @param[in] drop_ahi whether to drop the adaptive hash index */
UNIV_INTERN
void void
buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx, bool drop_ahi=false); buf_LRU_flush_or_remove_pages(
ulint id,
FlushObserver* observer,
bool drop_ahi = false);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/********************************************************************//** /********************************************************************//**
......
...@@ -3653,11 +3653,16 @@ row_import_for_mysql( ...@@ -3653,11 +3653,16 @@ row_import_for_mysql(
The only dirty pages generated should be from the pessimistic purge The only dirty pages generated should be from the pessimistic purge
of delete marked records that couldn't be purged in Phase I. */ of delete marked records that couldn't be purged in Phase I. */
buf_LRU_flush_or_remove_pages(prebuilt->table->space, trx); {
FlushObserver observer(prebuilt->table->space, trx, NULL);
buf_LRU_flush_or_remove_pages(prebuilt->table->space,
&observer);
if (trx_is_interrupted(trx)) { if (observer.is_interrupted()) {
ib::info() << "Phase III - Flush interrupted"; ib::info() << "Phase III - Flush interrupted";
return(row_import_error(prebuilt, trx, DB_INTERRUPTED)); return(row_import_error(prebuilt, trx,
DB_INTERRUPTED));
}
} }
ib::info() << "Phase IV - Flush complete"; ib::info() << "Phase IV - Flush complete";
......
...@@ -535,7 +535,10 @@ row_quiesce_table_start( ...@@ -535,7 +535,10 @@ row_quiesce_table_start(
} }
if (!trx_is_interrupted(trx)) { if (!trx_is_interrupted(trx)) {
buf_LRU_flush_or_remove_pages(table->space, trx); {
FlushObserver observer(table->space, trx, NULL);
buf_LRU_flush_or_remove_pages(table->space, &observer);
}
if (trx_is_interrupted(trx)) { if (trx_is_interrupted(trx)) {
......
...@@ -1100,22 +1100,19 @@ srv_undo_tablespaces_init(bool create_new_db) ...@@ -1100,22 +1100,19 @@ srv_undo_tablespaces_init(bool create_new_db)
mtr_commit(&mtr); mtr_commit(&mtr);
/* Step-2: Flush the dirty pages from the buffer pool. */ /* Step-2: Flush the dirty pages from the buffer pool. */
trx_t* trx = trx_allocate_for_background();
for (undo::undo_spaces_t::const_iterator it for (undo::undo_spaces_t::const_iterator it
= undo::Truncate::s_fix_up_spaces.begin(); = undo::Truncate::s_fix_up_spaces.begin();
it != undo::Truncate::s_fix_up_spaces.end(); it != undo::Truncate::s_fix_up_spaces.end();
++it) { ++it) {
FlushObserver dummy(TRX_SYS_SPACE, NULL, NULL);
buf_LRU_flush_or_remove_pages(TRX_SYS_SPACE, trx); buf_LRU_flush_or_remove_pages(TRX_SYS_SPACE, &dummy);
FlushObserver dummy2(*it, NULL, NULL);
buf_LRU_flush_or_remove_pages(*it, trx); buf_LRU_flush_or_remove_pages(*it, &dummy2);
/* Remove the truncate redo log file. */ /* Remove the truncate redo log file. */
undo::Truncate undo_trunc; undo::Truncate undo_trunc;
undo_trunc.done_logging(*it); undo_trunc.done_logging(*it);
} }
trx_free_for_background(trx);
} }
return(DB_SUCCESS); return(DB_SUCCESS);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment