Commit 264a98ea authored by Eugene Kosov's avatar Eugene Kosov

MDEV-8069 DROP or rebuild of a large table may lock up InnoDB

Problematic mutex is dict_sys.mutex.
Idea of the patch: unlink() fd under that mutex while
it's still open. This way unlink() will be fast and
actual file removal will happen on close().
And close() will be called outside of dict_sys.mutex.
This should be safe against crash which may happen between
unlink() and close(): file will be removed by OS anyway.
The same applies to both *nix and Windows.

I created and removed a 4G file on some NVMe SSD on ext4:

write(3, "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1"..., 1048576) = 1048576 <0.000519>
fdatasync(3)                            = 0 <3.533763>
close(3)                                = 0 <0.000011>
unlink("file")                          = 0 <0.411563>

write(3, "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1"..., 1048576) = 1048576 <0.000520>
fdatasync(3)                            = 0 <3.544938>
unlink("file")                          = 0 <0.000029>
close(3)                                = 0 <0.407057>

Such systems can benefit of this patch.

fil_node_t::detach(): closes fil_node_t but not file handle,
  returns that file handle

fil_node_t::prepare_to_close_or_deatch(): 'closes' fil_node_t

fil_node_t:close_to_free(): new argument detach_handle

fil_system_t::detach(): now can detach file handles

fil_delete_tablespace(): now can detach file handles

row_drop_table_for_mysql(): performs actual file removal
parent af194ab5
......@@ -511,32 +511,41 @@ static bool fil_node_open_file(fil_node_t* node)
/** Close the file handle. */
void fil_node_t::close()
{
bool ret;
prepare_to_close_or_detach();
ut_ad(mutex_own(&fil_system.mutex));
ut_a(is_open());
ut_a(n_pending == 0);
ut_a(n_pending_flushes == 0);
ut_a(!being_extended);
ut_a(!needs_flush
|| space->purpose == FIL_TYPE_TEMPORARY
|| srv_fast_shutdown == 2
|| !srv_was_started);
/* printf("Closing file %s\n", name); */
int ret= os_file_close(handle);
ut_a(ret);
handle= OS_FILE_CLOSED;
}
ret = os_file_close(handle);
ut_a(ret);
pfs_os_file_t fil_node_t::detach()
{
prepare_to_close_or_detach();
/* printf("Closing file %s\n", name); */
pfs_os_file_t result= handle;
handle= OS_FILE_CLOSED;
return result;
}
handle = OS_FILE_CLOSED;
ut_ad(!is_open());
ut_a(fil_system.n_open > 0);
fil_system.n_open--;
void fil_node_t::prepare_to_close_or_detach()
{
ut_ad(mutex_own(&fil_system.mutex));
ut_a(is_open());
ut_a(n_pending == 0);
ut_a(n_pending_flushes == 0);
ut_a(!being_extended);
ut_a(!needs_flush || space->purpose == FIL_TYPE_TEMPORARY ||
srv_fast_shutdown == 2 || !srv_was_started);
if (fil_space_belongs_in_lru(space)) {
ut_a(UT_LIST_GET_LEN(fil_system.LRU) > 0);
UT_LIST_REMOVE(fil_system.LRU, this);
}
ut_a(fil_system.n_open > 0);
fil_system.n_open--;
if (fil_space_belongs_in_lru(space))
{
ut_a(UT_LIST_GET_LEN(fil_system.LRU) > 0);
UT_LIST_REMOVE(fil_system.LRU, this);
}
}
/** Tries to close a file in the LRU list. The caller must hold the fil_sys
......@@ -917,7 +926,7 @@ fil_space_extend(
}
/** Prepare to free a file from fil_system. */
inline void fil_node_t::close_to_free()
pfs_os_file_t fil_node_t::close_to_free(bool detach_handle)
{
ut_ad(mutex_own(&fil_system.mutex));
ut_a(magic_n == FIL_NODE_MAGIC_N);
......@@ -958,15 +967,24 @@ inline void fil_node_t::close_to_free()
}
ut_a(!n_pending_flushes);
ut_a(!being_extended);
if (detach_handle)
{
auto result= handle;
handle= OS_FILE_CLOSED;
return result;
}
bool ret= os_file_close(handle);
ut_a(ret);
handle= OS_FILE_CLOSED;
break;
}
return OS_FILE_CLOSED;
}
/** Detach a tablespace from the cache and close the files. */
inline void fil_system_t::detach(fil_space_t *space)
std::vector<pfs_os_file_t> fil_system_t::detach(fil_space_t *space,
bool detach_handle)
{
ut_ad(mutex_own(&fil_system.mutex));
HASH_DELETE(fil_space_t, hash, spaces, space->id, space);
......@@ -1000,9 +1018,18 @@ inline void fil_system_t::detach(fil_space_t *space)
n_open--;
}
std::vector<pfs_os_file_t> handles;
handles.reserve(UT_LIST_GET_LEN(space->chain));
for (fil_node_t* node= UT_LIST_GET_FIRST(space->chain); node;
node= UT_LIST_GET_NEXT(chain, node))
node->close_to_free();
{
auto handle= node->close_to_free(detach_handle);
if (handle != OS_FILE_CLOSED)
handles.push_back(handle);
}
return handles;
}
/** Free a tablespace object on which fil_system_t::detach() was invoked.
......@@ -2180,11 +2207,14 @@ bool fil_table_accessible(const dict_table_t* table)
/** Delete a tablespace and associated .ibd file.
@param[in] id tablespace identifier
@param[in] if_exists whether to ignore missing tablespace
@param[in,out] detached_handles return detached handles if not nullptr
@return DB_SUCCESS or error */
dberr_t fil_delete_tablespace(ulint id, bool if_exists)
dberr_t fil_delete_tablespace(ulint id, bool if_exists,
std::vector<pfs_os_file_t>* detached_handles)
{
char* path = NULL;
ut_ad(!is_system_tablespace(id));
ut_ad(!detached_handles || detached_handles->empty());
dberr_t err;
fil_space_t *space = fil_check_pending_operations(id, false, &path);
......@@ -2260,7 +2290,11 @@ dberr_t fil_delete_tablespace(ulint id, bool if_exists)
ut_a(s == space);
ut_a(!space->referenced());
ut_a(UT_LIST_GET_LEN(space->chain) == 1);
fil_system.detach(space);
auto handles = fil_system.detach(space,
detached_handles != nullptr);
if (detached_handles) {
*detached_handles = std::move(handles);
}
mutex_exit(&fil_system.mutex);
log_mutex_enter();
......
......@@ -653,11 +653,19 @@ struct fil_node_t {
/** Close the file handle. */
void close();
/** Prepare to free a file from fil_system. */
inline void close_to_free();
/** Same as close() but returns file handle instead of closing it. */
pfs_os_file_t detach() MY_ATTRIBUTE((warn_unused_result));
/** Prepare to free a file from fil_system.
@param detach_handle whether to detach instead of closing a handle
@return detached handle or OS_FILE_CLOSED */
pfs_os_file_t close_to_free(bool detach_handle= false);
/** Update the data structures on I/O completion */
inline void complete_io(bool write= false);
private:
/** Does stuff common for close() and detach() */
void prepare_to_close_or_detach();
};
/** Value of fil_node_t::magic_n */
......@@ -973,8 +981,12 @@ struct fil_system_t {
}
#endif
public:
/** Detach a tablespace from the cache and close the files. */
inline void detach(fil_space_t *space);
/** Detach a tablespace from the cache and close the files.
@param space tablespace
@param detach_handle whether to detach or close handles
@return detached handles or empty vector */
std::vector<pfs_os_file_t> detach(fil_space_t *space,
bool detach_handle= false);
ib_mutex_t mutex; /*!< The mutex protecting the cache */
fil_space_t* sys_space; /*!< The innodb_system tablespace */
......@@ -1285,8 +1297,11 @@ bool fil_table_accessible(const dict_table_t* table)
/** Delete a tablespace and associated .ibd file.
@param[in] id tablespace identifier
@param[in] if_exists whether to ignore missing tablespace
@param[out] leaked_handles return detached handles here
@return DB_SUCCESS or error */
dberr_t fil_delete_tablespace(ulint id, bool if_exists= false);
dberr_t
fil_delete_tablespace(ulint id, bool if_exists= false,
std::vector<pfs_os_file_t> *detached_handles= nullptr);
/** Prepare to truncate an undo tablespace.
@param[in] space_id undo tablespace id
......
......@@ -3374,6 +3374,8 @@ row_drop_table_for_mysql(
DBUG_RETURN(DB_TABLE_NOT_FOUND);
}
std::vector<pfs_os_file_t> detached_handles;
const bool is_temp_name = strstr(table->name.m_name,
"/" TEMP_FILE_PREFIX);
......@@ -3757,7 +3759,8 @@ row_drop_table_for_mysql(
ut_ad(!filepath);
if (space->id != TRX_SYS_SPACE) {
err = fil_delete_tablespace(space->id);
err = fil_delete_tablespace(space->id, false,
&detached_handles);
}
break;
......@@ -3837,6 +3840,11 @@ row_drop_table_for_mysql(
row_mysql_unlock_data_dictionary(trx);
}
for (const auto& handle : detached_handles) {
ut_ad(handle != OS_FILE_CLOSED);
os_file_close(handle);
}
trx->op_info = "";
srv_wake_master_thread();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment