Commit db006a9a authored by Marko Mäkelä

MDEV-21452: Remove os_event_t, MUTEX_EVENT, TTASEventMutex, sync_array

We will default to MUTEXTYPE=sys (using OSTrackMutex) for those
ib_mutex_t that have not been replaced yet.

The view INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS is removed.

The parameter innodb_sync_array_size is removed.

FIXME: innodb_fatal_semaphore_wait_threshold will no longer be enforced.
We should enforce it for lock_sys.mutex and dict_sys.mutex somehow!

innodb_sync_debug=ON might still cover ib_mutex_t.
parent 38fd7b7d
......@@ -50,8 +50,6 @@ create sql security invoker view i_sys_foreign_cols as select * from information
create sql security definer view d_sys_foreign_cols as select * from information_schema.innodb_sys_foreign_cols;
create sql security invoker view i_sys_indexes as select * from information_schema.innodb_sys_indexes;
create sql security definer view d_sys_indexes as select * from information_schema.innodb_sys_indexes;
create sql security invoker view i_sys_semaphore_waits as select * from information_schema.innodb_sys_semaphore_waits;
create sql security definer view d_sys_semaphore_waits as select * from information_schema.innodb_sys_semaphore_waits;
create sql security invoker view i_sys_tables as select * from information_schema.innodb_sys_tables;
create sql security definer view d_sys_tables as select * from information_schema.innodb_sys_tables;
create sql security invoker view i_sys_tablespaces as select * from information_schema.innodb_sys_tablespaces;
......@@ -222,13 +220,6 @@ ERROR 42000: Access denied; you need (at least one of) the PROCESS privilege(s)
select count(*) > -1 from d_sys_indexes;
count(*) > -1
1
select count(*) > -1 from information_schema.innodb_sys_semaphore_waits;
ERROR 42000: Access denied; you need (at least one of) the PROCESS privilege(s) for this operation
select count(*) > -1 from i_sys_semaphore_waits;
ERROR 42000: Access denied; you need (at least one of) the PROCESS privilege(s) for this operation
select count(*) > -1 from d_sys_semaphore_waits;
count(*) > -1
1
select count(*) > -1 from information_schema.innodb_sys_tables;
ERROR 42000: Access denied; you need (at least one of) the PROCESS privilege(s) for this operation
select count(*) > -1 from i_sys_tables;
......
......@@ -363,7 +363,3 @@ select * from information_schema.innodb_tablespaces_encryption;
SPACE NAME ENCRYPTION_SCHEME KEYSERVER_REQUESTS MIN_KEY_VERSION CURRENT_KEY_VERSION KEY_ROTATION_PAGE_NUMBER KEY_ROTATION_MAX_PAGE_NUMBER CURRENT_KEY_ID ROTATING_OR_FLUSHING
Warnings:
Warning 1012 InnoDB: SELECTing from INFORMATION_SCHEMA.innodb_tablespaces_encryption but the InnoDB storage engine is not installed
select * from information_schema.innodb_sys_semaphore_waits;
THREAD_ID OBJECT_NAME FILE LINE WAIT_TIME WAIT_OBJECT WAIT_TYPE HOLDER_THREAD_ID HOLDER_FILE HOLDER_LINE CREATED_FILE CREATED_LINE WRITER_THREAD RESERVATION_MODE READERS WAITERS_FLAG LOCK_WORD LAST_WRITER_FILE LAST_WRITER_LINE OS_WAIT_COUNT
Warnings:
Warning 1012 InnoDB: SELECTing from INFORMATION_SCHEMA.innodb_sys_semaphore_waits but the InnoDB storage engine is not installed
connect con1,localhost,root,,;
connect con2,localhost,root,,;
drop table if exists t1;
connection con1;
create table t1 (id integer, x integer) engine = InnoDB;
insert into t1 values(0, 0);
SET @saved_dbug = @@SESSION.debug_dbug;
set DEBUG_DBUG='+d,fatal-semaphore-timeout';
set autocommit=0;
# Sending query on con1,
# the session will hold lock table mutex and sleep
SELECT * from t1 where id = 0 FOR UPDATE;
connection con2;
set autocommit=0;
# Sending query on con2,
# the session will be blocked on the lock table mutex and
# thus be put into sync arry
SELECT * from t1 where id = 0 FOR UPDATE;
connection default;
# Waitting for mysqld to crash
# Mysqld crash was detected
# Waitting for reconnect after mysqld restarts
# Reconnected after mysqld was successfully restarted
# Cleaning up before exit
SET debug_dbug = @saved_dbug;
drop table if exists t1;
# Clean exit
......@@ -26,5 +26,4 @@
--enable-plugin-innodb-sys-foreign-cols
--enable-plugin-innodb-sys-tablespaces
--enable-plugin-innodb-sys-virtual
--enable-plugin-innodb-sys-semaphore-waits
--enable-plugin-innodb-tablespaces-encryption
......@@ -79,9 +79,6 @@ create sql security definer view d_sys_foreign_cols as select * from information
create sql security invoker view i_sys_indexes as select * from information_schema.innodb_sys_indexes;
create sql security definer view d_sys_indexes as select * from information_schema.innodb_sys_indexes;
create sql security invoker view i_sys_semaphore_waits as select * from information_schema.innodb_sys_semaphore_waits;
create sql security definer view d_sys_semaphore_waits as select * from information_schema.innodb_sys_semaphore_waits;
create sql security invoker view i_sys_tables as select * from information_schema.innodb_sys_tables;
create sql security definer view d_sys_tables as select * from information_schema.innodb_sys_tables;
......@@ -236,12 +233,6 @@ select count(*) > -1 from information_schema.innodb_sys_indexes;
select count(*) > -1 from i_sys_indexes;
select count(*) > -1 from d_sys_indexes;
--error ER_SPECIFIC_ACCESS_DENIED_ERROR
select count(*) > -1 from information_schema.innodb_sys_semaphore_waits;
--error ER_SPECIFIC_ACCESS_DENIED_ERROR
select count(*) > -1 from i_sys_semaphore_waits;
select count(*) > -1 from d_sys_semaphore_waits;
--error ER_SPECIFIC_ACCESS_DENIED_ERROR
select count(*) > -1 from information_schema.innodb_sys_tables;
--error ER_SPECIFIC_ACCESS_DENIED_ERROR
......
......@@ -59,4 +59,3 @@
--loose-innodb_sys_datafiles
--loose-innodb_changed_pages
--loose-innodb_tablespaces_encryption
--loose-innodb_sys_semaphore_waits
......@@ -27,4 +27,3 @@ select * from information_schema.innodb_sys_foreign;
select * from information_schema.innodb_sys_foreign_cols;
select * from information_schema.innodb_sys_tablespaces;
select * from information_schema.innodb_tablespaces_encryption;
select * from information_schema.innodb_sys_semaphore_waits;
--innodb-fatal-semaphore-wait-threshold=1
--innodb-sys-semaphore-waits=1
# Verify that mysqld aborts when a session stays blocked on the lock table
# mutex for longer than the fatal semaphore wait threshold, and that the
# server can be restarted and reconnected to afterwards.
--source include/have_innodb.inc
--source include/not_windows.inc
--source include/not_valgrind.inc
--source include/not_embedded.inc
# DEBUG_SYNC must be compiled in.
--source include/have_debug_sync.inc
connect (con1,localhost,root,,);
connect (con2,localhost,root,,);
--disable_warnings
drop table if exists t1;
--enable_warnings
connection con1;
eval create table t1 (id integer, x integer) engine = InnoDB;
insert into t1 values(0, 0);
# Enable the debug injection (the previous value is saved so that it can be
# restored during cleanup).
SET @saved_dbug = @@SESSION.debug_dbug;
set DEBUG_DBUG='+d,fatal-semaphore-timeout';
set autocommit=0;
# The following query will hang for an hour, since the debug injection
# code sleeps for an hour while holding the lock table mutex.
--echo # Sending query on con1,
--echo # the session will hold lock table mutex and sleep
--send
SELECT * from t1 where id = 0 FOR UPDATE;
# Give con1 time to acquire the lock table mutex and start sleeping.
--sleep 2
connection con2;
set autocommit=0;
# The following query will be blocked on the lock table mutex held by
# con1, so it will be put into the sync array.
--echo # Sending query on con2,
--echo # the session will be blocked on the lock table mutex and
--echo # thus be put into sync arry
--send
SELECT * from t1 where id = 0 FOR UPDATE;
# Waiting for mysqld to abort due to the fatal semaphore timeout.
# Please note that, in the master.opt file, the fatal timeout
# was set to 1 second, but in mysqld debug mode, this timeout
# value will be multiplied by 10 because UNIV_DEBUG_VALGRIND is set
# (see sync_array_print_long_waits_low() in storage/innobase/sync/sync0arr.cc)
# so the actual timeout will be 1 * 10 = 10 seconds. Besides,
# mysqld will abort after detecting this fatal timeout 10 times in
# a loop with an interval of 1 second (see the srv_error_monitor_thread
# thread in storage/innobase/srv/srv0srv.cc), so mysqld will abort
# in 1 * 10 + 1 * 10 = 20 seconds after con2 is blocked on
# the lock table mutex.
#
# P.S. the default fatal semaphore timeout is 600 seconds,
# so mysqld will abort after 600 * 10 + 1 * 10 = 6010 seconds
# in debug mode and 600 + 1 * 10 = 610 seconds in release mode.
connection default;
--disable_result_log
--disable_query_log
# Since this test generates a lot of errors in the log, suppress error checking.
call mtr.add_suppression(".*");
# The crash is expected
exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
--echo # Waitting for mysqld to crash
# It will take 20 seconds to detect the long semaphore wait and for mysqld
# to abort. This test is treated as passed as long as the mysqld
# crash/restart is detected within 80 seconds.
let $counter= 80;
let $mysql_errno= 0;
# Poll once per second until a statement fails, i.e. the server is gone.
while (!$mysql_errno)
{
--error 0,ER_SERVER_SHUTDOWN,ER_CONNECTION_KILLED,2002,2006,2013
show status;
--error 0,ER_SERVER_SHUTDOWN,ER_CONNECTION_KILLED,2002,2006,2013
select * from information_schema.innodb_sys_semaphore_waits;
dec $counter;
if (!$counter)
{
# This will fail this test.
--die Server failed to dissapear
}
--sleep 1
}
--echo # Mysqld crash was detected
--echo # Waitting for reconnect after mysqld restarts
enable_reconnect;
connection default;
--exec echo "restart:--log-error=$error_log" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
# Call script that will poll the server waiting for it to be back online again
source include/wait_until_connected_again.inc;
--echo # Reconnected after mysqld was successfully restarted
--echo # Cleaning up before exit
--disable_warnings
SET debug_dbug = @saved_dbug;
drop table if exists t1;
--enable_warnings
--echo # Clean exit
SHOW CREATE TABLE INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS;
Table Create Table
INNODB_SYS_SEMAPHORE_WAITS CREATE TEMPORARY TABLE `INNODB_SYS_SEMAPHORE_WAITS` (
`THREAD_ID` bigint(21) unsigned NOT NULL DEFAULT 0,
`OBJECT_NAME` varchar(4000) DEFAULT NULL,
`FILE` varchar(4000) DEFAULT NULL,
`LINE` int(11) unsigned NOT NULL DEFAULT 0,
`WAIT_TIME` bigint(21) unsigned NOT NULL DEFAULT 0,
`WAIT_OBJECT` bigint(21) unsigned NOT NULL DEFAULT 0,
`WAIT_TYPE` varchar(16) DEFAULT NULL,
`HOLDER_THREAD_ID` bigint(21) unsigned NOT NULL DEFAULT 0,
`HOLDER_FILE` varchar(4000) DEFAULT NULL,
`HOLDER_LINE` int(11) unsigned NOT NULL DEFAULT 0,
`CREATED_FILE` varchar(4000) DEFAULT NULL,
`CREATED_LINE` int(11) unsigned NOT NULL DEFAULT 0,
`WRITER_THREAD` bigint(21) unsigned NOT NULL DEFAULT 0,
`RESERVATION_MODE` varchar(16) DEFAULT NULL,
`READERS` int(11) unsigned NOT NULL DEFAULT 0,
`WAITERS_FLAG` bigint(21) unsigned NOT NULL DEFAULT 0,
`LOCK_WORD` bigint(21) unsigned NOT NULL DEFAULT 0,
`LAST_WRITER_FILE` varchar(4000) DEFAULT NULL,
`LAST_WRITER_LINE` int(11) unsigned NOT NULL DEFAULT 0,
`OS_WAIT_COUNT` int(11) unsigned NOT NULL DEFAULT 0
) ENGINE=MEMORY DEFAULT CHARSET=utf8
--source include/have_innodb.inc
SHOW CREATE TABLE INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS;
Valid values are between 0 and 1024
SELECT @@global.innodb_sync_array_size between 0 and 1024;
@@global.innodb_sync_array_size between 0 and 1024
1
SELECT @@global.innodb_sync_array_size;
@@global.innodb_sync_array_size
1
SELECT @@session.innodb_sync_array_size;
ERROR HY000: Variable 'innodb_sync_array_size' is a GLOBAL variable
SHOW GLOBAL variables LIKE 'innodb_sync_array_size';
Variable_name Value
innodb_sync_array_size 1
SHOW SESSION variables LIKE 'innodb_sync_array_size';
Variable_name Value
innodb_sync_array_size 1
SELECT * FROM information_schema.global_variables
WHERE variable_name='innodb_sync_array_size';
VARIABLE_NAME VARIABLE_VALUE
INNODB_SYNC_ARRAY_SIZE 1
SELECT * FROM information_schema.session_variables
WHERE variable_name='innodb_sync_array_size';
VARIABLE_NAME VARIABLE_VALUE
INNODB_SYNC_ARRAY_SIZE 1
SET GLOBAL innodb_sync_array_size=10;
ERROR HY000: Variable 'innodb_sync_array_size' is a read only variable
SET SESSION innodb_sync_array_size=10;
ERROR HY000: Variable 'innodb_sync_array_size' is a read only variable
SELECT @@global.innodb_sync_array_size;
@@global.innodb_sync_array_size
1
......@@ -1665,18 +1665,6 @@ NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST OFF,ON
READ_ONLY NO
COMMAND_LINE_ARGUMENT OPTIONAL
VARIABLE_NAME INNODB_SYNC_ARRAY_SIZE
SESSION_VALUE NULL
DEFAULT_VALUE 1
VARIABLE_SCOPE GLOBAL
VARIABLE_TYPE BIGINT UNSIGNED
VARIABLE_COMMENT Size of the mutex/lock wait array.
NUMERIC_MIN_VALUE 1
NUMERIC_MAX_VALUE 1024
NUMERIC_BLOCK_SIZE 0
ENUM_VALUE_LIST NULL
READ_ONLY YES
COMMAND_LINE_ARGUMENT OPTIONAL
VARIABLE_NAME INNODB_SYNC_DEBUG
SESSION_VALUE NULL
DEFAULT_VALUE OFF
......
# 2010-01-27 - Added
#
# Basic checks for the system variable innodb_sync_array_size:
# it is visible as a global variable only, and it cannot be changed
# at runtime.
--source include/have_innodb.inc
# Exists as global only
#
# NOTE(review): the echoed text below says the range starts at 0, while the
# variable is declared with a minimum value of 1. The text is part of the
# recorded test output, so it is left unchanged here.
--echo Valid values are between 0 and 1024
SELECT @@global.innodb_sync_array_size between 0 and 1024;
SELECT @@global.innodb_sync_array_size;
# Reading the variable with session scope must fail.
--error ER_INCORRECT_GLOBAL_LOCAL_VAR
SELECT @@session.innodb_sync_array_size;
SHOW GLOBAL variables LIKE 'innodb_sync_array_size';
SHOW SESSION variables LIKE 'innodb_sync_array_size';
--disable_warnings
SELECT * FROM information_schema.global_variables
WHERE variable_name='innodb_sync_array_size';
SELECT * FROM information_schema.session_variables
WHERE variable_name='innodb_sync_array_size';
--enable_warnings
#
# Show that it's read-only: both SET statements must be rejected, and the
# value must be unchanged afterwards.
#
--error ER_INCORRECT_GLOBAL_LOCAL_VAR
SET GLOBAL innodb_sync_array_size=10;
--error ER_INCORRECT_GLOBAL_LOCAL_VAR
SET SESSION innodb_sync_array_size=10;
SELECT @@global.innodb_sync_array_size;
......@@ -469,7 +469,6 @@ innodb.innodb_stats : MDEV-10682 - wrong result
innodb.innodb_stats_drop_locked : Modified in 10.5.7
innodb.innodb_stats_persistent : MDEV-21567 - Wrong result in execution plan
innodb.innodb_stats_persistent_debug : MDEV-14801 - Operation failed
innodb.innodb_sys_semaphore_waits : MDEV-10331 - Semaphore wait
innodb.innodb_trx_weight : Configuration deleted in 10.5.7
innodb.innodb_zip_innochecksum2 : MDEV-13882 - Warning: difficult to find free blocks
innodb.instant_alter_bugs : Modified in 10.5.7
......
......@@ -5049,6 +5049,7 @@ static int init_server_components()
MARIADB_REMOVED_OPTION("innodb-replication-delay"),
MARIADB_REMOVED_OPTION("innodb-scrub-log"),
MARIADB_REMOVED_OPTION("innodb-scrub-log-speed"),
MARIADB_REMOVED_OPTION("innodb-sync-array-size"),
MARIADB_REMOVED_OPTION("innodb-thread-concurrency"),
MARIADB_REMOVED_OPTION("innodb-thread-sleep-delay"),
MARIADB_REMOVED_OPTION("innodb-undo-logs"),
......
......@@ -123,6 +123,7 @@ static const char *removed_variables[] =
"innodb_stats_sample_pages",
"innodb_stats_update_need_lock",
"innodb_support_xa",
"innodb_sync_array_size",
"innodb_thread_concurrency",
"innodb_thread_concurrency_timer_based",
"innodb_thread_sleep_delay",
......
......@@ -187,7 +187,6 @@ SET(INNOBASE_SOURCES
include/mtr0mtr.h
include/mtr0mtr.ic
include/mtr0types.h
include/os0event.h
include/os0file.h
include/os0file.ic
include/os0thread.h
......@@ -239,8 +238,6 @@ SET(INNOBASE_SOURCES
include/srv0mon.ic
include/srv0srv.h
include/srv0start.h
include/sync0arr.h
include/sync0arr.ic
include/sync0debug.h
include/sync0policy.h
include/sux_lock.h
......@@ -294,7 +291,6 @@ SET(INNOBASE_SOURCES
mem/mem0mem.cc
mtr/mtr0mtr.cc
os/os0file.cc
os/os0event.cc
os/os0thread.cc
page/page0cur.cc
page/page0page.cc
......@@ -328,7 +324,6 @@ SET(INNOBASE_SOURCES
srv/srv0srv.cc
srv/srv0start.cc
sync/srw_lock.cc
sync/sync0arr.cc
sync/sync0debug.cc
sync/sync0sync.cc
trx/trx0i_s.cc
......
......@@ -18444,14 +18444,6 @@ static MYSQL_SYSVAR_UINT(purge_threads, srv_n_purge_threads,
"Number of tasks for purging transaction history",
NULL, NULL, 4, 1, innodb_purge_threads_MAX, 0);
/* innodb_sync_array_size: read-only (PLUGIN_VAR_READONLY) variable backed by
srv_sync_array_size, with an optional command-line argument
(PLUGIN_VAR_OPCMDARG). No check or update callbacks are registered. */
static MYSQL_SYSVAR_ULONG(sync_array_size, srv_sync_array_size,
PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
"Size of the mutex/lock wait array.",
NULL, NULL,
1, /* Default setting */
1, /* Minimum value */
1024, 0); /* Maximum value; the trailing 0 is presumably the block size */
static MYSQL_SYSVAR_UINT(fast_shutdown, srv_fast_shutdown,
PLUGIN_VAR_OPCMDARG,
"Speeds up the shutdown process of the InnoDB storage engine. Possible"
......@@ -19471,7 +19463,6 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(undo_log_truncate),
MYSQL_SYSVAR(undo_directory),
MYSQL_SYSVAR(undo_tablespaces),
MYSQL_SYSVAR(sync_array_size),
MYSQL_SYSVAR(compression_failure_threshold_pct),
MYSQL_SYSVAR(compression_pad_pct_max),
MYSQL_SYSVAR(default_row_format),
......@@ -19554,7 +19545,6 @@ i_s_innodb_sys_foreign,
i_s_innodb_sys_foreign_cols,
i_s_innodb_sys_tablespaces,
i_s_innodb_sys_virtual,
i_s_innodb_sys_semaphore_waits,
i_s_innodb_tablespaces_encryption
maria_declare_plugin_end;
......
......@@ -22,7 +22,6 @@ this program; if not, write to the Free Software Foundation, Inc.,
InnoDB INFORMATION SCHEMA tables interface to MySQL.
Created July 18, 2007 Vasil Dimov
Modified Dec 29, 2014 Jan Lindström (Added sys_semaphore_waits)
*******************************************************/
#include "univ.i"
......@@ -54,7 +53,6 @@ Modified Dec 29, 2014 Jan Lindström (Added sys_semaphore_waits)
#include "fts0priv.h"
#include "btr0btr.h"
#include "page0zip.h"
#include "sync0arr.h"
#include "fil0fil.h"
#include "fil0crypt.h"
#include "dict0crea.h"
......@@ -171,20 +169,6 @@ time_t MYSQL_TYPE_DATETIME
---------------------------------
*/
/** Implemented in sync0arr.cc */
/*******************************************************************//**
Function to populate the INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS table.
Loops through each item in the sync array, extracts the column
information and fills the INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS table.
@return 0 on success */
UNIV_INTERN
int
sync_arr_fill_sys_semphore_waits_table(
/*===================================*/
THD* thd, /*!< in: thread */
TABLE_LIST* tables, /*!< in/out: tables to fill */
Item* ); /*!< in: condition (not used) */
/*******************************************************************//**
Common function to fill any of the dynamic tables:
INFORMATION_SCHEMA.innodb_trx
......@@ -245,6 +229,7 @@ field_store_time_t(
/*******************************************************************//**
Auxiliary function to store char* value in MYSQL_TYPE_STRING field.
@return 0 on success */
static
int
field_store_string(
/*===============*/
......@@ -6879,143 +6864,3 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_tablespaces_encryption =
STRUCT_FLD(version_info, INNODB_VERSION_STR),
STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE)
};
namespace Show {
/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS.
Each entry is preceded by the name and value of the SYS_SEMAPHORE_WAITS_*
column index that corresponds to its position in the array; the list is
terminated by CEnd(). */
static ST_FIELD_INFO innodb_sys_semaphore_waits_fields_info[] =
{
// SYS_SEMAPHORE_WAITS_THREAD_ID 0
Column("THREAD_ID", ULonglong(), NOT_NULL),
// SYS_SEMAPHORE_WAITS_OBJECT_NAME 1
Column("OBJECT_NAME", Varchar(OS_FILE_MAX_PATH), NULLABLE),
// SYS_SEMAPHORE_WAITS_FILE 2
Column("FILE", Varchar(OS_FILE_MAX_PATH), NULLABLE),
// SYS_SEMAPHORE_WAITS_LINE 3
Column("LINE", ULong(), NOT_NULL),
// SYS_SEMAPHORE_WAITS_WAIT_TIME 4
Column("WAIT_TIME", ULonglong(), NOT_NULL),
// SYS_SEMAPHORE_WAITS_WAIT_OBJECT 5
Column("WAIT_OBJECT", ULonglong(), NOT_NULL),
// SYS_SEMAPHORE_WAITS_WAIT_TYPE 6
Column("WAIT_TYPE", Varchar(16), NULLABLE),
// SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID 7
Column("HOLDER_THREAD_ID", ULonglong(), NOT_NULL),
// SYS_SEMAPHORE_WAITS_HOLDER_FILE 8
Column("HOLDER_FILE", Varchar(OS_FILE_MAX_PATH), NULLABLE),
// SYS_SEMAPHORE_WAITS_HOLDER_LINE 9
Column("HOLDER_LINE", ULong(), NOT_NULL),
// SYS_SEMAPHORE_WAITS_CREATED_FILE 10
Column("CREATED_FILE", Varchar(OS_FILE_MAX_PATH), NULLABLE),
// SYS_SEMAPHORE_WAITS_CREATED_LINE 11
Column("CREATED_LINE", ULong(), NOT_NULL),
// SYS_SEMAPHORE_WAITS_WRITER_THREAD 12
Column("WRITER_THREAD", ULonglong(), NOT_NULL),
// SYS_SEMAPHORE_WAITS_RESERVATION_MODE 13
Column("RESERVATION_MODE", Varchar(16), NULLABLE),
// SYS_SEMAPHORE_WAITS_READERS 14
Column("READERS", ULong(), NOT_NULL),
// SYS_SEMAPHORE_WAITS_WAITERS_FLAG 15
Column("WAITERS_FLAG", ULonglong(), NOT_NULL),
// SYS_SEMAPHORE_WAITS_LOCK_WORD 16
Column("LOCK_WORD", ULonglong(), NOT_NULL),
// SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE 17
Column("LAST_WRITER_FILE", Varchar(OS_FILE_MAX_PATH), NULLABLE),
// SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE 18
Column("LAST_WRITER_LINE", ULong(), NOT_NULL),
// SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT 19
Column("OS_WAIT_COUNT", ULong(), NOT_NULL),
CEnd()
};
} // namespace Show
/*******************************************************************//**
Bind the dynamic table INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS
@return 0 on success */
static
int
innodb_sys_semaphore_waits_init(
/*============================*/
void* p) /*!< in/out: table schema object */
{
ST_SCHEMA_TABLE* schema;
DBUG_ENTER("innodb_sys_semaphore_waits_init");
schema = (ST_SCHEMA_TABLE*) p;
schema->fields_info = Show::innodb_sys_semaphore_waits_fields_info;
schema->fill_table = sync_arr_fill_sys_semphore_waits_table;
DBUG_RETURN(0);
}
/** Plugin descriptor for INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS.
The table is bound by innodb_sys_semaphore_waits_init() when the plugin is
loaded and released by i_s_common_deinit() when it is unloaded. It has no
status or system variables of its own. */
UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_semaphore_waits =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
/* pointer to type-specific plugin descriptor */
/* void* */
STRUCT_FLD(info, &i_s_info),
/* plugin name */
/* const char* */
STRUCT_FLD(name, "INNODB_SYS_SEMAPHORE_WAITS"),
/* plugin author (for SHOW PLUGINS) */
/* const char* */
STRUCT_FLD(author, maria_plugin_author),
/* general descriptive text (for SHOW PLUGINS) */
/* const char* */
STRUCT_FLD(descr, "InnoDB SYS_SEMAPHORE_WAITS"),
/* the plugin license (PLUGIN_LICENSE_XXX) */
/* int */
STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
/* the function to invoke when plugin is loaded */
/* int (*)(void*); */
STRUCT_FLD(init, innodb_sys_semaphore_waits_init),
/* the function to invoke when plugin is unloaded */
/* int (*)(void*); */
STRUCT_FLD(deinit, i_s_common_deinit),
/* plugin version (for SHOW PLUGINS) */
/* unsigned int */
STRUCT_FLD(version, INNODB_VERSION_SHORT),
/* struct st_mysql_show_var* */
STRUCT_FLD(status_vars, NULL),
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
/* Maria extension */
STRUCT_FLD(version_info, INNODB_VERSION_STR),
STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
};
......@@ -61,7 +61,6 @@ extern struct st_maria_plugin i_s_innodb_sys_foreign_cols;
extern struct st_maria_plugin i_s_innodb_sys_tablespaces;
extern struct st_maria_plugin i_s_innodb_sys_virtual;
extern struct st_maria_plugin i_s_innodb_tablespaces_encryption;
extern struct st_maria_plugin i_s_innodb_sys_semaphore_waits;
/** The latest successfully looked up innodb_fts_aux_table */
extern table_id_t innodb_ft_aux_table_id;
......@@ -99,47 +98,4 @@ do { \
#define STRUCT_FLD(name, value) value
#endif
/* Initializer for an ST_FIELD_INFO entry with a NULL field name, a zero
length and the type MYSQL_TYPE_NULL; judging by its name, it is meant to
terminate an ST_FIELD_INFO array.
Don't use a static const variable here, as some C++ compilers (notably
HPUX aCC: HP ANSI C++ B3910B A.03.65) can't handle it. */
#define END_OF_ST_FIELD_INFO \
{STRUCT_FLD(field_name, NULL), \
STRUCT_FLD(field_length, 0), \
STRUCT_FLD(field_type, MYSQL_TYPE_NULL), \
STRUCT_FLD(value, 0), \
STRUCT_FLD(field_flags, 0), \
STRUCT_FLD(old_name, ""), \
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}
/** Column indexes of the INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS table,
in the order in which the columns are defined */
#define SYS_SEMAPHORE_WAITS_THREAD_ID 0
#define SYS_SEMAPHORE_WAITS_OBJECT_NAME 1
#define SYS_SEMAPHORE_WAITS_FILE 2
#define SYS_SEMAPHORE_WAITS_LINE 3
#define SYS_SEMAPHORE_WAITS_WAIT_TIME 4
#define SYS_SEMAPHORE_WAITS_WAIT_OBJECT 5
#define SYS_SEMAPHORE_WAITS_WAIT_TYPE 6
#define SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID 7
#define SYS_SEMAPHORE_WAITS_HOLDER_FILE 8
#define SYS_SEMAPHORE_WAITS_HOLDER_LINE 9
#define SYS_SEMAPHORE_WAITS_CREATED_FILE 10
#define SYS_SEMAPHORE_WAITS_CREATED_LINE 11
#define SYS_SEMAPHORE_WAITS_WRITER_THREAD 12
#define SYS_SEMAPHORE_WAITS_RESERVATION_MODE 13
#define SYS_SEMAPHORE_WAITS_READERS 14
#define SYS_SEMAPHORE_WAITS_WAITERS_FLAG 15
#define SYS_SEMAPHORE_WAITS_LOCK_WORD 16
#define SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE 17
#define SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE 18
#define SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT 19
/*******************************************************************//**
Auxiliary function to store a char* value in a MYSQL_TYPE_STRING field.
The string argument may be NULL; how NULL is stored is decided by the
implementation, which is not part of this header.
@return 0 on success */
int
field_store_string(
/*===============*/
Field* field, /*!< in/out: target field for storage */
const char* str); /*!< in: NUL-terminated utf-8 string,
or NULL */
#endif /* i_s_h */
......@@ -30,8 +30,6 @@ Created 2013-03-26 Sunny Bains.
#define ib0mutex_h
#include "my_cpu.h"
#include "os0event.h"
#include "sync0arr.h"
/** OS mutex for tracking lock/unlock for debugging */
template <template <typename> class Policy>
......@@ -363,167 +361,6 @@ struct TTASMutex {
std::atomic<uint32_t> m_lock_word;
};
/** Mutex that spins on an atomic lock word and, after enough failed
attempts, reserves a sync array cell and waits on an event until the
holder releases the mutex. The lock word takes the values
MUTEX_STATE_UNLOCKED, MUTEX_STATE_LOCKED and MUTEX_STATE_WAITERS.
@tparam Policy	policy class template, instantiated with this mutex type */
template <template <typename> class Policy>
struct TTASEventMutex {
typedef Policy<TTASEventMutex> MutexPolicy;
/** Construct the mutex in the unlocked state, without an event.
The event is created later, in init(). */
TTASEventMutex()
UNIV_NOTHROW
:
m_lock_word(MUTEX_STATE_UNLOCKED),
m_event()
{
/* Check that lock_word is aligned. */
ut_ad(!((ulint) &m_lock_word % sizeof(ulint)));
}
/** The mutex must be unlocked when it is destructed. The event is
released in destroy(), not here. */
~TTASEventMutex()
UNIV_NOTHROW
{
ut_ad(state() == MUTEX_STATE_UNLOCKED);
}
/** Called when the mutex is "created". Note: Not from the constructor
but when the mutex is initialised. Creates the event, which is named
after the latch.
@param[in] id Mutex ID */
void init(latch_id_t id, const char*, uint32_t) UNIV_NOTHROW
{
ut_a(m_event == 0);
ut_ad(state() == MUTEX_STATE_UNLOCKED);
m_event = os_event_create(sync_latch_get_name(id));
}
/** This is the real destructor. This mutex can be created in BSS and
its destructor will be called on exit(). We can't call
os_event_destroy() at that stage. */
void destroy()
UNIV_NOTHROW
{
ut_ad(state() == MUTEX_STATE_UNLOCKED);
/* We have to free the event before InnoDB shuts down. */
os_event_destroy(m_event);
m_event = 0;
}
/** Try and lock the mutex. Note: POSIX returns 0 on success.
A single compare-and-swap from MUTEX_STATE_UNLOCKED to MUTEX_STATE_LOCKED
is attempted; there is no retry.
@return true on success */
bool try_lock()
UNIV_NOTHROW
{
uint32_t oldval = MUTEX_STATE_UNLOCKED;
return m_lock_word.compare_exchange_strong(
oldval,
MUTEX_STATE_LOCKED,
std::memory_order_acquire,
std::memory_order_relaxed);
}
/** Release the mutex. If the previous state was MUTEX_STATE_WAITERS,
set the event and notify the sync array. */
void exit()
UNIV_NOTHROW
{
if (m_lock_word.exchange(MUTEX_STATE_UNLOCKED,
std::memory_order_release)
== MUTEX_STATE_WAITERS) {
os_event_set(m_event);
sync_array_object_signalled();
}
}
/** Acquire the mutex.
@param[in] max_spins max number of spins
@param[in] max_delay max delay per spin
@param[in] filename from where called
@param[in] line within filename */
void enter(
uint32_t max_spins,
uint32_t max_delay,
const char* filename,
uint32_t line)
UNIV_NOTHROW
{
uint32_t n_spins = 0;
uint32_t n_waits = 0;
/* max_spins is raised by this amount after every wait round. */
const uint32_t step = max_spins;
while (!try_lock()) {
if (n_spins++ == max_spins) {
/* The spin budget is used up: yield, then prepare to wait
in the sync array. */
max_spins += step;
n_waits++;
os_thread_yield();
sync_cell_t* cell;
sync_array_t *sync_arr = sync_array_get_and_reserve_cell(
this,
filename, line, &cell);
/* Try to change LOCKED to WAITERS. If the exchange fails,
oldval receives the value that was actually found. */
uint32_t oldval = MUTEX_STATE_LOCKED;
m_lock_word.compare_exchange_strong(
oldval,
MUTEX_STATE_WAITERS,
std::memory_order_relaxed,
std::memory_order_relaxed);
if (oldval == MUTEX_STATE_UNLOCKED) {
/* The mutex was found unlocked: do not wait, release
the cell and retry try_lock(). */
sync_array_free_cell(sync_arr, cell);
} else {
sync_array_wait_event(sync_arr, cell);
}
} else {
ut_delay(max_delay);
}
}
/* Report the spin and wait counts to the policy. */
m_policy.add(n_spins, n_waits);
}
/** @return the lock state. */
int32 state() const
UNIV_NOTHROW
{
return m_lock_word.load(std::memory_order_relaxed);
}
/** The event that the mutex will wait on in sync0arr.cc
@return event instance */
os_event_t event()
UNIV_NOTHROW
{
return(m_event);
}
/** @return non-const version of the policy */
MutexPolicy& policy()
UNIV_NOTHROW
{
return(m_policy);
}
/** @return const version of the policy */
const MutexPolicy& policy() const
UNIV_NOTHROW
{
return(m_policy);
}
private:
/** Disable copying */
TTASEventMutex(const TTASEventMutex&);
TTASEventMutex& operator=(const TTASEventMutex&);
/** mutex state */
std::atomic<uint32_t> m_lock_word;
/** Used by sync0arr.cc for the wait queue */
os_event_t m_event;
/** Policy data */
MutexPolicy m_policy;
};
/** Mutex interface for all policy mutexes. This class handles the interfacing
with the Performance Schema instrumentation. */
template <typename MutexImpl>
......
/*****************************************************************************
Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
*****************************************************************************/
/**************************************************//**
@file include/os0event.h
The interface to the operating system condition variables
Created 2012-09-23 Sunny Bains (split from os0sync.h)
*******************************************************/
#ifndef os0event_h
#define os0event_h
#include "univ.i"
// Forward declaration.
struct os_event;
/** Handle to an event semaphore */
typedef struct os_event* os_event_t;
/** Denotes an infinite delay for os_event_wait_time() */
#define OS_SYNC_INFINITE_TIME ULINT_UNDEFINED
/** Return value of os_event_wait_time() when the time is exceeded */
#define OS_SYNC_TIME_EXCEEDED 1
/**
Creates an event semaphore, i.e., a semaphore which may just have two states:
signaled and nonsignaled. The created event is manual reset: it must be reset
explicitly by calling os_event_reset().
@return the event handle */
os_event_t os_event_create(const char*);
/**
Sets an event semaphore to the signaled state: lets waiting threads
proceed. */
void
os_event_set(
/*=========*/
os_event_t event); /*!< in/out: event to set */
/**
Check if the event is set.
@return true if set */
bool
os_event_is_set(
/*============*/
const os_event_t event); /*!< in: event to check */
/**
Resets an event semaphore to the nonsignaled state. Waiting threads will
stop to wait for the event.
The return value should be passed to os_event_wait_low() if it is desired
that this thread should not wait in case of an intervening call to
os_event_set() between this os_event_reset() and the
os_event_wait_low() call. See comments for os_event_wait_low(). */
int64_t
os_event_reset(
/*===========*/
os_event_t event); /*!< in/out: event to reset */
/**
Frees an event object. */
void
os_event_destroy(
/*=============*/
os_event_t& event); /*!< in/own: event to free */
/**
Waits for an event object until it is in the signaled state.
Typically, if the event has been signalled after the os_event_reset()
we'll return immediately because event->is_set == TRUE.
There are, however, situations (e.g.: sync_array code) where we may
lose this information. For example:
thread A calls os_event_reset()
thread B calls os_event_set() [event->is_set == TRUE]
thread C calls os_event_reset() [event->is_set == FALSE]
thread A calls os_event_wait() [infinite wait!]
thread C calls os_event_wait() [infinite wait!]
Where such a scenario is possible, to avoid infinite wait, the
value returned by os_event_reset() should be passed in as
reset_sig_count. */
void
os_event_wait_low(
/*==============*/
os_event_t event, /*!< in/out: event to wait */
int64_t reset_sig_count);/*!< in: zero or the value
returned by previous call of
os_event_reset(). */
/** Blocking infinite wait on an event, until signalled.
Passes 0 as reset_sig_count to os_event_wait_low().
@param e - event to wait on. */
#define os_event_wait(e) os_event_wait_low((e), 0)
/**
Waits for an event object until it is in the signaled state or
a timeout is exceeded. In Unix the timeout is always infinite.
@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
ulint
os_event_wait_time_low(
/*===================*/
os_event_t event, /*!< in/out: event to wait */
ulint time_in_usec, /*!< in: timeout in
microseconds, or
OS_SYNC_INFINITE_TIME */
int64_t reset_sig_count); /*!< in: zero or the value
returned by previous call of
os_event_reset(). */
/** Blocking timed wait on an event.
Passes 0 as reset_sig_count to os_event_wait_time_low().
@param e - event to wait on.
@param t - timeout in microseconds */
#define os_event_wait_time(e, t) os_event_wait_time_low((e), (t), 0)
#endif /* !os0event_h */
......@@ -45,6 +45,7 @@ Created 10/10/1995 Heikki Tuuri
#include "que0types.h"
#include "trx0types.h"
#include "fil0fil.h"
#include "ut0counter.h"
#include "mysql/psi/mysql_stage.h"
#include "mysql/psi/psi.h"
......@@ -184,7 +185,7 @@ struct srv_stats_t
};
/** We are prepared for a situation that we have this many threads waiting for
a semaphore inside InnoDB. srv_start() sets the value. */
a transactional lock inside InnoDB. srv_start() sets the value. */
extern ulint srv_max_n_threads;
extern const char* srv_main_thread_op_info;
......
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2015, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
*****************************************************************************/
/**************************************************//**
@file include/sync0arr.h
The wait array used in synchronization primitives
Created 9/5/1995 Heikki Tuuri
*******************************************************/
#ifndef sync0arr_h
#define sync0arr_h
#include "univ.i"
/** Synchronization wait array cell (opaque here; defined in sync0arr.cc) */
struct sync_cell_t;
/** Synchronization wait array (opaque here; defined in sync0arr.cc) */
struct sync_array_t;
/******************************************************************//**
Get an instance of the sync wait array and reserve a wait array cell
in the instance for waiting for an object. The event of the cell is
reset to nonsignalled state.
If reserving cell of the instance fails, try to get another new
instance until we can reserve an empty cell of it.
The inline implementation makes a bounded number of attempts and asserts
that a cell was obtained.
@return the sync array found, never NULL. */
UNIV_INLINE
sync_array_t*
sync_array_get_and_reserve_cell(
void* object, /*!< in: pointer to the object to wait for */
const char* file, /*!< in: file where requested */
unsigned line, /*!< in: line where requested */
sync_cell_t** cell); /*!< out: the cell reserved, never NULL */
/******************************************************************//**
Reserves a wait array cell for waiting for an object.
The event of the cell is reset to nonsignalled state.
@return the reserved cell; sync_array_get_and_reserve_cell() treats a
NULL result as "no free cell in this array" */
sync_cell_t*
sync_array_reserve_cell(
sync_array_t* arr, /*!< in: wait array */
void* object, /*!< in: pointer to the object to wait for */
const char* file, /*!< in: file where requested */
unsigned line); /*!< in: line where requested */
/******************************************************************//**
This function should be called when a thread starts to wait on
a wait array cell. In the debug version this function checks
if the wait for a semaphore will result in a deadlock, in which
case prints info and asserts.
NOTE(review): the cell is taken by non-const reference and, per the note
on sync_array_free_cell(), is freed by this call; presumably the caller's
pointer is cleared as well -- confirm in sync0arr.cc. */
void
sync_array_wait_event(
sync_array_t* arr, /*!< in: wait array */
sync_cell_t*& cell); /*!< in: the reserved cell */
/******************************************************************//**
Frees the cell. NOTE! sync_array_wait_event frees the cell
automatically! Only call this for a reserved cell that was never
waited on. */
void
sync_array_free_cell(
sync_array_t* arr, /*!< in: wait array */
sync_cell_t*& cell); /*!< in: the reserved cell */
/** count of how many times an object has been signalled */
extern ulint sg_count;
/* Expands to a plain pre-increment of the global sg_count. */
#define sync_array_object_signalled() ++sg_count
/**********************************************************************//**
Prints warnings of long semaphore waits to stderr.
@return TRUE if fatal semaphore wait threshold was exceeded */
ibool
sync_array_print_long_waits(
os_thread_id_t* waiter, /*!< out: longest waiting thread */
const void** sema); /*!< out: longest-waited-for semaphore */
/**********************************************************************//**
Prints info of the wait array. */
void
sync_array_print(
FILE* file); /*!< in: file where to print */
/** Create the primary system wait arrays */
void sync_array_init();
/** Destroy the sync array wait sub-system. */
void sync_array_close();
/**********************************************************************//**
Get an instance of the sync wait array.
@return one of the wait array instances (see sync0arr.ic) */
UNIV_INLINE
sync_array_t*
sync_array_get();
/**********************************************************************//**
Prints info of the wait array without using any mutexes/semaphores. */
UNIV_INTERN
void
sync_array_print_innodb(void);
/*****************************************************************//**
Gets the nth cell in array. The implementation asserts n < arr->n_cells.
@return cell */
UNIV_INTERN
sync_cell_t*
sync_array_get_nth_cell(
/*====================*/
sync_array_t* arr, /*!< in: sync array */
ulint n); /*!< in: index */
#include "sync0arr.ic"
#endif /* sync0arr_h */
/*****************************************************************************
Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
*****************************************************************************/
/**************************************************//**
@file include/sync0arr.ic
The wait array for synchronization primitives
Inline code
Created 9/5/1995 Heikki Tuuri
*******************************************************/
extern ulint sync_array_size;
extern sync_array_t** sync_wait_array;
#include "ut0counter.h"
/**********************************************************************//**
Pick one instance of the sync wait array.
When at most one instance is configured the first slot is returned
directly; otherwise the slot is chosen with get_rnd_value().
@return an instance of the sync wait array. */
UNIV_INLINE
sync_array_t*
sync_array_get()
/*============*/
{
	const ulint	slot = (sync_array_size > 1)
		? get_rnd_value() % sync_array_size
		: 0;

	return(sync_wait_array[slot]);
}
/******************************************************************//**
Get an instance of the sync wait array and reserve a wait array cell
in the instance for waiting for an object. The event of the cell is
reset to nonsignalled state.
The search starts at a randomly chosen instance and then visits every
other instance exactly once, so a free cell is found whenever any
instance still has one. (Picking a fresh random instance on each attempt
could hit the same full instance repeatedly and fail the assertion below
although other instances had room.)
@return the sync array reserved, never NULL. */
UNIV_INLINE
sync_array_t*
sync_array_get_and_reserve_cell(
	void*		object,	/*!< in: pointer to the object to wait for */
	const char*	file,	/*!< in: file where requested */
	unsigned	line,	/*!< in: line where requested */
	sync_cell_t**	cell)	/*!< out: the cell reserved, never NULL */
{
	sync_array_t*	sync_arr = NULL;
	const ulint	n_arrays = sync_array_size;

	/* As in sync_array_get(), the random source is only consulted
	when there is more than one instance to choose from. */
	const ulint	first = (n_arrays > 1)
		? get_rnd_value() % n_arrays
		: 0;

	*cell = NULL;

	for (ulint i = 0; i < n_arrays && *cell == NULL; ++i) {
		/* first < n_arrays and i < n_arrays, so the sum cannot
		overflow before the modulo is applied. */
		sync_arr = sync_wait_array[(first + i) % n_arrays];
		*cell = sync_array_reserve_cell(sync_arr, object, file, line);
	}

	/* Every instance has been tried once. Reaching this point
	without a cell means that all wait arrays are full, which is
	treated as a fatal condition. */
	ut_a(*cell != NULL);

	return(sync_arr);
}
......@@ -178,8 +178,6 @@ LatchDebug internals in sync0debug.cc */
enum latch_level_t {
SYNC_UNKNOWN = 0,
SYNC_MUTEX = 1,
RW_LOCK_SX,
RW_LOCK_X_WAIT,
RW_LOCK_S,
......
......@@ -41,7 +41,6 @@ UT_MUTEX_TYPE(TTASFutexMutex, GenericPolicy, FutexMutex);
UT_MUTEX_TYPE(TTASMutex, GenericPolicy, SpinMutex);
UT_MUTEX_TYPE(OSTrackMutex, GenericPolicy, SysMutex);
UT_MUTEX_TYPE(TTASEventMutex, GenericPolicy, SyncArrayMutex);
#ifdef MUTEX_FUTEX
/** The default mutex type. */
......@@ -50,9 +49,6 @@ typedef FutexMutex ib_mutex_t;
#elif defined(MUTEX_SYS)
typedef SysMutex ib_mutex_t;
#define MUTEX_TYPE "Uses system mutexes"
#elif defined(MUTEX_EVENT)
typedef SyncArrayMutex ib_mutex_t;
#define MUTEX_TYPE "Uses event mutexes"
#else
#error "ib_mutex_t type is unknown"
#endif /* MUTEX_FUTEX */
......
......@@ -868,7 +868,6 @@ constexpr const char* const auto_event_names[] =
"lexyy",
"lock0lock",
"mem0mem",
"os0event",
"os0file",
"pars0lex",
"rem0rec",
......@@ -879,7 +878,6 @@ constexpr const char* const auto_event_names[] =
"row0mysql",
"row0sel",
"srv0start",
"sync0arr",
"sync0debug",
"sync0start",
"sync0types",
......
......@@ -137,11 +137,9 @@ IF(HAVE_C99_INITIALIZERS)
ADD_DEFINITIONS(-DHAVE_C99_INITIALIZERS)
ENDIF()
SET(MUTEXTYPE "event" CACHE STRING "Mutex type: event, sys or futex")
SET(MUTEXTYPE "sys" CACHE STRING "Mutex type: sys or futex")
IF(MUTEXTYPE MATCHES "event")
ADD_DEFINITIONS(-DMUTEX_EVENT)
ELSEIF(MUTEXTYPE MATCHES "futex" AND CMAKE_SYSTEM_NAME STREQUAL "Linux")
IF(MUTEXTYPE MATCHES "futex" AND DEFINED HAVE_IB_LINUX_FUTEX)
ADD_DEFINITIONS(-DMUTEX_FUTEX)
ELSE()
ADD_DEFINITIONS(-DMUTEX_SYS)
......
/*****************************************************************************
Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
*****************************************************************************/
/**************************************************//**
@file os/os0event.cc
The interface to the operating system condition variables.
Created 2012-09-23 Sunny Bains
*******************************************************/
#include "os0event.h"
#include "ut0mutex.h"
#include <my_sys.h>
#ifdef _WIN32
#include <windows.h>
#include <synchapi.h>
/** Native condition variable. */
typedef CONDITION_VARIABLE os_cond_t;
#else
/** Native condition variable */
typedef pthread_cond_t os_cond_t;
#endif /* _WIN32 */
/** InnoDB event: a manually reset flag plus a signal counter, built on
a native condition variable and an OSMutex. */
struct os_event {
	os_event() UNIV_NOTHROW;

	~os_event() UNIV_NOTHROW;

	/** Release the native condition variable (nothing to do on
	Windows) and then the mutex. */
	void destroy() UNIV_NOTHROW
	{
#ifndef _WIN32
		const int	ret = pthread_cond_destroy(&cond_var);
		ut_a(ret == 0);
#endif /* !_WIN32 */

		mutex.destroy();
	}

	/** Put the event into the signalled state and wake all waiters.
	Does nothing if the event is already set. */
	void set() UNIV_NOTHROW
	{
		mutex.enter();

		if (!m_set) {
			broadcast();
		}

		mutex.exit();
	}

	/** Put the event into the non-signalled state.
	@return the signal count observed while holding the mutex */
	int64_t reset() UNIV_NOTHROW
	{
		mutex.enter();

		m_set = false;

		const int64_t	count = signal_count;

		mutex.exit();

		return(count);
	}

	/** Block until the event is signalled. A non-zero
	reset_sig_count (the value returned by reset()) additionally
	ends the wait as soon as the signal count has moved on, which
	protects against a set()/reset() pair by other threads being
	missed between this thread's reset() and its wait:
		thread A calls os_event_reset()
		thread B calls os_event_set()   [m_set == true]
		thread C calls os_event_reset() [m_set == false]
		thread A calls os_event_wait()  [infinite wait!]
		thread C calls os_event_wait()  [infinite wait!]
	@param reset_sig_count	zero or the value returned by reset() */
	void wait_low(int64_t reset_sig_count) UNIV_NOTHROW;

	/** Block until the event is signalled or the timeout expires.
	@param time_in_usec	timeout in microseconds,
				or OS_SYNC_INFINITE_TIME
	@param reset_sig_count	zero or the value returned by
				previous call of os_event_reset().
	@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
	ulint wait_time_low(
		ulint		time_in_usec,
		int64_t		reset_sig_count) UNIV_NOTHROW;

	/** @return true if the event is in the signalled state. */
	bool is_set() const UNIV_NOTHROW
	{
		mutex.enter();

		const bool	signalled = m_set;

		mutex.exit();

		return(signalled);
	}

private:
	/** Set up the mutex and the native condition variable. */
	void init() UNIV_NOTHROW
	{
		mutex.init();

#ifdef _WIN32
		InitializeConditionVariable(&cond_var);
#else
		const int	ret = pthread_cond_init(&cond_var, NULL);
		ut_a(ret == 0);
#endif /* _WIN32 */
	}

	/** Untimed wait on the condition variable; the caller must
	hold the mutex. */
	void wait() UNIV_NOTHROW
	{
#ifdef _WIN32
		if (!SleepConditionVariableCS(&cond_var, mutex, INFINITE)) {
			ut_error;
		}
#else
		const int	ret = pthread_cond_wait(&cond_var, mutex);
		ut_a(ret == 0);
#endif /* _WIN32 */
	}

	/** Mark the event as set, bump the signal count and wake every
	thread waiting on the condition variable. */
	void broadcast() UNIV_NOTHROW
	{
		m_set = true;
		++signal_count;

#ifdef _WIN32
		WakeAllConditionVariable(&cond_var);
#else
		const int	ret = pthread_cond_broadcast(&cond_var);
		ut_a(ret == 0);
#endif /* _WIN32 */
	}

	/** Wake a single thread waiting on the condition variable. */
	void signal() UNIV_NOTHROW
	{
#ifdef _WIN32
		WakeConditionVariable(&cond_var);
#else
		const int	ret = pthread_cond_signal(&cond_var);
		ut_a(ret == 0);
#endif /* _WIN32 */
	}

	/** Timed wait on the condition variable.
	@param abstime		absolute deadline (POSIX builds)
	@param time_in_ms	timeout in milliseconds (Windows builds)
	@return true if timed out, false otherwise */
	bool timed_wait(
#ifndef _WIN32
		const timespec*	abstime
#else
		DWORD		time_in_ms
#endif /* !_WIN32 */
	);

	/** true while the event is signalled, i.e. a thread that
	tries to wait for it does not block */
	bool			m_set;

	/** incremented each time the event becomes signalled */
	int64_t			signal_count;

	/** protects m_set and signal_count and is the mutex handed to
	the condition variable waits */
	mutable OSMutex		mutex;

	/** condition variable used in waiting for the event */
	os_cond_t		cond_var;

protected:
	/* Copying is disabled: declared but not defined. */
	os_event(const os_event&);
	os_event& operator=(const os_event&);
};
/**
Timed wait on the condition variable; the caller holds the mutex and
re-checks the wait condition afterwards.
@param abstime - absolute deadline (POSIX builds)
@param time_in_ms - timeout in milliseconds (Windows builds)
@return true if timed out */
bool
os_event::timed_wait(
#ifndef _WIN32
	const timespec*	abstime
#else
	DWORD		time_in_ms
#endif /* !_WIN32 */
)
{
#ifdef _WIN32
	const BOOL	ret = SleepConditionVariableCS(
		&cond_var, mutex, time_in_ms);

	if (ret) {
		return(false);
	}

	/* FQDN=msdn.microsoft.com
	@see http://$FQDN/en-us/library/ms686301%28VS.85%29.aspx,

	"Condition variables are subject to spurious wakeups
	(those not associated with an explicit wake) and stolen wakeups
	(another thread manages to run before the woken thread)."
	Both timeout codes are accepted; the wait condition itself is
	re-checked by the caller. */
	const DWORD	err = GetLastError();

	if (err == WAIT_TIMEOUT || err == ERROR_TIMEOUT) {
		return(true);
	}

	/* Any other failure is fatal. */
	ut_a(ret);

	return(false);
#else
	const int	ret = pthread_cond_timedwait(&cond_var, mutex, abstime);

	/* EINTR is tolerated to be on the safe side, even though POSIX
	says pthread_cond_timedwait() cannot return it. */
	const bool	expected = (ret == 0
				    || ret == ETIMEDOUT
				    || ret == EINTR);

	if (!expected) {
		ib::error() << "pthread_cond_timedwait() returned: " << ret
			<< ": abstime={" << abstime->tv_sec << ","
			<< abstime->tv_nsec << "}";
		ut_error;
	}

	return(ret == ETIMEDOUT);
#endif /* _WIN32 */
}
/**
Blocks until the event is in the signalled state.
Normally a set() that happened after the caller's reset() is still
visible through m_set and the call returns at once. With several
resetting threads (e.g. the sync_array code) that information can be
lost:
thread A calls os_event_reset()
thread B calls os_event_set() [event->m_set == true]
thread C calls os_event_reset() [event->m_set == false]
thread A calls os_event_wait() [infinite wait!]
thread C calls os_event_wait() [infinite wait!]
To avoid this, pass the value returned by reset() as reset_sig_count:
the wait then also ends once signal_count differs from it. Zero means
"use the signal count at the time of this call". */
void
os_event::wait_low(
	int64_t		reset_sig_count) UNIV_NOTHROW
{
	mutex.enter();

	const int64_t	awaited_count = reset_sig_count
		? reset_sig_count
		: signal_count;

	for (;;) {
		if (m_set || signal_count != awaited_count) {
			break;
		}

		/* The condition is re-evaluated after every wakeup
		because spurious wakeups may occur. */
		wait();
	}

	mutex.exit();
}
/**
Waits for an event object until it is in the signaled state or
a timeout is exceeded.
@param time_in_usec - timeout in microseconds, or OS_SYNC_INFINITE_TIME
@param reset_sig_count - zero or the value returned by previous call
of os_event_reset().
@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded
(never returned for OS_SYNC_INFINITE_TIME) */
ulint
os_event::wait_time_low(
	ulint		time_in_usec,
	int64_t		reset_sig_count) UNIV_NOTHROW
{
	if (time_in_usec == OS_SYNC_INFINITE_TIME) {
		/* An unbounded wait cannot time out, so use the untimed
		wait. Building a "far future" deadline from
		(time_t) ULINT_MAX does not work: where time_t is a
		signed type the conversion yields -1, a deadline in the
		past, and the wait would report a timeout at once. */
		wait_low(reset_sig_count);
		return(0);
	}

	bool		timed_out = false;

#ifdef _WIN32
	const DWORD	time_in_ms = DWORD(time_in_usec / 1000);
#else
	/* pthread_cond_timedwait() takes an absolute deadline:
	current time plus the requested timeout, in microseconds. */
	const ulonglong	usec = ulonglong(time_in_usec) + my_hrtime().val;

	struct timespec	abstime;

	abstime.tv_sec = static_cast<time_t>(usec / 1000000);
	abstime.tv_nsec = static_cast<uint>((usec % 1000000) * 1000);

	ut_a(abstime.tv_nsec <= 999999999);
#endif /* _WIN32 */

	mutex.enter();

	/* Zero means that the caller has no saved signal count; use the
	current one so that only a later set() ends the wait. */
	if (!reset_sig_count) {
		reset_sig_count = signal_count;
	}

	do {
		if (m_set || signal_count != reset_sig_count) {
			break;
		}

#ifndef _WIN32
		timed_out = timed_wait(&abstime);
#else
		timed_out = timed_wait(time_in_ms);
#endif /* !_WIN32 */

		/* A wakeup without timeout may be spurious: loop and
		check the condition again. */
	} while (!timed_out);

	mutex.exit();

	return(timed_out ? OS_SYNC_TIME_EXCEEDED : 0);
}
/** Constructor: creates the event in the non-signalled state. */
os_event::os_event() UNIV_NOTHROW
{
	/* The signal count starts at 1, not 0. reset() hands this
	value out for later use with os_event_wait_low(), where zero
	is reserved for "the caller passes no signal count". Starting
	at 1 keeps the two cases apart. */
	signal_count = 1;

	m_set = false;

	init();
}
/** Destructor: releases the condition variable and the mutex
through destroy(). */
os_event::~os_event() UNIV_NOTHROW
{
	this->destroy();
}
/**
Creates an event semaphore, i.e., a semaphore with exactly two states:
signaled and nonsignaled. The event is manual reset: once set it stays
signaled until os_event_reset() is called on it. The name argument is
not used.
@return the event handle */
os_event_t os_event_create(const char*)
{
	os_event_t	event = UT_NEW_NOKEY(os_event());

	return(event);
}
/**
Tells whether the event is currently in the signaled state.
@return true if set */
bool
os_event_is_set(
/*============*/
	const os_event_t	event)	/*!< in: event to test */
{
	const bool	signalled = event->is_set();

	return(signalled);
}
/**
Puts an event semaphore into the signaled state, which lets the threads
waiting for it proceed. */
void
os_event_set(
/*=========*/
	os_event_t	event)	/*!< in/out: event to set */
{
	(*event).set();
}
/**
Puts an event semaphore back into the nonsignaled state, so that threads
waiting for it block again.
Pass the return value to os_event_wait_low() if this thread must not
block when another thread calls os_event_set() between this
os_event_reset() and the os_event_wait_low() call. See the comments for
os_event_wait_low().
@return current signal_count. */
int64_t
os_event_reset(
/*===========*/
	os_event_t	event)	/*!< in/out: event to reset */
{
	const int64_t	sig_count = event->reset();

	return(sig_count);
}
/**
Blocks until the event is in the signaled state or the timeout has
elapsed. Thin wrapper around os_event::wait_time_low().
@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
ulint
os_event_wait_time_low(
/*===================*/
	os_event_t	event,			/*!< in/out: event to wait */
	ulint		time_in_usec,		/*!< in: timeout in
						microseconds, or
						OS_SYNC_INFINITE_TIME */
	int64_t		reset_sig_count)	/*!< in: zero or the value
						returned by previous call of
						os_event_reset(). */
{
	const ulint	status = event->wait_time_low(
		time_in_usec, reset_sig_count);

	return(status);
}
/**
Blocks until the event is in the signaled state. Thin wrapper around
os_event::wait_low().
Where a set/reset by other threads could otherwise be missed, pass the
value returned by os_event_reset() as reset_sig_count to avoid an
infinite wait. */
void
os_event_wait_low(
/*==============*/
	os_event_t	event,			/*!< in: event to wait */
	int64_t		reset_sig_count)	/*!< in: zero or the value
						returned by previous call of
						os_event_reset(). */
{
	(*event).wait_low(reset_sig_count);
}
/**
Frees an event object and clears the caller's handle. */
void
os_event_destroy(
/*=============*/
	os_event_t&	event)	/*!< in/own: event to free;
				NULL on return */
{
	UT_DELETE(event);

	/* Leave no dangling pointer behind in the caller. */
	event = NULL;
}
......@@ -838,11 +838,11 @@ srv_printf_innodb_monitor(
"-----------------\n", file);
srv_print_master_thread_info(file);
/* This section is intentionally left blank, for tools like "innotop" */
fputs("----------\n"
"SEMAPHORES\n"
"----------\n", file);
sync_array_print(file);
/* End of intentionally blank section */
/* Conceptually, srv_innodb_monitor_mutex has a very high latching
order level in sync0sync.h, while dict_foreign_err_mutex has a very
......@@ -1347,21 +1347,11 @@ static void srv_monitor()
srv_refresh_innodb_monitor_stats(current_time);
}
/*********************************************************************//**
A task which prints warnings about semaphore waits which have lasted
too long. These can be used to track bugs which cause hangs.
*/
/** Periodic task which prints the info output by various InnoDB monitors.*/
void srv_monitor_task(void*)
{
/* number of successive fatal timeouts observed */
static ulint fatal_cnt;
static lsn_t old_lsn = recv_sys.recovered_lsn;
/* longest waiting thread for a semaphore */
os_thread_id_t waiter;
static os_thread_id_t old_waiter = os_thread_get_curr_id();
/* the semaphore that is being waited for */
const void* sema = NULL;
static const void* old_sema = NULL;
ut_ad(!srv_read_only_mode);
......@@ -1383,31 +1373,6 @@ void srv_monitor_task(void*)
eviction policy. */
buf_LRU_stat_update();
if (sync_array_print_long_waits(&waiter, &sema)
&& sema == old_sema && os_thread_eq(waiter, old_waiter)) {
#if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES)
if (UNIV_UNLIKELY(innodb_disallow_writes)) {
fprintf(stderr,
"WSREP: avoiding InnoDB self crash due to "
"long semaphore wait of > %lu seconds\n"
"Server is processing SST donor operation, "
"fatal_cnt now: " ULINTPF,
srv_fatal_semaphore_wait_threshold, fatal_cnt);
return;
}
#endif /* WITH_WSREP */
if (fatal_cnt++) {
ib::fatal() << "Semaphore wait has lasted > "
<< srv_fatal_semaphore_wait_threshold
<< " seconds. We intentionally crash the"
" server because it appears to be hung.";
}
} else {
fatal_cnt = 0;
old_waiter = waiter;
old_sema = sema;
}
srv_monitor();
}
......
......@@ -100,7 +100,7 @@ Created 2/16/1996 Heikki Tuuri
#include "ut0crc32.h"
/** We are prepared for a situation that we have this many threads waiting for
a semaphore inside InnoDB. srv_start() sets the value. */
a transactional lock inside InnoDB. srv_start() sets the value. */
ulint srv_max_n_threads;
/** Log sequence number at shutdown */
......@@ -1109,27 +1109,11 @@ dberr_t srv_start(bool create_new_db)
mysql_stage_register("innodb", srv_stages,
static_cast<int>(UT_ARR_SIZE(srv_stages)));
/* Set the maximum number of threads which can wait for a semaphore
inside InnoDB: this is the 'sync wait array' size */
srv_max_n_threads = 1 /* io_ibuf_thread */
+ 1 /* io_log_thread */
+ 1 /* srv_print_monitor_task */
+ 1 /* srv_purge_coordinator_thread */
+ 1 /* buf_dump_thread */
+ 1 /* dict_stats_thread */
srv_max_n_threads =
1 /* dict_stats_thread */
+ 1 /* fts_optimize_thread */
+ 1 /* trx_rollback_all_recovered */
+ 128 /* added as margin, for use of
InnoDB Memcached etc. */
+ 1/* buf_flush_page_cleaner */
+ max_connections
+ srv_n_read_io_threads
+ srv_n_write_io_threads
+ srv_n_purge_threads
/* FTS Parallel Sort */
+ fts_sort_pll_degree * FTS_NUM_AUX_INDEX
* max_connections;
+ 128 /* safety margin */
+ max_connections;
srv_boot();
......
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Copyright (c) 2013, 2020, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
briefly in the InnoDB documentation. The contributions by Google are
incorporated with their permission, and subject to the conditions contained in
the file COPYING.Google.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
*****************************************************************************/
/**************************************************//**
@file sync/sync0arr.cc
The wait array used in synchronization primitives
Created 9/5/1995 Heikki Tuuri
*******************************************************/
#include "sync0arr.h"
#include <mysqld_error.h>
#include <mysql/plugin.h>
#include <hash.h>
#include <myisampack.h>
#include <sql_acl.h>
#include <mysys_err.h>
#include <my_sys.h>
#include "srv0srv.h"
#include "srv0start.h"
#include "i_s.h"
#include <sql_plugin.h>
#include <innodb_priv.h>
#include "lock0lock.h"
/*
WAIT ARRAY
==========
The wait array consists of cells each of which has an event object created
for it. The threads waiting for a mutex, for example, can reserve a cell
in the array and suspend themselves to wait for the event to become signaled.
When using the wait array, remember to make sure that some thread holding
the synchronization object will eventually know that there is a waiter in
the array and signal the object, to prevent infinite wait. Why we chose
to implement a wait array? First, to make mutexes fast, we had to code
our own implementation of them, which only in usually uncommon cases
resorts to using slow operating system primitives. Then we had the choice of
assigning a unique OS event for each mutex, which would be simpler, or
using a global wait array. In some operating systems, the global wait
array solution is more efficient and flexible, because we can do with
a very small number of OS events, say 200. In NT 3.51, allocating events
seems to be a quadratic algorithm, because 10 000 events are created fast,
but 100 000 events takes a couple of minutes to create.
As of 5.0.30 the above mentioned design is changed. Since now OS can handle
millions of wait events efficiently, we no longer have this concept of each
cell of wait array having one event. Instead, now the event that a thread
wants to wait on is embedded in the wait object (mutex or rw_lock). We still
keep the global wait array for the sake of diagnostics and also to avoid
infinite wait. The error_monitor thread scans the global wait array to signal
any waiting threads who have missed the signal. */
typedef TTASEventMutex<GenericPolicy> WaitMutex;
/** A cell where an individual thread may wait suspended until a resource
is released. The suspending is implemented using an operating system
event semaphore.
The cells of an array are zero-filled when the array is created, so a
fresh cell has mutex == NULL, i.e. it is free. */
struct sync_cell_t {
WaitMutex* mutex; /*!< pointer to the object the
thread is waiting for; if NULL
the cell is free for use */
const char* file; /*!< in debug version file where
requested */
ulint line; /*!< in debug version line where
requested, or ULINT_UNDEFINED.
While the cell is on the free
list, sync_array_reserve_cell()
reads this field as the index of
the next free slot. */
os_thread_id_t thread_id; /*!< thread id of this waiting
thread */
bool waiting; /*!< TRUE if the thread has already
called sync_array_wait_event
on this cell */
int64_t signal_count; /*!< We capture the signal_count
of the mutex when we
reset the event. This value is
then passed on to os_event_wait
and we wait only if the event
has not been signalled in the
period between the reset and
wait call. */
/** time(NULL) when the wait cell was reserved.
FIXME: sync_array_print_long_waits_low() may display bogus
warnings when the system time is adjusted to the past! */
time_t reservation_time;
};
/* NOTE: It is allowed for a thread to wait for an event allocated for
the array without owning the protecting mutex (depending on the case:
OS or database mutex), but all changes (set or reset) to the state of
the event must be made while owning the mutex. */
/** Synchronization array: a fixed-size pool of wait cells guarded by
its own system mutex. */
struct sync_array_t {
/** Constructor
Creates a synchronization wait array. It is protected by a mutex
which is automatically reserved when the functions operating on it
are called.
@param[in] num_cells Number of cells to create; must be > 0 */
sync_array_t(ulint num_cells)
UNIV_NOTHROW;
/** Destructor; asserts that no cell is still reserved */
~sync_array_t()
UNIV_NOTHROW;
ulint n_reserved; /*!< number of currently reserved
cells in the wait array */
ulint n_cells; /*!< number of cells in the
wait array */
sync_cell_t* array; /*!< pointer to wait array */
SysMutex mutex; /*!< System mutex protecting the
data structure. As this data
structure is used in constructing
the database mutex, to prevent
infinite recursion in implementation,
we fall back to an OS mutex. */
ulint res_count; /*!< count of cell reservations
since creation of the array */
ulint next_free_slot; /*!< index of the first cell that
has never been handed out; cells
are taken from here when the free
list is empty */
ulint first_free_slot;/*!< head of the list of freed
cells (the last slot that was
freed), or ULINT_UNDEFINED when
that list is empty */
};
/** User configured sync array size (number of wait array instances) */
ulong srv_sync_array_size = 1;
/** Locally stored copy of srv_sync_array_size; this is the value that
the inline helpers in sync0arr.ic read */
ulint sync_array_size;
/** The global array of wait cells for implementation of the database's own
mutexes and read-write locks; holds sync_array_size instances */
sync_array_t** sync_wait_array;
/** count of how many times an object has been signalled */
ulint sg_count;
/* Release / acquire the system mutex that protects one wait array. */
#define sync_array_exit(a) mutex_exit(&(a)->mutex)
#define sync_array_enter(a) mutex_enter(&(a)->mutex)
#ifdef UNIV_DEBUG
/******************************************************************//**
This function is called only in the debug version. Detects a deadlock
of one or more threads because of waits of semaphores.
Forward declaration; the definition follows later in this file.
@return true if deadlock detected */
static
bool
sync_array_detect_deadlock(
/*=======================*/
sync_array_t* arr, /*!< in: wait array; NOTE! the caller must
own the mutex to array */
sync_cell_t* start, /*!< in: cell where recursive search started */
sync_cell_t* cell, /*!< in: cell to search */
ulint depth); /*!< in: recursion depth */
#endif /* UNIV_DEBUG */
/** Constructor
Allocates the cells of a synchronization wait array, marks all of them
free and creates the mutex that guards the array.
@param[in]	num_cells	Number of cells to create */
sync_array_t::sync_array_t(ulint num_cells)
	UNIV_NOTHROW
	:
	n_reserved(0),
	n_cells(num_cells),
	array(UT_NEW_ARRAY_NOKEY(sync_cell_t, num_cells)),
	mutex(),
	res_count(0),
	next_free_slot(0),
	first_free_slot(ULINT_UNDEFINED)
{
	ut_a(num_cells > 0);

	/* A zero-filled cell has mutex == NULL, i.e. it is free. */
	memset(array, 0, n_cells * sizeof *array);

	/* The array is protected by its own mutex. */
	mutex_create(LATCH_ID_SYNC_ARRAY_MUTEX, &mutex);
}
/** Check the integrity of the wait array: the number of cells that
have an object attached must equal the n_reserved counter.
@param[in,out]	arr	sync wait array */
static
void
sync_array_validate(sync_array_t* arr)
{
	ulint	count = 0;

	sync_array_enter(arr);

	const sync_cell_t* const	end = arr->array + arr->n_cells;

	for (const sync_cell_t* cell = arr->array; cell != end; ++cell) {
		/* a cell is in use exactly when it points to an object */
		if (cell->mutex) {
			++count;
		}
	}

	ut_a(count == arr->n_reserved);

	sync_array_exit(arr);
}
/** Destroy the wait array.
Asserts that no cell is still reserved, then frees the mutex and the
cell storage. */
sync_array_t::~sync_array_t()
	UNIV_NOTHROW
{
	ut_a(n_reserved == 0);

	/* The validation takes the array mutex, so it has to run
	before that mutex is freed. */
	sync_array_validate(this);

	mutex_free(&mutex);

	UT_DELETE_ARRAY(array);
}
/** Look up a cell of a wait array by position.
@param[in]	arr	sync array
@param[in]	n	index of the cell; asserted to be below arr->n_cells
@return pointer to the n-th cell */
UNIV_INTERN
sync_cell_t*
sync_array_get_nth_cell(
	sync_array_t*	arr,
	ulint		n)
{
	ut_a(n < arr->n_cells);

	return(&arr->array[n]);
}
/** Release a wait array object.
@param[in,own]	arr	sync wait array to delete */
static
void
sync_array_free(
	sync_array_t*	arr)
{
	/* All cleanup is done by ~sync_array_t(). */
	UT_DELETE(arr);
}
/******************************************************************//**
Reserves a wait array cell for waiting for an object.
The event of the cell is reset to nonsignalled state.
A cell is taken from the list of previously freed cells if there is one,
otherwise from the not yet used part of the array. If neither has a cell
available, NULL is returned.
@return sync cell to wait on, or NULL if the array has no free cell */
sync_cell_t*
sync_array_reserve_cell(
sync_array_t* arr, /*!< in: wait array */
void* object, /*!< in: pointer to the object to wait for;
it is cast to WaitMutex* */
const char* file, /*!< in: file where requested */
unsigned line) /*!< in: line where requested */
{
sync_cell_t* cell;
sync_array_enter(arr);
if (arr->first_free_slot != ULINT_UNDEFINED) {
/* Try and find a slot in the free list */
ut_ad(arr->first_free_slot < arr->next_free_slot);
cell = sync_array_get_nth_cell(arr, arr->first_free_slot);
/* A freed cell keeps the index of the next free cell in its
line field (stored there by sync_array_free_cell()), so this
pops the head of the free list. */
arr->first_free_slot = cell->line;
} else if (arr->next_free_slot < arr->n_cells) {
/* Try and find a slot after the currently allocated slots */
cell = sync_array_get_nth_cell(arr, arr->next_free_slot);
++arr->next_free_slot;
} else {
sync_array_exit(arr);
// We should return NULL and if there is more than
// one sync array, try another sync array instance.
return(NULL);
}
++arr->res_count;
ut_ad(arr->n_reserved < arr->n_cells);
ut_ad(arr->next_free_slot <= arr->n_cells);
++arr->n_reserved;
/* Reserve the cell. */
ut_ad(!cell->mutex);
cell->mutex = static_cast<WaitMutex*>(object);
cell->waiting = false;
cell->file = file;
/* From here on the line field holds the requesting source line
again instead of a free-list link. */
cell->line = line;
sync_array_exit(arr);
/* The remaining fields are filled in after the array mutex has
been released. */
cell->thread_id = os_thread_get_curr_id();
cell->reservation_time = time(NULL);
/* Make sure the event is reset and also store the value of
signal_count at which the event was reset. */
cell->signal_count = os_event_reset(cell->mutex->event());
return(cell);
}
/** Return a reserved cell to the wait array.
NOTE! sync_array_wait_event() frees the cell automatically!

The cell is cleared and pushed onto the array's free list. Once the
array has no reserved cells left and more than half of its slots have
been handed out at some point, the free list is discarded and slot
allocation restarts from index 0.
@param[in,out]	arr	wait array
@param[in,out]	cell	reserved cell to free; set to NULL on return */
void
sync_array_free_cell(
	sync_array_t*	arr,
	sync_cell_t*&	cell)
{
	sync_array_enter(arr);

	ut_a(cell->mutex);

	cell->waiting = false;
	cell->signal_count = 0;
	cell->mutex = NULL;

	/* Push the cell onto the free list: while a cell is free, its
	line field holds the index of the next free cell. */
	cell->line = arr->first_free_slot;
	arr->first_free_slot = cell - arr->array;

	ut_a(arr->n_reserved > 0);
	arr->n_reserved--;

	if (arr->next_free_slot > arr->n_cells / 2 && arr->n_reserved == 0) {
#ifdef UNIV_DEBUG
		/* Use a local cursor for the check. The caller's in/out
		reference must not be used as scratch space here. */
		for (ulint i = 0; i < arr->next_free_slot; ++i) {
			const sync_cell_t*	slot
				= sync_array_get_nth_cell(arr, i);

			ut_ad(!slot->waiting);
			ut_ad(!slot->mutex);
			ut_ad(slot->signal_count == 0);
		}
#endif /* UNIV_DEBUG */
		/* Nothing is reserved: forget the free list and start
		handing out slots from the beginning again. */
		arr->next_free_slot = 0;
		arr->first_free_slot = ULINT_UNDEFINED;
	}

	sync_array_exit(arr);

	cell = NULL;
}
/** Block the calling thread on the event of its reserved cell.
The cell is marked as waiting under the array mutex; debug builds also
run the deadlock detector at that point and abort if it reports a
deadlock. After the wait returns, the cell is freed.
@param[in,out]	arr	wait array
@param[in,out]	cell	the reserved cell; NULL on return */
void
sync_array_wait_event(
	sync_array_t*	arr,
	sync_cell_t*&	cell)
{
	sync_array_enter(arr);

	ut_ad(!cell->waiting);
	ut_ad(cell->mutex);
	ut_ad(os_thread_get_curr_id() == cell->thread_id);

	cell->waiting = true;

#ifdef UNIV_DEBUG
	if (sync_array_detect_deadlock(arr, cell, cell, 0)) {
		ib::fatal()
			<< "######################################## Deadlock Detected!";
	}
#endif /* UNIV_DEBUG */

	sync_array_exit(arr);

	/* The actual wait happens without holding the array mutex. */
	tpool::tpool_wait_begin();
	os_event_wait_low(cell->mutex->event(), cell->signal_count);
	tpool::tpool_wait_end();

	sync_array_free_cell(arr, cell);

	cell = 0;
}
/******************************************************************//**
Reports info of a wait array cell.
Writes the waiting thread id, the file and line of the request and the
elapsed time since the reservation, followed by a description of the
mutex being waited for. Debug builds additionally print where the mutex
was last reserved. If the cell is no longer marked as waiting,
"wait has ended" is appended.
NOTE(review): cell->mutex is dereferenced without a NULL check, so the
caller is expected to pass a reserved cell. */
static
void
sync_array_cell_print(
/*==================*/
FILE* file, /*!< in: file where to print */
sync_cell_t* cell) /*!< in: sync cell */
{
fprintf(file,
"--Thread " ULINTPF " has waited at %s line " ULINTPF
" for %.2f seconds the semaphore:\n",
ulint(cell->thread_id),
innobase_basename(cell->file), cell->line,
difftime(time(NULL), cell->reservation_time));
WaitMutex* mutex = cell->mutex;
const WaitMutex::MutexPolicy& policy = mutex->policy();
#ifdef UNIV_DEBUG
/* name exists only in debug builds; it is consumed by the
debug-only part of the fprintf() below. */
const char* name = policy.context.get_enter_filename();
if (name == NULL) {
/* The mutex might have been released. */
name = "NULL";
}
#endif /* UNIV_DEBUG */
/* A single fprintf() call whose format string and argument list
are both extended by the preprocessor in debug builds; the two
UNIV_DEBUG sections below must stay in step with each other. */
fprintf(file,
"Mutex at %p, %s, lock var %x\n"
#ifdef UNIV_DEBUG
"Last time reserved in file %s line %u"
#endif /* UNIV_DEBUG */
"\n",
(void*) mutex,
policy.to_string().c_str(),
mutex->state()
#ifdef UNIV_DEBUG
,name, policy.context.get_enter_line()
#endif /* UNIV_DEBUG */
);
if (!cell->waiting) {
fputs("wait has ended\n", file);
}
}
#ifdef UNIV_DEBUG
/** Find the reserved cell that belongs to a given thread.
The cells are scanned in index order and the first match is returned.
@param[in]	arr	wait array
@param[in]	thread	thread id to look for
@return pointer to the cell, or NULL if the thread has no reserved cell */
static
sync_cell_t*
sync_array_find_thread(
	sync_array_t*	arr,
	os_thread_id_t	thread)
{
	for (ulint slot = 0; slot < arr->n_cells; ++slot) {
		sync_cell_t*	candidate
			= sync_array_get_nth_cell(arr, slot);

		if (!candidate->mutex) {
			/* Not reserved */
			continue;
		}

		if (os_thread_eq(candidate->thread_id, thread)) {
			return(candidate);
		}
	}

	return(NULL);
}
/** One recursion step of the deadlock search.
Looks up the cell reserved by the given thread. If that is the cell the
search started from, a cycle has been found; if it is some other cell,
the search continues from there.
@param[in]	arr	wait array; NOTE! the caller must own the mutex
			to array
@param[in]	start	cell where the recursive search started
@param[in]	thread	thread to look at
@param[in]	pass	pass value
@param[in]	depth	recursion depth
@return TRUE if deadlock detected */
static
ibool
sync_array_deadlock_step(
	sync_array_t*	arr,
	sync_cell_t*	start,
	os_thread_id_t	thread,
	ulint		pass,
	ulint		depth)
{
	if (pass != 0) {
		/* With a nonzero pass value we do not know which threads
		are responsible for releasing the lock, so no deadlock
		can be detected. */
		return(FALSE);
	}

	sync_cell_t*	new_cell = sync_array_find_thread(arr, thread);

	if (new_cell == start) {
		/* The chain of waits leads back to where it began. */
		fputs("########################################\n"
		      "DEADLOCK of threads detected!\n", stderr);

		return(TRUE);
	}

	if (!new_cell) {
		/* The thread is not waiting in this array. */
		return(FALSE);
	}

	return(sync_array_detect_deadlock(arr, start, new_cell, depth + 1));
}
/** Debug-only deadlock detector for semaphore waits.
Starting from cell, follows the thread recorded as holding the awaited
mutex, via sync_array_deadlock_step(), and reports a deadlock when the
chain of waits returns to the start cell.
@param[in]	arr	wait array; NOTE! the caller must own the mutex
			to array
@param[in]	start	cell where the recursive search started
@param[in]	cell	cell to search
@param[in]	depth	recursion depth
@return true if deadlock detected */
static
bool
sync_array_detect_deadlock(
	sync_array_t*	arr,
	sync_cell_t*	start,
	sync_cell_t*	cell,
	ulint		depth)
{
	ut_a(arr);
	ut_a(start);
	ut_a(cell);
	ut_ad(cell->mutex);
	ut_ad(os_thread_get_curr_id() == start->thread_id);
	ut_ad(depth < 100);

	depth++;

	if (!cell->waiting) {
		/* This thread is not blocked, so the chain ends here. */
		return(false);
	}

	WaitMutex*			mutex = cell->mutex;
	const WaitMutex::MutexPolicy&	policy = mutex->policy();

	if (mutex->state() == MUTEX_STATE_UNLOCKED) {
		/* Nobody holds the mutex: no deadlock */
		return(false);
	}

	/* The recorded thread id may also be OS_THREAD_ID_UNDEFINED,
	because the holder has not stored it yet or has already released
	the mutex. No deadlock can occur in that case, as the wait array
	cannot contain a thread with the undefined id. */
	os_thread_id_t	thread = policy.context.get_thread_id();

	if (!sync_array_deadlock_step(arr, start, thread, 0, depth)) {
		return(false);
	}

	const char*	name = policy.context.get_enter_filename();

	if (name == NULL) {
		/* The mutex might have been released. */
		name = "NULL";
	}

	ib::info()
		<< "Mutex " << mutex << " owned by"
		" thread " << thread
		<< " file " << name << " line "
		<< policy.context.get_enter_line();

	sync_array_cell_print(stderr, cell);

	return(true);
}
#endif /* UNIV_DEBUG */
/** Report long semaphore waits of one wait array to stderr.
Every waiting cell that is older than SYNC_ARRAY_TIMEOUT seconds is
printed with a warning and sets *noticed. The longest wait seen is
returned through sema and waiter. If *noticed is set when the scan
finishes, all waiting cells of the array are printed.
@param[in]	arr	sync array instance
@param[out]	waiter	longest waiting thread
@param[out]	sema	longest-waited-for semaphore
@param[in,out]	noticed	set to TRUE if a long wait was noticed; it is
			never reset here
@return true if the fatal semaphore wait threshold was exceeded */
static
bool
sync_array_print_long_waits_low(
	sync_array_t*	arr,
	os_thread_id_t*	waiter,
	const void**	sema,
	ibool*		noticed)
{
	double	fatal_timeout = static_cast<double>(
		srv_fatal_semaphore_wait_threshold);
	ibool	fatal = FALSE;
	double	longest_diff = 0;

	/* For huge tables, skip the check during CHECK TABLE etc... */
	if (btr_validate_index_running) {
		return(false);
	}

#if defined HAVE_valgrind && !__has_feature(memory_sanitizer)
	/* Valgrind executes extremely slowly, so stretch the timeouts.
	HAVE_valgrind does not necessarily mean that we are running
	under valgrind, but there is no better way to tell. See
	Bug#58432 innodb.innodb_bug56143 fails under valgrind
	for an example */
# define SYNC_ARRAY_TIMEOUT	2400
	fatal_timeout *= 10;
#else
# define SYNC_ARRAY_TIMEOUT	240
#endif
	const time_t	now = time(NULL);

	for (ulint slot = 0; slot < arr->n_cells; slot++) {
		sync_cell_t*	cell = sync_array_get_nth_cell(arr, slot);

		if (cell->mutex && cell->waiting) {
			const double	diff = difftime(
				now, cell->reservation_time);

			if (diff > SYNC_ARRAY_TIMEOUT) {
				ib::warn() << "A long semaphore wait:";
				sync_array_cell_print(stderr, cell);
				*noticed = TRUE;
			}

			if (diff > fatal_timeout) {
				fatal = TRUE;
			}

			if (diff > longest_diff) {
				longest_diff = diff;
				*sema = cell->mutex;
				*waiter = cell->thread_id;
			}
		}
	}

	/* A long wait was noticed: dump every thread that is waiting
	for a semaphore in this array. */
	if (*noticed) {
		for (ulint slot = 0; slot < arr->n_cells; slot++) {
			sync_cell_t*	cell = sync_array_get_nth_cell(
				arr, slot);

			if (cell->mutex && cell->waiting) {
				ib::info() << "A semaphore wait:";
				sync_array_cell_print(stderr, cell);
			}
		}
	}

#undef SYNC_ARRAY_TIMEOUT

	return(fatal);
}
/** Report long semaphore waits of all wait arrays to stderr.
Each array is scanned while holding its mutex. If any long wait was
noticed, the pending OS read and write counters are printed as well and
lock_wait_timeout_task() is invoked.
@param[out]	waiter	longest waiting thread
@param[out]	sema	longest-waited-for semaphore
@return TRUE if the fatal semaphore wait threshold was exceeded */
ibool
sync_array_print_long_waits(
	os_thread_id_t*	waiter,
	const void**	sema)
{
	ibool	fatal = FALSE;
	ibool	noticed = FALSE;

	for (ulint n = 0; n < sync_array_size; ++n) {
		sync_array_t*	arr = sync_wait_array[n];

		sync_array_enter(arr);

		const bool	arr_fatal = sync_array_print_long_waits_low(
			arr, waiter, sema, &noticed);

		sync_array_exit(arr);

		if (arr_fatal) {
			fatal = TRUE;
		}
	}

	if (!noticed) {
		return(fatal);
	}

	/* If some crucial semaphore is reserved, then also the InnoDB
	Monitor can hang, and we do not get diagnostics. In many cases
	an InnoDB hang is caused by a pwrite() or a pread() call hanging
	inside the operating system, so print the numbers of pending
	calls of these right away. */
	fprintf(stderr,
		"InnoDB: Pending reads " UINT64PF
		", writes " UINT64PF "\n",
		MONITOR_VALUE(MONITOR_OS_PENDING_READS),
		MONITOR_VALUE(MONITOR_OS_PENDING_WRITES));

	lock_wait_timeout_task(nullptr);

	return(fatal);
}
/**********************************************************************//**
Prints info of the wait array. */
static
void
sync_array_print_info_low(
/*======================*/
FILE* file, /*!< in: file where to print */
sync_array_t* arr) /*!< in: wait array */
{
ulint i;
ulint count = 0;
fprintf(file,
"OS WAIT ARRAY INFO: reservation count " ULINTPF "\n",
arr->res_count);
for (i = 0; count < arr->n_reserved; ++i) {
sync_cell_t* cell = sync_array_get_nth_cell(arr, i);
if (cell->mutex) {
count++;
sync_array_cell_print(file, cell);
}
}
}
/** Print info of a wait array while holding its mutex.
@param[in]	file	file where to print
@param[in]	arr	wait array */
static
void
sync_array_print_info(
	FILE*		file,
	sync_array_t*	arr)
{
	sync_array_enter(arr);

	sync_array_print_info_low(file, arr);

	sync_array_exit(arr);
}
/** Create the primary system wait arrays */
void sync_array_init()
{
ut_a(sync_wait_array == NULL);
ut_a(srv_sync_array_size > 0);
ut_a(srv_max_n_threads > 0);
sync_array_size = srv_sync_array_size;
sync_wait_array = UT_NEW_ARRAY_NOKEY(sync_array_t*, sync_array_size);
ulint n_slots = 1 + (srv_max_n_threads - 1) / sync_array_size;
for (ulint i = 0; i < sync_array_size; ++i) {
sync_wait_array[i] = UT_NEW_NOKEY(sync_array_t(n_slots));
}
}
/** Destroy the sync array wait sub-system. */
void sync_array_close()
{
for (ulint i = 0; i < sync_array_size; ++i) {
sync_array_free(sync_wait_array[i]);
}
UT_DELETE_ARRAY(sync_wait_array);
sync_wait_array = NULL;
}
/**********************************************************************//**
Print info about the sync array(s). */
void
sync_array_print(
/*=============*/
FILE* file) /*!< in/out: Print to this stream */
{
for (ulint i = 0; i < sync_array_size; ++i) {
sync_array_print_info(file, sync_wait_array[i]);
}
fprintf(file,
"OS WAIT ARRAY INFO: signal count " ULINTPF "\n", sg_count);
}
/** Print the waiting cells of the wait array returned by
sync_array_get() to stderr. The array mutex is not acquired. */
UNIV_INTERN
void
sync_array_print_innodb(void)
{
	sync_array_t*	arr = sync_array_get();

	fputs("InnoDB: Semaphore wait debug output started for InnoDB:\n",
	      stderr);

	for (ulint slot = 0; slot < arr->n_cells; slot++) {
		sync_cell_t*	cell = sync_array_get_nth_cell(arr, slot);

		if (cell->mutex && cell->waiting) {
			fputs("InnoDB: Warning: semaphore wait:\n", stderr);
			sync_array_cell_print(stderr, cell);
		}
	}

	fputs("InnoDB: Semaphore wait debug output ended:\n", stderr);
}
/** Get the number of cells in the wait array returned by
sync_array_get().
@return number of cells, reserved or not */
UNIV_INTERN
ulint
sync_arr_get_n_items(void)
{
	return(static_cast<ulint>(sync_array_get()->n_cells));
}
/** Fetch a cell of the wait array returned by sync_array_get(), provided
that the cell is reserved and its owner is waiting.
@param[in]	i	index of the requested cell
@param[out]	cell	set to the cell if it is reserved and waiting;
			left untouched otherwise
@return TRUE if the cell is reserved and waiting, FALSE otherwise */
UNIV_INTERN
ibool
sync_arr_get_item(
	ulint		i,
	sync_cell_t**	cell)
{
	sync_cell_t*	wait_cell = sync_array_get_nth_cell(
		sync_array_get(), i);

	if (!wait_cell || wait_cell->mutex == NULL || !wait_cell->waiting) {
		return(FALSE);
	}

	*cell = wait_cell;

	return(TRUE);
}
/*******************************************************************//**
Function to populate INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS table.
Loop through each item on sync array, and extract the column
information and fill the INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS table.
One row is stored for every cell for which sync_arr_get_item() returns
true. Only the FILE, LINE, WAIT_TIME, WAIT_OBJECT and WAIT_TYPE columns
are filled in; the stores for the remaining columns are commented out.
NOTE(review): the array is read through sync_arr_get_item() without any
sync_array_enter() call in this function - confirm that this is intended.
NOTE(review): the DBUG_ENTER tag differs from the name of this function.
@return 0 on success; 0 is also returned, with no rows stored, when the
user lacks the PROCESS privilege */
UNIV_INTERN
int
sync_arr_fill_sys_semphore_waits_table(
/*===================================*/
THD* thd, /*!< in: thread */
TABLE_LIST* tables, /*!< in/out: tables to fill */
Item* ) /*!< in: condition (not used) */
{
Field** fields;
ulint n_items;
DBUG_ENTER("i_s_sys_semaphore_waits_fill_table");
RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str);
/* deny access to user without PROCESS_ACL privilege */
if (check_global_access(thd, PROCESS_ACL)) {
DBUG_RETURN(0);
}
fields = tables->table->field;
n_items = sync_arr_get_n_items();
for(ulint i=0; i < n_items;i++) {
sync_cell_t *cell=NULL;
if (sync_arr_get_item(i, &cell)) {
/* JAN: FIXME
OK(fields[SYS_SEMAPHORE_WAITS_THREAD_ID]->store(,
ulint(cell->thread), true));
*/
/* OK() is defined elsewhere; presumably it leaves the function
when a store fails - TODO confirm. */
OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_FILE], innobase_basename(cell->file)));
OK(fields[SYS_SEMAPHORE_WAITS_LINE]->store(cell->line, true));
fields[SYS_SEMAPHORE_WAITS_LINE]->set_notnull();
/* Seconds elapsed since the cell was reserved */
OK(fields[SYS_SEMAPHORE_WAITS_WAIT_TIME]->store(
difftime(time(NULL),
cell->reservation_time)));
if (WaitMutex* mutex = cell->mutex) {
// JAN: FIXME
// OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_OBJECT_NAME], mutex->cmutex_name));
/* The address of the mutex serves as the wait object value */
OK(fields[SYS_SEMAPHORE_WAITS_WAIT_OBJECT]->store((longlong)mutex, true));
OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "MUTEX"));
//OK(fields[SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID]->store(mutex->thread_id, true));
//OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_HOLDER_FILE], innobase_basename(mutex->file_name)));
//OK(fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->store(mutex->line, true));
//fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE]->set_notnull();
//OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_CREATED_FILE], innobase_basename(mutex->cfile_name)));
//OK(fields[SYS_SEMAPHORE_WAITS_CREATED_LINE]->store(mutex->cline, true));
//fields[SYS_SEMAPHORE_WAITS_CREATED_LINE]->set_notnull();
//OK(fields[SYS_SEMAPHORE_WAITS_WAITERS_FLAG]->store(mutex->waiters, true));
//OK(fields[SYS_SEMAPHORE_WAITS_LOCK_WORD]->store(mutex->lock_word, true));
//OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE], innobase_basename(mutex->file_name)));
//OK(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->store(mutex->line, true));
//fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE]->set_notnull();
//OK(fields[SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT]->store(mutex->count_os_wait, true));
}
OK(schema_table_store_record(thd, tables->table));
}
}
DBUG_RETURN(0);
}
......@@ -388,7 +388,6 @@ LatchDebug::LatchDebug()
m_mutex.init();
LEVEL_MAP_INSERT(SYNC_UNKNOWN);
LEVEL_MAP_INSERT(SYNC_MUTEX);
LEVEL_MAP_INSERT(RW_LOCK_SX);
LEVEL_MAP_INSERT(RW_LOCK_X_WAIT);
LEVEL_MAP_INSERT(RW_LOCK_S);
......@@ -668,7 +667,6 @@ LatchDebug::check_order(
basic_check(latches, level, SYNC_DICT);
break;
case SYNC_MUTEX:
case SYNC_UNKNOWN:
case RW_LOCK_X:
case RW_LOCK_X_WAIT:
......@@ -1030,8 +1028,6 @@ sync_check_init()
sync_latch_meta_init();
sync_array_init();
ut_d(sync_check_enable());
}
......@@ -1040,8 +1036,5 @@ void
sync_check_close()
{
ut_d(LatchDebug::shutdown());
sync_array_close();
sync_latch_meta_destroy();
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment