/* Copyright (C) 2008-2009 Sun Microsystems, Inc This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /** @file storage/perfschema/table_events_waits.cc Table EVENTS_WAITS_xxx (implementation). */ #include "mysql_priv.h" #include "table_events_waits.h" #include "pfs_instr_class.h" #include "pfs_instr.h" #include "pfs_events_waits.h" THR_LOCK table_events_waits_current::m_table_lock; static const TABLE_FIELD_TYPE field_types[]= { { { C_STRING_WITH_LEN("THREAD_ID") }, { C_STRING_WITH_LEN("int(11)") }, { NULL, 0} }, { { C_STRING_WITH_LEN("EVENT_ID") }, { C_STRING_WITH_LEN("bigint(20)") }, { NULL, 0} }, { { C_STRING_WITH_LEN("EVENT_NAME") }, { C_STRING_WITH_LEN("varchar(128)") }, { NULL, 0} }, { { C_STRING_WITH_LEN("SOURCE") }, { C_STRING_WITH_LEN("varchar(64)") }, { NULL, 0} }, { { C_STRING_WITH_LEN("TIMER_START") }, { C_STRING_WITH_LEN("bigint(20)") }, { NULL, 0} }, { { C_STRING_WITH_LEN("TIMER_END") }, { C_STRING_WITH_LEN("bigint(20)") }, { NULL, 0} }, { { C_STRING_WITH_LEN("TIMER_WAIT") }, { C_STRING_WITH_LEN("bigint(20)") }, { NULL, 0} }, { { C_STRING_WITH_LEN("SPINS") }, { C_STRING_WITH_LEN("int(10)") }, { NULL, 0} }, { { C_STRING_WITH_LEN("OBJECT_SCHEMA") }, { C_STRING_WITH_LEN("varchar(64)") }, { NULL, 0} }, { { C_STRING_WITH_LEN("OBJECT_NAME") }, { C_STRING_WITH_LEN("varchar(512)") }, { NULL, 0} }, { { C_STRING_WITH_LEN("OBJECT_TYPE") }, { C_STRING_WITH_LEN("varchar(64)") }, { NULL, 0} }, { { C_STRING_WITH_LEN("OBJECT_INSTANCE_BEGIN") }, { C_STRING_WITH_LEN("bigint(20)") }, { NULL, 0} }, { { C_STRING_WITH_LEN("NESTING_EVENT_ID") }, { C_STRING_WITH_LEN("bigint(20)") }, { NULL, 0} }, { { C_STRING_WITH_LEN("OPERATION") }, { C_STRING_WITH_LEN("varchar(16)") }, { NULL, 0} }, { { C_STRING_WITH_LEN("NUMBER_OF_BYTES") }, { C_STRING_WITH_LEN("bigint(20)") }, { NULL, 0} }, { { C_STRING_WITH_LEN("FLAGS") }, { C_STRING_WITH_LEN("int(10)") }, { NULL, 0} } }; TABLE_FIELD_DEF table_events_waits_current::m_field_def= { 16, field_types }; PFS_engine_table_share table_events_waits_current::m_share= { { C_STRING_WITH_LEN("EVENTS_WAITS_CURRENT") }, &pfs_truncatable_acl, &table_events_waits_current::create, NULL, /* write_row */ &table_events_waits_current::delete_all_rows, 1000, /* records */ sizeof(pos_events_waits_current), /* ref length */ &m_table_lock, &m_field_def, false /* checked */ }; THR_LOCK table_events_waits_history::m_table_lock; PFS_engine_table_share table_events_waits_history::m_share= { { C_STRING_WITH_LEN("EVENTS_WAITS_HISTORY") }, &pfs_truncatable_acl, &table_events_waits_history::create, NULL, /* write_row */ &table_events_waits_history::delete_all_rows, 1000, /* records */ sizeof(pos_events_waits_history), /* ref length */ &m_table_lock, &table_events_waits_current::m_field_def, false /* checked */ }; THR_LOCK table_events_waits_history_long::m_table_lock; PFS_engine_table_share table_events_waits_history_long::m_share= { { C_STRING_WITH_LEN("EVENTS_WAITS_HISTORY_LONG") }, &pfs_truncatable_acl, &table_events_waits_history_long::create, NULL, /* write_row */ &table_events_waits_history_long::delete_all_rows, 10000, /* records */ sizeof(PFS_simple_index), /* ref length */ &m_table_lock, &table_events_waits_current::m_field_def, false /* checked */ }; table_events_waits_common::table_events_waits_common (const PFS_engine_table_share *share, void *pos) : PFS_readonly_table(share, pos), m_row_exists(false) {} void table_events_waits_common::clear_object_columns() { m_row.m_object_type= NULL; m_row.m_object_type_length= 0; m_row.m_object_schema_length= 0; m_row.m_object_name_length= 0; } /** Build a row. @param thread_own_wait True if the memory for the wait is owned by pfs_thread @param pfs_thread the thread the cursor is reading @param wait the wait the cursor is reading */ void table_events_waits_common::make_row(bool thread_own_wait, PFS_thread *pfs_thread, PFS_events_waits *wait) { pfs_lock lock; PFS_thread *safe_thread; PFS_instr_class *safe_class; const char *base; const char *safe_source_file; m_row_exists= false; safe_thread= sanitize_thread(pfs_thread); if (unlikely(safe_thread == NULL)) return; /* Protect this reader against a thread termination */ if (thread_own_wait) safe_thread->m_lock.begin_optimistic_lock(&lock); /* Design choice: We could have used a pfs_lock in PFS_events_waits here, to protect the reader from concurrent event generation, but this leads to too many pfs_lock atomic operations each time an event is recorded: - 1 dirty() + 1 allocated() per event start, for EVENTS_WAITS_CURRENT - 1 dirty() + 1 allocated() per event end, for EVENTS_WAITS_CURRENT - 1 dirty() + 1 allocated() per copy to EVENTS_WAITS_HISTORY - 1 dirty() + 1 allocated() per copy to EVENTS_WAITS_HISTORY_LONG or 8 atomics per recorded event. The problem is that we record a *lot* of events ... Instead, a *dirty* marking is done using m_wait_class. Using m_wait_class alone does not guarantee anything, it just filters out most of the bad data. This is acceptable because this code is garbage-proof, and won't crash on bad data, only display it, very rarely (which is accepted). If a bad record is displayed, it's a very transient failure: the next select * from EVENTS_WAITS_CURRENT/_HISTORY/_HISTORY_LONG will show clean data again. */ m_row.m_thread_internal_id= safe_thread->m_thread_internal_id; m_row.m_event_id= wait->m_event_id; m_row.m_timer_state= wait->m_timer_state; m_row.m_timer_start= wait->m_timer_start; m_row.m_timer_end= wait->m_timer_end; m_row.m_object_instance_addr= (intptr) wait->m_object_instance_addr; /* PFS_events_waits::m_class needs to be sanitized, for race conditions when this code: - reads a new value in m_wait_class, - reads an old value in m_class. */ switch (wait->m_wait_class) { case WAIT_CLASS_MUTEX: clear_object_columns(); safe_class= sanitize_mutex_class((PFS_mutex_class*) wait->m_class); break; case WAIT_CLASS_RWLOCK: clear_object_columns(); safe_class= sanitize_rwlock_class((PFS_rwlock_class*) wait->m_class); break; case WAIT_CLASS_COND: clear_object_columns(); safe_class= sanitize_cond_class((PFS_cond_class*) wait->m_class); break; case WAIT_CLASS_TABLE: m_row.m_object_type= "TABLE"; m_row.m_object_type_length= 5; memcpy(m_row.m_object_schema, wait->m_schema_name, wait->m_schema_name_length); m_row.m_object_schema_length= wait->m_schema_name_length; memcpy(m_row.m_object_name, wait->m_object_name, wait->m_object_name_length); m_row.m_object_name_length= wait->m_object_name_length; safe_class= &global_table_class; break; case WAIT_CLASS_FILE: m_row.m_object_type= "FILE"; m_row.m_object_type_length= 4; m_row.m_object_schema_length= 0; memcpy(m_row.m_object_name, wait->m_object_name, wait->m_object_name_length); m_row.m_object_name_length= wait->m_object_name_length; safe_class= sanitize_file_class((PFS_file_class*) wait->m_class); break; case NO_WAIT_CLASS: default: return; } if (unlikely(safe_class == NULL)) return; m_row.m_name= safe_class->m_name; m_row.m_name_length= safe_class->m_name_length; /* We are assuming this pointer is sane, since it comes from __FILE__. */ safe_source_file= wait->m_source_file; if (unlikely(safe_source_file == NULL)) return; base= base_name(wait->m_source_file); m_row.m_source_length= my_snprintf(m_row.m_source, sizeof(m_row.m_source), "%s:%d", base, wait->m_source_line); if (m_row.m_source_length > sizeof(m_row.m_source)) m_row.m_source_length= sizeof(m_row.m_source); m_row.m_operation= wait->m_operation; m_row.m_number_of_bytes= wait->m_number_of_bytes; m_row.m_flags= 0; if (thread_own_wait) { if (safe_thread->m_lock.end_optimistic_lock(&lock)) m_row_exists= true; } else { /* For EVENTS_WAITS_HISTORY_LONG (thread_own_wait is false), the wait record is always valid, because it is not stored in memory owned by pfs_thread. Even when the thread terminated, the record is mostly readable, so this record is displayed. */ m_row_exists= true; } } /** Operations names map, as displayed in the 'OPERATION' column. Indexed by enum_operation_type - 1. Note: enum_operation_type contains a more precise definition, since more details are needed internally by the instrumentation. Different similar operations (CLOSE vs STREAMCLOSE) are displayed with the same name 'close'. */ static const LEX_STRING operation_names_map[]= { /* Mutex operations */ { C_STRING_WITH_LEN("lock") }, { C_STRING_WITH_LEN("try_lock") }, /* RWLock operations */ { C_STRING_WITH_LEN("read_lock") }, { C_STRING_WITH_LEN("write_lock") }, { C_STRING_WITH_LEN("try_read_lock") }, { C_STRING_WITH_LEN("try_write_lock") }, /* Condition operations */ { C_STRING_WITH_LEN("wait") }, { C_STRING_WITH_LEN("timed_wait") }, /* File operations */ { C_STRING_WITH_LEN("create") }, { C_STRING_WITH_LEN("create") }, /* create tmp */ { C_STRING_WITH_LEN("open") }, { C_STRING_WITH_LEN("open") }, /* stream open */ { C_STRING_WITH_LEN("close") }, { C_STRING_WITH_LEN("close") }, /* stream close */ { C_STRING_WITH_LEN("read") }, { C_STRING_WITH_LEN("write") }, { C_STRING_WITH_LEN("seek") }, { C_STRING_WITH_LEN("tell") }, { C_STRING_WITH_LEN("flush") }, { C_STRING_WITH_LEN("stat") }, { C_STRING_WITH_LEN("stat") }, /* fstat */ { C_STRING_WITH_LEN("chsize") }, { C_STRING_WITH_LEN("delete") }, { C_STRING_WITH_LEN("rename") }, { C_STRING_WITH_LEN("sync") } }; int table_events_waits_common::read_row_values(TABLE *table, unsigned char *buf, Field **fields, bool read_all) { Field *f; const LEX_STRING *operation; compile_time_assert(COUNT_OPERATION_TYPE == array_elements(operation_names_map)); if (unlikely(! m_row_exists)) return HA_ERR_RECORD_DELETED; /* Set the null bits */ DBUG_ASSERT(table->s->null_bytes == 2); buf[0]= 0; buf[1]= 0; /* Some columns are unreliable, because they are joined with other buffers, which could have changed and been reused for something else. These columns are: - THREAD_ID (m_thread joins with PFS_thread), - SCHEMA_NAME (m_schema_name joins with PFS_table_share) - OBJECT_NAME (m_object_name joins with PFS_table_share) */ for (; (f= *fields) ; fields++) { if (read_all || bitmap_is_set(table->read_set, f->field_index)) { switch(f->field_index) { case 0: /* THREAD_ID */ set_field_ulong(f, m_row.m_thread_internal_id); break; case 1: /* EVENT_ID */ set_field_ulonglong(f, m_row.m_event_id); break; case 2: /* EVENT_NAME */ set_field_varchar_utf8(f, m_row.m_name, m_row.m_name_length); break; case 3: /* SOURCE */ set_field_varchar_utf8(f, m_row.m_source, m_row.m_source_length); break; case 4: /* TIMER_START */ if ((m_row.m_timer_state == TIMER_STATE_STARTED) || (m_row.m_timer_state == TIMER_STATE_TIMED)) set_field_ulonglong(f, m_row.m_timer_start); else f->set_null(); break; case 5: /* TIMER_END */ if (m_row.m_timer_state == TIMER_STATE_TIMED) set_field_ulonglong(f, m_row.m_timer_end); else f->set_null(); break; case 6: /* TIMER_WAIT */ if (m_row.m_timer_state == TIMER_STATE_TIMED) set_field_ulonglong(f, m_row.m_timer_end - m_row.m_timer_start); else f->set_null(); break; case 7: /* SPINS */ f->set_null(); break; case 8: /* OBJECT_SCHEMA */ if (m_row.m_object_schema_length > 0) { set_field_varchar_utf8(f, m_row.m_object_schema, m_row.m_object_schema_length); } else f->set_null(); break; case 9: /* OBJECT_NAME */ if (m_row.m_object_name_length > 0) { set_field_varchar_utf8(f, m_row.m_object_name, m_row.m_object_name_length); } else f->set_null(); break; case 10: /* OBJECT_TYPE */ if (m_row.m_object_type) { set_field_varchar_utf8(f, m_row.m_object_type, m_row.m_object_type_length); } else f->set_null(); break; case 11: /* OBJECT_INSTANCE */ set_field_ulonglong(f, m_row.m_object_instance_addr); break; case 12: /* NESTING_EVENT_ID */ f->set_null(); break; case 13: /* OPERATION */ operation= &operation_names_map[(int) m_row.m_operation - 1]; set_field_varchar_utf8(f, operation->str, operation->length); break; case 14: /* NUMBER_OF_BYTES */ if ((m_row.m_operation == OPERATION_TYPE_FILEREAD) || (m_row.m_operation == OPERATION_TYPE_FILEWRITE) || (m_row.m_operation == OPERATION_TYPE_FILECHSIZE)) set_field_ulonglong(f, m_row.m_number_of_bytes); else f->set_null(); break; case 15: /* FLAGS */ set_field_ulong(f, m_row.m_flags); break; default: DBUG_ASSERT(false); } } } return 0; } PFS_engine_table* table_events_waits_current::create(void) { return new table_events_waits_current(); } table_events_waits_current::table_events_waits_current() : table_events_waits_common(&m_share, &m_pos), m_pos(), m_next_pos() {} void table_events_waits_current::reset_position(void) { m_pos.reset(); m_next_pos.reset(); } int table_events_waits_current::rnd_next(void) { PFS_thread *pfs_thread; PFS_events_waits *wait; for (m_pos.set_at(&m_next_pos); m_pos.m_index_1 < thread_max; m_pos.next_thread()) { pfs_thread= &thread_array[m_pos.m_index_1]; if (! pfs_thread->m_lock.is_populated()) { /* This thread does not exist */ continue; } /* We do not show nested events for now, this will be revised with TABLE io */ #define ONLY_SHOW_ONE_WAIT #ifdef ONLY_SHOW_ONE_WAIT if (m_pos.m_index_2 >= 1) continue; #else if (m_pos.m_index_2 >= pfs_thread->m_wait_locker_count) continue; #endif wait= &pfs_thread->m_wait_locker_stack[m_pos.m_index_2].m_waits_current; if (wait->m_wait_class == NO_WAIT_CLASS) { /* This locker does not exist. There can not be more lockers in the stack, skip to the next thread */ continue; } make_row(true, pfs_thread, wait); /* Next iteration, look for the next locker in this thread */ m_next_pos.set_after(&m_pos); return 0; } return HA_ERR_END_OF_FILE; } int table_events_waits_current::rnd_pos(const void *pos) { PFS_thread *pfs_thread; PFS_events_waits *wait; set_position(pos); DBUG_ASSERT(m_pos.m_index_1 < thread_max); pfs_thread= &thread_array[m_pos.m_index_1]; if (! pfs_thread->m_lock.is_populated()) return HA_ERR_RECORD_DELETED; #ifdef ONLY_SHOW_CURRENT_WAITS if (m_pos.m_index_2 >= pfs_thread->m_wait_locker_count) return HA_ERR_RECORD_DELETED; #endif DBUG_ASSERT(m_pos.m_index_2 < LOCKER_STACK_SIZE); wait= &pfs_thread->m_wait_locker_stack[m_pos.m_index_2].m_waits_current; if (wait->m_wait_class == NO_WAIT_CLASS) return HA_ERR_RECORD_DELETED; make_row(true, pfs_thread, wait); return 0; } int table_events_waits_current::delete_all_rows(void) { reset_events_waits_current(); return 0; } PFS_engine_table* table_events_waits_history::create(void) { return new table_events_waits_history(); } table_events_waits_history::table_events_waits_history() : table_events_waits_common(&m_share, &m_pos), m_pos(), m_next_pos() {} void table_events_waits_history::reset_position(void) { m_pos.reset(); m_next_pos.reset(); } int table_events_waits_history::rnd_next(void) { PFS_thread *pfs_thread; PFS_events_waits *wait; if (events_waits_history_per_thread == 0) return HA_ERR_END_OF_FILE; for (m_pos.set_at(&m_next_pos); m_pos.m_index_1 < thread_max; m_pos.next_thread()) { pfs_thread= &thread_array[m_pos.m_index_1]; if (! pfs_thread->m_lock.is_populated()) { /* This thread does not exist */ continue; } if (m_pos.m_index_2 >= events_waits_history_per_thread) { /* This thread does not have more (full) history */ continue; } if ( ! pfs_thread->m_waits_history_full && (m_pos.m_index_2 >= pfs_thread->m_waits_history_index)) { /* This thread does not have more (not full) history */ continue; } if (pfs_thread->m_waits_history[m_pos.m_index_2].m_wait_class == NO_WAIT_CLASS) { /* This locker does not exist. There can not be more lockers in the stack, skip to the next thread */ continue; } wait= &pfs_thread->m_waits_history[m_pos.m_index_2]; make_row(true, pfs_thread, wait); /* Next iteration, look for the next history in this thread */ m_next_pos.set_after(&m_pos); return 0; } return HA_ERR_END_OF_FILE; } int table_events_waits_history::rnd_pos(const void *pos) { PFS_thread *pfs_thread; PFS_events_waits *wait; DBUG_ASSERT(events_waits_history_per_thread != 0); set_position(pos); DBUG_ASSERT(m_pos.m_index_1 < thread_max); pfs_thread= &thread_array[m_pos.m_index_1]; if (! pfs_thread->m_lock.is_populated()) return HA_ERR_RECORD_DELETED; DBUG_ASSERT(m_pos.m_index_2 < events_waits_history_per_thread); if ( ! pfs_thread->m_waits_history_full && (m_pos.m_index_2 >= pfs_thread->m_waits_history_index)) return HA_ERR_RECORD_DELETED; if (pfs_thread->m_waits_history[m_pos.m_index_2].m_wait_class == NO_WAIT_CLASS) return HA_ERR_RECORD_DELETED; wait= &pfs_thread->m_waits_history[m_pos.m_index_2]; make_row(true, pfs_thread, wait); return 0; } int table_events_waits_history::delete_all_rows(void) { reset_events_waits_history(); return 0; } PFS_engine_table* table_events_waits_history_long::create(void) { return new table_events_waits_history_long(); } table_events_waits_history_long::table_events_waits_history_long() : table_events_waits_common(&m_share, &m_pos), m_pos(0), m_next_pos(0) {} void table_events_waits_history_long::reset_position(void) { m_pos.m_index= 0; m_next_pos.m_index= 0; } int table_events_waits_history_long::rnd_next(void) { PFS_events_waits *wait; uint limit; if (events_waits_history_long_size == 0) return HA_ERR_END_OF_FILE; if (events_waits_history_long_full) limit= events_waits_history_long_size; else limit= events_waits_history_long_index % events_waits_history_long_size; for (m_pos.set_at(&m_next_pos); m_pos.m_index < limit; m_pos.next()) { wait= &events_waits_history_long_array[m_pos.m_index]; if (wait->m_wait_class != NO_WAIT_CLASS) { make_row(false, wait->m_thread, wait); /* Next iteration, look for the next entry */ m_next_pos.set_after(&m_pos); return 0; } } return HA_ERR_END_OF_FILE; } int table_events_waits_history_long::rnd_pos(const void *pos) { PFS_events_waits *wait; uint limit; if (events_waits_history_long_size == 0) return HA_ERR_RECORD_DELETED; set_position(pos); if (events_waits_history_long_full) limit= events_waits_history_long_size; else limit= events_waits_history_long_index % events_waits_history_long_size; if (m_pos.m_index >= limit) return HA_ERR_RECORD_DELETED; wait= &events_waits_history_long_array[m_pos.m_index]; if (wait->m_wait_class == NO_WAIT_CLASS) return HA_ERR_RECORD_DELETED; make_row(false, wait->m_thread, wait); return 0; } int table_events_waits_history_long::delete_all_rows(void) { reset_events_waits_history_long(); return 0; }