diff --git a/innobase/dict/dict0boot.c b/innobase/dict/dict0boot.c
index e500b92252fc8748c144e1b00d1d7c0873d97dfc..883c5464319365d4938030e2d16669d790d91e3b 100644
--- a/innobase/dict/dict0boot.c
+++ b/innobase/dict/dict0boot.c
@@ -332,6 +332,9 @@ dict_boot(void)
 #endif
 #if DICT_SYS_INDEXES_SPACE_NO_FIELD != 5 + 2
 #error "DICT_SYS_INDEXES_SPACE_NO_FIELD != 5 + 2"
+#endif
+#if DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2
+#error "DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2"
 #endif
 
 	table->id = DICT_INDEXES_ID;
diff --git a/innobase/dict/dict0crea.c b/innobase/dict/dict0crea.c
index 747a99ebdc976790b646303063f149e5ace0aa76..e744ffda7a666ca420084d851e32186074c76100 100644
--- a/innobase/dict/dict0crea.c
+++ b/innobase/dict/dict0crea.c
@@ -706,6 +706,108 @@ dict_drop_index_tree(
 		DICT_SYS_INDEXES_PAGE_NO_FIELD, FIL_NULL, mtr);
 }
 
+/***********************************************************************
+Truncates the B-tree of the index described by a row in SYS_INDEXES table:
+frees all pages of the old tree, creates a new empty tree in the same
+tablespace and writes the new root page number to the row. Does nothing if
+the tree has already been freed or the tablespace is not found in memory. */
+
+void
+dict_truncate_index_tree(
+/*=====================*/
+	dict_table_t*	table,	/* in: the table the index belongs to */
+	rec_t*		rec,	/* in: record in the clustered index of
+				SYS_INDEXES table */
+	mtr_t*		mtr)	/* in: mtr having the latch
+				on the record page */
+{
+	dict_index_t*	table_index;
+	dulint		id;
+	byte*		field;
+	ulint		field_len;
+	ulint		old_root;
+	ulint		new_root;
+	ulint		space_id;
+	ulint		index_type;
+	ibool		is_comp;
+
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+#endif /* UNIV_SYNC_DEBUG */
+
+	ut_a(!dict_sys->sys_indexes->comp);
+
+	/* Read the root page number of the tree from the record */
+	field = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD,
+							&field_len);
+	ut_ad(field_len == 4);
+
+	old_root = mtr_read_ulint(field, MLOG_4BYTES, mtr);
+
+	if (old_root == FIL_NULL) {
+		/* The tree has been freed: nothing to truncate */
+
+		return;
+	}
+
+	/* Read the number of the tablespace from the record */
+	field = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_SPACE_NO_FIELD,
+							&field_len);
+	ut_ad(field_len == 4);
+
+	space_id = mtr_read_ulint(field, MLOG_4BYTES, mtr);
+
+	if (!fil_tablespace_exists_in_mem(space_id)) {
+		/* A single-table tablespace whose .ibd file is missing:
+		do nothing */
+
+		return;
+	}
+
+	/* The type and the id are passed on to btr_create() below */
+	field = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_TYPE_FIELD,
+							&field_len);
+	ut_ad(field_len == 4);
+	index_type = mach_read_from_4(field);
+
+	field = rec_get_nth_field_old(rec, 1, &field_len);
+	ut_ad(field_len == 8);
+	id = mach_read_from_8(field);
+
+	/* Step 1: free every page of the tree except the root; this may
+	span several mini-transactions */
+
+	btr_free_but_not_root(space_id, old_root);
+
+	/* Step 2: using mtr, free the root page, create the new b-tree
+	and write its root page number to the appropriate field in the
+	SYS_INDEXES record: this mini-transaction marks the B-tree totally
+	truncated */
+
+	/* Fetch the comp flag of the old root; btr_create() is given it */
+	is_comp = page_is_comp(btr_page_get(space_id, old_root,
+						RW_X_LATCH, mtr));
+
+	btr_free_root(space_id, old_root, mtr);
+
+	/* Search the index list of the table for the index with this id;
+	table_index ends up NULL if there is none */
+	table_index = UT_LIST_GET_FIRST(table->indexes);
+
+	while (table_index && ut_dulint_cmp(table_index->id, id) != 0) {
+		table_index = UT_LIST_GET_NEXT(indexes, table_index);
+	}
+
+	new_root = btr_create(index_type, space_id, id, is_comp, mtr);
+
+	if (table_index) {
+		table_index->page_no = new_root;
+	}
+
+	page_rec_write_index_page_no(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD,
+							new_root, mtr);
+}
+
 /*************************************************************************
 Creates a table create graph. 
*/ diff --git a/innobase/include/dict0boot.h b/innobase/include/dict0boot.h index 35eff5af29a05488db01fc3d7e81469c6c7f27ad..86702cbca05ec61474bddda06b282e423d419431 100644 --- a/innobase/include/dict0boot.h +++ b/innobase/include/dict0boot.h @@ -119,6 +119,7 @@ dict_create(void); clustered index */ #define DICT_SYS_INDEXES_PAGE_NO_FIELD 8 #define DICT_SYS_INDEXES_SPACE_NO_FIELD 7 +#define DICT_SYS_INDEXES_TYPE_FIELD 6 /* When a row id which is zero modulo this number (which must be a power of two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is diff --git a/innobase/include/dict0crea.h b/innobase/include/dict0crea.h index 8b6944fc60576f2023173428a569a9631eb5e64f..d718e92eb131672bbe937c90c8c8e045408bc44b 100644 --- a/innobase/include/dict0crea.h +++ b/innobase/include/dict0crea.h @@ -54,6 +54,17 @@ dict_create_index_step( /* out: query thread to run next or NULL */ que_thr_t* thr); /* in: query thread */ /*********************************************************************** +Truncates the index tree associated with a row in SYS_INDEXES table. */ + +void +dict_truncate_index_tree( +/*=====================*/ + dict_table_t* table, /* in: the table the index belongs to */ + rec_t* rec, /* in: record in the clustered index of + SYS_INDEXES table */ + mtr_t* mtr); /* in: mtr having the latch + on the record page */ +/*********************************************************************** Drops the index tree associated with a row in SYS_INDEXES table. */ void diff --git a/innobase/include/row0mysql.h b/innobase/include/row0mysql.h index 575d8ee67bb41b52d100dfa025e8ec3bf8d42ac1..3a4c255911d252e1dabbf1cc90d1e288e1a82c2a 100644 --- a/innobase/include/row0mysql.h +++ b/innobase/include/row0mysql.h @@ -363,6 +363,15 @@ row_get_background_drop_list_len_low(void); /*======================================*/ /* out: how many tables in list */ /************************************************************************* +Truncates a table for MySQL. 
*/
+
+int
+row_truncate_table_for_mysql(
+/*=========================*/
+				/* out: error code or DB_SUCCESS */
+	dict_table_t*	table,	/* in: table handle */
+	trx_t*		trx);	/* in: transaction handle */
+/*************************************************************************
 Drops a table for MySQL. If the name of the dropped table ends to
 characters INNODB_MONITOR, then this also stops printing of monitor
 output by the master thread. */
diff --git a/innobase/row/row0mysql.c b/innobase/row/row0mysql.c
index 380fcf236ed0d7e4262a06373494845683a6c81c..a9b9d096e6b2d35448985db908a9e50ee99fbf09 100644
--- a/innobase/row/row0mysql.c
+++ b/innobase/row/row0mysql.c
@@ -2421,6 +2421,290 @@ row_import_tablespace_for_mysql(
 	return((int) err);
 }
 
+/*************************************************************************
+Truncates a table for MySQL: every index tree of the table is replaced by
+an empty tree and the table is given a new table id. An exclusive MySQL
+table lock on the table must have been acquired before the call; see the
+comment inside the function. */
+
+int
+row_truncate_table_for_mysql(
+/*=========================*/
+				/* out: error code or DB_SUCCESS */
+	dict_table_t*	table,	/* in: table handle */
+	trx_t*		trx)	/* in: transaction handle */
+{
+	dict_foreign_t*	foreign;
+	ulint		err;
+	ibool		locked_dictionary = FALSE;
+	mem_heap_t*	heap;
+	byte*		buf;
+	dtuple_t*	tuple;
+	dfield_t*	dfield;
+	dict_index_t*	sys_index;
+	btr_pcur_t	pcur;
+	mtr_t		mtr;
+	dulint		new_id;
+	char*		sql;
+	que_thr_t*	thr;
+	que_t*		graph = NULL;
+
+/* How do we prevent crashes caused by ongoing operations on the table? Old
+operations could try to access non-existent pages.
+
+1) SQL queries, INSERT, SELECT, ...: we must get an exclusive MySQL table lock
+on the table before we can do TRUNCATE TABLE. Then there are no running
+queries on the table.
+2) Purge and rollback: we assign a new table id for the table. Since purge and
+rollback look for the table based on the table id, they see the table as
+'dropped' and discard their operations.
+3) Insert buffer: we remove all entries for the table in the insert
+buffer tree; ... TODO
+4) Linear readahead and random readahead: we use the same method as in 3) to
+discard ongoing operations.
+5) FOREIGN KEY operations: if table->n_foreign_key_checks_running > 0, we
+do not allow the truncate. We also reserve the data dictionary latch. */
+
+	static const char renumber_tablespace_proc[] =
+	"PROCEDURE RENUMBER_TABLESPACE_PROC () IS\n"
+	"old_id CHAR;\n"
+	"new_id CHAR;\n"
+	"old_id_low INT;\n"
+	"old_id_high INT;\n"
+	"new_id_low INT;\n"
+	"new_id_high INT;\n"
+	"BEGIN\n"
+	"old_id_high := %lu;\n"
+	"old_id_low := %lu;\n"
+	"new_id_high := %lu;\n"
+	"new_id_low := %lu;\n"
+	"old_id := CONCAT(TO_BINARY(old_id_high, 4), TO_BINARY(old_id_low, 4));\n"
+	"new_id := CONCAT(TO_BINARY(new_id_high, 4), TO_BINARY(new_id_low, 4));\n"
+	"UPDATE SYS_TABLES SET ID = new_id\n"
+	"WHERE ID = old_id;\n"
+	"UPDATE SYS_COLUMNS SET TABLE_ID = new_id\n"
+	"WHERE TABLE_ID = old_id;\n"
+	"UPDATE SYS_INDEXES SET TABLE_ID = new_id\n"
+	"WHERE TABLE_ID = old_id;\n"
+	"COMMIT WORK;\n"
+	"END;\n";
+
+	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
+	ut_ad(table);
+
+	if (srv_created_new_raw) {
+		fputs(
+		"InnoDB: A new raw disk partition was initialized or\n"
+		"InnoDB: innodb_force_recovery is on: we do not allow\n"
+		"InnoDB: database modifications by the user. Shut down\n"
+		"InnoDB: mysqld and edit my.cnf so that newraw is replaced\n"
+		"InnoDB: with raw, and innodb_force_... is removed.\n",
+		stderr);
+
+		return(DB_ERROR);
+	}
+
+	trx->op_info = "truncating table";
+
+	trx_start_if_not_started(trx);
+
+	/* Serialize data dictionary operations with dictionary mutex:
+	no deadlocks can occur then in these operations */
+
+	if (trx->dict_operation_lock_mode != RW_X_LATCH) {
+		/* Prevent foreign key checks etc. while we are truncating the
+		table */
+
+		row_mysql_lock_data_dictionary(trx);
+
+		locked_dictionary = TRUE;
+	}
+
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+	/* Check if the table is referenced by foreign key constraints from
+	some other table (not the table itself) */
+
+	foreign = UT_LIST_GET_FIRST(table->referenced_list);
+
+	while (foreign && foreign->foreign_table == table) {
+		foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
+	}
+
+	if (foreign && trx->check_foreigns) {
+		FILE*	ef = dict_foreign_err_file;
+
+		/* We only allow truncating a referenced table if
+		FOREIGN_KEY_CHECKS is set to 0 */
+
+		mutex_enter(&dict_foreign_err_mutex);
+		rewind(ef);
+		ut_print_timestamp(ef);
+
+		fputs(" Cannot truncate table ", ef);
+		ut_print_name(ef, trx, table->name);
+		fputs("\n"
+			"because it is referenced by ", ef);
+		ut_print_name(ef, trx, foreign->foreign_table_name);
+		putc('\n', ef);
+		mutex_exit(&dict_foreign_err_mutex);
+
+		err = DB_ERROR;
+		goto funct_exit;
+	}
+
+	if (table->n_mysql_handles_opened > 1) {
+		ut_print_timestamp(stderr);
+fputs(" InnoDB: Warning: MySQL is trying to truncate table ", stderr);
+		ut_print_name(stderr, trx, table->name);
+		fputs("\n"
+"InnoDB: though there are still open handles to it.\n", stderr);
+		err = DB_ERROR;
+
+		goto funct_exit;
+	}
+
+	/* TODO: could we replace the counter n_foreign_key_checks_running
+	with lock checks on the table? Acquire here an exclusive lock on the
+	table, and rewrite lock0lock.c and the lock wait in srv0srv.c so that
+	they can cope with the table having been truncated here? Foreign key
+	checks take an IS or IX lock on the table. */
+
+	if (table->n_foreign_key_checks_running > 0) {
+		ut_print_timestamp(stderr);
+		fputs(" InnoDB: You are trying to truncate table ", stderr);
+		ut_print_name(stderr, trx, table->name);
+		fputs("\n"
+"InnoDB: though there is a foreign key check running on it.\n",
+			stderr);
+		err = DB_ERROR;
+
+		goto funct_exit;
+	}
+
+	/* Remove any locks there are on the table or its records */
+
+	lock_reset_all_on_table(table);
+
+	trx->dict_operation = TRUE;
+	trx->table_id = table->id;
+
+	/* scan SYS_INDEXES for all indexes of the table */
+	heap = mem_heap_create(800);
+
+	tuple = dtuple_create(heap, 1);
+	dfield = dtuple_get_nth_field(tuple, 0);
+
+	buf = mem_heap_alloc(heap, 8);
+	mach_write_to_8(buf, table->id);
+
+	dfield_set_data(dfield, buf, 8);
+	sys_index = dict_table_get_first_index(dict_sys->sys_indexes);
+	dict_index_copy_types(tuple, sys_index, 1);
+
+	mtr_start(&mtr);
+	btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
+					BTR_MODIFY_LEAF, &pcur, &mtr);
+	for (;;) {
+		rec_t*		rec;
+		const byte*	field;
+		ulint		len;
+
+		if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
+			/* The end of SYS_INDEXES has been reached. */
+			break;
+		}
+
+		rec = btr_pcur_get_rec(&pcur);
+
+		field = rec_get_nth_field_old(rec, 0, &len);
+		ut_ad(len == 8);
+
+		if (memcmp(buf, field, len) != 0) {
+			/* End of indexes for the table (TABLE_ID mismatch). */
+			break;
+		}
+
+		/* A delete-marked record means that the index has been
+		dropped: no tree to truncate. The cursor is advanced in
+		either case, or the scan would never get past the record */
+
+		if (!rec_get_deleted_flag(rec, FALSE)) {
+			dict_truncate_index_tree(table, rec, &mtr);
+		}
+
+		btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+	}
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+
+	new_id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID);
+
+	mem_heap_empty(heap);
+	sql = mem_heap_alloc(heap, (sizeof renumber_tablespace_proc) + 40);
+	sprintf(sql, renumber_tablespace_proc,
+		(ulong) ut_dulint_get_high(table->id),
+		(ulong) ut_dulint_get_low(table->id),
+		(ulong) ut_dulint_get_high(new_id),
+		(ulong) ut_dulint_get_low(new_id));
+
+	graph = pars_sql(sql);
+
+	ut_a(graph);
+
+	mem_heap_free(heap);
+
+	graph->trx = trx;
+	trx->graph = NULL;
+
+	graph->fork_type = QUE_FORK_MYSQL_INTERFACE;
+
+	thr = que_fork_start_command(graph);
+	ut_a(thr);
+
+	que_run_threads(thr);
+
+	que_graph_free(graph);
+
+	err = trx->error_state;
+
+	if (err != DB_SUCCESS) {
+		trx->error_state = DB_SUCCESS;
+		trx_general_rollback_for_mysql(trx, FALSE, NULL);
+		trx->error_state = DB_SUCCESS;
+		ut_print_timestamp(stderr);
+fputs(" InnoDB: Unable to assign a new identifier to table ", stderr);
+		ut_print_name(stderr, trx, table->name);
+		fputs("\n"
+"InnoDB: after truncating it. Background processes may corrupt the table!\n",
+		stderr);
+		err = DB_ERROR;
+	} else {
+		dict_table_change_id_in_cache(table, new_id);
+	}
+
+	dict_table_autoinc_initialize(table, 0);
+	dict_update_statistics(table);
+
+	trx_commit_for_mysql(trx);
+
+funct_exit:
+
+	if (locked_dictionary) {
+		row_mysql_unlock_data_dictionary(trx);
+	}
+
+	trx->op_info = "";
+
+	srv_wake_master_thread();
+
+	return((int) err);
+}
+
 /*************************************************************************
 Drops a table for MySQL. 
If the name of the table to be dropped is equal
 with one of the predefined magic table names, then this also stops printing
diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc
index d49f1c1ad46a20100cb97bb55371b2297e62d9a5..e19071d599d2323198945fcfb32b4b4862b8c034 100644
--- a/sql/ha_innodb.cc
+++ b/sql/ha_innodb.cc
@@ -4091,6 +4091,50 @@ ha_innobase::discard_or_import_tablespace(
 	DBUG_RETURN(err);
 }
 
+/*********************************************************************
+Deletes all rows of an InnoDB table by truncating it. Only TRUNCATE TABLE
+is served here: for any other statement, and when InnoDB reports DB_ERROR
+for the truncate, HA_ERR_WRONG_COMMAND is returned. */
+
+int
+ha_innobase::delete_all_rows(void)
+/*==============================*/
+				/* out: error number */
+{
+	row_prebuilt_t*	prebuilt = (row_prebuilt_t*)innobase_prebuilt;
+	THD*		thd = current_thd;
+	trx_t*		trx;
+	int		error;
+
+	DBUG_ENTER("ha_innobase::delete_all_rows");
+
+	if (thd->lex->sql_command == SQLCOM_TRUNCATE) {
+		/* Fetch the transaction of the current thd; one is
+		created if it does not exist yet */
+
+		trx = check_trx_exists(thd);
+
+		/* Let InnoDB truncate the table */
+
+		error = row_truncate_table_for_mysql(prebuilt->table, trx);
+
+		if (error != DB_ERROR) {
+			innobase_commit_low(trx);
+
+			error = convert_error_code_to_mysql(error, NULL);
+
+			DBUG_RETURN(error);
+		}
+
+		/* Cannot truncate; resort to ha_innobase::delete_row() */
+	}
+
+	/* We only handle TRUNCATE TABLE t as a special case.
+	DELETE FROM t will have to use ha_innobase::delete_row(). */
+
+	DBUG_RETURN(my_errno=HA_ERR_WRONG_COMMAND);
+}
+
 /*********************************************************************
 Drops a table from an InnoDB database. Before calling this function,
 MySQL calls innobase_commit to commit the transaction of the current user. 
diff --git a/sql/ha_innodb.h b/sql/ha_innodb.h index 0cb55e02ae3b5f07d5affca37822e4757325a9e4..2154e238fd1b0d952c778850572fb13f5a18393b 100644 --- a/sql/ha_innodb.h +++ b/sql/ha_innodb.h @@ -159,6 +159,7 @@ class ha_innobase: public handler int create(const char *name, register TABLE *form, HA_CREATE_INFO *create_info); + int delete_all_rows(); int delete_table(const char *name); int rename_table(const char* from, const char* to); int check(THD* thd, HA_CHECK_OPT* check_opt);