row0purge.c 14.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
/******************************************************
Purge obsolete records

(c) 1997 Innobase Oy

Created 3/14/1997 Heikki Tuuri
*******************************************************/

#include "row0purge.h"

#ifdef UNIV_NONINL
#include "row0purge.ic"
#endif

#include "fsp0fsp.h"
#include "mach0data.h"
#include "trx0rseg.h"
#include "trx0trx.h"
#include "trx0roll.h"
#include "trx0undo.h"
#include "trx0purge.h"
#include "trx0rec.h"
#include "que0que.h"
#include "row0row.h"
#include "row0upd.h"
#include "row0vers.h"
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
27
#include "row0mysql.h"
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
#include "log0log.h"

/************************************************************************
Allocates a purge node from the given heap and attaches it to a parent
query thread node. The node also gets a private memory heap of its own. */

purge_node_t*
row_purge_node_create(
/*==================*/
				/* out, own: purge node */
	que_thr_t*	parent,	/* in: parent node, i.e., a thr node */
	mem_heap_t*	heap)	/* in: memory heap where created */
{
	purge_node_t*	purge_node;

	ut_ad(parent && heap);

	/* The node struct itself is carved out of the caller's heap */
	purge_node = mem_heap_alloc(heap, sizeof(*purge_node));

	purge_node->common.parent = parent;
	purge_node->common.type = QUE_NODE_PURGE;

	/* Private heap of the node, created with an initial size of 256 */
	purge_node->heap = mem_heap_create(256);

	return(purge_node);
}

/***************************************************************
Positions node->pcur on the clustered index record of the row being
purged. If an earlier call already found the record (node->found_clust),
the stored cursor position is restored; otherwise the record is searched
for with the row reference and, if found, the position is stored. */
static
ibool
row_purge_reposition_pcur(
/*======================*/
				/* out: TRUE if the record was found */
	ulint		mode,	/* in: latching mode */
	purge_node_t*	node,	/* in: row purge node */
	mtr_t*		mtr)	/* in: mtr */
{
	btr_pcur_t*	clust_pcur = &(node->pcur);
	ibool		rec_found;

	if (!node->found_clust) {
		/* First positioning for this undo record: search by the
		row reference and remember the outcome in the node */

		rec_found = row_search_on_row_ref(clust_pcur, mode,
						node->table, node->ref, mtr);
		node->found_clust = rec_found;

		if (rec_found) {
			btr_pcur_store_position(clust_pcur, mtr);
		}

		return(rec_found);
	}

	/* The record was located earlier: restore the stored position */

	rec_found = btr_pcur_restore_position(mode, clust_pcur, mtr);

	return(rec_found);
}

/***************************************************************
Removes a delete marked clustered index record if possible. The record is
removed only if it is still found and its roll pointer equals
node->roll_ptr, i.e., it has not been modified after the delete marking.
The mini-transaction started here is always committed before returning. */
static
ibool
row_purge_remove_clust_if_poss_low(
/*===============================*/
				/* out: TRUE if success, or if not found, or
				if modified after the delete marking */
	purge_node_t*	node,	/* in: row purge node */
	ulint		mode)	/* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
{
	dict_index_t*	index;
	btr_pcur_t*	pcur;
	btr_cur_t*	btr_cur;
	ibool		success;
	ulint		err;
	mtr_t		mtr;

	index = dict_table_get_first_index(node->table);

	pcur = &(node->pcur);
	btr_cur = btr_pcur_get_btr_cur(pcur);

	mtr_start(&mtr);

	success = row_purge_reposition_pcur(mode, node, &mtr);

	if (!success) {
		/* The record is already removed */

		btr_pcur_commit_specify_mtr(pcur, &mtr);

		return(TRUE);
	}

	if (0 != ut_dulint_cmp(node->roll_ptr,
		row_get_rec_roll_ptr(btr_pcur_get_rec(pcur), index))) {

		/* Someone else has modified the record later: do not remove */
		btr_pcur_commit_specify_mtr(pcur, &mtr);

		return(TRUE);
	}

	if (mode == BTR_MODIFY_LEAF) {
		success = btr_cur_optimistic_delete(btr_cur, &mtr);
	} else {
		ut_ad(mode == BTR_MODIFY_TREE);
		btr_cur_pessimistic_delete(&err, FALSE, btr_cur, FALSE, &mtr);

		/* Running out of file space is the only failure that is
		reported to the caller; any other error code is fatal */

		if (err == DB_SUCCESS) {
			success = TRUE;
		} else if (err == DB_OUT_OF_FILE_SPACE) {
			success = FALSE;
		} else {
			ut_error;
		}
	}

	btr_pcur_commit_specify_mtr(pcur, &mtr);

	return(success);
}
		
/***************************************************************
Removes a clustered index record if it has not been modified after the delete
marking. */
static
void
row_purge_remove_clust_if_poss(
/*===========================*/
156
	purge_node_t*	node)	/* in: row purge node */
157 158 159 160
{
	ibool	success;
	ulint	n_tries	= 0;
	
161
/*	fputs("Purge: Removing clustered record\n", stderr); */
162

163
	success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF);
164 165 166 167 168
	if (success) {

		return;
	}
retry:
169
	success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_TREE);
170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
	/* The delete operation may fail if we have little
	file space left: TODO: easiest to crash the database
	and restart with more file space */

	if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
		n_tries++;

		os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
			
		goto retry;
	}

	ut_a(success);
}
 						
/***************************************************************
Removes a secondary index entry if possible. The entry is removed only if
the clustered index record of the row cannot be found any more, or if
row_vers_old_has_index_entry() reports that no version of the row needs
the entry. */
static
ibool
row_purge_remove_sec_if_poss_low(
/*=============================*/
				/* out: TRUE if success or if not found */
	purge_node_t*	node,	/* in: row purge node */
	dict_index_t*	index,	/* in: index */
	dtuple_t*	entry,	/* in: index entry */
	ulint		mode)	/* in: latch mode BTR_MODIFY_LEAF or
				BTR_MODIFY_TREE */
{
	btr_pcur_t	pcur;
	btr_cur_t*	btr_cur;
	ibool		success;
	ibool		old_has = FALSE; /* remove warning */
	ibool		found;
	ulint		err;
	mtr_t		mtr;
	mtr_t*		mtr_vers;

	log_free_check();
	mtr_start(&mtr);

	found = row_search_index_entry(index, entry, mode, &pcur, &mtr);

	if (!found) {
		/* Not found: nothing to remove */

		btr_pcur_close(&pcur);
		mtr_commit(&mtr);

		return(TRUE);
	}

	btr_cur = btr_pcur_get_btr_cur(&pcur);

	/* We should remove the index record if no later version of the row,
	which cannot be purged yet, requires its existence. If some requires,
	we should do nothing. */

	/* The clustered index record is inspected in a second
	mini-transaction, separate from the one that holds the latches on
	the secondary index; its mtr_t is allocated with mem_alloc() */

	mtr_vers = mem_alloc(sizeof(mtr_t));

	mtr_start(mtr_vers);

	success = row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, mtr_vers);

	if (success) {
		old_has = row_vers_old_has_index_entry(TRUE,
					btr_pcur_get_rec(&(node->pcur)),
					mtr_vers, index, entry);
	}

	btr_pcur_commit_specify_mtr(&(node->pcur), mtr_vers);

	mem_free(mtr_vers);

	/* If the clustered record was found and the entry is still needed,
	success stays TRUE and nothing is deleted */

	if (!success || !old_has) {
		/* Remove the index record */

		if (mode == BTR_MODIFY_LEAF) {
			success = btr_cur_optimistic_delete(btr_cur, &mtr);
		} else {
			ut_ad(mode == BTR_MODIFY_TREE);
			btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
							FALSE, &mtr);
			if (err == DB_SUCCESS) {
				success = TRUE;
			} else if (err == DB_OUT_OF_FILE_SPACE) {
				success = FALSE;
			} else {
				ut_error;
			}
		}
	}

	btr_pcur_close(&pcur);
	mtr_commit(&mtr);

	return(success);
}

/***************************************************************
Removes a secondary index entry if possible. First tries a leaf-only
(BTR_MODIFY_LEAF) delete; if that does not succeed, retries with
BTR_MODIFY_TREE, sleeping between attempts, and asserts that the operation
finally succeeded. */
UNIV_INLINE
void
row_purge_remove_sec_if_poss(
/*=========================*/
	purge_node_t*	node,	/* in: row purge node */
	dict_index_t*	index,	/* in: index */
	dtuple_t*	entry)	/* in: index entry */
{
	ibool	success;
	ulint	n_tries		= 0;

	success = row_purge_remove_sec_if_poss_low(node, index, entry,
							BTR_MODIFY_LEAF);
	if (success) {

		return;
	}
retry:
	success = row_purge_remove_sec_if_poss_low(node, index, entry,
							BTR_MODIFY_TREE);
	/* The delete operation may fail if we have little
	file space left: TODO: easiest to crash the database
	and restart with more file space */

	if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {

		n_tries++;

		os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);

		goto retry;
	}

	/* Still failing after the bounded number of retries: give up */
	ut_a(success);
}

/***************************************************************
Purges a delete marking of a record. */
static
void
row_purge_del_mark(
/*===============*/
317
	purge_node_t*	node)	/* in: row purge node */
318 319 320 321 322
{
	mem_heap_t*	heap;
	dtuple_t*	entry;
	dict_index_t*	index;
	
323
	ut_ad(node);
324 325 326 327 328 329 330 331 332

	heap = mem_heap_create(1024);

	while (node->index != NULL) {
		index = node->index;

		/* Build the index entry */
		entry = row_build_index_entry(node->row, index, heap);

333
		row_purge_remove_sec_if_poss(node, index, entry);
334 335 336 337 338 339

		node->index = dict_table_get_next_index(node->index);
	}

	mem_heap_free(heap);	

340
	row_purge_remove_clust_if_poss(node);
341 342 343
}
	
/***************************************************************
344 345
Purges an update of an existing record. Also purges an update of a delete
marked record if that record contained an externally stored field. */
346 347
static
void
348 349
row_purge_upd_exist_or_extern(
/*==========================*/
350
	purge_node_t*	node)	/* in: row purge node */
351 352 353 354
{
	mem_heap_t*	heap;
	dtuple_t*	entry;
	dict_index_t*	index;
355 356 357 358 359 360 361 362 363 364
	upd_field_t*	ufield;
	ibool		is_insert;
	ulint		rseg_id;
	ulint		page_no;
	ulint		offset;
	ulint		internal_offset;
	byte*		data_field;
	ulint		data_field_len;
	ulint		i;
	mtr_t		mtr;
365
	
366
	ut_ad(node);
367

368 369 370 371 372
	if (node->rec_type == TRX_UNDO_UPD_DEL_REC) {

		goto skip_secondaries;
	}

373 374 375 376 377
	heap = mem_heap_create(1024);

	while (node->index != NULL) {
		index = node->index;

378
		if (row_upd_changes_ord_field_binary(NULL, node->index,
379 380 381 382
							node->update)) {
			/* Build the older version of the index entry */
			entry = row_build_index_entry(node->row, index, heap);

383
			row_purge_remove_sec_if_poss(node, index, entry);
384 385 386 387 388 389
		}

		node->index = dict_table_get_next_index(node->index);
	}

	mem_heap_free(heap);	
390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419

skip_secondaries:
	/* Free possible externally stored fields */
	for (i = 0; i < upd_get_n_fields(node->update); i++) {

		ufield = upd_get_nth_field(node->update, i);

		if (ufield->extern_storage) {
			/* We use the fact that new_val points to
			node->undo_rec and get thus the offset of
			dfield data inside the unod record. Then we
			can calculate from node->roll_ptr the file
			address of the new_val data */

			internal_offset = ((byte*)ufield->new_val.data)
						- node->undo_rec;
						
			ut_a(internal_offset < UNIV_PAGE_SIZE);

			trx_undo_decode_roll_ptr(node->roll_ptr,
						&is_insert, &rseg_id,
						&page_no, &offset);
			mtr_start(&mtr);

			/* We have to acquire an X-latch to the clustered
			index tree */

			index = dict_table_get_first_index(node->table);

			mtr_x_lock(dict_tree_get_lock(index->tree), &mtr);
420 421 422 423 424 425 426 427 428

			/* NOTE: we must also acquire an X-latch to the
			root page of the tree. We will need it when we
			free pages from the tree. If the tree is of height 1,
			the tree X-latch does NOT protect the root page,
			because it is also a leaf page. Since we will have a
			latch on an undo log page, we would break the
			latching order if we would only later latch the
			root page of such a tree! */
429
			
430 431
			btr_root_get(index->tree, &mtr);

432 433 434 435 436 437
			/* We assume in purge of externally stored fields
			that the space id of the undo log record is 0! */

			data_field = buf_page_get(0, page_no, RW_X_LATCH, &mtr)
				     + offset + internal_offset;

438
#ifdef UNIV_SYNC_DEBUG
439 440
			buf_page_dbg_add_level(buf_frame_align(data_field),
						SYNC_TRX_UNDO_PAGE);
441
#endif /* UNIV_SYNC_DEBUG */
442 443 444 445
				     
			data_field_len = ufield->new_val.len;

			btr_free_externally_stored_field(index, data_field,
446
						data_field_len, FALSE, &mtr);
447 448 449
			mtr_commit(&mtr);
		}
	}
450 451 452 453 454 455 456 457
}

/***************************************************************
Parses the row reference and other info in a modify undo log record. */
static
ibool
row_purge_parse_undo_rec(
/*=====================*/
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
458
				/* out: TRUE if purge operation required:
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
459 460
				NOTE that then the CALLER must unfreeze
				data dictionary! */
461
	purge_node_t*	node,	/* in: row undo node */
462 463 464
	ibool*		updated_extern,
				/* out: TRUE if an externally stored field
				was updated */
465 466 467 468
	que_thr_t*	thr)	/* in: query thread */
{
	dict_index_t*	clust_index;
	byte*		ptr;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
469
	trx_t*		trx;
470 471 472 473 474 475 476 477 478
	dulint		undo_no;
	dulint		table_id;
	dulint		trx_id;
	dulint		roll_ptr;
	ulint		info_bits;
	ulint		type;
	ulint		cmpl_info;
	
	ut_ad(node && thr);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
479 480

	trx = thr_get_trx(thr);
481 482
	
	ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
483
					updated_extern, &undo_no, &table_id);
484 485
	node->rec_type = type;

486
	if (type == TRX_UNDO_UPD_DEL_REC && !(*updated_extern)) {
487 488 489 490 491 492 493 494 495

		return(FALSE);
	}	    		

	ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
								&info_bits);
	node->table = NULL;

	if (type == TRX_UNDO_UPD_EXIST_REC
496
	    && cmpl_info & UPD_NODE_NO_ORD_CHANGE && !(*updated_extern)) {
497 498 499 500 501 502

	    	/* Purge requires no changes to indexes: we may return */

	    	return(FALSE);
	}
	
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
503 504 505
	/* Prevent DROP TABLE etc. from running when we are doing the purge
	for this row */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
506
	row_mysql_freeze_data_dictionary(trx);
507

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
508
	mutex_enter(&(dict_sys->mutex));
509

510
	node->table = dict_table_get_on_id_low(table_id, trx);
511
	
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
512 513
	mutex_exit(&(dict_sys->mutex));

514 515 516
	if (node->table == NULL) {
		/* The table has been dropped: no need to do purge */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
517
		row_mysql_unfreeze_data_dictionary(trx);
518 519 520 521

		return(FALSE);
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
522 523 524 525 526
	if (node->table->ibd_file_missing) {
		/* We skip purge of missing .ibd files */

		node->table = NULL;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
527 528
		row_mysql_unfreeze_data_dictionary(trx);

529
		return(FALSE);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
530 531
	}

532 533
	clust_index = dict_table_get_first_index(node->table);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
534 535 536
	if (clust_index == NULL) {
		/* The table was corrupt in the data dictionary */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
537
		row_mysql_unfreeze_data_dictionary(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
538 539 540 541

		return(FALSE);
	}

542 543 544 545
	ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
								node->heap);

	ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
546 547
					roll_ptr, info_bits, trx,
					node->heap, &(node->update));
548 549 550

	/* Read to the partial row the fields that occur in indexes */

551 552 553 554 555
	if (!cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
		ptr = trx_undo_rec_get_partial_row(ptr, clust_index,
						&(node->row), node->heap);
	}
	
556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573
	return(TRUE);
}

/***************************************************************
Fetches an undo log record and does the purge for the recorded operation.
If none left, or the current purge completed, returns the control to the
parent node, which is always a query thread node. After a record has been
processed, thr->run_node is set back to this node. */
static
ulint
row_purge(
/*======*/
				/* out: DB_SUCCESS if operation successfully
				completed, else error code */
	purge_node_t*	node,	/* in: row purge node */
	que_thr_t*	thr)	/* in: query thread */
{
	dulint	roll_ptr;
	ibool	purge_needed;
	ibool	updated_extern	= FALSE; /* set by the undo record parser;
					initialized so that it is never
					indeterminate on the dummy record
					path */
	trx_t*	trx;

	ut_ad(node && thr);

	trx = thr_get_trx(thr);

	node->undo_rec = trx_purge_fetch_next_rec(&roll_ptr,
						&(node->reservation),
						node->heap);
	if (!node->undo_rec) {
		/* Purge completed for this query thread */

		thr->run_node = que_node_get_parent(node);

		return(DB_SUCCESS);
	}

	node->roll_ptr = roll_ptr;

	if (node->undo_rec == &trx_purge_dummy_rec) {
		purge_needed = FALSE;
	} else {
		purge_needed = row_purge_parse_undo_rec(node, &updated_extern,
									thr);
		/* If purge_needed == TRUE, we must also remember to unfreeze
		data dictionary! */
	}

	if (purge_needed) {
		node->found_clust = FALSE;

		/* Start the index pass from the index that follows the
		first (clustered) index of the table */

		node->index = dict_table_get_next_index(
				dict_table_get_first_index(node->table));

		if (node->rec_type == TRX_UNDO_DEL_MARK_REC) {
			row_purge_del_mark(node);

		} else if (updated_extern
			    || node->rec_type == TRX_UNDO_UPD_EXIST_REC) {

			row_purge_upd_exist_or_extern(node);
		}

		if (node->found_clust) {
			btr_pcur_close(&(node->pcur));
		}

		row_mysql_unfreeze_data_dictionary(trx);
	}

	/* Do some cleanup */
	trx_purge_rec_release(node->reservation);
	mem_heap_empty(node->heap);

	thr->run_node = node;

	return(DB_SUCCESS);
}

/***************************************************************
Query graph step for a purge node: runs the purge for one undo log record
by calling row_purge() on the node the thread is currently positioned on.
This is a high-level function used in an SQL execution graph. */

que_thr_t*
row_purge_step(
/*===========*/
				/* out: query thread to run next or NULL */
	que_thr_t*	thr)	/* in: query thread */
{
	purge_node_t*	purge_node;
	ulint		status;

	ut_ad(thr);

	/* The node the thread is currently positioned on must be a
	purge node */

	purge_node = thr->run_node;

	ut_ad(que_node_get_type(purge_node) == QUE_NODE_PURGE);

	status = row_purge(purge_node, thr);

	ut_ad(status == DB_SUCCESS);

	return(thr);
}