/*****************************************************************************

Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA

*****************************************************************************/

/**************************************************//**
@file trx/trx0rec.c
Transaction undo log record

Created 3/26/1996 Heikki Tuuri
*******************************************************/

#include "trx0rec.h"

#ifdef UNIV_NONINL
#include "trx0rec.ic"
#endif

#include "fsp0fsp.h"
#include "mach0data.h"
#include "trx0undo.h"
#include "mtr0log.h"
#ifndef UNIV_HOTBACKUP
#include "dict0dict.h"
#include "ut0mem.h"
#include "row0ext.h"
#include "row0upd.h"
#include "que0que.h"
#include "trx0purge.h"
#include "trx0rseg.h"
#include "row0row.h"

/*=========== UNDO LOG RECORD CREATION AND DECODING ====================*/

48
/**********************************************************************//**
osku's avatar
osku committed
49 50 51 52 53 54
Writes the mtr log entry of the inserted undo log record on the undo log
page. */
UNIV_INLINE
void
trx_undof_page_add_undo_rec_log(
/*============================*/
55 56 57 58
	page_t* undo_page,	/*!< in: undo log page */
	ulint	old_free,	/*!< in: start offset of the inserted entry */
	ulint	new_free,	/*!< in: end offset of the entry */
	mtr_t*	mtr)		/*!< in: mtr */
osku's avatar
osku committed
59 60 61 62 63 64 65 66 67 68 69 70 71
{
	byte*		log_ptr;
	const byte*	log_end;
	ulint		len;

	log_ptr = mlog_open(mtr, 11 + 13 + MLOG_BUF_MARGIN);

	if (log_ptr == NULL) {

		return;
	}

	log_end = &log_ptr[11 + 13 + MLOG_BUF_MARGIN];
72 73
	log_ptr = mlog_write_initial_log_record_fast(
		undo_page, MLOG_UNDO_INSERT, log_ptr, mtr);
osku's avatar
osku committed
74 75 76 77 78 79 80 81 82 83 84 85
	len = new_free - old_free - 4;

	mach_write_to_2(log_ptr, len);
	log_ptr += 2;

	if (log_ptr + len <= log_end) {
		memcpy(log_ptr, undo_page + old_free + 2, len);
		mlog_close(mtr, log_ptr + len);
	} else {
		mlog_close(mtr, log_ptr);
		mlog_catenate_string(mtr, undo_page + old_free + 2, len);
	}
86
}
#endif /* !UNIV_HOTBACKUP */

89
/***********************************************************//**
90 91
Parses a redo log record of adding an undo log record.
@return	end of log record or NULL */
92
UNIV_INTERN
osku's avatar
osku committed
93 94 95
byte*
trx_undo_parse_add_undo_rec(
/*========================*/
96 97 98
	byte*	ptr,	/*!< in: buffer */
	byte*	end_ptr,/*!< in: buffer end */
	page_t*	page)	/*!< in: page or NULL */
osku's avatar
osku committed
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
{
	ulint	len;
	byte*	rec;
	ulint	first_free;

	if (end_ptr < ptr + 2) {

		return(NULL);
	}

	len = mach_read_from_2(ptr);
	ptr += 2;

	if (end_ptr < ptr + len) {

		return(NULL);
	}
116

osku's avatar
osku committed
117 118 119 120
	if (page == NULL) {

		return(ptr + len);
	}
121

osku's avatar
osku committed
122
	first_free = mach_read_from_2(page + TRX_UNDO_PAGE_HDR
123
				      + TRX_UNDO_PAGE_FREE);
osku's avatar
osku committed
124
	rec = page + first_free;
125

osku's avatar
osku committed
126 127 128 129
	mach_write_to_2(rec, first_free + 4 + len);
	mach_write_to_2(rec + 2 + len, first_free);

	mach_write_to_2(page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
130
			first_free + 4 + len);
osku's avatar
osku committed
131 132 133 134
	ut_memcpy(rec + 2, ptr, len);

	return(ptr + len);
}
135

#ifndef UNIV_HOTBACKUP
137
/**********************************************************************//**
138 139
Calculates the free space left for extending an undo log record.
@return	bytes left */
osku's avatar
osku committed
140 141 142 143
UNIV_INLINE
ulint
trx_undo_left(
/*==========*/
144 145
	const page_t*	page,	/*!< in: undo log page */
	const byte*	ptr)	/*!< in: pointer to page */
osku's avatar
osku committed
146 147 148 149 150 151 152
{
	/* The '- 10' is a safety margin, in case we have some small
	calculation error below */

	return(UNIV_PAGE_SIZE - (ptr - page) - 10 - FIL_PAGE_DATA_END);
}

153
/**********************************************************************//**
154 155
Set the next and previous pointers in the undo page for the undo record
that was written to ptr. Update the first free value by the number of bytes
156 157
written for this undo record.
@return	offset of the inserted entry on the page if succeeded, 0 if fail */
158 159 160 161
static
ulint
trx_undo_page_set_next_prev_and_add(
/*================================*/
162 163
	page_t*		undo_page,	/*!< in/out: undo log page */
	byte*		ptr,		/*!< in: ptr up to where data has been
164
					written on this undo page. */
165
	mtr_t*		mtr)		/*!< in: mtr */
166
{
167 168
	ulint		first_free;	/*!< offset within undo_page */
	ulint		end_of_rec;	/*!< offset within undo_page */
169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
	byte*		ptr_to_first_free;
					/* pointer within undo_page
					that points to the next free
					offset value within undo_page.*/

	ut_ad(ptr > undo_page);
	ut_ad(ptr < undo_page + UNIV_PAGE_SIZE);

	if (UNIV_UNLIKELY(trx_undo_left(undo_page, ptr) < 2)) {

		return(0);
	}

	ptr_to_first_free = undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE;

	first_free = mach_read_from_2(ptr_to_first_free);

	/* Write offset of the previous undo log record */
	mach_write_to_2(ptr, first_free);
	ptr += 2;

	end_of_rec = ptr - undo_page;

	/* Write offset of the next undo log record */
	mach_write_to_2(undo_page + first_free, end_of_rec);

	/* Update the offset to first free undo record */
	mach_write_to_2(ptr_to_first_free, end_of_rec);

	/* Write this log entry to the UNDO log */
	trx_undof_page_add_undo_rec_log(undo_page, first_free,
					end_of_rec, mtr);

	return(first_free);
}

205
/**********************************************************************//**
206 207
Reports in the undo log of an insert of a clustered index record.
@return	offset of the inserted entry on the page if succeed, 0 if fail */
osku's avatar
osku committed
208 209 210 211
static
ulint
trx_undo_page_report_insert(
/*========================*/
212 213 214 215
	page_t*		undo_page,	/*!< in: undo log page */
	trx_t*		trx,		/*!< in: transaction */
	dict_index_t*	index,		/*!< in: clustered index */
	const dtuple_t*	clust_entry,	/*!< in: index entry which will be
osku's avatar
osku committed
216
					inserted to the clustered index */
217
	mtr_t*		mtr)		/*!< in: mtr */
osku's avatar
osku committed
218 219 220 221
{
	ulint		first_free;
	byte*		ptr;
	ulint		i;
222

223
	ut_ad(dict_index_is_clust(index));
osku's avatar
osku committed
224
	ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
225
			       + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT);
osku's avatar
osku committed
226 227

	first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
228
				      + TRX_UNDO_PAGE_FREE);
osku's avatar
osku committed
229
	ptr = undo_page + first_free;
230

osku's avatar
osku committed
231 232
	ut_ad(first_free <= UNIV_PAGE_SIZE);

233
	if (trx_undo_left(undo_page, ptr) < 2 + 1 + 11 + 11) {
osku's avatar
osku committed
234

235
		/* Not enough space for writing the general parameters */
osku's avatar
osku committed
236 237 238 239 240 241

		return(0);
	}

	/* Reserve 2 bytes for the pointer to the next undo log record */
	ptr += 2;
242 243

	/* Store first some general parameters to the undo log */
244 245 246
	*ptr++ = TRX_UNDO_INSERT_REC;
	ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);
	ptr += mach_dulint_write_much_compressed(ptr, index->table->id);
osku's avatar
osku committed
247 248 249 250 251 252
	/*----------------------------------------*/
	/* Store then the fields required to uniquely determine the record
	to be inserted in the clustered index */

	for (i = 0; i < dict_index_get_n_unique(index); i++) {

253 254
		const dfield_t*	field	= dtuple_get_nth_field(clust_entry, i);
		ulint		flen	= dfield_get_len(field);
osku's avatar
osku committed
255 256 257 258 259 260

		if (trx_undo_left(undo_page, ptr) < 5) {

			return(0);
		}

261
		ptr += mach_write_compressed(ptr, flen);
osku's avatar
osku committed
262 263 264 265 266 267 268

		if (flen != UNIV_SQL_NULL) {
			if (trx_undo_left(undo_page, ptr) < flen) {

				return(0);
			}

269
			ut_memcpy(ptr, dfield_get_data(field), flen);
osku's avatar
osku committed
270 271 272 273
			ptr += flen;
		}
	}

274 275
	return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr));
}
osku's avatar
osku committed
276

277
/**********************************************************************//**
278 279
Reads from an undo log record the general parameters.
@return	remaining part of undo log record after reading these values */
280
UNIV_INTERN
osku's avatar
osku committed
281 282 283
byte*
trx_undo_rec_get_pars(
/*==================*/
284 285
	trx_undo_rec_t*	undo_rec,	/*!< in: undo log record */
	ulint*		type,		/*!< out: undo record type:
osku's avatar
osku committed
286
					TRX_UNDO_INSERT_REC, ... */
287
	ulint*		cmpl_info,	/*!< out: compiler info, relevant only
osku's avatar
osku committed
288
					for update type records */
289
	ibool*		updated_extern,	/*!< out: TRUE if we updated an
osku's avatar
osku committed
290
					externally stored fild */
291 292
	undo_no_t*	undo_no,	/*!< out: undo log record number */
	dulint*		table_id)	/*!< out: table id */
osku's avatar
osku committed
293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311
{
	byte*		ptr;
	ulint		type_cmpl;

	ptr = undo_rec + 2;

	type_cmpl = mach_read_from_1(ptr);
	ptr++;

	if (type_cmpl & TRX_UNDO_UPD_EXTERN) {
		*updated_extern = TRUE;
		type_cmpl -= TRX_UNDO_UPD_EXTERN;
	} else {
		*updated_extern = FALSE;
	}

	*type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1);
	*cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT;

312
	*undo_no = mach_dulint_read_much_compressed(ptr);
313
	ptr += mach_dulint_get_much_compressed_size(*undo_no);
osku's avatar
osku committed
314

315
	*table_id = mach_dulint_read_much_compressed(ptr);
316
	ptr += mach_dulint_get_much_compressed_size(*table_id);
osku's avatar
osku committed
317 318 319 320

	return(ptr);
}

321
/**********************************************************************//**
322 323
Reads from an undo log record a stored column value.
@return	remaining part of undo log record after reading these values */
osku's avatar
osku committed
324 325 326 327
static
byte*
trx_undo_rec_get_col_val(
/*=====================*/
328 329 330 331
	byte*	ptr,	/*!< in: pointer to remaining part of undo log record */
	byte**	field,	/*!< out: pointer to stored field */
	ulint*	len,	/*!< out: length of the field, or UNIV_SQL_NULL */
	ulint*	orig_len)/*!< out: original length of the locally
332
			stored part of an externally stored column, or 0 */
osku's avatar
osku committed
333
{
334
	*len = mach_read_compressed(ptr);
osku's avatar
osku committed
335 336
	ptr += mach_get_compressed_size(*len);

337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352
	*orig_len = 0;

	switch (*len) {
	case UNIV_SQL_NULL:
		*field = NULL;
		break;
	case UNIV_EXTERN_STORAGE_FIELD:
		*orig_len = mach_read_compressed(ptr);
		ptr += mach_get_compressed_size(*orig_len);
		*len = mach_read_compressed(ptr);
		ptr += mach_get_compressed_size(*len);
		*field = ptr;
		ptr += *len;

		ut_ad(*orig_len >= BTR_EXTERN_FIELD_REF_SIZE);
		ut_ad(*len > *orig_len);
353 354 355 356 357
		/* @see dtuple_convert_big_rec() */
		ut_ad(*len >= BTR_EXTERN_FIELD_REF_SIZE * 2);
		/* we do not have access to index->table here
		ut_ad(dict_table_get_format(index->table) >= DICT_TF_FORMAT_ZIP
		      || *len >= REC_MAX_INDEX_COL_LEN
358
		      + BTR_EXTERN_FIELD_REF_SIZE);
359
		*/
360 361 362 363 364

		*len += UNIV_EXTERN_STORAGE_FIELD;
		break;
	default:
		*field = ptr;
osku's avatar
osku committed
365
		if (*len >= UNIV_EXTERN_STORAGE_FIELD) {
366
			ptr += *len - UNIV_EXTERN_STORAGE_FIELD;
osku's avatar
osku committed
367 368 369 370 371 372 373 374
		} else {
			ptr += *len;
		}
	}

	return(ptr);
}

375
/*******************************************************************//**
376 377
Builds a row reference from an undo log record.
@return	pointer to remaining part of undo record */
378
UNIV_INTERN
osku's avatar
osku committed
379 380 381
byte*
trx_undo_rec_get_row_ref(
/*=====================*/
382
	byte*		ptr,	/*!< in: remaining part of a copy of an undo log
osku's avatar
osku committed
383 384 385 386 387
				record, at the start of the row reference;
				NOTE that this copy of the undo log record must
				be preserved as long as the row reference is
				used, as we do NOT copy the data in the
				record! */
388 389 390
	dict_index_t*	index,	/*!< in: clustered index */
	dtuple_t**	ref,	/*!< out, own: row reference */
	mem_heap_t*	heap)	/*!< in: memory heap from which the memory
osku's avatar
osku committed
391 392 393 394
				needed is allocated */
{
	ulint		ref_len;
	ulint		i;
395

osku's avatar
osku committed
396
	ut_ad(index && ptr && ref && heap);
397
	ut_a(dict_index_is_clust(index));
398

osku's avatar
osku committed
399 400 401 402 403 404 405
	ref_len = dict_index_get_n_unique(index);

	*ref = dtuple_create(heap, ref_len);

	dict_index_copy_types(*ref, index, ref_len);

	for (i = 0; i < ref_len; i++) {
406 407 408 409 410
		dfield_t*	dfield;
		byte*		field;
		ulint		len;
		ulint		orig_len;

411
		dfield = dtuple_get_nth_field(*ref, i);
osku's avatar
osku committed
412

413
		ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
osku's avatar
osku committed
414 415 416 417 418

		dfield_set_data(dfield, field, len);
	}

	return(ptr);
419
}
osku's avatar
osku committed
420

421
/*******************************************************************//**
422 423
Skips a row reference from an undo log record.
@return	pointer to remaining part of undo record */
424
UNIV_INTERN
osku's avatar
osku committed
425 426 427
byte*
trx_undo_rec_skip_row_ref(
/*======================*/
428
	byte*		ptr,	/*!< in: remaining part in update undo log
osku's avatar
osku committed
429
				record, at the start of the row reference */
430
	dict_index_t*	index)	/*!< in: clustered index */
osku's avatar
osku committed
431 432 433
{
	ulint	ref_len;
	ulint	i;
434

osku's avatar
osku committed
435
	ut_ad(index && ptr);
436
	ut_a(dict_index_is_clust(index));
437

osku's avatar
osku committed
438 439 440
	ref_len = dict_index_get_n_unique(index);

	for (i = 0; i < ref_len; i++) {
441 442 443 444 445
		byte*	field;
		ulint	len;
		ulint	orig_len;

		ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
osku's avatar
osku committed
446 447 448
	}

	return(ptr);
449
}
osku's avatar
osku committed
450

451
/**********************************************************************//**
452
Fetch a prefix of an externally stored column, for writing to the undo log
453 454
of an update or delete marking of a clustered index record.
@return	ext_buf */
455 456 457 458
static
byte*
trx_undo_page_fetch_ext(
/*====================*/
459
	byte*		ext_buf,	/*!< in: a buffer of
460 461
					REC_MAX_INDEX_COL_LEN
					+ BTR_EXTERN_FIELD_REF_SIZE */
462
	ulint		zip_size,	/*!< compressed page size in bytes,
463
					or 0 for uncompressed BLOB  */
464 465
	const byte*	field,		/*!< in: an externally stored column */
	ulint*		len)		/*!< in: length of field;
466 467 468 469 470
					out: used length of ext_buf */
{
	/* Fetch the BLOB. */
	ulint	ext_len = btr_copy_externally_stored_field_prefix(
		ext_buf, REC_MAX_INDEX_COL_LEN, zip_size, field, *len);
471
	/* BLOBs should always be nonempty. */
472 473 474 475 476 477 478 479 480
	ut_a(ext_len);
	/* Append the BLOB pointer to the prefix. */
	memcpy(ext_buf + ext_len,
	       field + *len - BTR_EXTERN_FIELD_REF_SIZE,
	       BTR_EXTERN_FIELD_REF_SIZE);
	*len = ext_len + BTR_EXTERN_FIELD_REF_SIZE;
	return(ext_buf);
}

481
/**********************************************************************//**
482 483
Writes to the undo log a prefix of an externally stored column.
@return	undo log position */
484 485 486 487
static
byte*
trx_undo_page_report_modify_ext(
/*============================*/
488
	byte*		ptr,		/*!< in: undo log position,
489
					at least 15 bytes must be available */
490
	byte*		ext_buf,	/*!< in: a buffer of
491 492 493 494
					REC_MAX_INDEX_COL_LEN
					+ BTR_EXTERN_FIELD_REF_SIZE,
					or NULL when should not fetch
					a longer prefix */
495
	ulint		zip_size,	/*!< compressed page size in bytes,
496
					or 0 for uncompressed BLOB  */
497
	const byte**	field,		/*!< in/out: the locally stored part of
498
					the externally stored column */
499
	ulint*		len)		/*!< in/out: length of field, in bytes */
500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521
{
	if (ext_buf) {
		/* If an ordering column is externally stored, we will
		have to store a longer prefix of the field.  In this
		case, write to the log a marker followed by the
		original length and the real length of the field. */
		ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD);

		ptr += mach_write_compressed(ptr, *len);

		*field = trx_undo_page_fetch_ext(ext_buf, zip_size,
						 *field, len);

		ptr += mach_write_compressed(ptr, *len);
	} else {
		ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD
					     + *len);
	}

	return(ptr);
}

522
/**********************************************************************//**
osku's avatar
osku committed
523
Reports in the undo log of an update or delete marking of a clustered index
524
record.
525 526
@return byte offset of the inserted undo log entry on the page if
succeed, 0 if fail */
osku's avatar
osku committed
527 528 529 530
static
ulint
trx_undo_page_report_modify(
/*========================*/
531 532 533
	page_t*		undo_page,	/*!< in: undo log page */
	trx_t*		trx,		/*!< in: transaction */
	dict_index_t*	index,		/*!< in: clustered index where update or
osku's avatar
osku committed
534
					delete marking is done */
535
	const rec_t*	rec,		/*!< in: clustered index record which
osku's avatar
osku committed
536
					has NOT yet been modified */
537 538
	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index) */
	const upd_t*	update,		/*!< in: update vector which tells the
osku's avatar
osku committed
539 540
					columns to be updated; in the case of
					a delete, this should be set to NULL */
541
	ulint		cmpl_info,	/*!< in: compiler info on secondary
osku's avatar
osku committed
542
					index updates */
543
	mtr_t*		mtr)		/*!< in: mtr */
osku's avatar
osku committed
544 545 546 547
{
	dict_table_t*	table;
	ulint		first_free;
	byte*		ptr;
marko's avatar
marko committed
548
	const byte*	field;
osku's avatar
osku committed
549 550 551 552 553
	ulint		flen;
	ulint		col_no;
	ulint		type_cmpl;
	byte*		type_cmpl_ptr;
	ulint		i;
554
	trx_id_t	trx_id;
555
	ibool		ignore_prefix = FALSE;
556 557
	byte		ext_buf[REC_MAX_INDEX_COL_LEN
				+ BTR_EXTERN_FIELD_REF_SIZE];
558

559
	ut_a(dict_index_is_clust(index));
osku's avatar
osku committed
560 561
	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
562
			       + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE);
osku's avatar
osku committed
563
	table = index->table;
564

osku's avatar
osku committed
565
	first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
566
				      + TRX_UNDO_PAGE_FREE);
osku's avatar
osku committed
567
	ptr = undo_page + first_free;
568

osku's avatar
osku committed
569 570 571 572 573 574 575 576 577 578 579 580 581 582 583
	ut_ad(first_free <= UNIV_PAGE_SIZE);

	if (trx_undo_left(undo_page, ptr) < 50) {

		/* NOTE: the value 50 must be big enough so that the general
		fields written below fit on the undo log page */

		return(0);
	}

	/* Reserve 2 bytes for the pointer to the next undo log record */
	ptr += 2;

	/* Store first some general parameters to the undo log */

marko's avatar
marko committed
584
	if (!update) {
osku's avatar
osku committed
585
		type_cmpl = TRX_UNDO_DEL_MARK_REC;
marko's avatar
marko committed
586 587
	} else if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
		type_cmpl = TRX_UNDO_UPD_DEL_REC;
588 589 590 591 592
		/* We are about to update a delete marked record.
		We don't typically need the prefix in this case unless
		the delete marking is done by the same transaction
		(which we check below). */
		ignore_prefix = TRUE;
marko's avatar
marko committed
593 594
	} else {
		type_cmpl = TRX_UNDO_UPD_EXIST_REC;
osku's avatar
osku committed
595 596
	}

marko's avatar
marko committed
597
	type_cmpl |= cmpl_info * TRX_UNDO_CMPL_INFO_MULT;
osku's avatar
osku committed
598 599
	type_cmpl_ptr = ptr;

600
	*ptr++ = (byte) type_cmpl;
601
	ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);
osku's avatar
osku committed
602

603
	ptr += mach_dulint_write_much_compressed(ptr, table->id);
osku's avatar
osku committed
604 605 606 607

	/*----------------------------------------*/
	/* Store the state of the info bits */

608
	*ptr++ = (byte) rec_get_info_bits(rec, dict_table_is_comp(table));
osku's avatar
osku committed
609 610 611

	/* Store the values of the system columns */
	field = rec_get_nth_field(rec, offsets,
612
				  dict_index_get_sys_col_pos(
613 614
					  index, DATA_TRX_ID), &flen);
	ut_ad(flen == DATA_TRX_ID_LEN);
marko's avatar
marko committed
615

616 617 618 619 620 621 622 623 624 625
	trx_id = trx_read_trx_id(field);

	/* If it is an update of a delete marked record, then we are
	allowed to ignore blob prefixes if the delete marking was done
	by some other trx as it must have committed by now for us to
	allow an over-write. */
	if (ignore_prefix) {
		ignore_prefix = ut_dulint_cmp(trx_id, trx->id) != 0;
	}
	ptr += mach_dulint_write_compressed(ptr, trx_id);
marko's avatar
marko committed
626

osku's avatar
osku committed
627
	field = rec_get_nth_field(rec, offsets,
628
				  dict_index_get_sys_col_pos(
629 630
					  index, DATA_ROLL_PTR), &flen);
	ut_ad(flen == DATA_ROLL_PTR_LEN);
osku's avatar
osku committed
631

632
	ptr += mach_dulint_write_compressed(ptr, trx_read_roll_ptr(field));
osku's avatar
osku committed
633 634 635 636 637 638 639 640 641

	/*----------------------------------------*/
	/* Store then the fields required to uniquely determine the
	record which will be modified in the clustered index */

	for (i = 0; i < dict_index_get_n_unique(index); i++) {

		field = rec_get_nth_field(rec, offsets, i, &flen);

642 643 644 645 646
		/* The ordering columns must not be stored externally. */
		ut_ad(!rec_offs_nth_extern(offsets, i));
		ut_ad(dict_index_get_nth_col(index, i)->ord_part);

		if (trx_undo_left(undo_page, ptr) < 5) {
osku's avatar
osku committed
647 648 649 650

			return(0);
		}

651
		ptr += mach_write_compressed(ptr, flen);
osku's avatar
osku committed
652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667

		if (flen != UNIV_SQL_NULL) {
			if (trx_undo_left(undo_page, ptr) < flen) {

				return(0);
			}

			ut_memcpy(ptr, field, flen);
			ptr += flen;
		}
	}

	/*----------------------------------------*/
	/* Save to the undo log the old values of the columns to be updated. */

	if (update) {
668
		if (trx_undo_left(undo_page, ptr) < 5) {
osku's avatar
osku committed
669

670 671
			return(0);
		}
osku's avatar
osku committed
672

673
		ptr += mach_write_compressed(ptr, upd_get_n_fields(update));
osku's avatar
osku committed
674

675
		for (i = 0; i < upd_get_n_fields(update); i++) {
osku's avatar
osku committed
676

marko's avatar
marko committed
677
			ulint	pos = upd_get_nth_field(update, i)->field_no;
osku's avatar
osku committed
678

679 680
			/* Write field number to undo log */
			if (trx_undo_left(undo_page, ptr) < 5) {
osku's avatar
osku committed
681

682 683
				return(0);
			}
osku's avatar
osku committed
684

685
			ptr += mach_write_compressed(ptr, pos);
osku's avatar
osku committed
686

687 688
			/* Save the old value of field */
			field = rec_get_nth_field(rec, offsets, pos, &flen);
osku's avatar
osku committed
689

690
			if (trx_undo_left(undo_page, ptr) < 15) {
osku's avatar
osku committed
691

692 693
				return(0);
			}
osku's avatar
osku committed
694

695
			if (rec_offs_nth_extern(offsets, pos)) {
696
				ptr = trx_undo_page_report_modify_ext(
697
					ptr,
698 699
					dict_index_get_nth_col(index, pos)
					->ord_part
700
					&& !ignore_prefix
701 702 703 704
					&& flen < REC_MAX_INDEX_COL_LEN
					? ext_buf : NULL,
					dict_table_zip_size(table),
					&field, &flen);
osku's avatar
osku committed
705

706 707
				/* Notify purge that it eventually has to
				free the old externally stored field */
osku's avatar
osku committed
708

709
				trx->update_undo->del_marks = TRUE;
osku's avatar
osku committed
710

marko's avatar
marko committed
711
				*type_cmpl_ptr |= TRX_UNDO_UPD_EXTERN;
712
			} else {
713
				ptr += mach_write_compressed(ptr, flen);
714
			}
osku's avatar
osku committed
715

716 717
			if (flen != UNIV_SQL_NULL) {
				if (trx_undo_left(undo_page, ptr) < flen) {
osku's avatar
osku committed
718

719 720 721 722 723 724
					return(0);
				}

				ut_memcpy(ptr, field, flen);
				ptr += flen;
			}
osku's avatar
osku committed
725
		}
726
	}
osku's avatar
osku committed
727 728 729 730 731 732 733 734

	/*----------------------------------------*/
	/* In the case of a delete marking, and also in the case of an update
	where any ordering field of any index changes, store the values of all
	columns which occur as ordering fields in any index. This info is used
	in the purge of old versions where we use it to build and search the
	delete marked index records, to look if we can remove them from the
	index tree. Note that starting from 4.0.14 also externally stored
735 736 737 738 739 740
	fields can be ordering in some index. Starting from 5.2, we no longer
	store REC_MAX_INDEX_COL_LEN first bytes to the undo log record,
	but we can construct the column prefix fields in the index by
	fetching the first page of the BLOB that is pointed to by the
	clustered index. This works also in crash recovery, because all pages
	(including BLOBs) are recovered before anything is rolled back. */
osku's avatar
osku committed
741

742
	if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
marko's avatar
marko committed
743
		byte*	old_ptr = ptr;
osku's avatar
osku committed
744

745
		trx->update_undo->del_marks = TRUE;
osku's avatar
osku committed
746

747
		if (trx_undo_left(undo_page, ptr) < 5) {
osku's avatar
osku committed
748

749 750
			return(0);
		}
osku's avatar
osku committed
751

752 753
		/* Reserve 2 bytes to write the number of bytes the stored
		fields take in this undo record */
osku's avatar
osku committed
754

755
		ptr += 2;
osku's avatar
osku committed
756

757 758
		for (col_no = 0; col_no < dict_table_get_n_cols(table);
		     col_no++) {
osku's avatar
osku committed
759

760 761
			const dict_col_t*	col
				= dict_table_get_nth_col(table, col_no);
osku's avatar
osku committed
762

763 764
			if (col->ord_part) {
				ulint	pos;
osku's avatar
osku committed
765

766
				/* Write field number to undo log */
767
				if (trx_undo_left(undo_page, ptr) < 5 + 15) {
osku's avatar
osku committed
768 769 770 771

					return(0);
				}

772 773 774
				pos = dict_index_get_nth_col_pos(index,
								 col_no);
				ptr += mach_write_compressed(ptr, pos);
775 776 777

				/* Save the old value of field */
				field = rec_get_nth_field(rec, offsets, pos,
778
							  &flen);
779

780
				if (rec_offs_nth_extern(offsets, pos)) {
781 782 783
					ptr = trx_undo_page_report_modify_ext(
						ptr,
						flen < REC_MAX_INDEX_COL_LEN
784
						&& !ignore_prefix
785
						? ext_buf : NULL,
786
						dict_table_zip_size(table),
787
						&field, &flen);
788 789 790 791
				} else {
					ptr += mach_write_compressed(
						ptr, flen);
				}
792 793 794

				if (flen != UNIV_SQL_NULL) {
					if (trx_undo_left(undo_page, ptr)
795
					    < flen) {
796 797 798 799 800 801 802

						return(0);
					}

					ut_memcpy(ptr, field, flen);
					ptr += flen;
				}
osku's avatar
osku committed
803 804 805
			}
		}

806 807
		mach_write_to_2(old_ptr, ptr - old_ptr);
	}
osku's avatar
osku committed
808 809 810 811 812 813 814

	/*----------------------------------------*/
	/* Write pointers to the previous and the next undo log records */
	if (trx_undo_left(undo_page, ptr) < 2) {

		return(0);
	}
815

osku's avatar
osku committed
816 817 818 819 820
	mach_write_to_2(ptr, first_free);
	ptr += 2;
	mach_write_to_2(undo_page + first_free, ptr - undo_page);

	mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
821
			ptr - undo_page);
osku's avatar
osku committed
822 823 824 825

	/* Write to the REDO log about this change in the UNDO log */

	trx_undof_page_add_undo_rec_log(undo_page, first_free,
826
					ptr - undo_page, mtr);
827
	return(first_free);
osku's avatar
osku committed
828 829
}

830
/**********************************************************************//**
osku's avatar
osku committed
831
Reads from an undo log update record the system field values of the old
832 833
version.
@return	remaining part of undo log record after reading these values */
834
UNIV_INTERN
osku's avatar
osku committed
835 836 837
byte*
trx_undo_update_rec_get_sys_cols(
/*=============================*/
838
	byte*		ptr,		/*!< in: remaining part of undo
839 840
					log record after reading
					general parameters */
841 842 843
	trx_id_t*	trx_id,		/*!< out: trx id */
	roll_ptr_t*	roll_ptr,	/*!< out: roll ptr */
	ulint*		info_bits)	/*!< out: info bits state */
osku's avatar
osku committed
844 845 846 847 848 849 850
{
	/* Read the state of the info bits */
	*info_bits = mach_read_from_1(ptr);
	ptr += 1;

	/* Read the values of the system columns */

851
	*trx_id = mach_dulint_read_compressed(ptr);
852
	ptr += mach_dulint_get_compressed_size(*trx_id);
osku's avatar
osku committed
853

854
	*roll_ptr = mach_dulint_read_compressed(ptr);
855
	ptr += mach_dulint_get_compressed_size(*roll_ptr);
osku's avatar
osku committed
856 857 858 859

	return(ptr);
}

860
/**********************************************************************//**
861 862
Reads from an update undo log record the number of updated fields.
@return	remaining part of undo log record after reading this value */
osku's avatar
osku committed
863 864 865 866
UNIV_INLINE
byte*
trx_undo_update_rec_get_n_upd_fields(
/*=================================*/
867 868
	byte*	ptr,	/*!< in: pointer to remaining part of undo log record */
	ulint*	n)	/*!< out: number of fields */
osku's avatar
osku committed
869
{
870
	*n = mach_read_compressed(ptr);
osku's avatar
osku committed
871 872 873 874 875
	ptr += mach_get_compressed_size(*n);

	return(ptr);
}

876
/**********************************************************************//**
877 878
Reads from an update undo log record a stored field number.
@return	remaining part of undo log record after reading this value */
osku's avatar
osku committed
879 880 881 882
UNIV_INLINE
byte*
trx_undo_update_rec_get_field_no(
/*=============================*/
883 884
	byte*	ptr,	/*!< in: pointer to remaining part of undo log record */
	ulint*	field_no)/*!< out: field number */
osku's avatar
osku committed
885
{
886
	*field_no = mach_read_compressed(ptr);
osku's avatar
osku committed
887 888 889 890 891
	ptr += mach_get_compressed_size(*field_no);

	return(ptr);
}

892
/*******************************************************************//**
893
Builds an update vector based on a remaining part of an undo log record.
894 895
@return remaining part of the record, NULL if an error detected, which
means that the record is corrupted */
896
UNIV_INTERN
osku's avatar
osku committed
897 898 899
byte*
trx_undo_update_rec_get_update(
/*===========================*/
900
	byte*		ptr,	/*!< in: remaining part in update undo log
osku's avatar
osku committed
901 902 903 904 905
				record, after reading the row reference
				NOTE that this copy of the undo log record must
				be preserved as long as the update vector is
				used, as we do NOT copy the data in the
				record! */
906 907
	dict_index_t*	index,	/*!< in: clustered index */
	ulint		type,	/*!< in: TRX_UNDO_UPD_EXIST_REC,
osku's avatar
osku committed
908 909 910 911
				TRX_UNDO_UPD_DEL_REC, or
				TRX_UNDO_DEL_MARK_REC; in the last case,
				only trx id and roll ptr fields are added to
				the update vector */
912 913 914 915 916
	trx_id_t	trx_id,	/*!< in: transaction id from this undo record */
	roll_ptr_t	roll_ptr,/*!< in: roll pointer from this undo record */
	ulint		info_bits,/*!< in: info bits from this undo record */
	trx_t*		trx,	/*!< in: transaction */
	mem_heap_t*	heap,	/*!< in: memory heap from which the memory
osku's avatar
osku committed
917
				needed is allocated */
918
	upd_t**		upd)	/*!< out, own: update vector */
osku's avatar
osku committed
919 920 921 922 923 924
{
	upd_field_t*	upd_field;
	upd_t*		update;
	ulint		n_fields;
	byte*		buf;
	ulint		i;
925

926
	ut_a(dict_index_is_clust(index));
osku's avatar
osku committed
927 928 929 930 931 932 933 934 935 936 937 938 939 940 941

	if (type != TRX_UNDO_DEL_MARK_REC) {
		ptr = trx_undo_update_rec_get_n_upd_fields(ptr, &n_fields);
	} else {
		n_fields = 0;
	}

	update = upd_create(n_fields + 2, heap);

	update->info_bits = info_bits;

	/* Store first trx id and roll ptr to update vector */

	upd_field = upd_get_nth_field(update, n_fields);
	buf = mem_heap_alloc(heap, DATA_TRX_ID_LEN);
942
	trx_write_trx_id(buf, trx_id);
osku's avatar
osku committed
943 944

	upd_field_set_field_no(upd_field,
945 946
			       dict_index_get_sys_col_pos(index, DATA_TRX_ID),
			       index, trx);
osku's avatar
osku committed
947 948 949 950
	dfield_set_data(&(upd_field->new_val), buf, DATA_TRX_ID_LEN);

	upd_field = upd_get_nth_field(update, n_fields + 1);
	buf = mem_heap_alloc(heap, DATA_ROLL_PTR_LEN);
951
	trx_write_roll_ptr(buf, roll_ptr);
osku's avatar
osku committed
952

953 954 955
	upd_field_set_field_no(
		upd_field, dict_index_get_sys_col_pos(index, DATA_ROLL_PTR),
		index, trx);
osku's avatar
osku committed
956
	dfield_set_data(&(upd_field->new_val), buf, DATA_ROLL_PTR_LEN);
957

osku's avatar
osku committed
958 959 960 961
	/* Store then the updated ordinary columns to the update vector */

	for (i = 0; i < n_fields; i++) {

962 963 964 965 966
		byte*	field;
		ulint	len;
		ulint	field_no;
		ulint	orig_len;

osku's avatar
osku committed
967 968 969 970
		ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);

		if (field_no >= dict_index_get_n_fields(index)) {
			fprintf(stderr,
971 972 973
				"InnoDB: Error: trying to access"
				" update undo rec field %lu in ",
				(ulong) field_no);
osku's avatar
osku committed
974 975
			dict_index_name_print(stderr, trx, index);
			fprintf(stderr, "\n"
976 977 978 979
				"InnoDB: but index has only %lu fields\n"
				"InnoDB: Submit a detailed bug report"
				" to http://bugs.mysql.com\n"
				"InnoDB: Run also CHECK TABLE ",
osku's avatar
osku committed
980
				(ulong) dict_index_get_n_fields(index));
981
			ut_print_name(stderr, trx, TRUE, index->table_name);
osku's avatar
osku committed
982
			fprintf(stderr, "\n"
983
				"InnoDB: n_fields = %lu, i = %lu, ptr %p\n",
osku's avatar
osku committed
984
				(ulong) n_fields, (ulong) i, ptr);
985
			*upd = NULL;
osku's avatar
osku committed
986 987 988 989 990 991 992
			return(NULL);
		}

		upd_field = upd_get_nth_field(update, i);

		upd_field_set_field_no(upd_field, field_no, index, trx);

993
		ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
osku's avatar
osku committed
994

995 996 997 998 999
		upd_field->orig_len = orig_len;

		if (len == UNIV_SQL_NULL) {
			dfield_set_null(&upd_field->new_val);
		} else if (len < UNIV_EXTERN_STORAGE_FIELD) {
1000 1001
			dfield_set_data(&upd_field->new_val, field, len);
		} else {
1002 1003
			len -= UNIV_EXTERN_STORAGE_FIELD;

1004
			dfield_set_data(&upd_field->new_val, field, len);
1005
			dfield_set_ext(&upd_field->new_val);
osku's avatar
osku committed
1006 1007 1008 1009 1010 1011 1012
		}
	}

	*upd = update;

	return(ptr);
}
1013

1014
/*******************************************************************//**
osku's avatar
osku committed
1015
Builds a partial row from an update undo log record. It contains the
1016 1017
columns which occur as ordering in any index of the table.
@return	pointer to remaining part of undo record */
1018
UNIV_INTERN
osku's avatar
osku committed
1019 1020 1021
byte*
trx_undo_rec_get_partial_row(
/*=========================*/
1022
	byte*		ptr,	/*!< in: remaining part in update undo log
osku's avatar
osku committed
1023 1024 1025 1026 1027 1028
				record of a suitable type, at the start of
				the stored index columns;
				NOTE that this copy of the undo log record must
				be preserved as long as the partial row is
				used, as we do NOT copy the data in the
				record! */
1029 1030 1031
	dict_index_t*	index,	/*!< in: clustered index */
	dtuple_t**	row,	/*!< out, own: partial row */
	ibool		ignore_prefix, /*!< in: flag to indicate if we
1032 1033
				expect blob prefixes in undo. Used
				only in the assertion. */
1034
	mem_heap_t*	heap)	/*!< in: memory heap from which the memory
osku's avatar
osku committed
1035 1036
				needed is allocated */
{
1037
	const byte*	end_ptr;
osku's avatar
osku committed
1038
	ulint		row_len;
1039

1040 1041 1042 1043 1044
	ut_ad(index);
	ut_ad(ptr);
	ut_ad(row);
	ut_ad(heap);
	ut_ad(dict_index_is_clust(index));
1045

osku's avatar
osku committed
1046 1047 1048 1049 1050 1051
	row_len = dict_table_get_n_cols(index->table);

	*row = dtuple_create(heap, row_len);

	dict_table_copy_types(*row, index->table);

1052
	end_ptr = ptr + mach_read_from_2(ptr);
osku's avatar
osku committed
1053
	ptr += 2;
1054

1055
	while (ptr != end_ptr) {
1056 1057 1058 1059 1060 1061
		dfield_t*		dfield;
		byte*			field;
		ulint			field_no;
		const dict_col_t*	col;
		ulint			col_no;
		ulint			len;
1062
		ulint			orig_len;
1063

osku's avatar
osku committed
1064 1065
		ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);

1066 1067
		col = dict_index_get_nth_col(index, field_no);
		col_no = dict_col_get_no(col);
1068

1069
		ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
osku's avatar
osku committed
1070

1071
		dfield = dtuple_get_nth_field(*row, col_no);
osku's avatar
osku committed
1072 1073

		dfield_set_data(dfield, field, len);
1074

1075 1076
		if (len != UNIV_SQL_NULL
		    && len >= UNIV_EXTERN_STORAGE_FIELD) {
1077 1078
			dfield_set_len(dfield,
				       len - UNIV_EXTERN_STORAGE_FIELD);
1079
			dfield_set_ext(dfield);
1080 1081 1082
			/* If the prefix of this column is indexed,
			ensure that enough prefix is stored in the
			undo log record. */
1083 1084 1085 1086 1087 1088 1089 1090 1091
			if (!ignore_prefix && col->ord_part) {
				ut_a(dfield_get_len(dfield)
				     >= 2 * BTR_EXTERN_FIELD_REF_SIZE);
				ut_a(dict_table_get_format(index->table)
				     >= DICT_TF_FORMAT_ZIP
				     || dfield_get_len(dfield)
				     >= REC_MAX_INDEX_COL_LEN
				     + BTR_EXTERN_FIELD_REF_SIZE);
			}
1092
		}
osku's avatar
osku committed
1093 1094 1095
	}

	return(ptr);
1096
}
#endif /* !UNIV_HOTBACKUP */

/***********************************************************************//**
osku's avatar
osku committed
1100 1101 1102 1103 1104
Erases the unused undo log page end. */
static
void
trx_undo_erase_page_end(
/*====================*/
1105 1106
	page_t*	undo_page,	/*!< in: undo page whose end to erase */
	mtr_t*	mtr)		/*!< in: mtr */
osku's avatar
osku committed
1107 1108 1109 1110
{
	ulint	first_free;

	first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
1111
				      + TRX_UNDO_PAGE_FREE);
osku's avatar
osku committed
1112
	memset(undo_page + first_free, 0xff,
1113
	       (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) - first_free);
osku's avatar
osku committed
1114 1115 1116

	mlog_write_initial_log_record(undo_page, MLOG_UNDO_ERASE_END, mtr);
}
1117

1118
/***********************************************************//**
1119 1120
Parses a redo log record of erasing of an undo page end.
@return	end of log record or NULL */
1121
UNIV_INTERN
osku's avatar
osku committed
1122 1123 1124
byte*
trx_undo_parse_erase_page_end(
/*==========================*/
1125 1126 1127 1128
	byte*	ptr,	/*!< in: buffer */
	byte*	end_ptr __attribute__((unused)), /*!< in: buffer end */
	page_t*	page,	/*!< in: page or NULL */
	mtr_t*	mtr)	/*!< in: mtr or NULL */
osku's avatar
osku committed
1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141
{
	ut_ad(ptr && end_ptr);

	if (page == NULL) {

		return(ptr);
	}

	trx_undo_erase_page_end(page, mtr);

	return(ptr);
}

#ifndef UNIV_HOTBACKUP
/***********************************************************************//**
osku's avatar
osku committed
1144 1145 1146
Writes information to an undo log about an insert, update, or a delete marking
of a clustered index record. This information is used in a rollback of the
transaction and in consistent reads that must look to the history of this
1147 1148
transaction.
@return	DB_SUCCESS or error code */
1149
UNIV_INTERN
osku's avatar
osku committed
1150 1151 1152
ulint
trx_undo_report_row_operation(
/*==========================*/
1153
	ulint		flags,		/*!< in: if BTR_NO_UNDO_LOG_FLAG bit is
osku's avatar
osku committed
1154
					set, does nothing */
1155
	ulint		op_type,	/*!< in: TRX_UNDO_INSERT_OP or
osku's avatar
osku committed
1156
					TRX_UNDO_MODIFY_OP */
1157 1158 1159
	que_thr_t*	thr,		/*!< in: query thread */
	dict_index_t*	index,		/*!< in: clustered index */
	const dtuple_t*	clust_entry,	/*!< in: in the case of an insert,
osku's avatar
osku committed
1160 1161
					index entry to insert into the
					clustered index, otherwise NULL */
1162
	const upd_t*	update,		/*!< in: in the case of an update,
osku's avatar
osku committed
1163
					the update vector, otherwise NULL */
1164
	ulint		cmpl_info,	/*!< in: compiler info on secondary
osku's avatar
osku committed
1165
					index updates */
1166
	const rec_t*	rec,		/*!< in: in case of an update or delete
osku's avatar
osku committed
1167 1168
					marking, the record in the clustered
					index, otherwise NULL */
1169
	roll_ptr_t*	roll_ptr)	/*!< out: rollback pointer to the
osku's avatar
osku committed
1170 1171 1172 1173 1174 1175 1176 1177 1178
					inserted undo log record,
					ut_dulint_zero if BTR_NO_UNDO_LOG
					flag was specified */
{
	trx_t*		trx;
	trx_undo_t*	undo;
	ulint		page_no;
	trx_rseg_t*	rseg;
	mtr_t		mtr;
1179
	ulint		err		= DB_SUCCESS;
osku's avatar
osku committed
1180 1181 1182
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
1183
	rec_offs_init(offsets_);
osku's avatar
osku committed
1184

1185
	ut_a(dict_index_is_clust(index));
osku's avatar
osku committed
1186 1187 1188 1189 1190 1191 1192

	if (flags & BTR_NO_UNDO_LOG_FLAG) {

		*roll_ptr = ut_dulint_zero;

		return(DB_SUCCESS);
	}
1193

osku's avatar
osku committed
1194 1195
	ut_ad(thr);
	ut_ad((op_type != TRX_UNDO_INSERT_OP)
1196
	      || (clust_entry && !update && !rec));
1197

osku's avatar
osku committed
1198 1199
	trx = thr_get_trx(thr);
	rseg = trx->rseg;
1200

osku's avatar
osku committed
1201 1202 1203 1204 1205 1206 1207 1208
	mutex_enter(&(trx->undo_mutex));

	/* If the undo log is not assigned yet, assign one */

	if (op_type == TRX_UNDO_INSERT_OP) {

		if (trx->insert_undo == NULL) {

1209
			err = trx_undo_assign_undo(trx, TRX_UNDO_INSERT);
osku's avatar
osku committed
1210 1211 1212
		}

		undo = trx->insert_undo;
marko's avatar
marko committed
1213 1214

		if (UNIV_UNLIKELY(!undo)) {
1215
			/* Did not succeed */
marko's avatar
marko committed
1216 1217
			mutex_exit(&(trx->undo_mutex));

1218
			return(err);
marko's avatar
marko committed
1219
		}
osku's avatar
osku committed
1220 1221 1222 1223 1224
	} else {
		ut_ad(op_type == TRX_UNDO_MODIFY_OP);

		if (trx->update_undo == NULL) {

1225
			err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);
osku's avatar
osku committed
1226 1227 1228 1229 1230

		}

		undo = trx->update_undo;

marko's avatar
marko committed
1231
		if (UNIV_UNLIKELY(!undo)) {
1232
			/* Did not succeed */
marko's avatar
marko committed
1233
			mutex_exit(&(trx->undo_mutex));
1234
			return(err);
marko's avatar
marko committed
1235
		}
osku's avatar
osku committed
1236

marko's avatar
marko committed
1237 1238
		offsets = rec_get_offsets(rec, index, offsets,
					  ULINT_UNDEFINED, &heap);
osku's avatar
osku committed
1239 1240 1241
	}

	page_no = undo->last_page_no;
1242

osku's avatar
osku committed
1243 1244 1245
	mtr_start(&mtr);

	for (;;) {
marko's avatar
marko committed
1246 1247
		buf_block_t*	undo_block;
		page_t*		undo_page;
marko's avatar
marko committed
1248
		ulint		offset;
marko's avatar
marko committed
1249

1250 1251
		undo_block = buf_page_get_gen(undo->space, undo->zip_size,
					      page_no, RW_X_LATCH,
1252 1253 1254
					      undo->guess_block, BUF_GET,
					      __FILE__, __LINE__, &mtr);
		buf_block_dbg_add_level(undo_block, SYNC_TRX_UNDO_PAGE);
1255

1256
		undo_page = buf_block_get_frame(undo_block);
osku's avatar
osku committed
1257 1258

		if (op_type == TRX_UNDO_INSERT_OP) {
1259 1260
			offset = trx_undo_page_report_insert(
				undo_page, trx, index, clust_entry, &mtr);
osku's avatar
osku committed
1261
		} else {
1262 1263 1264
			offset = trx_undo_page_report_modify(
				undo_page, trx, index, rec, offsets, update,
				cmpl_info, &mtr);
osku's avatar
osku committed
1265 1266
		}

marko's avatar
marko committed
1267
		if (UNIV_UNLIKELY(offset == 0)) {
osku's avatar
osku committed
1268 1269
			/* The record did not fit on the page. We erase the
			end segment of the undo log page and write a log
1270 1271 1272
			record of it: this is to ensure that in the debug
			version the replicate page constructed using the log
			records stays identical to the original page */
osku's avatar
osku committed
1273 1274

			trx_undo_erase_page_end(undo_page, &mtr);
marko's avatar
marko committed
1275 1276 1277
			mtr_commit(&mtr);
		} else {
			/* Success */
1278

marko's avatar
marko committed
1279
			mtr_commit(&mtr);
osku's avatar
osku committed
1280

marko's avatar
marko committed
1281 1282 1283 1284 1285
			undo->empty = FALSE;
			undo->top_page_no = page_no;
			undo->top_offset  = offset;
			undo->top_undo_no = trx->undo_no;
			undo->guess_block = undo_block;
osku's avatar
osku committed
1286

marko's avatar
marko committed
1287 1288 1289
			UT_DULINT_INC(trx->undo_no);

			mutex_exit(&trx->undo_mutex);
marko's avatar
marko committed
1290 1291 1292 1293 1294 1295 1296 1297

			*roll_ptr = trx_undo_build_roll_ptr(
				op_type == TRX_UNDO_INSERT_OP,
				rseg->id, page_no, offset);
			if (UNIV_LIKELY_NULL(heap)) {
				mem_heap_free(heap);
			}
			return(DB_SUCCESS);
osku's avatar
osku committed
1298 1299 1300
		}

		ut_ad(page_no == undo->last_page_no);
1301

osku's avatar
osku committed
1302 1303 1304 1305 1306 1307 1308 1309 1310
		/* We have to extend the undo log by one page */

		mtr_start(&mtr);

		/* When we add a page to an undo log, this is analogous to
		a pessimistic insert in a B-tree, and we must reserve the
		counterpart of the tree latch, which is the rseg mutex. */

		mutex_enter(&(rseg->mutex));
1311

osku's avatar
osku committed
1312 1313 1314
		page_no = trx_undo_add_page(trx, undo, &mtr);

		mutex_exit(&(rseg->mutex));
1315

marko's avatar
marko committed
1316
		if (UNIV_UNLIKELY(page_no == FIL_NULL)) {
osku's avatar
osku committed
1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330
			/* Did not succeed: out of space */

			mutex_exit(&(trx->undo_mutex));
			mtr_commit(&mtr);
			if (UNIV_LIKELY_NULL(heap)) {
				mem_heap_free(heap);
			}
			return(DB_OUT_OF_FILE_SPACE);
		}
	}
}

/*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/

/******************************************************************//**
osku's avatar
osku committed
1332
Copies an undo record to heap. This function can be called if we know that
1333 1334
the undo log record exists.
@return	own: copy of the record */
1335
UNIV_INTERN
osku's avatar
osku committed
1336 1337 1338
trx_undo_rec_t*
trx_undo_get_undo_rec_low(
/*======================*/
1339 1340
	roll_ptr_t	roll_ptr,	/*!< in: roll pointer to record */
	mem_heap_t*	heap)		/*!< in: memory heap where copied */
osku's avatar
osku committed
1341 1342 1343 1344 1345
{
	trx_undo_rec_t*	undo_rec;
	ulint		rseg_id;
	ulint		page_no;
	ulint		offset;
1346
	const page_t*	undo_page;
osku's avatar
osku committed
1347 1348 1349
	trx_rseg_t*	rseg;
	ibool		is_insert;
	mtr_t		mtr;
1350

osku's avatar
osku committed
1351
	trx_undo_decode_roll_ptr(roll_ptr, &is_insert, &rseg_id, &page_no,
1352
				 &offset);
osku's avatar
osku committed
1353 1354 1355
	rseg = trx_rseg_get_on_id(rseg_id);

	mtr_start(&mtr);
1356

1357 1358
	undo_page = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size,
						page_no, &mtr);
1359

osku's avatar
osku committed
1360 1361 1362 1363 1364 1365 1366
	undo_rec = trx_undo_rec_copy(undo_page + offset, heap);

	mtr_commit(&mtr);

	return(undo_rec);
}

1367
/******************************************************************//**
1368
Copies an undo record to heap.
1369 1370 1371 1372 1373 1374

NOTE: the caller must have latches on the clustered index page and
purge_view.

@return DB_SUCCESS, or DB_MISSING_HISTORY if the undo log has been
truncated and we cannot fetch the old version */
1375
UNIV_INTERN
osku's avatar
osku committed
1376 1377 1378
ulint
trx_undo_get_undo_rec(
/*==================*/
1379 1380
	roll_ptr_t	roll_ptr,	/*!< in: roll pointer to record */
	trx_id_t	trx_id,		/*!< in: id of the trx that generated
osku's avatar
osku committed
1381 1382
					the roll pointer: it points to an
					undo log of this transaction */
1383 1384
	trx_undo_rec_t** undo_rec,	/*!< out, own: copy of the record */
	mem_heap_t*	heap)		/*!< in: memory heap where copied */
osku's avatar
osku committed
1385 1386 1387 1388 1389 1390 1391
{
#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */

	if (!trx_purge_update_undo_must_exist(trx_id)) {

1392
		/* It may be that the necessary undo log has already been
osku's avatar
osku committed
1393 1394 1395 1396 1397 1398
		deleted */

		return(DB_MISSING_HISTORY);
	}

	*undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
1399

osku's avatar
osku committed
1400 1401 1402
	return(DB_SUCCESS);
}

1403
/*******************************************************************//**
osku's avatar
osku committed
1404 1405 1406
Build a previous version of a clustered index record. This function checks
that the caller has a latch on the index page of the clustered index record
and an s-latch on the purge_view. This guarantees that the stack of versions
1407
is locked all the way down to the purge_view.
1408 1409 1410
@return DB_SUCCESS, or DB_MISSING_HISTORY if the previous version is
earlier than purge_view, which means that it may have been removed,
DB_ERROR if corrupted record */
1411
UNIV_INTERN
osku's avatar
osku committed
1412 1413 1414
ulint
trx_undo_prev_version_build(
/*========================*/
1415
	const rec_t*	index_rec,/*!< in: clustered index record in the
osku's avatar
osku committed
1416 1417
				index tree */
	mtr_t*		index_mtr __attribute__((unused)),
1418
				/*!< in: mtr which contains the latch to
osku's avatar
osku committed
1419
				index_rec page and purge_view */
1420 1421 1422 1423
	const rec_t*	rec,	/*!< in: version of a clustered index record */
	dict_index_t*	index,	/*!< in: clustered index */
	ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
	mem_heap_t*	heap,	/*!< in: memory heap from which the memory
osku's avatar
osku committed
1424
				needed is allocated */
1425
	rec_t**		old_vers)/*!< out, own: previous version, or NULL if
osku's avatar
osku committed
1426
				rec is the first inserted version, or if
1427 1428 1429
				history data has been deleted (an error),
				or if the purge COULD have removed the version
				though it has not yet done so */
osku's avatar
osku committed
1430
{
1431
	trx_undo_rec_t*	undo_rec	= NULL;
osku's avatar
osku committed
1432
	dtuple_t*	entry;
1433
	trx_id_t	rec_trx_id;
osku's avatar
osku committed
1434
	ulint		type;
1435
	undo_no_t	undo_no;
osku's avatar
osku committed
1436
	dulint		table_id;
1437 1438 1439
	trx_id_t	trx_id;
	roll_ptr_t	roll_ptr;
	roll_ptr_t	old_roll_ptr;
osku's avatar
osku committed
1440 1441 1442 1443 1444 1445 1446 1447 1448 1449
	upd_t*		update;
	byte*		ptr;
	ulint		info_bits;
	ulint		cmpl_info;
	ibool		dummy_extern;
	byte*		buf;
	ulint		err;
#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */
1450 1451 1452
	ut_ad(mtr_memo_contains_page(index_mtr, index_rec, MTR_MEMO_PAGE_S_FIX)
	      || mtr_memo_contains_page(index_mtr, index_rec,
					MTR_MEMO_PAGE_X_FIX));
osku's avatar
osku committed
1453 1454
	ut_ad(rec_offs_validate(rec, index, offsets));

1455
	if (!dict_index_is_clust(index)) {
osku's avatar
osku committed
1456 1457 1458 1459 1460 1461 1462
		fprintf(stderr, "InnoDB: Error: trying to access"
			" update undo rec for non-clustered index %s\n"
			"InnoDB: Submit a detailed bug report to"
			" http://bugs.mysql.com\n"
			"InnoDB: index record ", index->name);
		rec_print(stderr, index_rec, index);
		fputs("\n"
1463
		      "InnoDB: record version ", stderr);
osku's avatar
osku committed
1464 1465
		rec_print_new(stderr, rec, offsets);
		putc('\n', stderr);
1466 1467
		return(DB_ERROR);
	}
osku's avatar
osku committed
1468 1469 1470

	roll_ptr = row_get_rec_roll_ptr(rec, index, offsets);
	old_roll_ptr = roll_ptr;
1471

osku's avatar
osku committed
1472 1473 1474 1475 1476 1477 1478 1479 1480
	*old_vers = NULL;

	if (trx_undo_roll_ptr_is_insert(roll_ptr)) {

		/* The record rec is the first inserted version */

		return(DB_SUCCESS);
	}

1481 1482
	rec_trx_id = row_get_rec_trx_id(rec, index, offsets);

osku's avatar
osku committed
1483 1484
	err = trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap);

1485 1486 1487
	if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
		/* The undo record may already have been purged.
		This should never happen in InnoDB. */
osku's avatar
osku committed
1488 1489 1490 1491 1492

		return(err);
	}

	ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info,
1493
				    &dummy_extern, &undo_no, &table_id);
osku's avatar
osku committed
1494 1495

	ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
1496
					       &info_bits);
1497 1498 1499 1500 1501 1502 1503 1504 1505 1506

	/* (a) If a clustered index record version is such that the
	trx id stamp in it is bigger than purge_sys->view, then the
	BLOBs in that version are known to exist (the purge has not
	progressed that far);

	(b) if the version is the first version such that trx id in it
	is less than purge_sys->view, and it is not delete-marked,
	then the BLOBs in that version are known to exist (the purge
	cannot have purged the BLOBs referenced by that version
1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518
	yet).

	This function does not fetch any BLOBs.  The callers might, by
	possibly invoking row_ext_create() via row_build().  However,
	they should have all needed information in the *old_vers
	returned by this function.  This is because *old_vers is based
	on the transaction undo log records.  The function
	trx_undo_page_fetch_ext() will write BLOB prefixes to the
	transaction undo log that are at least as long as the longest
	possible column prefix in a secondary index.  Thus, secondary
	index entries for *old_vers can be constructed without
	dereferencing any BLOB pointers. */
1519

osku's avatar
osku committed
1520 1521 1522
	ptr = trx_undo_rec_skip_row_ref(ptr, index);

	ptr = trx_undo_update_rec_get_update(ptr, index, type, trx_id,
1523 1524
					     roll_ptr, info_bits,
					     NULL, heap, &update);
osku's avatar
osku committed
1525 1526 1527 1528 1529

	if (ut_dulint_cmp(table_id, index->table->id) != 0) {
		ptr = NULL;

		fprintf(stderr,
1530 1531 1532 1533 1534 1535 1536
			"InnoDB: Error: trying to access update undo rec"
			" for table %s\n"
			"InnoDB: but the table id in the"
			" undo record is wrong\n"
			"InnoDB: Submit a detailed bug report"
			" to http://bugs.mysql.com\n"
			"InnoDB: Run also CHECK TABLE %s\n",
osku's avatar
osku committed
1537 1538 1539 1540 1541 1542 1543 1544
			index->table_name, index->table_name);
	}

	if (ptr == NULL) {
		/* The record was corrupted, return an error; these printfs
		should catch an elusive bug in row_vers_old_has_index_entry */

		fprintf(stderr,
1545 1546 1547 1548 1549
			"InnoDB: table %s, index %s, n_uniq %lu\n"
			"InnoDB: undo rec address %p, type %lu cmpl_info %lu\n"
			"InnoDB: undo rec table id %lu %lu,"
			" index table id %lu %lu\n"
			"InnoDB: dump of 150 bytes in undo rec: ",
osku's avatar
osku committed
1550 1551 1552 1553 1554 1555 1556 1557 1558
			index->table_name, index->name,
			(ulong) dict_index_get_n_unique(index),
			undo_rec, (ulong) type, (ulong) cmpl_info,
			(ulong) ut_dulint_get_high(table_id),
			(ulong) ut_dulint_get_low(table_id),
			(ulong) ut_dulint_get_high(index->table->id),
			(ulong) ut_dulint_get_low(index->table->id));
		ut_print_buf(stderr, undo_rec, 150);
		fputs("\n"
1559
		      "InnoDB: index record ", stderr);
osku's avatar
osku committed
1560 1561
		rec_print(stderr, index_rec, index);
		fputs("\n"
1562
		      "InnoDB: record version ", stderr);
osku's avatar
osku committed
1563 1564
		rec_print_new(stderr, rec, offsets);
		fprintf(stderr, "\n"
vasil's avatar
vasil committed
1565 1566
			"InnoDB: Record trx id " TRX_ID_FMT
			", update rec trx id " TRX_ID_FMT "\n"
1567 1568
			"InnoDB: Roll ptr in rec %lu %lu, in update rec"
			" %lu %lu\n",
vasil's avatar
vasil committed
1569 1570
			TRX_ID_PREP_PRINTF(rec_trx_id),
			TRX_ID_PREP_PRINTF(trx_id),
1571 1572 1573 1574 1575
			(ulong) ut_dulint_get_high(old_roll_ptr),
			(ulong) ut_dulint_get_low(old_roll_ptr),
			(ulong) ut_dulint_get_high(roll_ptr),
			(ulong) ut_dulint_get_low(roll_ptr));

osku's avatar
osku committed
1576 1577 1578 1579 1580
		trx_purge_sys_print();
		return(DB_ERROR);
	}

	if (row_upd_changes_field_size_or_external(index, offsets, update)) {
1581
		ulint	n_ext;
osku's avatar
osku committed
1582 1583 1584

		/* We have to set the appropriate extern storage bits in the
		old version of the record: the extern bits in rec for those
1585
		fields that update does NOT update, as well as the bits for
osku's avatar
osku committed
1586
		those fields that update updates to become externally stored
1587
		fields. Store the info: */
osku's avatar
osku committed
1588

1589 1590
		entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index,
					       offsets, &n_ext, heap);
1591
		n_ext += btr_push_update_extern_fields(entry, update, heap);
1592 1593 1594
		/* The page containing the clustered index record
		corresponding to entry is latched in mtr.  Thus the
		following call is safe. */
1595
		row_upd_index_replace_new_col_vals(entry, index, update, heap);
osku's avatar
osku committed
1596

1597 1598
		buf = mem_heap_alloc(heap, rec_get_converted_size(index, entry,
								  n_ext));
osku's avatar
osku committed
1599

1600 1601
		*old_vers = rec_convert_dtuple_to_rec(buf, index,
						      entry, n_ext);
osku's avatar
osku committed
1602 1603 1604 1605
	} else {
		buf = mem_heap_alloc(heap, rec_offs_size(offsets));
		*old_vers = rec_copy(buf, rec, offsets);
		rec_offs_make_valid(*old_vers, index, offsets);
1606
		row_upd_rec_in_place(*old_vers, index, offsets, update, NULL);
osku's avatar
osku committed
1607 1608 1609 1610
	}

	return(DB_SUCCESS);
}
#endif /* !UNIV_HOTBACKUP */