/*****************************************************************************

Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA

*****************************************************************************/

/******************************************************
Binary buddy allocator for compressed pages

Created December 2006 by Marko Makela
*******************************************************/

#define THIS_MODULE
#include "buf0buddy.h"
#ifdef UNIV_NONINL
# include "buf0buddy.ic"
#endif
#undef THIS_MODULE
#include "buf0buf.h"
#include "buf0lru.h"
#include "buf0flu.h"
#include "page0zip.h"

/* Statistic counters */

#ifdef UNIV_DEBUG
/** Number of frames allocated from the buffer pool to the buddy system.
Protected by buf_pool_mutex. */
static ulint buf_buddy_n_frames;
#endif /* UNIV_DEBUG */
/** Statistics of the buddy system, indexed by block size.
Protected by buf_pool_mutex. */
UNIV_INTERN buf_buddy_stat_t buf_buddy_stat[BUF_BUDDY_SIZES + 1];

/**************************************************************************
Get the offset of the buddy of a compressed page frame. */
UNIV_INLINE
byte*
buf_buddy_get(
/*==========*/
			/* out: the buddy relative of page */
	byte*	page,	/* in: compressed page */
	ulint	size)	/* in: page size in bytes */
{
	ut_ad(ut_is_2pow(size));
	ut_ad(size >= BUF_BUDDY_LOW);
	ut_ad(size < BUF_BUDDY_HIGH);
	ut_ad(!ut_align_offset(page, size));

	if (((ulint) page) & size) {
		return(page - size);
	} else {
		return(page + size);
	}
}
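
/* Illustrative note (added commentary, not in the original source):
because every block is aligned to its own size, two buddies of size
2^k differ only in bit k of their address, so the branch above is
equivalent to ((ulint) page) ^ size.  For example, with
size == 0x1000, a block at ...0x2000 has its buddy at ...0x3000,
and vice versa. */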

/**************************************************************************
Add a block to the head of the appropriate buddy free list. */
UNIV_INLINE
void
buf_buddy_add_to_free(
/*==================*/
	buf_page_t*	bpage,	/* in,own: block to be freed */
	ulint		i)	/* in: index of buf_pool->zip_free[] */
{
#ifdef UNIV_DEBUG_VALGRIND
	buf_page_t*	b  = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);

	if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
#endif /* UNIV_DEBUG_VALGRIND */

	ut_ad(buf_pool->zip_free[i].start != bpage);
	UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage);

#ifdef UNIV_DEBUG_VALGRIND
	if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
	UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
#endif /* UNIV_DEBUG_VALGRIND */
}

/**************************************************************************
Remove a block from the appropriate buddy free list. */
UNIV_INLINE
void
buf_buddy_remove_from_free(
/*=======================*/
	buf_page_t*	bpage,	/* in: block to be removed */
	ulint		i)	/* in: index of buf_pool->zip_free[] */
{
#ifdef UNIV_DEBUG_VALGRIND
	buf_page_t*	prev = UT_LIST_GET_PREV(zip_list, bpage);
	buf_page_t*	next = UT_LIST_GET_NEXT(zip_list, bpage);

	if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
	if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);

	ut_ad(!prev || buf_page_get_state(prev) == BUF_BLOCK_ZIP_FREE);
	ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
#endif /* UNIV_DEBUG_VALGRIND */

	ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
	UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage);

#ifdef UNIV_DEBUG_VALGRIND
	if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
	if (next) UNIV_MEM_FREE(next, BUF_BUDDY_LOW << i);
#endif /* UNIV_DEBUG_VALGRIND */
}

/**************************************************************************
Try to allocate a block from buf_pool->zip_free[]. */
static
void*
buf_buddy_alloc_zip(
/*================*/
			/* out: allocated block, or NULL
			if buf_pool->zip_free[] was empty */
	ulint	i)	/* in: index of buf_pool->zip_free[] */
{
	buf_page_t*	bpage;

	//ut_ad(buf_pool_mutex_own());
	ut_ad(mutex_own(&zip_free_mutex));
	ut_a(i < BUF_BUDDY_SIZES);

#if defined UNIV_DEBUG && !defined UNIV_DEBUG_VALGRIND
	/* Valgrind would complain about accessing free memory. */
	UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i]);
#endif /* UNIV_DEBUG && !UNIV_DEBUG_VALGRIND */
	bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);

	if (bpage) {
		UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);

		buf_buddy_remove_from_free(bpage, i);
	} else if (i + 1 < BUF_BUDDY_SIZES) {
		/* Attempt to split. */
		bpage = buf_buddy_alloc_zip(i + 1);

		if (bpage) {
			buf_page_t*	buddy = (buf_page_t*)
				(((char*) bpage) + (BUF_BUDDY_LOW << i));

			ut_ad(!buf_pool_contains_zip(buddy));
			ut_d(memset(buddy, i, BUF_BUDDY_LOW << i));
			buddy->state = BUF_BLOCK_ZIP_FREE;
			buf_buddy_add_to_free(buddy, i);
		}
	}

#ifdef UNIV_DEBUG
	if (bpage) {
		memset(bpage, ~i, BUF_BUDDY_LOW << i);
	}
#endif /* UNIV_DEBUG */

	UNIV_MEM_ALLOC(bpage, BUF_BUDDY_LOW << i);

	return(bpage);
}
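
/* Illustrative walk-through (added commentary; assumes
BUF_BUDDY_LOW == 64 bytes, as in typical builds of this era): if
zip_free[0] is empty, the recursive call above takes a 128-byte block
from zip_free[1], splitting further up the chain if that list is empty
too; the lower half is handed to the caller and the upper half (the
"buddy") is linked into zip_free[0]. */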

/**************************************************************************
Deallocate a buffer frame of UNIV_PAGE_SIZE. */
static
void
buf_buddy_block_free(
/*=================*/
	void*	buf,	/* in: buffer frame to deallocate */
	ibool	have_page_hash_mutex)
{
	const ulint	fold	= BUF_POOL_ZIP_FOLD_PTR(buf);
	buf_page_t*	bpage;
	buf_block_t*	block;

	//ut_ad(buf_pool_mutex_own());
	ut_ad(!mutex_own(&buf_pool_zip_mutex));
	ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));

	mutex_enter(&zip_hash_mutex);

	HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
		    ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
			  && bpage->in_zip_hash && !bpage->in_page_hash),
		    ((buf_block_t*) bpage)->frame == buf);
	ut_a(bpage);
	ut_a(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY);
	ut_ad(!bpage->in_page_hash);
	ut_ad(bpage->in_zip_hash);
	ut_d(bpage->in_zip_hash = FALSE);
	HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);

	mutex_exit(&zip_hash_mutex);

	ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
	UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);

	block = (buf_block_t*) bpage;
	mutex_enter(&block->mutex);
	buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
	mutex_exit(&block->mutex);

	ut_ad(buf_buddy_n_frames > 0);
	ut_d(buf_buddy_n_frames--);
}

/**************************************************************************
Allocate a buffer block to the buddy allocator. */
static
void
buf_buddy_block_register(
/*=====================*/
	buf_block_t*	block)	/* in: buffer frame to allocate */
{
	const ulint	fold = BUF_POOL_ZIP_FOLD(block);
	//ut_ad(buf_pool_mutex_own());
	ut_ad(!mutex_own(&buf_pool_zip_mutex));

	buf_block_set_state(block, BUF_BLOCK_MEMORY);

	ut_a(block->frame);
	ut_a(!ut_align_offset(block->frame, UNIV_PAGE_SIZE));

	ut_ad(!block->page.in_page_hash);
	ut_ad(!block->page.in_zip_hash);
	ut_d(block->page.in_zip_hash = TRUE);

	mutex_enter(&zip_hash_mutex);
	HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
	mutex_exit(&zip_hash_mutex);

	ut_d(buf_buddy_n_frames++);
}
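
/* Note (added commentary): zip_hash is keyed on the frame address,
so buf_buddy_block_free() can later recover the buf_block_t that owns
a frame when all it has is the frame pointer that the buddy system
handed out. */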

/**************************************************************************
Allocate a block from a bigger object. */
static
void*
buf_buddy_alloc_from(
/*=================*/
				/* out: allocated block */
	void*		buf,	/* in: a block that is free to use */
	ulint		i,	/* in: index of buf_pool->zip_free[] */
	ulint		j)	/* in: size of buf as an index
				of buf_pool->zip_free[] */
{
	ulint	offs	= BUF_BUDDY_LOW << j;
	ut_ad(j <= BUF_BUDDY_SIZES);
	ut_ad(j >= i);
	ut_ad(!ut_align_offset(buf, offs));

	/* Add the unused parts of the block to the free lists. */
	while (j > i) {
		buf_page_t*	bpage;

		offs >>= 1;
		j--;

		bpage = (buf_page_t*) ((byte*) buf + offs);
		ut_d(memset(bpage, j, BUF_BUDDY_LOW << j));
		bpage->state = BUF_BLOCK_ZIP_FREE;
#if defined UNIV_DEBUG && !defined UNIV_DEBUG_VALGRIND
		/* Valgrind would complain about accessing free memory. */
		UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[j]);
#endif /* UNIV_DEBUG && !UNIV_DEBUG_VALGRIND */
		buf_buddy_add_to_free(bpage, j);
	}

	return(buf);
}
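
/* Illustrative (added commentary): with i == 0 and j == 2 the loop
above runs twice.  First the upper half of the block (at offset
2 * BUF_BUDDY_LOW) goes to zip_free[1], then the second quarter (at
offset BUF_BUDDY_LOW) goes to zip_free[0]; the caller keeps the
lowest BUF_BUDDY_LOW bytes starting at buf. */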

/**************************************************************************
Allocate a block.  In this split-mutex fork the calling thread must
hold LRU_list_mutex (and, when have_page_hash_mutex is TRUE, an
X-latch on page_hash_latch) and must not hold buf_pool_zip_mutex or
any block->mutex.  LRU_list_mutex and page_hash_latch may only be
released and reacquired if lru != NULL. */
UNIV_INTERN
void*
buf_buddy_alloc_low(
/*================*/
			/* out: allocated block,
			possibly NULL if lru==NULL */
	ulint	i,	/* in: index of buf_pool->zip_free[],
			or BUF_BUDDY_SIZES */
	ibool*	lru,	/* in: pointer to a variable that will be assigned
			TRUE if storage was allocated from the LRU list
			and LRU_list_mutex was temporarily released,
			or NULL if the LRU list should not be used */
	ibool	have_page_hash_mutex)
{
	buf_block_t*	block;

	//ut_ad(buf_pool_mutex_own());
	ut_ad(!mutex_own(&buf_pool_zip_mutex));

	if (i < BUF_BUDDY_SIZES) {
		/* Try to allocate from the buddy system. */
		mutex_enter(&zip_free_mutex);
		block = buf_buddy_alloc_zip(i);

		if (block) {

			goto func_exit;
		}

		mutex_exit(&zip_free_mutex);
	}

	/* Try allocating from the buf_pool->free list. */
	block = buf_LRU_get_free_only();

	if (block) {

		goto alloc_big;
	}

	if (!lru) {

		return(NULL);
	}

	/* Try replacing an uncompressed page in the buffer pool. */
	//buf_pool_mutex_exit();
	mutex_exit(&LRU_list_mutex);
	if (have_page_hash_mutex) {
		rw_lock_x_unlock(&page_hash_latch);
	}
	block = buf_LRU_get_free_block(0);
	*lru = TRUE;
	//buf_pool_mutex_enter();
	mutex_enter(&LRU_list_mutex);
	if (have_page_hash_mutex) {
		rw_lock_x_lock(&page_hash_latch);
	}

alloc_big:
	buf_buddy_block_register(block);

	mutex_enter(&zip_free_mutex);
	block = buf_buddy_alloc_from(block->frame, i, BUF_BUDDY_SIZES);

func_exit:
	buf_buddy_stat[i].used++;
	mutex_exit(&zip_free_mutex);

	return(block);
}
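
/* Illustrative caller sketch (added commentary; hypothetical, not one
of the real call sites, which reach this function through the
buf_buddy_alloc() wrapper).  The caller holds LRU_list_mutex, passes
lru to learn whether that mutex was released, and revalidates any
cached buffer-pool state if it was:

	ibool	lru	= FALSE;
	void*	blk	= buf_buddy_alloc_low(
		buf_buddy_get_slot(zip_size), &lru, FALSE);

	if (blk && lru) {
		(repeat any page_hash lookups made before the call)
	}
*/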

/**************************************************************************
Try to relocate the control block of a compressed page. */
static
ibool
buf_buddy_relocate_block(
/*=====================*/
				/* out: TRUE if relocated */
	buf_page_t*	bpage,	/* in: block to relocate */
	buf_page_t*	dpage)	/* in: free block to relocate to */
{
	buf_page_t*	b;

	//ut_ad(buf_pool_mutex_own());
#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
#endif

	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_ZIP_FREE:
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_READY_FOR_USE:
	case BUF_BLOCK_FILE_PAGE:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
		/* ut_error; */ /* optimistic */
	case BUF_BLOCK_ZIP_DIRTY:
		/* Cannot relocate dirty pages. */
		return(FALSE);

	case BUF_BLOCK_ZIP_PAGE:
		break;
	}

	mutex_enter(&buf_pool_zip_mutex);
	mutex_enter(&zip_free_mutex);

	if (!buf_page_can_relocate(bpage)) {
		mutex_exit(&buf_pool_zip_mutex);
		mutex_exit(&zip_free_mutex);
		return(FALSE);
	}

	if (bpage != buf_page_hash_get(bpage->space, bpage->offset)) {
		mutex_exit(&buf_pool_zip_mutex);
		mutex_exit(&zip_free_mutex);
		return(FALSE);
	}

	buf_relocate(bpage, dpage);
	ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);

	/* relocate buf_pool->zip_clean */
	mutex_enter(&flush_list_mutex);
	b = UT_LIST_GET_PREV(zip_list, dpage);
	UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, dpage);

	if (b) {
		UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, dpage);
	} else {
		UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, dpage);
	}
	mutex_exit(&flush_list_mutex);

	mutex_exit(&buf_pool_zip_mutex);
	mutex_exit(&zip_free_mutex);
	return(TRUE);
}

/**************************************************************************
Try to relocate a block. */
static
ibool
buf_buddy_relocate(
/*===============*/
			/* out: TRUE if relocated */
	void*	src,	/* in: block to relocate */
	void*	dst,	/* in: free block to relocate to */
	ulint	i,	/* in: index of buf_pool->zip_free[] */
	ibool	have_page_hash_mutex)
{
	buf_page_t*	bpage;
	const ulint	size	= BUF_BUDDY_LOW << i;
	ullint		usec	= ut_time_us(NULL);

	//ut_ad(buf_pool_mutex_own());
	ut_ad(mutex_own(&zip_free_mutex));
	ut_ad(!mutex_own(&buf_pool_zip_mutex));
	ut_ad(!ut_align_offset(src, size));
	ut_ad(!ut_align_offset(dst, size));
	UNIV_MEM_ASSERT_W(dst, size);

	/* We assume that all memory from buf_buddy_alloc()
	is used for either compressed pages or buf_page_t
	objects covering compressed pages. */

	/* We look inside the allocated objects returned by
	buf_buddy_alloc() and assume that anything of
	PAGE_ZIP_MIN_SIZE or larger is a compressed page that contains
	a valid space_id and page_no in the page header.  Should the
	fields be invalid, we will be unable to relocate the block.
	We also assume that anything that fits sizeof(buf_page_t)
	actually is a properly initialized buf_page_t object. */
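
	/* For example (added commentary): a block holding a compressed
	page is identified by reading FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID
	and FIL_PAGE_OFFSET from its page header and looking the pair
	up in buf_pool->page_hash, whereas a block from the
	buf_buddy_get_slot(sizeof(buf_page_t)) list is treated as a
	page descriptor and relocated with buf_buddy_relocate_block().
	Both cases are handled below. */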

	if (size >= PAGE_ZIP_MIN_SIZE) {
		mutex_t*	mutex;
		if (!have_page_hash_mutex)
			mutex_exit(&zip_free_mutex);

		/* This is a compressed page. */

		if (!have_page_hash_mutex) {
			mutex_enter(&LRU_list_mutex);
			rw_lock_x_lock(&page_hash_latch);
		}
		/* The src block may be split into smaller blocks,
		some of which may be free.  Thus, the
		mach_read_from_4() calls below may attempt to read
		from free memory.  The memory is "owned" by the buddy
		allocator (and it has been allocated from the buffer
		pool), so there is nothing wrong about this.  The
		mach_read_from_4() calls here will only trigger bogus
		Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */
		bpage = buf_page_hash_get(
			mach_read_from_4((const byte*) src
					 + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID),
			mach_read_from_4((const byte*) src
					 + FIL_PAGE_OFFSET));

		if (!bpage || bpage->zip.data != src) {
			/* The block has probably been freshly
			allocated by buf_LRU_get_free_block() but not
			added to buf_pool->page_hash yet.  Obviously,
			it cannot be relocated. */

			if (!have_page_hash_mutex) {
				mutex_enter(&zip_free_mutex);
				mutex_exit(&LRU_list_mutex);
				rw_lock_x_unlock(&page_hash_latch);
			}
			return(FALSE);
		}

		if (page_zip_get_size(&bpage->zip) != size) {
			/* The block is of different size.  We would
			have to relocate all blocks covered by src.
			For the sake of simplicity, give up. */
			ut_ad(page_zip_get_size(&bpage->zip) < size);

			if (!have_page_hash_mutex) {
				mutex_enter(&zip_free_mutex);
				mutex_exit(&LRU_list_mutex);
				rw_lock_x_unlock(&page_hash_latch);
			}
			return(FALSE);
		}

		/* To keep latch order */
		if (have_page_hash_mutex)
			mutex_exit(&zip_free_mutex);

		/* The block must have been allocated, but it may
		contain uninitialized data. */
		UNIV_MEM_ASSERT_W(src, size);

		mutex = buf_page_get_mutex(bpage);

retry_lock:
		mutex_enter(mutex);
		if (mutex != buf_page_get_mutex(bpage)) {
			mutex_exit(mutex);
			mutex = buf_page_get_mutex(bpage);
			goto retry_lock;
		}
		mutex_enter(&zip_free_mutex);

		if (buf_page_can_relocate(bpage)) {
			/* Relocate the compressed page. */
			ut_a(bpage->zip.data == src);
			memcpy(dst, src, size);
			bpage->zip.data = dst;
			mutex_exit(mutex);
success:
			UNIV_MEM_INVALID(src, size);
			{
				buf_buddy_stat_t*	buddy_stat
					= &buf_buddy_stat[i];
				buddy_stat->relocated++;
				buddy_stat->relocated_usec
					+= ut_time_us(NULL) - usec;
			}

			if (!have_page_hash_mutex) {
				mutex_exit(&LRU_list_mutex);
				rw_lock_x_unlock(&page_hash_latch);
			}
			return(TRUE);
		}

		if (!have_page_hash_mutex) {
			mutex_exit(&LRU_list_mutex);
			rw_lock_x_unlock(&page_hash_latch);
		}

		mutex_exit(mutex);
	} else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
		/* This must be a buf_page_t object. */
		UNIV_MEM_ASSERT_RW(src, size);

		mutex_exit(&zip_free_mutex);

		if (!have_page_hash_mutex) {
			mutex_enter(&LRU_list_mutex);
			rw_lock_x_lock(&page_hash_latch);
		}

		if (buf_buddy_relocate_block(src, dst)) {
			mutex_enter(&zip_free_mutex);

			if (!have_page_hash_mutex) {
				mutex_exit(&LRU_list_mutex);
				rw_lock_x_unlock(&page_hash_latch);
			}

			goto success;
		}

		mutex_enter(&zip_free_mutex);

		if (!have_page_hash_mutex) {
			mutex_exit(&LRU_list_mutex);
			rw_lock_x_unlock(&page_hash_latch);
		}
	}

	return(FALSE);
}

/**************************************************************************
Deallocate a block. */
UNIV_INTERN
void
buf_buddy_free_low(
/*===============*/
	void*	buf,	/* in: block to be freed, must not be
			pointed to by the buffer pool */
	ulint	i,	/* in: index of buf_pool->zip_free[] */
	ibool	have_page_hash_mutex)
{
	buf_page_t*	bpage;
	buf_page_t*	buddy;

	//ut_ad(buf_pool_mutex_own());
	ut_ad(mutex_own(&zip_free_mutex));
	ut_ad(!mutex_own(&buf_pool_zip_mutex));
	ut_ad(i <= BUF_BUDDY_SIZES);
	ut_ad(buf_buddy_stat[i].used > 0);

	buf_buddy_stat[i].used--;
recombine:
	UNIV_MEM_ASSERT_AND_ALLOC(buf, BUF_BUDDY_LOW << i);
	ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);

	if (i == BUF_BUDDY_SIZES) {
		mutex_exit(&zip_free_mutex);
		buf_buddy_block_free(buf, have_page_hash_mutex);
		mutex_enter(&zip_free_mutex);
		return;
	}

	ut_ad(i < BUF_BUDDY_SIZES);
	ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i));
	ut_ad(!buf_pool_contains_zip(buf));

	/* Try to combine adjacent blocks. */

	buddy = (buf_page_t*) buf_buddy_get(((byte*) buf), BUF_BUDDY_LOW << i);

#ifndef UNIV_DEBUG_VALGRIND
	/* Valgrind would complain about accessing free memory. */

	if (buddy->state != BUF_BLOCK_ZIP_FREE) {

		goto buddy_nonfree;
	}

	/* The field buddy->state can only be trusted for free blocks.
	If buddy->state == BUF_BLOCK_ZIP_FREE, the block is free if
	it is in the free list. */
#endif /* !UNIV_DEBUG_VALGRIND */
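
	/* Illustrative (added commentary): if the buddy of buf is found
	on zip_free[i] by the loop below, the two halves are merged and
	the procedure retries one level up ("goto recombine") with the
	block of twice the size that contains them; in the best case
	this cascades until i == BUF_BUDDY_SIZES and the whole
	UNIV_PAGE_SIZE frame is returned via buf_buddy_block_free(). */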

	for (bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); bpage; ) {
		UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
		ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);

		if (bpage == buddy) {
buddy_free:
			/* The buddy is free: recombine */
			buf_buddy_remove_from_free(bpage, i);
buddy_free2:
			ut_ad(buf_page_get_state(buddy) == BUF_BLOCK_ZIP_FREE);
			ut_ad(!buf_pool_contains_zip(buddy));
			i++;
			buf = ut_align_down(buf, BUF_BUDDY_LOW << i);

			goto recombine;
		}

		ut_a(bpage != buf);

		{
			buf_page_t*	next = UT_LIST_GET_NEXT(zip_list, bpage);
			UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
			bpage = next;
		}
	}

#ifndef UNIV_DEBUG_VALGRIND
buddy_nonfree:
	/* Valgrind would complain about accessing free memory. */
	ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i]));
#endif /* UNIV_DEBUG_VALGRIND */

	/* The buddy is not free. Is there a free block of this size? */
	bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);

	if (bpage) {
		/* Remove the block from the free list, because a successful
		buf_buddy_relocate() will overwrite bpage->list. */

		UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
		buf_buddy_remove_from_free(bpage, i);

		/* Try to relocate the buddy of buf to the free block. */
		if (buf_buddy_relocate(buddy, bpage, i, have_page_hash_mutex)) {

			ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
			goto buddy_free2;
		}

		buf_buddy_add_to_free(bpage, i);

		/* Try to relocate the buddy of the free block to buf. */
		buddy = (buf_page_t*) buf_buddy_get(((byte*) bpage),
						    BUF_BUDDY_LOW << i);

#if defined UNIV_DEBUG && !defined UNIV_DEBUG_VALGRIND
		{
			const buf_page_t* b;

			/* The buddy must not be (completely) free, because
			we always recombine adjacent free blocks.
			(Parts of the buddy can be free in
			buf_pool->zip_free[j] with j < i.)*/
			for (b = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
			     b; b = UT_LIST_GET_NEXT(zip_list, b)) {

				ut_a(b != buddy);
			}
		}
#endif /* UNIV_DEBUG && !UNIV_DEBUG_VALGRIND */

		if (buf_buddy_relocate(buddy, buf, i, have_page_hash_mutex)) {

			buf = bpage;
			UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
			ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
			goto buddy_free;
		}
	}

	/* Free the block to the buddy list. */
	bpage = buf;
#ifdef UNIV_DEBUG
	if (i < buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE)) {
		/* This area has most likely been allocated for at
		least one compressed-only block descriptor.  Check
		that there are no live objects in the area.  This is
		not a complete check: it may yield false positives as
		well as false negatives.  Also, due to buddy blocks
		being recombined, it is possible (although unlikely)
		that this branch is never reached. */

		char* c;

# ifndef UNIV_DEBUG_VALGRIND
		/* Valgrind would complain about accessing
		uninitialized memory.  Besides, Valgrind performs a
		more exhaustive check, at every memory access. */
		const buf_page_t* b = buf;
		const buf_page_t* const b_end = (buf_page_t*)
			((char*) b + (BUF_BUDDY_LOW << i));

		for (; b < b_end; b++) {
			/* Avoid false positives (and cause false
			negatives) by checking for b->space < 1000. */

			if ((b->state == BUF_BLOCK_ZIP_PAGE
			     || b->state == BUF_BLOCK_ZIP_DIRTY)
			    && b->space > 0 && b->space < 1000) {
				fprintf(stderr,
					"buddy dirty %p %u (%u,%u) %p,%lu\n",
					(void*) b,
					b->state, b->space, b->offset,
					buf, i);
			}
		}
# endif /* !UNIV_DEBUG_VALGRIND */

		/* Scramble the block.  This should make any pointers
		invalid and trigger a segmentation violation.  Because
		the scrambling can be reversed, it may be possible to
		track down the object pointing to the freed data by
		dereferencing the unscrambled bpage->LRU or
		bpage->list pointers. */
		for (c = (char*) buf + (BUF_BUDDY_LOW << i);
		     c-- > (char*) buf; ) {
			*c = ~*c ^ i;
		}
	} else {
		/* Fill large blocks with a constant pattern. */
		memset(bpage, i, BUF_BUDDY_LOW << i);
	}
#endif /* UNIV_DEBUG */
	bpage->state = BUF_BLOCK_ZIP_FREE;
	buf_buddy_add_to_free(bpage, i);
}