Commit fc25437a authored by Monty's avatar Monty

MDEV-10104 Table lock race condition with replication

Problem was two race condtion in Aria page cache:
- find_block() didn't inform free_block() that it had released requests
- free_block() didn't handle pinned blocks, which could happen if
  free_block() was called as part of flush. This is fixed by not freeing
  blocks that are pinned.  This is safe as when maria_close() is called
  when last thread is using a table, there can be no pinned blocks. For
  other flush calls it's safe to ignore pinned blocks.
parent e3521ab9
......@@ -493,7 +493,8 @@ static my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block,
#define FLUSH_CACHE 2000 /* sort this many blocks at once */
static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block);
static my_bool free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
my_bool abort_if_pinned);
static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link);
#ifndef DBUG_OFF
static void test_key_cache(PAGECACHE *pagecache,
......@@ -1939,7 +1940,7 @@ static PAGECACHE_BLOCK_LINK *find_block(PAGECACHE *pagecache,
removed from the cache as we set the PCBLOCK_REASSIGNED
flag (see the code below that handles reading requests).
*/
free_block(pagecache, block);
free_block(pagecache, block, 0);
return 0;
}
/* Wait until the page is flushed on disk */
......@@ -1950,7 +1951,7 @@ static PAGECACHE_BLOCK_LINK *find_block(PAGECACHE *pagecache,
/* Invalidate page in the block if it has not been done yet */
DBUG_ASSERT(block->status); /* Should always be true */
if (block->status)
free_block(pagecache, block);
free_block(pagecache, block, 0);
return 0;
}
......@@ -1975,8 +1976,13 @@ static PAGECACHE_BLOCK_LINK *find_block(PAGECACHE *pagecache,
}
else
{
DBUG_ASSERT(hash_link->requests > 0);
hash_link->requests--;
/*
When we come here either PCBLOCK_REASSIGNED or PCBLOCK_IN_SWITCH are
active. In both cases wqueue_release_queue() is called when the
state changes.
*/
DBUG_ASSERT(block->hash_link == hash_link);
remove_reader(block);
KEYCACHE_DBUG_PRINT("find_block",
("request waiting for old page to be saved"));
{
......@@ -3635,7 +3641,7 @@ static my_bool pagecache_delete_internal(PAGECACHE *pagecache,
DBUG_ASSERT(block->hash_link->requests > 0);
page_link->requests--;
/* See NOTE for pagecache_unlock() about registering requests. */
free_block(pagecache, block);
free_block(pagecache, block, 0);
dec_counter_for_resize_op(pagecache);
return 0;
......@@ -4227,7 +4233,8 @@ my_bool pagecache_write_part(PAGECACHE *pagecache,
and add it to the free list.
*/
static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block)
static my_bool free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
my_bool abort_if_pinned)
{
uint status= block->status;
KEYCACHE_THREAD_TRACE("free block");
......@@ -4241,11 +4248,27 @@ static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block)
/*
While waiting for readers to finish, new readers might request the
block. But since we set block->status|= PCBLOCK_REASSIGNED, they
will wait on block->wqueue[COND_FOR_SAVED]. They must be signalled
will wait on block->wqueue[COND_FOR_SAVED]. They must be signaled
later.
*/
block->status|= PCBLOCK_REASSIGNED;
wait_for_readers(pagecache, block);
if (unlikely(abort_if_pinned) && unlikely(block->pins))
{
/*
Block got pinned while waiting for readers.
This can only happens when called from flush_pagecache_blocks_int()
when flushing blocks as part of prepare for maria_close() or from
flush_cached_blocks()
*/
block->status&= ~PCBLOCK_REASSIGNED;
unreg_request(pagecache, block, 0);
/* All pending requests for this page must be resubmitted. */
if (block->wqueue[COND_FOR_SAVED].last_thread)
wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]);
return 1;
}
unlink_hash(pagecache, block->hash_link);
}
......@@ -4296,6 +4319,8 @@ static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block)
/* All pending requests for this page must be resubmitted. */
if (block->wqueue[COND_FOR_SAVED].last_thread)
wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]);
return 0;
}
......@@ -4430,10 +4455,17 @@ static int flush_cached_blocks(PAGECACHE *pagecache,
/* type will never be FLUSH_IGNORE_CHANGED here */
if (! (type == FLUSH_KEEP || type == FLUSH_KEEP_LAZY ||
type == FLUSH_FORCE_WRITE))
{
if (!free_block(pagecache, block, 1))
{
pagecache->blocks_changed--;
pagecache->global_blocks_changed--;
free_block(pagecache, block);
}
else
{
block->status&= ~PCBLOCK_IN_FLUSH;
link_to_file_list(pagecache, block, file, 1);
}
}
else
{
......@@ -4671,7 +4703,7 @@ static int flush_pagecache_blocks_int(PAGECACHE *pagecache,
/* It's a temporary file */
pagecache->blocks_changed--;
pagecache->global_blocks_changed--;
free_block(pagecache, block);
free_block(pagecache, block, 0);
}
}
else if (type != FLUSH_KEEP_LAZY)
......@@ -4741,11 +4773,12 @@ static int flush_pagecache_blocks_int(PAGECACHE *pagecache,
#endif
next= block->next_changed;
if (block->hash_link->file.file == file->file &&
!block->pins &&
(! (block->status & PCBLOCK_CHANGED)
|| type == FLUSH_IGNORE_CHANGED))
{
reg_requests(pagecache, block, 1);
free_block(pagecache, block);
free_block(pagecache, block, 1);
}
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment