MDEV-8139 Fix Scrubbing

fil_space_t::freed_ranges: Store ranges of freed page numbers.

fil_space_t::last_freed_lsn: Store the most recent LSN of
freeing a page.

fil_space_t::freed_range_mutex: Protects freed_ranges, last_freed_lsn.

fil_space_create(): Initialize the freed_range mutex.

fil_space_free_low(): Frees the freed_range mutex.

range_set: Ranges of page numbers.

buf_page_create(): Removes the page from freed_ranges when page
is being reused.

btr_free_root(): Remove the PAGE_INDEX_ID invalidation. Because
btr_free_root() and dict_drop_index_tree() are executed in
the same atomic mini-transaction, there is no need to
invalidate the root page.

buf_release_freed_page(): Split from buf_flush_freed_page().
Skips any I/O.

buf_flush_freed_pages(): Get the freed ranges from the tablespace and
punch holes or write zeroes for the freed ranges.

buf_flush_try_neighbors(): Handles the flushing of freed ranges.

mtr_t::freed_pages: Variable to store the list of freed pages.

mtr_t::add_freed_pages(): To add freed pages.

mtr_t::clear_freed_pages(): To clear the freed pages.

mtr_t::m_freed_in_system_tablespace: Variable to indicate whether a page has
been freed in the system tablespace.

mtr_t::m_trim_pages: Variable to indicate whether the space has been trimmed.

mtr_t::commit(): Add the freed page and update the last freed lsn
in the tablespace and clear the tablespace freed range if space is
trimmed.

file_name_t::freed_pages: Store the freed pages during recovery.

file_name_t::add_freed_page(), file_name_t::remove_freed_page(): To
add and remove freed page during recovery.

store_freed_or_init_rec(): Store or remove the freed pages while
encountering FREE_PAGE or INIT_PAGE redo log record.

recv_init_crash_recovery_spaces(): Add the freed page encountered
during recovery to respective tablespace.
parent 07d1c856
##############################################################################
#
# List the test cases that are to be disabled temporarily.
#
# Separate the test case name and the comment with ':'.
#
# <testcasename> : BUG#<xxxx> <date disabled> <disabler> <comment>
#
# Do not use any TAB characters for whitespace.
#
##############################################################################
innodb_scrub : MDEV-8139 scrubbing does not work reliably
innodb_scrub_background : MDEV-8139 scrubbing does not work reliably
create table snapshot_status engine = myisam
select * from information_schema.global_status
where variable_name like 'innodb_scrub%';
# MDEV-8139 Fix scrubbing tests
# FIXME: Add index(b) to each table; ensure that undo logs are scrubbed.
create table delete_3 (
a int auto_increment primary key,
b varchar(256),
c text) engine = innodb row_format=compressed;
delete from delete_3;
create table delete_rollback_delete_3 (
a int auto_increment primary key,
b varchar(256),
c text) engine = innodb row_format=compressed;
begin;
delete from delete_rollback_delete_3;
rollback;
delete from delete_rollback_delete_3;
create table insert_rollback_3 (
a int auto_increment primary key,
b varchar(256),
c text) engine = innodb row_format=compressed;
begin;
rollback;
create table delete_2 (
a int auto_increment primary key,
b varchar(256),
c text) engine = innodb row_format=compact;
delete from delete_2;
create table delete_rollback_delete_2 (
a int auto_increment primary key,
b varchar(256),
c text) engine = innodb row_format=compact;
begin;
delete from delete_rollback_delete_2;
rollback;
delete from delete_rollback_delete_2;
create table insert_rollback_2 (
a int auto_increment primary key,
b varchar(256),
c text) engine = innodb row_format=compact;
begin;
rollback;
create table delete_1 (
a int auto_increment primary key,
b varchar(256),
c text) engine = innodb row_format=redundant;
delete from delete_1;
create table delete_rollback_delete_1 (
a int auto_increment primary key,
b varchar(256),
c text) engine = innodb row_format=redundant;
begin;
delete from delete_rollback_delete_1;
rollback;
delete from delete_rollback_delete_1;
create table insert_rollback_1 (
a int auto_increment primary key,
b varchar(256),
c text) engine = innodb row_format=redundant;
begin;
rollback;
create table delete_0 (
a int auto_increment primary key,
b varchar(256),
c text) engine = innodb row_format=dynamic;
delete from delete_0;
create table delete_rollback_delete_0 (
a int auto_increment primary key,
b varchar(256),
c text) engine = innodb row_format=dynamic;
begin;
delete from delete_rollback_delete_0;
rollback;
delete from delete_rollback_delete_0;
create table insert_rollback_0 (
a int auto_increment primary key,
b varchar(256),
c text) engine = innodb row_format=dynamic;
begin;
rollback;
SET GLOBAL innodb_fast_shutdown=0;
# delete_3.ibd
# delete_rollback_delete_3.ibd
# insert_rollback_3.ibd
# delete_2.ibd
# delete_rollback_delete_2.ibd
# insert_rollback_2.ibd
# delete_1.ibd
# delete_rollback_delete_1.ibd
# insert_rollback_1.ibd
# delete_0.ibd
# delete_rollback_delete_0.ibd
# insert_rollback_0.ibd
check table delete_3, delete_rollback_delete_3, insert_rollback_3;
Table Op Msg_type Msg_text
test.delete_3 check status OK
test.delete_rollback_delete_3 check status OK
test.insert_rollback_3 check status OK
drop table delete_3, delete_rollback_delete_3, insert_rollback_3;
check table delete_2, delete_rollback_delete_2, insert_rollback_2;
Table Op Msg_type Msg_text
test.delete_2 check status OK
test.delete_rollback_delete_2 check status OK
test.insert_rollback_2 check status OK
drop table delete_2, delete_rollback_delete_2, insert_rollback_2;
check table delete_1, delete_rollback_delete_1, insert_rollback_1;
Table Op Msg_type Msg_text
test.delete_1 check status OK
test.delete_rollback_delete_1 check status OK
test.insert_rollback_1 check status OK
drop table delete_1, delete_rollback_delete_1, insert_rollback_1;
check table delete_0, delete_rollback_delete_0, insert_rollback_0;
Table Op Msg_type Msg_text
test.delete_0 check status OK
test.delete_rollback_delete_0 check status OK
test.insert_rollback_0 check status OK
drop table delete_0, delete_rollback_delete_0, insert_rollback_0;
show variables like 'innodb_%scrub_data%';
Variable_name Value
innodb_background_scrub_data_check_interval 3600
innodb_background_scrub_data_compressed OFF
innodb_background_scrub_data_interval 604800
innodb_background_scrub_data_uncompressed OFF
innodb_immediate_scrub_data_uncompressed ON
# verify that this test have not caused any background scrubbing
select ss.variable_name, gs.variable_value - ss.variable_value as variable_value
from snapshot_status ss,
information_schema.global_status gs
where ss.variable_name = gs.variable_name;
variable_name variable_value
INNODB_SCRUB_BACKGROUND_PAGE_REORGANIZATIONS 0
INNODB_SCRUB_BACKGROUND_PAGE_SPLITS 0
INNODB_SCRUB_BACKGROUND_PAGE_SPLIT_FAILURES_MISSING_INDEX 0
INNODB_SCRUB_BACKGROUND_PAGE_SPLIT_FAILURES_OUT_OF_FILESPACE 0
INNODB_SCRUB_BACKGROUND_PAGE_SPLIT_FAILURES_UNDERFLOW 0
INNODB_SCRUB_BACKGROUND_PAGE_SPLIT_FAILURES_UNKNOWN 0
drop table snapshot_status;
#
# immediate scrubbing is off
# background scrubbing is on
#
show variables like 'innodb_%scrub_data%';
Variable_name Value
innodb_background_scrub_data_check_interval 3600
innodb_background_scrub_data_compressed ON
innodb_background_scrub_data_interval 604800
innodb_background_scrub_data_uncompressed ON
innodb_immediate_scrub_data_uncompressed OFF
# make sure spaces are checked quickly
SET GLOBAL innodb_background_scrub_data_check_interval=1;
create table delete_3 (
a int auto_increment primary key,
b varchar(256),
c text,
index(b)) engine = innodb row_format=compressed;
# Populate table with rows
delete from delete_3;
create table delete_rollback_delete_3 (
a int auto_increment primary key,
b varchar(256),
c text,
index(b)) engine = innodb row_format=compressed;
# Populate table with rows
begin;
delete from delete_rollback_delete_3;
rollback;
delete from delete_rollback_delete_3;
create table insert_rollback_3 (
a int auto_increment primary key,
b varchar(256),
c text,
index(b)) engine = innodb row_format=compressed;
# Populate table with rows
begin;
rollback;
create table delete_2 (
a int auto_increment primary key,
b varchar(256),
c text,
index(b)) engine = innodb row_format=compact;
# Populate table with rows
delete from delete_2;
create table delete_rollback_delete_2 (
a int auto_increment primary key,
b varchar(256),
c text,
index(b)) engine = innodb row_format=compact;
# Populate table with rows
begin;
delete from delete_rollback_delete_2;
rollback;
delete from delete_rollback_delete_2;
create table insert_rollback_2 (
a int auto_increment primary key,
b varchar(256),
c text,
index(b)) engine = innodb row_format=compact;
# Populate table with rows
begin;
rollback;
create table delete_1 (
a int auto_increment primary key,
b varchar(256),
c text,
index(b)) engine = innodb row_format=redundant;
# Populate table with rows
delete from delete_1;
create table delete_rollback_delete_1 (
a int auto_increment primary key,
b varchar(256),
c text,
index(b)) engine = innodb row_format=redundant;
# Populate table with rows
begin;
delete from delete_rollback_delete_1;
rollback;
delete from delete_rollback_delete_1;
create table insert_rollback_1 (
a int auto_increment primary key,
b varchar(256),
c text,
index(b)) engine = innodb row_format=redundant;
# Populate table with rows
begin;
rollback;
create table delete_0 (
a int auto_increment primary key,
b varchar(256),
c text,
index(b)) engine = innodb row_format=dynamic;
# Populate table with rows
delete from delete_0;
create table delete_rollback_delete_0 (
a int auto_increment primary key,
b varchar(256),
c text,
index(b)) engine = innodb row_format=dynamic;
# Populate table with rows
begin;
delete from delete_rollback_delete_0;
rollback;
delete from delete_rollback_delete_0;
create table insert_rollback_0 (
a int auto_increment primary key,
b varchar(256),
c text,
index(b)) engine = innodb row_format=dynamic;
# Populate table with rows
begin;
rollback;
# start scrubbing threads
SET GLOBAL innodb_encryption_threads=5;
# Wait max 10 min for scrubbing
SET GLOBAL innodb_fast_shutdown=0;
# delete_3.ibd
# delete_rollback_delete_3.ibd
# insert_rollback_3.ibd
# delete_2.ibd
# delete_rollback_delete_2.ibd
# insert_rollback_2.ibd
# delete_1.ibd
# delete_rollback_delete_1.ibd
# insert_rollback_1.ibd
# delete_0.ibd
# delete_rollback_delete_0.ibd
# insert_rollback_0.ibd
check table delete_3, delete_rollback_delete_3, insert_rollback_3;
Table Op Msg_type Msg_text
test.delete_3 check status OK
test.delete_rollback_delete_3 check status OK
test.insert_rollback_3 check status OK
drop table delete_3, delete_rollback_delete_3, insert_rollback_3;
check table delete_2, delete_rollback_delete_2, insert_rollback_2;
Table Op Msg_type Msg_text
test.delete_2 check status OK
test.delete_rollback_delete_2 check status OK
test.insert_rollback_2 check status OK
drop table delete_2, delete_rollback_delete_2, insert_rollback_2;
check table delete_1, delete_rollback_delete_1, insert_rollback_1;
Table Op Msg_type Msg_text
test.delete_1 check status OK
test.delete_rollback_delete_1 check status OK
test.insert_rollback_1 check status OK
drop table delete_1, delete_rollback_delete_1, insert_rollback_1;
check table delete_0, delete_rollback_delete_0, insert_rollback_0;
Table Op Msg_type Msg_text
test.delete_0 check status OK
test.delete_rollback_delete_0 check status OK
test.insert_rollback_0 check status OK
drop table delete_0, delete_rollback_delete_0, insert_rollback_0;
show variables like 'innodb_%scrub_data%';
Variable_name Value
innodb_background_scrub_data_check_interval 3600
innodb_background_scrub_data_compressed ON
innodb_background_scrub_data_interval 604800
innodb_background_scrub_data_uncompressed ON
innodb_immediate_scrub_data_uncompressed OFF
--innodb-background-scrub-data-compressed=OFF
--innodb-background-scrub-data-uncompressed=OFF
--innodb-encrypt-tables=OFF
--innodb-encryption-threads=0
--innodb-immediate-scrub-data-uncompressed=ON
--loose-innodb-debug-force-scrubbing=ON
--innodb-tablespaces-scrubbing
-- source include/have_innodb.inc
-- source include/not_embedded.inc
-- source include/have_example_key_management_plugin.inc
let $MYSQLD_DATADIR=`select @@datadir`;
let INNODB_PAGE_SIZE= `select @@innodb_page_size`;
create table snapshot_status engine = myisam
select * from information_schema.global_status
where variable_name like 'innodb_scrub_background%';
let $rowcount=500;
let $maxformatno= 4;
let $formatno= $maxformatno;
--echo # MDEV-8139 Fix scrubbing tests
--echo # FIXME: Add index(b) to each table; ensure that undo logs are scrubbed.
let $tableformat= (
a int auto_increment primary key,
b varchar(256),
c text) engine = innodb row_format;
while ($formatno)
{
dec $formatno;
let $format = `select case $formatno
when 0 then 'dynamic'
when 1 then 'redundant'
when 2 then 'compact'
when 3 then 'compressed'
end`;
let $t= delete_$formatno;
eval create table $t $tableformat=$format;
let $numinserts = $rowcount;
--disable_query_log
begin;
while ($numinserts)
{
dec $numinserts;
eval insert into $t(b,c) values ('repairman', repeat('unicycle', 1000));
}
commit;
--enable_query_log
eval delete from $t;
let $t= delete_rollback_delete_$formatno;
eval create table $t $tableformat=$format;
let $numinserts = $rowcount;
--disable_query_log
begin;
while ($numinserts)
{
dec $numinserts;
eval insert into $t(b,c) values ('breakhuman', repeat('bicycle', 1000));
}
commit;
--enable_query_log
begin;
eval delete from $t;
rollback;
eval delete from $t;
let $t= insert_rollback_$formatno;
eval create table $t $tableformat=$format;
let $numinserts = $rowcount;
begin;
--disable_query_log
while ($numinserts)
{
dec $numinserts;
eval insert into $t(b,c) values ('wonderwoman', repeat('tricycle', 1000));
}
--enable_query_log
rollback;
}
SET GLOBAL innodb_fast_shutdown=0;
-- source include/shutdown_mysqld.inc
let SEARCH_ABORT= FOUND;
let SEARCH_PATTERN= (un|b|tr)icycle|(repair|breakhu|wonderwo)man;
let SEARCH_RANGE= 12582912;
let SEARCH_FILE= $MYSQLD_DATADIR/ibdata1;
# We may randomly find copies of unscrubbed pages in the doublewrite buffer.
# Let us scrub the doublewrite buffer ourselves.
perl;
use Fcntl 'SEEK_SET';
my $page_size = $ENV{INNODB_PAGE_SIZE};
open(FILE, "+<", "$ENV{SEARCH_FILE}") or die "cannot open: $!\n";
seek(FILE, $page_size * 64, SEEK_SET) or die "cannot seek: $!\n";
print(FILE chr(0) x ($page_size * 128)) or die "cannot write: $!\n";
close FILE or die "cannot close: $!\n";;
EOF
-- source include/search_pattern_in_file.inc
let $formatno= $maxformatno;
while ($formatno)
{
dec $formatno;
let $t= delete_$formatno.ibd;
let SEARCH_FILE= $MYSQLD_DATADIR/test/$t;
-- echo # $t
-- source include/search_pattern_in_file.inc
let $t= delete_rollback_delete_$formatno.ibd;
let SEARCH_FILE= $MYSQLD_DATADIR/test/$t;
-- echo # $t
-- source include/search_pattern_in_file.inc
let $t= insert_rollback_$formatno.ibd;
let SEARCH_FILE= $MYSQLD_DATADIR/test/$t;
-- echo # $t
-- source include/search_pattern_in_file.inc
}
-- source include/start_mysqld.inc
let $formatno= $maxformatno;
while ($formatno)
{
dec $formatno;
let $t= delete_$formatno, delete_rollback_delete_$formatno, insert_rollback_$formatno;
eval check table $t;
eval drop table $t;
}
show variables like 'innodb_%scrub_data%';
--echo # verify that this test have not caused any background scrubbing
--sorted_result
select ss.variable_name, gs.variable_value - ss.variable_value as variable_value
from snapshot_status ss,
information_schema.global_status gs
where ss.variable_name = gs.variable_name;
drop table snapshot_status;
--innodb-immediate-scrub-data-uncompressed=OFF
--innodb-background-scrub-data-uncompressed=ON
--innodb-background-scrub-data-compressed=ON
--loose-innodb-debug-force-scrubbing=ON
--innodb-encryption-threads=0
--innodb-encrypt-tables=OFF
--innodb-tablespaces-scrubbing
-- source include/have_innodb.inc
-- source include/not_embedded.inc
-- source include/have_example_key_management_plugin.inc
let $MYSQLD_DATADIR=`select @@datadir`;
let INNODB_PAGE_SIZE= `select @@innodb_page_size`;
--echo #
--echo # immediate scrubbing is off
--echo # background scrubbing is on
--echo #
show variables like 'innodb_%scrub_data%';
-- echo # make sure spaces are checked quickly
SET GLOBAL innodb_background_scrub_data_check_interval=1;
let $rowcount=500;
let $maxformatno= 4;
let $formatno= $maxformatno;
let $tableformat= (
a int auto_increment primary key,
b varchar(256),
c text,
index(b)) engine = innodb row_format;
while ($formatno)
{
dec $formatno;
let $format = `select case $formatno
when 0 then 'dynamic'
when 1 then 'redundant'
when 2 then 'compact'
when 3 then 'compressed'
end`;
let $t= delete_$formatno;
eval create table $t $tableformat=$format;
let $numinserts = $rowcount;
-- echo # Populate table with rows
--disable_query_log
begin;
while ($numinserts)
{
dec $numinserts;
eval insert into $t(b,c) values ('unicycle', repeat('wonderwoman', 1000));
}
commit;
--enable_query_log
eval delete from $t;
let $t= delete_rollback_delete_$formatno;
eval create table $t $tableformat=$format;
let $numinserts = $rowcount;
-- echo # Populate table with rows
--disable_query_log
begin;
while ($numinserts)
{
dec $numinserts;
eval insert into $t(b,c) values ('bicycle', repeat('repairman', 1000));
}
commit;
--enable_query_log
begin;
eval delete from $t;
rollback;
eval delete from $t;
let $t= insert_rollback_$formatno;
eval create table $t $tableformat=$format;
let $numinserts = $rowcount;
-- echo # Populate table with rows
begin;
--disable_query_log
while ($numinserts)
{
dec $numinserts;
eval insert into $t(b,c) values ('tricycle', repeat('superhuman', 1000));
}
--enable_query_log
rollback;
}
-- echo # start scrubbing threads
SET GLOBAL innodb_encryption_threads=5;
-- echo # Wait max 10 min for scrubbing
# Poll INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING once per second,
# at most 600 times, until no tablespace under test/ and no tablespace
# with SPACE = 0 has LAST_SCRUB_COMPLETED IS NULL.
let $cnt=600;
while ($cnt)
{
# $success is 1 when the query finds no matching row, 0 otherwise.
let $success=`SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING WHERE LAST_SCRUB_COMPLETED IS NULL AND (NAME LIKE 'test/%' OR SPACE = 0)`;
if ($success)
{
# Done: leave the loop by zeroing the counter.
let $cnt=0;
}
if (!$success)
{
# Not finished yet: wait one second and use up one attempt.
real_sleep 1;
dec $cnt;
}
}
# The counter ran out while rows were still pending:
# print diagnostics and abort the test.
if (!$success)
{
SELECT * FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING;
SHOW STATUS LIKE 'innodb_%scrub%';
-- die Timeout waiting for background threads
}
SET GLOBAL innodb_fast_shutdown=0;
-- source include/shutdown_mysqld.inc
# Parameters consumed by include/search_pattern_in_file.inc below.
# Presumably SEARCH_ABORT=FOUND makes the include fail the test when the
# pattern is found - confirm against the include file.
let SEARCH_ABORT= FOUND;
# NOTE(review): this script inserts repeat('superhuman', 1000) into column c
# of the insert_rollback_* tables, but 'superhuman' is not matched by the
# pattern below (only 'repairman', 'breakhuman' and 'wonderwoman' are).
# Leftover copies of that payload would go undetected; consider adding
# 'superhu' to the second alternation.
let SEARCH_PATTERN= (un|b|tr)icycle|(repair|breakhu|wonderwo)man;
# 12582912 = 12 * 1024 * 1024.
let SEARCH_RANGE= 12582912;
# The first file to be searched is the system tablespace file.
let SEARCH_FILE= $MYSQLD_DATADIR/ibdata1;
# We may randomly find copies of unscrubbed pages in the doublewrite buffer.
# Let us scrub the doublewrite buffer ourselves.
perl;
use Fcntl 'SEEK_SET';
my $page_size = $ENV{INNODB_PAGE_SIZE};
open(FILE, "+<", "$ENV{SEARCH_FILE}") or die "cannot open: $!\n";
seek(FILE, $page_size * 64, SEEK_SET) or die "cannot seek: $!\n";
print(FILE chr(0) x ($page_size * 128)) or die "cannot write: $!\n";
close FILE or die "cannot close: $!\n";;
EOF
-- source include/search_pattern_in_file.inc
let $formatno= $maxformatno;
while ($formatno)
{
dec $formatno;
let $t= delete_$formatno.ibd;
let SEARCH_FILE= $MYSQLD_DATADIR/test/$t;
-- echo # $t
-- source include/search_pattern_in_file.inc
let $t= delete_rollback_delete_$formatno.ibd;
let SEARCH_FILE= $MYSQLD_DATADIR/test/$t;
-- echo # $t
-- source include/search_pattern_in_file.inc
let $t= insert_rollback_$formatno.ibd;
let SEARCH_FILE= $MYSQLD_DATADIR/test/$t;
-- echo # $t
-- source include/search_pattern_in_file.inc
}
-- source include/start_mysqld.inc
let $formatno= $maxformatno;
while ($formatno)
{
dec $formatno;
let $t= delete_$formatno, delete_rollback_delete_$formatno, insert_rollback_$formatno;
eval check table $t;
eval drop table $t;
}
show variables like 'innodb_%scrub_data%';
CREATE TABLE t1(f1 int auto_increment primary key,
f2 varchar(256),
f3 text) engine = innodb;
FLUSH TABLE t1 FOR EXPORT;
UNLOCK TABLES;
FOUND 500500 /unicycle|repairman/ in t1.ibd
DELETE FROM t1;
InnoDB 0 transactions not purged
NOT FOUND /unicycle|repairman/ in t1.ibd
DROP TABLE t1;
--innodb-immediate-scrub-data-uncompressed=ON
# Check that the strings inserted into t1 can be found in t1.ibd after the
# insert, and are searched for again after DELETE and purge.
-- source include/have_innodb.inc
let $MYSQLD_DATADIR=`select @@datadir`;
CREATE TABLE t1(f1 int auto_increment primary key,
f2 varchar(256),
f3 text) engine = innodb;
# Insert 500 rows in one transaction; the query log is disabled so that the
# individual INSERT statements are not echoed into the result file.
let $numinserts = 500;
--disable_query_log
begin;
while ($numinserts)
{
dec $numinserts;
eval INSERT INTO t1(f2,f3) VALUES ('repairman', repeat('unicycle', 1000));
}
commit;
--enable_query_log
# Presumably FLUSH ... FOR EXPORT forces the pages of t1 to be written to
# t1.ibd so that the file search below sees them - confirm.
FLUSH TABLE t1 FOR EXPORT;
UNLOCK TABLES;
let SEARCH_PATTERN= unicycle|repairman;
let SEARCH_FILE= $MYSQLD_DATADIR/test/t1.ibd;
# First search: runs while the rows still exist.
-- source include/search_pattern_in_file.inc
DELETE FROM t1;
# Wait for purge before searching the file again.
-- source include/wait_all_purged.inc
# Second search: same pattern and file, after DELETE and purge.
-- source include/search_pattern_in_file.inc
DROP TABLE t1;
...@@ -749,11 +749,6 @@ void btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr, ...@@ -749,11 +749,6 @@ void btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr,
should remain exclusively latched until mtr_t::commit() or until it should remain exclusively latched until mtr_t::commit() or until it
is explicitly freed from the mini-transaction. */ is explicitly freed from the mini-transaction. */
ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX)); ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX));
/* MDEV-15528 FIXME: Zero out the page after the redo log for
this mini-transaction has been durably written.
This must be done unconditionally if
srv_immediate_scrub_data_uncompressed is set. */
} }
/** Set the child page number in a node pointer record. /** Set the child page number in a node pointer record.
...@@ -959,9 +954,8 @@ have been called. ...@@ -959,9 +954,8 @@ have been called.
In a persistent tablespace, the caller must invoke fsp_init_file_page() In a persistent tablespace, the caller must invoke fsp_init_file_page()
before mtr.commit(). before mtr.commit().
@param[in,out] block index root page @param[in,out] block index root page
@param[in,out] mtr mini-transaction @param[in,out] mtr mini-transaction */
@param[in] invalidate whether to invalidate PAGE_INDEX_ID */ static void btr_free_root(buf_block_t *block, mtr_t *mtr)
static void btr_free_root(buf_block_t *block, mtr_t *mtr, bool invalidate)
{ {
ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX | ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX |
MTR_MEMO_PAGE_SX_FIX)); MTR_MEMO_PAGE_SX_FIX));
...@@ -973,16 +967,6 @@ static void btr_free_root(buf_block_t *block, mtr_t *mtr, bool invalidate) ...@@ -973,16 +967,6 @@ static void btr_free_root(buf_block_t *block, mtr_t *mtr, bool invalidate)
ut_a(btr_root_fseg_validate(PAGE_HEADER + PAGE_BTR_SEG_TOP + block->frame, ut_a(btr_root_fseg_validate(PAGE_HEADER + PAGE_BTR_SEG_TOP + block->frame,
block->page.id().space())); block->page.id().space()));
#endif /* UNIV_BTR_DEBUG */ #endif /* UNIV_BTR_DEBUG */
if (invalidate)
{
constexpr uint16_t field= PAGE_HEADER + PAGE_INDEX_ID;
byte *page_index_id= my_assume_aligned<2>(field + block->frame);
if (mtr->write<8,mtr_t::MAYBE_NOP>(*block, page_index_id,
BTR_FREED_INDEX_ID) &&
UNIV_LIKELY_NULL(block->page.zip.data))
memcpy_aligned<2>(&block->page.zip.data[field], page_index_id, 8);
}
/* Free the entire segment in small steps. */ /* Free the entire segment in small steps. */
while (!fseg_free_step(PAGE_HEADER + PAGE_BTR_SEG_TOP + block->frame, mtr)); while (!fseg_free_step(PAGE_HEADER + PAGE_BTR_SEG_TOP + block->frame, mtr));
...@@ -1099,8 +1083,7 @@ btr_create( ...@@ -1099,8 +1083,7 @@ btr_create(
PAGE_HEADER + PAGE_BTR_SEG_LEAF, mtr)) { PAGE_HEADER + PAGE_BTR_SEG_LEAF, mtr)) {
/* Not enough space for new segment, free root /* Not enough space for new segment, free root
segment before return. */ segment before return. */
btr_free_root(block, mtr, btr_free_root(block, mtr);
!index || !index->table->is_temporary());
return(FIL_NULL); return(FIL_NULL);
} }
...@@ -1250,7 +1233,7 @@ btr_free_if_exists( ...@@ -1250,7 +1233,7 @@ btr_free_if_exists(
btr_free_but_not_root(root, mtr->get_log_mode()); btr_free_but_not_root(root, mtr->get_log_mode());
mtr->set_named_space_id(page_id.space()); mtr->set_named_space_id(page_id.space());
btr_free_root(root, mtr, true); btr_free_root(root, mtr);
} }
/** Free an index tree in a temporary tablespace. /** Free an index tree in a temporary tablespace.
...@@ -1265,7 +1248,7 @@ void btr_free(const page_id_t page_id) ...@@ -1265,7 +1248,7 @@ void btr_free(const page_id_t page_id)
if (block) { if (block) {
btr_free_but_not_root(block, MTR_LOG_NO_REDO); btr_free_but_not_root(block, MTR_LOG_NO_REDO);
btr_free_root(block, &mtr, false); btr_free_root(block, &mtr);
} }
mtr.commit(); mtr.commit();
} }
......
...@@ -2579,12 +2579,13 @@ void buf_page_free(const page_id_t page_id, ...@@ -2579,12 +2579,13 @@ void buf_page_free(const page_id_t page_id,
buf_block_t *block= reinterpret_cast<buf_block_t*> buf_block_t *block= reinterpret_cast<buf_block_t*>
(buf_pool.page_hash_get_low(page_id)); (buf_pool.page_hash_get_low(page_id));
if (srv_immediate_scrub_data_uncompressed || mtr->is_page_compressed())
mtr->add_freed_offset(page_id);
if (!block || block->page.state() != BUF_BLOCK_FILE_PAGE) if (!block || block->page.state() != BUF_BLOCK_FILE_PAGE)
{ {
/* FIXME: if block!=NULL, convert to BUF_BLOCK_FILE_PAGE, /* FIXME: if block!=NULL, convert to BUF_BLOCK_FILE_PAGE,
but avoid buf_zip_decompress() */ but avoid buf_zip_decompress() */
/* FIXME: If block==NULL, introduce a separate data structure
to cover freed page ranges to augment buf_flush_freed_page() */
rw_lock_s_unlock(hash_lock); rw_lock_s_unlock(hash_lock);
return; return;
} }
...@@ -3793,16 +3794,20 @@ void buf_block_t::initialise(const page_id_t page_id, ulint zip_size, ...@@ -3793,16 +3794,20 @@ void buf_block_t::initialise(const page_id_t page_id, ulint zip_size,
from a file even if it cannot be found in the buffer buf_pool. This is one from a file even if it cannot be found in the buffer buf_pool. This is one
of the functions which perform to a block a state transition NOT_USED => of the functions which perform to a block a state transition NOT_USED =>
FILE_PAGE (the other is buf_page_get_gen). FILE_PAGE (the other is buf_page_get_gen).
@param[in] page_id page id @param[in,out] space space object
@param[in] offset offset of the tablespace
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 @param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in,out] mtr mini-transaction @param[in,out] mtr mini-transaction
@return pointer to the block, page bufferfixed */ @return pointer to the block, page bufferfixed */
buf_block_t* buf_block_t*
buf_page_create(const page_id_t page_id, ulint zip_size, mtr_t *mtr) buf_page_create(fil_space_t *space, uint32_t offset,
ulint zip_size, mtr_t *mtr)
{ {
page_id_t page_id(space->id, offset);
ut_ad(mtr->is_active()); ut_ad(mtr->is_active());
ut_ad(page_id.space() != 0 || !zip_size); ut_ad(page_id.space() != 0 || !zip_size);
space->free_page(offset, false);
buf_block_t *free_block= buf_LRU_get_free_block(false); buf_block_t *free_block= buf_LRU_get_free_block(false);
free_block->initialise(page_id, zip_size, 1); free_block->initialise(page_id, zip_size, 1);
...@@ -3831,7 +3836,6 @@ buf_page_create(const page_id_t page_id, ulint zip_size, mtr_t *mtr) ...@@ -3831,7 +3836,6 @@ buf_page_create(const page_id_t page_id, ulint zip_size, mtr_t *mtr)
return buf_page_get_gen(page_id, zip_size, RW_NO_LATCH, return buf_page_get_gen(page_id, zip_size, RW_NO_LATCH,
block, BUF_GET_POSSIBLY_FREED, block, BUF_GET_POSSIBLY_FREED,
__FILE__, __LINE__, mtr); __FILE__, __LINE__, mtr);
mutex_exit(&recv_sys.mutex); mutex_exit(&recv_sys.mutex);
block= buf_page_get_with_no_latch(page_id, zip_size, mtr); block= buf_page_get_with_no_latch(page_id, zip_size, mtr);
mutex_enter(&recv_sys.mutex); mutex_enter(&recv_sys.mutex);
......
...@@ -978,43 +978,22 @@ static byte *buf_page_encrypt(fil_space_t* space, buf_page_t* bpage, byte* s, ...@@ -978,43 +978,22 @@ static byte *buf_page_encrypt(fil_space_t* space, buf_page_t* bpage, byte* s,
This function also resets the IO_FIX to IO_NONE and making the This function also resets the IO_FIX to IO_NONE and making the
page status as NORMAL. It initiates the write to the file only after page status as NORMAL. It initiates the write to the file only after
releasing the page from flush list and its associated mutex. releasing the page from flush list and its associated mutex.
@param[in,out] bpage freed buffer page @param[in,out] bpage freed buffer page */
@param[in] space tablespace object of the freed page */ static void buf_release_freed_page(buf_page_t *bpage)
static void buf_flush_freed_page(buf_page_t *bpage, const fil_space_t &space)
{ {
ut_ad(bpage->in_file()); ut_ad(bpage->in_file());
const bool uncompressed= bpage->state() == BUF_BLOCK_FILE_PAGE; const bool uncompressed= bpage->state() == BUF_BLOCK_FILE_PAGE;
const page_id_t page_id(bpage->id());
const auto zip_size= bpage->zip_size();
mutex_enter(&buf_pool.mutex); mutex_enter(&buf_pool.mutex);
bpage->set_io_fix(BUF_IO_NONE); bpage->set_io_fix(BUF_IO_NONE);
bpage->status= buf_page_t::NORMAL; bpage->status= buf_page_t::NORMAL;
buf_flush_remove(bpage); buf_flush_remove(bpage);
buf_pool.stat.n_pages_written++;
mutex_exit(&buf_pool.mutex);
if (uncompressed) if (uncompressed)
rw_lock_sx_unlock_gen(&reinterpret_cast<buf_block_t*>(bpage)->lock, rw_lock_sx_unlock_gen(&reinterpret_cast<buf_block_t*>(bpage)->lock,
BUF_IO_WRITE); BUF_IO_WRITE);
const bool punch_hole= buf_LRU_free_page(bpage, true);
#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) || defined(_WIN32) mutex_exit(&buf_pool.mutex);
space.is_compressed() ||
#endif
false;
ut_ad(space.id == page_id.space());
ut_ad(space.zip_size() == zip_size);
if (punch_hole || srv_immediate_scrub_data_uncompressed)
{
fil_io_t fio= fil_io(IORequestWrite, punch_hole, page_id, zip_size, 0,
zip_size ? zip_size : srv_page_size,
const_cast<byte*>(field_ref_zero), nullptr, false,
punch_hole);
if (punch_hole && fio.node)
fio.node->space->release_for_io();
}
} }
/** Write a flushable page from buf_pool to a file. /** Write a flushable page from buf_pool to a file.
...@@ -1192,7 +1171,7 @@ bool buf_flush_page(buf_page_t *bpage, IORequest::flush_t flush_type, ...@@ -1192,7 +1171,7 @@ bool buf_flush_page(buf_page_t *bpage, IORequest::flush_t flush_type,
switch (status) { switch (status) {
default: default:
ut_ad(status == buf_page_t::FREED); ut_ad(status == buf_page_t::FREED);
buf_flush_freed_page(bpage, *space); buf_release_freed_page(bpage);
goto done; goto done;
case buf_page_t::NORMAL: case buf_page_t::NORMAL:
use_doublewrite= space->use_doublewrite(); use_doublewrite= space->use_doublewrite();
...@@ -1322,7 +1301,64 @@ static page_id_t buf_flush_check_neighbors(const fil_space_t &space, ...@@ -1322,7 +1301,64 @@ static page_id_t buf_flush_check_neighbors(const fil_space_t &space,
return i; return i;
} }
/** Flushes to disk all flushable pages within the flush area. /** Write punch-hole or zeroes of the freed ranges when
innodb_immediate_scrub_data_uncompressed is set or the tablespace is
compressed (fil_space_t::is_compressed()).
@param[in,out] space tablespace whose freed ranges are to be processed */
static void buf_flush_freed_pages(fil_space_t *space)
{
  ut_ad(space != NULL);

  /* Only compressed tablespaces, or any tablespace while
  innodb_immediate_scrub_data_uncompressed is enabled, are processed. */
  if (!(srv_immediate_scrub_data_uncompressed || space->is_compressed()))
    return;

  const lsn_t flushed_lsn= log_sys.get_flushed_lsn();

  std::unique_lock<std::mutex> range_guard(space->freed_range_mutex);

  /* Leave the ranges in place while there is nothing to process or while
  the flushed LSN is still behind the last freed LSN of the tablespace.
  The guard releases the mutex on these early returns. */
  if (space->freed_ranges.empty())
    return;
  if (flushed_lsn < space->get_last_freed_lsn())
    return;

  /* Take over the whole set, so that the I/O below is issued without
  holding the mutex. */
  range_set pending_ranges= std::move(space->freed_ranges);
  range_guard.unlock();

  /* Hole punching is only considered where the build supports it. */
#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) || defined(_WIN32)
  const bool punch_hole= space->is_compressed();
#else
  const bool punch_hole= false;
#endif

  /* Size of one page in this tablespace: the ROW_FORMAT=COMPRESSED
  page size if there is one, else srv_page_size. */
  const auto zip_size= space->zip_size();
  const ulint io_page_size= zip_size ? zip_size : srv_page_size;

  for (const auto &freed : pending_ranges)
  {
    /* Both ends of a range are inclusive. */
    const auto n_pages= freed.last - freed.first + 1;

    if (punch_hole)
    {
      /* One request covers the whole range. */
      const page_id_t first_page(space->id, freed.first);
      fil_io_t fio= fil_io(IORequestWrite, true, first_page, zip_size, 0,
                           n_pages * io_page_size, nullptr, nullptr,
                           false, true);
      if (fio.node)
        fio.node->space->release_for_io();
    }
    else if (srv_immediate_scrub_data_uncompressed)
    {
      /* Overwrite the range with zeroes, one page per request. */
      for (auto page_no= freed.first; page_no <= freed.last; page_no++)
        fil_io(IORequestWrite, false, page_id_t(space->id, page_no),
               zip_size, 0, io_page_size,
               const_cast<byte*>(field_ref_zero), nullptr, false, false);
    }

    buf_pool.stat.n_pages_written+= n_pages;
  }
}
/** Flushes to disk all flushable pages within the flush area
and also write zeroes or punch the hole for the freed ranges of pages.
@param[in] page_id page id @param[in] page_id page id
@param[in] flush LRU or FLUSH_LIST @param[in] flush LRU or FLUSH_LIST
@param[in] n_flushed number of pages flushed so far in this batch @param[in] n_flushed number of pages flushed so far in this batch
...@@ -1344,6 +1380,9 @@ buf_flush_try_neighbors( ...@@ -1344,6 +1380,9 @@ buf_flush_try_neighbors(
return 0; return 0;
} }
/* Flush the freed ranges while flushing the neighbors */
buf_flush_freed_pages(space);
page_id_t id = page_id; page_id_t id = page_id;
page_id_t high = (srv_flush_neighbors != 1 page_id_t high = (srv_flush_neighbors != 1
|| UT_LIST_GET_LEN(buf_pool.LRU) || UT_LIST_GET_LEN(buf_pool.LRU)
......
...@@ -1065,6 +1065,7 @@ fil_space_free_low( ...@@ -1065,6 +1065,7 @@ fil_space_free_low(
rw_lock_free(&space->latch); rw_lock_free(&space->latch);
fil_space_destroy_crypt_data(&space->crypt_data); fil_space_destroy_crypt_data(&space->crypt_data);
space->~fil_space_t();
ut_free(space->name); ut_free(space->name);
ut_free(space); ut_free(space);
} }
...@@ -1157,7 +1158,9 @@ fil_space_create( ...@@ -1157,7 +1158,9 @@ fil_space_create(
return(NULL); return(NULL);
} }
space = static_cast<fil_space_t*>(ut_zalloc_nokey(sizeof(*space))); /* FIXME: if calloc() is defined as an inline function that calls
memset() or bzero(), then GCC 6 -flifetime-dse can optimize it away */
space= new (ut_zalloc_nokey(sizeof(*space))) fil_space_t;
space->id = id; space->id = id;
space->name = mem_strdup(name); space->name = mem_strdup(name);
......
...@@ -554,7 +554,7 @@ void fsp_header_init(fil_space_t* space, ulint size, mtr_t* mtr) ...@@ -554,7 +554,7 @@ void fsp_header_init(fil_space_t* space, ulint size, mtr_t* mtr)
mtr_x_lock_space(space, mtr); mtr_x_lock_space(space, mtr);
const auto savepoint = mtr->get_savepoint(); const auto savepoint = mtr->get_savepoint();
buf_block_t* block = buf_page_create(page_id, zip_size, mtr); buf_block_t* block = buf_page_create(space, 0, zip_size, mtr);
mtr->sx_latch_at_savepoint(savepoint, block); mtr->sx_latch_at_savepoint(savepoint, block);
buf_block_dbg_add_level(block, SYNC_FSP_PAGE); buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
...@@ -875,7 +875,8 @@ fsp_fill_free_list( ...@@ -875,7 +875,8 @@ fsp_fill_free_list(
if (i > 0) { if (i > 0) {
const auto savepoint = mtr->get_savepoint(); const auto savepoint = mtr->get_savepoint();
block= buf_page_create(page_id_t(space->id, i), block= buf_page_create(
space, static_cast<uint32_t>(i),
zip_size, mtr); zip_size, mtr);
mtr->sx_latch_at_savepoint(savepoint, block); mtr->sx_latch_at_savepoint(savepoint, block);
...@@ -898,7 +899,8 @@ fsp_fill_free_list( ...@@ -898,7 +899,8 @@ fsp_fill_free_list(
ibuf_mtr.set_named_space(space); ibuf_mtr.set_named_space(space);
block = buf_page_create( block = buf_page_create(
page_id_t(space->id, space,
static_cast<uint32_t>(
i + FSP_IBUF_BITMAP_OFFSET), i + FSP_IBUF_BITMAP_OFFSET),
zip_size, &ibuf_mtr); zip_size, &ibuf_mtr);
ibuf_mtr.sx_latch_at_savepoint(0, block); ibuf_mtr.sx_latch_at_savepoint(0, block);
...@@ -1059,7 +1061,8 @@ fsp_page_create( ...@@ -1059,7 +1061,8 @@ fsp_page_create(
rw_lock_type_t rw_latch, rw_lock_type_t rw_latch,
mtr_t* mtr) mtr_t* mtr)
{ {
buf_block_t* block = buf_page_create(page_id_t(space->id, offset), buf_block_t* block = buf_page_create(
space, static_cast<uint32_t>(offset),
space->zip_size(), mtr); space->zip_size(), mtr);
/* The latch may already have been acquired, so we cannot invoke /* The latch may already have been acquired, so we cannot invoke
...@@ -1251,7 +1254,7 @@ static void fsp_free_page(fil_space_t* space, page_no_t offset, mtr_t* mtr) ...@@ -1251,7 +1254,7 @@ static void fsp_free_page(fil_space_t* space, page_no_t offset, mtr_t* mtr)
return; return;
} }
mtr->free(page_id_t(space->id, offset)); mtr->free(*space, static_cast<uint32_t>(offset));
const ulint bit = offset % FSP_EXTENT_SIZE; const ulint bit = offset % FSP_EXTENT_SIZE;
...@@ -2557,7 +2560,7 @@ fseg_free_page_low( ...@@ -2557,7 +2560,7 @@ fseg_free_page_low(
fsp_free_extent(space, offset, mtr); fsp_free_extent(space, offset, mtr);
} }
mtr->free(page_id_t(space->id, offset)); mtr->free(*space, static_cast<uint32_t>(offset));
} }
/** Free a page in a file segment. /** Free a page in a file segment.
...@@ -2674,7 +2677,7 @@ fseg_free_extent( ...@@ -2674,7 +2677,7 @@ fseg_free_extent(
for (ulint i = 0; i < FSP_EXTENT_SIZE; i++) { for (ulint i = 0; i < FSP_EXTENT_SIZE; i++) {
if (!xdes_is_free(descr, i)) { if (!xdes_is_free(descr, i)) {
buf_page_free( buf_page_free(
page_id_t(space->id, first_page_in_extent + i), page_id_t(space->id, first_page_in_extent + 1),
mtr, __FILE__, __LINE__); mtr, __FILE__, __LINE__);
} }
} }
......
...@@ -340,12 +340,14 @@ buf_page_get_low( ...@@ -340,12 +340,14 @@ buf_page_get_low(
from a file even if it cannot be found in the buffer buf_pool. This is one from a file even if it cannot be found in the buffer buf_pool. This is one
of the functions which perform to a block a state transition NOT_USED => of the functions which perform to a block a state transition NOT_USED =>
FILE_PAGE (the other is buf_page_get_gen). FILE_PAGE (the other is buf_page_get_gen).
@param[in] page_id page id @param[in,out] space space object
@param[in] offset offset of the tablespace
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 @param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in,out] mtr mini-transaction @param[in,out] mtr mini-transaction
@return pointer to the block, page bufferfixed */ @return pointer to the block, page bufferfixed */
buf_block_t* buf_block_t*
buf_page_create(const page_id_t page_id, ulint zip_size, mtr_t *mtr); buf_page_create(fil_space_t *space, uint32_t offset,
ulint zip_size, mtr_t *mtr);
/********************************************************************//** /********************************************************************//**
Releases a compressed-only page acquired with buf_page_get_zip(). */ Releases a compressed-only page acquired with buf_page_get_zip(). */
......
...@@ -37,9 +37,8 @@ Created 10/25/1995 Heikki Tuuri ...@@ -37,9 +37,8 @@ Created 10/25/1995 Heikki Tuuri
#include "log0recv.h" #include "log0recv.h"
#include "dict0types.h" #include "dict0types.h"
#include "ilist.h" #include "ilist.h"
#ifdef UNIV_LINUX #include <set>
# include <set> #include <mutex>
#endif
struct unflushed_spaces_tag_t; struct unflushed_spaces_tag_t;
struct rotation_list_tag_t; struct rotation_list_tag_t;
...@@ -111,6 +110,175 @@ enum fil_type_t { ...@@ -111,6 +110,175 @@ enum fil_type_t {
struct fil_node_t; struct fil_node_t;
/** Inclusive range [first, last] of 32-bit values */
struct range_t
{
  uint32_t first;
  uint32_t last;
};

/** Order ranges by their first value only. Two ranges that start at
the same value therefore compare equivalent inside a std::set. */
struct range_compare
{
  bool operator() (const range_t lhs, const range_t rhs) const
  {
    return lhs.first < rhs.first;
  }
};

using range_set_t= std::set<range_t, range_compare>;

/** Set of integers stored as inclusive ranges.
add_range(), add_value() and remove_value() keep the stored ranges
disjoint and non-adjacent, so every value is covered by at most one
range. */
class range_set
{
private:
  range_set_t ranges;
public:
  /** Merge a range with the range preceding it, if the two are
  directly adjacent (prev_range->last + 1 == range->first).
  @param[in]	range		range to be merged
  @param[in]	prev_range	range to be merged with next */
  void merge_range(range_set_t::iterator range,
                   range_set_t::iterator prev_range)
  {
    /* Written without "+ 1" so that last == UINT32_MAX cannot wrap */
    if (range->first == 0 || range->first - 1 != prev_range->last)
      return;

    const range_t merged{prev_range->first, range->last};
    ranges.erase(prev_range);
    ranges.erase(range);
    ranges.emplace(merged);
  }

  /** Split a range into two, dropping one value from its interior.
  The caller must ensure range->first < value < range->last.
  @param[in]	range	range to be split
  @param[in]	value	value to be removed from range */
  void split_range(range_set_t::iterator range, uint32_t value)
  {
    const range_t lower{range->first, value - 1};
    const range_t upper{value + 1, range->last};

    /* The key of a std::set element cannot be changed in place:
    remove the old range and insert the two halves. */
    ranges.erase(range);
    ranges.emplace(lower);
    ranges.emplace(upper);
  }

  /** Remove a value from the given range. Nothing happens if the
  range does not contain the value.
  @param[in,out]	range	range to be changed
  @param[in]	value	value to be removed */
  void remove_within_range(range_set_t::iterator range, uint32_t value)
  {
    if (value < range->first || value > range->last)
      return;

    if (range->first == range->last)
    {
      /* Single-value range: it disappears entirely */
      ranges.erase(range);
      return;
    }

    if (range->first < value && value < range->last)
      return split_range(range, value);

    /* The value is one of the two end points: shrink the range */
    range_t shrunk{range->first, range->last};
    if (value == shrunk.first)
      shrunk.first++;
    else
      shrunk.last--;
    ranges.erase(range);
    ranges.emplace(shrunk);
  }

  /** Remove the value from the ranges.
  @param[in]	value	value to be removed */
  void remove_value(uint32_t value)
  {
    if (ranges.empty())
      return;

    /* lower_bound() yields the first range starting at or after value.
    Unless that range starts exactly at value, only the range before it
    can contain the value. */
    range_set_t::iterator range= ranges.lower_bound(range_t{value, value});
    if (range == ranges.end()
        || (range->first > value && range != ranges.begin()))
      range= std::prev(range);

    remove_within_range(range, value);
  }

  /** Add a value to an existing range if it is already inside the
  range or directly adjacent to one of its ends.
  @param[in]	range	range to be modified
  @param[in]	value	value to be added
  @return iterator to the range that now contains value
  @retval ranges.end() if value neither belongs nor is adjacent to range */
  range_set_t::iterator add_within_range(range_set_t::iterator range,
                                         uint32_t value)
  {
    if (range->first <= value && range->last >= value)
      return range;

    /* Compare by difference rather than "last + 1" / "first - 1" so
    that ranges touching 0 or UINT32_MAX cannot wrap around. */
    range_t grown{range->first, range->last};
    if (value > range->last && value - range->last == 1)
      grown.last= value;
    else if (value < range->first && range->first - value == 1)
      grown.first= value;
    else
      return ranges.end();

    ranges.erase(range);
    return ranges.emplace(grown).first;
  }

  /** Add a range to the set. Every stored range that overlaps
  new_range or is directly adjacent to it is absorbed, so that the
  whole of [new_range.first, new_range.last] ends up in exactly one
  stored range.
  The caller must ensure new_range.first <= new_range.last.
  @param[in]	new_range	range to be added */
  void add_range(range_t new_range)
  {
    /* First stored range that starts at or after new_range.first */
    range_set_t::iterator it= ranges.lower_bound(new_range);

    /* The range before it starts below new_range.first but may still
    reach into, or right up to, the new range. Because stored ranges are
    disjoint, no earlier range can. */
    if (it != ranges.begin())
    {
      range_set_t::iterator prev= std::prev(it);
      if (static_cast<uint64_t>(prev->last) + 1 >= new_range.first)
        it= prev;
    }

    /* Absorb every range that starts no later than one past the end of
    the (growing) new range. The 64-bit arithmetic keeps "last + 1" from
    wrapping around. */
    while (it != ranges.end()
           && static_cast<uint64_t>(it->first)
           <= static_cast<uint64_t>(new_range.last) + 1)
    {
      if (it->first < new_range.first)
        new_range.first= it->first;
      if (it->last > new_range.last)
        new_range.last= it->last;
      it= ranges.erase(it);
    }

    ranges.emplace(new_range);
  }

  /** Add the value in the ranges
  @param[in]	value	value to be added */
  void add_value(uint32_t value)
  {
    add_range(range_t{value, value});
  }

  /** @return number of stored ranges (not the number of values).
  Returned as size_t, the size type of the underlying container. */
  size_t size() const { return ranges.size(); }
  /** Remove all ranges */
  void clear() { ranges.clear(); }
  /** @return whether no range is stored */
  bool empty() const { return ranges.empty(); }
  /** @return iterator to the range with the smallest first value */
  range_set_t::iterator begin() { return ranges.begin(); }
  /** @return past-the-end iterator */
  range_set_t::iterator end() { return ranges.end(); }
};
#endif #endif
/** Tablespace or log data space */ /** Tablespace or log data space */
...@@ -203,6 +371,16 @@ struct fil_space_t ...@@ -203,6 +371,16 @@ struct fil_space_t
punch hole */ punch hole */
bool punch_hole; bool punch_hole;
/** mutex to protect freed_ranges and last_freed_lsn */
std::mutex freed_range_mutex;
/** Variables to store freed ranges. This can be used to write
zeroes/punch the hole in files. Protected by freed_range_mutex */
range_set freed_ranges;
/** Stores last page freed lsn. Protected by freed_range_mutex */
lsn_t last_freed_lsn;
ulint magic_n;/*!< FIL_SPACE_MAGIC_N */ ulint magic_n;/*!< FIL_SPACE_MAGIC_N */
/** @return whether the tablespace is about to be dropped */ /** @return whether the tablespace is about to be dropped */
...@@ -314,6 +492,22 @@ struct fil_space_t ...@@ -314,6 +492,22 @@ struct fil_space_t
ut_ad(0); ut_ad(0);
return false; return false;
} }
/** Read the LSN stored by the latest update_last_freed_lsn() call.
This accessor does not acquire freed_range_mutex itself.
@return the current value of last_freed_lsn */
lsn_t get_last_freed_lsn()
{
  return last_freed_lsn;
}
/** Store a new value in last_freed_lsn while holding freed_range_mutex.
@param[in]	lsn	value to store */
void update_last_freed_lsn(lsn_t lsn)
{
  std::lock_guard<std::mutex> guard(freed_range_mutex);
  last_freed_lsn= lsn;
}
/** Discard every recorded freed range while holding freed_range_mutex. */
void clear_freed_ranges()
{
  std::lock_guard<std::mutex> guard(freed_range_mutex);
  freed_ranges.clear();
}
#endif /* !UNIV_INNOCHECKSUM */ #endif /* !UNIV_INNOCHECKSUM */
/** FSP_SPACE_FLAGS and FSP_FLAGS_MEM_ flags; /** FSP_SPACE_FLAGS and FSP_FLAGS_MEM_ flags;
check fsp0types.h to more info about flags. */ check fsp0types.h to more info about flags. */
...@@ -583,6 +777,38 @@ struct fil_space_t ...@@ -583,6 +777,38 @@ struct fil_space_t
return(ssize == 0 || !is_ibd return(ssize == 0 || !is_ibd
|| srv_page_size != UNIV_PAGE_SIZE_ORIG); || srv_page_size != UNIV_PAGE_SIZE_ORIG);
} }
#ifndef UNIV_INNOCHECKSUM
/** Add a page number to, or remove it from, the freed ranges while
holding freed_range_mutex.
@param[in]	offset	page number to be added or removed
@param[in]	add	true to add the page, false to remove it */
void free_page(uint32_t offset, bool add=true)
{
  std::lock_guard<std::mutex> guard(freed_range_mutex);
  if (add)
    freed_ranges.add_value(offset);
  else if (!freed_ranges.empty())
    freed_ranges.remove_value(offset);
}
/** Install a set of freed page ranges while holding freed_range_mutex.
The given set is moved into freed_ranges, replacing whatever was
stored there before.
@param[in]	ranges	freed page ranges to take over */
void add_free_ranges(range_set ranges)
{
  std::lock_guard<std::mutex> guard(freed_range_mutex);
  freed_ranges= std::move(ranges);
}
/** Add one range of freed pages to freed_ranges while holding
freed_range_mutex.
@param[in]	range	range of page numbers to be added */
void add_free_range(const range_t range)
{
  std::lock_guard<std::mutex> guard(freed_range_mutex);
  freed_ranges.add_range(range);
}
#endif /*!UNIV_INNOCHECKSUM */
}; };
#ifndef UNIV_INNOCHECKSUM #ifndef UNIV_INNOCHECKSUM
......
...@@ -524,11 +524,19 @@ inline void mtr_t::init(buf_block_t *b) ...@@ -524,11 +524,19 @@ inline void mtr_t::init(buf_block_t *b)
} }
/** Free a page. /** Free a page.
@param id page identifier */ @param[in] space tablespace contains page to be freed
inline void mtr_t::free(const page_id_t id) @param[in] offset page offset to be freed */
inline void mtr_t::free(fil_space_t &space, uint32_t offset)
{ {
page_id_t freed_page_id(space.id, offset);
if (m_log_mode == MTR_LOG_ALL) if (m_log_mode == MTR_LOG_ALL)
m_log.close(log_write<FREE_PAGE>(id, nullptr)); m_log.close(log_write<FREE_PAGE>(freed_page_id, nullptr));
ut_ad(!m_user_space || m_user_space == &space);
if (&space == fil_system.sys_space)
freed_system_tablespace_page();
else
m_user_space= &space;
} }
/** Write an EXTENDED log record. /** Write an EXTENDED log record.
...@@ -651,4 +659,5 @@ inline void mtr_t::trim_pages(const page_id_t id) ...@@ -651,4 +659,5 @@ inline void mtr_t::trim_pages(const page_id_t id)
byte *l= log_write<EXTENDED>(id, nullptr, 1, true); byte *l= log_write<EXTENDED>(id, nullptr, 1, true);
*l++= TRIM_PAGES; *l++= TRIM_PAGES;
m_log.close(l); m_log.close(l);
set_trim_pages();
} }
...@@ -312,6 +312,24 @@ struct mtr_t { ...@@ -312,6 +312,24 @@ struct mtr_t {
/** @return true if we are inside the change buffer code */ /** @return true if we are inside the change buffer code */
bool is_inside_ibuf() const { return m_inside_ibuf; } bool is_inside_ibuf() const { return m_inside_ibuf; }
/** Remember that this mini-transaction freed a page in the
system tablespace. */
void freed_system_tablespace_page()
{
  m_freed_in_system_tablespace= true;
}
/** Remember that pages have been trimmed by this mini-transaction. */
void set_trim_pages()
{
  m_trim_pages= true;
}
/** @return whether set_trim_pages() has been called since the flag
was last reset */
bool is_trim_pages()
{
  return m_trim_pages;
}
/** Check whether the tablespace associated with this mini-transaction
is compressed. Always false in builds that define neither
HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE nor _WIN32.
@return whether a page_compressed table was modified */
bool is_page_compressed() const
{
#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) || defined(_WIN32)
  if (!m_user_space)
    return false;
  return m_user_space->is_compressed();
#else
  return false;
#endif
}
#ifdef UNIV_DEBUG #ifdef UNIV_DEBUG
/** Check if we are holding an rw-latch in this mini-transaction /** Check if we are holding an rw-latch in this mini-transaction
@param lock latch to search for @param lock latch to search for
...@@ -348,6 +366,12 @@ struct mtr_t { ...@@ -348,6 +366,12 @@ struct mtr_t {
/** @return the memo stack */ /** @return the memo stack */
mtr_buf_t* get_memo() { return &m_memo; } mtr_buf_t* get_memo() { return &m_memo; }
/** @return whether freed_system_tablespace_page() has been called
since the flag was last reset */
bool is_freed_system_tablespace_page() { return m_freed_in_system_tablespace; }
#endif /* UNIV_DEBUG */ #endif /* UNIV_DEBUG */
/** @return true if a record was added to the mini-transaction */ /** @return true if a record was added to the mini-transaction */
...@@ -470,8 +494,9 @@ struct mtr_t { ...@@ -470,8 +494,9 @@ struct mtr_t {
@param[in,out] b buffer page */ @param[in,out] b buffer page */
void init(buf_block_t *b); void init(buf_block_t *b);
/** Free a page. /** Free a page.
@param id page identifier */ @param[in] space tablespace contains page to be freed
inline void free(const page_id_t id); @param[in] offset page offset to be freed */
inline void free(fil_space_t &space, uint32_t offset);
/** Write log for partly initializing a B-tree or R-tree page. /** Write log for partly initializing a B-tree or R-tree page.
@param block B-tree or R-tree page @param block B-tree or R-tree page
@param comp false=ROW_FORMAT=REDUNDANT, true=COMPACT or DYNAMIC */ @param comp false=ROW_FORMAT=REDUNDANT, true=COMPACT or DYNAMIC */
...@@ -551,6 +576,20 @@ struct mtr_t { ...@@ -551,6 +576,20 @@ struct mtr_t {
const char *path, const char *path,
const char *new_path= nullptr); const char *new_path= nullptr);
/** Record the page number of a freed page in m_freed_ranges.
In debug builds, assert that the page belongs to m_user_space when
one is set.
@param[in]	id	identifier of the freed page */
void add_freed_offset(page_id_t id)
{
  ut_ad(!m_user_space || m_user_space->id == id.space());
  m_freed_ranges.add_value(id.page_no());
}
/** Forget all freed page ranges and reset the flags that note a
system tablespace free and a trim. */
void clear_freed_ranges()
{
  m_trim_pages= false;
  m_freed_in_system_tablespace= false;
  m_freed_ranges.clear();
}
private: private:
/** Log a write of a byte string to a page. /** Log a write of a byte string to a page.
@param block buffer page @param block buffer page
...@@ -621,6 +660,12 @@ struct mtr_t { ...@@ -621,6 +660,12 @@ struct mtr_t {
to suppress some read-ahead operations, @see ibuf_inside() */ to suppress some read-ahead operations, @see ibuf_inside() */
uint16_t m_inside_ibuf:1; uint16_t m_inside_ibuf:1;
/** whether a page has been freed in the system tablespace */
uint16_t m_freed_in_system_tablespace:1;
/** whether pages have been trimmed */
uint16_t m_trim_pages:1;
#ifdef UNIV_DEBUG #ifdef UNIV_DEBUG
/** Persistent user tablespace associated with the /** Persistent user tablespace associated with the
mini-transaction, or 0 (TRX_SYS_SPACE) if none yet */ mini-transaction, or 0 (TRX_SYS_SPACE) if none yet */
...@@ -638,6 +683,9 @@ struct mtr_t { ...@@ -638,6 +683,9 @@ struct mtr_t {
/** LSN at commit time */ /** LSN at commit time */
lsn_t m_commit_lsn; lsn_t m_commit_lsn;
/** ranges of freed page numbers, filled by add_freed_offset() */
range_set m_freed_ranges;
}; };
#include "mtr0mtr.ic" #include "mtr0mtr.ic"
......
...@@ -543,11 +543,24 @@ struct file_name_t { ...@@ -543,11 +543,24 @@ struct file_name_t {
/** FSP_SIZE of tablespace */ /** FSP_SIZE of tablespace */
ulint size; ulint size;
/** Freed pages of tablespace */
range_set freed_ranges;
/** Constructor */ /** Constructor */
file_name_t(std::string name_, bool deleted) file_name_t(std::string name_, bool deleted)
: name(std::move(name_)), space(NULL), : name(std::move(name_)), space(NULL),
status(deleted ? DELETED: NORMAL), status(deleted ? DELETED: NORMAL),
size(0) {} size(0) {}
/** Record a page number in freed_ranges.
@param[in]	page_no	page number to be added */
void add_freed_page(uint32_t page_no)
{
  freed_ranges.add_value(page_no);
}
/** Drop a page number from freed_ranges, if anything is recorded.
@param[in]	page_no	page number to be removed */
void remove_freed_page(uint32_t page_no)
{
  if (!freed_ranges.empty())
    freed_ranges.remove_value(page_no);
}
}; };
/** Map of dirty tablespaces during recovery */ /** Map of dirty tablespaces during recovery */
...@@ -1764,6 +1777,34 @@ inline void recv_sys_t::add(const page_id_t page_id, ...@@ -1764,6 +1777,34 @@ inline void recv_sys_t::add(const page_id_t page_id,
log_phys_t(start_lsn, lsn, l, len)); log_phys_t(start_lsn, lsn, l, len));
} }
/** Track the effect of a FREE_PAGE or INIT_PAGE redo log record on the
set of freed pages.
For a predefined tablespace the page is added to or removed from the
tablespace object directly. For any other tablespace the change is kept
in the matching recv_spaces entry; records for tablespaces that are not
in recv_spaces are ignored.
@param[in]	page_id	freed or init page_id
@param[in]	freed	true to record the page as freed, false to remove
			it from the freed pages */
static void store_freed_or_init_rec(page_id_t page_id, bool freed)
{
  const uint32_t space_id= page_id.space();
  const uint32_t page_no= page_id.page_no();

  if (is_predefined_tablespace(space_id))
  {
    fil_space_t *space= space_id == TRX_SYS_SPACE
      ? fil_system.sys_space
      : fil_space_get(space_id);
    /* Skip the record when no tablespace object is available, just as
    records for unknown user tablespaces are skipped below.
    NOTE(review): assumes fil_space_get() returns NULL when the
    tablespace is not found; confirm against its declaration. */
    if (space)
      space->free_page(page_no, freed);
    return;
  }

  recv_spaces_t::iterator i= recv_spaces.find(space_id);
  if (i == recv_spaces.end())
    return;

  if (freed)
    i->second.add_freed_page(page_no);
  else
    i->second.remove_freed_page(page_no);
}
/** Parse and register one mini-transaction in log_t::FORMAT_10_5. /** Parse and register one mini-transaction in log_t::FORMAT_10_5.
@param checkpoint_lsn the log sequence number of the latest checkpoint @param checkpoint_lsn the log sequence number of the latest checkpoint
...@@ -1963,6 +2004,7 @@ bool recv_sys_t::parse(lsn_t checkpoint_lsn, store_t *store, bool apply) ...@@ -1963,6 +2004,7 @@ bool recv_sys_t::parse(lsn_t checkpoint_lsn, store_t *store, bool apply)
case INIT_PAGE: case INIT_PAGE:
last_offset= FIL_PAGE_TYPE; last_offset= FIL_PAGE_TYPE;
free_or_init_page: free_or_init_page:
store_freed_or_init_rec(id, (b & 0x70) == FREE_PAGE);
if (UNIV_UNLIKELY(rlen != 0)) if (UNIV_UNLIKELY(rlen != 0))
goto record_corrupted; goto record_corrupted;
break; break;
...@@ -2531,7 +2573,7 @@ inline buf_block_t *recv_sys_t::recover_low(const page_id_t page_id, ...@@ -2531,7 +2573,7 @@ inline buf_block_t *recv_sys_t::recover_low(const page_id_t page_id,
{ {
mtr.start(); mtr.start();
mtr.set_log_mode(MTR_LOG_NONE); mtr.set_log_mode(MTR_LOG_NONE);
block= buf_page_create(page_id, space->zip_size(), &mtr); block= buf_page_create(space, page_id.page_no(), space->zip_size(), &mtr);
p= recv_sys.pages.find(page_id); p= recv_sys.pages.find(page_id);
if (p == recv_sys.pages.end()) if (p == recv_sys.pages.end())
{ {
...@@ -3240,6 +3282,12 @@ recv_init_crash_recovery_spaces(bool rescan, bool& missing_tablespace) ...@@ -3240,6 +3282,12 @@ recv_init_crash_recovery_spaces(bool rescan, bool& missing_tablespace)
/* The tablespace was found, and there /* The tablespace was found, and there
are some redo log records for it. */ are some redo log records for it. */
fil_names_dirty(rs.second.space); fil_names_dirty(rs.second.space);
/* Add the freed page ranges in the respective
tablespace */
if (!rs.second.freed_ranges.empty())
rs.second.space->add_free_ranges(
std::move(rs.second.freed_ranges));
} else if (rs.second.name == "") { } else if (rs.second.name == "") {
ib::error() << "Missing FILE_CREATE, FILE_DELETE" ib::error() << "Missing FILE_CREATE, FILE_DELETE"
" or FILE_MODIFY before FILE_CHECKPOINT" " or FILE_MODIFY before FILE_CHECKPOINT"
......
...@@ -372,6 +372,7 @@ void mtr_t::start() ...@@ -372,6 +372,7 @@ void mtr_t::start()
ut_d(m_user_space_id= TRX_SYS_SPACE); ut_d(m_user_space_id= TRX_SYS_SPACE);
m_user_space= nullptr; m_user_space= nullptr;
m_commit_lsn= 0; m_commit_lsn= 0;
m_freed_in_system_tablespace= m_trim_pages= false;
} }
/** Release the resources */ /** Release the resources */
...@@ -381,6 +382,7 @@ inline void mtr_t::release_resources() ...@@ -381,6 +382,7 @@ inline void mtr_t::release_resources()
ut_d(m_memo.for_each_block_in_reverse(CIterate<DebugCheck>())); ut_d(m_memo.for_each_block_in_reverse(CIterate<DebugCheck>()));
m_log.erase(); m_log.erase();
m_memo.erase(); m_memo.erase();
clear_freed_ranges();
ut_d(m_commit= true); ut_d(m_commit= true);
} }
...@@ -413,6 +415,30 @@ void mtr_t::commit() ...@@ -413,6 +415,30 @@ void mtr_t::commit()
to insert into the flush list. */ to insert into the flush list. */
log_mutex_exit(); log_mutex_exit();
if (!m_freed_ranges.empty())
{
fil_space_t *freed_space= m_user_space;
/* Get the freed tablespace in case of predefined tablespace */
if (!freed_space)
{
ut_ad(is_freed_system_tablespace_page());
freed_space= fil_system.sys_space;
}
ut_ad(memo_contains(freed_space->latch, MTR_MEMO_X_LOCK));
/* Update the last freed lsn */
freed_space->update_last_freed_lsn(m_commit_lsn);
for (const auto &range : m_freed_ranges)
freed_space->add_free_range(range);
}
if (is_trim_pages())
{
ut_ad(m_user_space != nullptr);
m_user_space->clear_freed_ranges();
}
m_memo.for_each_block_in_reverse(CIterate<const ReleaseBlocks> m_memo.for_each_block_in_reverse(CIterate<const ReleaseBlocks>
(ReleaseBlocks(start_lsn, m_commit_lsn))); (ReleaseBlocks(start_lsn, m_commit_lsn)));
if (m_made_dirty) if (m_made_dirty)
...@@ -441,6 +467,8 @@ void mtr_t::commit_files(lsn_t checkpoint_lsn) ...@@ -441,6 +467,8 @@ void mtr_t::commit_files(lsn_t checkpoint_lsn)
ut_ad(!m_made_dirty); ut_ad(!m_made_dirty);
ut_ad(m_memo.size() == 0); ut_ad(m_memo.size() == 0);
ut_ad(!srv_read_only_mode); ut_ad(!srv_read_only_mode);
ut_ad(m_freed_ranges.empty());
ut_ad(!m_freed_in_system_tablespace);
if (checkpoint_lsn) { if (checkpoint_lsn) {
byte* ptr = m_log.push<byte*>(SIZE_OF_FILE_CHECKPOINT); byte* ptr = m_log.push<byte*>(SIZE_OF_FILE_CHECKPOINT);
......
...@@ -696,6 +696,10 @@ static void trx_purge_truncate_history() ...@@ -696,6 +696,10 @@ static void trx_purge_truncate_history()
const ulint size = SRV_UNDO_TABLESPACE_SIZE_IN_PAGES; const ulint size = SRV_UNDO_TABLESPACE_SIZE_IN_PAGES;
mtr.start(); mtr.start();
mtr_x_lock_space(purge_sys.truncate.current, &mtr); mtr_x_lock_space(purge_sys.truncate.current, &mtr);
/* Associate the undo tablespace with mtr.
During mtr::commit(), InnoDB can use the undo
tablespace object to clear all freed ranges */
mtr.set_named_space(purge_sys.truncate.current);
mtr.trim_pages(page_id_t(space.id, size)); mtr.trim_pages(page_id_t(space.id, size));
fsp_header_init(purge_sys.truncate.current, size, &mtr); fsp_header_init(purge_sys.truncate.current, size, &mtr);
mutex_enter(&fil_system.mutex); mutex_enter(&fil_system.mutex);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment