MDEV-15527 page_compressed compressed page partially during import tablespace

- The table import operation fails to punch holes in
the filesystem when a page compressed table is involved.
To fix that, InnoDB first punches a hole covering the
IOBuffer size (1MB). After that, InnoDB writes the data
page by page when page compression is involved.
parent dfda1c92
/*****************************************************************************
Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2015, 2020, MariaDB Corporation.
Copyright (c) 2015, 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -3375,6 +3375,57 @@ struct fil_iterator_t {
byte* crypt_io_buffer; /*!< IO buffer when encrypted */
};
/** InnoDB writes page by page when there is page compressed
tablespace involved. It does help to save the disk space when
punch hole is enabled.

The whole range [offset, offset + n_bytes) is hole-punched first;
afterwards only the used prefix of every non-empty page is written
back, so the remainder of each page stays a hole in the file.

@param iter            Tablespace iterator
@param write_request   Request to write into the file
@param offset          offset of the file to be written
@param writeptr        buffer to be written
@param n_bytes         number of bytes to be written
@param try_punch_only  Try the range punch only because the
                       current range is full of empty pages
@return DB_SUCCESS, or the error code returned by
os_file_punch_hole() or os_file_write() */
static
dberr_t fil_import_compress_fwrite(const fil_iterator_t &iter,
                                   const IORequest &write_request,
                                   ulint offset,
                                   const byte *writeptr,
                                   ulint n_bytes,
                                   bool try_punch_only=false)
{
  dberr_t err= os_file_punch_hole(iter.file, offset, n_bytes);
  /* Nothing more to do if the punch failed (the caller decides how
  to fall back) or if the caller asked for the punch only. */
  if (err != DB_SUCCESS || try_punch_only)
    return err;

  for (ulint j= 0; j < n_bytes; j+= srv_page_size)
  {
    /* The page at file offset 0 is always written in full;
    for every other page the length is derived from the page itself. */
    ulint n_write_bytes= srv_page_size;

    if (j || offset)
    {
      /* Read the original data length from block and
      safer to read FIL_PAGE_COMPRESSED_SIZE because it
      is not encrypted */
      n_write_bytes= mach_read_from_2(writeptr + j + FIL_PAGE_DATA);
      const unsigned ptype= mach_read_from_2(writeptr + j + FIL_PAGE_TYPE);

      /* Ignore the empty page: the range was already punched above */
      if (ptype == 0 && n_write_bytes == 0)
        continue;

      /* Account for the page header and the compression metadata
      that precede the payload. */
      n_write_bytes+= FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE;
      if (ptype == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED)
        n_write_bytes+= FIL_PAGE_COMPRESSION_METHOD_SIZE;

      /* The length comes from a 2-byte on-page field plus header
      constants, so it can exceed the page size. Never write more than
      one page per iteration: a larger length would make os_file_write()
      read past this page (and past the end of the buffer for the last
      page of the range).
      NOTE(review): the length field is read regardless of the page
      type; confirm that callers only pass page_compressed pages here. */
      if (n_write_bytes > srv_page_size)
        n_write_bytes= srv_page_size;
    }

    err= os_file_write(write_request, iter.filepath, iter.file,
                       writeptr + j, offset + j, n_write_bytes);
    if (err != DB_SUCCESS)
      break;
  }

  return err;
}
/********************************************************************//**
TODO: This can be made parallel trivially by chunking up the file and creating
a callback per thread. Main benefit will be to use multiple CPUs for
......@@ -3416,7 +3467,10 @@ fil_iterate(
/* TODO: For ROW_FORMAT=COMPRESSED tables we do a lot of useless
copying for non-index pages. Unfortunately, it is
required by buf_zip_decompress() */
dberr_t err = DB_SUCCESS;
dberr_t err = DB_SUCCESS;
bool page_compressed = false;
bool punch_hole = true;
IORequest write_request(IORequest::WRITE);
for (offset = iter.start; offset < iter.end; offset += n_bytes) {
if (callback.is_interrupted()) {
......@@ -3494,9 +3548,8 @@ fil_iterate(
goto func_exit;
}
const bool page_compressed
= fil_page_is_compressed_encrypted(src)
|| fil_page_is_compressed(src);
page_compressed= fil_page_is_compressed_encrypted(src)
|| fil_page_is_compressed(src);
if (page_compressed && block->page.zip.data) {
goto page_corrupted;
......@@ -3651,13 +3704,23 @@ fil_iterate(
}
}
/* A page was updated in the set, write back to disk. */
if (updated) {
IORequest write_request(IORequest::WRITE);
if (page_compressed && punch_hole && srv_use_trim) {
err = fil_import_compress_fwrite(
iter, write_request, offset, writeptr, n_bytes,
!updated);
err = os_file_write(write_request,
iter.filepath, iter.file,
writeptr, offset, n_bytes);
if (err != DB_SUCCESS) {
punch_hole = false;
if (updated) {
goto normal_write;
}
}
} else if (updated) {
/* A page was updated in the set, write back to disk. */
normal_write:
err = os_file_write(
write_request, iter.filepath, iter.file,
writeptr, offset, n_bytes);
if (err != DB_SUCCESS) {
goto func_exit;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment