Commit 81695ab8 authored by Marko Mäkelä

MDEV-11520 Extending an InnoDB data file unnecessarily allocates a large memory buffer on Windows

fil_extend_space_to_desired_size(), os_file_set_size(): Use calloc()
for memory allocation, and handle failures. Properly check the return
status of posix_fallocate(), and pass the correct arguments to
posix_fallocate().

On Windows, instead of extending the file by at most 1 megabyte at a time,
write a zero-filled page at the end of the file.
According to the Microsoft blog post
https://blogs.msdn.microsoft.com/oldnewthing/20110922-00/?p=9573
this will physically extend the file by writing zero bytes.
(InnoDB never uses DeviceIoControl() to set the file sparse.)

I tested that the file extension works properly with a multi-file
system tablespace, both with --innodb-use-fallocate and
--skip-innodb-use-fallocate (the default):

./mtr \
--mysqld=--innodb-use-fallocate \
--mysqld=--innodb-autoextend-increment=1 \
--mysqld=--innodb-data-file-path='ibdata1:5M;ibdata2:5M:autoextend' \
--parallel=auto --force --retry=0 --suite=innodb &

ls -lsh mysql-test/var/*/mysqld.1/data/ibdata2
(several samples while running the test)
parent 6dc00f97
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2014, 2017, MariaDB Corporation. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -4953,15 +4954,12 @@ fil_extend_space_to_desired_size(
byte* buf;
ulint buf_size;
ulint start_page_no;
ulint file_start_page_no;
ulint page_size;
ulint pages_added;
ibool success;
ut_ad(!srv_read_only_mode);
retry:
pages_added = 0;
success = TRUE;
fil_mutex_enter_and_prepare_for_io(space_id);
......@@ -5015,29 +5013,29 @@ fil_extend_space_to_desired_size(
mutex_exit(&fil_system->mutex);
start_page_no = space->size;
file_start_page_no = space->size - node->size;
const ulint file_start_page_no = space->size - node->size;
#ifdef HAVE_POSIX_FALLOCATE
if (srv_use_posix_fallocate) {
os_offset_t start_offset = start_page_no * page_size;
os_offset_t n_pages = (size_after_extend - start_page_no);
os_offset_t len = n_pages * page_size;
if (posix_fallocate(node->handle, start_offset, len) == -1) {
ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file "
"space for file \'%s\' failed. Current size "
INT64PF ", desired size " INT64PF "\n",
node->name, start_offset, len+start_offset);
os_file_handle_error_no_exit(node->name, "posix_fallocate", FALSE);
success = FALSE;
} else {
success = TRUE;
os_offset_t start_offset
= (start_page_no - file_start_page_no) * page_size;
ulint n_pages = size_after_extend - start_page_no;
os_offset_t len = os_offset_t(n_pages) * page_size;
int err = posix_fallocate(node->handle, start_offset, len);
success = !err;
if (!success) {
ib_logf(IB_LOG_LEVEL_ERROR, "extending file %s"
" from " INT64PF " to " INT64PF " bytes"
" failed with error %d",
node->name, start_offset, len + start_offset,
err);
}
DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
success = FALSE; errno = 28; os_has_said_disk_full = TRUE;);
success = FALSE; os_has_said_disk_full = TRUE;);
mutex_enter(&fil_system->mutex);
if (success) {
node->size += n_pages;
space->size += n_pages;
......@@ -5054,14 +5052,24 @@ fil_extend_space_to_desired_size(
}
#endif
#ifdef _WIN32
/* Write 1 page of zeroes at the desired end. */
start_page_no = size_after_extend - 1;
buf_size = page_size;
#else
/* Extend at most 64 pages at a time */
buf_size = ut_min(64, size_after_extend - start_page_no) * page_size;
buf2 = static_cast<byte*>(mem_alloc(buf_size + page_size));
#endif
buf2 = static_cast<byte*>(calloc(1, buf_size + page_size));
if (!buf2) {
ib_logf(IB_LOG_LEVEL_ERROR, "Cannot allocate " ULINTPF
" bytes to extend file",
buf_size + page_size);
success = FALSE;
}
buf = static_cast<byte*>(ut_align(buf2, page_size));
memset(buf, 0, buf_size);
while (start_page_no < size_after_extend) {
while (success && start_page_no < size_after_extend) {
ulint n_pages
= ut_min(buf_size / page_size,
size_after_extend - start_page_no);
......@@ -5070,56 +5078,47 @@ fil_extend_space_to_desired_size(
= ((os_offset_t) (start_page_no - file_start_page_no))
* page_size;
const char* name = node->name == NULL ? space->name : node->name;
#ifdef UNIV_HOTBACKUP
success = os_file_write(name, node->handle, buf,
success = os_file_write(node->name, node->handle, buf,
offset, page_size * n_pages);
#else
success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
name, node->handle, buf,
node->name, node->handle, buf,
offset, page_size * n_pages,
NULL, NULL);
#endif /* UNIV_HOTBACKUP */
DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
success = FALSE; errno = 28; os_has_said_disk_full = TRUE;);
if (success) {
os_has_said_disk_full = FALSE;
} else {
/* Let us measure the size of the file to determine
how much we were able to extend it */
os_offset_t size;
size = os_file_get_size(node->handle);
ut_a(size != (os_offset_t) -1);
success = FALSE; os_has_said_disk_full = TRUE;);
n_pages = ((ulint) (size / page_size))
- node->size - pages_added;
/* Let us measure the size of the file to determine
how much we were able to extend it */
os_offset_t size = os_file_get_size(node->handle);
ut_a(size != (os_offset_t) -1);
pages_added += n_pages;
break;
}
start_page_no += n_pages;
pages_added += n_pages;
start_page_no = (ulint) (size / page_size)
+ file_start_page_no;
}
mem_free(buf2);
free(buf2);
mutex_enter(&fil_system->mutex);
ut_a(node->being_extended);
ut_a(start_page_no - file_start_page_no >= node->size);
space->size += pages_added;
node->size += pages_added;
if (buf) {
ulint file_size = start_page_no - file_start_page_no;
space->size += file_size - node->size;
node->size = file_size;
}
fil_node_complete_io(node, fil_system, OS_FILE_WRITE);
/* At this point file has been extended */
#ifdef HAVE_POSIX_FALLOCATE
file_extended:
#endif /* HAVE_POSIX_FALLOCATE */
node->being_extended = FALSE;
*actual_size = space->size;
......
......@@ -2,7 +2,7 @@
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
Copyright (c) 2017, MariaDB Corporation. All Rights Reserved.
Copyright (c) 2012, 2017, MariaDB Corporation. All Rights Reserved.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
......@@ -2123,48 +2123,47 @@ os_file_set_size(
os_file_t file, /*!< in: handle to a file */
os_offset_t size) /*!< in: file size */
{
os_offset_t current_size;
ibool ret;
byte* buf;
byte* buf2;
ulint buf_size;
current_size = 0;
#ifdef HAVE_POSIX_FALLOCATE
if (srv_use_posix_fallocate) {
if (posix_fallocate(file, current_size, size) == -1) {
fprintf(stderr, "InnoDB: Error: preallocating file "
"space for file \'%s\' failed. Current size "
"%lu, desired size %lu\n",
name, (long unsigned) current_size, (long unsigned) size);
os_file_handle_error_no_exit(name, "posix_fallocate", FALSE);
return(FALSE);
int err = posix_fallocate(file, 0, size);
if (err) {
ib_logf(IB_LOG_LEVEL_ERROR,
"preallocating " INT64PF " bytes for"
"file %s failed with error %d",
size, name, err);
}
return(TRUE);
return(!err);
}
#endif
#ifdef _WIN32
/* Write 1 page of zeroes at the desired end. */
buf_size = UNIV_PAGE_SIZE;
os_offset_t current_size = size - buf_size;
#else
/* Write up to 1 megabyte at a time. */
buf_size = ut_min(64, (ulint) (size / UNIV_PAGE_SIZE))
* UNIV_PAGE_SIZE;
buf2 = static_cast<byte*>(ut_malloc(buf_size + UNIV_PAGE_SIZE));
os_offset_t current_size = 0;
#endif
buf2 = static_cast<byte*>(calloc(1, buf_size + UNIV_PAGE_SIZE));
if (!buf2) {
ib_logf(IB_LOG_LEVEL_ERROR,
"Cannot allocate " ULINTPF " bytes to extend file\n",
buf_size + UNIV_PAGE_SIZE);
return(FALSE);
}
/* Align the buffer for possible raw i/o */
buf = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
/* Write buffer full of zeros */
memset(buf, 0, buf_size);
if (size >= (os_offset_t) 100 << 20) {
fprintf(stderr, "InnoDB: Progress in MB:");
}
while (current_size < size) {
do {
ulint n_bytes;
if (size - current_size < (os_offset_t) buf_size) {
......@@ -2175,37 +2174,15 @@ os_file_set_size(
ret = os_file_write(name, file, buf, current_size, n_bytes);
if (!ret) {
ut_free(buf2);
goto error_handling;
}
/* Print about progress for each 100 MB written */
if ((current_size + n_bytes) / (100 << 20)
!= current_size / (100 << 20)) {
fprintf(stderr, " %lu00",
(ulong) ((current_size + n_bytes)
/ (100 << 20)));
break;
}
current_size += n_bytes;
}
} while (current_size < size);
if (size >= (os_offset_t) 100 << 20) {
fprintf(stderr, "\n");
}
free(buf2);
ut_free(buf2);
ret = os_file_flush(file);
if (ret) {
return(TRUE);
}
error_handling:
return(FALSE);
return(ret && os_file_flush(file));
}
/***********************************************************************//**
......
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2014, 2017, MariaDB Corporation. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -4993,15 +4994,12 @@ fil_extend_space_to_desired_size(
byte* buf;
ulint buf_size;
ulint start_page_no;
ulint file_start_page_no;
ulint page_size;
ulint pages_added;
ibool success;
ut_ad(!srv_read_only_mode);
retry:
pages_added = 0;
success = TRUE;
fil_mutex_enter_and_prepare_for_io(space_id);
......@@ -5055,27 +5053,26 @@ fil_extend_space_to_desired_size(
mutex_exit(&fil_system->mutex);
start_page_no = space->size;
file_start_page_no = space->size - node->size;
const ulint file_start_page_no = space->size - node->size;
#ifdef HAVE_POSIX_FALLOCATE
if (srv_use_posix_fallocate) {
os_offset_t start_offset = start_page_no * page_size;
os_offset_t n_pages = (size_after_extend - start_page_no);
os_offset_t len = n_pages * page_size;
if (posix_fallocate(node->handle, start_offset, len) == -1) {
ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file "
"space for file \'%s\' failed. Current size "
INT64PF ", desired size " INT64PF,
node->name, start_offset, len+start_offset);
os_file_handle_error_no_exit(node->name, "posix_fallocate", FALSE);
success = FALSE;
} else {
success = TRUE;
os_offset_t start_offset
= (start_page_no - file_start_page_no) * page_size;
ulint n_pages = size_after_extend - start_page_no;
os_offset_t len = os_offset_t(n_pages) * page_size;
int err = posix_fallocate(node->handle, start_offset, len);
success = !err;
if (!success) {
ib_logf(IB_LOG_LEVEL_ERROR, "extending file %s"
" from " INT64PF " to " INT64PF " bytes"
" failed with error %d",
node->name, start_offset, len + start_offset,
err);
}
DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
success = FALSE; errno = 28;os_has_said_disk_full = TRUE;);
success = FALSE; os_has_said_disk_full = TRUE;);
mutex_enter(&fil_system->mutex);
......@@ -5095,14 +5092,24 @@ fil_extend_space_to_desired_size(
}
#endif
#ifdef _WIN32
/* Write 1 page of zeroes at the desired end. */
start_page_no = size_after_extend - 1;
buf_size = page_size;
#else
/* Extend at most 64 pages at a time */
buf_size = ut_min(64, size_after_extend - start_page_no) * page_size;
buf2 = static_cast<byte*>(mem_alloc(buf_size + page_size));
#endif
buf2 = static_cast<byte*>(calloc(1, buf_size + page_size));
if (!buf2) {
ib_logf(IB_LOG_LEVEL_ERROR, "Cannot allocate " ULINTPF
" bytes to extend file",
buf_size + page_size);
success = FALSE;
}
buf = static_cast<byte*>(ut_align(buf2, page_size));
memset(buf, 0, buf_size);
while (start_page_no < size_after_extend) {
while (success && start_page_no < size_after_extend) {
ulint n_pages
= ut_min(buf_size / page_size,
size_after_extend - start_page_no);
......@@ -5111,55 +5118,47 @@ fil_extend_space_to_desired_size(
= ((os_offset_t) (start_page_no - file_start_page_no))
* page_size;
const char* name = node->name == NULL ? space->name : node->name;
#ifdef UNIV_HOTBACKUP
success = os_file_write(name, node->handle, buf,
success = os_file_write(node->name, node->handle, buf,
offset, page_size * n_pages);
#else
success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
name, node->handle, buf,
node->name, node->handle, buf,
offset, page_size * n_pages,
NULL, NULL, space_id, NULL);
#endif /* UNIV_HOTBACKUP */
DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
success = FALSE; errno = 28; os_has_said_disk_full = TRUE;);
success = FALSE; os_has_said_disk_full = TRUE;);
if (success) {
os_has_said_disk_full = FALSE;
} else {
/* Let us measure the size of the file to determine
how much we were able to extend it */
os_offset_t size;
size = os_file_get_size(node->handle);
ut_a(size != (os_offset_t) -1);
n_pages = ((ulint) (size / page_size))
- node->size - pages_added;
/* Let us measure the size of the file to determine
how much we were able to extend it */
os_offset_t size = os_file_get_size(node->handle);
ut_a(size != (os_offset_t) -1);
pages_added += n_pages;
break;
}
start_page_no += n_pages;
pages_added += n_pages;
start_page_no = (ulint) (size / page_size)
+ file_start_page_no;
}
mem_free(buf2);
free(buf2);
mutex_enter(&fil_system->mutex);
ut_a(node->being_extended);
ut_a(start_page_no - file_start_page_no >= node->size);
space->size += pages_added;
node->size += pages_added;
if (buf) {
ulint file_size = start_page_no - file_start_page_no;
space->size += file_size - node->size;
node->size = file_size;
}
fil_node_complete_io(node, fil_system, OS_FILE_WRITE);
/* At this point file has been extended */
#ifdef HAVE_POSIX_FALLOCATE
file_extended:
#endif /* HAVE_POSIX_FALLOCATE */
node->being_extended = FALSE;
*actual_size = space->size;
......
......@@ -2,7 +2,7 @@
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
Copyright (c) 2017, MariaDB Corporation. All Rights Reserved.
Copyright (c) 2013, 2017, MariaDB Corporation. All Rights Reserved.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
......@@ -2341,48 +2341,47 @@ os_file_set_size(
os_file_t file, /*!< in: handle to a file */
os_offset_t size) /*!< in: file size */
{
os_offset_t current_size;
ibool ret;
byte* buf;
byte* buf2;
ulint buf_size;
current_size = 0;
#ifdef HAVE_POSIX_FALLOCATE
if (srv_use_posix_fallocate) {
if (posix_fallocate(file, current_size, size) == -1) {
ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file "
"space for file \'%s\' failed. Current size "
INT64PF ", desired size " INT64PF,
name, current_size, size);
os_file_handle_error_no_exit (name, "posix_fallocate",
FALSE);
return(FALSE);
int err = posix_fallocate(file, 0, size);
if (err) {
ib_logf(IB_LOG_LEVEL_ERROR,
"preallocating " INT64PF " bytes for"
"file %s failed with error %d",
size, name, err);
}
return(TRUE);
return(!err);
}
#endif
#ifdef _WIN32
/* Write 1 page of zeroes at the desired end. */
buf_size = UNIV_PAGE_SIZE;
os_offset_t current_size = size - buf_size;
#else
/* Write up to 1 megabyte at a time. */
buf_size = ut_min(64, (ulint) (size / UNIV_PAGE_SIZE))
* UNIV_PAGE_SIZE;
buf2 = static_cast<byte*>(ut_malloc(buf_size + UNIV_PAGE_SIZE));
os_offset_t current_size = 0;
#endif
buf2 = static_cast<byte*>(calloc(1, buf_size + UNIV_PAGE_SIZE));
if (!buf2) {
ib_logf(IB_LOG_LEVEL_ERROR,
"Cannot allocate " ULINTPF " bytes to extend file\n",
buf_size + UNIV_PAGE_SIZE);
return(FALSE);
}
/* Align the buffer for possible raw i/o */
buf = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
/* Write buffer full of zeros */
memset(buf, 0, buf_size);
if (size >= (os_offset_t) 100 << 20) {
fprintf(stderr, "InnoDB: Progress in MB:");
}
while (current_size < size) {
do {
ulint n_bytes;
if (size - current_size < (os_offset_t) buf_size) {
......@@ -2393,37 +2392,15 @@ os_file_set_size(
ret = os_file_write(name, file, buf, current_size, n_bytes);
if (!ret) {
ut_free(buf2);
goto error_handling;
}
/* Print about progress for each 100 MB written */
if ((current_size + n_bytes) / (100 << 20)
!= current_size / (100 << 20)) {
fprintf(stderr, " %lu00",
(ulong) ((current_size + n_bytes)
/ (100 << 20)));
break;
}
current_size += n_bytes;
}
if (size >= (os_offset_t) 100 << 20) {
} while (current_size < size);
fprintf(stderr, "\n");
}
ut_free(buf2);
ret = os_file_flush(file);
if (ret) {
return(TRUE);
}
free(buf2);
error_handling:
return(FALSE);
return(ret && os_file_flush(file));
}
/***********************************************************************//**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment