Commit 978179a9 authored by Marko Mäkelä

MDEV-11520 Extending an InnoDB data file unnecessarily allocates a large memory buffer on Windows

fil_extend_space_to_desired_size(), os_file_set_size(): Use calloc()
for memory allocation, and handle failures. Properly check the return
status of posix_fallocate().

On Windows, instead of extending the file by at most 1 megabyte at a time,
write a zero-filled page at the end of the file.
According to the Microsoft blog post
https://blogs.msdn.microsoft.com/oldnewthing/20110922-00/?p=9573
this will physically extend the file by writing zero bytes.
(InnoDB never uses DeviceIoControl() to set the file sparse.)

For innodb_plugin, port the XtraDB fix for MySQL Bug#56433
(introducing fil_system->file_extend_mutex). The bug was
fixed differently in MySQL 5.6 (and MariaDB Server 10.0).
parent 2bfe83ad
/*****************************************************************************
Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2014, 2017, MariaDB Corporation. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -248,6 +249,7 @@ the ib_logfiles form a 'space' and it is handled here */
struct fil_system_struct {
#ifndef UNIV_HOTBACKUP
mutex_t mutex; /*!< The mutex protecting the cache */
mutex_t file_extend_mutex;
#endif /* !UNIV_HOTBACKUP */
hash_table_t* spaces; /*!< The hash table of spaces in the
system; they are hashed on the space
......@@ -1658,6 +1660,8 @@ fil_init(
mutex_create(fil_system_mutex_key,
&fil_system->mutex, SYNC_ANY_LATCH);
mutex_create(fil_system_mutex_key,
&fil_system->file_extend_mutex, SYNC_OUTER_ANY_LATCH);
fil_system->spaces = hash_create(hash_size);
fil_system->name_hash = hash_create(hash_size);
......@@ -4096,6 +4100,10 @@ fil_extend_space_to_desired_size(
ulint page_size;
ibool success = TRUE;
/* fil_system->file_extend_mutex is for http://bugs.mysql.com/56433
to prevent concurrent fil_extend_space_to_desired_size()
while fil_system->mutex is temporarily released */
mutex_enter(&fil_system->file_extend_mutex);
fil_mutex_enter_and_prepare_for_io(space_id);
space = fil_space_get_by_id(space_id);
......@@ -4107,6 +4115,7 @@ fil_extend_space_to_desired_size(
*actual_size = space->size;
mutex_exit(&fil_system->mutex);
mutex_exit(&fil_system->file_extend_mutex);
return(TRUE);
}
......@@ -4123,22 +4132,24 @@ fil_extend_space_to_desired_size(
start_page_no = space->size;
file_start_page_no = space->size - node->size;
mutex_exit(&fil_system->mutex);
#ifdef HAVE_POSIX_FALLOCATE
if (srv_use_posix_fallocate) {
ib_int64_t start_offset = start_page_no * page_size;
ib_int64_t end_offset = (size_after_extend - start_page_no) * page_size;
ib_int64_t desired_size = size_after_extend*page_size;
int err = posix_fallocate(
node->handle, start_offset, end_offset);
mutex_exit(&fil_system->mutex);
success = !err;
if (posix_fallocate(node->handle, start_offset, end_offset) == -1) {
fprintf(stderr, "InnoDB: Error: preallocating file "
"space for file \'%s\' failed. Current size "
" %lld, len %lld, desired size %lld\n",
node->name, start_offset, end_offset, desired_size);
success = FALSE;
} else {
success = TRUE;
if (!success) {
fprintf(stderr,
"InnoDB: Error: extending file %s"
" from %lld to %lld bytes"
" failed with error %d\n",
node->name, start_offset, end_offset, err);
}
mutex_enter(&fil_system->mutex);
......@@ -4154,14 +4165,25 @@ fil_extend_space_to_desired_size(
}
#endif
#ifdef _WIN32
/* Write 1 page of zeroes at the desired end. */
start_page_no = size_after_extend - 1;
buf_size = page_size;
#else
/* Extend at most 64 pages at a time */
buf_size = ut_min(64, size_after_extend - start_page_no) * page_size;
buf2 = mem_alloc(buf_size + page_size);
#endif
buf2 = calloc(1, buf_size + page_size);
if (!buf2) {
fprintf(stderr, "InnoDB: Cannot allocate " ULINTPF
" bytes to extend file\n",
buf_size + page_size);
mutex_exit(&fil_system->file_extend_mutex);
return(FALSE);
}
buf = ut_align(buf2, page_size);
memset(buf, 0, buf_size);
while (start_page_no < size_after_extend) {
for (;;) {
ulint n_pages = ut_min(buf_size / page_size,
size_after_extend - start_page_no);
......@@ -4170,6 +4192,7 @@ fil_extend_space_to_desired_size(
offset_low = ((start_page_no - file_start_page_no)
% (4096 * ((1024 * 1024) / page_size)))
* page_size;
#ifdef UNIV_HOTBACKUP
success = os_file_write(node->name, node->handle, buf,
offset_low, offset_high,
......@@ -4181,34 +4204,37 @@ fil_extend_space_to_desired_size(
page_size * n_pages,
NULL, NULL);
#endif
if (success) {
node->size += n_pages;
space->size += n_pages;
os_has_said_disk_full = FALSE;
} else {
/* Let us measure the size of the file to determine
how much we were able to extend it */
/* Let us measure the size of the file to determine
how much we were able to extend it */
n_pages = ((ulint)
(os_file_get_size_as_iblonglong(
node->handle)
/ page_size)) - node->size;
n_pages = (ulint) (os_file_get_size_as_iblonglong(node->handle)
/ page_size);
node->size += n_pages;
space->size += n_pages;
mutex_enter(&fil_system->mutex);
ut_a(n_pages >= node->size);
start_page_no += n_pages - node->size;
space->size += n_pages - node->size;
node->size = n_pages;
if (success) {
os_has_said_disk_full = FALSE;
}
if (!success || start_page_no >= size_after_extend) {
break;
}
start_page_no += n_pages;
mutex_exit(&fil_system->mutex);
}
mem_free(buf2);
free(buf2);
fil_node_complete_io(node, fil_system, OS_FILE_WRITE);
#ifdef HAVE_POSIX_FALLOCATE
complete_io:
#endif /* HAVE_POSIX_FALLOCATE */
*actual_size = space->size;
......@@ -4228,6 +4254,7 @@ fil_extend_space_to_desired_size(
printf("Extended %s to %lu, actual size %lu pages\n", space->name,
size_after_extend, *actual_size); */
mutex_exit(&fil_system->mutex);
mutex_exit(&fil_system->file_extend_mutex);
fil_flush(space_id);
......
......@@ -675,6 +675,7 @@ or row lock! */
#define SYNC_BUF_BLOCK 146 /* Block mutex */
#define SYNC_BUF_FLUSH_LIST 145 /* Buffer flush list mutex */
#define SYNC_DOUBLEWRITE 140
#define SYNC_OUTER_ANY_LATCH 136
#define SYNC_ANY_LATCH 135
#define SYNC_MEM_HASH 131
#define SYNC_MEM_POOL 130
......
......@@ -2,6 +2,7 @@
Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
Copyright (c) 2012, 2017, MariaDB Corporation. All Rights Reserved.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
......@@ -2027,48 +2028,44 @@ os_file_set_size(
ut_a(size == (size & 0xFFFFFFFF));
current_size = 0;
desired_size = (ib_int64_t)size + (((ib_int64_t)size_high) << 32);
#ifdef HAVE_POSIX_FALLOCATE
if (srv_use_posix_fallocate) {
if (posix_fallocate(file, current_size, desired_size) == -1) {
if (srv_use_posix_fallocate) {
int err = posix_fallocate(file, 0, desired_size);
if (err) {
fprintf(stderr,
"InnoDB: Error: preallocating data for"
" file %s failed at\n"
"InnoDB: offset 0 size %lld %lld. Operating system"
" error number %d.\n"
"InnoDB: Check that the disk is not full"
" or a disk quota exceeded.\n"
"InnoDB: Some operating system error numbers"
" are described at\n"
"InnoDB: "
REFMAN "operating-system-error-codes.html\n",
name, (long long)size_high, (long long)size, errno);
return (FALSE);
"InnoDB: Error: preallocating %lld bytes for"
" file %s failed with error %d.\n",
desired_size, name, err);
}
return (TRUE);
return(!err);
}
#endif
#ifdef _WIN32
/* Write 1 page of zeroes at the desired end. */
buf_size = UNIV_PAGE_SIZE;
current_size = desired_size - buf_size;
#else
/* Write up to 1 megabyte at a time. */
buf_size = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE))
* UNIV_PAGE_SIZE;
buf2 = ut_malloc(buf_size + UNIV_PAGE_SIZE);
current_size = 0;
#endif
buf2 = calloc(1, buf_size + UNIV_PAGE_SIZE);
if (!buf2) {
fprintf(stderr, "InnoDB: Cannot allocate " ULINTPF
" bytes to extend file\n",
buf_size + UNIV_PAGE_SIZE);
return(FALSE);
}
/* Align the buffer for possible raw i/o */
buf = ut_align(buf2, UNIV_PAGE_SIZE);
/* Write buffer full of zeros */
memset(buf, 0, buf_size);
if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {
fprintf(stderr, "InnoDB: Progress in MB:");
}
while (current_size < desired_size) {
do {
ulint n_bytes;
if (desired_size - current_size < (ib_int64_t) buf_size) {
......@@ -2082,37 +2079,14 @@ os_file_set_size(
(ulint)(current_size >> 32),
n_bytes);
if (!ret) {
ut_free(buf2);
goto error_handling;
}
/* Print about progress for each 100 MB written */
if ((ib_int64_t) (current_size + n_bytes) / (ib_int64_t)(100 * 1024 * 1024)
!= current_size / (ib_int64_t)(100 * 1024 * 1024)) {
fprintf(stderr, " %lu00",
(ulong) ((current_size + n_bytes)
/ (ib_int64_t)(100 * 1024 * 1024)));
break;
}
current_size += n_bytes;
}
if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {
fprintf(stderr, "\n");
}
} while (current_size < desired_size);
ut_free(buf2);
ret = os_file_flush(file);
if (ret) {
return(TRUE);
}
error_handling:
return(FALSE);
free(buf2);
return(ret && os_file_flush(file));
}
/***********************************************************************//**
......
/*****************************************************************************
Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2014, 2017, MariaDB Corporation. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -4934,9 +4935,9 @@ fil_extend_space_to_desired_size(
ulint page_size;
ibool success = TRUE;
/* file_extend_mutex is for http://bugs.mysql.com/56433 */
/* to protect from the other fil_extend_space_to_desired_size() */
/* during temprary releasing &fil_system->mutex */
/* fil_system->file_extend_mutex is for http://bugs.mysql.com/56433
to prevent concurrent fil_extend_space_to_desired_size()
while fil_system->mutex is temporarily released */
mutex_enter(&fil_system->file_extend_mutex);
fil_mutex_enter_and_prepare_for_io(space_id);
......@@ -4966,6 +4967,8 @@ fil_extend_space_to_desired_size(
start_page_no = space->size;
file_start_page_no = space->size - node->size;
mutex_exit(&fil_system->mutex);
#ifdef HAVE_POSIX_FALLOCATE
if (srv_use_posix_fallocate) {
......@@ -4973,19 +4976,19 @@ fil_extend_space_to_desired_size(
= file_start_page_no * page_size;
ib_int64_t end_offset
= (size_after_extend - file_start_page_no) * page_size;
int err = posix_fallocate(
node->handle, start_offset, end_offset);
mutex_exit(&fil_system->mutex);
success = (posix_fallocate(node->handle, start_offset,
end_offset) == 0);
if (!success)
{
success = !err;
if (!success) {
fprintf(stderr,
"InnoDB: Error: preallocating file space for "
"file \'%s\' failed. Current size %lld, "
"len %lld, desired size %lld\n", node->name,
start_offset, end_offset,
start_offset + end_offset);
"InnoDB: Error: extending file %s"
" from %lld to %lld bytes"
" failed with error %d\n",
node->name, start_offset, end_offset, err);
}
mutex_enter(&fil_system->mutex);
if (success) {
......@@ -4999,14 +5002,25 @@ fil_extend_space_to_desired_size(
}
#endif
#ifdef _WIN32
/* Write 1 page of zeroes at the desired end. */
start_page_no = size_after_extend - 1;
buf_size = page_size;
#else
/* Extend at most 64 pages at a time */
buf_size = ut_min(64, size_after_extend - start_page_no) * page_size;
buf2 = mem_alloc(buf_size + page_size);
#endif
buf2 = calloc(1, buf_size + page_size);
if (!buf2) {
fprintf(stderr, "InnoDB: Cannot allocate " ULINTPF
" bytes to extend file\n",
buf_size + page_size);
mutex_exit(&fil_system->file_extend_mutex);
return(FALSE);
}
buf = ut_align(buf2, page_size);
memset(buf, 0, buf_size);
while (start_page_no < size_after_extend) {
for (;;) {
ulint n_pages = ut_min(buf_size / page_size,
size_after_extend - start_page_no);
......@@ -5016,7 +5030,6 @@ fil_extend_space_to_desired_size(
% (4096 * ((1024 * 1024) / page_size)))
* page_size;
mutex_exit(&fil_system->mutex);
#ifdef UNIV_HOTBACKUP
success = os_file_write(node->name, node->handle, buf,
offset_low, offset_high,
......@@ -5028,36 +5041,37 @@ fil_extend_space_to_desired_size(
page_size * n_pages,
NULL, NULL, space_id, NULL);
#endif
mutex_enter(&fil_system->mutex);
if (success) {
node->size += n_pages;
space->size += n_pages;
/* Let us measure the size of the file to determine
how much we were able to extend it */
os_has_said_disk_full = FALSE;
} else {
/* Let us measure the size of the file to determine
how much we were able to extend it */
n_pages = (ulint) (os_file_get_size_as_iblonglong(node->handle)
/ page_size);
n_pages = ((ulint)
(os_file_get_size_as_iblonglong(
node->handle)
/ page_size)) - node->size;
mutex_enter(&fil_system->mutex);
ut_a(n_pages >= node->size);
node->size += n_pages;
space->size += n_pages;
start_page_no += n_pages - node->size;
space->size += n_pages - node->size;
node->size = n_pages;
if (success) {
os_has_said_disk_full = FALSE;
}
if (!success || start_page_no >= size_after_extend) {
break;
}
start_page_no += n_pages;
mutex_exit(&fil_system->mutex);
}
mem_free(buf2);
free(buf2);
fil_node_complete_io(node, fil_system, OS_FILE_WRITE);
#ifdef HAVE_POSIX_FALLOCATE
complete_io:
#endif /* HAVE_POSIX_FALLOCATE */
*actual_size = space->size;
......
......@@ -2,6 +2,7 @@
Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
Copyright (c) 2011, 2017, MariaDB Corporation. All Rights Reserved.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
......@@ -2184,42 +2185,44 @@ os_file_set_size(
ut_a(size == (size & 0xFFFFFFFF));
current_size = 0;
desired_size = (ib_int64_t)size + (((ib_int64_t)size_high) << 32);
#ifdef HAVE_POSIX_FALLOCATE
if (srv_use_posix_fallocate) {
if (posix_fallocate(file, current_size, desired_size) == -1) {
fprintf(stderr, "InnoDB: Error: preallocating file "
"space for file \'%s\' failed. Current size "
"%lld, desired size %lld\n",
name, current_size, desired_size);
os_file_handle_error_no_exit(name, "posix_fallocate");
return(FALSE);
int err = posix_fallocate(file, 0, desired_size);
if (err) {
fprintf(stderr,
"InnoDB: Error: preallocating %lld bytes for"
" file %s failed with error %d.\n",
desired_size, name, err);
}
return(TRUE);
return(!err);
}
#endif
#ifdef _WIN32
/* Write 1 page of zeroes at the desired end. */
buf_size = UNIV_PAGE_SIZE;
current_size = desired_size - buf_size;
#else
/* Write up to 1 megabyte at a time. */
buf_size = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE))
* UNIV_PAGE_SIZE;
buf2 = ut_malloc(buf_size + UNIV_PAGE_SIZE);
current_size = 0;
#endif
buf2 = calloc(1, buf_size + UNIV_PAGE_SIZE);
if (!buf2) {
fprintf(stderr, "InnoDB: Cannot allocate " ULINTPF
" bytes to extend file\n",
buf_size + UNIV_PAGE_SIZE);
return(FALSE);
}
/* Align the buffer for possible raw i/o */
buf = ut_align(buf2, UNIV_PAGE_SIZE);
/* Write buffer full of zeros */
memset(buf, 0, buf_size);
if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {
fprintf(stderr, "InnoDB: Progress in MB:");
}
while (current_size < desired_size) {
do {
ulint n_bytes;
if (desired_size - current_size < (ib_int64_t) buf_size) {
......@@ -2233,37 +2236,14 @@ os_file_set_size(
(ulint)(current_size >> 32),
n_bytes);
if (!ret) {
ut_free(buf2);
goto error_handling;
}
/* Print about progress for each 100 MB written */
if ((ib_int64_t) (current_size + n_bytes) / (ib_int64_t)(100 * 1024 * 1024)
!= current_size / (ib_int64_t)(100 * 1024 * 1024)) {
fprintf(stderr, " %lu00",
(ulong) ((current_size + n_bytes)
/ (ib_int64_t)(100 * 1024 * 1024)));
break;
}
current_size += n_bytes;
}
if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {
fprintf(stderr, "\n");
}
ut_free(buf2);
ret = os_file_flush(file, TRUE);
if (ret) {
return(TRUE);
}
} while (current_size < desired_size);
error_handling:
return(FALSE);
free(buf2);
return(ret && os_file_flush(file, TRUE));
}
/***********************************************************************//**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment