Commit 19733efa authored by Marko Mäkelä

MDEV-14244 MariaDB 10.2.10 fails to run on Debian Stretch with ext3 and O_DIRECT

os_file_set_size(): If posix_fallocate() returns EINVAL, fall back
to extending the file by writing zero-filled blocks. Also, remove the
progress output written to the error log, and make it possible for a
server shutdown to interrupt the fall-back code.

MariaDB 10.2 used to handle the EINVAL return value from posix_fallocate()
before commit b731a5bc,
which refactored os_file_set_size() to try posix_fallocate().

Why is the EINVAL returned? The GNU posix_fallocate() function
would first try the fallocate() system call, which would return
-EOPNOTSUPP for many file systems (notably, not ext4). Then, it
would fall back to extending the file one block at a time by invoking
pwrite(fd, "", 1, offset) where offset is 1 less than a multiple of
the file block size. This would fail with EINVAL if the file is in
O_DIRECT mode, because O_DIRECT requires aligned operations and a
1-byte write at such an offset is not aligned.
parent 0f4e0055
...@@ -5382,13 +5382,21 @@ os_file_set_size( ...@@ -5382,13 +5382,21 @@ os_file_set_size(
} while (err == EINTR } while (err == EINTR
&& srv_shutdown_state == SRV_SHUTDOWN_NONE); && srv_shutdown_state == SRV_SHUTDOWN_NONE);
if (err) { switch (err) {
case 0:
return true;
default:
ib::error() << "preallocating " ib::error() << "preallocating "
<< size << " bytes for file " << name << size << " bytes for file " << name
<< " failed with error " << err; << " failed with error " << err;
/* fall through */
case EINTR:
errno = err;
return false;
case EINVAL:
/* fall back to the code below */
break;
} }
errno = err;
return(!err);
# endif /* HAVE_POSIX_ALLOCATE */ # endif /* HAVE_POSIX_ALLOCATE */
#endif /* _WIN32*/ #endif /* _WIN32*/
...@@ -5410,14 +5418,9 @@ os_file_set_size( ...@@ -5410,14 +5418,9 @@ os_file_set_size(
memset(buf, 0, buf_size); memset(buf, 0, buf_size);
os_offset_t current_size = os_file_get_size(file); os_offset_t current_size = os_file_get_size(file);
bool write_progress_info =
(size - current_size >= (os_offset_t) 100 << 20);
if (write_progress_info) {
ib::info() << "Progress in MB:";
}
while (current_size < size) { while (current_size < size
&& srv_shutdown_state == SRV_SHUTDOWN_NONE) {
ulint n_bytes; ulint n_bytes;
if (size - current_size < (os_offset_t) buf_size) { if (size - current_size < (os_offset_t) buf_size) {
...@@ -5433,32 +5436,15 @@ os_file_set_size( ...@@ -5433,32 +5436,15 @@ os_file_set_size(
request, name, file, buf, current_size, n_bytes); request, name, file, buf, current_size, n_bytes);
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
break;
ut_free(buf2);
return(false);
}
/* Print about progress for each 100 MB written */
if (write_progress_info &&
((current_size + n_bytes) / (100 << 20)
!= current_size / (100 << 20))) {
fprintf(stderr, " %lu00",
(ulong) ((current_size + n_bytes)
/ (100 << 20)));
} }
current_size += n_bytes; current_size += n_bytes;
} }
if (write_progress_info) {
fprintf(stderr, "\n");
}
ut_free(buf2); ut_free(buf2);
return(os_file_flush(file)); return(current_size >= size && os_file_flush(file));
} }
/** Truncates a file to a specified size in bytes. /** Truncates a file to a specified size in bytes.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment