Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
96d3a797
Commit
96d3a797
authored
Oct 12, 2012
by
Sergei Golubchik
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Percona-Server-5.5.27-rel29.0
parent
0352f09a
Changes
23
Hide whitespace changes
Inline
Side-by-side
Showing
23 changed files
with
2017 additions
and
62 deletions
+2017
-62
CMakeLists.txt
CMakeLists.txt
+30
-1
btr/btr0sea.c
btr/btr0sea.c
+9
-0
buf/buf0buf.c
buf/buf0buf.c
+13
-0
buf/buf0lru.c
buf/buf0lru.c
+78
-34
handler/ha_innodb.cc
handler/ha_innodb.cc
+24
-7
handler/i_s.cc
handler/i_s.cc
+295
-0
handler/i_s.h
handler/i_s.h
+1
-0
include/buf0lru.h
include/buf0lru.h
+5
-6
include/log0log.h
include/log0log.h
+5
-0
include/log0online.h
include/log0online.h
+111
-0
include/log0recv.h
include/log0recv.h
+37
-0
include/os0file.h
include/os0file.h
+9
-0
include/os0sync.h
include/os0sync.h
+24
-4
include/srv0srv.h
include/srv0srv.h
+23
-0
include/univ.i
include/univ.i
+1
-1
include/ut0rbt.h
include/ut0rbt.h
+22
-0
log/log0log.c
log/log0log.c
+124
-2
log/log0online.c
log/log0online.c
+1085
-0
log/log0recv.c
log/log0recv.c
+5
-3
os/os0file.c
os/os0file.c
+20
-0
srv/srv0srv.c
srv/srv0srv.c
+52
-0
srv/srv0start.c
srv/srv0start.c
+17
-2
ut/ut0rbt.c
ut/ut0rbt.c
+27
-2
No files found.
CMakeLists.txt
View file @
96d3a797
...
...
@@ -85,12 +85,41 @@ IF(NOT CMAKE_CROSSCOMPILING)
}"
HAVE_IB_GCC_ATOMIC_BUILTINS
)
CHECK_C_SOURCE_RUNS
(
"
#include <stdint.h>
int main()
{
int64_t x, y, res;
x = 10;
y = 123;
res = __sync_bool_compare_and_swap(&x, x, y);
if (!res || x != y) {
return(1);
}
x = 10;
y = 123;
res = __sync_add_and_fetch(&x, y);
if (res != 123 + 10 || x != 123 + 10) {
return(1);
}
return(0);
}"
HAVE_IB_GCC_ATOMIC_BUILTINS_64
)
ENDIF
()
IF
(
HAVE_IB_GCC_ATOMIC_BUILTINS
)
ADD_DEFINITIONS
(
-DHAVE_IB_GCC_ATOMIC_BUILTINS=1
)
ENDIF
()
IF
(
HAVE_IB_GCC_ATOMIC_BUILTINS_64
)
ADD_DEFINITIONS
(
-DHAVE_IB_GCC_ATOMIC_BUILTINS_64=1
)
ENDIF
()
# either define HAVE_IB_ATOMIC_PTHREAD_T_GCC or not
IF
(
NOT CMAKE_CROSSCOMPILING
)
CHECK_C_SOURCE_RUNS
(
...
...
@@ -227,7 +256,7 @@ SET(INNOBASE_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c
ibuf/ibuf0ibuf.c
pars/lexyy.c pars/pars0grm.c pars/pars0opt.c pars/pars0pars.c pars/pars0sym.c
lock/lock0lock.c lock/lock0iter.c
log/log0log.c log/log0recv.c
log/log0log.c log/log0recv.c
log/log0online.c
mach/mach0data.c
mem/mem0mem.c mem/mem0pool.c
mtr/mtr0log.c mtr/mtr0mtr.c
...
...
btr/btr0sea.c
View file @
96d3a797
...
...
@@ -183,6 +183,15 @@ btr_search_sys_create(
//rw_lock_create(btr_search_latch_key, &btr_search_latch,
// SYNC_SEARCH_SYS);
/* PS bug lp:1018264 - Multiple hash index partitions cause an overly
large hash index: when multiple adaptive hash index partitions are
specified, _each_ partition was being created with the full hash_size
(1/64 of the total size of all buffer pools). That is incorrect and
can cause overly high memory usage: hash_size should represent the
_total_ size of all partitions, not the individual size of each
partition. */
hash_size
/=
btr_search_index_num
;
btr_search_sys
=
mem_alloc
(
sizeof
(
btr_search_sys_t
));
/* btr_search_index_num should be <= 32. (bits of trx->has_search_latch) */
...
...
buf/buf0buf.c
View file @
96d3a797
...
...
@@ -2838,6 +2838,7 @@ buf_page_get_gen(
&&
ibuf_debug
)
{
/* Try to evict the block from the buffer pool, to use the
insert buffer (change buffer) as much as possible. */
ulint
page_no
=
buf_block_get_page_no
(
block
);
if
(
buf_LRU_free_block
(
&
block
->
page
,
TRUE
,
FALSE
))
{
mutex_exit
(
block_mutex
);
...
...
@@ -2864,6 +2865,18 @@ buf_page_get_gen(
"innodb_change_buffering_debug evict %u %u
\n
"
,
(
unsigned
)
space
,
(
unsigned
)
offset
);
return
(
NULL
);
}
else
if
(
UNIV_UNLIKELY
(
buf_block_get_state
(
block
)
!=
BUF_BLOCK_FILE_PAGE
||
(
buf_block_get_page_no
(
block
)
!=
page_no
)
||
(
buf_block_get_space
(
block
)
!=
space
)))
{
/* buf_LRU_free_block temporarily releases the
block mutex, and now block points to something
else. */
mutex_exit
(
block_mutex
);
block
=
NULL
;
goto
loop2
;
}
else
if
(
buf_flush_page_try
(
buf_pool
,
block
))
{
fprintf
(
stderr
,
"innodb_change_buffering_debug flush %u %u
\n
"
,
...
...
buf/buf0lru.c
View file @
96d3a797
...
...
@@ -2531,6 +2531,14 @@ buf_LRU_stat_update(void)
Dump the LRU page list to the specific file. */
#define LRU_DUMP_FILE "ib_lru_dump"
#define LRU_DUMP_TEMP_FILE "ib_lru_dump.tmp"
#define LRU_OS_FILE_WRITE() \
os_file_write(LRU_DUMP_FILE, dump_file, buffer, \
(buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL, \
(buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)), \
buffer_size)
#define LRU_DUMP_PAGE_COUNT 1
/* Specifies how many dump pages
should be filled for each hold
of the LRU_list_mutex. */
UNIV_INTERN
ibool
...
...
@@ -2541,23 +2549,30 @@ buf_LRU_file_dump(void)
ibool
success
;
byte
*
buffer_base
=
NULL
;
byte
*
buffer
=
NULL
;
const
ulint
buffer_size
=
LRU_DUMP_PAGE_COUNT
*
UNIV_PAGE_SIZE
;
buf_page_t
*
bpage
;
buf_page_t
*
first_bpage
;
ulint
buffers
;
ulint
offset
;
ibool
ret
=
FALSE
;
ulint
pages_written
;
ulint
i
;
ulint
total_pages
;
/* Sanity test to make sure page size is a multiple of
assumed dump record size */
ut_a
(
UNIV_PAGE_SIZE
%
8
==
0
);
for
(
i
=
0
;
i
<
srv_n_data_files
;
i
++
)
{
if
(
strstr
(
srv_data_file_names
[
i
],
LRU_DUMP_FILE
)
!=
NULL
)
{
fprintf
(
stderr
,
" InnoDB: The name '%s' seems to be used for"
" innodb_data_file_path.
For safety, dumping of the LRU list
"
"
is not being done
.
\n
"
,
LRU_DUMP_FILE
);
" innodb_data_file_path.
Dumping LRU list is
"
"
not done for safeness
.
\n
"
,
LRU_DUMP_FILE
);
goto
end
;
}
}
buffer_base
=
ut_malloc
(
2
*
UNIV_PAGE_SIZE
);
buffer_base
=
ut_malloc
(
UNIV_PAGE_SIZE
+
buffer_size
);
buffer
=
ut_align
(
buffer_base
,
UNIV_PAGE_SIZE
);
if
(
!
buffer
)
{
fprintf
(
stderr
,
...
...
@@ -2577,18 +2592,28 @@ buf_LRU_file_dump(void)
}
buffers
=
offset
=
0
;
for
(
i
=
0
;
i
<
srv_buf_pool_instances
;
i
++
)
{
buf_pool_t
*
buf_pool
;
buf_pool
=
buf_pool_from_array
(
i
);
mutex_enter
(
&
buf_pool
->
LRU_list_mutex
);
bpage
=
UT_LIST_GET_LAST
(
buf_pool
->
LRU
);
bpage
=
first_bpage
=
UT_LIST_GET_FIRST
(
buf_pool
->
LRU
);
total_pages
=
UT_LIST_GET_LEN
(
buf_pool
->
LRU
);
while
(
bpage
!=
NULL
)
{
if
(
offset
==
0
)
{
memset
(
buffer
,
0
,
UNIV_PAGE_SIZE
);
pages_written
=
0
;
while
(
bpage
!=
NULL
&&
(
pages_written
++
<
total_pages
))
{
buf_page_t
*
next_bpage
=
UT_LIST_GET_NEXT
(
LRU
,
bpage
);
if
(
next_bpage
==
first_bpage
)
{
/* Do not release list mutex here, it will be
released just outside this while loop */
fprintf
(
stderr
,
"InnoDB: detected cycle in LRU for"
" buffer pool %lu, skipping to next"
" buffer pool.
\n
"
,
i
);
break
;
}
mach_write_to_4
(
buffer
+
offset
*
4
,
bpage
->
space
);
...
...
@@ -2596,52 +2621,71 @@ buf_LRU_file_dump(void)
mach_write_to_4
(
buffer
+
offset
*
4
,
bpage
->
offset
);
offset
++
;
if
(
offset
==
UNIV_PAGE_SIZE
/
4
)
{
ut_a
(
offset
<=
buffer_size
);
if
(
offset
==
buffer_size
/
4
)
{
mutex_t
*
next_block_mutex
=
NULL
;
if
(
srv_shutdown_state
!=
SRV_SHUTDOWN_NONE
)
{
success
=
0
;
mutex_exit
(
&
buf_pool
->
LRU_list_mutex
);
success
=
FALSE
;
fprintf
(
stderr
,
" InnoDB: stopped dumping lru"
" pages because of server"
" shutdown.
\n
"
);
goto
end
;
}
/* While writing file, release buffer pool
mutex but keep the next page fixed so we
don't worry about our list iterator becoming
invalid */
if
(
next_bpage
)
{
next_block_mutex
=
buf_page_get_mutex
(
next_bpage
);
mutex_enter
(
next_block_mutex
);
next_bpage
->
buf_fix_count
++
;
mutex_exit
(
next_block_mutex
);
}
mutex_exit
(
&
buf_pool
->
LRU_list_mutex
);
success
=
LRU_OS_FILE_WRITE
();
/* Grab this here so that next_bpage can't
be purged when we drop the fix_count */
mutex_enter
(
&
buf_pool
->
LRU_list_mutex
);
if
(
next_bpage
)
{
mutex_enter
(
next_block_mutex
);
next_bpage
->
buf_fix_count
--
;
mutex_exit
(
next_block_mutex
);
}
success
=
os_file_write
(
LRU_DUMP_FILE
,
dump_file
,
buffer
,
(
buffers
<<
UNIV_PAGE_SIZE_SHIFT
)
&
0xFFFFFFFFUL
,
(
buffers
>>
(
32
-
UNIV_PAGE_SIZE_SHIFT
)),
UNIV_PAGE_SIZE
);
if
(
!
success
)
{
mutex_exit
(
&
buf_pool
->
LRU_list_mutex
);
fprintf
(
stderr
,
" InnoDB: cannot write page %lu of %s
\n
"
,
" InnoDB: cannot write page"
" %lu of %s
\n
"
,
buffers
,
LRU_DUMP_FILE
);
goto
end
;
}
buffers
++
;
offset
=
0
;
}
bpage
=
UT_LIST_GET_PREV
(
LRU
,
bpage
);
}
bpage
=
next_bpage
;
}
else
{
bpage
=
UT_LIST_GET_NEXT
(
LRU
,
bpage
);
}
}
/* while(bpage ...) */
mutex_exit
(
&
buf_pool
->
LRU_list_mutex
);
}
if
(
offset
==
0
)
{
memset
(
buffer
,
0
,
UNIV_PAGE_SIZE
);
}
}
/* for(srv_buf_pool_instances ...) */
mach_write_to_4
(
buffer
+
offset
*
4
,
0xFFFFFFFFUL
);
offset
++
;
mach_write_to_4
(
buffer
+
offset
*
4
,
0xFFFFFFFFUL
);
offset
++
;
success
=
os_file_write
(
LRU_DUMP_FILE
,
dump_file
,
buffer
,
(
buffers
<<
UNIV_PAGE_SIZE_SHIFT
)
&
0xFFFFFFFFUL
,
(
buffers
>>
(
32
-
UNIV_PAGE_SIZE_SHIFT
)),
UNIV_PAGE_SIZE
);
if
(
!
success
)
{
goto
end
;
}
ret
=
TRUE
;
success
=
LRU_OS_FILE_WRITE
();
end:
if
(
dump_file
!=
-
1
)
{
if
(
success
)
{
...
...
@@ -2656,7 +2700,7 @@ buf_LRU_file_dump(void)
if
(
buffer_base
)
ut_free
(
buffer_base
);
return
(
ret
);
return
(
success
);
}
typedef
struct
{
...
...
handler/ha_innodb.cc
View file @
96d3a797
...
...
@@ -361,7 +361,8 @@ static PSI_thread_info all_innodb_threads[] = {
{
&
srv_error_monitor_thread_key
,
"srv_error_monitor_thread"
,
0
},
{
&
srv_monitor_thread_key
,
"srv_monitor_thread"
,
0
},
{
&
srv_master_thread_key
,
"srv_master_thread"
,
0
},
{
&
srv_purge_thread_key
,
"srv_purge_thread"
,
0
}
{
&
srv_purge_thread_key
,
"srv_purge_thread"
,
0
},
{
&
srv_log_tracking_thread_key
,
"srv_redo_log_follow_thread"
,
0
}
};
# endif
/* UNIV_PFS_THREAD */
...
...
@@ -371,7 +372,8 @@ performance schema instrumented if "UNIV_PFS_IO" is defined */
static
PSI_file_info
all_innodb_files
[]
=
{
{
&
innodb_file_data_key
,
"innodb_data_file"
,
0
},
{
&
innodb_file_log_key
,
"innodb_log_file"
,
0
},
{
&
innodb_file_temp_key
,
"innodb_temp_file"
,
0
}
{
&
innodb_file_temp_key
,
"innodb_temp_file"
,
0
},
{
&
innodb_file_bmp_key
,
"innodb_bmp_file"
,
0
}
};
# endif
/* UNIV_PFS_IO */
#endif
/* HAVE_PSI_INTERFACE */
...
...
@@ -12454,8 +12456,8 @@ static MYSQL_SYSVAR_BOOL(use_sys_stats_table, innobase_use_sys_stats_table,
NULL
,
NULL
,
FALSE
);
#ifdef UNIV_DEBUG
static
MYSQL_SYSVAR_ULONG
(
sys_stats_root_page
,
innobase_sys
_stats_root_page
,
PLUGIN_VAR_RQCMDARG
|
PLUGIN_VAR_READONLY
,
static
MYSQL_SYSVAR_ULONG
(
persistent
_stats_root_page
,
innobase_sys_stats_root_page
,
PLUGIN_VAR_RQCMDARG
|
PLUGIN_VAR_READONLY
,
"Override the SYS_STATS root page id, 0 = no override (for testing only)"
,
NULL
,
NULL
,
0
,
0
,
ULONG_MAX
,
0
);
#endif
...
...
@@ -12659,6 +12661,18 @@ static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method,
"NULLS_UNEQUAL and NULLS_IGNORED"
,
NULL
,
NULL
,
SRV_STATS_NULLS_EQUAL
,
&
innodb_stats_method_typelib
);
static
MYSQL_SYSVAR_BOOL
(
track_changed_pages
,
srv_track_changed_pages
,
PLUGIN_VAR_NOCMDARG
|
PLUGIN_VAR_READONLY
,
"Track the redo log for changed pages and output a changed page bitmap"
,
NULL
,
NULL
,
FALSE
);
static
MYSQL_SYSVAR_ULONGLONG
(
changed_pages_limit
,
srv_changed_pages_limit
,
PLUGIN_VAR_RQCMDARG
,
"The maximum number of rows for "
"INFORMATION_SCHEMA.INNODB_CHANGED_PAGES table, "
"0 - unlimited"
,
NULL
,
NULL
,
1000000
,
0
,
~
0ULL
,
0
);
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
static
MYSQL_SYSVAR_UINT
(
change_buffering_debug
,
ibuf_debug
,
PLUGIN_VAR_RQCMDARG
,
...
...
@@ -12823,7 +12837,7 @@ static MYSQL_SYSVAR_UINT(buffer_pool_restore_at_startup, srv_auto_lru_dump,
static
MYSQL_SYSVAR_BOOL
(
blocking_buffer_pool_restore
,
innobase_blocking_lru_restore
,
PLUGIN_VAR_
NO
CMDARG
|
PLUGIN_VAR_READONLY
,
PLUGIN_VAR_
OP
CMDARG
|
PLUGIN_VAR_READONLY
,
"Block XtraDB startup process until buffer pool is full restored from a "
"dump file (if present). Disabled by default."
,
NULL
,
NULL
,
FALSE
);
...
...
@@ -12911,7 +12925,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR
(
stats_update_need_lock
),
MYSQL_SYSVAR
(
use_sys_stats_table
),
#ifdef UNIV_DEBUG
MYSQL_SYSVAR
(
sys
_stats_root_page
),
MYSQL_SYSVAR
(
persistent
_stats_root_page
),
#endif
MYSQL_SYSVAR
(
stats_sample_pages
),
MYSQL_SYSVAR
(
adaptive_hash_index
),
...
...
@@ -12943,6 +12957,8 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR
(
use_sys_malloc
),
MYSQL_SYSVAR
(
use_native_aio
),
MYSQL_SYSVAR
(
change_buffering
),
MYSQL_SYSVAR
(
track_changed_pages
),
MYSQL_SYSVAR
(
changed_pages_limit
),
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
MYSQL_SYSVAR
(
change_buffering_debug
),
#endif
/* UNIV_DEBUG || UNIV_IBUF_DEBUG */
...
...
@@ -13002,7 +13018,8 @@ i_s_innodb_index_stats,
i_s_innodb_buffer_pool_pages
,
i_s_innodb_buffer_pool_pages_index
,
i_s_innodb_buffer_pool_pages_blob
,
i_s_innodb_admin_command
i_s_innodb_admin_command
,
i_s_innodb_changed_pages
mysql_declare_plugin_end
;
/** @brief Initialize the default value of innodb_commit_concurrency.
...
...
handler/i_s.cc
View file @
96d3a797
...
...
@@ -22,6 +22,14 @@ InnoDB INFORMATION SCHEMA tables interface to MySQL.
Created July 18, 2007 Vasil Dimov
*******************************************************/
#ifndef MYSQL_SERVER
#define MYSQL_SERVER
/* For Item_* classes */
#include <item.h>
/* Prevent influence of this definition to other headers */
#undef MYSQL_SERVER
#else
#include <mysql_priv.h>
#endif //MYSQL_SERVER
#include <mysqld_error.h>
#include <sql_acl.h> // PROCESS_ACL
...
...
@@ -44,6 +52,7 @@ extern "C" {
#include "dict0mem.h"
#include "dict0types.h"
#include "ha_prototypes.h"
/* for innobase_convert_name() */
#include "srv0srv.h"
/* for srv_track_changed_pages */
#include "srv0start.h"
/* for srv_was_started */
#include "trx0i_s.h"
#include "trx0trx.h"
/* for TRX_QUE_STATE_STR_MAX_LEN */
...
...
@@ -53,6 +62,7 @@ extern "C" {
#include "dict0dict.h"
/* for dict_sys */
#include "buf0lru.h"
/* for XTRA_LRU_[DUMP/RESTORE] */
#include "btr0btr.h"
/* for btr_page_get_index_id */
#include "log0online.h"
}
#define OK(expr) \
...
...
@@ -5389,3 +5399,288 @@ UNIV_INTERN struct st_mysql_plugin i_s_innodb_undo_logs =
STRUCT_FLD
(
flags
,
0UL
),
};
/* Column layout of INFORMATION_SCHEMA.INNODB_CHANGED_PAGES.  The position
of each entry is the index used with table->field[] when rows are filled:
0 = space_id, 1 = page_id, 2 = start_lsn, 3 = end_lsn. */
static ST_FIELD_INFO	i_s_innodb_changed_pages_info[] =
{
	/* [0] space_id */
	{STRUCT_FLD(field_name,		"space_id"),
	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
	 STRUCT_FLD(value,		0),
	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
	 STRUCT_FLD(old_name,		""),
	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},

	/* [1] page_id */
	{STRUCT_FLD(field_name,		"page_id"),
	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
	 STRUCT_FLD(value,		0),
	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
	 STRUCT_FLD(old_name,		""),
	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},

	/* [2] start_lsn */
	{STRUCT_FLD(field_name,		"start_lsn"),
	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
	 STRUCT_FLD(value,		0),
	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
	 STRUCT_FLD(old_name,		""),
	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},

	/* [3] end_lsn */
	{STRUCT_FLD(field_name,		"end_lsn"),
	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
	 STRUCT_FLD(value,		0),
	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
	 STRUCT_FLD(old_name,		""),
	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},

	END_OF_ST_FIELD_INFO
};
/***********************************************************************
Parses a WHERE condition and, if it matches the pattern below, lowers
*max_lsn to the smallest upper bound found for start_lsn / end_lsn.

The lower bound of a range cannot be used to skip ahead: the bitmap files
have to be scanned from the beginning anyway.  The scan can, however, be
stopped as soon as the upper bound is reached.

The most common queries are expected to look like
  SELECT * FROM INNODB_CHANGED_PAGES
  WHERE START_LSN > num1 AND start_lsn < num2;
hence the recognised pattern is:

  pattern:         comp | and_comp;
  comp:            lsn < int_num | lsn <= int_num
                   | int_num > lsn | int_num >= lsn;
  lsn:             start_lsn | end_lsn;
  and_comp:        some_expression AND some_expression
                   | some_expression AND and_comp;
  some_expression: comp | any_other_expression;

Suppose the condition is start_lsn < 100: all blocks with start_lsn < 100
have to be read.  That is equivalent to reading all blocks with
end_lsn <= 99, or just end_lsn < 100.  It is therefore enough to find the
maximum LSN value, no matter whether it was given for the start or the end
LSN, and to compare it with the "start_lsn" field.

Example:
  SELECT * FROM INNODB_CHANGED_PAGES
  WHERE
    start_lsn > 10 AND
    end_lsn <= 1111 AND
    555 > end_lsn AND
    page_id = 100;

max_lsn will be set to 555.

Conditions that do not match the pattern leave *max_lsn untouched. */
static
void
limit_lsn_range_from_condition(
/*===========================*/
	TABLE*		table,		/*!<in: table */
	COND*		cond,		/*!<in: condition */
	ib_uint64_t*	max_lsn)	/*!<in/out: maximum LSN
					(must be initialized with maximum
					available value) */
{
	/* Only AND lists and comparison functions are of interest. */
	if (cond->type() != Item::COND_ITEM
	    && cond->type() != Item::FUNC_ITEM) {
		return;
	}

	Item_func*	func = (Item_func*) cond;

	switch (func->functype()) {
	case Item_func::COND_AND_FUNC:
	{
		/* Every conjunct may tighten the bound further. */
		List_iterator<Item>	li(
			*((Item_cond*) cond)->argument_list());
		Item*			item;

		while ((item = li++)) {
			limit_lsn_range_from_condition(table, item, max_lsn);
		}
		break;
	}
	case Item_func::LT_FUNC:
	case Item_func::LE_FUNC:
	case Item_func::GT_FUNC:
	case Item_func::GE_FUNC:
	{
		/* a <= b equals b >= a, so for ">" and ">=" the two
		arguments are simply exchanged; afterwards "left" is
		always the side expected to be the LSN column and "right"
		the side expected to be the integer constant. */
		const bool	lsn_is_first_arg
			= func->functype() == Item_func::LT_FUNC
			|| func->functype() == Item_func::LE_FUNC;

		Item*	left = func->arguments()[lsn_is_first_arg ? 0 : 1];
		Item*	right = func->arguments()[lsn_is_first_arg ? 1 : 0];

		if (!left || !right) {
			return;
		}
		if (left->type() != Item::FIELD_ITEM) {
			return;
		}
		if (right->type() != Item::INT_ITEM) {
			return;
		}

		Item_field*	item_field = (Item_field*) left;

		if (/* START_LSN */
		    table->field[2] != item_field->field
		    && /* END_LSN */
		    table->field[3] != item_field->field) {
			return;
		}

		/* Check if the current field belongs to our table */
		if (table != item_field->field->table) {
			return;
		}

		/* Keep the smallest upper bound seen so far. */
		ib_uint64_t	tmp_result = right->val_int();

		if (tmp_result < *max_lsn) {
			*max_lsn = tmp_result;
		}
		break;
	}
	default:
		break;
	}
}
/***********************************************************************
Fill the dynamic table information_schema.innodb_changed_pages.
@return 0 on success, 1 on failure */
static
int
i_s_innodb_changed_pages_fill(
/*==========================*/
	THD*		thd,	/*!<in: thread */
	TABLE_LIST*	tables,	/*!<in/out: tables to fill */
	COND*		cond)	/*!<in: condition */
{
	TABLE*			table = (TABLE *) tables->table;
	log_bitmap_iterator_t	i;
	ib_uint64_t		output_rows_num = 0UL;
	ib_uint64_t		max_lsn = ~0ULL;
	int			ret = 0;

	/* Nothing to report when change tracking is switched off. */
	if (!srv_track_changed_pages) {
		return 0;
	}

	if (!log_online_bitmap_iterator_init(&i)) {
		return 1;
	}

	if (cond) {
		limit_lsn_range_from_condition(table, cond, &max_lsn);
	}

	while (log_online_bitmap_iterator_next(&i)) {

		/* A limit of 0 means "unlimited". */
		if (srv_changed_pages_limit
		    && output_rows_num >= srv_changed_pages_limit) {
			break;
		}

		/* There is no need to compare both the start LSN and the
		end LSN with the maximum value; comparing the start LSN is
		enough.  With max_lsn = 100, "start_lsn < 100" selects every
		bitmap that starts before 100 and "end_lsn < 100" selects
		every bitmap that ends before 100; in both cases all wanted
		bitmaps have been read once a bitmap whose start LSN exceeds
		max_lsn is reached, so the scan can stop there. */
		if (LOG_BITMAP_ITERATOR_START_LSN(i) > max_lsn) {
			break;
		}

		if (!LOG_BITMAP_ITERATOR_PAGE_CHANGED(i)) {
			continue;
		}

		/* SPACE_ID */
		table->field[0]->store(LOG_BITMAP_ITERATOR_SPACE_ID(i));
		/* PAGE_ID */
		table->field[1]->store(LOG_BITMAP_ITERATOR_PAGE_NUM(i));
		/* START_LSN */
		table->field[2]->store(LOG_BITMAP_ITERATOR_START_LSN(i));
		/* END_LSN */
		table->field[3]->store(LOG_BITMAP_ITERATOR_END_LSN(i));

		/* I_S tables are in-memory tables, so a large bitmap file
		could consume a lot of memory.  To keep that down, the WHERE
		condition is evaluated here against the field values stored
		above and non-matching rows are dropped.  The condition is
		therefore checked twice: here, while the table is generated,
		and again during query execution.  Maybe it makes sense to
		use some flag in the THD object to avoid double checking. */
		if (cond && !cond->val_int()) {
			continue;
		}

		if (schema_table_store_record(thd, table)) {
			ret = 1;
			break;
		}

		++output_rows_num;
	}

	log_online_bitmap_iterator_release(&i);
	return ret;
}
static
int
i_s_innodb_changed_pages_init
(
/*==========================*/
void
*
p
)
{
DBUG_ENTER
(
"i_s_innodb_changed_pages_init"
);
ST_SCHEMA_TABLE
*
schema
=
(
ST_SCHEMA_TABLE
*
)
p
;
schema
->
fields_info
=
i_s_innodb_changed_pages_info
;
schema
->
fill_table
=
i_s_innodb_changed_pages_fill
;
DBUG_RETURN
(
0
);
}
/* Plugin descriptor for the INFORMATION_SCHEMA.INNODB_CHANGED_PAGES table. */
UNIV_INTERN struct st_mysql_plugin	i_s_innodb_changed_pages =
{
	/* plugin type */
	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),

	/* plugin-type specific info */
	STRUCT_FLD(info, &i_s_info),

	/* plugin (and table) name */
	STRUCT_FLD(name, "INNODB_CHANGED_PAGES"),

	/* plugin author */
	STRUCT_FLD(author, "Percona"),

	/* general descriptive text */
	STRUCT_FLD(descr, "InnoDB CHANGED_PAGES table"),

	/* plugin license */
	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),

	/* function invoked when the plugin is loaded */
	STRUCT_FLD(init, i_s_innodb_changed_pages_init),

	/* function invoked when the plugin is unloaded */
	STRUCT_FLD(deinit, i_s_common_deinit),

	/* plugin version */
	STRUCT_FLD(version, 0x0100 /* 1.0 */),

	/* no status variables */
	STRUCT_FLD(status_vars, NULL),

	/* no system variables */
	STRUCT_FLD(system_vars, NULL),

	/* reserved */
	STRUCT_FLD(__reserved1, NULL),

	/* flags */
	STRUCT_FLD(flags, 0UL),
};
handler/i_s.h
View file @
96d3a797
...
...
@@ -51,5 +51,6 @@ extern struct st_mysql_plugin i_s_innodb_admin_command;
extern
struct
st_mysql_plugin
i_s_innodb_buffer_pool_pages
;
extern
struct
st_mysql_plugin
i_s_innodb_buffer_pool_pages_index
;
extern
struct
st_mysql_plugin
i_s_innodb_buffer_pool_pages_blob
;
extern
struct
st_mysql_plugin
i_s_innodb_changed_pages
;
#endif
/* i_s_h */
include/buf0lru.h
View file @
96d3a797
...
...
@@ -94,13 +94,12 @@ buf_LRU_insert_zip_clean(
Try to free a block. If bpage is a descriptor of a compressed-only
page, the descriptor object will be freed as well.
NOTE: If this function returns TRUE, it will temporarily
release buf_pool->mutex. Furthermore, the page frame will no longer be
accessible via bpage.
NOTE: This will temporarily release buf_pool_mutex. Furthermore, the
page frame will no longer be accessible via bpage.
The caller must hold buf_p
ool->mutex and buf_page_get_mutex(bpage) and
release these two mutexes after the call. No other
buf_page_get_mutex() may be held when
calling this function.
The caller must hold buf_p
age_get_mutex(bpage) and release this mutex
after the call. No other buf_page_get_mutex() may be held when
calling this function.
@return TRUE if freed, FALSE otherwise. */
UNIV_INTERN
ibool
...
...
include/log0log.h
View file @
96d3a797
...
...
@@ -962,6 +962,11 @@ struct log_struct{
become signaled */
/* @} */
#endif
/* UNIV_LOG_ARCHIVE */
ib_uint64_t
tracked_lsn
;
/*!< log tracking has advanced to this
lsn. Field accessed atomically where
64-bit atomic ops are supported,
protected by the log sys mutex
otherwise. */
};
/** Test if flush order mutex is owned. */
...
...
include/log0online.h
0 → 100644
View file @
96d3a797
/*****************************************************************************
Copyright (c) 2011-2012, Percona Inc. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/log0online.h
Online database log parsing for changed page tracking
*******************************************************/
#ifndef log0online_h
#define log0online_h
#include "univ.i"
#include "os0file.h"
/*********************************************************************//**
Initializes the online log following subsystem.
Takes no arguments; declared with (void) so that C callers get a real
prototype instead of an unspecified parameter list. */
UNIV_INTERN
void
log_online_read_init(void);
/*======================*/
/*********************************************************************//**
Shuts down the online log following subsystem.
Takes no arguments; declared with (void) so that C callers get a real
prototype instead of an unspecified parameter list. */
UNIV_INTERN
void
log_online_read_shutdown(void);
/*==========================*/
/*********************************************************************//**
Reads and parses the redo log up to last checkpoint LSN to build the changed
page bitmap which is then written to disk.
Takes no arguments; declared with (void) so that C callers get a real
prototype instead of an unspecified parameter list. */
UNIV_INTERN
void
log_online_follow_redo_log(void);
/*============================*/
/** Iterator over every bit of the changed-page bitmap blocks.  The
LOG_BITMAP_ITERATOR_* macros read the current position from these fields. */
typedef struct log_bitmap_iterator_struct {
	char		in_name[FN_REFLEN];	/*!< name of the bitmap
						input file */
	os_file_t	in;			/*!< the bitmap input file */
	ib_uint64_t	in_offset;		/*!< offset in the bitmap
						file; the original comment
						calls it "the next write
						position in the bitmap output
						file" although the other
						fields describe an input
						file -- NOTE(review): confirm
						whether this is a read
						position */
	ib_uint32_t	bit_offset;		/*!< bit offset inside the
						current bitmap block; added
						to first_page_id by
						LOG_BITMAP_ITERATOR_PAGE_NUM */
	ib_uint64_t	start_lsn;		/*!< start LSN of the block */
	ib_uint64_t	end_lsn;		/*!< end LSN of the block */
	ib_uint32_t	space_id;		/*!< space id of the block */
	ib_uint32_t	first_page_id;		/*!< id of the first page of
						the block */
	ibool		changed;		/*!< true if the current page
						was changed */
	byte*		page;			/*!< the bitmap block */
} log_bitmap_iterator_t;
/* Accessors for the current position of a log_bitmap_iterator_t.  The
argument is the iterator object itself, not a pointer to it (members are
reached with '.').  The argument is parenthesised in every expansion;
LOG_BITMAP_ITERATOR_PAGE_NUM evaluates it twice. */

/* Start LSN of the bitmap block the iterator is positioned on. */
#define LOG_BITMAP_ITERATOR_START_LSN(i) \
((i).start_lsn)
/* End LSN of the bitmap block the iterator is positioned on. */
#define LOG_BITMAP_ITERATOR_END_LSN(i) \
((i).end_lsn)
/* Space id of the bitmap block the iterator is positioned on. */
#define LOG_BITMAP_ITERATOR_SPACE_ID(i) \
((i).space_id)
/* Page number of the current bit: first page id of the block plus the
bit offset inside the block. */
#define LOG_BITMAP_ITERATOR_PAGE_NUM(i) \
((i).first_page_id + (i).bit_offset)
/* Non-zero if the page at the current position is marked as changed. */
#define LOG_BITMAP_ITERATOR_PAGE_CHANGED(i) \
((i).changed)
/*********************************************************************//**
Initializes log bitmap iterator.
@return TRUE if the iterator is initialized OK, FALSE otherwise. */
UNIV_INTERN
ibool
log_online_bitmap_iterator_init
(
/*============================*/
log_bitmap_iterator_t
*
i
);
/*!<in/out: iterator */
/*********************************************************************//**
Releases log bitmap iterator. */
UNIV_INTERN
void
log_online_bitmap_iterator_release
(
/*===============================*/
log_bitmap_iterator_t
*
i
);
/*!<in/out: iterator */
/*********************************************************************//**
Iterates through bits of saved bitmap blocks.
Sequentially reads blocks from bitmap file(s) and iterates through
their bits. Ignores blocks with wrong checksum.
@return TRUE if iteration is successful, FALSE if all bits are iterated. */
UNIV_INTERN
ibool
log_online_bitmap_iterator_next
(
/*============================*/
log_bitmap_iterator_t
*
i
);
/*!<in/out: iterator */
#endif
include/log0recv.h
View file @
96d3a797
...
...
@@ -32,6 +32,28 @@ Created 9/20/1997 Heikki Tuuri
#include "hash0hash.h"
#include "log0log.h"
/******************************************************//**
Checks the 4-byte checksum to the trailer checksum field of a log
block. We also accept a log block in the old format before
InnoDB-3.23.52 where the checksum field contains the log block number.
@return TRUE if ok, or if the log block may be in the format of InnoDB
version predating 3.23.52 */
UNIV_INTERN
ibool
log_block_checksum_is_ok_or_old_format
(
/*===================================*/
const
byte
*
block
);
/*!< in: pointer to a log block */
/*******************************************************//**
Calculates the new value for lsn when more data is added to the log. */
UNIV_INTERN
ib_uint64_t
recv_calc_lsn_on_data_add
(
/*======================*/
ib_uint64_t
lsn
,
/*!< in: old lsn */
ib_uint64_t
len
);
/*!< in: this many bytes of data is
added, log block headers not included */
#ifdef UNIV_HOTBACKUP
extern
ibool
recv_replay_file_ops
;
...
...
@@ -182,6 +204,21 @@ UNIV_INTERN
void
recv_recovery_rollback_active
(
void
);
/*===============================*/
/*******************************************************************//**
Tries to parse a single log record and returns its length.
@return length of the record, or 0 if the record was not complete */
UNIV_INTERN
ulint
recv_parse_log_rec
(
/*===============*/
byte
*
ptr
,
/*!< in: pointer to a buffer */
byte
*
end_ptr
,
/*!< in: pointer to the buffer end */
byte
*
type
,
/*!< out: type */
ulint
*
space
,
/*!< out: space id */
ulint
*
page_no
,
/*!< out: page number */
byte
**
body
);
/*!< out: log record body start */
/*******************************************************//**
Scans log from a buffer and stores new log data to the parsing buffer.
Parses and hashes the log records if new data found. Unless
...
...
include/os0file.h
View file @
96d3a797
...
...
@@ -197,6 +197,7 @@ extern ulint srv_log_block_size;
extern
mysql_pfs_key_t
innodb_file_data_key
;
extern
mysql_pfs_key_t
innodb_file_log_key
;
extern
mysql_pfs_key_t
innodb_file_temp_key
;
extern
mysql_pfs_key_t
innodb_file_bmp_key
;
/* The following four macros are instrumentation to register
various file I/O operations with performance schema.
...
...
@@ -867,6 +868,14 @@ os_file_set_eof(
/*============*/
FILE
*
file
);
/*!< in: file to be truncated */
/***********************************************************************//**
Truncates a file at the specified position.
@return TRUE if success */
UNIV_INTERN
ibool
os_file_set_eof_at
(
os_file_t
file
,
/*!< in: handle to a file */
ib_uint64_t
new_len
);
/*!< in: new file length */
/***********************************************************************//**
NOTE! Use the corresponding macro os_file_flush(), not directly this function!
Flushes the write buffers of a given file to the disk.
@return TRUE if success */
...
...
include/os0sync.h
View file @
96d3a797
...
...
@@ -265,7 +265,11 @@ Atomic compare-and-swap and increment for InnoDB. */
#if defined(HAVE_IB_GCC_ATOMIC_BUILTINS)
#define HAVE_ATOMIC_BUILTINS
# define HAVE_ATOMIC_BUILTINS
# ifdef HAVE_IB_GCC_ATOMIC_BUILTINS_64
# define HAVE_ATOMIC_BUILTINS_64
# endif
/**********************************************************//**
Returns true if swapped, ptr is pointer to target, old_val is value to
...
...
@@ -304,6 +308,9 @@ amount of increment. */
# define os_atomic_increment_ulint(ptr, amount) \
os_atomic_increment(ptr, amount)
# define os_atomic_increment_uint64(ptr, amount) \
os_atomic_increment(ptr, amount)
/**********************************************************//**
Returns the old value of *ptr, atomically sets *ptr to new_val */
...
...
@@ -312,12 +319,13 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */
#elif defined(HAVE_IB_SOLARIS_ATOMICS)
#define HAVE_ATOMIC_BUILTINS
# define HAVE_ATOMIC_BUILTINS
# define HAVE_ATOMIC_BUILTINS_64
/* If not compiling with GCC or GCC doesn't support the atomic
intrinsics and running on Solaris >= 10 use Solaris atomics */
#include <atomic.h>
#
include <atomic.h>
/**********************************************************//**
Returns true if swapped, ptr is pointer to target, old_val is value to
...
...
@@ -357,6 +365,9 @@ amount of increment. */
# define os_atomic_increment_ulint(ptr, amount) \
atomic_add_long_nv(ptr, amount)
# define os_atomic_increment_uint64(ptr, amount) \
atomic_add_64_nv(ptr, amount)
/**********************************************************//**
Returns the old value of *ptr, atomically sets *ptr to new_val */
...
...
@@ -365,7 +376,11 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */
#elif defined(HAVE_WINDOWS_ATOMICS)
#define HAVE_ATOMIC_BUILTINS
# define HAVE_ATOMIC_BUILTINS
# ifndef _WIN32
# define HAVE_ATOMIC_BUILTINS_64
# endif
/* On Windows, use Windows atomics / interlocked */
# ifdef _WIN64
...
...
@@ -403,6 +418,11 @@ amount of increment. */
# define os_atomic_increment_ulint(ptr, amount) \
((ulint) (win_xchg_and_add(ptr, amount) + amount))
# define os_atomic_increment_uint64(ptr, amount) \
((ib_uint64_t) (InterlockedExchangeAdd64( \
(ib_int64_t*) ptr, \
(ib_int64_t) amount) + amount))
/**********************************************************//**
Returns the old value of *ptr, atomically sets *ptr to new_val.
InterlockedExchange() operates on LONG, and the LONG will be
...
...
include/srv0srv.h
View file @
96d3a797
...
...
@@ -66,6 +66,14 @@ extern os_event_t srv_timeout_event;
/* The error monitor thread waits on this event. */
extern
os_event_t
srv_error_event
;
/* This event is set on checkpoint completion to wake the redo log parser
thread */
extern
os_event_t
srv_checkpoint_completed_event
;
/* This event is set on the online redo log following thread exit to signal
that the (slow) shutdown may proceed */
extern
os_event_t
srv_redo_log_thread_finished_event
;
/* If the last data file is auto-extended, we add this many pages to it
at a time */
#define SRV_AUTO_EXTEND_INCREMENT \
...
...
@@ -133,6 +141,11 @@ extern char* srv_doublewrite_file;
extern
ibool
srv_recovery_stats
;
extern
my_bool
srv_track_changed_pages
;
extern
ulonglong
srv_changed_pages_limit
;
extern
ibool
srv_auto_extend_last_data_file
;
extern
ulint
srv_last_file_size_max
;
extern
char
**
srv_log_group_home_dirs
;
...
...
@@ -399,6 +412,7 @@ extern mysql_pfs_key_t srv_error_monitor_thread_key;
extern
mysql_pfs_key_t
srv_monitor_thread_key
;
extern
mysql_pfs_key_t
srv_master_thread_key
;
extern
mysql_pfs_key_t
srv_purge_thread_key
;
extern
mysql_pfs_key_t
srv_log_tracking_thread_key
;
/* This macro register the current thread and its key with performance
schema */
...
...
@@ -694,6 +708,15 @@ srv_LRU_dump_restore_thread(
void
*
arg
);
/*!< in: a dummy parameter required by
os_thread_create */
/******************************************************************//**
A thread which follows the redo log and outputs the changed page bitmap.
@return a dummy value */
UNIV_INTERN
os_thread_ret_t
srv_redo_log_follow_thread
(
/*=======================*/
void
*
arg
);
/*!< in: a dummy parameter required by
os_thread_create */
/******************************************************************//**
Outputs to a file the output of the InnoDB Monitor.
@return FALSE if not all information printed
due to failure to obtain necessary mutex */
...
...
include/univ.i
View file @
96d3a797
...
...
@@ -54,7 +54,7 @@ Created 1/20/1994 Heikki Tuuri
#
define
INNODB_VERSION_BUGFIX
8
#
ifndef
PERCONA_INNODB_VERSION
#
define
PERCONA_INNODB_VERSION
2
7
.0
#
define
PERCONA_INNODB_VERSION
2
9
.0
#
endif
...
...
include/ut0rbt.h
View file @
96d3a797
...
...
@@ -116,6 +116,10 @@ struct ib_rbt_bound_struct {
/* Compare a key with the node value (t is tree, k is key, n is node)*/
#define rbt_compare(t, k, n) (t->compare(k, n->value))
/* Node size. FIXME: name might clash, but currently it does not, so for easier
maintenance do not rename it for now. The tree argument is parenthesized so
that any pointer-valued expression can be passed safely. */
#define SIZEOF_NODE(t)	((sizeof(ib_rbt_node_t) + (t)->sizeof_value) - 1)
/**********************************************************************//**
Free an instance of a red black tree */
UNIV_INTERN
...
...
@@ -187,6 +191,17 @@ rbt_add_node(
ib_rbt_bound_t
*
parent
,
/*!< in: parent */
const
void
*
value
);
/*!< in: this value is copied
to the node */
/****************************************************************//**
Add a new caller-provided node to tree at the specified position.
The node must have its key fields initialized correctly.
@return added node */
UNIV_INTERN
const
ib_rbt_node_t
*
rbt_add_preallocated_node
(
/*======================*/
ib_rbt_t
*
tree
,
/*!< in: rb tree */
ib_rbt_bound_t
*
parent
,
/*!< in: parent */
ib_rbt_node_t
*
node
);
/*!< in: node */
/**********************************************************************//**
Return the left most data node in the tree
@return left most node */
...
...
@@ -271,6 +286,13 @@ Clear the tree, deletes (and free's) all the nodes. */
UNIV_INTERN
void
rbt_clear
(
/*======*/
ib_rbt_t
*
tree
);
/*!< in: rb tree */
/****************************************************************//**
Clear the tree without deleting and freeing its nodes. */
UNIV_INTERN
void
rbt_reset
(
/*======*/
ib_rbt_t
*
tree
);
/*!< in: rb tree */
/**********************************************************************//**
...
...
log/log0log.c
View file @
96d3a797
...
...
@@ -214,6 +214,54 @@ log_buf_pool_get_oldest_modification(void)
return
(
lsn
);
}
/****************************************************************//**
Reads log_sys->tracked_lsn in a safe manner. When 64-bit atomic
operations are available the value is fetched with an atomic operation;
otherwise the field is protected by the log system mutex, which the
caller must own. The matching writer is log_set_tracked_lsn() in
log0online.c.
@return the current log_sys->tracked_lsn value */
UNIV_INLINE
ib_uint64_t
log_get_tracked_lsn()
{
	ib_uint64_t	current_lsn;

#ifdef HAVE_ATOMIC_BUILTINS_64
	/* An atomic increment by zero is used as an atomic read */
	current_lsn = os_atomic_increment_uint64(&log_sys->tracked_lsn, 0);
#else
	ut_ad(mutex_own(&(log_sys->mutex)));
	current_lsn = log_sys->tracked_lsn;
#endif

	return current_lsn;
}
/****************************************************************//**
Checks if the log groups have a big enough margin of free space so
that a new log entry can be written without overwriting log data that
has not yet been read by the changed page bitmap thread. Always reports
that the margin is sufficient when changed page tracking is disabled.
The caller must own the log system mutex when tracking is enabled.
@return TRUE if there is not enough free space. */
static
ibool
log_check_tracking_margin(
/*======================*/
	ulint	lsn_advance)	/*!< in: an upper limit on how much log data
				we plan to write. If zero, the margin will
				be checked for the already-written log. */
{
	ib_uint64_t	tracked_lsn;
	ib_uint64_t	tracked_lsn_age;

	if (!srv_track_changed_pages) {
		return FALSE;
	}

	ut_ad(mutex_own(&(log_sys->mutex)));

	tracked_lsn = log_get_tracked_lsn();

	/* Keep the age in 64 bits: LSNs are 64-bit values and storing their
	difference in ulint would truncate it where ulint is 32 bits wide,
	which could hide a violated margin. */
	tracked_lsn_age = log_sys->lsn - tracked_lsn;

	/* The overwrite would happen when log_sys->log_group_capacity is
	exceeded, but we use max_checkpoint_age for an extra safety margin. */
	return tracked_lsn_age + lsn_advance > log_sys->max_checkpoint_age;
}
/************************************************************//**
Opens the log for log_write_low. The log must be closed with log_close and
released with log_release.
...
...
@@ -230,9 +278,7 @@ log_reserve_and_open(
ulint
archived_lsn_age
;
ulint
dummy
;
#endif
/* UNIV_LOG_ARCHIVE */
#ifdef UNIV_DEBUG
ulint
count
=
0
;
#endif
/* UNIV_DEBUG */
ut_a
(
len
<
log
->
buf_size
/
2
);
loop:
...
...
@@ -260,6 +306,19 @@ log_reserve_and_open(
goto
loop
;
}
if
(
log_check_tracking_margin
(
len_upper_limit
)
&&
(
++
count
<
50
))
{
/* This log write would violate the untracked LSN free space
margin. Limit this to 50 retries as there might be situations
where we have no choice but to proceed anyway, i.e. if the log
is about to be overflown, log tracking or not. */
mutex_exit
(
&
(
log
->
mutex
));
os_thread_sleep
(
10000
);
goto
loop
;
}
#ifdef UNIV_LOG_ARCHIVE
if
(
log
->
archiving_state
!=
LOG_ARCH_OFF
)
{
...
...
@@ -398,6 +457,8 @@ log_close(void)
ulint
first_rec_group
;
ib_uint64_t
oldest_lsn
;
ib_uint64_t
lsn
;
ib_uint64_t
tracked_lsn
;
ulint
tracked_lsn_age
;
log_t
*
log
=
log_sys
;
ib_uint64_t
checkpoint_age
;
...
...
@@ -424,6 +485,19 @@ log_close(void)
log
->
check_flush_or_checkpoint
=
TRUE
;
}
if
(
srv_track_changed_pages
)
{
tracked_lsn
=
log_get_tracked_lsn
();
tracked_lsn_age
=
lsn
-
tracked_lsn
;
if
(
tracked_lsn_age
>=
log
->
log_group_capacity
)
{
fprintf
(
stderr
,
" InnoDB: Error: the age of the "
"oldest untracked record exceeds the log "
"group capacity!
\n
"
);
}
}
checkpoint_age
=
lsn
-
log
->
last_checkpoint_lsn
;
if
(
checkpoint_age
>=
log
->
log_group_capacity
)
{
...
...
@@ -891,6 +965,8 @@ log_init(void)
log_sys
->
archiving_on
=
os_event_create
(
NULL
);
#endif
/* UNIV_LOG_ARCHIVE */
log_sys
->
tracked_lsn
=
0
;
/*----------------------------*/
log_block_init
(
log_sys
->
buf
,
log_sys
->
lsn
);
...
...
@@ -1740,6 +1816,12 @@ log_io_complete_checkpoint(void)
}
mutex_exit
(
&
(
log_sys
->
mutex
));
/* Wake the redo log watching thread to parse the log up to this
checkpoint. */
if
(
srv_track_changed_pages
)
{
os_event_set
(
srv_checkpoint_completed_event
);
}
}
/*******************************************************************//**
...
...
@@ -3086,6 +3168,15 @@ log_check_margins(void)
log_checkpoint_margin
();
mutex_enter
(
&
(
log_sys
->
mutex
));
if
(
log_check_tracking_margin
(
0
))
{
mutex_exit
(
&
(
log_sys
->
mutex
));
os_thread_sleep
(
10000
);
goto
loop
;
}
mutex_exit
(
&
(
log_sys
->
mutex
));
#ifdef UNIV_LOG_ARCHIVE
log_archive_margin
();
#endif
/* UNIV_LOG_ARCHIVE */
...
...
@@ -3114,6 +3205,7 @@ logs_empty_and_mark_files_at_shutdown(void)
/*=======================================*/
{
ib_uint64_t
lsn
;
ib_uint64_t
tracked_lsn
;
ulint
arch_log_no
;
ibool
server_busy
;
ulint
count
=
0
;
...
...
@@ -3299,6 +3391,12 @@ logs_empty_and_mark_files_at_shutdown(void)
}
srv_shutdown_state
=
SRV_SHUTDOWN_LAST_PHASE
;
/* Wake the log tracking thread, which will then immediately
quit because of the srv_shutdown_state value */
if
(
srv_track_changed_pages
)
{
os_event_set
(
srv_checkpoint_completed_event
);
os_event_wait
(
srv_redo_log_thread_finished_event
);
}
fil_close_all_files
();
ut_a
(
srv_get_active_thread_type
()
==
ULINT_UNDEFINED
);
return
;
...
...
@@ -3308,9 +3406,12 @@ logs_empty_and_mark_files_at_shutdown(void)
mutex_enter
(
&
log_sys
->
mutex
);
tracked_lsn
=
log_get_tracked_lsn
();
lsn
=
log_sys
->
lsn
;
if
(
lsn
!=
log_sys
->
last_checkpoint_lsn
||
(
srv_track_changed_pages
&&
(
tracked_lsn
!=
log_sys
->
last_checkpoint_lsn
))
#ifdef UNIV_LOG_ARCHIVE
||
(
srv_log_archive_on
&&
lsn
!=
log_sys
->
archived_lsn
+
LOG_BLOCK_HDR_SIZE
)
...
...
@@ -3368,6 +3469,11 @@ logs_empty_and_mark_files_at_shutdown(void)
srv_shutdown_state
=
SRV_SHUTDOWN_LAST_PHASE
;
/* Signal the log following thread to quit */
if
(
srv_track_changed_pages
)
{
os_event_set
(
srv_checkpoint_completed_event
);
}
/* Make some checks that the server really is quiet */
ut_a
(
srv_get_active_thread_type
()
==
ULINT_UNDEFINED
);
...
...
@@ -3388,6 +3494,10 @@ logs_empty_and_mark_files_at_shutdown(void)
fil_flush_file_spaces
(
FIL_TABLESPACE
);
if
(
srv_track_changed_pages
)
{
os_event_wait
(
srv_redo_log_thread_finished_event
);
}
fil_close_all_files
();
/* Make some checks that the server really is quiet */
...
...
@@ -3514,6 +3624,18 @@ log_print(
((
log_sys
->
n_log_ios
-
log_sys
->
n_log_ios_old
)
/
time_elapsed
));
if
(
srv_track_changed_pages
)
{
/* The maximum tracked LSN age is equal to the maximum
checkpoint age */
fprintf
(
file
,
"Log tracking enabled
\n
"
"Log tracked up to %llu
\n
"
"Max tracked LSN age %lu
\n
"
,
log_get_tracked_lsn
(),
log_sys
->
max_checkpoint_age
);
}
log_sys
->
n_log_ios_old
=
log_sys
->
n_log_ios
;
log_sys
->
last_printout_time
=
current_time
;
...
...
log/log0online.c
0 → 100644
View file @
96d3a797
/*****************************************************************************
Copyright (c) 2011-2012 Percona Inc. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file log/log0online.c
Online database log parsing for changed page tracking
*******************************************************/
#include "log0online.h"
#include "my_dbug.h"
#include "log0recv.h"
#include "mach0data.h"
#include "mtr0log.h"
#include "srv0srv.h"
#include "srv0start.h"
#include "trx0sys.h"
#include "ut0rbt.h"
/** Size in bytes of the log read buffer (log_bitmap_struct::read_buf) */
enum { FOLLOW_SCAN_SIZE = 4 * (UNIV_PAGE_SIZE_MAX) };

/** State of the online log parser and of the changed page bitmap output */
struct log_bitmap_struct {
	byte		read_buf[FOLLOW_SCAN_SIZE];
					/*!< buffer the log is read into */
	byte		parse_buf[RECV_PARSING_BUF_SIZE];
					/*!< buffer the log is parsed from */
	byte*		parse_buf_end;	/*!< position in parse_buf where the
					next read log data should be copied
					to. Points to the start of parse_buf
					if the previous log records were
					fully parsed, otherwise immediately
					past the end of the incomplete log
					record. */
	char*		out_name;	/*!< name of the bitmap output file */
	os_file_t	out;		/*!< handle of the bitmap output
					file */
	ib_uint64_t	out_offset;	/*!< offset of the next write in the
					bitmap output file */
	ib_uint64_t	start_lsn;	/*!< LSN of the next unparsed record,
					which is also the start of the next
					LSN interval to be parsed */
	ib_uint64_t	end_lsn;	/*!< end of the LSN interval to be
					parsed, equal to the next checkpoint
					LSN at the time of parse */
	ib_uint64_t	next_parse_lsn;	/*!< LSN of the next unparsed record
					in the current parse */
	ib_rbt_t*	modified_pages;	/*!< current set of modified pages:
					an RB-tree keyed by
					(space, 4KB-block-start-page-id)
					pairs */
	ib_rbt_node_t*	page_free_list;	/*!< singly-linked list of freed
					modified_pages nodes kept for reuse.
					The nodes are chained through
					ib_rbt_node_t.left, as this field has
					the correct type and the tree does
					not mind its overwrite during
					rbt_next() tree traversal. */
};
/* The single instance of the log parsing and bitmap output state */
static struct log_bitmap_struct*	log_bmp_sys;

/* Stem of the modified page bitmap file names */
static const char*	modified_page_stem = "ib_modified_log.";

/* On server startup with empty database srv_start_lsn == 0, in
which case the first LSN of actual log records will be this. */
#define MIN_TRACKED_LSN ((LOG_START_LSN) + (LOG_BLOCK_HDR_SIZE))

/* Evaluates to non-zero if bit number num of the byte array bitmap is set */
#define IS_BIT_SET(bitmap, num) \
	(*((bitmap) + ((num) >> 3)) & (1UL << ((num) & 7UL)))

/** Size of one bitmap file block in bytes. All writes will be multiples of
this. */
enum { MODIFIED_PAGE_BLOCK_SIZE = 4096 };

/** Byte offsets of the fields inside one bitmap file block */
enum {
	/* 1 if last block in the current write, 0 otherwise. */
	MODIFIED_PAGE_IS_LAST_BLOCK = 0,
	/* The starting tracked LSN of this and other blocks in the same
	write */
	MODIFIED_PAGE_START_LSN = 4,
	/* The ending tracked LSN of this and other blocks in the same
	write */
	MODIFIED_PAGE_END_LSN = 12,
	/* The space ID of tracked pages in this block */
	MODIFIED_PAGE_SPACE_ID = 20,
	/* The page ID of the first tracked page in this block */
	MODIFIED_PAGE_1ST_PAGE_ID = 24,
	/* Unused, aligns the start of the bitmap at an 8 byte boundary */
	MODIFIED_PAGE_BLOCK_UNUSED_1 = 28,
	/* Start of the bitmap itself */
	MODIFIED_PAGE_BLOCK_BITMAP = 32,
	/* Unused, aligns the end of the bitmap at an 8 byte boundary */
	MODIFIED_PAGE_BLOCK_UNUSED_2 = MODIFIED_PAGE_BLOCK_SIZE - 8,
	/* The checksum of the current block */
	MODIFIED_PAGE_BLOCK_CHECKSUM = MODIFIED_PAGE_BLOCK_SIZE - 4
};

/** Length of the bitmap data in one block, in bytes and in page ids */
enum {
	MODIFIED_PAGE_BLOCK_BITMAP_LEN
		= MODIFIED_PAGE_BLOCK_UNUSED_2 - MODIFIED_PAGE_BLOCK_BITMAP,
	MODIFIED_PAGE_BLOCK_ID_COUNT = MODIFIED_PAGE_BLOCK_BITMAP_LEN * 8
};
/****************************************************************//**
Comparison function for the RB-tree of bitmap blocks. Keys are ordered
by the (space, block_start_page) pair stored in the block header: first
by space id, then by the id of the first page of the block. The exact
ordering does not matter as long as it is a full ordering.
@return -1 if p1 < p2, 0 if p1 == p2, 1 if p1 > p2
*/
static
int
log_online_compare_bmp_keys(
/*========================*/
	const void*	p1,	/*!<in: 1st key to compare */
	const void*	p2)	/*!<in: 2nd key to compare */
{
	const byte*	lhs = (const byte*) p1;
	const byte*	rhs = (const byte*) p2;
	ulint		lhs_space
		= mach_read_from_4(lhs + MODIFIED_PAGE_SPACE_ID);
	ulint		rhs_space
		= mach_read_from_4(rhs + MODIFIED_PAGE_SPACE_ID);
	ulint		lhs_page;
	ulint		rhs_page;

	/* Different tablespaces: the space id alone decides */
	if (lhs_space != rhs_space) {
		return lhs_space < rhs_space ? -1 : 1;
	}

	lhs_page = mach_read_from_4(lhs + MODIFIED_PAGE_1ST_PAGE_ID);
	rhs_page = mach_read_from_4(rhs + MODIFIED_PAGE_1ST_PAGE_ID);

	if (lhs_page < rhs_page) {
		return -1;
	}

	if (lhs_page > rhs_page) {
		return 1;
	}

	return 0;
}
/****************************************************************//**
Marks a page as modified in the in-memory bitmap. The bitmap block that
covers the page is looked up in the log_bmp_sys->modified_pages tree; if
it is not there yet, a zero-filled block is added, reusing a node from
log_bmp_sys->page_free_list when one is available. */
static
void
log_online_set_page_bit(
/*====================*/
	ulint	space,	/*!<in: log record space id */
	ulint	page_no)/*!<in: log record page id */
{
	ulint		block_start_page;
	ulint		byte_in_bitmap;
	uint		bit_in_byte;
	ib_rbt_bound_t	search_pos;
	byte		search_key[MODIFIED_PAGE_BLOCK_SIZE];
	byte*		block;

	ut_a(space != ULINT_UNDEFINED);
	ut_a(page_no != ULINT_UNDEFINED);

	/* Id of the first page covered by the block page_no belongs to */
	block_start_page = page_no
		- page_no % MODIFIED_PAGE_BLOCK_ID_COUNT;
	/* Position of the page bit inside the bitmap of that block */
	byte_in_bitmap = (page_no - block_start_page) / 8;
	bit_in_byte = page_no % 8;

	/* Only the key fields of the search block need to be filled in, the
	tree comparison function reads nothing else */
	mach_write_to_4(search_key + MODIFIED_PAGE_SPACE_ID, space);
	mach_write_to_4(search_key + MODIFIED_PAGE_1ST_PAGE_ID,
			block_start_page);

	if (rbt_search(log_bmp_sys->modified_pages, &search_pos, search_key)
	    != 0) {
		/* No block for this page range yet: add one */
		ib_rbt_node_t*	node = log_bmp_sys->page_free_list;

		if (node) {
			/* Pop a previously freed node off the free list */
			log_bmp_sys->page_free_list = node->left;
		} else {
			node = ut_malloc(
				SIZEOF_NODE(log_bmp_sys->modified_pages));
		}

		memset(node, 0, SIZEOF_NODE(log_bmp_sys->modified_pages));

		block = rbt_value(byte, node);
		mach_write_to_4(block + MODIFIED_PAGE_SPACE_ID, space);
		mach_write_to_4(block + MODIFIED_PAGE_1ST_PAGE_ID,
				block_start_page);

		rbt_add_preallocated_node(log_bmp_sys->modified_pages,
					  &search_pos, node);
	} else {
		block = rbt_value(byte, search_pos.last);
	}

	block[MODIFIED_PAGE_BLOCK_BITMAP + byte_in_bitmap]
		|= (1U << bit_in_byte);
}
/****************************************************************//**
Computes the checksum of a bitmap block over all of its bytes that
precede the checksum field. The algorithm is borrowed from
log_block_calc_checksum.
@return checksum */
UNIV_INLINE
ulint
log_online_calc_checksum(
/*=====================*/
	const byte*	block)	/*!<in: bitmap block */
{
	ulint	checksum = 1;
	ulint	shift = 0;
	ulint	pos;

	for (pos = 0; pos < MODIFIED_PAGE_BLOCK_CHECKSUM; pos++) {
		const ulint	octet = block[pos];

		/* Clear the top bit of the 32-bit sum, then mix the byte in
		both as is and shifted */
		checksum = (checksum & 0x7FFFFFFFUL)
			+ octet + (octet << shift);

		/* The shift cycles through 0..24 */
		shift = (shift < 24) ? shift + 1 : 0;
	}

	return checksum;
}
/****************************************************************//**
Get the last fully tracked LSN from the bitmap file by reading it
backwards, block by block, until a block is found that both has a
correct checksum and is marked as the last block of its write. Blocks
after it belong to an incomplete write or are corrupted, and are
discarded by truncating the file. Sets log_bmp_sys->out_offset to the
position where new bitmap data should be written.
@return the last fully tracked LSN, 0 if the file holds no complete
write or could not be truncated, or MIN_TRACKED_LSN if reading the file
failed */
static
ib_uint64_t
log_online_read_last_tracked_lsn()
/*==============================*/
{
	byte		page[MODIFIED_PAGE_BLOCK_SIZE];
	ib_uint64_t	read_offset	= log_bmp_sys->out_offset;
	/* Initialize these to nonequal values so that file size == 0 case with
	zero loop repetitions is handled correctly */
	ulint		checksum	= 0;
	ulint		actual_checksum	= !checksum;
	ibool		is_last_page	= FALSE;
	ib_uint64_t	result;

	ut_ad(log_bmp_sys->out_offset % MODIFIED_PAGE_BLOCK_SIZE == 0);

	/* Keep going backwards while the block just read is corrupted OR is
	not the last block of a write. Stopping as soon as only one of the
	two conditions holds would leave the leading blocks of an incomplete
	write in the file. */
	while ((checksum != actual_checksum || !is_last_page)
	       && read_offset > 0) {

		ulint	offset_low;
		ulint	offset_high;
		ibool	success;

		read_offset -= MODIFIED_PAGE_BLOCK_SIZE;
		offset_high = (ulint)(read_offset >> 32);
		offset_low = (ulint)(read_offset & 0xFFFFFFFF);

		success = os_file_read(log_bmp_sys->out, page, offset_low,
				       offset_high, MODIFIED_PAGE_BLOCK_SIZE);
		if (!success) {

			/* The following call prints an error message */
			os_file_get_last_error(TRUE);
			/* Here and below assume that bitmap file names do not
			contain apostrophes, thus no need for
			ut_print_filename(). */
			fprintf(stderr, "InnoDB: Warning: failed reading "
				"changed page bitmap file \'%s\'\n",
				log_bmp_sys->out_name);
			return MIN_TRACKED_LSN;
		}

		is_last_page
			= mach_read_from_4(page + MODIFIED_PAGE_IS_LAST_BLOCK);
		checksum = mach_read_from_4(page
					    + MODIFIED_PAGE_BLOCK_CHECKSUM);
		actual_checksum = log_online_calc_checksum(page);
		if (checksum != actual_checksum) {

			fprintf(stderr, "InnoDB: Warning: corruption "
				"detected in \'%s\' at offset %llu\n",
				log_bmp_sys->out_name, read_offset);
		}
	}

	if (UNIV_LIKELY(checksum == actual_checksum && is_last_page)) {

		/* New data goes right after the last good block */
		log_bmp_sys->out_offset = read_offset
			+ MODIFIED_PAGE_BLOCK_SIZE;
		result = mach_read_from_8(page + MODIFIED_PAGE_END_LSN);
	} else {
		/* No complete write found in the file */
		log_bmp_sys->out_offset = read_offset;
		result = 0;
	}

	/* Truncate the output file to discard the corrupted bitmap data, if
	any */
	if (!os_file_set_eof_at(log_bmp_sys->out,
				log_bmp_sys->out_offset)) {

		fprintf(stderr, "InnoDB: Warning: failed truncating "
			"changed page bitmap file \'%s\' to %llu bytes\n",
			log_bmp_sys->out_name, log_bmp_sys->out_offset);
		result = 0;
	}
	return result;
}
/****************************************************************//**
Stores a new value to log_sys->tracked_lsn in a safe manner. With
64-bit atomic operations the field is updated by atomically adding the
difference between the new and the current value; otherwise the store
is done under the log system mutex. The matching reader is
log_get_tracked_lsn() in log0log.c. */
UNIV_INLINE
void
log_set_tracked_lsn(
/*================*/
	ib_uint64_t	tracked_lsn)	/*!<in: new value */
{
#ifdef HAVE_ATOMIC_BUILTINS_64
	/* Single writer, no data race here */
	ib_uint64_t	current
		= os_atomic_increment_uint64(&log_sys->tracked_lsn, 0);
	ib_uint64_t	delta = tracked_lsn - current;

	(void) os_atomic_increment_uint64(&log_sys->tracked_lsn, delta);
#else
	mutex_enter(&log_sys->mutex);
	log_sys->tracked_lsn = tracked_lsn;
	mutex_exit(&log_sys->mutex);
#endif
}
/****************************************************************//**
Reports a gap between the last tracked LSN found in the bitmap file and
the last checkpoint LSN on server startup (due to a crash or a very
fast shutdown) and tries to close it. If the gap is smaller than the
log group capacity, the log is followed immediately from the last
tracked LSN; otherwise tracking restarts from the checkpoint LSN. */
static
void
log_online_track_missing_on_startup(
/*================================*/
	ib_uint64_t	last_tracked_lsn,	/*!<in: last tracked LSN read
						from the bitmap file */
	ib_uint64_t	tracking_start_lsn)	/*!<in: last checkpoint LSN of
						the current server startup */
{
	ut_ad(last_tracked_lsn != tracking_start_lsn);

	fprintf(stderr, "InnoDB: last tracked LSN in \'%s\' is %llu, but "
		"last checkpoint LSN is %llu. This might be due to a server "
		"crash or a very fast shutdown. ", log_bmp_sys->out_name,
		last_tracked_lsn, tracking_start_lsn);

	/* last_tracked_lsn might be < MIN_TRACKED_LSN in the case of empty
	bitmap file, handle this too. */
	last_tracked_lsn = ut_max(last_tracked_lsn, MIN_TRACKED_LSN);

	if (log_sys->lsn - last_tracked_lsn
	    >= log_sys->log_group_capacity) {

		/* The missing interval cannot be fully recovered: start
		over from the checkpoint LSN */
		fprintf(stderr,
			"The age of last tracked LSN exceeds log capacity, "
			"tracking-based incremental backups will work only "
			"from the higher LSN!\n");

		log_bmp_sys->start_lsn = tracking_start_lsn;
		log_bmp_sys->end_lsn = log_bmp_sys->start_lsn;
		log_set_tracked_lsn(log_bmp_sys->start_lsn);

		fprintf(stderr,
			"InnoDB: starting tracking changed pages from LSN "
			"%llu\n", log_bmp_sys->end_lsn);
		return;
	}

	/* The missing interval is small enough: track it right now */
	fprintf(stderr,
		"Reading the log to advance the last tracked LSN.\n");

	log_bmp_sys->start_lsn = last_tracked_lsn;
	log_set_tracked_lsn(log_bmp_sys->start_lsn);
	log_online_follow_redo_log();
	ut_ad(log_bmp_sys->end_lsn >= tracking_start_lsn);

	fprintf(stderr,
		"InnoDB: continuing tracking changed pages from LSN "
		"%llu\n", log_bmp_sys->end_lsn);
}
/*********************************************************************//**
Initializes the online log following subsystem: allocates log_bmp_sys,
opens the bitmap output file (creating it if it cannot be opened) and
determines the LSN from which changed page tracking starts. For an
existing file the last tracked LSN is read back from it and compared
with the last checkpoint LSN. Exits the process if the bitmap file
cannot be created. */
UNIV_INTERN
void
log_online_read_init()
/*==================*/
{
	char		path[FN_REFLEN];
	ibool		ok;
	ib_uint64_t	tracking_start_lsn
		= ut_max(log_sys->last_checkpoint_lsn, MIN_TRACKED_LSN);

	/* Assert (could be compile-time assert) that bitmap data start and end
	in a bitmap block is 8-byte aligned */
	ut_a(MODIFIED_PAGE_BLOCK_BITMAP % 8 == 0);
	ut_a(MODIFIED_PAGE_BLOCK_BITMAP_LEN % 8 == 0);

	log_bmp_sys = ut_malloc(sizeof(*log_bmp_sys));

	/* Build the bitmap file name and keep a heap copy of it */
	ut_snprintf(path, FN_REFLEN, "%s%s%d", srv_data_home,
		    modified_page_stem, 1);
	log_bmp_sys->out_name = ut_malloc(strlen(path) + 1);
	ut_strcpy(log_bmp_sys->out_name, path);

	log_bmp_sys->modified_pages = rbt_create(MODIFIED_PAGE_BLOCK_SIZE,
						 log_online_compare_bmp_keys);
	log_bmp_sys->page_free_list = NULL;

	log_bmp_sys->out = os_file_create_simple_no_error_handling(
		innodb_file_bmp_key, log_bmp_sys->out_name, OS_FILE_OPEN,
		OS_FILE_READ_WRITE, &ok);

	if (ok) {
		/* Old file, read last tracked LSN and continue from there */
		ulint		size_low;
		ulint		size_high;
		ib_uint64_t	last_tracked_lsn;

		ok = os_file_get_size(log_bmp_sys->out, &size_low,
				      &size_high);
		ut_a(ok);

		log_bmp_sys->out_offset
			= ((ib_uint64_t) size_high << 32) | size_low;

		if (log_bmp_sys->out_offset % MODIFIED_PAGE_BLOCK_SIZE
		    != 0) {

			fprintf(stderr,
				"InnoDB: Warning: truncated block detected "
				"in \'%s\' at offset %llu\n",
				log_bmp_sys->out_name,
				log_bmp_sys->out_offset);
			/* Ignore the trailing partial block */
			log_bmp_sys->out_offset
				-= log_bmp_sys->out_offset
				% MODIFIED_PAGE_BLOCK_SIZE;
		}

		last_tracked_lsn = log_online_read_last_tracked_lsn();

		if (last_tracked_lsn < tracking_start_lsn) {

			/* The gap handler sets up the tracking LSNs itself */
			log_online_track_missing_on_startup(
				last_tracked_lsn, tracking_start_lsn);
			return;
		}

		if (last_tracked_lsn > tracking_start_lsn) {

			fprintf(stderr,
				"InnoDB: last tracked LSN in \'%s\' "
				"is %llu, but last checkpoint LSN is %llu. "
				"The tracking-based incremental backups will "
				"work only from the latter LSN!\n",
				log_bmp_sys->out_name, last_tracked_lsn,
				tracking_start_lsn);
		}
	} else {
		/* New file, tracking from scratch */
		log_bmp_sys->out = os_file_create_simple_no_error_handling(
			innodb_file_bmp_key, log_bmp_sys->out_name,
			OS_FILE_CREATE, OS_FILE_READ_WRITE, &ok);

		if (!ok) {

			/* The following call prints an error message */
			os_file_get_last_error(TRUE);
			fprintf(stderr,
				"InnoDB: Error: Cannot create \'%s\'\n",
				log_bmp_sys->out_name);
			exit(1);
		}

		log_bmp_sys->out_offset = 0;
	}

	fprintf(stderr, "InnoDB: starting tracking changed pages from "
		"LSN %llu\n", tracking_start_lsn);
	log_bmp_sys->start_lsn = tracking_start_lsn;
	log_set_tracked_lsn(tracking_start_lsn);
}
/*********************************************************************//**
Shuts down the online log following subsystem: closes the bitmap output
file and frees the modified page tree, the nodes kept on the free list,
the output file name and log_bmp_sys itself. */
UNIV_INTERN
void
log_online_read_shutdown()
/*======================*/
{
	ib_rbt_node_t*	node = log_bmp_sys->page_free_list;
	ib_rbt_node_t*	next_node;

	os_file_close(log_bmp_sys->out);

	rbt_free(log_bmp_sys->modified_pages);

	/* The free list is chained through ib_rbt_node_t.left; fetch the
	link before the node is released */
	for (; node; node = next_node) {
		next_node = node->left;
		ut_free(node);
	}

	ut_free(log_bmp_sys->out_name);
	ut_free(log_bmp_sys);
}
/*********************************************************************//**
Tells whether a minilog record of the given type carries a
(space; page) pair. All types do, except MLOG_MULTI_REC_END and
MLOG_DUMMY_RECORD.
@return TRUE if the record has (space; page) in it */
static
ibool
log_online_rec_has_page(
/*====================*/
	byte	type)	/*!<in: the minilog record type */
{
	return !(type == MLOG_MULTI_REC_END || type == MLOG_DUMMY_RECORD);
}
/*********************************************************************//**
Tells whether the page field of a log record of the given type holds a
real page id. It does not for records without a (space; page) pair, for
file operation records and, in UNIV_LOG_LSN_DEBUG builds, for MLOG_LSN.
@return TRUE if page field contains actual page id, FALSE otherwise */
static
ibool
log_online_rec_page_means_page(
/*===========================*/
	byte	type)	/*!<in: log record type */
{
	if (!log_online_rec_has_page(type)) {
		return 0;
	}

#ifdef UNIV_LOG_LSN_DEBUG
	if (type == MLOG_LSN) {
		return 0;
	}
#endif

	/* File operation records reuse the page field for other data */
	if (type == MLOG_FILE_CREATE
	    || type == MLOG_FILE_RENAME
	    || type == MLOG_FILE_DELETE
	    || type == MLOG_FILE_CREATE2) {
		return 0;
	}

	return 1;
}
/*********************************************************************//**
Parses the log records in the parse buffer, up to log_bmp_sys->end_lsn,
and adds the (space, page) pairs found to the modified page set.
Advances log_bmp_sys->next_parse_lsn past every parsed record. If an
incomplete record is met, it is moved to the beginning of the parse
buffer so that the next read can complete it; if the last parsed record
was complete, the parse buffer is marked as empty. */
static
void
log_online_parse_redo_log()
/*=======================*/
{
	byte*	cursor = log_bmp_sys->parse_buf;
	byte*	limit = log_bmp_sys->parse_buf_end;
	ulint	rec_len = 0;

	while (cursor != limit
	       && log_bmp_sys->next_parse_lsn < log_bmp_sys->end_lsn) {

		byte	type;
		ulint	space;
		ulint	page_no;
		byte*	body;

		/* recv_sys is not initialized, so on corrupt log we will
		SIGSEGV. But the log of a live database should not be
		corrupt. */
		rec_len = recv_parse_log_rec(cursor, limit, &type, &space,
					     &page_no, &body);

		if (rec_len == 0) {
			/* Incomplete log record. Shift it to the
			beginning of the parse buffer and leave it to be
			completed on the next read. */
			ut_memmove(log_bmp_sys->parse_buf, cursor,
				   limit - cursor);
			log_bmp_sys->parse_buf_end
				= log_bmp_sys->parse_buf + (limit - cursor);
			break;
		}

		/* Pages of the doublewrite space are not tracked */
		if (log_online_rec_page_means_page(type)
		    && (space != TRX_DOUBLEWRITE_SPACE)) {

			ut_a(rec_len >= 3);
			log_online_set_page_bit(space, page_no);
		}

		cursor += rec_len;
		ut_ad(cursor <= limit);

		log_bmp_sys->next_parse_lsn
			= recv_calc_lsn_on_data_add(
				log_bmp_sys->next_parse_lsn, rec_len);
	}

	if (rec_len > 0) {
		/* The last record looked at was complete: nothing is carried
		over to the next read */
		log_bmp_sys->parse_buf_end = log_bmp_sys->parse_buf;
	}
}
/*********************************************************************//**
Check the log block checksum.
@return TRUE if the log block checksum is OK, FALSE otherwise. */
static
ibool
log_online_is_valid_log_seg
(
/*========================*/
const
byte
*
log_block
)
/*!< in: read log data */
{
ibool
checksum_is_ok
=
log_block_checksum_is_ok_or_old_format
(
log_block
);
if
(
!
checksum_is_ok
)
{
fprintf
(
stderr
,
"InnoDB Error: log block checksum mismatch"
"expected %lu, calculated checksum %lu
\n
"
,
(
ulong
)
log_block_get_checksum
(
log_block
),
(
ulong
)
log_block_calc_checksum
(
log_block
));
}
return
checksum_is_ok
;
}
/*********************************************************************//**
Copy new log data to the parse buffer while skipping log block header,
trailer and already parsed data.  The parse buffer capacity is asserted
before anything is written to it. */
static
void
log_online_add_to_parse_buf(
/*========================*/
	const byte*	log_block,	/*!< in: read log data */
	ulint		data_len,	/*!< in: length of read log data */
	ulint		skip_len)	/*!< in: how much of log data to
					skip */
{
	/* With nothing to skip, the payload starts right after the block
	header; otherwise it starts at the requested skip offset. */
	ulint	start_offset	= skip_len ? skip_len : LOG_BLOCK_HDR_SIZE;

	/* A completely filled block ends with a trailer that is not log
	data; a partially filled one ends where its data ends. */
	ulint	end_offset	= (data_len == OS_FILE_LOG_BLOCK_SIZE)
		? data_len - LOG_BLOCK_TRL_SIZE
		: data_len;

	/* Nothing to copy if the skip offset lies beyond the data. */
	ulint	actual_data_len	= (end_offset >= start_offset)
		? end_offset - start_offset
		: 0;

	/* Verify that the new data fits BEFORE copying it, so that an
	overflow is caught without first overwriting memory past the end
	of the parse buffer. */
	ut_a((ulint) (log_bmp_sys->parse_buf_end - log_bmp_sys->parse_buf)
	     + actual_data_len <= RECV_PARSING_BUF_SIZE);

	ut_memcpy(log_bmp_sys->parse_buf_end, log_block + start_offset,
		  actual_data_len);

	log_bmp_sys->parse_buf_end += actual_data_len;
}
/*********************************************************************//**
Process one log block: append its not yet parsed payload (without block
header and trailer) to the parse buffer, then parse the buffer to update
the modified page bitmap. */
static
void
log_online_parse_redo_log_block(
/*============================*/
	const byte*	log_block,		/*!< in: read log data */
	ulint		skip_already_parsed_len)/*!< in: how many bytes of
						log data should be skipped as
						they were parsed before */
{
	/* Data length as recorded in the block itself. */
	ulint	block_data_len = log_block_get_data_len(log_block);

	ut_ad(block_data_len % OS_FILE_LOG_BLOCK_SIZE == 0
	      || block_data_len < OS_FILE_LOG_BLOCK_SIZE);

	log_online_add_to_parse_buf(log_block, block_data_len,
				    skip_already_parsed_len);

	log_online_parse_redo_log();
}
/*********************************************************************//**
Read one redo log chunk [block_start_lsn, block_end_lsn) into the read
buffer under the log system mutex, then parse it block by block, updating
the modified page bitmap.  Parsing stops early at the first block with a
bad checksum or once the next parse LSN reaches the tracking end LSN. */
static
void
log_online_follow_log_seg(
/*======================*/
	log_group_t*	group,		/*!< in: the log group to use */
	ib_uint64_t	block_start_lsn,/*!< in: the LSN to read from */
	ib_uint64_t	block_end_lsn)	/*!< in: the LSN to read to */
{
	/* The OS_FILE_LOG_BLOCK_SIZE-sized piece of the read buffer that is
	parsed next, and the end of the data that was read. */
	byte*	cur_block	= log_bmp_sys->read_buf;
	byte*	read_end	= log_bmp_sys->read_buf
		+ (block_end_lsn - block_start_lsn);

	mutex_enter(&log_sys->mutex);
	log_group_read_log_seg(LOG_RECOVER, log_bmp_sys->read_buf,
			       group, block_start_lsn, block_end_lsn);
	mutex_exit(&log_sys->mutex);

	for (;
	     cur_block < read_end
	     && log_bmp_sys->next_parse_lsn < log_bmp_sys->end_lsn;
	     cur_block += OS_FILE_LOG_BLOCK_SIZE,
	     block_start_lsn += OS_FILE_LOG_BLOCK_SIZE) {

		/* Number of leading bytes of this block to skip.  The read
		start is rounded down to a block boundary, so the first
		block may contain data that was parsed before. */
		ulint	skip_len = 0;

		if (!log_online_is_valid_log_seg(cur_block)) {

			break;
		}

		if ((block_start_lsn <= log_bmp_sys->next_parse_lsn)
		    && (block_start_lsn + OS_FILE_LOG_BLOCK_SIZE
			> log_bmp_sys->next_parse_lsn)) {

			/* The next parse LSN is inside the current block,
			skip data preceding it. */
			skip_len = log_bmp_sys->next_parse_lsn
				- block_start_lsn;
		} else {

			/* If the next parse LSN is not inside the current
			block, then the only option is that we have processed
			ahead already. */
			ut_a(block_start_lsn > log_bmp_sys->next_parse_lsn);
		}

		/* TODO: merge the copying to the parse buf code with
		skip_already_len calculations */
		log_online_parse_redo_log_block(cur_block, skip_len);
	}
}
/*********************************************************************//**
Read and parse the redo log of a given group in FOLLOW_SCAN_SIZE-sized
chunks, starting at contiguous_lsn and continuing until a chunk ends at or
past the tracking end LSN, updating the modified page bitmap.  Resets the
next parse LSN to the tracking start LSN and empties the parse buffer
before starting. */
static
void
log_online_follow_log_group(
/*========================*/
	log_group_t*	group,		/*!< in: the log group to use */
	ib_uint64_t	contiguous_lsn)	/*!< in: the LSN of log block start
					containing the log_parse_start_lsn */
{
	ib_uint64_t	seg_start_lsn = contiguous_lsn;
	ib_uint64_t	block_end_lsn;

	log_bmp_sys->next_parse_lsn = log_bmp_sys->start_lsn;
	log_bmp_sys->parse_buf_end = log_bmp_sys->parse_buf;

	for (;;) {
		block_end_lsn = seg_start_lsn + FOLLOW_SCAN_SIZE;

		log_online_follow_log_seg(group, seg_start_lsn,
					  block_end_lsn);

		/* Next parse LSN can become higher than the last read LSN
		only in the case when the read LSN falls right on the block
		boundary, in which case next parse lsn is bumped to the actual
		data LSN on the next (not yet read) block. This assert is
		slightly conservative. */
		ut_a(log_bmp_sys->next_parse_lsn
		     <= block_end_lsn + LOG_BLOCK_HDR_SIZE
		     + LOG_BLOCK_TRL_SIZE);

		if (block_end_lsn >= log_bmp_sys->end_lsn) {

			break;
		}

		/* The next chunk starts where this one ended. */
		seg_start_lsn = block_end_lsn;
	}

	/* Assert that the last read log record is a full one */
	ut_a(log_bmp_sys->parse_buf_end == log_bmp_sys->parse_buf);
}
/*********************************************************************//**
Write, flush one bitmap block to disk and advance the output position if
successful.  On a write or flush failure an error is printed to stderr and
the output position is left unchanged. */
static
void
log_online_write_bitmap_page(
/*=========================*/
	const byte *block)	/*!< in: block to write */
{
	ibool	success;

	/* The 64-bit output offset is passed as two 32-bit halves: the low
	word first, then the high word.  The high word is obtained by
	shifting RIGHT by 32 bits, matching how the bitmap iterator splits
	its read offset. */
	success = os_file_write(log_bmp_sys->out_name, log_bmp_sys->out,
				block,
				(ulint)(log_bmp_sys->out_offset & 0xFFFFFFFF),
				(ulint)(log_bmp_sys->out_offset >> 32),
				MODIFIED_PAGE_BLOCK_SIZE);
	if (UNIV_UNLIKELY(!success)) {

		/* The following call prints an error message */
		os_file_get_last_error(TRUE);
		fprintf(stderr, "InnoDB: Error: failed writing changed page "
			"bitmap file \'%s\'\n", log_bmp_sys->out_name);
		return;
	}

	success = os_file_flush(log_bmp_sys->out, FALSE);
	if (UNIV_UNLIKELY(!success)) {

		/* The following call prints an error message */
		os_file_get_last_error(TRUE);
		fprintf(stderr, "InnoDB: Error: failed flushing "
			"changed page bitmap file \'%s\'\n",
			log_bmp_sys->out_name);
		return;
	}

	/* Only a block that was both written and flushed moves the output
	position forward. */
	log_bmp_sys->out_offset += MODIFIED_PAGE_BLOCK_SIZE;
}
/*********************************************************************//**
Append the current changed page bitmap to the bitmap file: every block in
the modified page tree is stamped with the tracked LSN range and its
checksum and then written out; the last block is additionally flagged as
the last one.  Each tree node is pushed onto the page free list and the
tree itself is reset at the end. */
static
void
log_online_write_bitmap()
/*=====================*/
{
	const ib_rbt_node_t*	tail_node
		= rbt_last(log_bmp_sys->modified_pages);
	ib_rbt_node_t*		node;

	/* The successor is looked up only after the node has been linked
	into the free list through its left pointer; this ordering is the
	same as in the loop this was derived from and must be kept. */
	for (node = (ib_rbt_node_t*) rbt_first(log_bmp_sys->modified_pages);
	     node;
	     node = (ib_rbt_node_t*) rbt_next(log_bmp_sys->modified_pages,
					      node)) {

		byte*	page = rbt_value(byte, node);

		if (node == tail_node) {

			mach_write_to_4(page + MODIFIED_PAGE_IS_LAST_BLOCK,
					1);
		}

		mach_write_to_8(page + MODIFIED_PAGE_START_LSN,
				log_bmp_sys->start_lsn);
		mach_write_to_8(page + MODIFIED_PAGE_END_LSN,
				log_bmp_sys->end_lsn);

		/* The checksum goes in last, after the other header fields
		have been filled in. */
		mach_write_to_4(page + MODIFIED_PAGE_BLOCK_CHECKSUM,
				log_online_calc_checksum(page));

		log_online_write_bitmap_page(page);

		/* Recycle the node: the free list is chained through the
		left pointers. */
		node->left = log_bmp_sys->page_free_list;
		log_bmp_sys->page_free_list = node;
	}

	rbt_reset(log_bmp_sys->modified_pages);
}
/*********************************************************************//**
Read and parse the redo log from the tracked start LSN up to the last
checkpoint LSN, build the changed page bitmap for that range and write it
to disk.  Afterwards the tracked start LSN is advanced to the checkpoint
LSN.  Does nothing if the checkpoint LSN equals the tracked start LSN. */
UNIV_INTERN
void
log_online_follow_redo_log()
/*========================*/
{
	ib_uint64_t	contiguous_start_lsn;
	log_group_t*	group;

	/* Grab the LSN of the last checkpoint, we will parse up to it */
	mutex_enter(&(log_sys->mutex));
	log_bmp_sys->end_lsn = log_sys->last_checkpoint_lsn;
	mutex_exit(&(log_sys->mutex));

	if (log_bmp_sys->end_lsn == log_bmp_sys->start_lsn) {

		/* No new checkpoint since the last run. */
		return;
	}

	group = UT_LIST_GET_FIRST(log_sys->log_groups);
	ut_a(group);

	/* Reading starts at the beginning of the log block that contains
	the tracked start LSN. */
	contiguous_start_lsn = ut_uint64_align_down(log_bmp_sys->start_lsn,
						    OS_FILE_LOG_BLOCK_SIZE);

	for (; group; group = UT_LIST_GET_NEXT(log_groups, group)) {

		log_online_follow_log_group(group, contiguous_start_lsn);
	}

	/* A crash injection site that ensures last checkpoint LSN > last
	tracked LSN, so that LSN tracking for this interval is tested. */
	DBUG_EXECUTE_IF("crash_before_bitmap_write", DBUG_SUICIDE(););

	log_online_write_bitmap();

	log_bmp_sys->start_lsn = log_bmp_sys->end_lsn;
	log_set_tracked_lsn(log_bmp_sys->start_lsn);
}
/*********************************************************************//**
Initializes log bitmap iterator: builds the bitmap file name, opens that
file read-only and allocates the block buffer.  On an open failure an error
is printed to stderr and no buffer is allocated.
@return TRUE if the iterator is initialized OK, FALSE otherwise. */
UNIV_INTERN
ibool
log_online_bitmap_iterator_init(
/*============================*/
	log_bitmap_iterator_t *i) /*!<in/out: iterator */
{
	ibool	opened;

	ut_a(i);

	/* File name: data home directory, bitmap file stem, sequence
	number 1. */
	ut_snprintf(i->in_name, FN_REFLEN, "%s%s%d", srv_data_home,
		    modified_page_stem, 1);

	i->in_offset = 0;

	/* Start with the bit offset at the end-of-block limit, so that the
	first call to log_online_bitmap_iterator_next() initiates reading
	a block from the file. */
	i->bit_offset = MODIFIED_PAGE_BLOCK_BITMAP_LEN;

	i->in = os_file_create_simple_no_error_handling(
		innodb_file_bmp_key, i->in_name, OS_FILE_OPEN,
		OS_FILE_READ_ONLY, &opened);

	if (!opened) {

		/* The following call prints an error message */
		os_file_get_last_error(TRUE);
		fprintf(stderr, "InnoDB: Error: Cannot open \'%s\'\n",
			i->in_name);
		return FALSE;
	}

	i->page = ut_malloc(MODIFIED_PAGE_BLOCK_SIZE);

	/* No block has been read yet: clear everything derived from one. */
	i->end_lsn = 0;
	i->start_lsn = 0;
	i->space_id = 0;
	i->first_page_id = 0;
	i->changed = FALSE;

	return TRUE;
}
/*********************************************************************//**
Releases log bitmap iterator: closes its bitmap file and frees its block
buffer. */
UNIV_INTERN
void
log_online_bitmap_iterator_release(
/*===============================*/
	log_bitmap_iterator_t *i) /*!<in/out: iterator */
{
	ut_a(i);

	/* Release the resources acquired by
	log_online_bitmap_iterator_init(): first the file, then the
	buffer. */
	os_file_close(i->in);

	ut_free(i->page);
}
/*********************************************************************//**
Iterates through bits of saved bitmap blocks.
Advances to the next bit of the current block, or, when the current block
is exhausted, sequentially reads blocks from the bitmap file until one with
a matching checksum is found and positions on its first bit.  Blocks with
a wrong checksum are ignored.
@return TRUE if iteration is successful, FALSE if all bits are iterated
or the bitmap file could not be examined or read. */
UNIV_INTERN
ibool
log_online_bitmap_iterator_next(
/*============================*/
	log_bitmap_iterator_t *i) /*!<in/out: iterator */
{
	ulint	stored_checksum;
	ulint	computed_checksum;

	ut_a(i);

	if (i->bit_offset < MODIFIED_PAGE_BLOCK_BITMAP_LEN) {

		/* Still inside the current block: move one bit forward.
		NOTE(review): the bound is tested before the increment, so
		the bit at index MODIFIED_PAGE_BLOCK_BITMAP_LEN itself is
		also reported - confirm against the definition and unit of
		that constant whether this is intended. */
		++i->bit_offset;
		i->changed = IS_BIT_SET(i->page + MODIFIED_PAGE_BLOCK_BITMAP,
					i->bit_offset);
		return TRUE;
	}

	/* The current block is exhausted: read blocks until one passes the
	checksum test.  At least one read is always attempted. */
	do {
		ulint		size_low;
		ulint		size_high;
		ib_uint64_t	file_size;

		if (!os_file_get_size(i->in, &size_low, &size_high)) {

			os_file_get_last_error(TRUE);
			fprintf(stderr,
				"InnoDB: Warning: can't get size of "
				"page bitmap file \'%s\'\n",
				i->in_name);
			return FALSE;
		}

		file_size = (ib_uint64_t)(size_low)
			+ ((ib_uint64_t)(size_high) << 32);

		if (i->in_offset >= file_size) {

			/* Read position is at or past the end of file. */
			return FALSE;
		}

		/* The 64-bit read offset is passed as its low and high
		32-bit words. */
		if (!os_file_read(i->in, i->page,
				  (ulint)(i->in_offset & 0xFFFFFFFF),
				  (ulint)(i->in_offset >> 32),
				  MODIFIED_PAGE_BLOCK_SIZE)) {

			os_file_get_last_error(TRUE);
			fprintf(stderr,
				"InnoDB: Warning: failed reading "
				"changed page bitmap file \'%s\'\n",
				i->in_name);
			return FALSE;
		}

		stored_checksum = mach_read_from_4(
			i->page + MODIFIED_PAGE_BLOCK_CHECKSUM);
		computed_checksum = log_online_calc_checksum(i->page);

		/* Advance past this block whether or not it is valid. */
		i->in_offset += MODIFIED_PAGE_BLOCK_SIZE;

	} while (stored_checksum != computed_checksum);

	/* A valid block is in the buffer: load its header fields and
	position on its first bit. */
	i->start_lsn = mach_read_from_8(i->page + MODIFIED_PAGE_START_LSN);
	i->end_lsn = mach_read_from_8(i->page + MODIFIED_PAGE_END_LSN);
	i->space_id = mach_read_from_4(i->page + MODIFIED_PAGE_SPACE_ID);
	i->first_page_id = mach_read_from_4(i->page
					    + MODIFIED_PAGE_1ST_PAGE_ID);
	i->bit_offset = 0;
	i->changed = IS_BIT_SET(i->page + MODIFIED_PAGE_BLOCK_BITMAP,
				i->bit_offset);

	return TRUE;
}
log/log0recv.c
View file @
96d3a797
...
...
@@ -850,7 +850,7 @@ block. We also accept a log block in the old format before
InnoDB-3.23.52 where the checksum field contains the log block number.
@return TRUE if ok, or if the log block may be in the format of InnoDB
version predating 3.23.52 */
static
UNIV_INTERN
ibool
log_block_checksum_is_ok_or_old_format
(
/*===================================*/
...
...
@@ -2095,7 +2095,7 @@ recv_apply_log_recs_for_backup(void)
/*******************************************************************//**
Tries to parse a single log record and returns its length.
@return length of the record, or 0 if the record was not complete */
static
UNIV_INTERN
ulint
recv_parse_log_rec
(
/*===============*/
...
...
@@ -2166,7 +2166,7 @@ recv_parse_log_rec(
/*******************************************************//**
Calculates the new value for lsn when more data is added to the log. */
static
UNIV_INTERN
ib_uint64_t
recv_calc_lsn_on_data_add
(
/*======================*/
...
...
@@ -3562,6 +3562,8 @@ recv_reset_logs(
log_sys
->
archived_lsn
=
log_sys
->
lsn
;
#endif
/* UNIV_LOG_ARCHIVE */
log_sys
->
tracked_lsn
=
log_sys
->
lsn
;
log_block_init
(
log_sys
->
buf
,
log_sys
->
lsn
);
log_block_set_first_rec_group
(
log_sys
->
buf
,
LOG_BLOCK_HDR_SIZE
);
...
...
os/os0file.c
View file @
96d3a797
...
...
@@ -154,6 +154,7 @@ UNIV_INTERN ibool os_aio_print_debug = FALSE;
UNIV_INTERN
mysql_pfs_key_t
innodb_file_data_key
;
UNIV_INTERN
mysql_pfs_key_t
innodb_file_log_key
;
UNIV_INTERN
mysql_pfs_key_t
innodb_file_temp_key
;
UNIV_INTERN
mysql_pfs_key_t
innodb_file_bmp_key
;
#endif
/* UNIV_PFS_IO */
/** The asynchronous i/o array slot structure */
...
...
@@ -2046,6 +2047,25 @@ os_file_set_eof(
#endif
/* __WIN__ */
}
/***********************************************************************//**
Truncates a file at the specified position, using _chsize_s() on Windows
builds and ftruncate() elsewhere.
@return TRUE if success */
UNIV_INTERN
ibool
os_file_set_eof_at(
/*===============*/
	os_file_t	file,	/*!< in: handle to a file */
	ib_uint64_t	new_len)/*!< in: new file length */
{
	/* Both library calls report success with a zero return value. */
#ifdef __WIN__
	/* TODO: untested! */
	return(_chsize_s(file, new_len) == 0);
#else
	/* TODO: works only with -D_FILE_OFFSET_BITS=64 ? */
	return(ftruncate(file, new_len) == 0);
#endif
}
#ifndef __WIN__
/***********************************************************************//**
Wrapper to fsync(2) that retries the call on some errors.
...
...
srv/srv0srv.c
View file @
96d3a797
...
...
@@ -67,6 +67,7 @@ Created 10/8/1995 Heikki Tuuri
#include "mem0pool.h"
#include "sync0sync.h"
#include "que0que.h"
#include "log0online.h"
#include "log0recv.h"
#include "pars0pars.h"
#include "usr0sess.h"
...
...
@@ -176,6 +177,10 @@ UNIV_INTERN char* srv_doublewrite_file = NULL;
UNIV_INTERN
ibool
srv_recovery_stats
=
FALSE
;
UNIV_INTERN
my_bool
srv_track_changed_pages
=
TRUE
;
UNIV_INTERN
ulonglong
srv_changed_pages_limit
=
0
;
/* if TRUE, then we auto-extend the last data file */
UNIV_INTERN
ibool
srv_auto_extend_last_data_file
=
FALSE
;
/* if != 0, this tells the max size auto-extending may increase the
...
...
@@ -769,6 +774,10 @@ UNIV_INTERN os_event_t srv_error_event;
UNIV_INTERN
os_event_t
srv_lock_timeout_thread_event
;
UNIV_INTERN
os_event_t
srv_checkpoint_completed_event
;
UNIV_INTERN
os_event_t
srv_redo_log_thread_finished_event
;
UNIV_INTERN
srv_sys_t
*
srv_sys
=
NULL
;
/* padding to prevent other memory update hotspots from residing on
...
...
@@ -1107,6 +1116,9 @@ srv_init(void)
srv_lock_timeout_thread_event
=
os_event_create
(
NULL
);
srv_checkpoint_completed_event
=
os_event_create
(
NULL
);
srv_redo_log_thread_finished_event
=
os_event_create
(
NULL
);
for
(
i
=
0
;
i
<
SRV_MASTER
+
1
;
i
++
)
{
srv_n_threads_active
[
i
]
=
0
;
srv_n_threads
[
i
]
=
0
;
...
...
@@ -3031,6 +3043,46 @@ srv_shutdown_print_master_pending(
}
}
/******************************************************************//**
A thread which follows the redo log and outputs the changed page bitmap.
It waits for the checkpoint-completed event, follows the redo log after
each wakeup, and exits once the server shutdown state reaches
SRV_SHUTDOWN_LAST_PHASE.  Before exiting it shuts down the log tracking
subsystem and sets the thread-finished event.
@return a dummy value */
os_thread_ret_t
srv_redo_log_follow_thread(
/*=======================*/
	void*	arg __attribute__((unused)))	/*!< in: a dummy parameter
						required by
						os_thread_create */
{
#ifdef UNIV_DEBUG_THREAD_CREATION
	fprintf(stderr, "Redo log follower thread starts, id %lu\n",
		os_thread_pf(os_thread_get_curr_id()));
#endif

#ifdef UNIV_PFS_THREAD
	pfs_register_thread(srv_log_tracking_thread_key);
#endif

	my_thread_init();

	for (;;) {
		os_event_wait(srv_checkpoint_completed_event);
		os_event_reset(srv_checkpoint_completed_event);

		/* Do not touch the log once the last shutdown phase has
		begun. */
		if (srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) {

			log_online_follow_redo_log();
		}

		/* The shutdown state is sampled again here, after the log
		has been followed. */
		if (!(srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE)) {

			break;
		}
	}

	log_online_read_shutdown();
	os_event_set(srv_redo_log_thread_finished_event);

	my_thread_end();
	os_thread_exit(NULL);

	OS_THREAD_DUMMY_RETURN;
}
/*******************************************************************//**
Tells the InnoDB server that there has been activity in the database
and wakes up the master thread if it is suspended (not sleeping). Used
...
...
srv/srv0start.c
View file @
96d3a797
...
...
@@ -51,6 +51,7 @@ Created 2/16/1996 Heikki Tuuri
#include "rem0rec.h"
#include "mtr0mtr.h"
#include "log0log.h"
#include "log0online.h"
#include "log0recv.h"
#include "page0page.h"
#include "page0cur.h"
...
...
@@ -121,9 +122,9 @@ UNIV_INTERN enum srv_shutdown_state srv_shutdown_state = SRV_SHUTDOWN_NONE;
static
os_file_t
files
[
1000
];
/** io_handler_thread parameters for thread identification */
static
ulint
n
[
SRV_MAX_N_IO_THREADS
+
7
];
static
ulint
n
[
SRV_MAX_N_IO_THREADS
+
8
];
/** io_handler_thread identifiers */
static
os_thread_id_t
thread_ids
[
SRV_MAX_N_IO_THREADS
+
7
];
static
os_thread_id_t
thread_ids
[
SRV_MAX_N_IO_THREADS
+
8
];
/** We use this mutex to test the return value of pthread_mutex_trylock
on successful locking. HP-UX does NOT return 0, though Linux et al do. */
...
...
@@ -145,6 +146,7 @@ UNIV_INTERN mysql_pfs_key_t srv_error_monitor_thread_key;
UNIV_INTERN
mysql_pfs_key_t
srv_monitor_thread_key
;
UNIV_INTERN
mysql_pfs_key_t
srv_master_thread_key
;
UNIV_INTERN
mysql_pfs_key_t
srv_purge_thread_key
;
UNIV_INTERN
mysql_pfs_key_t
srv_log_tracking_thread_key
;
#endif
/* UNIV_PFS_THREAD */
/*********************************************************************//**
...
...
@@ -2034,6 +2036,19 @@ innobase_start_or_create_for_mysql(void)
if
(
srv_auto_lru_dump
&&
srv_blocking_lru_restore
)
buf_LRU_file_restore
();
if
(
srv_track_changed_pages
)
{
/* Initialize the log tracking subsystem here to block
server startup until it's completed due to the potential
need to re-read previous server run's log. */
log_online_read_init
();
/* Create the thread that follows the redo log to output the
changed page bitmap */
os_thread_create
(
&
srv_redo_log_follow_thread
,
NULL
,
thread_ids
+
6
+
SRV_MAX_N_IO_THREADS
);
}
srv_is_being_started
=
FALSE
;
err
=
dict_create_or_check_foreign_constraint_tables
();
...
...
ut/ut0rbt.c
View file @
96d3a797
...
...
@@ -55,7 +55,6 @@ red-black properties:
#endif
#define ROOT(t) (t->root->left)
#define SIZEOF_NODE(t) ((sizeof(ib_rbt_node_t) + t->sizeof_value) - 1)
/**********************************************************************//**
Print out the sub-tree recursively. */
...
...
@@ -834,6 +833,21 @@ rbt_add_node(
node
=
(
ib_rbt_node_t
*
)
ut_malloc
(
SIZEOF_NODE
(
tree
));
memcpy
(
node
->
value
,
value
,
tree
->
sizeof_value
);
return
(
rbt_add_preallocated_node
(
tree
,
parent
,
node
));
}
/****************************************************************//**
Add a new caller-provided node to tree at the specified position.
The node must have its key fields initialized correctly.
@return added node */
UNIV_INTERN
const
ib_rbt_node_t
*
rbt_add_preallocated_node
(
/*======================*/
ib_rbt_t
*
tree
,
/*!< in: rb tree */
ib_rbt_bound_t
*
parent
,
/*!< in: parent */
ib_rbt_node_t
*
node
)
/*!< in: node */
{
node
->
parent
=
node
->
left
=
node
->
right
=
tree
->
nil
;
/* If tree is empty */
...
...
@@ -842,7 +856,7 @@ rbt_add_node(
}
/* Append the node, the hope here is that the caller knows
what s/he is doing. */
what s/he is doing. */
rbt_tree_add_child
(
tree
,
parent
,
node
);
rbt_balance_tree
(
tree
,
node
);
...
...
@@ -854,6 +868,7 @@ rbt_add_node(
return
(
node
);
}
/**********************************************************************//**
Find a matching node in the rb tree.
@return NULL if not found else the node where key was found */
...
...
@@ -1142,7 +1157,17 @@ rbt_clear(
ib_rbt_t
*
tree
)
/*!< in: rb tree */
{
rbt_free_node
(
ROOT
(
tree
),
tree
->
nil
);
rbt_reset
(
tree
);
}
/****************************************************************//**
Clear the tree without deleting and freeing its nodes: the node count is
zeroed and both child links of the root are pointed at the nil sentinel.
The nodes themselves are left untouched, so the caller remains responsible
for them. */
UNIV_INTERN
void
rbt_reset(
/*======*/
	ib_rbt_t*	tree)	/*!< in: rb tree */
{
	tree->n_nodes = 0;

	tree->root->right = tree->nil;
	tree->root->left = tree->nil;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment