Commit a5cf3a80 authored by Jan Lindström's avatar Jan Lindström

Merged latest mt-flush code to xtradb. Cleaned up thread statistic output code.
parent 18353c6a
......@@ -116,18 +116,13 @@ typedef struct wrk_itm
/* Thread synchronization data */
typedef struct thread_sync
{
ulint n_threads; /*!< Number of threads */
os_thread_id_t wthread_id; /*!< Identifier */
os_thread_t wthread; /*!< Thread id */
ib_wqueue_t *wq; /*!< Work Queue */
ib_wqueue_t *wr_cq; /*!< Write Completion Queue */
ib_wqueue_t *rd_cq; /*!< Read Completion Queue */
wthr_status_t wt_status; /*!< Worker thread status */
ulint stat_universal_num_processed;
/*!< Total number of pages
processed by this thread */
ulint stat_cycle_num_processed;
/*!< Number of pages processed
on this cycle */
mem_heap_t* wheap; /*!< Work heap where memory
is allocated */
wrk_t* work_item; /*!< Work items to be processed */
......@@ -231,6 +226,7 @@ buf_mtflu_flush_pool_instance(
work_item->wr.min,
work_item->wr.lsn_limit);
buf_flush_end(work_item->wr.buf_pool, work_item->wr.flush_type);
buf_flush_common(work_item->wr.flush_type, work_item->n_flushed);
......@@ -239,28 +235,29 @@ buf_mtflu_flush_pool_instance(
#ifdef UNIV_DEBUG
/******************************************************************//**
Output work item list status,
Print flush statistics of work items.
*/
static
void
mtflu_print_work_list(
/*==================*/
wrk_t* wi_list) /*!< in: Work item list */
mtflu_print_thread_stat(
/*====================*/
wrk_t* work_item) /*!< in: Work items */
{
wrk_t* wi = wi_list;
ulint stat_tot=0;
ulint i=0;
if(!wi_list) {
fprintf(stderr, "list NULL\n");
}
for(i=0; i< MTFLUSH_MAX_WORKER; i++) {
stat_tot+=work_item[i].n_flushed;
while(wi) {
fprintf(stderr, "-\t[%p]\t[%s]\t[%lu] > %p\n",
wi, (wi->id_usr == -1)?"free":"Busy", wi->n_flushed, wi->next);
wi = wi->next;
i++;
fprintf(stderr, "MTFLUSH: Thread[%lu] stat [%lu]\n",
work_item[i].id_usr,
work_item[i].n_flushed);
if (work_item[i].next == NULL) {
break; /* No more filled work items */
}
}
fprintf(stderr, "list len: %d\n", i);
fprintf(stderr, "MTFLUSH: Stat-Total:%lu\n", stat_tot);
}
#endif /* UNIV_DEBUG */
......@@ -282,10 +279,6 @@ mtflush_service_io(
mtflush_io->wt_status = WTHR_SIG_WAITING;
work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wq, max_wait_usecs);
#ifdef UNIV_DEBUG
mtflu_print_work_list(mtflush_io->work_item);
#endif
if (work_item) {
mtflush_io->wt_status = WTHR_RUNNING;
} else {
......@@ -345,10 +338,28 @@ DECLARE_THREAD(mtflush_io_thread)(
void * arg)
{
thread_sync_t *mtflush_io = ((thread_sync_t *)arg);
#ifdef UNIV_DEBUG
ib_uint64_t stat_universal_num_processed = 0;
ib_uint64_t stat_cycle_num_processed = 0;
wrk_t* work_item = mtflush_io[0].work_item;
ulint i;
#endif
while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
mtflush_service_io(mtflush_io);
mtflush_io->stat_cycle_num_processed = 0;
#ifdef UNIV_DEBUG
for(i=0; i < MTFLUSH_MAX_WORKER; i++) {
stat_cycle_num_processed+= work_item[i].n_flushed;
}
stat_universal_num_processed+=stat_cycle_num_processed;
stat_cycle_num_processed = 0;
fprintf(stderr, "MTFLUSH_IO_THREAD: total %lu cycle %lu\n",
stat_universal_num_processed,
stat_cycle_num_processed);
mtflu_print_thread_stat(work_item);
#endif
}
/* This should make sure that all current work items are
......@@ -458,13 +469,16 @@ buf_mtflu_handler_init(
work_items = (wrk_t*)mem_heap_alloc(mtflush_heap,
MTFLUSH_MAX_WORKER * sizeof(wrk_t));
ut_a(work_items != NULL);
memset(work_items, 0, sizeof(wrk_t) * MTFLUSH_MAX_WORKER);
memset(mtflush_ctx, 0, sizeof(thread_sync_t) * MTFLUSH_MAX_WORKER);
/* Initialize work items */
mtflu_setup_work_items(work_items, MTFLUSH_MAX_WORKER);
mtflu_setup_work_items(work_items, n_threads);
/* Create threads for page-compression-flush */
for(i=0; i < n_threads; i++) {
os_thread_id_t new_thread_id;
mtflush_ctx[i].n_threads = n_threads;
mtflush_ctx[i].wq = mtflush_work_queue;
mtflush_ctx[i].wr_cq = mtflush_write_comp_queue;
mtflush_ctx[i].rd_cq = mtflush_read_comp_queue;
......@@ -531,19 +545,16 @@ buf_mtflu_flush_work_items(
per_pool_pages_flushed[i] = done_wi->n_flushed;
}
if(done_wi->id_usr == -1 &&
if((int)done_wi->id_usr == -1 &&
done_wi->wi_status == WRK_ITEM_SET ) {
fprintf(stderr,
"**Set/Unused work_item[%d] flush_type=%lu\n",
"**Set/Unused work_item[%lu] flush_type=%lu\n",
i,
done_wi->wr.flush_type);
ut_a(0);
}
n_flushed+= done_wi->n_flushed;
/* Reset for next round*/
mtflush_ctx->work_item[i].id_usr = -1;
i++;
}
}
......@@ -551,47 +562,6 @@ buf_mtflu_flush_work_items(
return(n_flushed);
}
/*******************************************************************//**
Runs one LRU flush batch on a single buffer pool instance: dirty blocks
at the end of the LRU list are flushed and replaceable clean pages at the
end of the LRU list are moved to the free list.
NOTE: The calling thread is not allowed to own any latches on pages!
@return true if a batch was queued successfully. false if another batch
of same type was already running. */
bool
buf_mtflu_flush_LRU(
/*================*/
	buf_pool_t*	buf_pool,	/*!< in/out: buffer pool instance */
	ulint		min_n,		/*!< in: wished minimum number of
					blocks flushed (it is not guaranteed
					that the actual number is that big,
					though) */
	ulint*		n_processed)	/*!< out: the number of pages
					which were processed is passed
					back to caller. Ignored if NULL */
{
	/* The caller always sees a defined count, even when no batch
	is started. */
	if (n_processed) {
		*n_processed = 0;
	}

	if (buf_flush_start(buf_pool, BUF_FLUSH_LRU)) {
		ulint	n_pages;

		n_pages = buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0);

		buf_flush_end(buf_pool, BUF_FLUSH_LRU);

		/* Account the batch in the aggregated flush statistics. */
		buf_flush_common(BUF_FLUSH_LRU, n_pages);

		if (n_processed) {
			*n_processed = n_pages;
		}

		return(true);
	}

	/* buf_flush_start() refused the batch. */
	return(false);
}
/*******************************************************************//**
Multi-threaded version of buf_flush_list
*/
......
......@@ -284,8 +284,7 @@ SET(INNOBASE_SOURCES
buf/buf0flu.cc
buf/buf0lru.cc
buf/buf0rea.cc
# TODO: JAN uncomment
# buf/buf0mtflu.cc
buf/buf0mtflu.cc
data/data0data.cc
data/data0type.cc
dict/dict0boot.cc
......
......@@ -32,6 +32,7 @@ Created 11/11/1995 Heikki Tuuri
#endif
#include "buf0buf.h"
#include "buf0mtflu.h"
#include "buf0checksum.h"
#include "srv0start.h"
#include "srv0srv.h"
......@@ -1949,47 +1950,6 @@ void buf_pool_exit_LRU_mutex(
mutex_exit(&buf_pool->LRU_list_mutex);
}
/*******************************************************************//**
Runs one LRU flush batch on a single buffer pool instance: dirty blocks
at the end of the LRU list are flushed and replaceable clean pages at the
end of the LRU list are moved to the free list.
NOTE: The calling thread is not allowed to own any latches on pages!
@return true if a batch was queued successfully. false if another batch
of same type was already running. */
static
bool
pgcomp_buf_flush_LRU(
/*=================*/
	buf_pool_t*	buf_pool,	/*!< in/out: buffer pool instance */
	ulint		min_n,		/*!< in: wished minimum number of
					blocks flushed (it is not guaranteed
					that the actual number is that big,
					though) */
	ulint*		n_processed)	/*!< out: the number of pages
					which were processed is passed
					back to caller. Ignored if NULL */
{
	flush_counters_t	counters;

	/* The caller always sees a defined count, even when no batch
	is started. */
	if (n_processed) {
		*n_processed = 0;
	}

	if (buf_flush_start(buf_pool, BUF_FLUSH_LRU)) {
		/* Unlimited LRU scan (limited_lru_scan = false); the
		page counts come back through the counters struct. */
		buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0, false,
				&counters);

		buf_flush_end(buf_pool, BUF_FLUSH_LRU);

		/* Account the batch in the aggregated flush statistics. */
		buf_flush_common(BUF_FLUSH_LRU, counters.flushed);

		if (n_processed) {
			*n_processed = counters.flushed;
		}

		return(true);
	}

	/* buf_flush_start() refused the batch. */
	return(false);
}
/* JAN: TODO: END: */
/*******************************************************************//**
......@@ -2029,126 +1989,6 @@ buf_flush_LRU(
return(true);
}
/* JAN: TODO: */
/*******************************************************************//**/
/* Declared here directly instead of through a header.
buf_flush_list() and buf_flush_LRU_tail() test the result of
is_pgcomp_wrk_init_done() to decide whether to dispatch to the pgcomp_*
multi-threaded flush variants. */
extern int is_pgcomp_wrk_init_done(void);
/* Hands one flush request per buffer pool instance to the worker threads.
Callers in this file pass srv_buf_pool_instances as buf_pool_inst and an
int array as pages_flushed, then read pages_flushed[i] for each instance
after the call returns.
NOTE(review): min_n is an int here while callers pass a ulint (possibly
ULINT_MAX) -- confirm the narrowing is intended. */
extern int pgcomp_flush_work_items(
int buf_pool_inst,
int *pages_flushed,
buf_flush_t flush_type,
int min_n,
lsn_t lsn_limit);
/* NOTE(review): not referenced anywhere in the visible code; meaning of
the value 50 is not documented here -- confirm whether it is still used. */
#define MT_COMP_WATER_MARK 50
#ifdef UNIV_DEBUG
#include <time.h>
/** Compute the elapsed time between two timestamps.
The result is normalized so that 0 <= d_time->tv_usec < 1000000, also when
an input carries more than one second in its tv_usec field. Neither input
is modified; d_time may alias g_time or s_time.
@param g_time  in: end ("got") time, the minuend
@param s_time  in: start time, the subtrahend
@param d_time  out: g_time - s_time
@return always 0 */
int timediff(struct timeval *g_time, struct timeval *s_time, struct timeval *d_time)
{
	/* Work on local differences so the caller's start time is left
	untouched. */
	long long	sec = (long long) g_time->tv_sec
		- (long long) s_time->tv_sec;
	long long	usec = (long long) g_time->tv_usec
		- (long long) s_time->tv_usec;

	/* Move whole seconds out of the microsecond difference. */
	sec += usec / 1000000;
	usec %= 1000000;

	/* C division truncates toward zero, so a negative remainder needs
	one borrowed second to become non-negative. */
	if (usec < 0) {
		usec += 1000000;
		sec -= 1;
	}

	d_time->tv_sec = sec;
	d_time->tv_usec = usec;

	return 0;
}
#endif
static os_fast_mutex_t pgcomp_mtx;
/** Create the file-local fast mutex pgcomp_mtx (not instrumented by the
performance schema). Pairs with pgcomp_deinit(). */
void
pgcomp_init(void)
{
	os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &pgcomp_mtx);
}
/** Free the file-local fast mutex pgcomp_mtx created by pgcomp_init(). */
void
pgcomp_deinit(void)
{
	os_fast_mutex_free(&pgcomp_mtx);
}
/*******************************************************************//**
Multi-threaded version of buf_flush_list: the flush-list batch for every
buffer pool instance is handed to pgcomp_flush_work_items() and the
per-instance page counts it reports are summed and fed to the flush
monitor counters.
@return always true */
UNIV_INTERN
bool
pgcomp_buf_flush_list(
/*==================*/
	ulint		min_n,		/*!< in: wished minimum number of
					blocks flushed (it is not guaranteed
					that the actual number is that big,
					though) */
	lsn_t		lsn_limit,	/*!< in: in the case BUF_FLUSH_LIST
					all blocks whose oldest_modification
					is smaller than this should be
					flushed (if their number does not
					exceed min_n), otherwise ignored */
	ulint*		n_processed)	/*!< out: the number of pages
					which were processed is passed
					back to caller. Ignored if NULL */
{
	ulint	i;
	ulint	total_flushed = 0;
	bool	success = true;
#ifdef UNIV_DEBUG
	struct timeval	p_start_time, p_end_time, d_time;
#endif
	/* Zero-filled so that a slot the workers do not write reads as
	"nothing flushed" instead of stack garbage. */
	int	cnt_flush[MTFLUSH_MAX_WORKER] = {0};

	if (n_processed) {
		*n_processed = 0;
	}

	if (min_n != ULINT_MAX) {
		/* Ensure that flushing is spread evenly amongst the
		buffer pool instances. When min_n is ULINT_MAX
		we need to flush everything up to the lsn limit
		so no limit here. */
		min_n = (min_n + srv_buf_pool_instances - 1)
			/ srv_buf_pool_instances;
	}

#ifdef UNIV_DEBUG
	gettimeofday(&p_start_time, 0x0);
#endif

	/* NOTE(review): unlike pgcomp_buf_flush_LRU_tail(), this call is
	not serialized with pgcomp_mtx (the lock/unlock calls had been
	commented out) -- confirm that this is intentional.
	NOTE(review): pgcomp_flush_work_items() takes min_n as int, so
	ULINT_MAX is narrowed on the way in -- confirm. */
	pgcomp_flush_work_items(srv_buf_pool_instances,
				cnt_flush, BUF_FLUSH_LIST,
				min_n, lsn_limit);

	for (i = 0; i < srv_buf_pool_instances; i++) {
		total_flushed += cnt_flush[i];

		if (cnt_flush[i]) {
			MONITOR_INC_VALUE_CUMULATIVE(
				MONITOR_FLUSH_BATCH_TOTAL_PAGE,
				MONITOR_FLUSH_BATCH_COUNT,
				MONITOR_FLUSH_BATCH_PAGES,
				cnt_flush[i]);
		}
	}

	if (n_processed) {
		*n_processed = total_flushed;
	}

#ifdef UNIV_DEBUG
	gettimeofday(&p_end_time, 0x0);
	timediff(&p_end_time, &p_start_time, &d_time);

	/* Print the local total: n_processed is optional and may be NULL,
	so it must not be dereferenced here. */
	fprintf(stderr, "%s: [1] [*n_processed: (min:%lu)%lu %llu usec]\n",
		__FUNCTION__, (min_n * srv_buf_pool_instances), total_flushed,
		(unsigned long long)(d_time.tv_usec+(d_time.tv_sec*1000000)));
#endif

	return(success);
}
/* JAN: TODO: END: */
/*******************************************************************//**
This utility flushes dirty blocks from the end of the flush list of
all buffer pool instances.
......@@ -2181,11 +2021,9 @@ buf_flush_list(
bool timeout = false;
ulint flush_start_time = 0;
/* JAN: TODO: */
if (is_pgcomp_wrk_init_done()) {
return(pgcomp_buf_flush_list(min_n, lsn_limit, n_processed));
if (buf_mtflu_init_done()) {
return(buf_mtflu_flush_list(min_n, lsn_limit, n_processed));
}
/* JAN: TODO: END: */
for (i = 0; i < srv_buf_pool_instances; i++) {
requested_pages[i] = 0;
......@@ -2380,60 +2218,6 @@ buf_flush_single_page_from_LRU(
return(freed);
}
/* JAN: TODO: */
/*********************************************************************//**
Multi-threaded variant that buf_flush_LRU_tail() dispatches to when
is_pgcomp_wrk_init_done() is true.
Clears up tail of the LRU lists:
* Put replaceable pages at the tail of LRU to the free list
* Flush dirty pages at the tail of LRU to the disk
The depth to which we scan each buffer pool is controlled by dynamic
config parameter innodb_LRU_scan_depth.
The request is handed to pgcomp_flush_work_items() while pgcomp_mtx is
held, and the per-instance page counts it reports are summed.
@return total pages flushed */
UNIV_INTERN
ulint
pgcomp_buf_flush_LRU_tail(void)
/*====================*/
{
#ifdef UNIV_DEBUG
	struct timeval	p_start_time, p_end_time, d_time;
#endif
	ulint	total_flushed = 0;
	ulint	i = 0;
	/* Sized like the array in pgcomp_buf_flush_list() rather than with
	a bare 32, and zero-filled so that a slot the workers do not write
	reads as "nothing flushed".
	NOTE(review): assumes srv_buf_pool_instances never exceeds
	MTFLUSH_MAX_WORKER -- confirm. */
	int	cnt_flush[MTFLUSH_MAX_WORKER] = {0};

#ifdef UNIV_DEBUG
	gettimeofday(&p_start_time, 0x0);
#endif

	ut_ad(is_pgcomp_wrk_init_done());

	os_fast_mutex_lock(&pgcomp_mtx);
	pgcomp_flush_work_items(srv_buf_pool_instances,
				cnt_flush, BUF_FLUSH_LRU,
				srv_LRU_scan_depth, 0);
	os_fast_mutex_unlock(&pgcomp_mtx);

	for (i = 0; i < srv_buf_pool_instances; i++) {
		if (cnt_flush[i]) {
			total_flushed += cnt_flush[i];

			MONITOR_INC_VALUE_CUMULATIVE(
				MONITOR_LRU_BATCH_TOTAL_PAGE,
				MONITOR_LRU_BATCH_COUNT,
				MONITOR_LRU_BATCH_PAGES,
				cnt_flush[i]);
		}
	}

	/* Must use #ifdef like the guards above: the timing variables
	only exist when UNIV_DEBUG is defined, and "#if UNIV_DEBUG" does
	not compile when the macro is defined without a value. */
#ifdef UNIV_DEBUG
	gettimeofday(&p_end_time, 0x0);
	timediff(&p_end_time, &p_start_time, &d_time);

	fprintf(stderr, "[1] [*n_processed: (min:%lu)%lu %llu usec]\n", (
		srv_LRU_scan_depth * srv_buf_pool_instances), total_flushed,
		(unsigned long long)(d_time.tv_usec+(d_time.tv_sec*1000000)));
#endif

	return(total_flushed);
}
/* JAN: TODO: END: */
/*********************************************************************//**
Clears up tail of the LRU lists:
* Put replaceable pages at the tail of LRU to the free list
......@@ -2458,12 +2242,10 @@ buf_flush_LRU_tail(void)
ulint free_list_lwm = srv_LRU_scan_depth / 100
* srv_cleaner_free_list_lwm;
/* JAN: TODO: */
if(is_pgcomp_wrk_init_done())
if(buf_mtflu_init_done())
{
return(pgcomp_buf_flush_LRU_tail());
return(buf_mtflu_flush_LRU_tail());
}
/* JAN: TODO: END */
for (ulint i = 0; i < srv_buf_pool_instances; i++) {
......
This diff is collapsed.
......@@ -295,6 +295,63 @@ buf_flush_flush_list_in_progress(void)
/*==================================*/
__attribute__((warn_unused_result));
/******************************************************************//**
Start a buffer flush batch for LRU or flush list.
@return ibool telling the caller whether the batch may proceed; the
callers visible in this change skip the batch and return false when the
result is zero. Presumably zero means that a batch of the same type is
already running -- TODO confirm against the definition. */
ibool
buf_flush_start(
/*============*/
buf_pool_t* buf_pool, /*!< buffer pool instance */
buf_flush_t flush_type); /*!< in: BUF_FLUSH_LRU
or BUF_FLUSH_LIST */
/******************************************************************//**
End a buffer flush batch for LRU or flush list.
Callers in this change invoke it after buf_flush_batch(), for a batch
that buf_flush_start() allowed to begin. */
void
buf_flush_end(
/*==========*/
buf_pool_t* buf_pool, /*!< buffer pool instance */
buf_flush_t flush_type); /*!< in: BUF_FLUSH_LRU
or BUF_FLUSH_LIST */
/*******************************************************************//**
This utility flushes dirty blocks from the end of the LRU list or flush_list.
NOTE 1: in the case of an LRU flush the calling thread may own latches to
pages: to avoid deadlocks, this function must be written so that it cannot
end up waiting for these latches! NOTE 2: in the case of a flush list flush,
the calling thread is not allowed to own any latches on pages!
The function returns nothing; the flushed/evicted page counts are passed
back through the n parameter. */
__attribute__((nonnull))
void
buf_flush_batch(
/*============*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
buf_flush_t flush_type, /*!< in: BUF_FLUSH_LRU or
BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
then the caller must not own any
latches on pages */
ulint min_n, /*!< in: wished minimum number of blocks
flushed (it is not guaranteed that the
actual number is that big, though) */
lsn_t lsn_limit, /*!< in: in the case of BUF_FLUSH_LIST
all blocks whose oldest_modification is
smaller than this should be flushed
(if their number does not exceed
min_n), otherwise ignored */
bool limited_lru_scan,/*!< in: for LRU flushes, if true,
allow to scan only up to
srv_LRU_scan_depth pages in total */
flush_counters_t* n); /*!< out: flushed/evicted page
counts */
/******************************************************************//**
Gather the aggregated stats for both flush list and LRU list flushing.
Callers in this change invoke it once per batch, after buf_flush_end(),
with the page count of that batch. */
void
buf_flush_common(
/*=============*/
buf_flush_t flush_type, /*!< in: type of flush */
ulint page_count); /*!< in: number of pages flushed */
#ifndef UNIV_NONINL
#include "buf0flu.ic"
#endif
......
/*****************************************************************************
Copyright (C) 2014 SkySQL Ab. All Rights Reserved.
Copyright (C) 2014 Fusion-io. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*****************************************************************************/
/******************************************************************//**
@file include/buf0mtflu.h
Multi-threaded flush method interface function prototypes
Created 06/02/2014 Jan Lindström jan.lindstrom@skysql.com
Dhananjoy Das DDas@fusionio.com
***********************************************************************/
#ifndef buf0mtflu_h
#define buf0mtflu_h
/******************************************************************//**
Add exit work item to work queue to signal multi-threaded flush
threads that they should exit.
*/
void
buf_mtflu_io_thread_exit(void);
/*===========================*/
/******************************************************************//**
Initialize multi-threaded flush thread synchronization data.
The data is handed back as an opaque void pointer.
@return Initialized multi-threaded flush thread synchronization data. */
void*
buf_mtflu_handler_init(
/*===================*/
ulint n_threads, /*!< in: Number of threads to create */
ulint wrk_cnt); /*!< in: Number of work items */
/******************************************************************//**
Return true if multi-threaded flush is initialized.
buf_flush_list() and buf_flush_LRU_tail() check this before dispatching
to buf_mtflu_flush_list() and buf_mtflu_flush_LRU_tail().
@return true if initialized, false if not */
bool
buf_mtflu_init_done(void);
/*======================*/
/*********************************************************************//**
Multi-threaded flush entry point that buf_flush_LRU_tail() dispatches to
when buf_mtflu_init_done() returns true.
Clears up tail of the LRU lists:
* Put replaceable pages at the tail of LRU to the free list
* Flush dirty pages at the tail of LRU to the disk
The depth to which we scan each buffer pool is controlled by dynamic
config parameter innodb_LRU_scan_depth.
@return total pages flushed */
UNIV_INTERN
ulint
buf_mtflu_flush_LRU_tail(void);
/*===========================*/
/*******************************************************************//**
Multi-threaded version of buf_flush_list; buf_flush_list() dispatches to
it when buf_mtflu_init_done() returns true and passes its result back to
its own caller.
*/
bool
buf_mtflu_flush_list(
/*=================*/
ulint min_n, /*!< in: wished minimum number of blocks
flushed (it is not guaranteed that the
actual number is that big, though) */
lsn_t lsn_limit, /*!< in: in the case BUF_FLUSH_LIST all
blocks whose oldest_modification is
smaller than this should be flushed
(if their number does not exceed
min_n), otherwise ignored */
ulint* n_processed); /*!< out: the number of pages
which were processed is passed
back to caller. Ignored if NULL */
/*********************************************************************//**
Set correct thread identifiers to io thread array based on
information we have.
NOTE(review): thread_ids is marked "in", but the description above says
the array is what gets filled -- confirm whether it should read "out".
NOTE(review): ctx is presumably the pointer returned by
buf_mtflu_handler_init() -- confirm against the caller. */
void
buf_mtflu_set_thread_ids(
/*=====================*/
ulint n_threads, /*!<in: Number of threads to fill */
void* ctx, /*!<in: thread context */
os_thread_id_t* thread_ids); /*!<in: thread id array */
#endif
......@@ -279,7 +279,7 @@ extern my_bool srv_use_lz4;
/* Number of flush threads */
#define MTFLUSH_MAX_WORKER 64
extern ulint srv_mtflush_threads;
extern long srv_mtflush_threads;
/** Server undo tablespaces directory, can be absolute path. */
extern char* srv_undo_dir;
......
......@@ -180,6 +180,8 @@ UNIV_INTERN my_bool srv_use_posix_fallocate = FALSE;
UNIV_INTERN my_bool srv_use_atomic_writes = FALSE;
/* If this flag IS TRUE, then we use lz4 to compress/decompress pages */
UNIV_INTERN my_bool srv_use_lz4 = FALSE;
/* Number of threads used for multi-threaded flush */
UNIV_INTERN long srv_mtflush_threads = 0;
#ifdef __WIN__
/* Windows native condition variables. We use runtime loading / function
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment