Commit 99a0b546 authored by Sergei Golubchik

merge ft-index and ft-engine as of 7.1.5

......@@ -14,7 +14,7 @@ IF(NOT TOKUDB_OK OR WITHOUT_TOKUDB OR WITHOUT_TOKUDB_STORAGE_ENGINE)
ENDIF()
############################################
SET(ENV{TOKUDB_VERSION} "7.1.0")
SET(TOKUDB_VERSION "7.1.5")
SET(TOKUDB_DEB_FILES "usr/lib/mysql/plugin/ha_tokudb.so\netc/mysql/conf.d/tokudb.cnf\nusr/bin/tokuftdump\nusr/share/doc/mariadb-server-5.5/README-TOKUDB\nusr/share/doc/mariadb-server-5.5/README.md" PARENT_SCOPE)
SET(USE_BDB OFF CACHE BOOL "")
SET(USE_VALGRIND OFF CACHE BOOL "")
......@@ -31,14 +31,13 @@ MARK_AS_ADVANCED(USE_VALGRIND)
MARK_AS_ADVANCED(XZ_SOURCE_DIR)
############################################
IF(DEFINED ENV{TOKUDB_VERSION})
SET(TOKUDB_VERSION $ENV{TOKUDB_VERSION})
ADD_DEFINITIONS("-DTOKUDB_VERSION=\"${TOKUDB_VERSION}\"")
IF(NOT DEFINED TOKUDB_VERSION)
IF(DEFINED ENV{TOKUDB_VERSION})
SET(TOKUDB_VERSION $ENV{TOKUDB_VERSION})
ENDIF()
ENDIF()
IF(DEFINED ENV{TOKUDB_PATCHES})
SET(TOKUDB_PATCHES $ENV{TOKUDB_PATCHES})
ADD_DEFINITIONS("-DTOKUDB_PATCHES=${TOKUDB_PATCHES}")
IF(DEFINED TOKUDB_VERSION)
ADD_DEFINITIONS("-DTOKUDB_VERSION=\"${TOKUDB_VERSION}\"")
ENDIF()
ADD_SUBDIRECTORY(ft-index)
......
......@@ -24,26 +24,34 @@ working MySQL or MariaDB with Tokutek patches, and with the TokuDB storage
engine, called `make.mysql.bash`. This script will download copies of the
needed source code from github and build everything.
To build MySQL with TokuDB 7.0.4:
To build MySQL with TokuDB 7.1.0:
```sh
scripts/make.mysql.bash --mysqlbuild=mysql-5.5.30-tokudb-7.0.4-linux-x86_64
scripts/make.mysql.bash --mysqlbuild=mysql-5.5.30-tokudb-7.1.0-linux-x86_64
```
To build MariaDB with TokuDB 7.0.4:
To build MariaDB with TokuDB 7.1.0:
```sh
scripts/make.mysql.bash --mysqlbuild=mariadb-5.5.30-tokudb-7.0.4-linux-x86_64
scripts/make.mysql.bash --mysqlbuild=mariadb-5.5.30-tokudb-7.1.0-linux-x86_64
```
Before you start, make sure you have a C++11-compatible compiler (GCC >=
4.7 is recommended), as well as CMake >=2.8.8, and the libraries and
header files for valgrind,zlib, and Berkeley DB. On Centos, `yum install
valgrind-devel zlib-devel libdb-devel`, on Ubuntu, `apt-get install
valgrind zlib1g-dev libdb-dev`.
header files for valgrind, zlib, and Berkeley DB.
On CentOS, `yum install valgrind-devel zlib-devel libdb-devel`
On Ubuntu, `apt-get install valgrind zlib1g-dev libdb-dev`
You can set the compiler by passing `--cc` and `--cxx` to the script, to
select one that's new enough. The default is `scripts/make.mysql.bash
--cc=gcc47 --cxx=g++47`, which may not exist on your system.
To build a debug MySQL with TokuDB using the head of the Tokutek github
repositories, run this:
```sh
scripts/make.mysql.debug.env.bash
```
Contributing
------------
......
......@@ -18,10 +18,12 @@ include(TokuMergeLibs)
set(LIBTOKUPORTABILITY "tokuportability" CACHE STRING "Name of libtokuportability.so")
set(LIBTOKUDB "tokufractaltree" CACHE STRING "Name of libtokufractaltree.so")
if(USE_VALGRIND)
include_directories(
${VALGRIND_INCLUDE_DIR}
)
set(INSTALL_LIBDIR "lib" CACHE STRING "where to install libs")
if (USE_VALGRIND AND NOT VALGRIND_INCLUDE_DIR MATCHES NOTFOUND)
include_directories(
${VALGRIND_INCLUDE_DIR}
)
endif()
include_directories(
${CMAKE_CURRENT_SOURCE_DIR}/include
......
......@@ -2,15 +2,18 @@ TokuKV
======
TokuKV is a high-performance, transactional key-value store, used in the
TokuDB storage engine for MySQL and MariaDB.
TokuDB storage engine for MySQL and MariaDB and in TokuMX, the
high-performance MongoDB distribution.
TokuKV is provided as a shared library with an interface similar to
Berkeley DB.
To build the full MySQL product, see the instructions for
[ft-engine][ft-engine]. This document covers TokuKV only.
[Tokutek/ft-engine][ft-engine]. To build TokuMX, see the instructions
for [Tokutek/mongo][mongo]. This document covers TokuKV only.
[ft-engine]: http://github.com/Tokutek/ft-engine
[ft-engine]: https://github.com/Tokutek/ft-engine
[mongo]: https://github.com/Tokutek/mongo
Building
......@@ -18,9 +21,8 @@ Building
TokuKV is built using CMake >= 2.8.9. Out-of-source builds are
recommended. You need a C++11 compiler, though only GCC >= 4.7 and
Apple's Clang are tested. You also need zlib and valgrind development
packages (`yum install valgrind-devel zlib-devel` or `apt-get install
valgrind zlib1g-dev`).
Apple's Clang are tested. You also need zlib development packages
(`yum install zlib-devel` or `apt-get install zlib1g-dev`).
You will also need the source code for jemalloc, checked out in
`third_party/`.
......@@ -35,6 +37,7 @@ CC=gcc47 CXX=g++47 cmake \
-D CMAKE_BUILD_TYPE=Debug \
-D USE_BDB=OFF \
-D BUILD_TESTING=OFF \
-D USE_VALGRIND=OFF \
-D CMAKE_INSTALL_PREFIX=../prefix/ \
..
cmake --build . --target install
......@@ -102,13 +105,20 @@ Contributing
Please report bugs in TokuKV here on github.
We have two publicly accessible mailing lists:
We have two publicly accessible mailing lists for TokuDB:
- tokudb-user@googlegroups.com is for general and support related
questions about the use of TokuDB.
- tokudb-dev@googlegroups.com is for discussion of the development of
TokuDB.
and two for TokuMX:
- tokumx-user@googlegroups.com is for general and support related
questions about the use of TokuMX.
- tokumx-dev@googlegroups.com is for discussion of the development of
TokuMX.
We are also available on IRC on freenode.net, in the #tokutek channel.
......
......@@ -19,9 +19,11 @@ if (NOT DEFINED MYSQL_PROJECT_NAME_DOCSTRING)
FILES "${CMAKE_CURRENT_BINARY_DIR}/db.h"
DESTINATION include
RENAME tokudb.h
COMPONENT tokukv_headers
)
install(
FILES "${CMAKE_CURRENT_BINARY_DIR}/db.h"
DESTINATION include
COMPONENT tokukv_headers
)
endif ()
......@@ -207,6 +207,7 @@ enum {
TOKUDB_BAD_CHECKSUM = -100015,
TOKUDB_HUGE_PAGES_ENABLED = -100016,
TOKUDB_OUT_OF_RANGE = -100017,
TOKUDB_INTERRUPTED = -100018,
DONTUSE_I_JUST_PUT_THIS_HERE_SO_I_COULD_HAVE_A_COMMA_AFTER_EACH_ITEM
};
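
The new TOKUDB_INTERRUPTED code pairs with the cursor interrupt hooks added further down in this commit. A minimal sketch of a call site tolerating it, assuming the generated `db.h` and a cursor whose interrupt callback is wired up elsewhere (the `fetch_row` wrapper is illustrative, not part of the commit):

```c
#include <db.h>  /* assumption: the generated TokuKV header defining TOKUDB_INTERRUPTED */

/* Hedged sketch: treat TOKUDB_INTERRUPTED as a clean, client-requested
 * abort rather than a hard error. */
static int fetch_row(DBC *c, DBT *key, YDB_CALLBACK_FUNCTION cb, void *extra) {
    int r = c->c_getf_set(c, 0, key, cb, extra);
    if (r == TOKUDB_INTERRUPTED) {
        r = 0;  /* the registered check-interrupt callback asked us to stop */
    }
    return r;
}
```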
......@@ -359,6 +360,7 @@ static void print_defines (void) {
dodefine(TOKUDB_BAD_CHECKSUM);
dodefine(TOKUDB_HUGE_PAGES_ENABLED);
dodefine(TOKUDB_OUT_OF_RANGE);
dodefine(TOKUDB_INTERRUPTED);
/* LOADER flags */
printf("/* LOADER flags */\n");
......@@ -449,7 +451,7 @@ static void print_db_env_struct (void) {
"int (*set_lk_max_memory) (DB_ENV *env, uint64_t max)",
"int (*get_lk_max_memory) (DB_ENV *env, uint64_t *max)",
"void (*set_update) (DB_ENV *env, int (*update_function)(DB *, const DBT *key, const DBT *old_val, const DBT *extra, void (*set_val)(const DBT *new_val, void *set_extra), void *set_extra))",
"int (*set_lock_timeout) (DB_ENV *env, uint64_t lock_wait_time_msec)",
"int (*set_lock_timeout) (DB_ENV *env, uint64_t default_lock_wait_time_msec, uint64_t (*get_lock_wait_time_cb)(uint64_t default_lock_wait_time))",
"int (*get_lock_timeout) (DB_ENV *env, uint64_t *lock_wait_time_msec)",
"int (*set_lock_timeout_callback) (DB_ENV *env, lock_timeout_callback callback)",
"int (*txn_xa_recover) (DB_ENV*, TOKU_XA_XID list[/*count*/], long count, /*out*/ long *retp, uint32_t flags)",
......@@ -459,8 +461,9 @@ static void print_db_env_struct (void) {
"void (*change_fsync_log_period) (DB_ENV*, uint32_t)",
"int (*iterate_live_transactions) (DB_ENV *env, iterate_transactions_callback callback, void *extra)",
"int (*iterate_pending_lock_requests) (DB_ENV *env, iterate_requests_callback callback, void *extra)",
"void (*set_loader_memory_size)(DB_ENV *env, uint64_t loader_memory_size)",
"void (*set_loader_memory_size)(DB_ENV *env, uint64_t (*get_loader_memory_size_callback)(void))",
"uint64_t (*get_loader_memory_size)(DB_ENV *env)",
"void (*set_killed_callback)(DB_ENV *env, uint64_t default_killed_time_msec, uint64_t (*get_killed_time_callback)(uint64_t default_killed_time_msec), int (*killed_callback)(void))",
NULL};
sort_and_dump_fields("db_env", true, extra);
......@@ -529,7 +532,7 @@ static void print_db_struct (void) {
"int (*change_descriptor) (DB*, DB_TXN*, const DBT* descriptor, uint32_t) /* change row/dictionary descriptor for a db. Available only while db is open */",
"int (*getf_set)(DB*, DB_TXN*, uint32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */",
"int (*optimize)(DB*) /* Run garbage collecion and promote all transactions older than oldest. Amortized (happens during flattening) */",
"int (*hot_optimize)(DB*, DBT*, DBT*, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra)",
"int (*hot_optimize)(DB*, DBT*, DBT*, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, uint64_t* loops_run)",
"int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION)",
"int (*change_pagesize)(DB*,uint32_t)",
"int (*change_readpagesize)(DB*,uint32_t)",
......@@ -538,6 +541,9 @@ static void print_db_struct (void) {
"int (*change_compression_method)(DB*,TOKU_COMPRESSION_METHOD)",
"int (*get_compression_method)(DB*,TOKU_COMPRESSION_METHOD*)",
"int (*set_compression_method)(DB*,TOKU_COMPRESSION_METHOD)",
"int (*change_fanout)(DB *db, uint32_t fanout)",
"int (*get_fanout)(DB *db, uint32_t *fanout)",
"int (*set_fanout)(DB *db, uint32_t fanout)",
"int (*set_indexer)(DB*, DB_INDEXER*)",
"void (*get_indexer)(DB*, DB_INDEXER**)",
"int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going)",
......@@ -546,6 +552,7 @@ static void print_db_struct (void) {
"int (*get_fractal_tree_info64)(DB*,uint64_t*,uint64_t*,uint64_t*,uint64_t*)",
"int (*iterate_fractal_tree_block_map)(DB*,int(*)(uint64_t,int64_t,int64_t,int64_t,int64_t,void*),void*)",
"const char *(*get_dname)(DB *db)",
"int (*get_last_key)(DB *db, YDB_CALLBACK_FUNCTION func, void* extra)",
NULL};
sort_and_dump_fields("db", true, extra);
}
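
A hedged sketch exercising two of the `DB` additions above, the fanout accessors and the new `loops_run` out-parameter of `hot_optimize`; the NULL bounds (taken here to mean the whole key range), the fanout of 128, and the progress handler are illustrative assumptions:

```c
#include <inttypes.h>
#include <stdio.h>
#include <db.h>  /* assumption: the generated TokuKV header */

static int dump_progress(void *extra, float progress) {
    (void) extra;
    fprintf(stderr, "hot optimize: %.0f%%\n", progress * 100);
    return 0;  /* returning nonzero would cancel the optimization */
}

static int widen_and_optimize(DB *db) {
    /* assumed, by analogy with the other change_* methods, to apply to an open db */
    int r = db->change_fanout(db, 128);
    if (r != 0) return r;
    uint64_t loops_run = 0;
    r = db->hot_optimize(db, NULL, NULL, dump_progress, NULL, &loops_run);
    if (r == 0)
        fprintf(stderr, "hot optimize finished after %" PRIu64 " loops\n", loops_run);
    return r;
}
```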
......@@ -602,6 +609,7 @@ static void print_dbc_struct (void) {
"int (*c_getf_set_range)(DBC *, uint32_t, DBT *, YDB_CALLBACK_FUNCTION, void *)",
"int (*c_getf_set_range_reverse)(DBC *, uint32_t, DBT *, YDB_CALLBACK_FUNCTION, void *)",
"int (*c_set_bounds)(DBC*, const DBT*, const DBT*, bool pre_acquire, int out_of_range_error)",
"void (*c_set_check_interrupt_callback)(DBC*, bool (*)(void*), void *)",
"void (*c_remove_restriction)(DBC*)",
NULL};
sort_and_dump_fields("dbc", false, extra);
......@@ -729,10 +737,10 @@ int main (int argc, char *const argv[] __attribute__((__unused__))) {
printf(" CHARSTR, // interpret as char * \n");
printf(" UNIXTIME, // interpret as time_t \n");
printf(" TOKUTIME, // interpret as tokutime_t \n");
printf(" PARCOUNT // interpret as PARTITIONED_COUNTER\n");
printf(" PARCOUNT, // interpret as PARTITIONED_COUNTER\n");
printf(" DOUBLE // interpret as double\n");
printf("} toku_engine_status_display_type; \n");
printf("typedef enum {\n");
printf(" TOKU_ENGINE_STATUS = (1ULL<<0), // Include when asking for engine status\n");
printf(" TOKU_GLOBAL_STATUS = (1ULL<<1), // Include when asking for information_schema.global_status\n");
......@@ -821,6 +829,7 @@ int main (int argc, char *const argv[] __attribute__((__unused__))) {
printf("int toku_set_trace_file (const char *fname) %s;\n", VISIBLE);
printf("int toku_close_trace_file (void) %s;\n", VISIBLE);
printf("void db_env_set_direct_io (bool direct_io_on) %s;\n", VISIBLE);
printf("void db_env_set_compress_buffers_before_eviction (bool compress_buffers) %s;\n", VISIBLE);
printf("void db_env_set_func_fsync (int (*)(int)) %s;\n", VISIBLE);
printf("void db_env_set_func_free (void (*)(void*)) %s;\n", VISIBLE);
printf("void db_env_set_func_malloc (void *(*)(size_t)) %s;\n", VISIBLE);
......
......@@ -85,8 +85,8 @@ include(CTest)
set(TOKUDB_DATA "${TokuDB_SOURCE_DIR}/../tokudb.data" CACHE FILEPATH "Path to data files for tests")
if (BUILD_TESTING OR BUILD_FT_TESTS OR BUILD_SRC_TESTS)
set(WARNED_ABOUT_DATA 1) # disable the warning below
if (NOT EXISTS "${TOKUDB_DATA}/" AND NOT WARNED_ABOUT_DATA)
set(WARNED_ABOUT_DATA 0)
if (NOT EXISTS "${TOKUDB_DATA}/" AND NOT WARNED_ABOUT_DATA AND CMAKE_PROJECT_NAME STREQUAL TokuDB)
message(WARNING "Test data files are missing from ${TOKUDB_DATA}, which will cause some tests to fail. Please put them there or modify TOKUDB_DATA to avoid this.")
set(WARNED_ABOUT_DATA 1)
endif ()
......
......@@ -88,6 +88,7 @@ set_cflags_if_supported(
-Wno-error=missing-format-attribute
-Wno-error=address-of-array-temporary
-Wno-error=tautological-constant-out-of-range-compare
-Wno-ignored-attributes
-fno-rtti
-fno-exceptions
)
......
include(ExternalProject)
if (CMAKE_PROJECT_NAME STREQUAL TokuDB)
## add jemalloc with an external project
set(JEMALLOC_SOURCE_DIR "${TokuDB_SOURCE_DIR}/third_party/jemalloc" CACHE FILEPATH "Where to find jemalloc sources.")
if (NOT EXISTS "${JEMALLOC_SOURCE_DIR}/configure")
message(FATAL_ERROR "Can't find jemalloc sources. Please check them out to ${JEMALLOC_SOURCE_DIR} or modify JEMALLOC_SOURCE_DIR.")
endif ()
set(jemalloc_configure_opts "CC=${CMAKE_C_COMPILER}" "--with-jemalloc-prefix=" "--with-private-namespace=tokudb_jemalloc_internal_" "--enable-cc-silence")
option(JEMALLOC_DEBUG "Build jemalloc with --enable-debug." OFF)
if (JEMALLOC_DEBUG)
list(APPEND jemalloc_configure_opts --enable-debug)
endif ()
ExternalProject_Add(build_jemalloc
PREFIX jemalloc
SOURCE_DIR "${JEMALLOC_SOURCE_DIR}"
CONFIGURE_COMMAND
"${JEMALLOC_SOURCE_DIR}/configure" ${jemalloc_configure_opts}
"--prefix=${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/jemalloc"
)
add_library(jemalloc STATIC IMPORTED GLOBAL)
set_target_properties(jemalloc PROPERTIES IMPORTED_LOCATION
"${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/jemalloc/lib/libjemalloc_pic.a")
add_dependencies(jemalloc build_jemalloc)
add_library(jemalloc_nopic STATIC IMPORTED GLOBAL)
set_target_properties(jemalloc_nopic PROPERTIES IMPORTED_LOCATION
"${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/jemalloc/lib/libjemalloc.a")
add_dependencies(jemalloc_nopic build_jemalloc)
# detect when we are being built as a subproject
if (NOT DEFINED MYSQL_PROJECT_NAME_DOCSTRING)
install(DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/jemalloc/lib" DESTINATION .
COMPONENT tokukv_libs_extra)
endif ()
endif ()
## add lzma with an external project
set(xz_configure_opts --with-pic --enable-static)
if (APPLE)
......
......@@ -10,5 +10,7 @@ if (NOT DEFINED MYSQL_PROJECT_NAME_DOCSTRING)
README.examples
DESTINATION
examples
COMPONENT
tokukv_examples
)
endif ()
\ No newline at end of file
......@@ -8,7 +8,7 @@ set_source_files_properties(
PROPERTIES GENERATED TRUE)
add_executable(logformat logformat.cc)
target_link_libraries(logformat ${LIBTOKUPORTABILITY})
target_link_libraries(logformat ${LIBTOKUPORTABILITY}_static)
add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/log_code.cc"
......
......@@ -105,7 +105,7 @@ void bn_data::init_zero() {
void bn_data::initialize_empty() {
toku_mempool_zero(&m_buffer_mempool);
m_buffer.create();
m_buffer.create_no_array();
}
void bn_data::initialize_from_data(uint32_t num_entries, unsigned char *buf, uint32_t data_size) {
......@@ -230,11 +230,11 @@ static int move_it (const KLPAIR &klpair, const uint32_t idx, struct omt_compres
// Compress things, and grow the mempool if needed.
void bn_data::omt_compress_kvspace(size_t added_size, void **maybe_free) {
uint32_t total_size_needed = toku_mempool_get_used_space(&m_buffer_mempool) + added_size;
if (total_size_needed+total_size_needed >= m_buffer_mempool.size) {
m_buffer_mempool.size = total_size_needed+total_size_needed;
}
// set the new mempool size to be twice of the space we actually need.
// On top of the 25% that is padded within toku_mempool_construct (which we
// should consider getting rid of), that should be good enough.
struct mempool new_kvspace;
toku_mempool_construct(&new_kvspace, m_buffer_mempool.size);
toku_mempool_construct(&new_kvspace, 2*total_size_needed);
uint32_t numvals = omt_size();
KLPAIR *XMALLOC_N(numvals, newvals);
struct omt_compressor_state oc = { &new_kvspace, newvals };
......
......@@ -116,7 +116,7 @@ struct klpair_struct {
typedef struct klpair_struct *KLPAIR;
static LEAFENTRY get_le_from_klpair(KLPAIR klpair){
static inline LEAFENTRY get_le_from_klpair(KLPAIR klpair){
uint32_t keylen = klpair->keylen;
LEAFENTRY le = (LEAFENTRY)(klpair->key_le + keylen);
return le;
......@@ -250,5 +250,7 @@ class bn_data {
klpair_omt_t m_buffer; // pointers to individual leaf entries
struct mempool m_buffer_mempool; // storage for all leaf entries
friend class bndata_bugfix_test;
};
......@@ -458,7 +458,7 @@ class cachefile_list {
//
class checkpointer {
public:
void init(pair_list *_pl, TOKULOGGER _logger, evictor *_ev, cachefile_list *files);
int init(pair_list *_pl, TOKULOGGER _logger, evictor *_ev, cachefile_list *files);
void destroy();
void set_checkpoint_period(uint32_t new_period);
uint32_t get_checkpoint_period();
......@@ -480,6 +480,8 @@ class checkpointer {
cachefile_list *m_cf_list;
pair_list *m_list;
evictor *m_ev;
bool m_checkpointer_cron_init;
bool m_checkpointer_init;
// variable used by the checkpoint thread to know
// when all work induced by cloning on client threads is done
......@@ -512,7 +514,7 @@ const int EVICTION_PERIOD = 1;
//
class evictor {
public:
void init(long _size_limit, pair_list* _pl, cachefile_list* _cf_list, KIBBUTZ _kibbutz, uint32_t eviction_period);
int init(long _size_limit, pair_list* _pl, cachefile_list* _cf_list, KIBBUTZ _kibbutz, uint32_t eviction_period);
void destroy();
void add_pair_attr(PAIR_ATTR attr);
void remove_pair_attr(PAIR_ATTR attr);
......@@ -522,7 +524,7 @@ class evictor {
uint64_t reserve_memory(double fraction, uint64_t upper_bound);
void release_reserved_memory(uint64_t reserved_memory);
void run_eviction_thread();
void do_partial_eviction(PAIR p, bool pair_mutex_held);
void do_partial_eviction(PAIR p);
void evict_pair(PAIR p, bool checkpoint_pending);
void wait_for_cache_pressure_to_subside();
void signal_eviction_thread();
......@@ -597,6 +599,10 @@ class evictor {
// this variable is ONLY used for testing purposes
uint64_t m_num_eviction_thread_runs;
bool m_ev_thread_init;
bool m_evictor_init;
friend class evictor_test_helpers;
friend class evictor_unit_test;
};
......@@ -608,7 +614,7 @@ class evictor {
//
class cleaner {
public:
void init(uint32_t cleaner_iterations, pair_list* _pl, CACHETABLE _ct);
int init(uint32_t cleaner_iterations, pair_list* _pl, CACHETABLE _ct);
void destroy(void);
uint32_t get_iterations(void);
void set_iterations(uint32_t new_iterations);
......@@ -625,6 +631,8 @@ class cleaner {
// minimum period of 1s so if you want
// more frequent cleaner runs you must
// use this)
bool m_cleaner_cron_init;
bool m_cleaner_init;
};
///////////////////////////////////////////////////////////////////////////////
......
......@@ -122,7 +122,7 @@ uint32_t toku_get_cleaner_iterations_unlocked (CACHETABLE ct);
// create and initialize a cache table
// size_limit is the upper limit on the size of the values in the table
// pass 0 if you want the default
void toku_cachetable_create(CACHETABLE *result, long size_limit, LSN initial_lsn, TOKULOGGER);
int toku_cachetable_create(CACHETABLE *result, long size_limit, LSN initial_lsn, TOKULOGGER);
// Create a new cachetable.
// Effects: a new cachetable is created and initialized.
......@@ -223,11 +223,15 @@ typedef void (*CACHETABLE_PARTIAL_EVICTION_EST_CALLBACK)(void *ftnode_pv, void*
// The cachetable calls the partial eviction callback to possibly try and partially evict pieces
// of the PAIR. The callback determines the strategy for what to evict. The callback may choose to free
// nothing, or may choose to free as much as possible.
// old_attr is the PAIR_ATTR of the PAIR when the callback is called.
// new_attr is set to the new PAIR_ATTR after the callback executes partial eviction
// Requires a write lock to be held on the PAIR in the cachetable while this function is called
typedef int (*CACHETABLE_PARTIAL_EVICTION_CALLBACK)(void *ftnode_pv, PAIR_ATTR old_attr, PAIR_ATTR* new_attr, void *write_extraargs);
// nothing, or may choose to free as much as possible. When the partial eviction callback is finished,
// it must call finalize with the new PAIR_ATTR and the given finalize_extra. After this point, the
// write lock will be released on the PAIR and it is no longer safe to operate on any of the passed arguments.
// This is useful for doing expensive cleanup work outside of the PAIR's write lock (such as destroying objects, etc)
//
// on entry, requires a write lock to be held on the PAIR in the cachetable while this function is called
// on exit, the finalize continuation is called
typedef int (*CACHETABLE_PARTIAL_EVICTION_CALLBACK)(void *ftnode_pv, PAIR_ATTR old_attr, void *write_extraargs,
void (*finalize)(PAIR_ATTR new_attr, void *extra), void *finalize_extra);
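
A hedged sketch of a partial-eviction callback under the new continuation contract described above: cheap bookkeeping under the PAIR's write lock, then `finalize`, then the expensive teardown. `struct my_node` and the detach/destroy helpers are hypothetical, not cachetable API (assumes the cachetable header's types):

```c
/* assumes PAIR_ATTR from the cachetable header */
static int my_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_extraargs,
                          void (*finalize)(PAIR_ATTR new_attr, void *extra),
                          void *finalize_extra) {
    (void) write_extraargs;
    struct my_node *node = (struct my_node *) ftnode_pv;     /* hypothetical type */
    PAIR_ATTR new_attr = old_attr;
    void *detached = detach_cold_partitions(node, &new_attr); /* hypothetical helper */
    finalize(new_attr, finalize_extra);  /* after this, the PAIR's write lock is released */
    destroy_detached_partitions(detached); /* hypothetical: expensive cleanup, now lock-free */
    return 0;
}
```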
// The cachetable calls this function to determine if get_and_pin call requires a partial fetch. If this function returns true,
// then the cachetable will subsequently call CACHETABLE_PARTIAL_FETCH_CALLBACK to perform
......
......@@ -136,6 +136,7 @@ PATENT RIGHTS GRANT:
#include "checkpoint.h"
#include <portability/toku_atomic.h>
#include <util/status.h>
#include <util/frwlock.h>
///////////////////////////////////////////////////////////////////////////////////
// Engine status
......@@ -187,7 +188,8 @@ toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS statp) {
static LSN last_completed_checkpoint_lsn;
static toku_pthread_rwlock_t checkpoint_safe_lock;
static toku_mutex_t checkpoint_safe_mutex;
static toku::frwlock checkpoint_safe_lock;
static toku_pthread_rwlock_t multi_operation_lock;
static toku_pthread_rwlock_t low_priority_multi_operation_lock;
......@@ -237,28 +239,33 @@ multi_operation_checkpoint_unlock(void) {
static void
checkpoint_safe_lock_init(void) {
toku_pthread_rwlock_init(&checkpoint_safe_lock, NULL);
toku_mutex_init(&checkpoint_safe_mutex, NULL);
checkpoint_safe_lock.init(&checkpoint_safe_mutex);
locked_cs = false;
}
static void
checkpoint_safe_lock_destroy(void) {
toku_pthread_rwlock_destroy(&checkpoint_safe_lock);
checkpoint_safe_lock.deinit();
toku_mutex_destroy(&checkpoint_safe_mutex);
}
static void
checkpoint_safe_checkpoint_lock(void) {
toku_pthread_rwlock_wrlock(&checkpoint_safe_lock);
toku_mutex_lock(&checkpoint_safe_mutex);
checkpoint_safe_lock.write_lock(false);
toku_mutex_unlock(&checkpoint_safe_mutex);
locked_cs = true;
}
static void
checkpoint_safe_checkpoint_unlock(void) {
locked_cs = false;
toku_pthread_rwlock_wrunlock(&checkpoint_safe_lock);
toku_mutex_lock(&checkpoint_safe_mutex);
checkpoint_safe_lock.write_unlock();
toku_mutex_unlock(&checkpoint_safe_mutex);
}
// toku_xxx_client_(un)lock() functions are only called from client code,
// never from checkpoint code, and use the "reader" interface to the lock functions.
......@@ -286,18 +293,20 @@ void
toku_checkpoint_safe_client_lock(void) {
if (locked_cs)
(void) toku_sync_fetch_and_add(&STATUS_VALUE(CP_CLIENT_WAIT_ON_CS), 1);
toku_pthread_rwlock_rdlock(&checkpoint_safe_lock);
toku_mutex_lock(&checkpoint_safe_mutex);
checkpoint_safe_lock.read_lock();
toku_mutex_unlock(&checkpoint_safe_mutex);
toku_multi_operation_client_lock();
}
void
toku_checkpoint_safe_client_unlock(void) {
toku_pthread_rwlock_rdunlock(&checkpoint_safe_lock);
toku_mutex_lock(&checkpoint_safe_mutex);
checkpoint_safe_lock.read_unlock();
toku_mutex_unlock(&checkpoint_safe_mutex);
toku_multi_operation_client_unlock();
}
// Initialize the checkpoint mechanism, must be called before any client operations.
void
toku_checkpoint_init(void) {
......
......@@ -89,6 +89,8 @@ PATENT RIGHTS GRANT:
#ident "$Id$"
#include <toku_portability.h>
#include <util/scoped_malloc.h>
#include <zlib.h>
#include <lzma.h>
......@@ -241,10 +243,10 @@ void toku_decompress (Bytef *dest, uLongf destLen,
}
case TOKU_QUICKLZ_METHOD:
if (sourceLen>1) {
qlz_state_decompress *XCALLOC(qsd);
toku::scoped_calloc state_buf(sizeof(qlz_state_decompress));
qlz_state_decompress *qsd = reinterpret_cast<qlz_state_decompress *>(state_buf.get());
uLongf actual_destlen = qlz_decompress((char*)source+1, dest, qsd);
assert(actual_destlen == destLen);
toku_free(qsd);
} else {
// length 1 means there is no data, so do nothing.
assert(destLen==0);
......
......@@ -135,6 +135,11 @@ int toku_fifo_create(FIFO *ptr) {
return 0;
}
void toku_fifo_resize(FIFO fifo, size_t new_size) {
XREALLOC_N(new_size, fifo->memory);
fifo->memory_size = new_size;
}
void toku_fifo_free(FIFO *ptr) {
FIFO fifo = *ptr;
if (fifo->memory) toku_free(fifo->memory);
......@@ -162,16 +167,10 @@ int toku_fifo_enq(FIFO fifo, const void *key, unsigned int keylen, const void *d
+ xids_get_size(xids)
- sizeof(XIDS_S); //Prevent double counting
int need_space_total = fifo->memory_used+need_space_here;
if (fifo->memory == NULL) {
fifo->memory_size = next_power_of_two(need_space_total);
XMALLOC_N(fifo->memory_size, fifo->memory);
}
if (need_space_total > fifo->memory_size) {
// Out of memory at the end.
if (fifo->memory == NULL || need_space_total > fifo->memory_size) {
// resize the fifo to the next power of 2 greater than the needed space
int next_2 = next_power_of_two(need_space_total);
// resize the fifo
XREALLOC_N(next_2, fifo->memory);
fifo->memory_size = next_2;
toku_fifo_resize(fifo, next_2);
}
struct fifo_entry *entry = (struct fifo_entry *)(fifo->memory + fifo->memory_used);
fifo_entry_set_msg_type(entry, type);
......
......@@ -136,6 +136,8 @@ typedef struct fifo *FIFO;
int toku_fifo_create(FIFO *);
void toku_fifo_resize(FIFO fifo, size_t new_size);
void toku_fifo_free(FIFO *);
int toku_fifo_n_entries(FIFO);
......
......@@ -94,7 +94,8 @@ PATENT RIGHTS GRANT:
#include <fttypes.h>
#include <ft-flusher.h>
#include <ft-internal.h>
#include "ft.h"
#include <ft.h>
#include <util/context.h>
static void
ftnode_get_key_and_fullhash(
......@@ -252,6 +253,8 @@ toku_pin_ftnode_batched(
bfe->child_to_read
);
if (needs_ancestors_messages) {
toku::context apply_messages_ctx(CTX_MESSAGE_APPLICATION);
toku_unpin_ftnode_read_only(brt->ft, node);
int rr = toku_cachetable_get_and_pin_nonblocking_batched(
brt->ft->cf,
......
......@@ -97,6 +97,7 @@ PATENT RIGHTS GRANT:
#include <toku_assert.h>
#include <portability/toku_atomic.h>
#include <util/status.h>
#include <util/context.h>
/* Status is intended for display to humans to help understand system behavior.
* It does not need to be perfectly thread-safe.
......@@ -544,11 +545,13 @@ ct_flusher_advice_init(struct flusher_advice *fa, struct flush_status_update_ext
// a leaf node that is not entirely in memory. If so, then
// we cannot be sure if the node is reactive.
//
static bool may_node_be_reactive(FTNODE node)
static bool may_node_be_reactive(FT ft, FTNODE node)
{
if (node->height == 0) return true;
if (node->height == 0) {
return true;
}
else {
return (get_nonleaf_reactivity(node) != RE_STABLE);
return (get_nonleaf_reactivity(node, ft->h->fanout) != RE_STABLE);
}
}
......@@ -1541,11 +1544,7 @@ ft_merge_child(
}
}
static void ft_flush_some_child(
FT ft,
FTNODE parent,
struct flusher_advice *fa
)
void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa)
// Effect: This function does the following:
// - Pick a child of parent (the heaviest child),
// - flush from parent to child,
......@@ -1559,7 +1558,7 @@ static void ft_flush_some_child(
NONLEAF_CHILDINFO bnc = NULL;
paranoid_invariant(parent->height>0);
toku_assert_entire_node_in_memory(parent);
TXNID oldest_referenced_xid = parent->oldest_referenced_xid_known;
TXNID parent_oldest_referenced_xid_known = parent->oldest_referenced_xid_known;
// pick the child we want to flush to
int childnum = fa->pick_child(ft, parent, fa->extra);
......@@ -1589,7 +1588,7 @@ static void ft_flush_some_child(
// Let's do a quick check to see if the child may be reactive
// If the child cannot be reactive, then we can safely unlock
// the parent before finishing reading in the entire child node.
bool may_child_be_reactive = may_node_be_reactive(child);
bool may_child_be_reactive = may_node_be_reactive(ft, child);
paranoid_invariant(child->thisnodename.b!=0);
//VERIFY_NODE(brt, child);
......@@ -1631,7 +1630,7 @@ static void ft_flush_some_child(
// we wont be splitting/merging child
// and we have already replaced the bnc
// for the root with a fresh one
enum reactivity child_re = get_node_reactivity(child, ft->h->nodesize);
enum reactivity child_re = get_node_reactivity(ft, child);
if (parent && child_re == RE_STABLE) {
toku_unpin_ftnode_off_client_thread(ft, parent);
parent = NULL;
......@@ -1652,7 +1651,7 @@ static void ft_flush_some_child(
ft,
bnc,
child,
oldest_referenced_xid
parent_oldest_referenced_xid_known
);
destroy_nonleaf_childinfo(bnc);
}
......@@ -1661,7 +1660,7 @@ static void ft_flush_some_child(
// let's get the reactivity of the child again,
// it is possible that the flush got rid of some values
// and now the parent is no longer reactive
child_re = get_node_reactivity(child, ft->h->nodesize);
child_re = get_node_reactivity(ft, child);
// if the parent has been unpinned above, then
// this is our only option, even if the child is not stable
// if the child is not stable, we'll handle it the next
......@@ -1676,10 +1675,10 @@ static void ft_flush_some_child(
parent = NULL;
}
//
// it is the responsibility of ft_flush_some_child to unpin child
// it is the responsibility of toku_ft_flush_some_child to unpin child
//
if (child->height > 0 && fa->should_recursively_flush(child, fa->extra)) {
ft_flush_some_child(ft, child, fa);
toku_ft_flush_some_child(ft, child, fa);
}
else {
toku_unpin_ftnode_off_client_thread(ft, child);
......@@ -1706,13 +1705,6 @@ static void ft_flush_some_child(
}
}
void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) {
// Vanilla flush_some_child flushes from parent to child without
// providing a meaningful oldest_referenced_xid. No simple garbage
// collection is performed.
return ft_flush_some_child(ft, parent, fa);
}
static void
update_cleaner_status(
FTNODE node,
......@@ -1854,7 +1846,7 @@ struct flusher_extra {
FT h;
FTNODE node;
NONLEAF_CHILDINFO bnc;
TXNID oldest_referenced_xid;
TXNID parent_oldest_referenced_xid_known;
};
//
......@@ -1864,6 +1856,7 @@ struct flusher_extra {
//
static void flush_node_fun(void *fe_v)
{
toku::context flush_ctx(CTX_FLUSH);
struct flusher_extra* fe = (struct flusher_extra *) fe_v;
// The node that has been placed on the background
// thread may not be fully in memory. Some message
......@@ -1892,16 +1885,16 @@ static void flush_node_fun(void *fe_v)
fe->h,
fe->bnc,
fe->node,
fe->oldest_referenced_xid
fe->parent_oldest_referenced_xid_known
);
destroy_nonleaf_childinfo(fe->bnc);
// after the flush has completed, now check to see if the node needs flushing
// If so, call ft_flush_some_child on the node (because this flush intends to
// If so, call toku_ft_flush_some_child on the node (because this flush intends to
// pass a meaningful oldest referenced xid for simple garbage collection), and it is the
// responsibility of the flush to unlock the node. otherwise, we unlock it here.
if (fe->node->height > 0 && toku_ft_nonleaf_is_gorged(fe->node, fe->h->h->nodesize)) {
ft_flush_some_child(fe->h, fe->node, &fa);
toku_ft_flush_some_child(fe->h, fe->node, &fa);
}
else {
toku_unpin_ftnode_off_client_thread(fe->h,fe->node);
......@@ -1912,7 +1905,7 @@ static void flush_node_fun(void *fe_v)
// bnc, which means we are tasked with flushing some
// buffer in the node.
// It is the responsibility of flush some child to unlock the node
ft_flush_some_child(fe->h, fe->node, &fa);
toku_ft_flush_some_child(fe->h, fe->node, &fa);
}
remove_background_job_from_cf(fe->h->cf);
toku_free(fe);
......@@ -1923,13 +1916,13 @@ place_node_and_bnc_on_background_thread(
FT h,
FTNODE node,
NONLEAF_CHILDINFO bnc,
TXNID oldest_referenced_xid)
TXNID parent_oldest_referenced_xid_known)
{
struct flusher_extra *XMALLOC(fe);
fe->h = h;
fe->node = node;
fe->bnc = bnc;
fe->oldest_referenced_xid = oldest_referenced_xid;
fe->parent_oldest_referenced_xid_known = parent_oldest_referenced_xid_known;
cachefile_kibbutz_enq(h->cf, flush_node_fun, fe);
}
......@@ -1948,7 +1941,8 @@ place_node_and_bnc_on_background_thread(
//
void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent)
{
TXNID oldest_referenced_xid_known = parent->oldest_referenced_xid_known;
toku::context flush_ctx(CTX_FLUSH);
TXNID parent_oldest_referenced_xid_known = parent->oldest_referenced_xid_known;
//
// first let's see if we can detach buffer on client thread
// and pick the child we want to flush to
......@@ -1965,13 +1959,13 @@ void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent)
// In this case, we could not lock the child, so just place the parent on the background thread
// In the callback, we will use toku_ft_flush_some_child, which checks to
// see if we should blow away the old basement nodes.
place_node_and_bnc_on_background_thread(h, parent, NULL, oldest_referenced_xid_known);
place_node_and_bnc_on_background_thread(h, parent, NULL, parent_oldest_referenced_xid_known);
}
else {
//
// successfully locked child
//
bool may_child_be_reactive = may_node_be_reactive(child);
bool may_child_be_reactive = may_node_be_reactive(h, child);
if (!may_child_be_reactive) {
// We're going to unpin the parent, so before we do, we must
// check to see if we need to blow away the basement nodes to
......@@ -1994,7 +1988,7 @@ void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent)
// so, because we know for sure the child is not
// reactive, we can unpin the parent
//
place_node_and_bnc_on_background_thread(h, child, bnc, oldest_referenced_xid_known);
place_node_and_bnc_on_background_thread(h, child, bnc, parent_oldest_referenced_xid_known);
toku_unpin_ftnode(h, parent);
}
else {
......@@ -2004,7 +1998,7 @@ void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent)
toku_unpin_ftnode(h, child);
// Again, we'll have the parent on the background thread, so
// we don't need to destroy the basement nodes yet.
place_node_and_bnc_on_background_thread(h, parent, NULL, oldest_referenced_xid_known);
place_node_and_bnc_on_background_thread(h, parent, NULL, parent_oldest_referenced_xid_known);
}
}
}
......
......@@ -232,6 +232,6 @@ void toku_ft_hot_get_status(FT_HOT_STATUS);
int
toku_ft_hot_optimize(FT_HANDLE brt, DBT* left, DBT* right,
int (*progress_callback)(void *extra, float progress),
void *progress_extra);
void *progress_extra, uint64_t* loops_run);
#endif // End of header guardian.
......@@ -96,6 +96,7 @@ PATENT RIGHTS GRANT:
#include <ft.h>
#include <portability/toku_atomic.h>
#include <util/status.h>
#include <util/context.h>
// Member Description:
// 1. highest_pivot_key - this is the key that corresponds to the
......@@ -299,8 +300,10 @@ hot_flusher_destroy(struct hot_flusher_extra *flusher)
int
toku_ft_hot_optimize(FT_HANDLE brt, DBT* left, DBT* right,
int (*progress_callback)(void *extra, float progress),
void *progress_extra)
void *progress_extra, uint64_t* loops_run)
{
toku::context flush_ctx(CTX_FLUSH);
int r = 0;
struct hot_flusher_extra flusher;
struct flusher_advice advice;
......@@ -403,6 +406,7 @@ toku_ft_hot_optimize(FT_HANDLE brt, DBT* left, DBT* right,
// Loop until the max key has been updated to positive
// infinity.
} while (!flusher.rightmost_leaf_seen);
*loops_run = loop_count;
// Cleanup.
hot_flusher_destroy(&flusher);
......
......@@ -117,15 +117,10 @@ PATENT RIGHTS GRANT:
#include <util/omt.h>
#include "bndata.h"
#ifndef FT_FANOUT
#define FT_FANOUT 16
#endif
enum { TREE_FANOUT = FT_FANOUT };
enum { KEY_VALUE_OVERHEAD = 8 }; /* Must store the two lengths. */
enum { FT_CMD_OVERHEAD = (2 + sizeof(MSN)) // the type plus freshness plus MSN
};
enum { FT_DEFAULT_NODE_SIZE = 1 << 22 };
enum { FT_CMD_OVERHEAD = (2 + sizeof(MSN)) }; // the type plus freshness plus MSN
enum { FT_DEFAULT_FANOUT = 16 };
enum { FT_DEFAULT_NODE_SIZE = 4 * 1024 * 1024 };
enum { FT_DEFAULT_BASEMENT_NODE_SIZE = 128 * 1024 };
//
......@@ -234,16 +229,14 @@ long toku_bnc_memory_size(NONLEAF_CHILDINFO bnc);
long toku_bnc_memory_used(NONLEAF_CHILDINFO bnc);
void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, DESCRIPTOR desc, ft_compare_func cmp);
void toku_bnc_empty(NONLEAF_CHILDINFO bnc);
void toku_bnc_flush_to_child(FT h, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID oldest_referenced_xid);
void toku_bnc_flush_to_child(FT h, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID parent_oldest_referenced_xid_known);
bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) __attribute__((const, nonnull));
bool toku_ft_nonleaf_is_gorged(FTNODE node, uint32_t nodesize);
enum reactivity get_nonleaf_reactivity (FTNODE node);
enum reactivity get_node_reactivity (FTNODE node, uint32_t nodesize);
enum reactivity get_nonleaf_reactivity(FTNODE node, unsigned int fanout);
enum reactivity get_node_reactivity(FT ft, FTNODE node);
uint32_t get_leaf_num_entries(FTNODE node);
// data of an available partition of a leaf ftnode
struct ftnode_leaf_basement_node {
bn_data data_buffer;
......@@ -336,7 +329,7 @@ struct ftnode {
int height; /* height is always >= 0. 0 for leaf, >0 for nonleaf. */
int dirty;
uint32_t fullhash;
int n_children; //for internal nodes, if n_children==TREE_FANOUT+1 then the tree needs to be rebalanced.
int n_children; //for internal nodes, if n_children==fanout+1 then the tree needs to be rebalanced.
// for leaf nodes, represents number of basement nodes
unsigned int totalchildkeylens;
DBT *childkeys; /* Pivot keys. Child 0's keys are <= childkeys[0]. Child 1's keys are <= childkeys[1].
......@@ -509,6 +502,7 @@ struct ft_header {
unsigned int nodesize;
unsigned int basementnodesize;
enum toku_compression_method compression_method;
unsigned int fanout;
// Current Minimum MSN to be used when upgrading pre-MSN BRT's.
// This is decremented from our current MIN_MSN so as not to clash
......@@ -590,6 +584,7 @@ struct ft_options {
unsigned int nodesize;
unsigned int basementnodesize;
enum toku_compression_method compression_method;
unsigned int fanout;
unsigned int flags;
ft_compare_func compare_fun;
ft_update_func update_fun;
......@@ -632,7 +627,7 @@ int toku_serialize_ftnode_to(int fd, BLOCKNUM, FTNODE node, FTNODE_DISK_DATA* nd
int toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized_log, bool is_serialized,
FT h, bool for_checkpoint);
void toku_serialize_rollback_log_to_memory_uncompressed(ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized);
int toku_deserialize_rollback_log_from (int fd, BLOCKNUM blocknum, uint32_t fullhash, ROLLBACK_LOG_NODE *logp, FT h);
int toku_deserialize_rollback_log_from (int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *logp, FT h);
int toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, int fd, struct ftnode_fetch_extra* bfe);
int toku_deserialize_bp_from_compressed(FTNODE node, int childnum, struct ftnode_fetch_extra *bfe);
int toku_deserialize_ftnode_from (int fd, BLOCKNUM off, uint32_t /*fullhash*/, FTNODE *ftnode, FTNODE_DISK_DATA* ndd, struct ftnode_fetch_extra* bfe);
......@@ -735,7 +730,8 @@ void toku_ftnode_checkpoint_complete_callback(void *value_data);
void toku_ftnode_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM nodename, void *ftnode_v, void** UU(disk_data), void *extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, bool write_me, bool keep_me, bool for_checkpoint, bool is_clone);
int toku_ftnode_fetch_callback (CACHEFILE cachefile, PAIR p, int fd, BLOCKNUM nodename, uint32_t fullhash, void **ftnode_pv, void** UU(disk_data), PAIR_ATTR *sizep, int*dirty, void*extraargs);
void toku_ftnode_pe_est_callback(void* ftnode_pv, void* disk_data, long* bytes_freed_estimate, enum partial_eviction_cost *cost, void* write_extraargs);
int toku_ftnode_pe_callback (void *ftnode_pv, PAIR_ATTR old_attr, PAIR_ATTR* new_attr, void *extraargs);
int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *extraargs,
void (*finalize)(PAIR_ATTR new_attr, void *extra), void *finalize_extra);
bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs);
int toku_ftnode_pf_callback(void* ftnode_pv, void* UU(disk_data), void* read_extraargs, int fd, PAIR_ATTR* sizep);
int toku_ftnode_cleaner_callback( void *ftnode_pv, BLOCKNUM blocknum, uint32_t fullhash, void *extraargs);
......@@ -787,6 +783,8 @@ struct ft_cursor {
int out_of_range_error;
int direction;
TOKUTXN ttxn;
FT_CHECK_INTERRUPT_CALLBACK interrupt_cb;
void *interrupt_cb_extra;
};
//
......@@ -1029,7 +1027,7 @@ int toku_testsetup_insert_to_nonleaf (FT_HANDLE brt, BLOCKNUM, enum ft_msg_type,
void toku_pin_node_with_min_bfe(FTNODE* node, BLOCKNUM b, FT_HANDLE t);
// toku_ft_root_put_cmd() accepts non-constant cmd because this is where we set the msn
void toku_ft_root_put_cmd(FT h, FT_MSG_S * cmd, TXNID oldest_referenced_xid, GC_INFO gc_info);
void toku_ft_root_put_cmd(FT h, FT_MSG_S * cmd, txn_gc_info *gc_info);
void
toku_get_node_for_verify(
......@@ -1067,6 +1065,10 @@ typedef enum {
LE_MAX_PROVISIONAL_XR,
LE_EXPANDED,
LE_MAX_MEMSIZE,
LE_APPLY_GC_BYTES_IN,
LE_APPLY_GC_BYTES_OUT,
LE_NORMAL_GC_BYTES_IN,
LE_NORMAL_GC_BYTES_OUT,
LE_STATUS_NUM_ROWS
} le_status_entry;
......@@ -1101,6 +1103,9 @@ typedef enum {
FT_DISK_FLUSH_NONLEAF_BYTES_FOR_CHECKPOINT,// number of nonleaf nodes flushed to disk for checkpoint
FT_DISK_FLUSH_NONLEAF_UNCOMPRESSED_BYTES_FOR_CHECKPOINT,// number of nonleaf nodes flushed to disk for checkpoint
FT_DISK_FLUSH_NONLEAF_TOKUTIME_FOR_CHECKPOINT,// number of nonleaf nodes flushed to disk for checkpoint
FT_DISK_FLUSH_LEAF_COMPRESSION_RATIO, // effective compression ratio for leaf bytes flushed to disk
FT_DISK_FLUSH_NONLEAF_COMPRESSION_RATIO, // effective compression ratio for nonleaf bytes flushed to disk
FT_DISK_FLUSH_OVERALL_COMPRESSION_RATIO, // effective compression ratio for all bytes flushed to disk
FT_PARTIAL_EVICTIONS_NONLEAF, // number of nonleaf node partial evictions
FT_PARTIAL_EVICTIONS_NONLEAF_BYTES, // number of nonleaf node partial evictions
FT_PARTIAL_EVICTIONS_LEAF, // number of leaf node partial evictions
......@@ -1196,8 +1201,7 @@ toku_ft_bn_apply_cmd_once (
const FT_MSG cmd,
uint32_t idx,
LEAFENTRY le,
TXNID oldest_referenced_xid,
GC_INFO gc_info,
txn_gc_info *gc_info,
uint64_t *workdonep,
STAT64INFO stats_to_update
);
......@@ -1209,8 +1213,7 @@ toku_ft_bn_apply_cmd (
DESCRIPTOR desc,
BASEMENTNODE bn,
FT_MSG cmd,
TXNID oldest_referenced_xid,
GC_INFO gc_info,
txn_gc_info *gc_info,
uint64_t *workdone,
STAT64INFO stats_to_update
);
......@@ -1223,7 +1226,7 @@ toku_ft_leaf_apply_cmd (
FTNODE node,
int target_childnum,
FT_MSG cmd,
GC_INFO gc_info,
txn_gc_info *gc_info,
uint64_t *workdone,
STAT64INFO stats_to_update
);
......@@ -1237,7 +1240,7 @@ toku_ft_node_put_cmd (
int target_childnum,
FT_MSG cmd,
bool is_fresh,
GC_INFO gc_info,
txn_gc_info *gc_info,
size_t flow_deltas[],
STAT64INFO stats_to_update
);
......
......@@ -114,6 +114,8 @@ PATENT RIGHTS GRANT:
// When lock_only is true, the callback only does optional lock tree locking.
typedef int(*FT_GET_CALLBACK_FUNCTION)(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only);
typedef bool(*FT_CHECK_INTERRUPT_CALLBACK)(void* extra);
int toku_open_ft_handle (const char *fname, int is_create, FT_HANDLE *, int nodesize, int basementnodesize, enum toku_compression_method compression_method, CACHETABLE, TOKUTXN, int(*)(DB *,const DBT*,const DBT*)) __attribute__ ((warn_unused_result));
// effect: changes the descriptor for the ft of the given handle.
......@@ -135,6 +137,8 @@ void toku_ft_handle_set_basementnodesize(FT_HANDLE, unsigned int basementnodesiz
void toku_ft_handle_get_basementnodesize(FT_HANDLE, unsigned int *basementnodesize);
void toku_ft_handle_set_compression_method(FT_HANDLE, enum toku_compression_method);
void toku_ft_handle_get_compression_method(FT_HANDLE, enum toku_compression_method *);
void toku_ft_handle_set_fanout(FT_HANDLE, unsigned int fanout);
void toku_ft_handle_get_fanout(FT_HANDLE, unsigned int *fanout);
void toku_ft_set_bt_compare(FT_HANDLE, ft_compare_func);
ft_compare_func toku_ft_get_bt_compare (FT_HANDLE brt);
......@@ -239,9 +243,12 @@ void toku_ft_delete (FT_HANDLE brt, DBT *k, TOKUTXN txn);
// Effect: Delete a key from a brt if the oplsn is newer than the brt lsn. This function is called during recovery.
void toku_ft_maybe_delete (FT_HANDLE brt, DBT *k, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging);
void toku_ft_send_insert(FT_HANDLE brt, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, TXNID oldest_referenced_xid, GC_INFO gc_info);
void toku_ft_send_delete(FT_HANDLE brt, DBT *key, XIDS xids, TXNID oldest_referenced_xid, GC_INFO gc_info);
void toku_ft_send_commit_any(FT_HANDLE brt, DBT *key, XIDS xids, TXNID oldest_referenced_xids, GC_INFO gc_info);
TXNID toku_ft_get_oldest_referenced_xid_estimate(FT_HANDLE ft_h);
TXN_MANAGER toku_ft_get_txn_manager(FT_HANDLE ft_h);
void toku_ft_send_insert(FT_HANDLE brt, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, txn_gc_info *gc_info);
void toku_ft_send_delete(FT_HANDLE brt, DBT *key, XIDS xids, txn_gc_info *gc_info);
void toku_ft_send_commit_any(FT_HANDLE brt, DBT *key, XIDS xids, txn_gc_info *gc_info);
int toku_close_ft_handle_nolsn (FT_HANDLE, char **error_string) __attribute__ ((warn_unused_result));
......@@ -258,6 +265,7 @@ void toku_ft_cursor_set_leaf_mode(FT_CURSOR);
// the cursor during one query.
void toku_ft_cursor_set_temporary(FT_CURSOR);
void toku_ft_cursor_remove_restriction(FT_CURSOR);
void toku_ft_cursor_set_check_interrupt_cb(FT_CURSOR ftcursor, FT_CHECK_INTERRUPT_CALLBACK cb, void *extra);
int toku_ft_cursor_is_leaf_mode(FT_CURSOR);
void toku_ft_cursor_set_range_lock(FT_CURSOR, const DBT *, const DBT *, bool, bool, int);
......@@ -346,5 +354,8 @@ int toku_ft_strerror_r(int error, char *buf, size_t buflen);
extern bool garbage_collection_debug;
// This is a poor place to put global options like these.
void toku_ft_set_direct_io(bool direct_io_on);
void toku_ft_set_compress_buffers_before_eviction(bool compress_buffers);
#endif
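
A hedged sketch of the new fanout knobs, mirroring the nodesize accessors above; the value 128 is illustrative and, by analogy with nodesize, is assumed to be set before the handle is opened (assumes `ft/ft-ops.h` for FT_HANDLE and the declarations):

```c
#include <assert.h>
/* assumes ft/ft-ops.h */

static void use_wide_fanout(FT_HANDLE t) {
    toku_ft_handle_set_fanout(t, 128);  /* illustrative; FT_DEFAULT_FANOUT is 16 */
    unsigned int fanout = 0;
    toku_ft_handle_get_fanout(t, &fanout);
    assert(fanout == 128);
}
```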
......@@ -404,6 +404,7 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version)
.nodesize = nodesize,
.basementnodesize = basementnodesize,
.compression_method = compression_method,
.fanout = FT_DEFAULT_FANOUT, // fanout is not serialized, must be set at startup
.highest_unused_msn_for_upgrade = highest_unused_msn_for_upgrade,
.max_msn_in_ft = max_msn_in_ft,
.time_of_last_optimize_begin = time_of_last_optimize_begin,
......@@ -461,6 +462,7 @@ serialize_ft_min_size (uint32_t version) {
size_t size = 0;
switch(version) {
case FT_LAYOUT_VERSION_25:
case FT_LAYOUT_VERSION_24:
case FT_LAYOUT_VERSION_23:
case FT_LAYOUT_VERSION_22:
......
......@@ -142,7 +142,6 @@ int toku_testsetup_leaf(FT_HANDLE brt, BLOCKNUM *blocknum, int n_children, char
int toku_testsetup_nonleaf (FT_HANDLE brt, int height, BLOCKNUM *blocknum, int n_children, BLOCKNUM *children, char **keys, int *keylens) {
FTNODE node;
assert(testsetup_initialized);
assert(n_children<=FT_FANOUT);
toku_create_new_ftnode(brt, &node, height, n_children);
int i;
for (i=0; i<n_children; i++) {
......@@ -222,6 +221,7 @@ int toku_testsetup_insert_to_leaf (FT_HANDLE brt, BLOCKNUM blocknum, const char
toku_fill_dbt(&valdbt, val, vallen) } } };
static size_t zero_flow_deltas[] = { 0, 0 };
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true);
toku_ft_node_put_cmd (
brt->ft->compare_fun,
brt->ft->update_fun,
......@@ -230,7 +230,7 @@ int toku_testsetup_insert_to_leaf (FT_HANDLE brt, BLOCKNUM blocknum, const char
-1,
&cmd,
true,
make_gc_info(true),
&gc_info,
zero_flow_deltas,
NULL
);
......
......@@ -431,6 +431,7 @@ ft_header_create(FT_OPTIONS options, BLOCKNUM root_blocknum, TXNID root_xid_that
.nodesize = options->nodesize,
.basementnodesize = options->basementnodesize,
.compression_method = options->compression_method,
.fanout = options->fanout,
.highest_unused_msn_for_upgrade = { .msn = (MIN_MSN.msn - 1) },
.max_msn_in_ft = ZERO_MSN,
.time_of_last_optimize_begin = 0,
......@@ -606,13 +607,16 @@ toku_ft_init(FT ft,
TXNID root_xid_that_created,
uint32_t target_nodesize,
uint32_t target_basementnodesize,
enum toku_compression_method compression_method)
enum toku_compression_method compression_method,
uint32_t fanout
)
{
memset(ft, 0, sizeof *ft);
struct ft_options options = {
.nodesize = target_nodesize,
.basementnodesize = target_basementnodesize,
.compression_method = compression_method,
.fanout = fanout,
.flags = 0,
.compare_fun = NULL,
.update_fun = NULL
......@@ -633,6 +637,7 @@ ft_handle_open_for_redirect(FT_HANDLE *new_ftp, const char *fname_in_env, TOKUTX
toku_ft_handle_set_nodesize(t, old_h->h->nodesize);
toku_ft_handle_set_basementnodesize(t, old_h->h->basementnodesize);
toku_ft_handle_set_compression_method(t, old_h->h->compression_method);
toku_ft_handle_set_fanout(t, old_h->h->fanout);
CACHETABLE ct = toku_cachefile_get_cachetable(old_h->cf);
int r = toku_ft_handle_open_with_dict_id(t, fname_in_env, 0, 0, ct, txn, old_h->dict_id);
if (r != 0) {
......@@ -1022,6 +1027,19 @@ void toku_ft_get_compression_method(FT ft, enum toku_compression_method *methodp
toku_ft_unlock(ft);
}
void toku_ft_set_fanout(FT ft, unsigned int fanout) {
toku_ft_lock(ft);
ft->h->fanout = fanout;
ft->h->dirty = 1;
toku_ft_unlock(ft);
}
void toku_ft_get_fanout(FT ft, unsigned int *fanout) {
toku_ft_lock(ft);
*fanout = ft->h->fanout;
toku_ft_unlock(ft);
}
// mark the ft as a blackhole. any message injections will be a no op.
void toku_ft_set_blackhole(FT_HANDLE ft_handle) {
ft_handle->ft->blackhole = true;
......
......@@ -130,13 +130,14 @@ void toku_ft_note_hot_complete(FT_HANDLE brt, bool success, MSN msn_at_start_of_
void
toku_ft_init(
FT h,
FT ft,
BLOCKNUM root_blocknum_on_disk,
LSN checkpoint_lsn,
TXNID root_xid_that_created,
uint32_t target_nodesize,
uint32_t target_basementnodesize,
enum toku_compression_method compression_method
enum toku_compression_method compression_method,
uint32_t fanout
);
int toku_dictionary_redirect_abort(FT old_h, FT new_h, TOKUTXN txn) __attribute__ ((warn_unused_result));
......@@ -186,6 +187,8 @@ void toku_ft_set_basementnodesize(FT ft, unsigned int basementnodesize);
void toku_ft_get_basementnodesize(FT ft, unsigned int *basementnodesize);
void toku_ft_set_compression_method(FT ft, enum toku_compression_method method);
void toku_ft_get_compression_method(FT ft, enum toku_compression_method *methodp);
void toku_ft_set_fanout(FT ft, unsigned int fanout);
void toku_ft_get_fanout(FT ft, unsigned int *fanout);
void toku_node_save_ct_pair(CACHEKEY UU(key), void *value_data, PAIR p);
// mark the ft as a blackhole. any message injections will be a no op.
......
......@@ -118,6 +118,7 @@ enum ft_layout_version_e {
FT_LAYOUT_VERSION_22 = 22, // Ming: Add oldest known referenced xid to each ftnode, for better garbage collection
FT_LAYOUT_VERSION_23 = 23, // Ming: Fix upgrade path #5902
FT_LAYOUT_VERSION_24 = 24, // Riddler: change logentries that log transactions to store TXNID_PAIRs instead of TXNIDs
FT_LAYOUT_VERSION_25 = 25, // SecretSquirrel: ROLLBACK_LOG_NODES (on disk and in memory) now just use blocknum (instead of blocknum + hash) to point to other log nodes. same for xstillopen log entry
FT_NEXT_VERSION, // the version after the current version
FT_LAYOUT_VERSION = FT_NEXT_VERSION-1, // A hack so I don't have to change this line.
FT_LAYOUT_MIN_SUPPORTED_VERSION = FT_LAYOUT_VERSION_13, // Minimum version supported
......
......@@ -288,6 +288,7 @@ struct fractal_thread_args {
uint32_t target_nodesize;
uint32_t target_basementnodesize;
enum toku_compression_method target_compression_method;
uint32_t target_fanout;
};
void toku_ft_loader_set_n_rows(FTLOADER bl, uint64_t n_rows);
......@@ -319,7 +320,8 @@ int toku_loader_write_brt_from_q_in_C (FTLOADER bl,
int which_db,
uint32_t target_nodesize,
uint32_t target_basementnodesize,
enum toku_compression_method target_compression_method);
enum toku_compression_method target_compression_method,
uint32_t fanout);
int ft_loader_mergesort_row_array (struct row rows[/*n*/], int n, int which_db, DB *dest_db, ft_compare_func, FTLOADER, struct rowset *);
......
......@@ -561,7 +561,7 @@ int toku_ft_loader_internal_init (/* out */ FTLOADER *blp,
}
bl->compress_intermediates = compress_intermediates;
if (0) { // debug
fprintf(stderr, "%s Reserved memory=%ld\n", __FUNCTION__, bl->reserved_memory);
fprintf(stderr, "%s Reserved memory=%" PRId64 "\n", __FUNCTION__, bl->reserved_memory);
}
bl->src_db = src_db;
......@@ -2405,7 +2405,8 @@ static int toku_loader_write_ft_from_q (FTLOADER bl,
int which_db,
uint32_t target_nodesize,
uint32_t target_basementnodesize,
enum toku_compression_method target_compression_method)
enum toku_compression_method target_compression_method,
uint32_t target_fanout)
// Effect: Consume a sequence of rowsets of work from a queue, creating a fractal tree.  Closes fd.
{
// set the number of fractal tree writer threads so that we can partition memory in the merger
......@@ -2434,7 +2435,7 @@ static int toku_loader_write_ft_from_q (FTLOADER bl,
// TODO: (Zardosht/Yoni/Leif), do this code properly
struct ft ft;
toku_ft_init(&ft, (BLOCKNUM){0}, bl->load_lsn, root_xid_that_created, target_nodesize, target_basementnodesize, target_compression_method);
toku_ft_init(&ft, (BLOCKNUM){0}, bl->load_lsn, root_xid_that_created, target_nodesize, target_basementnodesize, target_compression_method, target_fanout);
struct dbout out;
ZERO_STRUCT(out);
......@@ -2680,18 +2681,19 @@ int toku_loader_write_brt_from_q_in_C (FTLOADER bl,
int which_db,
uint32_t target_nodesize,
uint32_t target_basementnodesize,
enum toku_compression_method target_compression_method)
enum toku_compression_method target_compression_method,
uint32_t target_fanout)
// This is probably only for testing.
{
target_nodesize = target_nodesize == 0 ? default_loader_nodesize : target_nodesize;
target_basementnodesize = target_basementnodesize == 0 ? default_loader_basementnodesize : target_basementnodesize;
return toku_loader_write_ft_from_q (bl, descriptor, fd, progress_allocation, q, total_disksize_estimate, which_db, target_nodesize, target_basementnodesize, target_compression_method);
return toku_loader_write_ft_from_q (bl, descriptor, fd, progress_allocation, q, total_disksize_estimate, which_db, target_nodesize, target_basementnodesize, target_compression_method, target_fanout);
}
static void* fractal_thread (void *ftav) {
struct fractal_thread_args *fta = (struct fractal_thread_args *)ftav;
int r = toku_loader_write_ft_from_q (fta->bl, fta->descriptor, fta->fd, fta->progress_allocation, fta->q, fta->total_disksize_estimate, fta->which_db, fta->target_nodesize, fta->target_basementnodesize, fta->target_compression_method);
int r = toku_loader_write_ft_from_q (fta->bl, fta->descriptor, fta->fd, fta->progress_allocation, fta->q, fta->total_disksize_estimate, fta->which_db, fta->target_nodesize, fta->target_basementnodesize, fta->target_compression_method, fta->target_fanout);
fta->errno_result = r;
return NULL;
}
......@@ -2727,7 +2729,7 @@ static int loader_do_i (FTLOADER bl,
r = get_error_errno(); goto error;
}
uint32_t target_nodesize, target_basementnodesize;
uint32_t target_nodesize, target_basementnodesize, target_fanout;
enum toku_compression_method target_compression_method;
r = dest_db->get_pagesize(dest_db, &target_nodesize);
invariant_zero(r);
......@@ -2735,6 +2737,8 @@ static int loader_do_i (FTLOADER bl,
invariant_zero(r);
r = dest_db->get_compression_method(dest_db, &target_compression_method);
invariant_zero(r);
r = dest_db->get_fanout(dest_db, &target_fanout);
invariant_zero(r);
// This structure must stay live until the join below.
struct fractal_thread_args fta = { bl,
......@@ -2748,6 +2752,7 @@ static int loader_do_i (FTLOADER bl,
target_nodesize,
target_basementnodesize,
target_compression_method,
target_fanout
};
r = toku_pthread_create(bl->fractal_threads+which_db, NULL, fractal_thread, (void*)&fta);
......@@ -2920,7 +2925,9 @@ static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int
.xids = lbuf->xids,
.u = { .id = { &thekey, &theval } } };
uint64_t workdone=0;
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(true), &workdone, stats_to_update);
// there's no mvcc garbage in a bulk-loaded FT, so there's no need to pass useful gc info
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true);
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, &gc_info, &workdone, stats_to_update);
}
static int write_literal(struct dbout *out, void*data, size_t len) {
......
......@@ -143,10 +143,6 @@ typedef TOKU_XA_XID *XIDP; // this is the type that's passed to the logger code
static inline BLOCKNUM make_blocknum(int64_t b) { BLOCKNUM result={b}; return result; }
typedef struct gc_info_s { bool mvcc_needed; } GC_INFO;
static inline GC_INFO make_gc_info(bool mvcc_needed) { GC_INFO result = {mvcc_needed}; return result; }
// This struct holds information about values stored in the cachetable.
// As one can tell from the names, we are probably violating an
// abstraction layer by placing names.
......
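The `GC_INFO`/`make_gc_info` pair removed above is replaced throughout this commit by a stack-constructed `txn_gc_info`. A sketch of the two construction patterns seen at the call sites, with the constructor signature inferred from those sites (snapshot state, oldest referenced xid estimate, oldest possible referenced xid, mvcc flag); `txn` is an in-scope TOKUTXN:

```cpp
// Pattern 1: no live-transaction state is available (bulk load, tests),
// so no MVCC garbage can exist and no snapshot state is needed.
txn_gc_info gc_info_simple(nullptr, TXNID_NONE, TXNID_NONE, true);

// Pattern 2: full GC info derived from the txn manager (as in do_insertion).
TXN_MANAGER txn_manager = toku_logger_get_txn_manager(txn->logger);
txn_manager_state txn_state_for_gc(txn_manager);
TXNID oldest_estimate =
    toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager);
txn_gc_info gc_info_full(&txn_state_for_gc,
                         oldest_estimate,
                         oldest_estimate,  // no messages above: ok to promote
                         !txn->for_recovery);
```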
......@@ -247,12 +247,11 @@ toku_le_apply_msg(FT_MSG msg,
LEAFENTRY old_leafentry, // NULL if there was no stored data.
bn_data* data_buffer, // bn_data storing leafentry, if NULL, means there is no bn_data
uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced)
TXNID oldest_referenced_xid,
GC_INFO gc_info,
txn_gc_info *gc_info,
LEAFENTRY *new_leafentry_p,
int64_t * numbytes_delta_p);
bool toku_le_worth_running_garbage_collection(LEAFENTRY le, TXNID oldest_referenced_xid_known);
bool toku_le_worth_running_garbage_collection(LEAFENTRY le, txn_gc_info *gc_info);
void
toku_le_garbage_collect(LEAFENTRY old_leaf_entry,
......@@ -260,11 +259,8 @@ toku_le_garbage_collect(LEAFENTRY old_leaf_entry,
uint32_t idx,
void* keyp,
uint32_t keylen,
txn_gc_info *gc_info,
LEAFENTRY *new_leaf_entry,
const xid_omt_t &snapshot_xids,
const rx_omt_t &referenced_xids,
const xid_omt_t &live_root_txns,
TXNID oldest_referenced_xid_known,
int64_t * numbytes_delta_p);
#endif /* TOKU_LEAFENTRY_H */
......
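A hedged sketch of a caller driving the revised leafentry GC entry points declared above; `le`, `idx`, the key storage, and `gc_info` are assumed in scope, and the `data_buffer` parameter is assumed from the collapsed context of the hunk:

```cpp
// Sketch: the gate and the collection step now take the same txn_gc_info,
// replacing the loose TXNID plus the three OMTs of snapshot/referenced xids.
if (toku_le_worth_running_garbage_collection(le, gc_info)) {
    LEAFENTRY new_le = NULL;
    int64_t numbytes_delta = 0;
    toku_le_garbage_collect(le,
                            data_buffer,  // bn_data holding the entry
                            idx, keyp, keylen,
                            gc_info,
                            &new_le,
                            &numbytes_delta);
}
```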
......@@ -177,6 +177,7 @@ struct tokulogger {
uint64_t num_writes_to_disk; // how many times did we write to disk?
uint64_t bytes_written_to_disk; // how many bytes have been written to disk?
tokutime_t time_spent_writing_to_disk; // how much tokutime did we spend writing to disk?
uint64_t num_wait_buf_long; // how many times we waited >= 100ms for the in buf
void (*remove_finalize_callback) (DICTIONARY_ID, void*); // ydb-level callback to be called when a transaction that ...
void * remove_finalize_callback_extra; // ... deletes a file is committed or when one that creates a file is aborted.
......@@ -209,15 +210,12 @@ struct txn_roll_info {
// the spilled rollback head is the block number of the first rollback node
// that makes up the rollback log chain
BLOCKNUM spilled_rollback_head;
uint32_t spilled_rollback_head_hash;
// the spilled rollback is the block number of the last rollback node that
// makes up the rollback log chain.
BLOCKNUM spilled_rollback_tail;
uint32_t spilled_rollback_tail_hash;
// the current rollback node block number we may use. if this is ROLLBACK_NONE,
// then we need to create one and set it here before using it.
BLOCKNUM current_rollback;
uint32_t current_rollback_hash;
};
struct tokutxn {
......@@ -249,7 +247,6 @@ struct tokutxn {
DB_TXN *container_db_txn; // reference to DB_TXN that contains this tokutxn
xid_omt_t *live_root_txn_list; // the root txns live when the root ancestor (self if a root) started.
XIDS xids; // Represents the xid list
TXNID oldest_referenced_xid;
TOKUTXN snapshot_next;
TOKUTXN snapshot_prev;
......
......@@ -155,9 +155,7 @@ const struct logtype rollbacks[] = {
{"rollinclude", 'r', FA{{"TXNID_PAIR", "xid", 0},
{"uint64_t", "num_nodes", 0},
{"BLOCKNUM", "spilled_head", 0},
{"uint32_t", "spilled_head_hash", 0},
{"BLOCKNUM", "spilled_tail", 0},
{"uint32_t", "spilled_tail_hash", 0},
NULLFIELD}, LOG_BEGIN_ACTION_NA},
{"load", 'l', FA{{"FILENUM", "old_filenum", 0},
{"BYTESTRING", "new_iname", 0},
......
......@@ -422,9 +422,13 @@ wait_till_output_available (TOKULOGGER logger)
// Exit: Holds the output_condition_lock and logger->output_is_available
//
{
tokutime_t t0 = toku_time_now();
while (!logger->output_is_available) {
toku_cond_wait(&logger->output_condition, &logger->output_condition_lock);
}
if (tokutime_to_seconds(toku_time_now() - t0) >= 0.100) {
logger->num_wait_buf_long++;
}
}
static void
......@@ -1397,6 +1401,7 @@ status_init(void) {
STATUS_INIT(LOGGER_BYTES_WRITTEN, LOGGER_WRITES_BYTES, UINT64, "writes (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LOGGER_UNCOMPRESSED_BYTES_WRITTEN, LOGGER_WRITES_UNCOMPRESSED_BYTES, UINT64, "writes (uncompressed bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LOGGER_TOKUTIME_WRITES, LOGGER_WRITES_SECONDS, TOKUTIME, "writes (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LOGGER_WAIT_BUF_LONG, LOGGER_WAIT_LONG, UINT64, "count", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
logger_status.initialized = true;
}
#undef STATUS_INIT
......@@ -1414,6 +1419,7 @@ toku_logger_get_status(TOKULOGGER logger, LOGGER_STATUS statp) {
// No compression on logfiles so the uncompressed size is just number of bytes written
STATUS_VALUE(LOGGER_UNCOMPRESSED_BYTES_WRITTEN) = logger->bytes_written_to_disk;
STATUS_VALUE(LOGGER_TOKUTIME_WRITES) = logger->time_spent_writing_to_disk;
STATUS_VALUE(LOGGER_WAIT_BUF_LONG) = logger->num_wait_buf_long;
}
*statp = logger_status;
}
......
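Condensed from the three logger hunks above (struct field, timed wait, status export), a sketch of the instrumentation pattern behind the new counter:

```cpp
// Sketch: time the whole wait loop on the logger's output buffer and count
// the wait as "long" when it took at least 100ms.
static void wait_till_output_available_sketch(TOKULOGGER logger) {
    tokutime_t t0 = toku_time_now();
    while (!logger->output_is_available) {
        toku_cond_wait(&logger->output_condition,
                       &logger->output_condition_lock);
    }
    if (tokutime_to_seconds(toku_time_now() - t0) >= 0.100) {
        logger->num_wait_buf_long++;  // surfaced as LOGGER_WAIT_BUF_LONG
    }
}
```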
......@@ -244,6 +244,7 @@ typedef enum {
LOGGER_BYTES_WRITTEN,
LOGGER_UNCOMPRESSED_BYTES_WRITTEN,
LOGGER_TOKUTIME_WRITES,
LOGGER_WAIT_BUF_LONG,
LOGGER_STATUS_NUM_ROWS
} logger_status_entry;
......
......@@ -96,6 +96,7 @@ PATENT RIGHTS GRANT:
#include "ft.h"
#include "ft-ops.h"
#include "log-internal.h"
//#include "txn_manager.h"
#include "xids.h"
#include "rollback-apply.h"
......@@ -115,6 +116,10 @@ PATENT RIGHTS GRANT:
// for each FT_DELETE_ANY message sent earlier by the transaction?
#define TOKU_DO_COMMIT_CMD_DELETE 1
// When a transaction is committed, should we send a FT_COMMIT message
// for each FT_UPDATE message sent earlier by the transaction?
#define TOKU_DO_COMMIT_CMD_UPDATE 0
int
toku_commit_fdelete (FILENUM filenum,
TOKUTXN txn,
......@@ -261,7 +266,16 @@ static int do_insertion (enum ft_msg_type type, FILENUM filenum, BYTESTRING key,
? toku_fill_dbt(&data_dbt, data->data, data->len)
: toku_init_dbt(&data_dbt) } } };
toku_ft_root_put_cmd(h, &ftcmd, txn->oldest_referenced_xid, make_gc_info(!txn->for_recovery));
TXN_MANAGER txn_manager = toku_logger_get_txn_manager(txn->logger);
txn_manager_state txn_state_for_gc(txn_manager);
TXNID oldest_referenced_xid_estimate = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager);
txn_gc_info gc_info(&txn_state_for_gc,
oldest_referenced_xid_estimate,
// no messages above us, we can implicitly promote uxrs based on this xid
oldest_referenced_xid_estimate,
!txn->for_recovery);
toku_ft_root_put_cmd(h, &ftcmd, &gc_info);
if (reset_root_xid_that_created) {
TXNID new_root_xid_that_created = xids_get_outermost_xid(xids);
toku_reset_root_xid_that_created(h, new_root_xid_that_created);
......@@ -296,11 +310,15 @@ toku_rollback_cmdinsert (FILENUM filenum,
int
toku_commit_cmdupdate(FILENUM filenum,
BYTESTRING key,
BYTESTRING UU(key),
TOKUTXN txn,
LSN oplsn)
LSN UU(oplsn))
{
#if TOKU_DO_COMMIT_CMD_UPDATE
return do_insertion(FT_COMMIT_ANY, filenum, key, 0, txn, oplsn, false);
#else
return do_nothing_with_filenum(txn, filenum);
#endif
}
int
......@@ -365,9 +383,7 @@ static int
toku_apply_rollinclude (TXNID_PAIR xid,
uint64_t num_nodes,
BLOCKNUM spilled_head,
uint32_t spilled_head_hash __attribute__((__unused__)),
BLOCKNUM spilled_tail,
uint32_t spilled_tail_hash,
TOKUTXN txn,
LSN oplsn,
apply_rollback_item func) {
......@@ -375,7 +391,6 @@ toku_apply_rollinclude (TXNID_PAIR xid,
struct roll_entry *item;
BLOCKNUM next_log = spilled_tail;
uint32_t next_log_hash = spilled_tail_hash;
uint64_t last_sequence = num_nodes;
bool found_head = false;
......@@ -383,7 +398,7 @@ toku_apply_rollinclude (TXNID_PAIR xid,
while (next_log.b != ROLLBACK_NONE.b) {
//pin log
ROLLBACK_LOG_NODE log;
toku_get_and_pin_rollback_log(txn, next_log, next_log_hash, &log);
toku_get_and_pin_rollback_log(txn, next_log, &log);
toku_rollback_verify_contents(log, xid, last_sequence - 1);
last_sequence = log->sequence;
......@@ -400,16 +415,13 @@ toku_apply_rollinclude (TXNID_PAIR xid,
assert(log->sequence == 0);
}
next_log = log->previous;
next_log_hash = log->previous_hash;
{
//Clean up transaction structure to prevent
//toku_txn_close from double-freeing
spilled_tail = next_log;
spilled_tail_hash = next_log_hash;
if (found_head) {
assert(next_log.b == ROLLBACK_NONE.b);
spilled_head = next_log;
spilled_head_hash = next_log_hash;
}
}
toku_rollback_log_unpin_and_remove(txn, log);
......@@ -421,15 +433,13 @@ int
toku_commit_rollinclude (TXNID_PAIR xid,
uint64_t num_nodes,
BLOCKNUM spilled_head,
uint32_t spilled_head_hash,
BLOCKNUM spilled_tail,
uint32_t spilled_tail_hash,
TOKUTXN txn,
LSN oplsn) {
int r;
r = toku_apply_rollinclude(xid, num_nodes,
spilled_head, spilled_head_hash,
spilled_tail, spilled_tail_hash,
spilled_head,
spilled_tail,
txn, oplsn,
toku_commit_rollback_item);
return r;
......@@ -439,15 +449,13 @@ int
toku_rollback_rollinclude (TXNID_PAIR xid,
uint64_t num_nodes,
BLOCKNUM spilled_head,
uint32_t spilled_head_hash,
BLOCKNUM spilled_tail,
uint32_t spilled_tail_hash,
TOKUTXN txn,
LSN oplsn) {
int r;
r = toku_apply_rollinclude(xid, num_nodes,
spilled_head, spilled_head_hash,
spilled_tail, spilled_tail_hash,
spilled_head,
spilled_tail,
txn, oplsn,
toku_abort_rollback_item);
return r;
......
......@@ -143,17 +143,14 @@ apply_txn(TOKUTXN txn, LSN lsn, apply_rollback_item func) {
//printf("%s:%d abort\n", __FILE__, __LINE__);
BLOCKNUM next_log = ROLLBACK_NONE;
uint32_t next_log_hash = 0;
bool is_current = false;
if (txn_has_current_rollback_log(txn)) {
next_log = txn->roll_info.current_rollback;
next_log_hash = txn->roll_info.current_rollback_hash;
is_current = true;
}
else if (txn_has_spilled_rollback_logs(txn)) {
next_log = txn->roll_info.spilled_rollback_tail;
next_log_hash = txn->roll_info.spilled_rollback_tail_hash;
}
uint64_t last_sequence = txn->roll_info.num_rollback_nodes;
......@@ -161,7 +158,7 @@ apply_txn(TOKUTXN txn, LSN lsn, apply_rollback_item func) {
while (next_log.b != ROLLBACK_NONE.b) {
ROLLBACK_LOG_NODE log;
//pin log
toku_get_and_pin_rollback_log(txn, next_log, next_log_hash, &log);
toku_get_and_pin_rollback_log(txn, next_log, &log);
toku_rollback_verify_contents(log, txn->txnid, last_sequence - 1);
toku_maybe_prefetch_previous_rollback_log(txn, log);
......@@ -180,23 +177,19 @@ apply_txn(TOKUTXN txn, LSN lsn, apply_rollback_item func) {
assert(log->sequence == 0);
}
next_log = log->previous;
next_log_hash = log->previous_hash;
{
//Clean up transaction structure to prevent
//toku_txn_close from double-freeing
if (is_current) {
txn->roll_info.current_rollback = ROLLBACK_NONE;
txn->roll_info.current_rollback_hash = 0;
is_current = false;
}
else {
txn->roll_info.spilled_rollback_tail = next_log;
txn->roll_info.spilled_rollback_tail_hash = next_log_hash;
}
if (found_head) {
assert(next_log.b == ROLLBACK_NONE.b);
txn->roll_info.spilled_rollback_head = next_log;
txn->roll_info.spilled_rollback_head_hash = next_log_hash;
}
}
bool give_back = false;
......@@ -228,13 +221,11 @@ int toku_rollback_commit(TOKUTXN txn, LSN lsn) {
num_nodes--; //Don't count the in-progress rollback log.
}
toku_logger_save_rollback_rollinclude(txn->parent, txn->txnid, num_nodes,
txn->roll_info.spilled_rollback_head, txn->roll_info.spilled_rollback_head_hash,
txn->roll_info.spilled_rollback_tail, txn->roll_info.spilled_rollback_tail_hash);
txn->roll_info.spilled_rollback_head,
txn->roll_info.spilled_rollback_tail);
//Remove ownership from child.
txn->roll_info.spilled_rollback_head = ROLLBACK_NONE;
txn->roll_info.spilled_rollback_head_hash = 0;
txn->roll_info.spilled_rollback_tail = ROLLBACK_NONE;
txn->roll_info.spilled_rollback_tail_hash = 0;
}
// if we're committing a child rollback, put its entries into the parent
// by pinning both child and parent and then linking the child log entry
......@@ -247,8 +238,7 @@ int toku_rollback_commit(TOKUTXN txn, LSN lsn) {
//Pin child log
ROLLBACK_LOG_NODE child_log;
toku_get_and_pin_rollback_log(txn, txn->roll_info.current_rollback,
txn->roll_info.current_rollback_hash, &child_log);
toku_get_and_pin_rollback_log(txn, txn->roll_info.current_rollback, &child_log);
toku_rollback_verify_contents(child_log, txn->txnid, txn->roll_info.num_rollback_nodes - 1);
// Append the list to the front of the parent.
......@@ -284,7 +274,6 @@ int toku_rollback_commit(TOKUTXN txn, LSN lsn) {
toku_rollback_log_unpin_and_remove(txn, child_log);
}
txn->roll_info.current_rollback = ROLLBACK_NONE;
txn->roll_info.current_rollback_hash = 0;
toku_maybe_spill_rollbacks(txn->parent, parent_log);
toku_rollback_log_unpin(txn->parent, parent_log);
......
......@@ -219,13 +219,13 @@ void toku_rollback_flush_callback (
}
}
int toku_rollback_fetch_callback (CACHEFILE cachefile, PAIR p, int fd, BLOCKNUM logname, uint32_t fullhash,
int toku_rollback_fetch_callback (CACHEFILE cachefile, PAIR p, int fd, BLOCKNUM logname, uint32_t fullhash UU(),
void **rollback_pv, void** UU(disk_data), PAIR_ATTR *sizep, int * UU(dirtyp), void *extraargs) {
int r;
FT CAST_FROM_VOIDP(h, extraargs);
assert(h->cf == cachefile);
ROLLBACK_LOG_NODE *result = (ROLLBACK_LOG_NODE*)rollback_pv;
r = toku_deserialize_rollback_log_from(fd, logname, fullhash, result, h);
r = toku_deserialize_rollback_log_from(fd, logname, result, h);
if (r==0) {
(*result)->ct_pair = p;
*sizep = rollback_memory_size(*result);
......@@ -249,13 +249,14 @@ void toku_rollback_pe_est_callback(
// callback for partially evicting a cachetable entry
int toku_rollback_pe_callback (
void *rollback_v,
PAIR_ATTR UU(old_attr),
PAIR_ATTR* new_attr,
void* UU(extraargs)
PAIR_ATTR old_attr,
void* UU(extraargs),
void (*finalize)(PAIR_ATTR new_attr, void * extra),
void *finalize_extra
)
{
assert(rollback_v != NULL);
*new_attr = old_attr;
finalize(old_attr, finalize_extra);
return 0;
}
......
......@@ -107,10 +107,11 @@ void toku_rollback_pe_est_callback(
);
int toku_rollback_pe_callback (
void *rollback_v,
PAIR_ATTR UU(old_attr),
PAIR_ATTR* new_attr,
void* UU(extraargs)
) ;
PAIR_ATTR old_attr,
void* UU(extraargs),
void (*finalize)(PAIR_ATTR new_attr, void * extra),
void *finalize_extra
);
bool toku_rollback_pf_req_callback(void* UU(ftnode_pv), void* UU(read_extraargs)) ;
int toku_rollback_pf_callback(void* UU(ftnode_pv), void* UU(disk_data), void* UU(read_extraargs), int UU(fd), PAIR_ATTR* UU(sizep));
void toku_rollback_clone_callback(void* value_data, void** cloned_value_data, long* clone_size, PAIR_ATTR* new_attr, bool for_checkpoint, void* write_extraargs);
......
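Every partial-eviction callback in this commit migrates to the shape declared above. A minimal conforming sketch, assuming only that declaration; the key contract is that the callback reports post-eviction attributes by calling `finalize` exactly once rather than writing an out-parameter, which lets the real work (and its accounting) complete asynchronously:

```cpp
// Sketch: a no-op partial-eviction callback under the new interface.
static int sketch_pe_callback(void *value_pv,
                              PAIR_ATTR old_attr,
                              void *extraargs,
                              void (*finalize)(PAIR_ATTR new_attr, void *extra),
                              void *finalize_extra) {
    (void) value_pv;
    (void) extraargs;
    finalize(old_attr, finalize_extra);  // nothing evicted: attrs unchanged
    return 0;
}
```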
......@@ -158,7 +158,7 @@ static void toku_rollback_node_save_ct_pair(CACHEKEY UU(key), void *value_data,
//
// initializes an empty rollback log node
// Does not touch the blocknum or hash, that is the
// Does not touch the blocknum, that is the
// responsibility of the caller
//
void rollback_empty_log_init(ROLLBACK_LOG_NODE log) {
......@@ -173,7 +173,6 @@ void rollback_empty_log_init(ROLLBACK_LOG_NODE log) {
log->dirty = true;
log->sequence = 0;
log->previous = make_blocknum(0);
log->previous_hash = 0;
log->oldest_logentry = NULL;
log->newest_logentry = NULL;
log->rollentry_arena = NULL;
......@@ -185,14 +184,12 @@ void rollback_empty_log_init(ROLLBACK_LOG_NODE log) {
static void rollback_initialize_for_txn(
ROLLBACK_LOG_NODE log,
TOKUTXN txn,
BLOCKNUM previous,
uint32_t previous_hash
BLOCKNUM previous
)
{
log->txnid = txn->txnid;
log->sequence = txn->roll_info.num_rollback_nodes++;
log->previous = previous;
log->previous_hash = previous_hash;
log->oldest_logentry = NULL;
log->newest_logentry = NULL;
log->rollentry_arena = memarena_create();
......@@ -206,12 +203,11 @@ void make_rollback_log_empty(ROLLBACK_LOG_NODE log) {
}
// create and pin a new rollback log node. chain it to the other rollback nodes
// by providing a previous blocknum/ hash and assigning the new rollback log
// by providing a previous blocknum and assigning the new rollback log
// node the next sequence number
static void rollback_log_create (
TOKUTXN txn,
BLOCKNUM previous,
uint32_t previous_hash,
ROLLBACK_LOG_NODE *result
)
{
......@@ -220,16 +216,15 @@ static void rollback_log_create (
CACHEFILE cf = txn->logger->rollback_cachefile;
FT CAST_FROM_VOIDP(ft, toku_cachefile_get_userdata(cf));
rollback_initialize_for_txn(log, txn, previous, previous_hash);
rollback_initialize_for_txn(log, txn, previous);
toku_allocate_blocknum(ft->blocktable, &log->blocknum, ft);
log->hash = toku_cachetable_hash(ft->cf, log->blocknum);
const uint32_t hash = toku_cachetable_hash(ft->cf, log->blocknum);
*result = log;
toku_cachetable_put(cf, log->blocknum, log->hash,
toku_cachetable_put(cf, log->blocknum, hash,
log, rollback_memory_size(log),
get_write_callbacks_for_rollback_log(ft),
toku_rollback_node_save_ct_pair);
txn->roll_info.current_rollback = log->blocknum;
txn->roll_info.current_rollback_hash = log->hash;
}
void toku_rollback_log_unpin(TOKUTXN txn, ROLLBACK_LOG_NODE log) {
......@@ -255,14 +250,11 @@ void toku_maybe_spill_rollbacks(TOKUTXN txn, ROLLBACK_LOG_NODE log) {
if (!txn_has_spilled_rollback_logs(txn)) {
//First spilled. Copy to head.
txn->roll_info.spilled_rollback_head = txn->roll_info.current_rollback;
txn->roll_info.spilled_rollback_head_hash = txn->roll_info.current_rollback_hash;
}
//Unconditionally copy to tail. Old tail does not need to be cached anymore.
txn->roll_info.spilled_rollback_tail = txn->roll_info.current_rollback;
txn->roll_info.spilled_rollback_tail_hash = txn->roll_info.current_rollback_hash;
txn->roll_info.current_rollback = ROLLBACK_NONE;
txn->roll_info.current_rollback_hash = 0;
}
}
......@@ -311,8 +303,8 @@ void toku_maybe_prefetch_previous_rollback_log(TOKUTXN txn, ROLLBACK_LOG_NODE lo
BLOCKNUM name = log->previous;
int r = 0;
if (name.b != ROLLBACK_NONE.b) {
uint32_t hash = log->previous_hash;
CACHEFILE cf = txn->logger->rollback_cachefile;
uint32_t hash = toku_cachetable_hash(cf, name);
FT CAST_FROM_VOIDP(h, toku_cachefile_get_userdata(cf));
bool doing_prefetch = false;
r = toku_cachefile_prefetch(cf, name, hash,
......@@ -334,10 +326,11 @@ void toku_rollback_verify_contents(ROLLBACK_LOG_NODE log,
assert(log->sequence == sequence);
}
void toku_get_and_pin_rollback_log(TOKUTXN txn, BLOCKNUM blocknum, uint32_t hash, ROLLBACK_LOG_NODE *log) {
void toku_get_and_pin_rollback_log(TOKUTXN txn, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *log) {
void * value;
CACHEFILE cf = txn->logger->rollback_cachefile;
FT CAST_FROM_VOIDP(h, toku_cachefile_get_userdata(cf));
uint32_t hash = toku_cachetable_hash(cf, blocknum);
int r = toku_cachetable_get_and_pin_with_dep_pairs(cf, blocknum, hash,
&value, NULL,
get_write_callbacks_for_rollback_log(h),
......@@ -351,7 +344,6 @@ void toku_get_and_pin_rollback_log(TOKUTXN txn, BLOCKNUM blocknum, uint32_t hash
assert(r == 0);
ROLLBACK_LOG_NODE CAST_FROM_VOIDP(pinned_log, value);
assert(pinned_log->blocknum.b == blocknum.b);
assert(pinned_log->hash == hash);
*log = pinned_log;
}
......@@ -359,7 +351,7 @@ void toku_get_and_pin_rollback_log_for_new_entry (TOKUTXN txn, ROLLBACK_LOG_NODE
ROLLBACK_LOG_NODE pinned_log = NULL;
invariant(txn->state == TOKUTXN_LIVE || txn->state == TOKUTXN_PREPARING); // hot indexing may call this function for prepared transactions
if (txn_has_current_rollback_log(txn)) {
toku_get_and_pin_rollback_log(txn, txn->roll_info.current_rollback, txn->roll_info.current_rollback_hash, &pinned_log);
toku_get_and_pin_rollback_log(txn, txn->roll_info.current_rollback, &pinned_log);
toku_rollback_verify_contents(pinned_log, txn->txnid, txn->roll_info.num_rollback_nodes - 1);
} else {
// For each transaction, we try to acquire the first rollback log
......@@ -378,15 +370,13 @@ void toku_get_and_pin_rollback_log_for_new_entry (TOKUTXN txn, ROLLBACK_LOG_NODE
rollback_initialize_for_txn(
pinned_log,
txn,
txn->roll_info.spilled_rollback_tail,
txn->roll_info.spilled_rollback_tail_hash
txn->roll_info.spilled_rollback_tail
);
txn->roll_info.current_rollback = pinned_log->blocknum;
txn->roll_info.current_rollback_hash = pinned_log->hash;
}
}
if (pinned_log == NULL) {
rollback_log_create(txn, txn->roll_info.spilled_rollback_tail, txn->roll_info.spilled_rollback_tail_hash, &pinned_log);
rollback_log_create(txn, txn->roll_info.spilled_rollback_tail, &pinned_log);
}
}
assert(pinned_log->txnid.parent_id64 == txn->txnid.parent_id64);
......
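With the stored hashes gone, callers identify rollback nodes by blocknum alone. A sketch of the resulting pin pattern, mirroring the call sites above; the fullhash is recomputed inside `toku_get_and_pin_rollback_log` via `toku_cachetable_hash(cf, blocknum)`:

```cpp
// Sketch: pin the transaction's current rollback node by blocknum only.
static void pin_current_rollback_sketch(TOKUTXN txn, ROLLBACK_LOG_NODE *log) {
    invariant(txn_has_current_rollback_log(txn));
    toku_get_and_pin_rollback_log(txn, txn->roll_info.current_rollback, log);
    toku_rollback_verify_contents(*log, txn->txnid,
                                  txn->roll_info.num_rollback_nodes - 1);
}
```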
......@@ -103,8 +103,8 @@ void toku_poll_txn_progress_function(TOKUTXN txn, uint8_t is_commit, uint8_t sta
// is a current rollback node to use, pin it, otherwise create one.
void toku_get_and_pin_rollback_log_for_new_entry(TOKUTXN txn, ROLLBACK_LOG_NODE *log);
// get a specific rollback by blocknum and hash
void toku_get_and_pin_rollback_log(TOKUTXN txn, BLOCKNUM blocknum, uint32_t hash, ROLLBACK_LOG_NODE *log);
// get a specific rollback by blocknum
void toku_get_and_pin_rollback_log(TOKUTXN txn, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *log);
// unpin a rollback node from the cachetable
void toku_rollback_log_unpin(TOKUTXN txn, ROLLBACK_LOG_NODE log);
......@@ -155,11 +155,9 @@ struct rollback_log_node {
// the sequence is between 0 and totalnodes-1
uint64_t sequence;
BLOCKNUM blocknum; // on which block does this node live?
uint32_t hash;
// which block number is the previous in the chain of rollback nodes
// that make up this rollback log?
BLOCKNUM previous;
uint32_t previous_hash;
struct roll_entry *oldest_logentry;
struct roll_entry *newest_logentry;
MEMARENA rollentry_arena;
......
......@@ -96,7 +96,6 @@ PATENT RIGHTS GRANT:
void rollback_log_node_cache::init (uint32_t max_num_avail_nodes) {
XMALLOC_N(max_num_avail_nodes, m_avail_blocknums);
XMALLOC_N(max_num_avail_nodes, m_hashes);
m_max_num_avail = max_num_avail_nodes;
m_first = 0;
m_num_avail = 0;
......@@ -110,7 +109,6 @@ void rollback_log_node_cache::init (uint32_t max_num_avail_nodes) {
void rollback_log_node_cache::destroy() {
toku_mutex_destroy(&m_mutex);
toku_free(m_avail_blocknums);
toku_free(m_hashes);
}
// returns true if rollback log node was successfully added,
......@@ -125,7 +123,6 @@ bool rollback_log_node_cache::give_rollback_log_node(TOKUTXN txn, ROLLBACK_LOG_N
index -= m_max_num_avail;
}
m_avail_blocknums[index].b = log->blocknum.b;
m_hashes[index] = log->hash;
m_num_avail++;
}
toku_mutex_unlock(&m_mutex);
......@@ -144,11 +141,9 @@ bool rollback_log_node_cache::give_rollback_log_node(TOKUTXN txn, ROLLBACK_LOG_N
// for getting a rollback log node
void rollback_log_node_cache::get_rollback_log_node(TOKUTXN txn, ROLLBACK_LOG_NODE* log){
BLOCKNUM b = ROLLBACK_NONE;
uint32_t hash;
toku_mutex_lock(&m_mutex);
if (m_num_avail > 0) {
b.b = m_avail_blocknums[m_first].b;
hash = m_hashes[m_first];
m_num_avail--;
if (++m_first >= m_max_num_avail) {
m_first = 0;
......@@ -156,7 +151,7 @@ void rollback_log_node_cache::get_rollback_log_node(TOKUTXN txn, ROLLBACK_LOG_NO
}
toku_mutex_unlock(&m_mutex);
if (b.b != ROLLBACK_NONE.b) {
toku_get_and_pin_rollback_log(txn, b, hash, log);
toku_get_and_pin_rollback_log(txn, b, log);
invariant(rollback_log_is_unused(*log));
} else {
*log = NULL;
......
......@@ -108,7 +108,6 @@ class rollback_log_node_cache {
private:
BLOCKNUM* m_avail_blocknums;
uint32_t* m_hashes;
uint32_t m_first;
uint32_t m_num_avail;
uint32_t m_max_num_avail;
......
......@@ -103,15 +103,16 @@ static int
pe_callback (
void *ftnode_pv __attribute__((__unused__)),
PAIR_ATTR bytes_to_free __attribute__((__unused__)),
PAIR_ATTR* bytes_freed,
void* extraargs __attribute__((__unused__))
void* extraargs __attribute__((__unused__)),
void (*finalize)(PAIR_ATTR bytes_freed, void *extra),
void *finalize_extra
)
{
*bytes_freed = make_pair_attr(1);
if (check_pe_callback) {
pe_callback_called = true;
}
usleep(4*1024*1024);
finalize(make_pair_attr(1), finalize_extra);
return 0;
}
......
......@@ -97,7 +97,7 @@ PATENT RIGHTS GRANT:
// #5978 is fixed. Here is what we do. We have four pairs with
// blocknums and fullhashes of 1,2,3,4. The cachetable has only
// two bucket mutexes, so 1 and 3 share a pair mutex, as do 2 and 4.
// We pin all four with expensive write locks. Then, on background threads,
// we call get_and_pin_nonblocking on 3, where the unlocker unpins 2, and
// we call get_and_pin_nonblocking on 4, where the unlocker unpins 1. Run this
// enough times, and we should see a deadlock before the fix, and no deadlock
......
......@@ -155,15 +155,16 @@ static int
pe_callback (
void *ftnode_pv,
PAIR_ATTR UU(bytes_to_free),
PAIR_ATTR* bytes_freed,
void* extraargs __attribute__((__unused__))
void* extraargs __attribute__((__unused__)),
void (*finalize)(PAIR_ATTR bytes_freed, void *extra),
void *finalize_extra
)
{
*bytes_freed = make_pair_attr(bytes_to_free.size-1);
expected_bytes_to_free--;
int* CAST_FROM_VOIDP(foo, ftnode_pv);
int blah = *foo;
*foo = blah-1;
finalize(make_pair_attr(bytes_to_free.size-1), finalize_extra);
return 0;
}
......@@ -171,10 +172,12 @@ static int
other_pe_callback (
void *ftnode_pv __attribute__((__unused__)),
PAIR_ATTR bytes_to_free __attribute__((__unused__)),
PAIR_ATTR* bytes_freed __attribute__((__unused__)),
void* extraargs __attribute__((__unused__))
void* extraargs __attribute__((__unused__)),
void (*finalize)(PAIR_ATTR bytes_freed, void *extra),
void *finalize_extra
)
{
finalize(bytes_to_free, finalize_extra);
return 0;
}
......
......@@ -169,17 +169,18 @@ static int
pe_callback (
void *ftnode_pv,
PAIR_ATTR UU(bytes_to_free),
PAIR_ATTR* bytes_freed,
void* extraargs __attribute__((__unused__))
void* extraargs __attribute__((__unused__)),
void (*finalize)(PAIR_ATTR bytes_freed, void *extra),
void *finalize_extra
)
{
*bytes_freed = make_pair_attr(bytes_to_free.size-1);
usleep(1*1024*1024);
if (verbose) printf("calling pe_callback\n");
expected_bytes_to_free--;
int* CAST_FROM_VOIDP(foo, ftnode_pv);
int blah = *foo;
*foo = blah-1;
finalize(make_pair_attr(bytes_to_free.size-1), finalize_extra);
return 0;
}
......@@ -187,11 +188,12 @@ static int
other_pe_callback (
void *ftnode_pv __attribute__((__unused__)),
PAIR_ATTR bytes_to_free __attribute__((__unused__)),
PAIR_ATTR* bytes_freed __attribute__((__unused__)),
void* extraargs __attribute__((__unused__))
void* extraargs __attribute__((__unused__)),
void (*finalize)(PAIR_ATTR bytes_freed, void *extra),
void *finalize_extra
)
{
*bytes_freed = bytes_to_free;
finalize(bytes_to_free, finalize_extra);
return 0;
}
......
......@@ -164,12 +164,13 @@ static int
pe_callback (
void *ftnode_pv __attribute__((__unused__)),
PAIR_ATTR bytes_to_free __attribute__((__unused__)),
PAIR_ATTR* bytes_freed,
void* extraargs __attribute__((__unused__))
void* extraargs __attribute__((__unused__)),
void (*finalize)(PAIR_ATTR bytes_freed, void *extra),
void *finalize_extra
)
{
assert(false);
*bytes_freed = bytes_to_free;
finalize(bytes_to_free, finalize_extra);
return 0;
}
......
......@@ -154,12 +154,13 @@ static int
pe_callback (
void *ftnode_pv __attribute__((__unused__)),
PAIR_ATTR bytes_to_free __attribute__((__unused__)),
PAIR_ATTR* bytes_freed,
void* extraargs __attribute__((__unused__))
void* extraargs __attribute__((__unused__)),
void (*finalize)(PAIR_ATTR bytes_freed, void *extra),
void *finalize_extra
)
{
*bytes_freed = make_pair_attr(bytes_to_free.size-7);
sleep(2);
finalize(bytes_to_free, finalize_extra);
return 0;
}
......
......@@ -110,12 +110,13 @@ static int
pe_callback (
void *ftnode_pv __attribute__((__unused__)),
PAIR_ATTR bytes_to_free __attribute__((__unused__)),
PAIR_ATTR* bytes_freed,
void* extraargs __attribute__((__unused__))
void* extraargs __attribute__((__unused__)),
void (*finalize)(PAIR_ATTR new_attr, void *extra),
void *finalize_extra
)
{
sleep(3);
*bytes_freed = make_pair_attr(bytes_to_free.size-7);
finalize(make_pair_attr(bytes_to_free.size - 7), finalize_extra);
return 0;
}
......
......@@ -116,7 +116,9 @@ void evictor_unit_test::init() {
ZERO_STRUCT(m_cf_list);
m_pl.init();
m_cf_list.init();
m_kb = toku_kibbutz_create(1);
m_kb = NULL;
int r = toku_kibbutz_create(1, &m_kb);
assert(r == 0);
}
// destroy class after tests have run
......
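The kibbutz constructor now returns a status code and yields the handle through an out-parameter, as the test hunk above shows. A sketch of the updated creation call; the `KIBBUTZ` handle type name is an assumption:

```cpp
// Sketch: create a kibbutz (background work pool) with one worker thread
// under the new error-returning interface.
KIBBUTZ kb = NULL;  // handle type assumed
int r = toku_kibbutz_create(1, &kb);
assert(r == 0 && kb != NULL);
```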
......@@ -146,7 +146,7 @@ test_prefetch_read(int fd, FT_HANDLE UU(brt), FT brt_h) {
assert(BP_STATE(dn,0) == PT_AVAIL);
assert(BP_STATE(dn,1) == PT_AVAIL);
assert(BP_STATE(dn,2) == PT_AVAIL);
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), &attr, brt_h);
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr);
assert(BP_STATE(dn,0) == PT_COMPRESSED);
assert(BP_STATE(dn,1) == PT_COMPRESSED);
assert(BP_STATE(dn,2) == PT_COMPRESSED);
......@@ -168,7 +168,7 @@ test_prefetch_read(int fd, FT_HANDLE UU(brt), FT brt_h) {
assert(BP_STATE(dn,0) == PT_ON_DISK);
assert(BP_STATE(dn,1) == PT_AVAIL);
assert(BP_STATE(dn,2) == PT_AVAIL);
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), &attr, brt_h);
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr);
assert(BP_STATE(dn,0) == PT_ON_DISK);
assert(BP_STATE(dn,1) == PT_COMPRESSED);
assert(BP_STATE(dn,2) == PT_COMPRESSED);
......@@ -190,7 +190,7 @@ test_prefetch_read(int fd, FT_HANDLE UU(brt), FT brt_h) {
assert(BP_STATE(dn,0) == PT_ON_DISK);
assert(BP_STATE(dn,1) == PT_AVAIL);
assert(BP_STATE(dn,2) == PT_ON_DISK);
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), &attr, brt_h);
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr);
assert(BP_STATE(dn,0) == PT_ON_DISK);
assert(BP_STATE(dn,1) == PT_COMPRESSED);
assert(BP_STATE(dn,2) == PT_ON_DISK);
......@@ -211,7 +211,7 @@ test_prefetch_read(int fd, FT_HANDLE UU(brt), FT brt_h) {
assert(BP_STATE(dn,0) == PT_ON_DISK);
assert(BP_STATE(dn,1) == PT_ON_DISK);
assert(BP_STATE(dn,2) == PT_AVAIL);
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), &attr, brt_h);
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr);
assert(BP_STATE(dn,0) == PT_ON_DISK);
assert(BP_STATE(dn,1) == PT_ON_DISK);
assert(BP_STATE(dn,2) == PT_COMPRESSED);
......@@ -232,7 +232,7 @@ test_prefetch_read(int fd, FT_HANDLE UU(brt), FT brt_h) {
assert(BP_STATE(dn,0) == PT_AVAIL);
assert(BP_STATE(dn,1) == PT_ON_DISK);
assert(BP_STATE(dn,2) == PT_ON_DISK);
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), &attr, brt_h);
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr);
assert(BP_STATE(dn,0) == PT_COMPRESSED);
assert(BP_STATE(dn,1) == PT_ON_DISK);
assert(BP_STATE(dn,2) == PT_ON_DISK);
......@@ -292,11 +292,11 @@ test_subset_read(int fd, FT_HANDLE UU(brt), FT brt_h) {
assert(BP_STATE(dn,1) == PT_ON_DISK);
assert(BP_STATE(dn,2) == PT_AVAIL);
// need to call this twice because we had a subset read before, that touched the clock
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), &attr, brt_h);
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr);
assert(BP_STATE(dn,0) == PT_ON_DISK);
assert(BP_STATE(dn,1) == PT_ON_DISK);
assert(BP_STATE(dn,2) == PT_AVAIL);
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), &attr, brt_h);
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr);
assert(BP_STATE(dn,0) == PT_ON_DISK);
assert(BP_STATE(dn,1) == PT_ON_DISK);
assert(BP_STATE(dn,2) == PT_COMPRESSED);
......@@ -317,11 +317,11 @@ test_subset_read(int fd, FT_HANDLE UU(brt), FT brt_h) {
assert(BP_STATE(dn,1) == PT_AVAIL);
assert(BP_STATE(dn,2) == PT_AVAIL);
// need to call this twice because we had a subset read before, that touched the clock
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), &attr, brt_h);
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr);
assert(BP_STATE(dn,0) == PT_ON_DISK);
assert(BP_STATE(dn,1) == PT_COMPRESSED);
assert(BP_STATE(dn,2) == PT_AVAIL);
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), &attr, brt_h);
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr);
assert(BP_STATE(dn,0) == PT_ON_DISK);
assert(BP_STATE(dn,1) == PT_COMPRESSED);
assert(BP_STATE(dn,2) == PT_COMPRESSED);
......@@ -341,11 +341,11 @@ test_subset_read(int fd, FT_HANDLE UU(brt), FT brt_h) {
assert(BP_STATE(dn,1) == PT_AVAIL);
assert(BP_STATE(dn,2) == PT_ON_DISK);
// need to call this twice because we had a subset read before, that touched the clock
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), &attr, brt_h);
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr);
assert(BP_STATE(dn,0) == PT_AVAIL);
assert(BP_STATE(dn,1) == PT_COMPRESSED);
assert(BP_STATE(dn,2) == PT_ON_DISK);
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), &attr, brt_h);
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr);
assert(BP_STATE(dn,0) == PT_COMPRESSED);
assert(BP_STATE(dn,1) == PT_COMPRESSED);
assert(BP_STATE(dn,2) == PT_ON_DISK);
......@@ -420,7 +420,8 @@ test_prefetching(void) {
TXNID_NONE,
4*1024*1024,
128*1024,
TOKU_DEFAULT_COMPRESSION_METHOD);
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
brt->ft = brt_h;
toku_blocktable_create_new(&brt_h->blocktable);
{ int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
......
......@@ -154,12 +154,12 @@ test1(int fd, FT brt_h, FTNODE *dn) {
// should sweep and NOT get rid of anything
PAIR_ATTR attr;
memset(&attr,0,sizeof(attr));
toku_ftnode_pe_callback(*dn, attr, &attr, brt_h);
toku_ftnode_pe_callback(*dn, attr, brt_h, def_pe_finalize_impl, nullptr);
for (int i = 0; i < (*dn)->n_children; i++) {
assert(BP_STATE(*dn,i) == PT_AVAIL);
}
// should sweep and get compress all
toku_ftnode_pe_callback(*dn, attr, &attr, brt_h);
toku_ftnode_pe_callback(*dn, attr, brt_h, def_pe_finalize_impl, nullptr);
for (int i = 0; i < (*dn)->n_children; i++) {
if (!is_leaf) {
assert(BP_STATE(*dn,i) == PT_COMPRESSED);
......@@ -172,12 +172,12 @@ test1(int fd, FT brt_h, FTNODE *dn) {
bool req = toku_ftnode_pf_req_callback(*dn, &bfe_all);
assert(req);
toku_ftnode_pf_callback(*dn, ndd, &bfe_all, fd, &size);
toku_ftnode_pe_callback(*dn, attr, &attr, brt_h);
toku_ftnode_pe_callback(*dn, attr, brt_h, def_pe_finalize_impl, nullptr);
for (int i = 0; i < (*dn)->n_children; i++) {
assert(BP_STATE(*dn,i) == PT_AVAIL);
}
// should sweep and get compress all
toku_ftnode_pe_callback(*dn, attr, &attr, brt_h);
toku_ftnode_pe_callback(*dn, attr, brt_h, def_pe_finalize_impl, nullptr);
for (int i = 0; i < (*dn)->n_children; i++) {
if (!is_leaf) {
assert(BP_STATE(*dn,i) == PT_COMPRESSED);
......@@ -190,15 +190,15 @@ test1(int fd, FT brt_h, FTNODE *dn) {
req = toku_ftnode_pf_req_callback(*dn, &bfe_all);
assert(req);
toku_ftnode_pf_callback(*dn, ndd, &bfe_all, fd, &size);
toku_ftnode_pe_callback(*dn, attr, &attr, brt_h);
toku_ftnode_pe_callback(*dn, attr, brt_h, def_pe_finalize_impl, nullptr);
for (int i = 0; i < (*dn)->n_children; i++) {
assert(BP_STATE(*dn,i) == PT_AVAIL);
}
(*dn)->dirty = 1;
toku_ftnode_pe_callback(*dn, attr, &attr, brt_h);
toku_ftnode_pe_callback(*dn, attr, &attr, brt_h);
toku_ftnode_pe_callback(*dn, attr, &attr, brt_h);
toku_ftnode_pe_callback(*dn, attr, &attr, brt_h);
toku_ftnode_pe_callback(*dn, attr, brt_h, def_pe_finalize_impl, nullptr);
toku_ftnode_pe_callback(*dn, attr, brt_h, def_pe_finalize_impl, nullptr);
toku_ftnode_pe_callback(*dn, attr, brt_h, def_pe_finalize_impl, nullptr);
toku_ftnode_pe_callback(*dn, attr, brt_h, def_pe_finalize_impl, nullptr);
for (int i = 0; i < (*dn)->n_children; i++) {
assert(BP_STATE(*dn,i) == PT_AVAIL);
}
......@@ -252,11 +252,11 @@ test2(int fd, FT brt_h, FTNODE *dn) {
assert(!BP_SHOULD_EVICT(*dn, 1));
PAIR_ATTR attr;
memset(&attr,0,sizeof(attr));
toku_ftnode_pe_callback(*dn, attr, &attr, brt_h);
toku_ftnode_pe_callback(*dn, attr, brt_h, def_pe_finalize_impl, nullptr);
assert(BP_STATE(*dn, 0) == ((is_leaf) ? PT_ON_DISK : PT_COMPRESSED));
assert(BP_STATE(*dn, 1) == PT_AVAIL);
assert(BP_SHOULD_EVICT(*dn, 1));
toku_ftnode_pe_callback(*dn, attr, &attr, brt_h);
toku_ftnode_pe_callback(*dn, attr, brt_h, def_pe_finalize_impl, nullptr);
assert(BP_STATE(*dn, 1) == ((is_leaf) ? PT_ON_DISK : PT_COMPRESSED));
bool req = toku_ftnode_pf_req_callback(*dn, &bfe_subset);
......@@ -355,7 +355,8 @@ test_serialize_nonleaf(void) {
TXNID_NONE,
4*1024*1024,
128*1024,
TOKU_DEFAULT_COMPRESSION_METHOD);
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
brt->ft = brt_h;
toku_blocktable_create_new(&brt_h->blocktable);
......@@ -438,7 +439,8 @@ test_serialize_leaf(void) {
TXNID_NONE,
4*1024*1024,
128*1024,
TOKU_DEFAULT_COMPRESSION_METHOD);
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
brt->ft = brt_h;
toku_blocktable_create_new(&brt_h->blocktable);
......
......@@ -189,7 +189,8 @@ test_serialize_leaf(int valsize, int nelts, double entropy) {
TXNID_NONE,
4*1024*1024,
128*1024,
TOKU_DEFAULT_COMPRESSION_METHOD);
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
brt->ft = brt_h;
brt_h->compare_fun = long_key_cmp;
......@@ -319,7 +320,8 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy) {
TXNID_NONE,
4*1024*1024,
128*1024,
TOKU_DEFAULT_COMPRESSION_METHOD);
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
brt->ft = brt_h;
brt_h->compare_fun = long_key_cmp;
......
......@@ -187,8 +187,7 @@ setup_dn(enum ftnode_verify_type bft, int fd, FT brt_h, FTNODE *dn, FTNODE_DISK_
// if read_none, get rid of the compressed bp's
if (bft == read_none) {
if ((*dn)->height == 0) {
PAIR_ATTR attr;
toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), &attr, brt_h);
toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr);
// assert all bp's are on disk
for (int i = 0; i < (*dn)->n_children; i++) {
if ((*dn)->height == 0) {
......@@ -213,14 +212,14 @@ setup_dn(enum ftnode_verify_type bft, int fd, FT brt_h, FTNODE *dn, FTNODE_DISK_
for (int i = 0; i < (*dn)->n_children; i++) {
assert(BP_STATE(*dn,i) == PT_AVAIL);
}
toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), &attr, brt_h);
toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr);
for (int i = 0; i < (*dn)->n_children; i++) {
// assert all bp's are still available, because we touched the clock
assert(BP_STATE(*dn,i) == PT_AVAIL);
// now assert all should be evicted
assert(BP_SHOULD_EVICT(*dn, i));
}
toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), &attr, brt_h);
toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr);
for (int i = 0; i < (*dn)->n_children; i++) {
assert(BP_STATE(*dn,i) == PT_COMPRESSED);
}
......@@ -306,7 +305,8 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) {
TXNID_NONE,
4*1024*1024,
128*1024,
TOKU_DEFAULT_COMPRESSION_METHOD);
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
brt->ft = brt_h;
toku_blocktable_create_new(&brt_h->blocktable);
{ int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
......@@ -449,7 +449,8 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone
TXNID_NONE,
4*1024*1024,
128*1024,
TOKU_DEFAULT_COMPRESSION_METHOD);
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
brt->ft = brt_h;
toku_blocktable_create_new(&brt_h->blocktable);
{ int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
......@@ -586,7 +587,8 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) {
TXNID_NONE,
4*1024*1024,
128*1024,
TOKU_DEFAULT_COMPRESSION_METHOD);
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
brt->ft = brt_h;
toku_blocktable_create_new(&brt_h->blocktable);
......@@ -733,7 +735,8 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone)
TXNID_NONE,
4*1024*1024,
128*1024,
TOKU_DEFAULT_COMPRESSION_METHOD);
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
brt->ft = brt_h;
toku_blocktable_create_new(&brt_h->blocktable);
......@@ -881,7 +884,8 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool
TXNID_NONE,
4*1024*1024,
128*1024,
TOKU_DEFAULT_COMPRESSION_METHOD);
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
brt->ft = brt_h;
toku_blocktable_create_new(&brt_h->blocktable);
......@@ -1009,7 +1013,8 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b
TXNID_NONE,
4*1024*1024,
128*1024,
TOKU_DEFAULT_COMPRESSION_METHOD);
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
brt->ft = brt_h;
toku_blocktable_create_new(&brt_h->blocktable);
......@@ -1134,7 +1139,8 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) {
TXNID_NONE,
4*1024*1024,
128*1024,
TOKU_DEFAULT_COMPRESSION_METHOD);
TOKU_DEFAULT_COMPRESSION_METHOD,
16);
brt->ft = brt_h;
toku_blocktable_create_new(&brt_h->blocktable);
......
......@@ -170,7 +170,7 @@ static void test_extractor(int nrows, int nrowsets, bool expect_fail) {
}
FTLOADER loader;
r = toku_ft_loader_open(&loader, NULL, generate, NULL, N, brts, dbs, fnames, compares, "tempXXXXXX", ZERO_LSN, TXNID_NONE, true, 0, false);
r = toku_ft_loader_open(&loader, NULL, generate, NULL, N, brts, dbs, fnames, compares, "tempXXXXXX", ZERO_LSN, nullptr, true, 0, false);
assert(r == 0);
struct rowset *rowset[nrowsets];
......
......@@ -180,7 +180,7 @@ static void test_extractor(int nrows, int nrowsets, bool expect_fail, const char
sprintf(temp, "%s/%s", testdir, "tempXXXXXX");
FTLOADER loader;
r = toku_ft_loader_open(&loader, NULL, generate, NULL, N, brts, dbs, fnames, compares, "tempXXXXXX", ZERO_LSN, TXNID_NONE, true, 0, false);
r = toku_ft_loader_open(&loader, NULL, generate, NULL, N, brts, dbs, fnames, compares, "tempXXXXXX", ZERO_LSN, nullptr, true, 0, false);
assert(r == 0);
struct rowset *rowset[nrowsets];
......
......@@ -402,7 +402,7 @@ static void test_extractor(int nrows, int nrowsets, const char *testdir) {
sprintf(temp, "%s/%s", testdir, "tempXXXXXX");
FTLOADER loader;
r = toku_ft_loader_open(&loader, NULL, generate, NULL, N, brts, dbs, fnames, compares, temp, ZERO_LSN, TXNID_NONE, true, 0, false);
r = toku_ft_loader_open(&loader, NULL, generate, NULL, N, brts, dbs, fnames, compares, temp, ZERO_LSN, nullptr, true, 0, false);
assert(r == 0);
struct rowset *rowset[nrowsets];
......
......@@ -412,7 +412,7 @@ static void test (const char *directory, bool is_error) {
bt_compare_functions,
"tempxxxxxx",
*lsnp,
TXNID_NONE, true, 0, false);
nullptr, true, 0, false);
assert(r==0);
}
......
......@@ -143,7 +143,7 @@ static void test_loader_open(int ndbs) {
for (i = 0; ; i++) {
set_my_malloc_trigger(i+1);
r = toku_ft_loader_open(&loader, NULL, NULL, NULL, ndbs, brts, dbs, fnames, compares, "", ZERO_LSN, TXNID_NONE, true, 0, false);
r = toku_ft_loader_open(&loader, NULL, NULL, NULL, ndbs, brts, dbs, fnames, compares, "", ZERO_LSN, nullptr, true, 0, false);
if (r == 0)
break;
}
......
......@@ -213,7 +213,7 @@ static int write_dbfile (char *tf_template, int n, char *output_name, bool expec
ft_loader_set_error_function(&bl.error_callback, NULL, NULL);
ft_loader_set_poll_function(&bl.poll_callback, loader_poll_callback, NULL);
result = toku_loader_write_brt_from_q_in_C(&bl, &desc, fd, 1000, q2, size_est, 0, 0, 0, TOKU_DEFAULT_COMPRESSION_METHOD);
result = toku_loader_write_brt_from_q_in_C(&bl, &desc, fd, 1000, q2, size_est, 0, 0, 0, TOKU_DEFAULT_COMPRESSION_METHOD, 16);
toku_set_func_malloc_only(NULL);
toku_set_func_realloc_only(NULL);
......
......@@ -262,7 +262,7 @@ static void test_write_dbfile (char *tf_template, int n, char *output_name, TXNI
assert(fd>=0);
if (verbose) traceit("write to file");
r = toku_loader_write_brt_from_q_in_C(&bl, &desc, fd, 1000, q2, size_est, 0, 0, 0, TOKU_DEFAULT_COMPRESSION_METHOD);
r = toku_loader_write_brt_from_q_in_C(&bl, &desc, fd, 1000, q2, size_est, 0, 0, 0, TOKU_DEFAULT_COMPRESSION_METHOD, 16);
assert(r==0);
r = queue_destroy(q2);
......
......@@ -425,7 +425,7 @@ static void test_merge_files (const char *tf_template, const char *output_name)
int fd = open(output_name, O_RDWR | O_CREAT | O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO);
assert(fd>=0);
r = toku_loader_write_brt_from_q_in_C(&bl, &desc, fd, 1000, q, size_est, 0, 0, 0, TOKU_DEFAULT_COMPRESSION_METHOD);
r = toku_loader_write_brt_from_q_in_C(&bl, &desc, fd, 1000, q, size_est, 0, 0, 0, TOKU_DEFAULT_COMPRESSION_METHOD, 16);
assert(r==0);
destroy_merge_fileset(&fs);
......
......@@ -124,8 +124,9 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
MSN msn = next_dummymsn();
// apply an insert to the leaf node
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u = {.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL);
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, &gc_info, NULL, NULL);
leafnode->max_msn_applied_to_node_on_disk = msn;
......
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id$"
/*
COPYING CONDITIONS NOTICE:
This program is free software; you can redistribute it and/or modify
it under the terms of version 2 of the GNU General Public License as
published by the Free Software Foundation, and provided that the
following conditions are met:
* Redistributions of source code must retain this COPYING
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
GRANT (below).
* Redistributions in binary form must reproduce this COPYING
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
GRANT (below) in the documentation and/or other materials
provided with the distribution.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
COPYRIGHT NOTICE:
TokuDB, Tokutek Fractal Tree Indexing Library.
Copyright (C) 2007-2013 Tokutek, Inc.
DISCLAIMER:
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
UNIVERSITY PATENT NOTICE:
The technology is licensed by the Massachusetts Institute of
Technology, Rutgers State University of New Jersey, and the Research
Foundation of State University of New York at Stony Brook under
United States of America Serial No. 11/760379 and to the patents
and/or patent applications resulting from it.
PATENT MARKING NOTICE:
This software is covered by US Patent No. 8,185,551.
This software is covered by US Patent No. 8,489,638.
PATENT RIGHTS GRANT:
"THIS IMPLEMENTATION" means the copyrightable works distributed by
Tokutek as part of the Fractal Tree project.
"PATENT CLAIMS" means the claims of patents that are owned or
licensable by Tokutek, both currently or in the future; and that in
the absence of this license would be infringed by THIS
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
"PATENT CHALLENGE" shall mean a challenge to the validity,
patentability, enforceability and/or non-infringement of any of the
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
Tokutek hereby grants to you, for the term and geographical scope of
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
irrevocable (except as stated in this section) patent license to
make, have made, use, offer to sell, sell, import, transfer, and
otherwise run, modify, and propagate the contents of THIS
IMPLEMENTATION, where such license applies only to the PATENT
CLAIMS. This grant does not include claims that would be infringed
only as a consequence of further modifications of THIS
IMPLEMENTATION. If you or your agent or licensee institute or order
or agree to the institution of patent litigation against any entity
(including a cross-claim or counterclaim in a lawsuit) alleging that
THIS IMPLEMENTATION constitutes direct or contributory patent
infringement, or inducement of patent infringement, then any rights
granted to you under this License shall terminate as of the date
such litigation is filed. If you or your agent or exclusive
licensee institute or order or agree to the institution of a PATENT
CHALLENGE, then Tokutek may terminate any rights granted to you
under this License.
*/
#ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved."
#include "test.h"
#include "bndata.h"
static void
le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keysize, const char *val, int valsize)
{
LEAFENTRY r = NULL;
uint32_t size_needed = LE_CLEAN_MEMSIZE(valsize);
bn->get_space_for_insert(
idx,
key,
keysize,
size_needed,
&r
);
resource_assert(r);
r->type = LE_CLEAN;
r->u.clean.vallen = valsize;
memcpy(r->u.clean.val, val, valsize);
}
static void
le_overwrite(bn_data* bn, uint32_t idx, const char *key, int keysize, const char *val, int valsize) {
LEAFENTRY r = NULL;
uint32_t size_needed = LE_CLEAN_MEMSIZE(valsize);
bn->get_space_for_overwrite(
idx,
key,
keysize,
size_needed, // old_le_size
size_needed,
&r
);
resource_assert(r);
r->type = LE_CLEAN;
r->u.clean.vallen = valsize;
memcpy(r->u.clean.val, val, valsize);
}
class bndata_bugfix_test {
public:
void
run_test(void) {
// struct ft_handle source_ft;
struct ftnode sn;
// just copy this code from a previous test
// don't care what it does, just want to get a node up and running
sn.flags = 0x11223344;
sn.thisnodename.b = 20;
sn.layout_version = FT_LAYOUT_VERSION;
sn.layout_version_original = FT_LAYOUT_VERSION;
sn.height = 0;
sn.n_children = 2;
sn.dirty = 1;
sn.oldest_referenced_xid_known = TXNID_NONE;
MALLOC_N(sn.n_children, sn.bp);
MALLOC_N(1, sn.childkeys);
toku_memdup_dbt(&sn.childkeys[0], "b", 2);
sn.totalchildkeylens = 2;
BP_STATE(&sn,0) = PT_AVAIL;
BP_STATE(&sn,1) = PT_AVAIL;
set_BLB(&sn, 0, toku_create_empty_bn());
set_BLB(&sn, 1, toku_create_empty_bn());
le_add_to_bn(BLB_DATA(&sn, 0), 0, "a", 2, "aval", 5);
le_add_to_bn(BLB_DATA(&sn, 0), 1, "b", 2, "bval", 5);
le_add_to_bn(BLB_DATA(&sn, 1), 0, "x", 2, "xval", 5);
// now this is the test. If I keep getting space for overwrite
// like crazy, it should expose the bug
bn_data* bnd = BLB_DATA(&sn, 0);
size_t old_size = bnd->m_buffer_mempool.size;
if (verbose) printf("frag size: %zu\n", bnd->m_buffer_mempool.frag_size);
if (verbose) printf("size: %zu\n", bnd->m_buffer_mempool.size);
for (uint32_t i = 0; i < 1000000; i++) {
le_overwrite(bnd, 0, "a", 2, "aval", 5);
}
if (verbose) printf("frag size: %zu\n", bnd->m_buffer_mempool.frag_size);
if (verbose) printf("size: %zu\n", bnd->m_buffer_mempool.size);
size_t new_size = bnd->m_buffer_mempool.size;
// just a crude test to make sure we did not grow unbounded.
// if this assert ever fails, revisit the code and see what is going
// on. It may be that some algorithm has changed.
assert(new_size < 5*old_size);
for (int i = 0; i < sn.n_children-1; ++i) {
toku_free(sn.childkeys[i].data);
}
for (int i = 0; i < sn.n_children; i++) {
destroy_basement_node(BLB(&sn, i));
}
toku_free(sn.bp);
toku_free(sn.childkeys);
}
};
int
test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
bndata_bugfix_test t;
t.run_test();
return 0;
}
......@@ -132,8 +132,9 @@ append_leaf(FT_HANDLE brt, FTNODE leafnode, void *key, uint32_t keylen, void *va
MSN msn = next_dummymsn();
brt->ft->h->max_msn_in_ft = msn;
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd, make_gc_info(false), nullptr, nullptr);
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd, &gc_info, nullptr, nullptr);
{
int r = toku_ft_lookup(brt, &thekey, lookup_checkf, &pair);
assert(r==0);
......@@ -141,7 +142,7 @@ append_leaf(FT_HANDLE brt, FTNODE leafnode, void *key, uint32_t keylen, void *va
}
FT_MSG_S badcmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &badval }} };
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &badcmd, make_gc_info(false), nullptr, nullptr);
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &badcmd, &gc_info, nullptr, nullptr);
// message should be rejected for duplicate msn, row should still have original val
{
......@@ -154,7 +155,7 @@ append_leaf(FT_HANDLE brt, FTNODE leafnode, void *key, uint32_t keylen, void *va
msn = next_dummymsn();
brt->ft->h->max_msn_in_ft = msn;
FT_MSG_S cmd2 = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &val2 }} };
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd2, make_gc_info(false), nullptr, nullptr);
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd2, &gc_info, nullptr, nullptr);
// message should be accepted, val should have new value
{
......@@ -166,7 +167,7 @@ append_leaf(FT_HANDLE brt, FTNODE leafnode, void *key, uint32_t keylen, void *va
// now verify that message with lesser (older) msn is rejected
msn.msn = msn.msn - 10;
FT_MSG_S cmd3 = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &badval } }};
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd3, make_gc_info(false), nullptr, nullptr);
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd3, &gc_info, nullptr, nullptr);
// message should be rejected, val should still have value in pair2
{
......
......@@ -96,6 +96,7 @@ PATENT RIGHTS GRANT:
static TOKUTXN const null_txn = 0;
static DB * const null_db = 0;
static const char *fname = TOKU_TEST_FILENAME;
static txn_gc_info non_mvcc_gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
static int dummy_cmp(DB *db __attribute__((unused)),
const DBT *a, const DBT *b) {
......@@ -217,8 +218,8 @@ insert_random_message_to_bn(
*keylenp = keydbt->size;
*keyp = toku_xmemdup(keydbt->data, keydbt->size);
int64_t numbytes;
toku_le_apply_msg(&msg, NULL, NULL, 0, TXNID_NONE, make_gc_info(false), save, &numbytes);
toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb, &msg, TXNID_NONE, make_gc_info(false), NULL, NULL);
toku_le_apply_msg(&msg, NULL, NULL, 0, &non_mvcc_gc_info, save, &numbytes);
toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb, &msg, &non_mvcc_gc_info, NULL, NULL);
if (msn.msn > blb->max_msn_applied.msn) {
blb->max_msn_applied = msn;
}
......@@ -267,12 +268,12 @@ insert_same_message_to_bns(
*keylenp = keydbt->size;
*keyp = toku_xmemdup(keydbt->data, keydbt->size);
int64_t numbytes;
toku_le_apply_msg(&msg, NULL, NULL, 0, TXNID_NONE, make_gc_info(false), save, &numbytes);
toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb1, &msg, TXNID_NONE, make_gc_info(false), NULL, NULL);
toku_le_apply_msg(&msg, NULL, NULL, 0, &non_mvcc_gc_info, save, &numbytes);
toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb1, &msg, &non_mvcc_gc_info, NULL, NULL);
if (msn.msn > blb1->max_msn_applied.msn) {
blb1->max_msn_applied = msn;
}
toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb2, &msg, TXNID_NONE, make_gc_info(false), NULL, NULL);
toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb2, &msg, &non_mvcc_gc_info, NULL, NULL);
if (msn.msn > blb2->max_msn_applied.msn) {
blb2->max_msn_applied = msn;
}
......@@ -684,7 +685,7 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) {
if (make_leaf_up_to_date) {
for (i = 0; i < num_parent_messages; ++i) {
if (!parent_messages_is_fresh[i]) {
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child, -1, parent_messages[i], make_gc_info(false), NULL, NULL);
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL);
}
}
for (i = 0; i < 8; ++i) {
......@@ -908,7 +909,7 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) {
for (i = 0; i < num_parent_messages; ++i) {
if (dummy_cmp(NULL, parent_messages[i]->u.id.key, &childkeys[7]) <= 0 &&
!parent_messages_is_fresh[i]) {
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child, -1, parent_messages[i], make_gc_info(false), NULL, NULL);
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL);
}
}
for (i = 0; i < 8; ++i) {
......@@ -1104,8 +1105,8 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) {
if (make_leaf_up_to_date) {
for (i = 0; i < num_parent_messages; ++i) {
if (!parent_messages_is_fresh[i]) {
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child1, -1, parent_messages[i], make_gc_info(false), NULL, NULL);
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child2, -1, parent_messages[i], make_gc_info(false), NULL, NULL);
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child1, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL);
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child2, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL);
}
}
for (i = 0; i < 8; ++i) {
......
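Every `TXNID_NONE, make_gc_info(false)` argument pair in these tests collapses into a pointer to the single file-scope `non_mvcc_gc_info` declared at the top of the file. A minimal sketch of the new calling convention, using only signatures visible in this diff (the helper name and the `BASEMENTNODE` parameter type are assumptions drawn from the surrounding code):
```cpp
// File-scope gc context for non-MVCC test paths: no txn manager state,
// no referenced xids, and mvcc_needed = false.
static txn_gc_info non_mvcc_gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);

// Hypothetical helper showing the migrated call shape.
static void apply_msg_non_mvcc(FT_HANDLE t, BASEMENTNODE blb, FT_MSG msg) {
    // Before: ..., msg, TXNID_NONE, make_gc_info(false), NULL, NULL
    // After:  the xid and the gc flag travel inside one txn_gc_info pointer.
    toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun,
                         NULL, blb, msg, &non_mvcc_gc_info, NULL, NULL);
}
```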
......@@ -260,8 +260,7 @@ doit (bool keep_other_bn_in_memory) {
assert(node->n_children == 2);
// a hack to get the basement nodes evicted
for (int i = 0; i < 20; i++) {
PAIR_ATTR attr;
toku_ftnode_pe_callback(node, make_pair_attr(0xffffffff), &attr, brt->ft);
toku_ftnode_pe_callback(node, make_pair_attr(0xffffffff), brt->ft, def_pe_finalize_impl, nullptr);
}
// this ensures that when we do the lookups below,
// the data is read off disk
......
......@@ -205,7 +205,8 @@ doit (void) {
toku_fill_dbt(&left, "g", 2);
DBT right;
toku_fill_dbt(&right, "n", 2);
r = toku_ft_hot_optimize(t, &left, &right, NULL, NULL);
uint64_t loops_run = 0;
r = toku_ft_hot_optimize(t, &left, &right, NULL, NULL, &loops_run);
assert(r==0);
// at this point, we should have flushed
......
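`toku_ft_hot_optimize` gains a `loops_run` out-parameter so callers can observe how many passes the hot optimizer made. A minimal sketch under the signature shown above (handle setup elided; the NULL bounds and NULL progress callback follow the surrounding tests):
```cpp
uint64_t loops_run = 0;
// NULL left/right bounds optimize the whole dictionary; the progress
// callback and its extra argument are optional and NULL here.
int r = toku_ft_hot_optimize(t, NULL, NULL, NULL, NULL, &loops_run);
assert(r == 0);
// loops_run now reports the number of optimization passes performed.
```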
......@@ -130,10 +130,10 @@ const char *fname = TOKU_TEST_FILENAME;
static void
doit (int ksize __attribute__((__unused__))) {
BLOCKNUM cnodes[FT_FANOUT], bnode, anode;
BLOCKNUM cnodes[16], bnode, anode;
char *keys[FT_FANOUT-1];
int keylens[FT_FANOUT-1];
char *keys[16-1];
int keylens[16-1];
int i;
int r;
......@@ -144,7 +144,7 @@ doit (int ksize __attribute__((__unused__))) {
toku_testsetup_initialize(); // must precede any other toku_testsetup calls
for (i=0; i<FT_FANOUT; i++) {
for (i=0; i<16; i++) {
r=toku_testsetup_leaf(t, &cnodes[i], 1, NULL, NULL);
assert(r==0);
char key[KSIZE+10];
......@@ -156,16 +156,16 @@ doit (int ksize __attribute__((__unused__))) {
}
// Now we have a bunch of leaves, all of which are within 100 bytes of full.
for (i=0; i+1<FT_FANOUT; i++) {
for (i=0; i+1<16; i++) {
char key[TOKU_PSIZE];
keylens[i]=1+snprintf(key, TOKU_PSIZE, "%08d", (i+1)*10000);
keys[i]=toku_strdup(key);
}
r = toku_testsetup_nonleaf(t, 1, &bnode, FT_FANOUT, cnodes, keys, keylens);
r = toku_testsetup_nonleaf(t, 1, &bnode, 16, cnodes, keys, keylens);
assert(r==0);
for (i=0; i+1<FT_FANOUT; i++) {
for (i=0; i+1<16; i++) {
toku_free(keys[i]);
}
......
......@@ -453,12 +453,12 @@ test_le_apply(ULE ule_initial, FT_MSG msg, ULE ule_expected) {
size_t result_memsize = 0;
int64_t ignoreme;
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true);
toku_le_apply_msg(msg,
le_initial,
nullptr,
0,
TXNID_NONE,
make_gc_info(true),
&gc_info,
&le_result,
&ignoreme);
if (le_result) {
......@@ -751,7 +751,8 @@ static bool ule_worth_running_garbage_collection(ULE ule, TXNID oldest_reference
LEAFENTRY le;
int r = le_pack(ule, nullptr, 0, nullptr, 0, 0, &le); CKERR(r);
invariant_notnull(le);
bool worth_running = toku_le_worth_running_garbage_collection(le, oldest_referenced_xid_known);
txn_gc_info gc_info(nullptr, oldest_referenced_xid_known, oldest_referenced_xid_known, true);
bool worth_running = toku_le_worth_running_garbage_collection(le, &gc_info);
toku_free(le);
return worth_running;
}
......
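`toku_le_worth_running_garbage_collection` now consumes a full `txn_gc_info` rather than a bare xid. The four constructor arguments map as follows (names taken from the `txn_gc_info` struct introduced later in this commit); a commented restatement of the call above:
```cpp
// 1) txn_manager_state* - nullptr, so no MVCC snapshot is consulted
// 2) oldest_referenced_xid_for_simple_gc
// 3) oldest_referenced_xid_for_implicit_promotion
// 4) mvcc_needed - true for these ULE tests
txn_gc_info gc_info(nullptr, oldest_referenced_xid_known,
                    oldest_referenced_xid_known, true);
bool worth_running = toku_le_worth_running_garbage_collection(le, &gc_info);
```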
......@@ -369,8 +369,7 @@ doit (void) {
assert_zero(r);
toku_pin_node_with_min_bfe(&node, node_internal, t);
for (int i = 0; i < 20; i++) {
PAIR_ATTR attr;
toku_ftnode_pe_callback(node, make_pair_attr(0xffffffff), &attr, t->ft);
toku_ftnode_pe_callback(node, make_pair_attr(0xffffffff), t->ft, def_pe_finalize_impl, nullptr);
}
assert(BP_STATE(node,0) == PT_COMPRESSED);
toku_unpin_ftnode(t->ft, node);
......
......@@ -239,17 +239,21 @@ def_pe_est_callback(
}
static UU() int
def_pe_callback (
def_pe_callback(
void *ftnode_pv __attribute__((__unused__)),
PAIR_ATTR bytes_to_free __attribute__((__unused__)),
PAIR_ATTR* bytes_freed,
void* extraargs __attribute__((__unused__))
)
void* extraargs __attribute__((__unused__)),
void (*finalize)(PAIR_ATTR bytes_freed, void *extra),
void *finalize_extra
)
{
*bytes_freed = bytes_to_free;
finalize(bytes_to_free, finalize_extra);
return 0;
}
static UU() void
def_pe_finalize_impl(PAIR_ATTR UU(bytes_freed), void *UU(extra)) { }
static UU() bool def_pf_req_callback(void* UU(ftnode_pv), void* UU(read_extraargs)) {
return false;
}
......
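The partial-eviction callback contract changes shape here: freed bytes are no longer returned through a `PAIR_ATTR*` out-parameter but handed to a `finalize` continuation, which lets the cachetable defer its accounting until the callback decides the eviction is complete. A hedged sketch of a finalize implementation that captures the freed attributes (the tests themselves use the no-op `def_pe_finalize_impl`; `capture_finalize` and `evict_once` are hypothetical):
```cpp
// Hypothetical finalize that records the freed attributes into
// caller-provided storage instead of discarding them.
static void capture_finalize(PAIR_ATTR bytes_freed, void *extra) {
    PAIR_ATTR *dest = static_cast<PAIR_ATTR *>(extra);
    *dest = bytes_freed;
}

static void evict_once(void *ftnode_pv, PAIR_ATTR attr) {
    PAIR_ATTR freed;
    // def_pe_callback forwards bytes_to_free to finalize and returns 0,
    // per its definition above.
    int r = def_pe_callback(ftnode_pv, attr, NULL, capture_finalize, &freed);
    assert(r == 0);
}
```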
......@@ -144,7 +144,8 @@ static int
do_hot_optimize(FT_HANDLE t, CACHETABLE UU(ct), void *extra)
{
float *CAST_FROM_VOIDP(fraction, extra);
int r = toku_ft_hot_optimize(t, NULL, NULL, progress, extra);
uint64_t loops_run = 0;
int r = toku_ft_hot_optimize(t, NULL, NULL, progress, extra, &loops_run);
if (*fraction < 1.0) {
CKERR2(r, 1);
} else {
......
......@@ -128,7 +128,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL);
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// Create bad tree (don't do the following):
// leafnode->max_msn_applied_to_node = msn;
......
......@@ -116,7 +116,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node
MSN msn = next_dummymsn();
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL);
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// don't forget to dirty the node
leafnode->dirty = 1;
......
......@@ -117,7 +117,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node
MSN msn = next_dummymsn();
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL);
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// don't forget to dirty the node
leafnode->dirty = 1;
......
......@@ -116,7 +116,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node
MSN msn = next_dummymsn();
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL);
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// don't forget to dirty the node
leafnode->dirty = 1;
......
......@@ -117,7 +117,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node
MSN msn = next_dummymsn();
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL);
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// don't forget to dirty the node
leafnode->dirty = 1;
......
......@@ -119,7 +119,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node
MSN msn = next_dummymsn();
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL);
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// don't forget to dirty the node
leafnode->dirty = 1;
......
......@@ -116,7 +116,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node
MSN msn = next_dummymsn();
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL);
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// don't forget to dirty the node
leafnode->dirty = 1;
......
......@@ -239,7 +239,6 @@ toku_txn_begin_with_xid (
}
else {
parent->child_manager->start_child_txn_for_recovery(txn, parent, xid);
txn->oldest_referenced_xid = parent->oldest_referenced_xid;
}
}
else {
......@@ -255,7 +254,6 @@ toku_txn_begin_with_xid (
}
else {
parent->child_manager->start_child_txn(txn, parent);
txn->oldest_referenced_xid = parent->oldest_referenced_xid;
toku_txn_manager_handle_snapshot_create_for_child_txn(
txn,
logger->txn_manager,
......@@ -308,11 +306,8 @@ static void toku_txn_create_txn (
.num_rollentries_processed = 0,
.rollentry_raw_count = 0,
.spilled_rollback_head = ROLLBACK_NONE,
.spilled_rollback_head_hash = 0,
.spilled_rollback_tail = ROLLBACK_NONE,
.spilled_rollback_tail_hash = 0,
.current_rollback = ROLLBACK_NONE,
.current_rollback_hash = 0,
};
static txn_child_manager tcm;
......@@ -330,7 +325,6 @@ static txn_child_manager tcm;
.container_db_txn = container_db_txn,
.live_root_txn_list = nullptr,
.xids = NULL,
.oldest_referenced_xid = TXNID_NONE,
.snapshot_next = NULL,
.snapshot_prev = NULL,
.begin_was_logged = false,
......@@ -405,17 +399,9 @@ toku_txn_load_txninfo (TOKUTXN txn, TXNINFO info) {
txn->roll_info.num_rollback_nodes = info->num_rollback_nodes;
txn->roll_info.num_rollentries = info->num_rollentries;
CACHEFILE rollback_cachefile = txn->logger->rollback_cachefile;
txn->roll_info.spilled_rollback_head = info->spilled_rollback_head;
txn->roll_info.spilled_rollback_head_hash = toku_cachetable_hash(rollback_cachefile,
txn->roll_info.spilled_rollback_head);
txn->roll_info.spilled_rollback_tail = info->spilled_rollback_tail;
txn->roll_info.spilled_rollback_tail_hash = toku_cachetable_hash(rollback_cachefile,
txn->roll_info.spilled_rollback_tail);
txn->roll_info.current_rollback = info->current_rollback;
txn->roll_info.current_rollback_hash = toku_cachetable_hash(rollback_cachefile,
txn->roll_info.current_rollback);
return 0;
}
......
......@@ -291,6 +291,7 @@ void toku_txn_manager_init(TXN_MANAGER* txn_managerp) {
txn_manager->last_xid = 0;
txn_manager->last_xid_seen_for_recover = TXNID_NONE;
txn_manager->last_calculated_oldest_referenced_xid = TXNID_NONE;
*txn_managerp = txn_manager;
}
......@@ -324,6 +325,10 @@ toku_txn_manager_get_oldest_living_xid(TXN_MANAGER txn_manager) {
return rval;
}
TXNID toku_txn_manager_get_oldest_referenced_xid_estimate(TXN_MANAGER txn_manager) {
return txn_manager->last_calculated_oldest_referenced_xid;
}
int live_root_txn_list_iter(const TOKUTXN &live_xid, const uint32_t UU(index), TXNID **const referenced_xids);
int live_root_txn_list_iter(const TOKUTXN &live_xid, const uint32_t UU(index), TXNID **const referenced_xids){
(*referenced_xids)[index] = live_xid->txnid.parent_id64;
......@@ -371,7 +376,7 @@ max_xid(TXNID a, TXNID b) {
return a < b ? b : a;
}
static TXNID get_oldest_referenced_xid_unlocked(TXN_MANAGER txn_manager) {
static void set_oldest_referenced_xid(TXN_MANAGER txn_manager) {
TXNID oldest_referenced_xid = TXNID_MAX;
int r;
if (txn_manager->live_root_ids.size() > 0) {
......@@ -397,8 +402,8 @@ static TXNID get_oldest_referenced_xid_unlocked(TXN_MANAGER txn_manager) {
if (txn_manager->last_xid < oldest_referenced_xid) {
oldest_referenced_xid = txn_manager->last_xid;
}
paranoid_invariant(oldest_referenced_xid != TXNID_MAX);
return oldest_referenced_xid;
invariant(oldest_referenced_xid != TXNID_MAX);
txn_manager->last_calculated_oldest_referenced_xid = oldest_referenced_xid;
}
//Heaviside function to find a TOKUTXN by TOKUTXN (used to find the index)
......@@ -610,7 +615,6 @@ void toku_txn_manager_start_txn_for_recovery(
// using xid that is passed in
txn_manager->last_xid = max_xid(txn_manager->last_xid, xid);
toku_txn_update_xids_in_txn(txn, xid);
txn->oldest_referenced_xid = TXNID_NONE;
uint32_t idx;
int r = txn_manager->live_root_txns.find_zero<TOKUTXN, find_xid>(txn, nullptr, &idx);
......@@ -672,7 +676,7 @@ void toku_txn_manager_start_txn(
r = txn_manager->live_root_ids.insert_at(txn->txnid.parent_id64, idx);
invariant_zero(r);
}
txn->oldest_referenced_xid = get_oldest_referenced_xid_unlocked(txn_manager);
set_oldest_referenced_xid(txn_manager);
if (needs_snapshot) {
txn_manager_create_snapshot_unlocked(
......@@ -825,7 +829,17 @@ void toku_txn_manager_clone_state_for_gc(
txn_manager_unlock(txn_manager);
}
void txn_manager_state::init() {
invariant(!initialized);
invariant_notnull(txn_manager);
toku_txn_manager_clone_state_for_gc(
txn_manager,
&snapshot_xids,
&referenced_xids,
&live_root_txns
);
initialized = true;
}
void toku_txn_manager_id2txn_unlocked(TXN_MANAGER txn_manager, TXNID_PAIR txnid, TOKUTXN *result) {
TOKUTXN txn;
......
......@@ -121,14 +121,74 @@ struct txn_manager {
TXNID last_xid;
TXNID last_xid_seen_for_recover;
TXNID last_calculated_oldest_referenced_xid;
};
struct txn_manager_state {
txn_manager_state(TXN_MANAGER mgr) :
txn_manager(mgr),
initialized(false) {
snapshot_xids.create_no_array();
referenced_xids.create_no_array();
live_root_txns.create_no_array();
}
// should not copy construct or assign
txn_manager_state &operator=(txn_manager_state &rhs) = delete;
txn_manager_state(txn_manager_state &rhs) = delete;
~txn_manager_state() {
snapshot_xids.destroy();
referenced_xids.destroy();
live_root_txns.destroy();
}
void init();
TXN_MANAGER txn_manager;
bool initialized;
// a snapshot of the txn manager's mvcc state
// only valid if initialized = true
xid_omt_t snapshot_xids;
rx_omt_t referenced_xids;
xid_omt_t live_root_txns;
};
// represents all of the information needed to run garbage collection
struct txn_gc_info {
txn_gc_info(txn_manager_state *st, TXNID xid_sgc, TXNID xid_ip, bool mvcc)
: txn_state_for_gc(st),
oldest_referenced_xid_for_simple_gc(xid_sgc),
oldest_referenced_xid_for_implicit_promotion(xid_ip),
mvcc_needed(mvcc) {
}
// a snapshot of the transaction system. may be null.
txn_manager_state *txn_state_for_gc;
// the oldest xid in any live list
//
// suitable for simple garbage collection that cleans up multiple committed
// transaction records into one. not suitable for implicit promotions, which
// must be correct in the face of abort messages - see ftnode->oldest_referenced_xid
TXNID oldest_referenced_xid_for_simple_gc;
// lower bound on the oldest xid in any live list when the messages to be cleaned
// had no messages above them. suitable for implicitly promoting a provisional uxr.
TXNID oldest_referenced_xid_for_implicit_promotion;
// whether or not mvcc is actually needed - false during recovery and in non-transactional systems
const bool mvcc_needed;
};
void toku_txn_manager_init(TXN_MANAGER* txn_manager);
void toku_txn_manager_destroy(TXN_MANAGER txn_manager);
TXNID toku_txn_manager_get_oldest_living_xid(TXN_MANAGER txn_manager);
TXNID toku_txn_manager_get_oldest_referenced_xid_estimate(TXN_MANAGER txn_manager);
void toku_txn_manager_handle_snapshot_create_for_child_txn(
TOKUTXN txn,
TXN_MANAGER txn_manager,
......
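Taken together, the two new structs give a consumer full MVCC garbage collection: snapshot the txn manager once, then thread the resulting `txn_gc_info` through message application. A hedged sketch using only declarations from this header (obtaining `txn_manager`, e.g. via `logger->txn_manager` as in `toku_txn_begin_with_xid` above, is assumed):
```cpp
// One cheap estimate read; no txn manager lock taken here.
TXNID oldest_estimate =
    toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager);

// Snapshot the MVCC state once; init() clones snapshot_xids,
// referenced_xids, and live_root_txns via toku_txn_manager_clone_state_for_gc.
txn_manager_state state(txn_manager);
state.init();

// Both xid bounds use the same estimate here; a caller with more context
// may pass a tighter bound for implicit promotion.
txn_gc_info gc_info(&state, oldest_estimate, oldest_estimate, true);
```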
......@@ -102,6 +102,9 @@ PATENT RIGHTS GRANT:
#include "txn_manager.h"
#include <util/mempool.h>
void toku_ule_status_init(void);
void toku_ule_status_destroy(void);
// opaque handles used by outside world (i.e. indexer)
typedef struct ule *ULEHANDLE;
typedef struct uxr *UXRHANDLE;
......
......@@ -125,16 +125,15 @@ class lock_request {
WRITE
};
// effect: Initializes a lock request with a given wait time.
void create(uint64_t wait_time);
// effect: Initializes a lock request.
void create(void);
// effect: Destroys a lock request.
void destroy(void);
// effect: Resets the lock request parameters, allowing it to be reused.
// requires: Lock request was already created at some point
void set(locktree *lt, TXNID txnid,
const DBT *left_key, const DBT *right_key, type lock_type);
void set(locktree *lt, TXNID txnid, const DBT *left_key, const DBT *right_key, type lock_type, bool big_txn);
// effect: Tries to acquire a lock described by this lock request.
// returns: The return code of locktree::acquire_[write,read]_lock()
......@@ -144,7 +143,8 @@ class lock_request {
// effect: Sleeps until either the request is granted or the wait time expires.
// returns: The return code of locktree::acquire_[write,read]_lock()
// or simply DB_LOCK_NOTGRANTED if the wait time expired.
int wait(void);
int wait(uint64_t wait_time_ms);
int wait(uint64_t wait_time_ms, uint64_t killed_time_ms, int (*killed_callback)(void));
// return: left end-point of the lock range
const DBT *get_left_key(void) const;
......@@ -196,9 +196,10 @@ class lock_request {
int m_complete_r;
state m_state;
uint64_t m_wait_time;
toku_cond_t m_wait_cond;
bool m_big_txn;
// the lock request info state stored in the
// locktree that this lock request is for.
struct locktree::lt_lock_request_info *m_info;
......@@ -235,8 +236,6 @@ class lock_request {
void copy_keys(void);
void calculate_cond_wakeup_time(struct timespec *ts);
static int find_by_txnid(lock_request * const &request, const TXNID &txnid);
friend class lock_request_unit_test;
......
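The net effect on `lock_request` callers: the wait time moves from `create()` to each `wait()` call, and `set()` carries the new `big_txn` flag. A hedged lifecycle sketch; the `start()` call is an assumption (the hunk shows only its comment, "Tries to acquire a lock described by this lock request"), as is the surrounding setup:
```cpp
toku::lock_request request;
request.create();  // no fixed wait time at creation anymore
request.set(lt, txnid, left_key, right_key,
            toku::lock_request::WRITE, false /* big_txn */);
int r = request.start();  // assumed name; returns the locktree acquire code
if (r == DB_LOCK_NOTGRANTED) {
    // Timeout is now chosen per wait; the 3-argument overload adds a
    // killed_time_ms poll interval and a killed_callback for cancellation.
    r = request.wait(1000 /* wait_time_ms */);
}
request.destroy();
```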
......@@ -119,6 +119,7 @@ namespace toku {
void locktree::create(manager::memory_tracker *mem_tracker, DICTIONARY_ID dict_id,
DESCRIPTOR desc, ft_compare_func cmp) {
m_mem_tracker = mem_tracker;
m_mgr = mem_tracker->get_manager();
m_dict_id = dict_id;
// the only reason m_cmp is malloc'd here is to prevent gdb from printing
......@@ -410,8 +411,8 @@ int locktree::acquire_lock(bool is_write_request, TXNID txnid,
}
int locktree::try_acquire_lock(bool is_write_request, TXNID txnid,
const DBT *left_key, const DBT *right_key, txnid_set *conflicts) {
int r = m_mem_tracker->check_current_lock_constraints();
const DBT *left_key, const DBT *right_key, txnid_set *conflicts, bool big_txn) {
int r = m_mgr->check_current_lock_constraints(big_txn);
if (r == 0) {
r = acquire_lock(is_write_request, txnid, left_key, right_key, conflicts);
}
......@@ -420,13 +421,13 @@ int locktree::try_acquire_lock(bool is_write_request, TXNID txnid,
// the locktree silently upgrades read locks to write locks for simplicity
int locktree::acquire_read_lock(TXNID txnid,
const DBT *left_key, const DBT *right_key, txnid_set *conflicts) {
return acquire_write_lock(txnid, left_key, right_key, conflicts);
const DBT *left_key, const DBT *right_key, txnid_set *conflicts, bool big_txn) {
return acquire_write_lock(txnid, left_key, right_key, conflicts, big_txn);
}
int locktree::acquire_write_lock(TXNID txnid,
const DBT *left_key, const DBT *right_key, txnid_set *conflicts) {
return try_acquire_lock(true, txnid, left_key, right_key, conflicts);
const DBT *left_key, const DBT *right_key, txnid_set *conflicts, bool big_txn) {
return try_acquire_lock(true, txnid, left_key, right_key, conflicts, big_txn);
}
void locktree::get_conflicts(bool is_write_request, TXNID txnid,
......
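Downstream of that API, every locktree acquisition now threads `big_txn` through to `m_mgr->check_current_lock_constraints()`, replacing the per-tree memory tracker check. A hedged sketch of a direct acquisition, assuming `lt`, `txnid`, and the DBT endpoints exist and that `txnid_set` follows the usual `create()`/`destroy()` convention of this codebase:
```cpp
toku::txnid_set conflicts;
conflicts.create();
// Read locks are silently upgraded to write locks (see above), so this
// takes the same path as acquire_write_lock; big_txn feeds the manager's
// lock-memory constraint check.
int r = lt->acquire_read_lock(txnid, left_key, right_key,
                              &conflicts, false /* big_txn */);
if (r == DB_LOCK_NOTGRANTED) {
    // conflicts now holds the txnids blocking this range.
}
conflicts.destroy();
```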