ma_loghandler.c 236 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/* Copyright (C) 2007 MySQL AB & Sanja Belkin

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

16
#include "maria_def.h"
17
#include "trnman.h"
18 19
#include "ma_blockrec.h" /* for some constants and in-write hooks */
#include "ma_key_recover.h" /* For some in-write hooks */
20

21 22 23 24 25
/**
   @file
   @brief Module which writes and reads to a transaction log
*/

26
/* 0xFF can never be valid first byte of a chunk */
unknown's avatar
unknown committed
27 28
#define TRANSLOG_FILLER 0xFF

unknown's avatar
unknown committed
29
/* number of opened log files in the pagecache (should be at least 2) */
30 31
#define OPENED_FILES_NUM 3

unknown's avatar
unknown committed
32
/* records buffer size (should be TRANSLOG_PAGE_SIZE * n) */
33 34 35 36 37 38 39 40 41
#define TRANSLOG_WRITE_BUFFER (1024*1024)
/* min chunk length */
#define TRANSLOG_MIN_CHUNK 3
/*
  Number of buffers used by loghandler

  Should be at least 4, because one thread can block up to 2 buffers in
  normal circumstances (less than half of one and full other, or just
  switched one and other). But if we meet the end of the file in the middle
  and have to switch buffer it will be 3.  + 1 buffer for flushing/writing.
  We have a bigger number here for higher concurrency.
*/
#define TRANSLOG_BUFFERS_NO 5
unknown's avatar
unknown committed
46
/* number of bytes (+ header) which can be unused on first page in sequence */
47 48
#define TRANSLOG_MINCHUNK_CONTENT 1
/* version of log file */
unknown's avatar
unknown committed
49
#define TRANSLOG_VERSION_ID 10000               /* 1.00.00 */
50

unknown's avatar
unknown committed
51 52 53
#define TRANSLOG_PAGE_FLAGS 6 /* transaction log page flags offset */

/*
  QQ:  For temporary debugging

  Report an unrecoverable error: pass it to DBUG_PRINT under the "error"
  keyword, print it to stdout followed by a newline, then hit DBUG_ASSERT(0).

  E is a complete parenthesised printf() argument list, for example
  UNRECOVERABLE_ERROR(("Error %d", errno));
  E appears twice in the expansion (DBUG_PRINT and printf), so it must not
  have side effects.

  The expansion has no trailing semicolon on purpose: the caller supplies
  it, which keeps the macro usable as the single statement of an if/else.
*/
#define UNRECOVERABLE_ERROR(E) \
  do { \
    DBUG_PRINT("error", E); \
    printf E; \
    putchar('\n'); \
    DBUG_ASSERT(0); \
  } while(0)

62 63 64
/* Maximum length of compressed LSNs (the worst case of whole LSN storing) */
#define COMPRESSED_LSN_MAX_STORE_SIZE (2 + LSN_STORE_SIZE)
#define MAX_NUMBER_OF_LSNS_PER_RECORD 2
unknown's avatar
unknown committed
65

unknown's avatar
unknown committed
66

67 68 69 70 71 72
/* log write buffer descriptor */
struct st_translog_buffer
{
  LSN last_lsn;
  /* This buffer offset in the file */
  TRANSLOG_ADDRESS offset;
73 74 75 76 77
  /*
    Next buffer offset in the file (it is not always offset + size,
    in case of flush by LSN it can be offset + size - TRANSLOG_PAGE_SIZE)
  */
  TRANSLOG_ADDRESS next_buffer_offset;
78
  /*
79
     How much is written (or will be written when copy_to_buffer_in_progress
80 81
     become 0) to this buffer
  */
unknown's avatar
unknown committed
82 83
  translog_size_t size;
  /* File handler for this buffer */
84 85
  File file;
  /* Threads which are waiting for buffer filling/freeing */
unknown's avatar
unknown committed
86
  pthread_cond_t waiting_filling_buffer;
87
  /* Number of records which are in copy progress */
unknown's avatar
unknown committed
88
  uint copy_to_buffer_in_progress;
89 90
  /* list of waiting buffer ready threads */
  struct st_my_thread_var *waiting_flush;
91 92
  /*
    Pointer on the buffer which overlap with this one (due to flush of
unknown's avatar
unknown committed
93 94 95
    loghandler, the last page of that buffer is the same as the first page
    of this buffer) and have to be written first (because contain old
    content of page which present in both buffers)
96
  */
97 98
  struct st_translog_buffer *overlay;
#ifndef DBUG_OFF
unknown's avatar
unknown committed
99
  uint buffer_no;
100
#endif
101 102
  /*
    Lock for the buffer.

    The current buffer also locks the whole handler (if one wants to lock
    the handler one should lock the current buffer).

    Buffers are locked only in one direction (with overflow and beginning
    from the first buffer). If we keep a lock on buffer N we can lock only
    buffer N+1 (never N-1).

    One thread does not lock more than 2 buffers at a time, so to make a
    deadlock it should be N threads (where N equals the number of buffers)
    each taking one buffer and trying to lock the next. But it is impossible
    because there are only 2 cases when a thread takes 2 buffers: 1) one
    thread finishes the current buffer (where the horizon is) and starts the
    next (to which the horizon moves).  2) flush starts from the buffer after
    the current (oldest) one and goes till the current one, crabbing by
    buffer sequence. And there is only one flush at a time (they are
    serialised).

    Because of the above and the number of buffers being equal to 5 we can't
    get a deadlock (it is impossible to get all 5 buffers locked
    simultaneously).
  */
unknown's avatar
unknown committed
123
  pthread_mutex_t mutex;
124
  /* Cache for current log. */
unknown's avatar
unknown committed
125
  uchar buffer[TRANSLOG_WRITE_BUFFER];
126 127 128 129 130
};


struct st_buffer_cursor
{
131
  /* pointer into the buffer */
unknown's avatar
unknown committed
132
  uchar *ptr;
unknown's avatar
unknown committed
133 134
  /* current buffer */
  struct st_translog_buffer *buffer;
135
  /* How many bytes we wrote on the current page */
unknown's avatar
unknown committed
136
  uint16 current_page_fill;
137 138 139 140
  /*
    How many times we write the page on the disk during flushing process
    (for sector protection).
  */
141 142 143
  uint16 write_counter;
  /* previous write offset */
  uint16 previous_offset;
unknown's avatar
unknown committed
144
  /* Number of current buffer */
145
  uint8 buffer_no;
146 147 148 149 150 151 152 153 154 155
  /*
    True if it is just filling buffer after advancing the pointer to
    the horizon.
  */
  my_bool chaser;
  /*
    Is current page of the cursor already finished (sector protection
    should be applied if it is needed)
  */
  my_bool protected;
156 157 158 159 160 161 162
};


struct st_translog_descriptor
{
  /* *** Parameters of the log handler *** */

unknown's avatar
unknown committed
163 164 165
  /* Page cache for the log reads */
  PAGECACHE *pagecache;
  uint flags;
166 167 168
  /* max size of one log size (for new logs creation) */
  uint32 log_file_max_size;
  uint32 server_version;
169
  /* server ID (used for replication) */
170 171 172
  uint32 server_id;
  /* Loghandler's buffer capacity in case of chunk 2 filling */
  uint32 buffer_capacity_chunk_2;
173 174 175 176 177 178
  /*
    Half of the buffer capacity in case of chunk 2 filling,
    used to decide whether we will write a record in one group or many.
    It is written to the variable just to avoid division every
    time we need it.
  */
179
  uint32 half_buffer_capacity_chunk_2;
180
  /* Page overhead calculated by flags (whether CRC is enabled, etc) */
unknown's avatar
unknown committed
181
  uint16 page_overhead;
182 183 184 185
  /*
    Page capacity ("useful load") calculated by flags
    (TRANSLOG_PAGE_SIZE - page_overhead-1)
  */
unknown's avatar
unknown committed
186
  uint16 page_capacity_chunk_2;
187
  /* Path to the directory where we store log store files */
unknown's avatar
unknown committed
188
  char directory[FN_REFLEN];
189 190

  /* *** Current state of the log handler *** */
191
  /* Current and (OPENED_FILES_NUM-1) last logs number in the page cache */
192
  File log_file_num[OPENED_FILES_NUM];
193 194 195 196
  /*
    File descriptor of the directory where we store log files for syncing
    it.
  */
unknown's avatar
unknown committed
197
  File directory_fd;
198 199 200 201 202 203 204 205 206
  /* buffers for log writing */
  struct st_translog_buffer buffers[TRANSLOG_BUFFERS_NO];
  /*
     horizon - visible end of the log (here is absolute end of the log:
     position where next chunk can start)
  */
  TRANSLOG_ADDRESS horizon;
  /* horizon buffer cursor */
  struct st_buffer_cursor bc;
207 208
  /* maximum LSN of the current (not finished) file */
  LSN max_lsn;
209

210 211 212 213 214 215
  /*
    Last flushed LSN (protected by log_flush_lock).
    Pointers in the log ordered like this:
    last_lsn_checked <= flushed <= sent_to_disk <= in_buffers_only <=
    max_lsn <= horizon
  */
216
  LSN flushed;
217
  /* Last LSN sent to the disk (but maybe not written yet) */
218
  LSN sent_to_disk;
219
  TRANSLOG_ADDRESS previous_flush_horizon;
220
  /* All what is after this address is not sent to disk yet */
221
  TRANSLOG_ADDRESS in_buffers_only;
unknown's avatar
unknown committed
222
  /* protection of sent_to_disk and in_buffers_only */
223
  pthread_mutex_t sent_to_disk_lock;
unknown's avatar
unknown committed
224 225 226 227
  /*
    Protect flushed (see above) and for flush serialization (will
    be removed in v1.5)
  */
228
  pthread_mutex_t log_flush_lock;
229 230 231 232 233 234 235 236 237 238

  /* Protects changing of headers of finished files (max_lsn) */
  pthread_mutex_t file_header_lock;

  /*
    Sorted array (with protection) of files where we started writing process
    and so we can't give last LSN yet
  */
  pthread_mutex_t unfinished_files_lock;
  DYNAMIC_ARRAY unfinished_files;
239

240 241 242 243 244
  /*
    minimum number of still needed file calculated during last
    translog_purge call
  */
  uint32 min_need_file;
245 246 247 248 249 250
  /* Purger data: minimum file in the log (or 0 if unknown) */
  uint32 min_file_number;
  /* Protect purger from many calls and its data */
  pthread_mutex_t purger_lock;
  /* last low water mark checked */
  LSN last_lsn_checked;
251 252 253 254
};

static struct st_translog_descriptor log_descriptor;

255 256
ulong log_purge_type= TRANSLOG_PURGE_IMMIDIATE;
ulong log_file_size= TRANSLOG_FILE_SIZE;
257 258
ulong sync_log_dir= TRANSLOG_SYNC_DIR_NEWFILE;

unknown's avatar
unknown committed
259
/* Marker for end of log */
unknown's avatar
unknown committed
260
static uchar end_of_log= 0;
unknown's avatar
unknown committed
261
#define END_OF_LOG &end_of_log
262

263
my_bool translog_inited= 0;
264

265
/* chunk types */
unknown's avatar
unknown committed
266 267 268 269 270
#define TRANSLOG_CHUNK_LSN   0x00      /* 0 chunk refer as LSN (head or tail */
#define TRANSLOG_CHUNK_FIXED (1 << 6)  /* 1 (pseudo)fixed record (also LSN) */
#define TRANSLOG_CHUNK_NOHDR (2 << 6)  /* 2 no head chunk (till page end) */
#define TRANSLOG_CHUNK_LNGTH (3 << 6)  /* 3 chunk with chunk length */
#define TRANSLOG_CHUNK_TYPE  (3 << 6)  /* Mask to get chunk type */
271 272 273
#define TRANSLOG_REC_TYPE    0x3F               /* Mask to get record type */

/* compressed (relative) LSN constants */
unknown's avatar
unknown committed
274
#define TRANSLOG_CLSN_LEN_BITS 0xC0    /* Mask to get compressed LSN length */
275 276


unknown's avatar
unknown committed
277

278 279 280 281 282 283
#include <my_atomic.h>
/* an array that maps id of a MARIA_SHARE to this MARIA_SHARE */
static MARIA_SHARE **id_to_share= NULL;
/* lock for id_to_share */
static my_atomic_rwlock_t LOCK_id_to_share;

284 285
static my_bool translog_page_validator(uchar *page_addr, uchar* data_ptr);

286 287
static my_bool translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner);

288 289 290 291
/*
  Initialize log_record_type_descriptors
*/

unknown's avatar
unknown committed
292
LOG_DESC log_record_type_descriptor[LOGREC_NUMBER_OF_TYPES];
293

294 295

#ifndef DBUG_OFF
unknown's avatar
unknown committed
296 297

#define translog_buffer_lock_assert_owner(B) \
298
  safe_mutex_assert_owner(&(B)->mutex);
unknown's avatar
unknown committed
299 300 301 302 303
/*
  Run the buffer-mutex ownership assertion
  (translog_buffer_lock_assert_owner, i.e. safe_mutex_assert_owner on the
  buffer's mutex) on the buffer the horizon cursor log_descriptor.bc
  currently points to.
*/
void translog_lock_assert_owner()
{
  translog_buffer_lock_assert_owner(log_descriptor.bc.buffer);
}

304 305 306 307 308 309 310 311 312 313 314 315 316 317
/**
  @brief check the description table validity

  @param num             how many records should be filled
*/

static void check_translog_description_table(int num)
{
  int i;
  DBUG_ENTER("check_translog_description_table");
  DBUG_PRINT("enter", ("last record: %d", num));
  DBUG_ASSERT(num > 0);
  /* last is reserved for extending the table */
  DBUG_ASSERT(num < LOGREC_NUMBER_OF_TYPES - 1);
unknown's avatar
unknown committed
318
  DBUG_ASSERT(log_record_type_descriptor[0].rclass == LOGRECTYPE_NOT_ALLOWED);
319 320

  for (i= 0; i <= num; i++)
321 322 323 324
  {
    DBUG_PRINT("info",
               ("record type: %d  class: %d  fixed: %u  header: %u  LSNs: %u  "
                "name: %s",
unknown's avatar
unknown committed
325
                i, log_record_type_descriptor[i].rclass,
326 327 328 329
                (uint)log_record_type_descriptor[i].fixed_length,
                (uint)log_record_type_descriptor[i].read_header_len,
                (uint)log_record_type_descriptor[i].compressed_LSN,
                log_record_type_descriptor[i].name));
unknown's avatar
unknown committed
330
    switch (log_record_type_descriptor[i].rclass) {
331
    case LOGRECTYPE_NOT_ALLOWED:
332
      DBUG_ASSERT(i == 0);
333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
      break;
    case LOGRECTYPE_VARIABLE_LENGTH:
      DBUG_ASSERT(log_record_type_descriptor[i].fixed_length == 0);
      DBUG_ASSERT((log_record_type_descriptor[i].compressed_LSN == 0) ||
                  ((log_record_type_descriptor[i].compressed_LSN == 1) &&
                   (log_record_type_descriptor[i].read_header_len >=
                    LSN_STORE_SIZE)) ||
                  ((log_record_type_descriptor[i].compressed_LSN == 2) &&
                   (log_record_type_descriptor[i].read_header_len >=
                    LSN_STORE_SIZE * 2)));
      break;
    case LOGRECTYPE_PSEUDOFIXEDLENGTH:
      DBUG_ASSERT(log_record_type_descriptor[i].fixed_length ==
                  log_record_type_descriptor[i].read_header_len);
      DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN > 0);
      DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN <= 2);
      break;
    case LOGRECTYPE_FIXEDLENGTH:
      DBUG_ASSERT(log_record_type_descriptor[i].fixed_length ==
                  log_record_type_descriptor[i].read_header_len);
      DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN == 0);
      break;
    default:
      DBUG_ASSERT(0);
    }
  }
  for (i= num + 1; i < LOGREC_NUMBER_OF_TYPES; i++)
  {
unknown's avatar
unknown committed
361 362
    DBUG_ASSERT(log_record_type_descriptor[i].rclass ==
                LOGRECTYPE_NOT_ALLOWED);
363 364 365
  }
  DBUG_VOID_RETURN;
}
unknown's avatar
unknown committed
366 367 368
#else
#define translog_buffer_lock_assert_owner(B)
#define translog_lock_assert_owner()
369 370
#endif

371
static LOG_DESC INIT_LOGREC_RESERVED_FOR_CHUNKS23=
unknown's avatar
unknown committed
372
{LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
373
 "reserved", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL };
374 375 376

static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_HEAD=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
377
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
unknown's avatar
unknown committed
378
 write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
379
 "redo_insert_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
380 381

static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_TAIL=
382 383
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
unknown's avatar
unknown committed
384
 write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
385
 "redo_insert_row_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
386

387 388
/* Use this entry next time we need to add a new entry */
static LOG_DESC INIT_LOGREC_REDO_NOT_USED=
unknown's avatar
unknown committed
389
{LOGRECTYPE_VARIABLE_LENGTH, 0, 8, NULL, write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
390
 "redo_insert_row_blob", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
391

unknown's avatar
unknown committed
392
/** @todo RECOVERY BUG handle it in recovery */
393
/*QQ:TODO:header???*/
394
static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_BLOBS=
unknown's avatar
unknown committed
395 396
{LOGRECTYPE_VARIABLE_LENGTH, 0, FILEID_STORE_SIZE, NULL,
 write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
397
 "redo_insert_row_blobs", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
398 399 400 401 402

static LOG_DESC INIT_LOGREC_REDO_PURGE_ROW_HEAD=
{LOGRECTYPE_FIXEDLENGTH,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
unknown's avatar
unknown committed
403
 NULL, write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
404
 "redo_purge_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
405 406 407 408 409

static LOG_DESC INIT_LOGREC_REDO_PURGE_ROW_TAIL=
{LOGRECTYPE_FIXEDLENGTH,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
unknown's avatar
unknown committed
410
 NULL, write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
411
 "redo_purge_row_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
412

413
static LOG_DESC INIT_LOGREC_REDO_FREE_BLOCKS=
414 415
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE,
unknown's avatar
unknown committed
416
 NULL, write_hook_for_redo, NULL, 0,
417 418 419 420 421 422 423 424
 "redo_free_blocks", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_FREE_HEAD_OR_TAIL=
{LOGRECTYPE_FIXEDLENGTH,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE,
 NULL, write_hook_for_redo, NULL, 0,
 "redo_free_head_or_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
425

unknown's avatar
unknown committed
426
/* not yet used; for when we have versioning */
427
static LOG_DESC INIT_LOGREC_REDO_DELETE_ROW=
unknown's avatar
unknown committed
428
{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
429
 "redo_delete_row", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
430

unknown's avatar
unknown committed
431
/** @todo RECOVERY BUG unused, remove? */
432
static LOG_DESC INIT_LOGREC_REDO_UPDATE_ROW_HEAD=
unknown's avatar
unknown committed
433
{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
434
 "redo_update_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
435 436

static LOG_DESC INIT_LOGREC_REDO_INDEX=
unknown's avatar
unknown committed
437
{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
438
 "redo_index", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
439

440 441 442 443 444 445 446 447 448 449 450 451
static LOG_DESC INIT_LOGREC_REDO_INDEX_NEW_PAGE=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2 + KEY_NR_STORE_SIZE + 1,
 NULL, write_hook_for_redo, NULL, 0,
 "redo_index_new_page", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_INDEX_FREE_PAGE=
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
 NULL, write_hook_for_redo, NULL, 0,
 "redo_index_free_page", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

452
static LOG_DESC INIT_LOGREC_REDO_UNDELETE_ROW=
unknown's avatar
unknown committed
453
{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
454
 "redo_undelete_row", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
455 456

static LOG_DESC INIT_LOGREC_CLR_END=
unknown's avatar
unknown committed
457 458
{LOGRECTYPE_VARIABLE_LENGTH, 0, LSN_STORE_SIZE + FILEID_STORE_SIZE +
 CLR_TYPE_STORE_SIZE, NULL, write_hook_for_clr_end, NULL, 1,
unknown's avatar
unknown committed
459
 "clr_end", LOGREC_LAST_IN_GROUP, NULL, NULL};
460 461

static LOG_DESC INIT_LOGREC_PURGE_END=
unknown's avatar
unknown committed
462
{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1,
unknown's avatar
unknown committed
463
 "purge_end", LOGREC_LAST_IN_GROUP, NULL, NULL};
464 465

static LOG_DESC INIT_LOGREC_UNDO_ROW_INSERT=
unknown's avatar
unknown committed
466
{LOGRECTYPE_VARIABLE_LENGTH, 0,
467
 LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
unknown's avatar
unknown committed
468
 NULL, write_hook_for_undo_row_insert, NULL, 1,
unknown's avatar
unknown committed
469
 "undo_row_insert", LOGREC_LAST_IN_GROUP, NULL, NULL};
470 471 472 473

static LOG_DESC INIT_LOGREC_UNDO_ROW_DELETE=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
unknown's avatar
unknown committed
474
 NULL, write_hook_for_undo_row_delete, NULL, 1,
unknown's avatar
unknown committed
475
 "undo_row_delete", LOGREC_LAST_IN_GROUP, NULL, NULL};
476 477 478 479

static LOG_DESC INIT_LOGREC_UNDO_ROW_UPDATE=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
unknown's avatar
unknown committed
480
 NULL, write_hook_for_undo_row_update, NULL, 1,
unknown's avatar
unknown committed
481
 "undo_row_update", LOGREC_LAST_IN_GROUP, NULL, NULL};
482 483

static LOG_DESC INIT_LOGREC_UNDO_KEY_INSERT=
484 485
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE,
486
 NULL, write_hook_for_undo_key_insert, NULL, 1,
unknown's avatar
unknown committed
487
 "undo_key_insert", LOGREC_LAST_IN_GROUP, NULL, NULL};
488

489 490 491 492 493 494 495
/* This will never be in the log, only in the clr */
static LOG_DESC INIT_LOGREC_UNDO_KEY_INSERT_WITH_ROOT=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE + PAGE_STORE_SIZE,
 NULL, write_hook_for_undo_key, NULL, 1,
 "undo_key_insert_with_root", LOGREC_LAST_IN_GROUP, NULL, NULL};

496
static LOG_DESC INIT_LOGREC_UNDO_KEY_DELETE=
497 498 499
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE,
 NULL, write_hook_for_undo_key, NULL, 1,
unknown's avatar
unknown committed
500
 "undo_key_delete", LOGREC_LAST_IN_GROUP, NULL, NULL};
501

502 503 504 505 506 507
static LOG_DESC INIT_LOGREC_UNDO_KEY_DELETE_WITH_ROOT=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE + PAGE_STORE_SIZE,
 NULL, write_hook_for_undo_key, NULL, 1,
 "undo_key_delete_with_root", LOGREC_LAST_IN_GROUP, NULL, NULL};

508
static LOG_DESC INIT_LOGREC_PREPARE=
unknown's avatar
unknown committed
509
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
510
 "prepare", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
511 512

static LOG_DESC INIT_LOGREC_PREPARE_WITH_UNDO_PURGE=
513
{LOGRECTYPE_VARIABLE_LENGTH, 0, LSN_STORE_SIZE, NULL, NULL, NULL, 1,
unknown's avatar
unknown committed
514
 "prepare_with_undo_purge", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
515 516

static LOG_DESC INIT_LOGREC_COMMIT=
unknown's avatar
unknown committed
517 518 519
{LOGRECTYPE_FIXEDLENGTH, 0, 0, NULL,
 NULL, NULL, 0, "commit", LOGREC_IS_GROUP_ITSELF, NULL,
 NULL};
520 521

static LOG_DESC INIT_LOGREC_COMMIT_WITH_UNDO_PURGE=
unknown's avatar
unknown committed
522
{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1,
unknown's avatar
unknown committed
523
 "commit_with_undo_purge", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
524

525
static LOG_DESC INIT_LOGREC_CHECKPOINT=
unknown's avatar
unknown committed
526
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
527
 "checkpoint", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
528 529

static LOG_DESC INIT_LOGREC_REDO_CREATE_TABLE=
unknown's avatar
unknown committed
530
{LOGRECTYPE_VARIABLE_LENGTH, 0, 1 + 2, NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
531
"redo_create_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
532 533

static LOG_DESC INIT_LOGREC_REDO_RENAME_TABLE=
unknown's avatar
unknown committed
534
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
535
 "redo_rename_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
536 537

static LOG_DESC INIT_LOGREC_REDO_DROP_TABLE=
538
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
539
 "redo_drop_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
540

541 542
static LOG_DESC INIT_LOGREC_REDO_DELETE_ALL=
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE, FILEID_STORE_SIZE,
unknown's avatar
unknown committed
543
 NULL, write_hook_for_redo_delete_all, NULL, 0,
unknown's avatar
unknown committed
544
 "redo_delete_all", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
545 546

static LOG_DESC INIT_LOGREC_REDO_REPAIR_TABLE=
547
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + 4 + 8, FILEID_STORE_SIZE + 4 + 8,
unknown's avatar
unknown committed
548
 NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
549
 "redo_repair_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
550 551

static LOG_DESC INIT_LOGREC_FILE_ID=
unknown's avatar
unknown committed
552
{LOGRECTYPE_VARIABLE_LENGTH, 0, 2, NULL, write_hook_for_file_id, NULL, 0,
unknown's avatar
unknown committed
553
 "file_id", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
554 555

static LOG_DESC INIT_LOGREC_LONG_TRANSACTION_ID=
unknown's avatar
unknown committed
556
{LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
557
 "long_transaction_id", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
558

unknown's avatar
unknown committed
559 560 561 562 563
static LOG_DESC INIT_LOGREC_INCOMPLETE_LOG=
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE, FILEID_STORE_SIZE,
 NULL, NULL, NULL, 0,
 "incomplete_log", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

unknown's avatar
unknown committed
564 565 566 567 568
static LOG_DESC INIT_LOGREC_INCOMPLETE_GROUP=
{LOGRECTYPE_FIXEDLENGTH, 0, 0,
 NULL, NULL, NULL, 0,
 "incomplete_group", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

569
const myf log_write_flags= MY_WME | MY_NABP | MY_WAIT_IF_FULL;
570

571
static void loghandler_init()
572
{
573
  int i;
574 575 576 577 578 579
  log_record_type_descriptor[LOGREC_RESERVED_FOR_CHUNKS23]=
    INIT_LOGREC_RESERVED_FOR_CHUNKS23;
  log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_HEAD]=
    INIT_LOGREC_REDO_INSERT_ROW_HEAD;
  log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_TAIL]=
    INIT_LOGREC_REDO_INSERT_ROW_TAIL;
580 581
  log_record_type_descriptor[LOGREC_REDO_NOT_USED]=
    INIT_LOGREC_REDO_NOT_USED;
582 583 584 585 586 587
  log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_BLOBS]=
    INIT_LOGREC_REDO_INSERT_ROW_BLOBS;
  log_record_type_descriptor[LOGREC_REDO_PURGE_ROW_HEAD]=
    INIT_LOGREC_REDO_PURGE_ROW_HEAD;
  log_record_type_descriptor[LOGREC_REDO_PURGE_ROW_TAIL]=
    INIT_LOGREC_REDO_PURGE_ROW_TAIL;
588 589 590 591
  log_record_type_descriptor[LOGREC_REDO_FREE_BLOCKS]=
    INIT_LOGREC_REDO_FREE_BLOCKS;
  log_record_type_descriptor[LOGREC_REDO_FREE_HEAD_OR_TAIL]=
    INIT_LOGREC_REDO_FREE_HEAD_OR_TAIL;
592 593 594 595 596 597
  log_record_type_descriptor[LOGREC_REDO_DELETE_ROW]=
    INIT_LOGREC_REDO_DELETE_ROW;
  log_record_type_descriptor[LOGREC_REDO_UPDATE_ROW_HEAD]=
    INIT_LOGREC_REDO_UPDATE_ROW_HEAD;
  log_record_type_descriptor[LOGREC_REDO_INDEX]=
    INIT_LOGREC_REDO_INDEX;
598 599 600 601
  log_record_type_descriptor[LOGREC_REDO_INDEX_NEW_PAGE]=
    INIT_LOGREC_REDO_INDEX_NEW_PAGE;
  log_record_type_descriptor[LOGREC_REDO_INDEX_FREE_PAGE]=
    INIT_LOGREC_REDO_INDEX_FREE_PAGE;
602 603 604 605 606 607 608 609 610 611 612 613 614 615
  log_record_type_descriptor[LOGREC_REDO_UNDELETE_ROW]=
    INIT_LOGREC_REDO_UNDELETE_ROW;
  log_record_type_descriptor[LOGREC_CLR_END]=
    INIT_LOGREC_CLR_END;
  log_record_type_descriptor[LOGREC_PURGE_END]=
    INIT_LOGREC_PURGE_END;
  log_record_type_descriptor[LOGREC_UNDO_ROW_INSERT]=
    INIT_LOGREC_UNDO_ROW_INSERT;
  log_record_type_descriptor[LOGREC_UNDO_ROW_DELETE]=
    INIT_LOGREC_UNDO_ROW_DELETE;
  log_record_type_descriptor[LOGREC_UNDO_ROW_UPDATE]=
    INIT_LOGREC_UNDO_ROW_UPDATE;
  log_record_type_descriptor[LOGREC_UNDO_KEY_INSERT]=
    INIT_LOGREC_UNDO_KEY_INSERT;
616 617
  log_record_type_descriptor[LOGREC_UNDO_KEY_INSERT_WITH_ROOT]=
    INIT_LOGREC_UNDO_KEY_INSERT_WITH_ROOT;
618 619
  log_record_type_descriptor[LOGREC_UNDO_KEY_DELETE]=
    INIT_LOGREC_UNDO_KEY_DELETE;
620 621
  log_record_type_descriptor[LOGREC_UNDO_KEY_DELETE_WITH_ROOT]=
    INIT_LOGREC_UNDO_KEY_DELETE_WITH_ROOT;
622 623 624 625 626 627 628 629
  log_record_type_descriptor[LOGREC_PREPARE]=
    INIT_LOGREC_PREPARE;
  log_record_type_descriptor[LOGREC_PREPARE_WITH_UNDO_PURGE]=
    INIT_LOGREC_PREPARE_WITH_UNDO_PURGE;
  log_record_type_descriptor[LOGREC_COMMIT]=
    INIT_LOGREC_COMMIT;
  log_record_type_descriptor[LOGREC_COMMIT_WITH_UNDO_PURGE]=
    INIT_LOGREC_COMMIT_WITH_UNDO_PURGE;
630 631
  log_record_type_descriptor[LOGREC_CHECKPOINT]=
    INIT_LOGREC_CHECKPOINT;
632 633 634 635 636 637
  log_record_type_descriptor[LOGREC_REDO_CREATE_TABLE]=
    INIT_LOGREC_REDO_CREATE_TABLE;
  log_record_type_descriptor[LOGREC_REDO_RENAME_TABLE]=
    INIT_LOGREC_REDO_RENAME_TABLE;
  log_record_type_descriptor[LOGREC_REDO_DROP_TABLE]=
    INIT_LOGREC_REDO_DROP_TABLE;
638 639 640 641
  log_record_type_descriptor[LOGREC_REDO_DELETE_ALL]=
    INIT_LOGREC_REDO_DELETE_ALL;
  log_record_type_descriptor[LOGREC_REDO_REPAIR_TABLE]=
    INIT_LOGREC_REDO_REPAIR_TABLE;
642 643 644 645
  log_record_type_descriptor[LOGREC_FILE_ID]=
    INIT_LOGREC_FILE_ID;
  log_record_type_descriptor[LOGREC_LONG_TRANSACTION_ID]=
    INIT_LOGREC_LONG_TRANSACTION_ID;
unknown's avatar
unknown committed
646 647
  log_record_type_descriptor[LOGREC_INCOMPLETE_LOG]=
    INIT_LOGREC_INCOMPLETE_LOG;
unknown's avatar
unknown committed
648 649 650
  log_record_type_descriptor[LOGREC_INCOMPLETE_GROUP]=
    INIT_LOGREC_INCOMPLETE_GROUP;
  for (i= LOGREC_INCOMPLETE_GROUP + 1;
651 652
       i < LOGREC_NUMBER_OF_TYPES;
       i++)
unknown's avatar
unknown committed
653
    log_record_type_descriptor[i].rclass= LOGRECTYPE_NOT_ALLOWED;
unknown's avatar
unknown committed
654 655 656
#ifndef DBUG_OFF
  check_translog_description_table(LOGREC_INCOMPLETE_GROUP);
#endif
657 658
};

659

unknown's avatar
unknown committed
660 661
/* all possible flags page overheads */
static uint page_overhead[TRANSLOG_FLAGS_NUM];
662 663 664 665 666 667 668 669 670 671 672

typedef struct st_translog_validator_data
{
  TRANSLOG_ADDRESS *addr;
  my_bool was_recovered;
} TRANSLOG_VALIDATOR_DATA;


const char *maria_data_root;


unknown's avatar
unknown committed
673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691
/*
  Check cursor/buffer consistence

  SYNOPSIS
    translog_check_cursor
    cursor               cursor which will be checked

  DESCRIPTION
    Every check is a DBUG_ASSERT; the function does nothing else and
    returns no value.
*/

static void translog_check_cursor(struct st_buffer_cursor *cursor)
{
  /* unless the cursor is a chaser, its offset in the buffer is buffer size */
  DBUG_ASSERT(cursor->chaser ||
              ((ulong) (cursor->ptr - cursor->buffer->buffer) ==
               cursor->buffer->size));
  /* the cursor's cached buffer number must match the buffer it points to */
  DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no);
  /* position inside the page must agree with the recorded page fill */
  DBUG_ASSERT((cursor->ptr -cursor->buffer->buffer) %TRANSLOG_PAGE_SIZE ==
              cursor->current_page_fill % TRANSLOG_PAGE_SIZE);
  /* a page can not hold more than TRANSLOG_PAGE_SIZE bytes */
  DBUG_ASSERT(cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
}

692
/*
693
  @brief Get file name of the log by log number
694

695 696
  @param file_no         Number of the log we want to open
  @param path            Pointer to buffer where file name will be
unknown's avatar
unknown committed
697
                         stored (must be FN_REFLEN bytes at least)
698 699

  @return pointer to path
700 701
*/

702
char *translog_filename_by_fileno(uint32 file_no, char *path)
703
{
unknown's avatar
unknown committed
704 705
  char buff[11], *end;
  uint length;
706
  DBUG_ENTER("translog_filename_by_fileno");
unknown's avatar
unknown committed
707
  DBUG_ASSERT(file_no <= 0xfffffff);
unknown's avatar
unknown committed
708 709 710 711

  /* log_descriptor.directory is already formated */
  end= strxmov(path, log_descriptor.directory, "maria_log.0000000", NullS);
  length= (uint) (int10_to_str(file_no, buff, 10) - buff);
712
  strmov(end - length +1, buff);
unknown's avatar
unknown committed
713

unknown's avatar
unknown committed
714
  DBUG_PRINT("info", ("Path: '%s'  path: 0x%lx", path, (ulong) path));
unknown's avatar
unknown committed
715
  DBUG_RETURN(path);
716 717 718
}


719 720
/**
  @brief Create log file with given number without cache

  @param file_no         Number of the log we want to open

  @note If sync_log_dir requires it, the log directory is synced after
  the file has been created. If that sync fails the just created file
  is closed again, so no file descriptor is leaked on the error path.

  retval -1  error
  retval # file descriptor number
*/

static File create_logfile_by_number_no_cache(uint32 file_no)
{
  File file;
  char path[FN_REFLEN];
  DBUG_ENTER("create_logfile_by_number_no_cache");

  /* TODO: add O_DIRECT to open flags (when buffer is aligned) */
  if ((file= my_create(translog_filename_by_fileno(file_no, path),
                       0, O_BINARY | O_RDWR, MYF(MY_WME))) < 0)
  {
    UNRECOVERABLE_ERROR(("Error %d during creating file '%s'", errno, path));
    DBUG_RETURN(-1);
  }
  if (sync_log_dir >= TRANSLOG_SYNC_DIR_NEWFILE &&
      my_sync(log_descriptor.directory_fd, MYF(MY_WME | MY_IGNORE_BADFD)))
  {
    /* Report first: my_close() below may change errno */
    UNRECOVERABLE_ERROR(("Error %d during syncing directory '%s'",
                         errno, log_descriptor.directory));
    /* The caller gets -1 only, so the descriptor has to be released here */
    my_close(file, MYF(0));
    DBUG_RETURN(-1);
  }
  DBUG_PRINT("info", ("File: '%s'  handler: %d", path, file));
  DBUG_RETURN(file);
}

/**
  @brief Open (not create) log file with given number without cache

  @param file_no         Number of the log we want to open

  retval -1  error
  retval # file descriptor number
759 760
*/

761
static File open_logfile_by_number_no_cache(uint32 file_no)
762 763 764 765 766
{
  File file;
  char path[FN_REFLEN];
  DBUG_ENTER("open_logfile_by_number_no_cache");

767
  /* TODO: add O_DIRECT to open flags (when buffer is aligned) */
768
  /* TODO: use my_create() */
unknown's avatar
unknown committed
769
  if ((file= my_open(translog_filename_by_fileno(file_no, path),
770
                     O_BINARY | O_RDWR,
771 772 773
                     MYF(MY_WME))) < 0)
  {
    UNRECOVERABLE_ERROR(("Error %d during opening file '%s'", errno, path));
unknown's avatar
unknown committed
774
    DBUG_RETURN(-1);
775
  }
unknown's avatar
unknown committed
776
  DBUG_PRINT("info", ("File: '%s'  handler: %d", path, file));
777 778 779 780
  DBUG_RETURN(file);
}


781 782 783 784 785 786 787 788
/* Tag stored at the very beginning of every transaction log file */
uchar	NEAR maria_trans_file_magic[]=
{ (uchar) 254, (uchar) 254, (uchar) 11, '\001', 'M', 'A', 'R', 'I', 'A',
 'L', 'O', 'G' };
/*
  Size of the meaningful part of the log file header, in the order written
  by translog_write_file_header():
    file tag (maria_trans_file_magic)
    8 bytes  timestamp
    4 bytes  maria (loghandler) version
    4 bytes  mysql version
    4 bytes  server ID
    2 bytes  page size / DISK_DRIVE_SECTOR_SIZE
    3 bytes  file number
    LSN_STORE_SIZE bytes  max LSN (the last field of the header)
*/
#define LOG_HEADER_DATA_SIZE (sizeof(maria_trans_file_magic) + \
                              8 + 4 + 4 + 4 + 2 + 3 + \
                              LSN_STORE_SIZE)


789 790 791 792 793 794
/*
  Write log file page header in the just opened new log file

  SYNOPSIS
    translog_write_file_header();

unknown's avatar
unknown committed
795 796 797
   NOTES
    First page is just a marker page; We don't store any real log data in it.

798 799 800 801 802
  RETURN
    0 OK
    1 ERROR
*/

803
static my_bool translog_write_file_header()
804 805
{
  ulonglong timestamp;
unknown's avatar
unknown committed
806
  uchar page_buff[TRANSLOG_PAGE_SIZE], *page= page_buff;
807 808 809
  DBUG_ENTER("translog_write_file_header");

  /* file tag */
unknown's avatar
unknown committed
810 811
  memcpy(page, maria_trans_file_magic, sizeof(maria_trans_file_magic));
  page+= sizeof(maria_trans_file_magic);
812 813
  /* timestamp */
  timestamp= my_getsystime();
unknown's avatar
unknown committed
814 815
  int8store(page, timestamp);
  page+= 8;
816
  /* maria version */
unknown's avatar
unknown committed
817 818
  int4store(page, TRANSLOG_VERSION_ID);
  page+= 4;
819
  /* mysql version (MYSQL_VERSION_ID) */
unknown's avatar
unknown committed
820 821
  int4store(page, log_descriptor.server_version);
  page+= 4;
822
  /* server ID */
unknown's avatar
unknown committed
823 824 825 826 827
  int4store(page, log_descriptor.server_id);
  page+= 4;
  /* loghandler page_size/DISK_DRIVE_SECTOR_SIZE */
  int2store(page, TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE);
  page+= 2;
828
  /* file number */
unknown's avatar
unknown committed
829 830
  int3store(page, LSN_FILE_NO(log_descriptor.horizon));
  page+= 3;
unknown's avatar
unknown committed
831
  lsn_store(page, LSN_IMPOSSIBLE);
832
  page+= LSN_STORE_SIZE;
unknown's avatar
unknown committed
833
  memset(page, TRANSLOG_FILLER, sizeof(page_buff) - (page- page_buff));
834

unknown's avatar
unknown committed
835
  DBUG_RETURN(my_pwrite(log_descriptor.log_file_num[0], page_buff,
836
                        sizeof(page_buff), 0, log_write_flags) != 0);
837 838
}

839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855
/**
  @brief write the new LSN on the given file header

  @param file            The file descriptor
  @param lsn             That LSN which should be written

  @note The LSN is the last field of the header, so it is written at
  offset (LOG_HEADER_DATA_SIZE - LSN_STORE_SIZE); the file is synced after
  a successful write.

  @retval 0 OK
  @retval 1 Error
*/

static my_bool translog_max_lsn_to_header(File file, LSN lsn)
{
  uchar lsn_buff[LSN_STORE_SIZE];
  DBUG_ENTER("translog_max_lsn_to_header");
  DBUG_PRINT("enter", ("File descriptor: %ld  "
                       "lsn: (%lu,0x%lx)",
                       (long) file,
                       LSN_IN_PARTS(lsn)));

  lsn_store(lsn_buff, lsn);

  /* No sync attempt if the write itself failed */
  if (my_pwrite(file, lsn_buff,
                LSN_STORE_SIZE,
                (LOG_HEADER_DATA_SIZE - LSN_STORE_SIZE),
                log_write_flags) != 0)
    DBUG_RETURN(1);

  DBUG_RETURN(my_sync(file, MYF(MY_WME)) != 0);
}

867

868 869 870 871 872 873
/*
  Information from transaction log file header
*/

typedef struct st_loghandler_file_info
{
874
  /*
875 876 877
    LSN_IMPOSSIBLE for current file (not finished file).
    Maximum LSN of the record which parts stored in the
    file.
878 879
  */
  LSN max_lsn;
880 881
  ulonglong timestamp;   /* Time stamp */
  ulong maria_version;   /* Version of maria loghandler */
882
  ulong mysql_version;   /* Version of mysql server */
883 884 885 886 887 888
  ulong server_id;       /* Server ID */
  uint page_size;        /* Loghandler page size */
  uint file_number;      /* Number of the file (from the file header) */
} LOGHANDLER_FILE_INFO;

/*
889
  @brief Read hander file information from loghandler file
890 891

  @param desc header information descriptor to be filled with information
892
  @param file file descriptor to read
893 894 895 896 897

  @retval 0 OK
  @retval 1 Error
*/

898
my_bool translog_read_file_header(LOGHANDLER_FILE_INFO *desc, File file)
899
{
900
  uchar page_buff[LOG_HEADER_DATA_SIZE], *ptr;
901 902
  DBUG_ENTER("translog_read_file_header");

903
  if (my_pread(file, page_buff,
904 905 906 907 908 909 910 911 912 913
               sizeof(page_buff), 0, MYF(MY_FNABP | MY_WME)))
  {
    DBUG_PRINT("info", ("log read fail error: %d", my_errno));
    DBUG_RETURN(1);
  }
  ptr= page_buff + sizeof(maria_trans_file_magic);
  desc->timestamp= uint8korr(ptr);
  ptr+= 8;
  desc->maria_version= uint4korr(ptr);
  ptr+= 4;
914
  desc->mysql_version= uint4korr(ptr);
915 916
  ptr+= 4;
  desc->server_id= uint4korr(ptr);
917
  ptr+= 4;
918 919 920
  desc->page_size= uint2korr(ptr);
  ptr+= 2;
  desc->file_number= uint3korr(ptr);
921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939
  ptr+=3;
  desc->max_lsn= lsn_korr(ptr);
  DBUG_RETURN(0);
}


/*
  @brief set the lsn to the files from_file - to_file if it is greater
  then written in the file

  @param from_file       first file number (min)
  @param to_file         last file number (max)
  @param lsn             the lsn for writing
  @param is_locked       true if current thread locked the log handler

  @retval 0 OK
  @retval 1 Error
*/

940
static my_bool translog_set_lsn_for_files(uint32 from_file, uint32 to_file,
941 942
                                          LSN lsn, my_bool is_locked)
{
943
  uint32 file;
944 945
  DBUG_ENTER("translog_set_lsn_for_files");
  DBUG_PRINT("enter", ("From: %lu  to: %lu  lsn: (%lu,0x%lx)  locked: %d",
946
                       (ulong) from_file, (ulong) to_file,
unknown's avatar
unknown committed
947
                       LSN_IN_PARTS(lsn),
948 949 950 951 952 953 954
                       is_locked));
  DBUG_ASSERT(from_file <= to_file);
  DBUG_ASSERT(from_file > 0); /* we have not file 0 */

  /* Checks the current file (not finished yet file) */
  if (!is_locked)
    translog_lock();
955
  if (to_file == (uint32) LSN_FILE_NO(log_descriptor.horizon))
956 957 958 959 960 961 962 963 964
  {
    if (likely(cmp_translog_addr(lsn, log_descriptor.max_lsn) > 0))
      log_descriptor.max_lsn= lsn;
    to_file--;
  }
  if (!is_locked)
    translog_unlock();

  /* Checks finished files if they are */
965
  pthread_mutex_lock(&log_descriptor.file_header_lock);
966 967 968 969 970 971 972 973 974 975
  for (file= from_file; file <= to_file; file++)
  {
    LOGHANDLER_FILE_INFO info;
    File fd= open_logfile_by_number_no_cache(file);
    if (fd < 0 ||
        translog_read_file_header(&info, fd) ||
        (cmp_translog_addr(lsn, info.max_lsn) > 0 &&
         translog_max_lsn_to_header(fd, lsn)))
      DBUG_RETURN(1);
  }
976
  pthread_mutex_unlock(&log_descriptor.file_header_lock);
977

978 979 980 981
  DBUG_RETURN(0);
}


982 983 984
/* descriptor of file in unfinished_files */
struct st_file_counter
{
985 986
  uint32 file;            /* file number */
  uint32 counter;         /* counter for started writes */
987 988 989 990 991 992 993 994 995
};


/*
  @brief mark file "in progress" (for multi-group records)

  @param file            log file number
*/

996
static void translog_mark_file_unfinished(uint32 file)
997 998 999 1000 1001 1002
{
  int place, i;
  struct st_file_counter fc, *fc_ptr;
  fc.file= file; fc.counter= 1;

  DBUG_ENTER("translog_mark_file_unfinished");
1003
  DBUG_PRINT("enter", ("file: %lu", (ulong) file));
1004

1005
  pthread_mutex_lock(&log_descriptor.unfinished_files_lock);
1006 1007 1008 1009 1010 1011 1012 1013

  if (log_descriptor.unfinished_files.elements == 0)
  {
    insert_dynamic(&log_descriptor.unfinished_files, (uchar*) &fc);
    DBUG_PRINT("info", ("The first element inserted"));
    goto end;
  }

1014
  for (place= log_descriptor.unfinished_files.elements - 1;
1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055
       place >= 0;
       place--)
  {
    fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
                            place, struct st_file_counter *);
    if (fc_ptr->file <= file)
      break;
  }

  if (place >= 0 && fc_ptr->file == file)
  {
     fc_ptr->counter++;
     DBUG_PRINT("info", ("counter increased"));
     goto end;
  }

  if (place == (int)log_descriptor.unfinished_files.elements)
  {
    insert_dynamic(&log_descriptor.unfinished_files, (uchar*) &fc);
    DBUG_PRINT("info", ("The last element inserted"));
    goto end;
  }
  /* shift and assign new element */
  insert_dynamic(&log_descriptor.unfinished_files,
                 (uchar*)
                 dynamic_element(&log_descriptor.unfinished_files,
                                 log_descriptor.unfinished_files.elements- 1,
                                 struct st_file_counter *));
  for(i= log_descriptor.unfinished_files.elements - 1; i > place; i--)
  {
    /* we do not use set_dynamic() to avoid unneeded checks */
    memcpy(dynamic_element(&log_descriptor.unfinished_files,
                           i, struct st_file_counter *),
           dynamic_element(&log_descriptor.unfinished_files,
                           i + 1, struct st_file_counter *),
           sizeof(struct st_file_counter));
  }
  memcpy(dynamic_element(&log_descriptor.unfinished_files,
                         place + 1, struct st_file_counter *),
         &fc, sizeof(struct st_file_counter));
end:
1056
  pthread_mutex_unlock(&log_descriptor.unfinished_files_lock);
1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067
  DBUG_VOID_RETURN;
}



/*
  @brief remove file mark "in progress" (for multi-group records)

  @param file            log file number
*/

1068
static void translog_mark_file_finished(uint32 file)
1069 1070 1071 1072
{
  int i;
  struct st_file_counter *fc_ptr;
  DBUG_ENTER("translog_mark_file_finished");
1073
  DBUG_PRINT("enter", ("file: %lu", (ulong) file));
1074

unknown's avatar
unknown committed
1075 1076
  LINT_INIT(fc_ptr);

1077
  pthread_mutex_lock(&log_descriptor.unfinished_files_lock);
1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094

  DBUG_ASSERT(log_descriptor.unfinished_files.elements > 0);
  for (i= 0;
       i < (int) log_descriptor.unfinished_files.elements;
       i++)
  {
    fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
                            i, struct st_file_counter *);
    if (fc_ptr->file == file)
    {
      break;
    }
  }
  DBUG_ASSERT(i < (int) log_descriptor.unfinished_files.elements);

  if (! --fc_ptr->counter)
    delete_dynamic_element(&log_descriptor.unfinished_files, i);
1095
  pthread_mutex_unlock(&log_descriptor.unfinished_files_lock);
1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110
  DBUG_VOID_RETURN;
}


/*
  @brief get max LSN of the record which parts stored in this file

  @param file            file number

  @return requested LSN or LSN_IMPOSSIBLE/LSN_ERROR
    @retval LSN_IMPOSSIBLE File is still not finished
    @retval LSN_ERROR Error opening file
    @retval # LSN of the record which parts stored in this file
*/

1111
LSN translog_get_file_max_lsn_stored(uint32 file)
1112
{
1113
  uint32 limit= FILENO_IMPOSSIBLE;
1114
  DBUG_ENTER("translog_get_file_max_lsn_stored");
1115
  DBUG_PRINT("enter", ("file: %lu", (ulong)file));
unknown's avatar
unknown committed
1116
  DBUG_ASSERT(translog_inited == 1);
1117

1118
  pthread_mutex_lock(&log_descriptor.unfinished_files_lock);
1119 1120 1121 1122 1123 1124 1125 1126 1127

  /* find file with minimum file number "in progress" */
  if (log_descriptor.unfinished_files.elements > 0)
  {
    struct st_file_counter *fc_ptr;
    fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
                            0, struct st_file_counter *);
    limit= fc_ptr->file; /* minimal file number "in progress" */
  }
1128
  pthread_mutex_unlock(&log_descriptor.unfinished_files_lock);
1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155

  /*
    if there is no "in progress file" then unfinished file is in progress
    for sure
  */
  if (limit == FILENO_IMPOSSIBLE)
  {
    TRANSLOG_ADDRESS horizon= translog_get_horizon();
    limit= LSN_FILE_NO(horizon);
  }

  if (file >= limit)
  {
    DBUG_PRINT("info", ("The file in in progress"));
    DBUG_RETURN(LSN_IMPOSSIBLE);
  }

  {
    LOGHANDLER_FILE_INFO info;
    File fd= open_logfile_by_number_no_cache(file);
    if (fd < 0 ||
        translog_read_file_header(&info, fd))
    {
      DBUG_PRINT("error", ("Can't read file header"));
      DBUG_RETURN(LSN_ERROR);
    }
    DBUG_PRINT("error", ("Max lsn: (%lu,0x%lx)",
unknown's avatar
unknown committed
1156
                         LSN_IN_PARTS(info.max_lsn)));
1157 1158 1159 1160
    DBUG_RETURN(info.max_lsn);
  }
}

1161 1162 1163 1164 1165 1166 1167 1168
/*
  Initialize transaction log file buffer

  SYNOPSIS
    translog_buffer_init()
    buffer               The buffer to initialize

  RETURN
unknown's avatar
unknown committed
1169 1170
    0  OK
    1  Error
1171 1172
*/

1173
static my_bool translog_buffer_init(struct st_translog_buffer *buffer)
1174 1175
{
  DBUG_ENTER("translog_buffer_init");
unknown's avatar
unknown committed
1176
  buffer->last_lsn= LSN_IMPOSSIBLE;
1177
  /* This Buffer File */
unknown's avatar
unknown committed
1178
  buffer->file= -1;
1179
  buffer->overlay= 0;
1180
  /* cache for current log */
unknown's avatar
unknown committed
1181
  memset(buffer->buffer, TRANSLOG_FILLER, TRANSLOG_WRITE_BUFFER);
1182 1183 1184
  /* Buffer size */
  buffer->size= 0;
  /* cond of thread which is waiting for buffer filling */
unknown's avatar
unknown committed
1185 1186
  if (pthread_cond_init(&buffer->waiting_filling_buffer, 0))
    DBUG_RETURN(1);
1187
  /* Number of records which are in copy progress */
1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205
  buffer->copy_to_buffer_in_progress= 0;
  /* list of waiting buffer ready threads */
  buffer->waiting_flush= 0;
  /* lock for the buffer. Current buffer also lock the handler */
  if (pthread_mutex_init(&buffer->mutex, MY_MUTEX_INIT_FAST))
    DBUG_RETURN(1);
  DBUG_RETURN(0);
}


/*
  Close transaction log file by descriptor

  SYNOPSIS
    translog_close_log_file()
    file                 file descriptor

  NOTES
    Pages of the file are flushed from the log page cache, then the file
    is synced and closed. The close is attempted even if the sync failed.

  RETURN
    0  OK
    1  Error
*/

static my_bool translog_close_log_file(File file)
{
  PAGECACHE_FILE cache_file;
  int sync_error, close_error;

  cache_file.file= file;
  flush_pagecache_blocks(log_descriptor.pagecache, &cache_file,
                         FLUSH_RELEASE);
  /*
    Sync file when we close it
    TODO: sync only we have changed the log
  */
  sync_error= my_sync(file, MYF(MY_WME));
  close_error= my_close(file, MYF(MY_WME));
  return test(sync_error | close_error);
}


1226 1227
/**
  @brief Create and fill header of new file.
1228

1229 1230 1231
  @note the caller must call it right after it has increased
   log_descriptor.horizon to the new file
   (log_descriptor.horizon+= LSN_ONE_FILE)
1232

1233 1234 1235

  @retval 0 OK
  @retval 1 Error
1236 1237
*/

1238
static my_bool translog_create_new_file()
1239 1240
{
  int i;
1241
  uint32 file_no= LSN_FILE_NO(log_descriptor.horizon);
1242 1243
  DBUG_ENTER("translog_create_new_file");

1244
  /*
1245 1246 1247 1248
    Writes max_lsn to the file header before finishing it (there is no need
    to lock file header buffer because it is still unfinished file, so only
    one thread can finish the file and nobody interested of LSN of current
    (unfinished) file, because no one can purge it).
1249 1250 1251 1252 1253
  */
  translog_max_lsn_to_header(log_descriptor.log_file_num[0],
                             log_descriptor.max_lsn);
  log_descriptor.max_lsn= LSN_IMPOSSIBLE;

unknown's avatar
unknown committed
1254
  if (log_descriptor.log_file_num[OPENED_FILES_NUM - 1] != -1 &&
1255 1256 1257 1258 1259 1260 1261
      translog_close_log_file(log_descriptor.log_file_num[OPENED_FILES_NUM -
                                                          1]))
    DBUG_RETURN(1);
  for (i= OPENED_FILES_NUM - 1; i > 0; i--)
    log_descriptor.log_file_num[i]= log_descriptor.log_file_num[i - 1];

  if ((log_descriptor.log_file_num[0]=
1262
       create_logfile_by_number_no_cache(file_no)) == -1 ||
1263 1264 1265
      translog_write_file_header())
    DBUG_RETURN(1);

unknown's avatar
unknown committed
1266
  if (ma_control_file_write_and_force(LSN_IMPOSSIBLE, file_no,
1267 1268 1269 1270 1271 1272 1273
                                      CONTROL_FILE_UPDATE_ONLY_LOGNO))
    DBUG_RETURN(1);

  DBUG_RETURN(0);
}


unknown's avatar
unknown committed
1274 1275
/**
  @brief Locks the loghandler buffer.
1276

unknown's avatar
unknown committed
1277
  @param buffer          This buffer which should be locked
1278

unknown's avatar
unknown committed
1279 1280 1281 1282
  @note See comment before buffer 'mutex' variable.

  @retval 0 OK
  @retval 1 Error
1283 1284 1285 1286
*/

static my_bool translog_buffer_lock(struct st_translog_buffer *buffer)
{
1287
  my_bool res;
1288
  DBUG_ENTER("translog_buffer_lock");
unknown's avatar
unknown committed
1289
  DBUG_PRINT("enter",
1290 1291
             ("Lock buffer #%u: (0x%lx)", (uint) buffer->buffer_no,
              (ulong) buffer));
1292
  res= (pthread_mutex_lock(&buffer->mutex) != 0);
1293
  DBUG_RETURN(res);
1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304
}


/*
  Unlock the loghandler buffer

  SYNOPSIS
    translog_buffer_unlock()
    buffer               This buffer which should be unlocked

  RETURN
unknown's avatar
unknown committed
1305 1306
    0  OK
    1  Error
1307 1308 1309 1310
*/

static my_bool translog_buffer_unlock(struct st_translog_buffer *buffer)
{
1311
  my_bool res;
1312
  DBUG_ENTER("translog_buffer_unlock");
1313 1314
  DBUG_PRINT("enter", ("Unlock buffer... #%u (0x%lx)",
                       (uint) buffer->buffer_no, (ulong) buffer));
1315

1316
  res= (pthread_mutex_unlock(&buffer->mutex) != 0);
1317 1318 1319 1320 1321
  DBUG_RETURN(res);
}


/*
  Write a header on the page

  SYNOPSIS
    translog_new_page_header()
    horizon              Where to write the page
    cursor               Where to write the page

  NOTE
    - space for page header should be checked before
    - header layout: 3 bytes page number, 3 bytes file number, 1 byte
      flags, then (depending on log_descriptor.flags) space for CRC and
      the sector protection table
    - horizon, cursor position, current_page_fill and (for non-chaser
      cursor) buffer size are advanced by the header length
*/

static void translog_new_page_header(TRANSLOG_ADDRESS *horizon,
                                     struct st_buffer_cursor *cursor)
{
  uchar *pos;
  uint header_length;

  DBUG_ENTER("translog_new_page_header");
  DBUG_ASSERT(cursor->ptr);

  /* A new page is not finished/protected yet */
  cursor->protected= 0;

  pos= cursor->ptr;

  /* Page number */
  int3store(pos, LSN_OFFSET(*horizon) / TRANSLOG_PAGE_SIZE);
  pos+= 3;

  /* File number */
  int3store(pos, LSN_FILE_NO(*horizon));
  pos+= 3;

  /* Flags */
  DBUG_ASSERT(TRANSLOG_PAGE_FLAGS == (pos - cursor->ptr));
  cursor->ptr[TRANSLOG_PAGE_FLAGS]= (uchar) log_descriptor.flags;
  pos++;

  if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
  {
#ifndef DBUG_OFF
    DBUG_PRINT("info", ("write  0x11223344 CRC to (%lu,0x%lx)",
                        LSN_IN_PARTS(*horizon)));
    /* This will be overwritten by real CRC; This is just for debugging */
    int4store(pos, 0x11223344);
#endif
    /* CRC will be put when page is finished */
    pos+= CRC_LENGTH;
  }

  if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
  {
    /*
      The time() works like "random" values producer because it is enough to
      have such "random" for this purpose and it will not interfere with
      higher level pseudo random value generator
    */
    uint16 pseudo_random= time(NULL);
    pos[0]= pseudo_random & 0xFF;
    /* one byte of the table for every sector of the page */
    pos+= TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
  }

  header_length= (pos - cursor->ptr);
  (*horizon)+= header_length; /* increasing the offset part of the address */
  cursor->current_page_fill= header_length;
  if (!cursor->chaser)
    cursor->buffer->size+= header_length;
  cursor->ptr= pos;

  DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx  chaser: %d  Size: %lu (%lu)",
                      (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
                      cursor->chaser, (ulong) cursor->buffer->size,
                      (ulong) (cursor->ptr - cursor->buffer->buffer)));
  translog_check_cursor(cursor);
  DBUG_VOID_RETURN;
}


/*
  Put sector protection on the page image

  SYNOPSIS
    translog_put_sector_protection()
    page                 reference on the page content
    cursor               cursor of the buffer
unknown's avatar
unknown committed
1399 1400 1401 1402

  NOTES
    We put a sector protection on all following sectors on the page,
    except the first sector that is protected by page header.
1403 1404
*/

unknown's avatar
unknown committed
1405
static void translog_put_sector_protection(uchar *page,
1406 1407
                                           struct st_buffer_cursor *cursor)
{
unknown's avatar
unknown committed
1408
  uchar *table= page + log_descriptor.page_overhead -
unknown's avatar
unknown committed
1409 1410
    TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
  uint i, offset;
unknown's avatar
unknown committed
1411 1412 1413
  uint16 last_protected_sector= ((cursor->previous_offset - 1) /
                                 DISK_DRIVE_SECTOR_SIZE);
  uint16 start_sector= cursor->previous_offset / DISK_DRIVE_SECTOR_SIZE;
unknown's avatar
unknown committed
1414
  uint8 value= table[0] + cursor->write_counter;
1415
  DBUG_ENTER("translog_put_sector_protection");
unknown's avatar
unknown committed
1416

1417
  if (start_sector == 0)
unknown's avatar
unknown committed
1418 1419 1420 1421
  {
    /* First sector is protected by file & page numbers in the page header. */
    start_sector= 1;
  }
1422

unknown's avatar
unknown committed
1423 1424
  DBUG_PRINT("enter", ("Write counter:%u  value:%u  offset:%u, "
                       "last protected:%u  start sector:%u",
1425 1426 1427 1428 1429 1430
                       (uint) cursor->write_counter,
                       (uint) value,
                       (uint) cursor->previous_offset,
                       (uint) last_protected_sector, (uint) start_sector));
  if (last_protected_sector == start_sector)
  {
unknown's avatar
unknown committed
1431
    i= last_protected_sector;
unknown's avatar
unknown committed
1432
    offset= last_protected_sector * DISK_DRIVE_SECTOR_SIZE;
1433 1434 1435 1436
    /* restore data, because we modified sector which was protected */
    if (offset < cursor->previous_offset)
      page[offset]= table[i];
  }
unknown's avatar
unknown committed
1437 1438 1439
  for (i= start_sector, offset= start_sector * DISK_DRIVE_SECTOR_SIZE;
       i < TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
       i++, (offset+= DISK_DRIVE_SECTOR_SIZE))
1440
  {
unknown's avatar
unknown committed
1441 1442
    DBUG_PRINT("info", ("sector:%u  offset:%u  data 0x%x",
                        i, offset, (uint) page[offset]));
1443
    table[i]= page[offset];
unknown's avatar
unknown committed
1444 1445 1446
    page[offset]= value;
    DBUG_PRINT("info", ("sector:%u  offset:%u  data 0x%x",
                        i, offset, (uint) page[offset]));
1447 1448 1449 1450 1451 1452
  }
  DBUG_VOID_RETURN;
}


/*
unknown's avatar
unknown committed
1453
  Calculate CRC32 of given area
1454 1455

  SYNOPSIS
unknown's avatar
unknown committed
1456
    translog_crc()
1457 1458 1459 1460
    area                 Pointer of the area beginning
    length               The Area length

  RETURN
unknown's avatar
unknown committed
1461
    CRC32
1462 1463
*/

unknown's avatar
unknown committed
1464
static uint32 translog_crc(uchar *area, uint length)
1465
{
1466 1467
  DBUG_ENTER("translog_crc");
  DBUG_RETURN(crc32(0L, (unsigned char*) area, length));
1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482
}


/*
  Finish current page with zeros

  SYNOPSIS
    translog_finish_page()
    horizon              \ horizon & buffer pointers
    cursor               /

  NOTES
    The unused rest of the page is filled with TRANSLOG_FILLER and the
    horizon, the cursor and (for non-chaser cursor) the buffer size are
    moved to the end of the page. Then, depending on log_descriptor.flags,
    the sector protection and the CRC are put on the page.
    Nothing is done if the page is already marked as protected.
*/

static void translog_finish_page(TRANSLOG_ADDRESS *horizon,
                                 struct st_buffer_cursor *cursor)
{
  uint16 unused= TRANSLOG_PAGE_SIZE - cursor->current_page_fill;
  /* Beginning of the page we are finishing */
  uchar *page_start= cursor->ptr - cursor->current_page_fill;
  DBUG_ENTER("translog_finish_page");
  DBUG_PRINT("enter", ("Buffer: #%u 0x%lx  "
                       "Buffer addr: (%lu,0x%lx)  "
                       "Page addr: (%lu,0x%lx)  "
                       "size:%lu (%lu)  Pg:%u  left:%u",
                       (uint) cursor->buffer_no, (ulong) cursor->buffer,
                       LSN_IN_PARTS(cursor->buffer->offset),
                       (ulong) LSN_FILE_NO(*horizon),
                       (ulong) (LSN_OFFSET(*horizon) -
                                cursor->current_page_fill),
                       (ulong) cursor->buffer->size,
                       (ulong) (cursor->ptr -cursor->buffer->buffer),
                       (uint) cursor->current_page_fill, (uint) unused));
  DBUG_ASSERT(LSN_FILE_NO(*horizon) == LSN_FILE_NO(cursor->buffer->offset));
  translog_check_cursor(cursor);

  if (cursor->protected)
  {
    DBUG_PRINT("info", ("Already protected and finished"));
    DBUG_VOID_RETURN;
  }
  cursor->protected= 1;

  DBUG_ASSERT(unused < TRANSLOG_PAGE_SIZE);
  if (unused != 0)
  {
    DBUG_PRINT("info", ("left: %u", (uint) unused));
    /* Fill the rest of the page and step over it */
    memset(cursor->ptr, TRANSLOG_FILLER, unused);
    cursor->ptr+= unused;
    (*horizon)+= unused; /* offset increasing */
    if (!cursor->chaser)
      cursor->buffer->size+= unused;
    /* We are finishing the page so reset the counter */
    cursor->current_page_fill= 0;
    DBUG_PRINT("info", ("Finish Page buffer #%u: 0x%lx  "
                        "chaser: %d  Size: %lu (%lu)",
                        (uint) cursor->buffer->buffer_no,
                        (ulong) cursor->buffer, cursor->chaser,
                        (ulong) cursor->buffer->size,
                        (ulong) (cursor->ptr - cursor->buffer->buffer)));
    translog_check_cursor(cursor);
  }

  /*
    When we are finishing the page other thread might not finish the page
    header yet (in case if we started from the middle of the page) so we
    have to read log_descriptor.flags but not the flags from the page.
  */
  if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
  {
    translog_put_sector_protection(page_start, cursor);
    DBUG_PRINT("info", ("drop write_counter"));
    cursor->write_counter= 0;
    cursor->previous_offset= 0;
  }

  if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
  {
    /* The CRC covers the page without its header */
    uint32 crc= translog_crc(page_start + log_descriptor.page_overhead,
                             TRANSLOG_PAGE_SIZE -
                             log_descriptor.page_overhead);
    DBUG_PRINT("info", ("CRC: %lx", (ulong) crc));
    /* We have page number, file number and flag before crc */
    int4store(page_start + 3 + 3 + 1, crc);
  }
  DBUG_VOID_RETURN;
}


/*
1552
  @brief Wait until all threads have finished filling this buffer.
1553

1554
  @param buffer          This buffer should be check
1555
*/
1556

1557 1558 1559
static void translog_wait_for_writers(struct st_translog_buffer *buffer)
{
  DBUG_ENTER("translog_wait_for_writers");
unknown's avatar
unknown committed
1560
  DBUG_PRINT("enter", ("Buffer #%u 0x%lx  copies in progress: %u",
1561 1562
                       (uint) buffer->buffer_no, (ulong) buffer,
                       (int) buffer->copy_to_buffer_in_progress));
1563
  translog_buffer_lock_assert_owner(buffer);
1564

unknown's avatar
unknown committed
1565
  while (buffer->copy_to_buffer_in_progress)
1566
  {
1567 1568
    DBUG_PRINT("info", ("wait for writers... buffer: #%u  0x%lx",
                        (uint) buffer->buffer_no, (ulong) buffer));
unknown's avatar
unknown committed
1569
    DBUG_ASSERT(buffer->file != -1);
unknown's avatar
unknown committed
1570
    pthread_cond_wait(&buffer->waiting_filling_buffer, &buffer->mutex);
1571 1572
    DBUG_PRINT("info", ("wait for writers done buffer: #%u  0x%lx",
                        (uint) buffer->buffer_no, (ulong) buffer));
unknown's avatar
unknown committed
1573
  }
1574 1575 1576 1577 1578 1579 1580

  DBUG_VOID_RETURN;
}


/*

unknown's avatar
unknown committed
1581
  Wait for buffer to become free
1582 1583 1584

  SYNOPSIS
    translog_wait_for_buffer_free()
unknown's avatar
unknown committed
1585
    buffer               The buffer we are waiting for
1586 1587 1588 1589 1590 1591 1592 1593

  NOTE
    - this buffer should be locked
*/

static void translog_wait_for_buffer_free(struct st_translog_buffer *buffer)
{
  DBUG_ENTER("translog_wait_for_buffer_free");
unknown's avatar
unknown committed
1594 1595
  DBUG_PRINT("enter", ("Buffer: #%u 0x%lx  copies in progress: %u  "
                       "File: %d  size: 0x%lu",
1596 1597
                       (uint) buffer->buffer_no, (ulong) buffer,
                       (int) buffer->copy_to_buffer_in_progress,
unknown's avatar
unknown committed
1598
                       buffer->file, (ulong) buffer->size));
1599 1600 1601

  translog_wait_for_writers(buffer);

unknown's avatar
unknown committed
1602
  while (buffer->file != -1)
1603
  {
1604 1605
    DBUG_PRINT("info", ("wait for writers... buffer: #%u  0x%lx",
                        (uint) buffer->buffer_no, (ulong) buffer));
unknown's avatar
unknown committed
1606
    pthread_cond_wait(&buffer->waiting_filling_buffer, &buffer->mutex);
1607 1608
    DBUG_PRINT("info", ("wait for writers done. buffer: #%u  0x%lx",
                        (uint) buffer->buffer_no, (ulong) buffer));
unknown's avatar
unknown committed
1609 1610
  }
  DBUG_ASSERT(buffer->copy_to_buffer_in_progress == 0);
1611 1612 1613 1614 1615
  DBUG_VOID_RETURN;
}


/**
  @brief Initialize the cursor for a buffer.

  Points the cursor at the very beginning of the buffer and resets all of
  its counters.  The cursor is marked as a "chaser" unless it is the main
  cursor of the log descriptor (log_descriptor.bc).

  @param cursor          The cursor to initialize
  @param buffer          The buffer the cursor will point into
  @param buffer_no       Number of the buffer
*/

static void translog_cursor_init(struct st_buffer_cursor *cursor,
                                 struct st_translog_buffer *buffer,
                                 uint8 buffer_no)
{
  DBUG_ENTER("translog_cursor_init");

  /* Bind the cursor to the buffer */
  cursor->buffer= buffer;
  cursor->buffer_no= buffer_no;
  cursor->ptr= buffer->buffer;

  /* Every cursor except the main one chases the main one */
  cursor->chaser= (cursor != &log_descriptor.bc);

  /* Nothing has been written through this cursor yet */
  cursor->current_page_fill= 0;
  cursor->write_counter= 0;
  cursor->previous_offset= 0;
  cursor->protected= 0;

  DBUG_VOID_RETURN;
}


/*
1643
  @brief Initialize buffer for the current file, and a cursor for this buffer.
1644

1645 1646 1647
  @param buffer          The buffer
  @param cursor          It's cursor
  @param buffer_no       Number of buffer
1648
*/
1649

1650 1651
static void translog_start_buffer(struct st_translog_buffer *buffer,
                                  struct st_buffer_cursor *cursor,
unknown's avatar
unknown committed
1652
                                  uint buffer_no)
1653 1654 1655
{
  DBUG_ENTER("translog_start_buffer");
  DBUG_PRINT("enter",
unknown's avatar
unknown committed
1656
             ("Assign buffer: #%u (0x%lx)  to file: %d  offset: 0x%lx(%lu)",
1657
              (uint) buffer->buffer_no, (ulong) buffer,
unknown's avatar
unknown committed
1658
              log_descriptor.log_file_num[0],
1659 1660
              (ulong) LSN_OFFSET(log_descriptor.horizon),
              (ulong) LSN_OFFSET(log_descriptor.horizon)));
1661
  DBUG_ASSERT(buffer_no == buffer->buffer_no);
unknown's avatar
unknown committed
1662
  buffer->last_lsn= LSN_IMPOSSIBLE;
1663
  buffer->offset= log_descriptor.horizon;
1664
  buffer->next_buffer_offset= LSN_IMPOSSIBLE;
1665 1666 1667 1668
  buffer->file= log_descriptor.log_file_num[0];
  buffer->overlay= 0;
  buffer->size= 0;
  translog_cursor_init(cursor, buffer, buffer_no);
unknown's avatar
unknown committed
1669
  DBUG_PRINT("info", ("init cursor #%u: 0x%lx  chaser: %d  Size: %lu (%lu)",
1670 1671
                      (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
                      cursor->chaser, (ulong) cursor->buffer->size,
unknown's avatar
unknown committed
1672
                      (ulong) (cursor->ptr - cursor->buffer->buffer)));
unknown's avatar
unknown committed
1673
  translog_check_cursor(cursor);
1674 1675 1676 1677 1678
  DBUG_VOID_RETURN;
}


/**
  @brief Switch to the next buffer in a chain.

  Finishes the current page, moves the cursor to the next buffer of the
  ring (buffer number + 1 modulo TRANSLOG_BUFFERS_NO) and writes a new page
  header there.  A non-chasing cursor locks the new buffer, waits until it
  is free and (re)starts it; a chasing cursor only re-initializes itself on
  the new buffer.

  @param horizon         \ Pointers on current position in file and buffer
  @param cursor          /
  @param new_file        Also start new file

  @note
   - loghandler should be locked
   - after return new and old buffer still are locked

  @retval 0 OK
  @retval 1 Error
*/

static my_bool translog_buffer_next(TRANSLOG_ADDRESS *horizon,
                                    struct st_buffer_cursor *cursor,
                                    my_bool new_file)
{
  /* Remember the cursor state before it is touched by anything below */
  uint old_buffer_no= cursor->buffer_no;
  my_bool chasing= cursor->chaser;
  uint new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
  struct st_translog_buffer *new_buffer= &log_descriptor.buffers[new_buffer_no];
  DBUG_ENTER("translog_buffer_next");

  DBUG_PRINT("info", ("horizon: (%lu,0x%lx)  chasing: %d",
                      LSN_IN_PARTS(log_descriptor.horizon), chasing));

  DBUG_ASSERT(cmp_translog_addr(log_descriptor.horizon, *horizon) >= 0);

  translog_finish_page(horizon, cursor);

  if (chasing)
  {
    /*
      NOTE(review): other code in this file marks a free buffer with
      file == -1, so this check against 0 may have been meant to be
      a check against -1 - confirm.
    */
    DBUG_ASSERT(new_buffer->file != 0);
  }
  else
  {
    translog_buffer_lock(new_buffer);
    translog_wait_for_buffer_free(new_buffer);
  }

  if (new_file)
  {
    /* move the horizon to the next file and its header page */
    (*horizon)+= LSN_ONE_FILE;
    (*horizon)= LSN_REPLACE_OFFSET(*horizon, TRANSLOG_PAGE_SIZE);
    /* only the main (non-chasing) cursor creates the file itself */
    if (!chasing && translog_create_new_file())
    {
      DBUG_RETURN(1);
    }
  }

  /* prepare next page */
  if (!chasing)
  {
    translog_lock_assert_owner();
    translog_start_buffer(new_buffer, cursor, new_buffer_no);
  }
  else
  {
    translog_cursor_init(cursor, new_buffer, new_buffer_no);
  }

  /* Link the buffer we leave to the place where the new one starts */
  (log_descriptor.buffers + old_buffer_no)->next_buffer_offset=
    new_buffer->offset;
  translog_new_page_header(horizon, cursor);
  DBUG_RETURN(0);
}


/*
1745
  Sets max LSN sent to file, and address from which data is only in the buffer
1746 1747

  SYNOPSIS
1748
    translog_set_sent_to_disk()
1749
    lsn                  LSN to assign
1750 1751 1752
    in_buffers           to assign to in_buffers_only

  TODO: use atomic operations if possible (64bit architectures?)
1753 1754
*/

1755
static void translog_set_sent_to_disk(LSN lsn, TRANSLOG_ADDRESS in_buffers)
1756
{
1757
  DBUG_ENTER("translog_set_sent_to_disk");
1758
  pthread_mutex_lock(&log_descriptor.sent_to_disk_lock);
1759 1760
  DBUG_PRINT("enter", ("lsn: (%lu,0x%lx) in_buffers: (%lu,0x%lx)  "
                       "in_buffers_only: (%lu,0x%lx)",
unknown's avatar
unknown committed
1761 1762 1763
                       LSN_IN_PARTS(lsn),
                       LSN_IN_PARTS(in_buffers),
                       LSN_IN_PARTS(log_descriptor.in_buffers_only)));
1764 1765
  DBUG_ASSERT(cmp_translog_addr(lsn, log_descriptor.sent_to_disk) >= 0);
  log_descriptor.sent_to_disk= lsn;
1766 1767 1768 1769 1770 1771
  /* LSN_IMPOSSIBLE == 0 => it will work for very first time */
  if (cmp_translog_addr(in_buffers, log_descriptor.in_buffers_only) > 0)
  {
    log_descriptor.in_buffers_only= in_buffers;
    DBUG_PRINT("info", ("set new in_buffers_only"));
  }
1772
  pthread_mutex_unlock(&log_descriptor.sent_to_disk_lock);
1773 1774 1775 1776 1777
  DBUG_VOID_RETURN;
}


/**
  @brief Sets address from which data is only in the buffer.

  log_descriptor.in_buffers_only is replaced only when the new address is
  greater than the stored one; the update is done under
  log_descriptor.sent_to_disk_lock.

  @param in_buffers      candidate value for in_buffers_only
*/

static void translog_set_only_in_buffers(TRANSLOG_ADDRESS in_buffers)
{
  DBUG_ENTER("translog_set_only_in_buffers");

  pthread_mutex_lock(&log_descriptor.sent_to_disk_lock);

  DBUG_PRINT("enter", ("in_buffers: (%lu,0x%lx)  "
                       "in_buffers_only: (%lu,0x%lx)",
                       LSN_IN_PARTS(in_buffers),
                       LSN_IN_PARTS(log_descriptor.in_buffers_only)));

  /* LSN_IMPOSSIBLE == 0 => it will work for very first time */
  if (cmp_translog_addr(in_buffers, log_descriptor.in_buffers_only) > 0)
  {
    log_descriptor.in_buffers_only= in_buffers;
    DBUG_PRINT("info", ("set new in_buffers_only"));
  }

  pthread_mutex_unlock(&log_descriptor.sent_to_disk_lock);

  DBUG_VOID_RETURN;
}


/*
  Gets address from which data is only in the buffer

  SYNOPSIS
    translog_only_in_buffers()

  RETURN
    address from which data is only in the buffer
*/

static TRANSLOG_ADDRESS translog_only_in_buffers()
{
  register TRANSLOG_ADDRESS addr;
  DBUG_ENTER("translog_only_in_buffers");
1819
  pthread_mutex_lock(&log_descriptor.sent_to_disk_lock);
1820
  addr= log_descriptor.in_buffers_only;
1821
  pthread_mutex_unlock(&log_descriptor.sent_to_disk_lock);
1822 1823 1824 1825 1826 1827
  DBUG_RETURN(addr);
}


/*
  Get max LSN sent to file
1828 1829

  SYNOPSIS
1830
    translog_get_sent_to_disk()
1831 1832 1833

  RETURN
    max LSN send to file
1834 1835
*/

1836
static LSN translog_get_sent_to_disk()
1837
{
1838
  register LSN lsn;
1839
  DBUG_ENTER("translog_get_sent_to_disk");
1840
  pthread_mutex_lock(&log_descriptor.sent_to_disk_lock);
1841
  lsn= log_descriptor.sent_to_disk;
1842
  pthread_mutex_unlock(&log_descriptor.sent_to_disk_lock);
1843
  DBUG_RETURN(lsn);
1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857
}


/*
  Get first chunk address on the given page

  SYNOPSIS
    translog_get_first_chunk_offset()
    page                 The page where to find first chunk

  RETURN
    first chunk offset
*/

unknown's avatar
unknown committed
1858
static my_bool translog_get_first_chunk_offset(uchar *page)
1859 1860
{
  DBUG_ENTER("translog_get_first_chunk_offset");
1861 1862
  DBUG_ASSERT(page[TRANSLOG_PAGE_FLAGS] < TRANSLOG_FLAGS_NUM);
  DBUG_RETURN(page_overhead[page[TRANSLOG_PAGE_FLAGS]]);
1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876
}


/*
  Write coded length of record

  SYNOPSIS
    translog_write_variable_record_1group_code_len
    dst                  Destination buffer pointer
    length               Length which should be coded
    header_len           Calculated total header length
*/

static void
unknown's avatar
unknown committed
1877
translog_write_variable_record_1group_code_len(uchar *dst,
1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891
                                               translog_size_t length,
                                               uint16 header_len)
{
  switch (header_len) {
  case 6:                                      /* (5 + 1) */
    DBUG_ASSERT(length <= 250);
    *dst= (uint8) length;
    return;
  case 8:                                      /* (5 + 3) */
    DBUG_ASSERT(length <= 0xFFFF);
    *dst= 251;
    int2store(dst + 1, length);
    return;
  case 9:                                      /* (5 + 4) */
unknown's avatar
unknown committed
1892
    DBUG_ASSERT(length <= (ulong) 0xFFFFFF);
1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917
    *dst= 252;
    int3store(dst + 1, length);
    return;
  case 10:                                     /* (5 + 5) */
    *dst= 253;
    int4store(dst + 1, length);
    return;
  default:
    DBUG_ASSERT(0);
  }
  return;
}


/*
  Decode record data length and advance given pointer to the next field

  SYNOPSIS
    translog_variable_record_1group_decode_len()
    src                  The pointer to the pointer to the length beginning

  RETURN
    decoded length
*/

unknown's avatar
unknown committed
1918
static translog_size_t translog_variable_record_1group_decode_len(uchar **src)
1919 1920 1921 1922
{
  uint8 first= (uint8) (**src);
  switch (first) {
  case 251:
unknown's avatar
unknown committed
1923
    (*src)+= 3;
1924 1925
    return (uint2korr((*src) - 2));
  case 252:
unknown's avatar
unknown committed
1926
    (*src)+= 4;
1927 1928
    return (uint3korr((*src) - 3));
  case 253:
unknown's avatar
unknown committed
1929
    (*src)+= 5;
1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953
    return (uint4korr((*src) - 4));
  case 254:
  case 255:
    DBUG_ASSERT(0);                             /* reserved for future use */
    return (0);
  default:
    (*src)++;
    return (first);
  }
}


/*
  Get total length of this chunk (not only body)

  SYNOPSIS
    translog_get_total_chunk_length()
    page                 The page where chunk placed
    offset               Offset of the chunk on this place

  RETURN
    total length of the chunk
*/

unknown's avatar
unknown committed
1954
static uint16 translog_get_total_chunk_length(uchar *page, uint16 offset)
1955 1956 1957
{
  DBUG_ENTER("translog_get_total_chunk_length");
  switch (page[offset] & TRANSLOG_CHUNK_TYPE) {
unknown's avatar
unknown committed
1958
  case TRANSLOG_CHUNK_LSN:
1959
  {
unknown's avatar
unknown committed
1960
    /* 0 chunk referred as LSN (head or tail) */
1961
    translog_size_t rec_len;
unknown's avatar
unknown committed
1962
    uchar *start= page + offset;
1963
    uchar *ptr= start + 1 + 2; /* chunk type and short trid */
1964 1965 1966 1967
    uint16 chunk_len, header_len, page_rest;
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN"));
    rec_len= translog_variable_record_1group_decode_len(&ptr);
    chunk_len= uint2korr(ptr);
unknown's avatar
unknown committed
1968 1969
    header_len= (ptr -start) + 2;
    DBUG_PRINT("info", ("rec len: %lu  chunk len: %u  header len: %u",
1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983
                        (ulong) rec_len, (uint) chunk_len, (uint) header_len));
    if (chunk_len)
    {
      DBUG_PRINT("info", ("chunk len: %u + %u = %u",
                          (uint) header_len, (uint) chunk_len,
                          (uint) (chunk_len + header_len)));
      DBUG_RETURN(chunk_len + header_len);
    }
    page_rest= TRANSLOG_PAGE_SIZE - offset;
    DBUG_PRINT("info", ("page_rest %u", (uint) page_rest));
    if (rec_len + header_len < page_rest)
      DBUG_RETURN(rec_len + header_len);
    DBUG_RETURN(page_rest);
  }
unknown's avatar
unknown committed
1984
  case TRANSLOG_CHUNK_FIXED:
1985
  {
unknown's avatar
unknown committed
1986
    uchar *ptr;
1987
    uint type= page[offset] & TRANSLOG_REC_TYPE;
unknown's avatar
unknown committed
1988 1989 1990 1991
    uint length;
    int i;
    /* 1 (pseudo)fixed record (also LSN) */
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED"));
unknown's avatar
unknown committed
1992
    DBUG_ASSERT(log_record_type_descriptor[type].rclass ==
1993
                LOGRECTYPE_FIXEDLENGTH ||
unknown's avatar
unknown committed
1994
                log_record_type_descriptor[type].rclass ==
1995
                LOGRECTYPE_PSEUDOFIXEDLENGTH);
unknown's avatar
unknown committed
1996
    if (log_record_type_descriptor[type].rclass == LOGRECTYPE_FIXEDLENGTH)
1997 1998 1999 2000 2001 2002
    {
      DBUG_PRINT("info",
                 ("Fixed length: %u",
                  (uint) (log_record_type_descriptor[type].fixed_length + 3)));
      DBUG_RETURN(log_record_type_descriptor[type].fixed_length + 3);
    }
unknown's avatar
unknown committed
2003 2004 2005 2006

    ptr= page + offset + 3;            /* first compressed LSN */
    length= log_record_type_descriptor[type].fixed_length + 3;
    for (i= 0; i < log_record_type_descriptor[type].compressed_LSN; i++)
2007
    {
unknown's avatar
unknown committed
2008
      /* first 2 bits is length - 2 */
2009
      uint len= (((uint8) (*ptr)) >> 6) + 2;
2010 2011
      if (ptr[0] == 0 && ((uint8) ptr[1]) == 1)
        len+= LSN_STORE_SIZE; /* case of full LSN storing */
unknown's avatar
unknown committed
2012
      ptr+= len;
2013
      /* subtract saved bytes */
2014
      length-= (LSN_STORE_SIZE - len);
2015
    }
unknown's avatar
unknown committed
2016 2017
    DBUG_PRINT("info", ("Pseudo-fixed length: %u", length));
    DBUG_RETURN(length);
2018
  }
unknown's avatar
unknown committed
2019 2020 2021
  case TRANSLOG_CHUNK_NOHDR:
    /* 2 no header chunk (till page end) */
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR  length: %u",
2022 2023 2024 2025 2026
                        (uint) (TRANSLOG_PAGE_SIZE - offset)));
    DBUG_RETURN(TRANSLOG_PAGE_SIZE - offset);
  case TRANSLOG_CHUNK_LNGTH:                   /* 3 chunk with chunk length */
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH"));
    DBUG_ASSERT(TRANSLOG_PAGE_SIZE - offset >= 3);
unknown's avatar
unknown committed
2027
    DBUG_PRINT("info", ("length: %u", uint2korr(page + offset + 1) + 3));
2028 2029 2030
    DBUG_RETURN(uint2korr(page + offset + 1) + 3);
  default:
    DBUG_ASSERT(0);
unknown's avatar
unknown committed
2031
    DBUG_RETURN(0);
2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043
  }
}


/*
  Flush given buffer

  SYNOPSIS
    translog_buffer_flush()
    buffer               This buffer should be flushed

  RETURN
unknown's avatar
unknown committed
2044 2045
    0  OK
    1  Error
2046 2047 2048 2049
*/

static my_bool translog_buffer_flush(struct st_translog_buffer *buffer)
{
2050
  uint32 i, pg;
2051
  PAGECACHE_FILE file;
2052 2053
  DBUG_ENTER("translog_buffer_flush");
  DBUG_PRINT("enter",
2054
             ("Buffer: #%u 0x%lx file: %d  offset: (%lu,0x%lx)  size: %lu",
2055
              (uint) buffer->buffer_no, (ulong) buffer,
unknown's avatar
unknown committed
2056
              buffer->file,
unknown's avatar
unknown committed
2057
              LSN_IN_PARTS(buffer->offset),
2058
              (ulong) buffer->size));
2059
  translog_buffer_lock_assert_owner(buffer);
2060

unknown's avatar
unknown committed
2061
  DBUG_ASSERT(buffer->file != -1);
2062 2063

  translog_wait_for_writers(buffer);
2064 2065 2066 2067

  if (buffer->overlay && buffer->overlay->file == buffer->file &&
      cmp_translog_addr(buffer->overlay->offset + buffer->overlay->size,
                        buffer->offset) > 0)
2068
  {
2069 2070 2071 2072
    /*
      This can't happen for normal translog_flush,
      only during destroying the loghandler
    */
2073
    struct st_translog_buffer *overlay= buffer->overlay;
2074 2075
    TRANSLOG_ADDRESS buffer_offset= buffer->offset;
    File file= buffer->file;
2076 2077
    translog_buffer_unlock(buffer);
    translog_buffer_lock(overlay);
2078 2079 2080 2081 2082 2083 2084
    /* rechecks under mutex protection that overlay is still our overlay */
    if (buffer->overlay->file == file &&
        cmp_translog_addr(buffer->overlay->offset + buffer->overlay->size,
                          buffer_offset) > 0)
    {
      translog_wait_for_buffer_free(overlay);
    }
2085 2086
    translog_buffer_unlock(overlay);
    translog_buffer_lock(buffer);
2087 2088 2089 2090 2091 2092 2093 2094 2095 2096
    if (buffer->file != -1 && buffer_offset == buffer->offset)
    {
      /*
        This means that somebody else flushed the buffer while we was
        waiting for overlay then for locking buffer again.
        It is possible for single request for flush and destroying the
        loghandler.
      */
      DBUG_RETURN(0);
    }
2097 2098
  }

unknown's avatar
unknown committed
2099 2100 2101 2102
  /*
    Send page by page in the pagecache what we are going to write on the
    disk
  */
2103
  file.file= buffer->file;
unknown's avatar
unknown committed
2104
  for (i= 0, pg= LSN_OFFSET(buffer->offset) / TRANSLOG_PAGE_SIZE;
2105 2106
       i < buffer->size;
       i+= TRANSLOG_PAGE_SIZE, pg++)
2107
  {
2108 2109 2110
    TRANSLOG_ADDRESS addr= (buffer->offset + i);
    TRANSLOG_VALIDATOR_DATA data;
    data.addr= &addr;
unknown's avatar
unknown committed
2111
    DBUG_ASSERT(log_descriptor.pagecache->block_size == TRANSLOG_PAGE_SIZE);
2112
    DBUG_ASSERT(i + TRANSLOG_PAGE_SIZE <= buffer->size);
2113
    if (pagecache_inject(log_descriptor.pagecache,
2114
                        &file, pg, 3,
2115 2116 2117
                        buffer->buffer + i,
                        PAGECACHE_PLAIN_PAGE,
                        PAGECACHE_LOCK_LEFT_UNLOCKED,
2118
                        PAGECACHE_PIN_LEFT_UNPINNED, 0,
2119
                        LSN_IMPOSSIBLE,
2120
                        &translog_page_validator, (uchar*) &data))
2121
    {
unknown's avatar
unknown committed
2122
      UNRECOVERABLE_ERROR(("Can't write page (%lu,0x%lx) to pagecache",
2123
                           (ulong) buffer->file,
2124
                           (ulong) (LSN_OFFSET(buffer->offset)+ i)));
2125 2126 2127
    }
  }
  if (my_pwrite(buffer->file, (char*) buffer->buffer,
2128
                buffer->size, LSN_OFFSET(buffer->offset),
2129
                log_write_flags))
2130
  {
unknown's avatar
unknown committed
2131 2132
    UNRECOVERABLE_ERROR(("Can't write buffer (%lu,0x%lx) size %lu "
                         "to the disk (%d)",
2133
                         (ulong) buffer->file,
2134
                         (ulong) LSN_OFFSET(buffer->offset),
2135 2136 2137
                         (ulong) buffer->size, errno));
    DBUG_RETURN(1);
  }
unknown's avatar
unknown committed
2138

2139
  if (LSN_OFFSET(buffer->last_lsn) != 0)    /* if buffer->last_lsn is set */
2140
    translog_set_sent_to_disk(buffer->last_lsn,
2141 2142 2143
                              buffer->next_buffer_offset);
  else
    translog_set_only_in_buffers(buffer->next_buffer_offset);
2144
  /* Free buffer */
unknown's avatar
unknown committed
2145
  buffer->file= -1;
2146
  buffer->overlay= 0;
unknown's avatar
unknown committed
2147
  pthread_cond_broadcast(&buffer->waiting_filling_buffer);
2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160
  DBUG_RETURN(0);
}


/*
  Recover page with sector protection (wipe out failed chunks)

  SYNOPSYS
    translog_recover_page_up_to_sector()
    page                 reference on the page
    offset               offset of failed sector

  RETURN
unknown's avatar
unknown committed
2161 2162
    0  OK
    1  Error
2163 2164
*/

unknown's avatar
unknown committed
2165
static my_bool translog_recover_page_up_to_sector(uchar *page, uint16 offset)
2166 2167 2168
{
  uint16 chunk_offset= translog_get_first_chunk_offset(page), valid_chunk_end;
  DBUG_ENTER("translog_recover_page_up_to_sector");
unknown's avatar
unknown committed
2169
  DBUG_PRINT("enter", ("offset: %u  first chunk: %u",
2170 2171
                       (uint) offset, (uint) chunk_offset));

unknown's avatar
unknown committed
2172
  while (page[chunk_offset] != TRANSLOG_FILLER && chunk_offset < offset)
2173 2174 2175 2176 2177 2178 2179 2180 2181
  {
    uint16 chunk_length;
    if ((chunk_length=
         translog_get_total_chunk_length(page, chunk_offset)) == 0)
    {
      UNRECOVERABLE_ERROR(("cant get chunk length (offset %u)",
                           (uint) chunk_offset));
      DBUG_RETURN(1);
    }
unknown's avatar
unknown committed
2182
    DBUG_PRINT("info", ("chunk: offset: %u  length %u",
2183 2184 2185
                        (uint) chunk_offset, (uint) chunk_length));
    if (((ulong) chunk_offset) + ((ulong) chunk_length) > TRANSLOG_PAGE_SIZE)
    {
unknown's avatar
unknown committed
2186
      UNRECOVERABLE_ERROR(("damaged chunk (offset %u) in trusted area",
2187 2188 2189 2190 2191 2192 2193
                           (uint) chunk_offset));
      DBUG_RETURN(1);
    }
    chunk_offset+= chunk_length;
  }

  valid_chunk_end= chunk_offset;
unknown's avatar
unknown committed
2194
  /* end of trusted area - sector parsing */
unknown's avatar
unknown committed
2195
  while (page[chunk_offset] != TRANSLOG_FILLER)
2196 2197 2198 2199 2200
  {
    uint16 chunk_length;
    if ((chunk_length=
         translog_get_total_chunk_length(page, chunk_offset)) == 0)
      break;
unknown's avatar
unknown committed
2201 2202

    DBUG_PRINT("info", ("chunk: offset: %u  length %u",
2203
                        (uint) chunk_offset, (uint) chunk_length));
unknown's avatar
unknown committed
2204 2205
    if (((ulong) chunk_offset) + ((ulong) chunk_length) >
        (uint) (offset + DISK_DRIVE_SECTOR_SIZE))
2206
      break;
unknown's avatar
unknown committed
2207

2208 2209 2210 2211 2212
    chunk_offset+= chunk_length;
    valid_chunk_end= chunk_offset;
  }
  DBUG_PRINT("info", ("valid chunk end offset: %u", (uint) valid_chunk_end));

unknown's avatar
unknown committed
2213 2214
  memset(page + valid_chunk_end, TRANSLOG_FILLER,
         TRANSLOG_PAGE_SIZE - valid_chunk_end);
2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228

  DBUG_RETURN(0);
}


/*
  Log page validator

  SYNOPSIS
    translog_page_validator()
    page_addr            The page to check
    data                 data, need for validation (address in this case)

  RETURN
unknown's avatar
unknown committed
2229 2230
    0  OK
    1  Error
2231
*/
unknown's avatar
unknown committed
2232
static my_bool translog_page_validator(uchar *page_addr, uchar* data_ptr)
2233
{
unknown's avatar
unknown committed
2234 2235
  uint this_page_page_overhead;
  uint flags;
unknown's avatar
unknown committed
2236
  uchar *page= (uchar*) page_addr, *page_pos;
unknown's avatar
unknown committed
2237 2238
  TRANSLOG_VALIDATOR_DATA *data= (TRANSLOG_VALIDATOR_DATA *) data_ptr;
  TRANSLOG_ADDRESS addr= *(data->addr);
2239 2240
  DBUG_ENTER("translog_page_validator");

unknown's avatar
unknown committed
2241
  data->was_recovered= 0;
2242

2243 2244
  if (uint3korr(page) != LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE ||
      uint3korr(page + 3) != LSN_FILE_NO(addr))
2245 2246
  {
    UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): "
unknown's avatar
unknown committed
2247
                         "page address written in the page is incorrect: "
2248
                         "File %lu instead of %lu or page %lu instead of %lu",
unknown's avatar
unknown committed
2249
                         LSN_IN_PARTS(addr),
2250
                         (ulong) uint3korr(page + 3), (ulong) LSN_FILE_NO(addr),
2251
                         (ulong) uint3korr(page),
2252
                         (ulong) LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE));
2253 2254
    DBUG_RETURN(1);
  }
unknown's avatar
unknown committed
2255 2256
  flags= (uint)(page[TRANSLOG_PAGE_FLAGS]);
  this_page_page_overhead= page_overhead[flags];
2257 2258 2259 2260 2261
  if (flags & ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION |
                TRANSLOG_RECORD_CRC))
  {
    UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): "
                         "Garbage in the page flags field detected : %x",
unknown's avatar
unknown committed
2262
                         LSN_IN_PARTS(addr), (uint) flags));
2263 2264
    DBUG_RETURN(1);
  }
unknown's avatar
unknown committed
2265
  page_pos= page + (3 + 3 + 1);
2266 2267
  if (flags & TRANSLOG_PAGE_CRC)
  {
unknown's avatar
unknown committed
2268 2269 2270 2271
    uint32 crc= translog_crc(page + this_page_page_overhead,
                             TRANSLOG_PAGE_SIZE -
                             this_page_page_overhead);
    if (crc != uint4korr(page_pos))
2272 2273 2274
    {
      UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): "
                           "CRC mismatch: calculated: %lx on the page %lx",
unknown's avatar
unknown committed
2275
                           LSN_IN_PARTS(addr),
unknown's avatar
unknown committed
2276
                           (ulong) crc, (ulong) uint4korr(page_pos)));
2277 2278
      DBUG_RETURN(1);
    }
unknown's avatar
unknown committed
2279
    page_pos+= CRC_LENGTH;                      /* Skip crc */
2280 2281 2282 2283
  }
  if (flags & TRANSLOG_SECTOR_PROTECTION)
  {
    uint i, offset;
unknown's avatar
unknown committed
2284
    uchar *table= page_pos;
unknown's avatar
unknown committed
2285 2286 2287 2288
    uint8 current= table[0];
    for (i= 1, offset= DISK_DRIVE_SECTOR_SIZE;
         i < TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
         i++, offset+= DISK_DRIVE_SECTOR_SIZE)
2289 2290
    {
      /*
unknown's avatar
unknown committed
2291
         TODO: add chunk counting for "suspecting" sectors (difference is
unknown's avatar
unknown committed
2292 2293
         more than 1-2), if difference more then present chunks then it is
         the problem.
2294
      */
unknown's avatar
unknown committed
2295
      uint8 test= page[offset];
unknown's avatar
unknown committed
2296 2297
      DBUG_PRINT("info", ("sector: #%u  offset: %u  current: %lx "
                          "read: 0x%x  stored: 0x%x%x",
unknown's avatar
unknown committed
2298
                          i, offset, (ulong) current,
2299 2300
                          (uint) uint2korr(page + offset), (uint) table[i],
                          (uint) table[i + 1]));
unknown's avatar
unknown committed
2301 2302 2303 2304 2305
      /*
        3 is minimal possible record length. So we can have "distance"
        between 2 sectors value more then DISK_DRIVE_SECTOR_SIZE / 3
        only if it is old value, i.e. the sector was not written.
      */
unknown's avatar
unknown committed
2306
      if (((test < current) &&
unknown's avatar
unknown committed
2307
           (0xFFL - current + test > DISK_DRIVE_SECTOR_SIZE / 3)) ||
unknown's avatar
unknown committed
2308 2309
          ((test >= current) &&
           (test - current > DISK_DRIVE_SECTOR_SIZE / 3)))
2310 2311 2312
      {
        if (translog_recover_page_up_to_sector(page, offset))
          DBUG_RETURN(1);
unknown's avatar
unknown committed
2313
        data->was_recovered= 1;
2314 2315 2316
        DBUG_RETURN(0);
      }

unknown's avatar
unknown committed
2317
      /* Restore value on the page */
2318 2319
      page[offset]= table[i];
      current= test;
unknown's avatar
unknown committed
2320
      DBUG_PRINT("info", ("sector: #%u  offset: %u  current: %lx  "
unknown's avatar
unknown committed
2321 2322 2323
                          "read: 0x%x  stored: 0x%x",
                          i, offset, (ulong) current,
                          (uint) page[offset], (uint) table[i]));
2324 2325 2326 2327 2328
    }
  }
  DBUG_RETURN(0);
}

2329

unknown's avatar
unknown committed
2330 2331
/**
  @brief Locks the loghandler.
2332

unknown's avatar
unknown committed
2333
  @note See comment before buffer 'mutex' variable.
2334

unknown's avatar
unknown committed
2335 2336
  @retval 0 OK
  @retval 1 Error
2337 2338 2339 2340
*/

my_bool translog_lock()
{
2341
  uint8 current_buffer;
2342 2343 2344 2345 2346 2347 2348 2349
  DBUG_ENTER("translog_lock");

  /*
     Locking the loghandler mean locking current buffer, but it can change
     during locking, so we should check it
  */
  for (;;)
  {
2350 2351 2352 2353 2354 2355
    /*
      log_descriptor.bc.buffer_no is only one byte so its reading is
      an atomic operation
    */
    current_buffer= log_descriptor.bc.buffer_no;
    if (translog_buffer_lock(log_descriptor.buffers + current_buffer))
2356
      DBUG_RETURN(1);
2357
    if (log_descriptor.bc.buffer_no == current_buffer)
2358
      break;
2359
    translog_buffer_unlock(log_descriptor.buffers + current_buffer);
2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384
  }
  DBUG_RETURN(0);
}


/**
  @brief Unlock the loghandler.

  Releases the lock of the buffer the main cursor currently points to
  (log_descriptor.bc.buffer).

  @retval 0  OK (the only value returned)
*/

my_bool translog_unlock()
{
  struct st_translog_buffer *current;
  DBUG_ENTER("translog_unlock");

  current= log_descriptor.bc.buffer;
  translog_buffer_unlock(current);

  DBUG_RETURN(0);
}


2385 2386
/**
  @brief Get log page by file number and offset of the beginning of the page
2387

2388 2389
  @param data            validator data, which contains the page address
  @param buffer          buffer for page placing
2390
                         (might not be used in some cache implementations)
2391 2392
  @param direct_link     if it is not NULL then caller can accept direct
                         link to the page cache
2393

2394 2395
  @retval NULL Error
  @retval #    pointer to the page cache which should be used to read this page
2396 2397
*/

2398
static uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer,
unknown's avatar
unknown committed
2399
                                PAGECACHE_BLOCK_LINK **direct_link)
2400
{
2401
  TRANSLOG_ADDRESS addr= *(data->addr), in_buffers;
2402
  uint cache_index;
2403
  uint32 file_no= LSN_FILE_NO(addr);
2404
  DBUG_ENTER("translog_get_page");
unknown's avatar
unknown committed
2405
  DBUG_PRINT("enter", ("File: %lu  Offset: %lu(0x%lx)",
2406 2407 2408
                       (ulong) file_no,
                       (ulong) LSN_OFFSET(addr),
                       (ulong) LSN_OFFSET(addr)));
2409 2410

  /* it is really page address */
2411
  DBUG_ASSERT(LSN_OFFSET(addr) % TRANSLOG_PAGE_SIZE == 0);
2412

2413 2414 2415
  if (direct_link)
    *direct_link= NULL;

2416 2417
  in_buffers= translog_only_in_buffers();
  DBUG_PRINT("info", ("in_buffers: (%lu,0x%lx)",
unknown's avatar
unknown committed
2418
                      LSN_IN_PARTS(in_buffers)));
2419 2420 2421 2422 2423 2424 2425 2426 2427
  if (in_buffers != LSN_IMPOSSIBLE &&
      cmp_translog_addr(addr, in_buffers) >= 0)
  {
    translog_lock();
    /* recheck with locked loghandler */
    in_buffers= translog_only_in_buffers();
    if (cmp_translog_addr(addr, in_buffers) >= 0)
    {
      uint16 buffer_no= log_descriptor.bc.buffer_no;
2428
#ifndef DBUG_OFF
2429
      uint16 buffer_start= buffer_no;
2430
#endif
2431 2432 2433 2434 2435 2436
      struct st_translog_buffer *buffer_unlock= log_descriptor.bc.buffer;
      struct st_translog_buffer *curr_buffer= log_descriptor.bc.buffer;
      for (;;)
      {
        /*
          if the page is in the buffer and it is the last version of the
2437
          page (in case of division the page by buffer flush)
2438 2439 2440 2441 2442 2443 2444 2445 2446 2447
        */
        if (curr_buffer->file != -1 &&
            cmp_translog_addr(addr, curr_buffer->offset) >= 0 &&
            cmp_translog_addr(addr,
                              (curr_buffer->next_buffer_offset ?
                               curr_buffer->next_buffer_offset:
                               curr_buffer->offset + curr_buffer->size)) < 0)
        {
          int is_last_unfinished_page;
          uint last_protected_sector= 0;
2448
          uchar *from, *table= NULL;
2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463
          translog_wait_for_writers(curr_buffer);
          DBUG_ASSERT(LSN_FILE_NO(addr) ==  LSN_FILE_NO(curr_buffer->offset));
          from= curr_buffer->buffer + (addr - curr_buffer->offset);
          memcpy(buffer, from, TRANSLOG_PAGE_SIZE);
          is_last_unfinished_page= ((log_descriptor.bc.buffer ==
                                     curr_buffer) &&
                                    (log_descriptor.bc.ptr >= from) &&
                                    (log_descriptor.bc.ptr <
                                     from + TRANSLOG_PAGE_SIZE));
          if (is_last_unfinished_page &&
              (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION))
          {
            last_protected_sector= ((log_descriptor.bc.previous_offset - 1) /
                                    DISK_DRIVE_SECTOR_SIZE);
            table= buffer + log_descriptor.page_overhead -
unknown's avatar
unknown committed
2464
              TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487
          }

          DBUG_ASSERT(buffer_unlock == curr_buffer);
          translog_buffer_unlock(buffer_unlock);
          if (is_last_unfinished_page)
          {
            uint i;
            /*
              This is last unfinished page => we should not check CRC and
              remove only that protection which already installed (no need
              to check it)

              We do not check the flag of sector protection, because if
              (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION) is
              not set then last_protected_sector will be 0 so following loop
              will be never executed
            */
            DBUG_PRINT("info", ("This is last unfinished page, "
                                "last protected sector %u",
                                last_protected_sector));
            for (i= 1; i <= last_protected_sector; i++)
            {
              uint offset= i * DISK_DRIVE_SECTOR_SIZE;
unknown's avatar
unknown committed
2488 2489 2490 2491
              DBUG_PRINT("info", ("Sector %u: 0x%02x <- 0x%02x",
                                  i, buffer[offset],
                                  table[i]));
              buffer[offset]= table[i];
2492 2493 2494 2495 2496 2497 2498 2499 2500
            }
          }
          else
          {
            /*
              This IF should be true because we use in-memory data which
              supposed to be correct.
            */
            if (translog_page_validator((uchar*) buffer, (uchar*) data))
2501
            {
2502
              buffer= NULL;
2503 2504
              DBUG_ASSERT(FALSE);
            }
2505 2506 2507 2508 2509 2510 2511 2512
          }
          DBUG_RETURN(buffer);
        }
        buffer_no= (buffer_no + 1) % TRANSLOG_BUFFERS_NO;
        curr_buffer= log_descriptor.buffers + buffer_no;
        translog_buffer_lock(curr_buffer);
        translog_buffer_unlock(buffer_unlock);
        buffer_unlock= curr_buffer;
unknown's avatar
unknown committed
2513
        /* we can't make a full circle */
2514 2515 2516 2517 2518
        DBUG_ASSERT(buffer_start != buffer_no);
      }
    }
    translog_unlock();
  }
2519
  if ((cache_index= LSN_FILE_NO(log_descriptor.horizon) - file_no) <
2520 2521 2522 2523
      OPENED_FILES_NUM)
  {
    PAGECACHE_FILE file;
    /* file in the cache */
unknown's avatar
unknown committed
2524
    if (log_descriptor.log_file_num[cache_index] == -1)
2525 2526
    {
      if ((log_descriptor.log_file_num[cache_index]=
unknown's avatar
unknown committed
2527
           open_logfile_by_number_no_cache(file_no)) == -1)
2528 2529 2530 2531
        DBUG_RETURN(NULL);
    }
    file.file= log_descriptor.log_file_num[cache_index];

2532
    buffer=
unknown's avatar
unknown committed
2533
      (uchar*) pagecache_valid_read(log_descriptor.pagecache, &file,
2534
                                     LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE,
unknown's avatar
unknown committed
2535
                                     3, (direct_link ? NULL : (char*) buffer),
2536
                                     PAGECACHE_PLAIN_PAGE,
unknown's avatar
unknown committed
2537 2538 2539 2540 2541
                                     (direct_link ?
                                      PAGECACHE_LOCK_READ :
                                      PAGECACHE_LOCK_LEFT_UNLOCKED),
                                     direct_link,
                                     &translog_page_validator, (uchar*) data);
2542 2543 2544
    DBUG_PRINT("info", ("Direct link is assigned to : 0x%lx * 0x%lx",
                        (ulong) direct_link,
                        (ulong)(direct_link ? *direct_link : NULL)));
2545 2546 2547
  }
  else
  {
unknown's avatar
unknown committed
2548 2549 2550 2551 2552 2553 2554 2555
    /*
      TODO: WE KEEP THE LAST OPENED_FILES_NUM FILES IN THE LOG CACHE, NOT
      THE LAST USED FILES.  THIS WILL BE A NOTABLE PROBLEM IF WE ARE
      FOLLOWING AN UNDO CHAIN THAT GOES OVER MANY OLD LOG FILES.  WE WILL
      PROBABLY NEED SPECIAL HANDLING OF THIS OR HAVE A FILO FOR THE LOG
      FILES.
    */

2556
    File file= open_logfile_by_number_no_cache(file_no);
unknown's avatar
unknown committed
2557 2558
    if (file == -1)
        DBUG_RETURN(NULL);
2559
    if (my_pread(file, (char*) buffer, TRANSLOG_PAGE_SIZE,
2560
                 LSN_OFFSET(addr), MYF(MY_FNABP | MY_WME)))
2561
      buffer= NULL;
unknown's avatar
unknown committed
2562
    else if (translog_page_validator((uchar*) buffer, (uchar*) data))
2563 2564 2565 2566 2567 2568
      buffer= NULL;
    my_close(file, MYF(MY_WME));
  }
  DBUG_RETURN(buffer);
}

2569

2570 2571 2572 2573 2574 2575 2576
/**
  @brief free direct log page link

  @param direct_link the direct log page link to be freed

*/

unknown's avatar
unknown committed
2577
static void translog_free_link(PAGECACHE_BLOCK_LINK *direct_link)
2578 2579 2580 2581 2582 2583 2584
{
  DBUG_ENTER("translog_free_link");
  DBUG_PRINT("info", ("Direct link: 0x%lx",
                      (ulong) direct_link));
  if (direct_link)
    pagecache_unlock_by_link(log_descriptor.pagecache, direct_link,
                             PAGECACHE_LOCK_READ_UNLOCK, PAGECACHE_UNPIN,
2585
                             LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, 0);
2586 2587
  DBUG_VOID_RETURN;
}
2588

2589

unknown's avatar
unknown committed
2590 2591
/**
  @brief Finds last full page of the given log file.

  The size of the file is taken with my_stat().  If the file is larger
  than one page, the result is the offset of the last complete page.
  If the file is not larger than one page (it holds at most the first
  page), offset 0 is returned and *last_page_ok is set to 0.

  @param addr            address structure to fill with data, which contain
                         file number of the log file; on success it is
                         replaced by (file number, offset of the last page)
  @param last_page_ok    Result of the check whether last page OK.
                         (for now we check only that file length is
                         divisible by page length).

  @retval 0 OK
  @retval 1 Error (my_stat() failed; *addr and *last_page_ok untouched)
*/

static my_bool translog_get_last_page_addr(TRANSLOG_ADDRESS *addr,
                                           my_bool *last_page_ok)
{
  MY_STAT stat_buff, *local_stat;
  char path[FN_REFLEN];
  uint32 rec_offset;
  uint32 file_no= LSN_FILE_NO(*addr);
  DBUG_ENTER("translog_get_last_page_addr");

  if (!(local_stat= my_stat(translog_filename_by_fileno(file_no, path),
                            &stat_buff, MYF(MY_WME))))
    DBUG_RETURN(1);
  DBUG_PRINT("info", ("File size: %lu", (ulong) local_stat->st_size));
  if (local_stat->st_size > TRANSLOG_PAGE_SIZE)
  {
    /* Offset of the last page which is completely inside the file */
    rec_offset= (((local_stat->st_size / TRANSLOG_PAGE_SIZE) - 1) *
                       TRANSLOG_PAGE_SIZE);
    /* OK only if nothing is left after that page (no partial page) */
    *last_page_ok= (local_stat->st_size == rec_offset + TRANSLOG_PAGE_SIZE);
  }
  else
  {
    *last_page_ok= 0;
    rec_offset= 0;
  }
  *addr= MAKE_LSN(file_no, rec_offset);
  DBUG_PRINT("info", ("Last page: 0x%lx  ok: %d", (ulong) rec_offset,
                      *last_page_ok));
  DBUG_RETURN(0);
}


unknown's avatar
unknown committed
2634 2635
/**
  @brief Get number bytes for record length storing
2636

unknown's avatar
unknown committed
2637
  @param length          Record length which will be encoded
2638

unknown's avatar
unknown committed
2639
  @return 1,3,4,5 - number of bytes to store given length
2640
*/
2641

2642 2643 2644 2645
static uint translog_variable_record_length_bytes(translog_size_t length)
{
  if (length < 250)
    return 1;
unknown's avatar
unknown committed
2646
  if (length < 0xFFFF)
2647
    return 3;
unknown's avatar
unknown committed
2648
  if (length < (ulong) 0xFFFFFF)
2649 2650 2651 2652 2653
    return 4;
  return 5;
}


unknown's avatar
unknown committed
2654
/**
unknown's avatar
unknown committed
2655
  @brief Gets header of this chunk.
2656

unknown's avatar
unknown committed
2657
  @param chunk           The pointer to the chunk beginning
2658

unknown's avatar
unknown committed
2659 2660
  @retval # total length of the chunk
  @retval 0 Error
2661 2662
*/

unknown's avatar
unknown committed
2663
static uint16 translog_get_chunk_header_length(uchar *chunk)
2664 2665
{
  DBUG_ENTER("translog_get_chunk_header_length");
unknown's avatar
unknown committed
2666
  switch (*chunk & TRANSLOG_CHUNK_TYPE) {
unknown's avatar
unknown committed
2667
  case TRANSLOG_CHUNK_LSN:
2668
  {
unknown's avatar
unknown committed
2669
    /* 0 chunk referred as LSN (head or tail) */
2670
    translog_size_t rec_len;
unknown's avatar
unknown committed
2671
    uchar *start= chunk;
unknown's avatar
unknown committed
2672
    uchar *ptr= start + 1 + 2;
2673 2674 2675 2676
    uint16 chunk_len, header_len;
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN"));
    rec_len= translog_variable_record_1group_decode_len(&ptr);
    chunk_len= uint2korr(ptr);
unknown's avatar
unknown committed
2677 2678
    header_len= (ptr - start) +2;
    DBUG_PRINT("info", ("rec len: %lu  chunk len: %u  header len: %u",
2679 2680 2681
                        (ulong) rec_len, (uint) chunk_len, (uint) header_len));
    if (chunk_len)
    {
unknown's avatar
unknown committed
2682
      /* TODO: fine header end */
unknown's avatar
unknown committed
2683 2684 2685 2686 2687
      /*
        The last chunk of multi-group record can be base for it header
        calculation (we skip to the first group to read the header) so if we
        stuck here something is wrong.
      */
2688
      DBUG_ASSERT(0);
unknown's avatar
unknown committed
2689
      DBUG_RETURN(0);                               /* Keep compiler happy */
2690 2691 2692
    }
    DBUG_RETURN(header_len);
  }
unknown's avatar
unknown committed
2693
  case TRANSLOG_CHUNK_FIXED:
2694
  {
unknown's avatar
unknown committed
2695
    /* 1 (pseudo)fixed record (also LSN) */
2696 2697 2698
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED = 3"));
    DBUG_RETURN(3);
  }
unknown's avatar
unknown committed
2699 2700
  case TRANSLOG_CHUNK_NOHDR:
    /* 2 no header chunk (till page end) */
2701 2702 2703
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR = 1"));
    DBUG_RETURN(1);
    break;
unknown's avatar
unknown committed
2704 2705
  case TRANSLOG_CHUNK_LNGTH:
    /* 3 chunk with chunk length */
2706 2707 2708 2709 2710
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH = 3"));
    DBUG_RETURN(3);
    break;
  default:
    DBUG_ASSERT(0);
unknown's avatar
unknown committed
2711
    DBUG_RETURN(0);                               /* Keep compiler happy */
2712 2713 2714 2715
  }
}


2716
/**
  @brief Truncate the log to the given address. Used during the startup if the
         end of log is corrupted.

  Steps:
    -# delete all log files with a number above the file of 'addr';
    -# resize the file of 'addr' to the end of the page which contains
       'addr' and fill the rest of that page with TRANSLOG_FILLER;
    -# sync the file (and the log directory if sync_log_dir asks for it);
    -# set the horizon to 'addr' and reload the last page into the first
       log buffer, positioning the log cursor at 'addr'.

  @param addr            new horizon

  @retval 0 OK
  @retval 1 Error
*/

static my_bool translog_truncate_log(TRANSLOG_ADDRESS addr)
{
  uchar *page;
  TRANSLOG_ADDRESS current_page;
  uint32 next_page_offset, page_rest;
  uint32 i;
  File fd;
  TRANSLOG_VALIDATOR_DATA data;
  char path[FN_REFLEN];
  uchar page_buff[TRANSLOG_PAGE_SIZE];
  DBUG_ENTER("translog_truncate_log");
  /* TODO: write warning to the client */
  DBUG_PRINT("warning", ("removing all records from (%lx,0x%lx) "
                         "till (%lx,0x%lx)",
                         LSN_IN_PARTS(addr),
                         LSN_IN_PARTS(log_descriptor.horizon)));
  DBUG_ASSERT(cmp_translog_addr(addr, log_descriptor.horizon) < 0);
  /* remove files between the address and horizon */
  for (i= LSN_FILE_NO(addr) + 1; i <= LSN_FILE_NO(log_descriptor.horizon); i++)
    if (my_delete(translog_filename_by_fileno(i, path),  MYF(MY_WME)))
    {
      /*
        NOTE(review): this function does not take the loghandler lock
        itself and no other error path unlocks; presumably the caller
        holds the lock here - confirm against the callers.
      */
      translog_unlock();
      DBUG_RETURN(1);
    }

  /* truncate the last file up to the last page */
  next_page_offset= LSN_OFFSET(addr);
  /* Round up to the page border (stays the same if already on a border) */
  next_page_offset= (next_page_offset -
                     ((next_page_offset - 1) % TRANSLOG_PAGE_SIZE + 1) +
                     TRANSLOG_PAGE_SIZE);
  page_rest= next_page_offset - LSN_OFFSET(addr);
  memset(page_buff, TRANSLOG_FILLER, page_rest);
  if ((fd= open_logfile_by_number_no_cache(LSN_FILE_NO(addr))) < 0)
    DBUG_RETURN(1);
  if (my_chsize(fd, next_page_offset, TRANSLOG_FILLER, MYF(MY_WME)) ||
      (page_rest && my_pwrite(fd, page_buff, page_rest, LSN_OFFSET(addr),
                              log_write_flags)) ||
      my_sync(fd, MYF(MY_WME)))
  {
    /* Do not leak the file descriptor when resize/write/sync fails */
    my_close(fd, MYF(MY_WME));
    DBUG_RETURN(1);
  }
  if (my_close(fd, MYF(MY_WME)) ||
      (sync_log_dir >= TRANSLOG_SYNC_DIR_ALWAYS &&
       my_sync(log_descriptor.directory_fd, MYF(MY_WME | MY_IGNORE_BADFD))))
    DBUG_RETURN(1);

  /* fix the horizon */
  log_descriptor.horizon= addr;
  /* fix the buffer data */
  current_page= MAKE_LSN(LSN_FILE_NO(addr), (next_page_offset -
                                             TRANSLOG_PAGE_SIZE));
  data.addr= &current_page;
  if ((page= translog_get_page(&data, log_descriptor.buffers->buffer, NULL)) ==
      NULL)
    DBUG_RETURN(1);
  /* translog_get_page() may return a pointer other than our buffer */
  if (page != log_descriptor.buffers->buffer)
    memcpy(log_descriptor.buffers->buffer, page, TRANSLOG_PAGE_SIZE);
  log_descriptor.bc.buffer->offset= current_page;
  log_descriptor.bc.buffer->size= LSN_OFFSET(addr) - LSN_OFFSET(current_page);
  log_descriptor.bc.ptr=
    log_descriptor.buffers->buffer + log_descriptor.bc.buffer->size;
  log_descriptor.bc.current_page_fill= log_descriptor.bc.buffer->size;
  DBUG_RETURN(0);
}

2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799
/*
  Initialize transaction log

  SYNOPSIS
    translog_init()
    directory            Directory where log files are put
    log_file_max_size    max size of one log size (for new logs creation)
    server_version       version of MySQL server (MYSQL_VERSION_ID)
    server_id            server ID (replication & Co)
    pagecache            Page cache for the log reads
    flags                flags (TRANSLOG_PAGE_CRC, TRANSLOG_SECTOR_PROTECTION
                           TRANSLOG_RECORD_CRC)

2800 2801 2802
  TODO
    Free used resources in case of error.

2803
  RETURN
unknown's avatar
unknown committed
2804 2805
    0  OK
    1  Error
2806 2807 2808 2809 2810 2811 2812 2813 2814
*/

my_bool translog_init(const char *directory,
                      uint32 log_file_max_size,
                      uint32 server_version,
                      uint32 server_id, PAGECACHE *pagecache, uint flags)
{
  int i;
  int old_log_was_recovered= 0, logs_found= 0;
unknown's avatar
unknown committed
2815
  uint old_flags= flags;
2816
  TRANSLOG_ADDRESS sure_page, last_page, last_valid_page;
2817
  my_bool version_changed= 0;
2818
  DBUG_ENTER("translog_init");
unknown's avatar
unknown committed
2819
  DBUG_ASSERT(translog_inited == 0);
unknown's avatar
unknown committed
2820 2821 2822
  compile_time_assert(TRANSLOG_MIN_FILE_SIZE >
                      TRANSLOG_WRITE_BUFFER * TRANSLOG_BUFFERS_NO);
  compile_time_assert(TRANSLOG_WRITE_BUFFER % TRANSLOG_PAGE_SIZE == 0);
2823

2824
  loghandler_init();                            /* Safe to do many times */
2825

2826
  if (pthread_mutex_init(&log_descriptor.sent_to_disk_lock,
2827 2828 2829 2830 2831
                         MY_MUTEX_INIT_FAST) ||
      pthread_mutex_init(&log_descriptor.file_header_lock,
                         MY_MUTEX_INIT_FAST) ||
      pthread_mutex_init(&log_descriptor.unfinished_files_lock,
                         MY_MUTEX_INIT_FAST) ||
2832 2833
      pthread_mutex_init(&log_descriptor.purger_lock,
                         MY_MUTEX_INIT_FAST) ||
2834 2835
      pthread_mutex_init(&log_descriptor.log_flush_lock,
                         MY_MUTEX_INIT_FAST) ||
unknown's avatar
unknown committed
2836 2837 2838
      my_init_dynamic_array(&log_descriptor.unfinished_files,
                            sizeof(struct st_file_counter),
                            10, 10))
2839
    DBUG_RETURN(1);
2840
  log_descriptor.min_need_file= 0;
2841 2842
  log_descriptor.min_file_number= 0;
  log_descriptor.last_lsn_checked= LSN_IMPOSSIBLE;
2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854

  /* Directory to store files */
  unpack_dirname(log_descriptor.directory, directory);

  if ((log_descriptor.directory_fd= my_open(log_descriptor.directory,
                                            O_RDONLY, MYF(MY_WME))) < 0)
  {
    UNRECOVERABLE_ERROR(("Error %d during opening directory '%s'",
                         errno, log_descriptor.directory));
    DBUG_RETURN(1);
  }

2855
  log_descriptor.in_buffers_only= LSN_IMPOSSIBLE;
unknown's avatar
unknown committed
2856
  DBUG_ASSERT(log_file_max_size % TRANSLOG_PAGE_SIZE == 0 &&
unknown's avatar
unknown committed
2857
              log_file_max_size >= TRANSLOG_MIN_FILE_SIZE);
2858
  /* max size of one log size (for new logs creation) */
2859
  log_file_size= log_descriptor.log_file_max_size=
unknown's avatar
unknown committed
2860
    log_file_max_size;
2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871
  /* server version */
  log_descriptor.server_version= server_version;
  /* server ID */
  log_descriptor.server_id= server_id;
  /* Page cache for the log reads */
  log_descriptor.pagecache= pagecache;
  /* Flags */
  DBUG_ASSERT((flags &
               ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION |
                 TRANSLOG_RECORD_CRC)) == 0);
  log_descriptor.flags= flags;
unknown's avatar
unknown committed
2872 2873 2874 2875 2876 2877
  for (i= 0; i < TRANSLOG_FLAGS_NUM; i++)
  {
     page_overhead[i]= 7;
     if (i & TRANSLOG_PAGE_CRC)
       page_overhead[i]+= CRC_LENGTH;
     if (i & TRANSLOG_SECTOR_PROTECTION)
unknown's avatar
unknown committed
2878 2879
       page_overhead[i]+= TRANSLOG_PAGE_SIZE /
                           DISK_DRIVE_SECTOR_SIZE;
unknown's avatar
unknown committed
2880 2881
  }
  log_descriptor.page_overhead= page_overhead[flags];
2882 2883
  log_descriptor.page_capacity_chunk_2=
    TRANSLOG_PAGE_SIZE - log_descriptor.page_overhead - 1;
unknown's avatar
unknown committed
2884
  compile_time_assert(TRANSLOG_WRITE_BUFFER % TRANSLOG_PAGE_SIZE == 0);
2885 2886 2887 2888 2889 2890
  log_descriptor.buffer_capacity_chunk_2=
    (TRANSLOG_WRITE_BUFFER / TRANSLOG_PAGE_SIZE) *
    log_descriptor.page_capacity_chunk_2;
  log_descriptor.half_buffer_capacity_chunk_2=
    log_descriptor.buffer_capacity_chunk_2 / 2;
  DBUG_PRINT("info",
unknown's avatar
unknown committed
2891
             ("Overhead: %u  pc2: %u  bc2: %u,  bc2/2: %u",
2892 2893 2894 2895 2896 2897 2898 2899 2900
              log_descriptor.page_overhead,
              log_descriptor.page_capacity_chunk_2,
              log_descriptor.buffer_capacity_chunk_2,
              log_descriptor.half_buffer_capacity_chunk_2));

  /* *** Current state of the log handler *** */

  /* Init log handler file handlers cache */
  for (i= 0; i < OPENED_FILES_NUM; i++)
unknown's avatar
unknown committed
2901
    log_descriptor.log_file_num[i]= -1;
2902 2903 2904 2905 2906 2907 2908

  /* just to init it somehow */
  translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);

  /* Buffers for log writing */
  for (i= 0; i < TRANSLOG_BUFFERS_NO; i++)
  {
unknown's avatar
unknown committed
2909 2910
    if (translog_buffer_init(log_descriptor.buffers + i))
      DBUG_RETURN(1);
2911 2912 2913
#ifndef DBUG_OFF
    log_descriptor.buffers[i].buffer_no= (uint8) i;
#endif
unknown's avatar
unknown committed
2914 2915
    DBUG_PRINT("info", ("translog_buffer buffer #%u: 0x%lx",
                        i, (ulong) log_descriptor.buffers + i));
2916 2917
  }

unknown's avatar
unknown committed
2918 2919 2920 2921
  /*
    last_logno and last_checkpoint_lsn were set in
    ma_control_file_create_or_open()
  */
unknown's avatar
unknown committed
2922
  logs_found= (last_logno != FILENO_IMPOSSIBLE);
2923 2924 2925 2926 2927

  if (logs_found)
  {
    my_bool pageok;
    /*
unknown's avatar
unknown committed
2928
      TODO: scan directory for maria_log.XXXXXXXX files and find
2929
       highest XXXXXXXX & set logs_found
unknown's avatar
unknown committed
2930
      TODO: check that last checkpoint within present log addresses space
2931

unknown's avatar
unknown committed
2932
      find the log end
unknown's avatar
unknown committed
2933
    */
unknown's avatar
unknown committed
2934
    if (LSN_FILE_NO(last_checkpoint_lsn) == FILENO_IMPOSSIBLE)
2935
    {
2936
      DBUG_ASSERT(LSN_OFFSET(last_checkpoint_lsn) == 0);
2937
      /* there was no checkpoints we will read from the beginning */
2938
      sure_page= (LSN_ONE_FILE | TRANSLOG_PAGE_SIZE);
2939 2940 2941 2942
    }
    else
    {
      sure_page= last_checkpoint_lsn;
2943 2944
      DBUG_ASSERT(LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE != 0);
      sure_page-= LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE;
2945
    }
unknown's avatar
unknown committed
2946
    /* Set horizon to the beginning of the last file first */
unknown's avatar
unknown committed
2947
    log_descriptor.horizon= last_page= MAKE_LSN(last_logno, 0);
2948 2949
    if (translog_get_last_page_addr(&last_page, &pageok))
      DBUG_RETURN(1);
2950
    if (LSN_OFFSET(last_page) == 0)
2951
    {
2952
      if (LSN_FILE_NO(last_page) == 1)
2953 2954 2955 2956 2957
      {
        logs_found= 0;                          /* file #1 has no pages */
      }
      else
      {
2958
        last_page-= LSN_ONE_FILE;
2959 2960 2961 2962 2963 2964 2965 2966 2967 2968
        if (translog_get_last_page_addr(&last_page, &pageok))
          DBUG_RETURN(1);
      }
    }
  }
  if (logs_found)
  {
    TRANSLOG_ADDRESS current_page= sure_page;
    my_bool pageok;

2969
    DBUG_ASSERT(sure_page <= last_page);
2970 2971 2972

    /* TODO: check page size */

unknown's avatar
unknown committed
2973
    last_valid_page= LSN_IMPOSSIBLE;
unknown's avatar
unknown committed
2974 2975 2976 2977 2978 2979
    /*
      Scans and validate pages. We need it to show "outside" only for sure
      valid part of the log. If the log was damaged then fixed we have to
      cut off damaged part before some other process start write something
      in the log.
    */
2980 2981 2982
    do
    {
      TRANSLOG_ADDRESS current_file_last_page;
2983
      current_file_last_page= current_page;
2984 2985 2986 2987
      if (translog_get_last_page_addr(&current_file_last_page, &pageok))
        DBUG_RETURN(1);
      if (!pageok)
      {
2988 2989
        DBUG_PRINT("error", ("File %lu have no complete last page",
                             (ulong) LSN_FILE_NO(current_file_last_page)));
2990 2991 2992 2993 2994 2995 2996
        old_log_was_recovered= 1;
        /* This file is not written till the end so it should be last */
        last_page= current_file_last_page;
        /* TODO: issue warning */
      }
      do
      {
unknown's avatar
unknown committed
2997
        TRANSLOG_VALIDATOR_DATA data;
unknown's avatar
unknown committed
2998
        uchar buffer[TRANSLOG_PAGE_SIZE], *page;
unknown's avatar
unknown committed
2999
        data.addr= &current_page;
3000
        if ((page= translog_get_page(&data, buffer, NULL)) == NULL)
3001 3002 3003
          DBUG_RETURN(1);
        if (data.was_recovered)
        {
unknown's avatar
unknown committed
3004 3005 3006
          DBUG_PRINT("error", ("file no: %lu (%d)  "
                               "rec_offset: 0x%lx (%lu) (%d)",
                               (ulong) LSN_FILE_NO(current_page),
3007 3008 3009 3010
                               (uint3korr(page + 3) !=
                                LSN_FILE_NO(current_page)),
                               (ulong) LSN_OFFSET(current_page),
                               (ulong) (LSN_OFFSET(current_page) /
3011 3012
                                        TRANSLOG_PAGE_SIZE),
                               (uint3korr(page) !=
3013 3014
                                LSN_OFFSET(current_page) /
                                TRANSLOG_PAGE_SIZE)));
3015 3016 3017
          old_log_was_recovered= 1;
          break;
        }
unknown's avatar
unknown committed
3018
        old_flags= page[TRANSLOG_PAGE_FLAGS];
3019
        last_valid_page= current_page;
3020 3021 3022 3023 3024
        current_page+= TRANSLOG_PAGE_SIZE; /* increase offset */
      } while (current_page <= current_file_last_page);
      current_page+= LSN_ONE_FILE;
      current_page= LSN_REPLACE_OFFSET(current_page, TRANSLOG_PAGE_SIZE);
    } while (LSN_FILE_NO(current_page) <= LSN_FILE_NO(last_page) &&
3025
             !old_log_was_recovered);
unknown's avatar
unknown committed
3026
    if (last_valid_page == LSN_IMPOSSIBLE)
3027 3028 3029 3030 3031
    {
      /* Panic!!! Even page which should be valid is invalid */
      /* TODO: issue error */
      DBUG_RETURN(1);
    }
unknown's avatar
unknown committed
3032 3033 3034 3035
    DBUG_PRINT("info", ("Last valid page is in file: %lu  "
                        "offset: %lu (0x%lx)  "
                        "Logs found: %d  was recovered: %d  "
                        "flags match: %d",
3036 3037 3038
                        (ulong) LSN_FILE_NO(last_valid_page),
                        (ulong) LSN_OFFSET(last_valid_page),
                        (ulong) LSN_OFFSET(last_valid_page),
unknown's avatar
unknown committed
3039 3040
                        logs_found, old_log_was_recovered,
                        (old_flags == flags)));
3041 3042

    /* TODO: check server ID */
unknown's avatar
unknown committed
3043
    if (logs_found && !old_log_was_recovered && old_flags == flags)
3044
    {
unknown's avatar
unknown committed
3045
      TRANSLOG_VALIDATOR_DATA data;
unknown's avatar
unknown committed
3046
      uchar buffer[TRANSLOG_PAGE_SIZE], *page;
3047
      uint16 chunk_offset;
unknown's avatar
unknown committed
3048
      data.addr= &last_valid_page;
3049
      /* continue old log */
3050 3051
      DBUG_ASSERT(LSN_FILE_NO(last_valid_page)==
                  LSN_FILE_NO(log_descriptor.horizon));
3052
      if ((page= translog_get_page(&data, buffer, NULL)) == NULL ||
3053 3054 3055 3056 3057 3058 3059
          (chunk_offset= translog_get_first_chunk_offset(page)) == 0)
        DBUG_RETURN(1);

      /* Puts filled part of old page in the buffer */
      log_descriptor.horizon= last_valid_page;
      translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
      /*
unknown's avatar
unknown committed
3060 3061
         Free space if filled with TRANSLOG_FILLER and first uchar of
         real chunk can't be TRANSLOG_FILLER
3062
      */
unknown's avatar
unknown committed
3063 3064
      while (chunk_offset < TRANSLOG_PAGE_SIZE &&
             page[chunk_offset] != TRANSLOG_FILLER)
3065 3066 3067 3068 3069
      {
        uint16 chunk_length;
        if ((chunk_length=
             translog_get_total_chunk_length(page, chunk_offset)) == 0)
          DBUG_RETURN(1);
unknown's avatar
unknown committed
3070
        DBUG_PRINT("info", ("chunk: offset: %u  length: %u",
3071 3072 3073 3074 3075 3076
                            (uint) chunk_offset, (uint) chunk_length));
        chunk_offset+= chunk_length;

        /* chunk can't cross the page border */
        DBUG_ASSERT(chunk_offset <= TRANSLOG_PAGE_SIZE);
      }
unknown's avatar
unknown committed
3077
      memcpy(log_descriptor.buffers->buffer, page, chunk_offset);
3078 3079
      log_descriptor.bc.buffer->size+= chunk_offset;
      log_descriptor.bc.ptr+= chunk_offset;
unknown's avatar
unknown committed
3080
      log_descriptor.bc.current_page_fill= chunk_offset;
3081 3082 3083
      log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
                                                 (chunk_offset +
                                                  LSN_OFFSET(last_valid_page)));
unknown's avatar
unknown committed
3084
      DBUG_PRINT("info", ("Move Page #%u: 0x%lx  chaser: %d  Size: %lu (%lu)",
3085 3086 3087 3088
                          (uint) log_descriptor.bc.buffer_no,
                          (ulong) log_descriptor.bc.buffer,
                          log_descriptor.bc.chaser,
                          (ulong) log_descriptor.bc.buffer->size,
unknown's avatar
unknown committed
3089
                          (ulong) (log_descriptor.bc.ptr - log_descriptor.bc.
3090
                                   buffer->buffer)));
unknown's avatar
unknown committed
3091
      translog_check_cursor(&log_descriptor.bc);
3092
    }
3093 3094 3095
    if (!old_log_was_recovered && old_flags == flags)
    {
      LOGHANDLER_FILE_INFO info;
3096
      if (translog_read_file_header(&info, log_descriptor.log_file_num[0]))
3097 3098 3099
        DBUG_RETURN(1);
      version_changed= (info.maria_version != TRANSLOG_VERSION_ID);
    }
3100
  }
unknown's avatar
unknown committed
3101
  DBUG_PRINT("info", ("Logs found: %d  was recovered: %d",
3102 3103 3104 3105 3106
                      logs_found, old_log_was_recovered));
  if (!logs_found)
  {
    /* Start new log system from scratch */
    /* Used space */
unknown's avatar
unknown committed
3107
    log_descriptor.horizon= MAKE_LSN(1, TRANSLOG_PAGE_SIZE); /* header page */
3108
    /* Current logs file number in page cache */
unknown's avatar
unknown committed
3109
    if ((log_descriptor.log_file_num[0]=
3110
         create_logfile_by_number_no_cache(1)) == -1 ||
unknown's avatar
unknown committed
3111
        translog_write_file_header())
3112
      DBUG_RETURN(1);
unknown's avatar
unknown committed
3113
    if (ma_control_file_write_and_force(LSN_IMPOSSIBLE, 1,
3114 3115 3116 3117 3118 3119
                                        CONTROL_FILE_UPDATE_ONLY_LOGNO))
      DBUG_RETURN(1);
    /* assign buffer 0 */
    translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
    translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
  }
3120
  else if (old_log_was_recovered || old_flags != flags || version_changed)
unknown's avatar
unknown committed
3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134
  {
    /* leave the damaged file untouched */
    log_descriptor.horizon+= LSN_ONE_FILE;
    /* header page */
    log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
                                               TRANSLOG_PAGE_SIZE);
    if (translog_create_new_file())
      DBUG_RETURN(1);
    /*
      Buffer system left untouched after recovery => we should init it
      (starting from buffer 0)
    */
    translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
    translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
3135 3136 3137
  }

  /* all LSNs that are on disk are flushed */
3138
  log_descriptor.sent_to_disk=
3139 3140
    log_descriptor.flushed= log_descriptor.horizon;
  log_descriptor.in_buffers_only= log_descriptor.bc.buffer->offset;
3141
  log_descriptor.max_lsn= LSN_IMPOSSIBLE; /* set to 0 */
3142
  log_descriptor.previous_flush_horizon= log_descriptor.horizon;
unknown's avatar
unknown committed
3143
  /*
unknown's avatar
unknown committed
3144 3145 3146 3147 3148 3149
    Now 'flushed' is set to the 'horizon' value, but 'horizon' is
    (potentially) the address of the next LSN, and we want to indicate that
    all LSNs that are already on the disk are flushed, so we need to
    decrease that value by 1 (we are sure that there is no LSN on the disk
    which is greater than 'flushed', and no LSN will be created that is
    less than or equal to the value of 'flushed').
unknown's avatar
unknown committed
3150
  */
3151
  log_descriptor.flushed--; /* offset decreased */
3152
  log_descriptor.sent_to_disk--; /* offset decreased */
3153 3154 3155 3156 3157
  /*
    Log records will refer to a MARIA_SHARE by a unique 2-byte id; set up
    structures for generating 2-byte ids:
  */
  my_atomic_rwlock_init(&LOCK_id_to_share);
unknown's avatar
unknown committed
3158 3159
  id_to_share= (MARIA_SHARE **) my_malloc(SHARE_ID_MAX * sizeof(MARIA_SHARE*),
                                          MYF(MY_WME | MY_ZEROFILL));
3160 3161 3162
  if (unlikely(!id_to_share))
    DBUG_RETURN(1);
  id_to_share--; /* min id is 1 */
3163

3164
  translog_inited= 1;
3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176
  /* Check the last LSN record integrity */
  if (logs_found)
  {
    TRANSLOG_SCANNER_DATA scanner;
    TRANSLOG_ADDRESS page_addr;
    LSN last_lsn= LSN_IMPOSSIBLE;
    /*
      take very last page address and try to find LSN record on it
      if it fail take address of previous page and so on
    */
    page_addr= (log_descriptor.horizon -
                ((log_descriptor.horizon - 1) % TRANSLOG_PAGE_SIZE + 1));
unknown's avatar
unknown committed
3177
    if (translog_scanner_init(page_addr, 1, &scanner, 1))
3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248
      DBUG_RETURN(1);
    scanner.page_offset= page_overhead[scanner.page[TRANSLOG_PAGE_FLAGS]];
    for (;;)
    {
      uint chunk_type;
      chunk_type= scanner.page[scanner.page_offset] & TRANSLOG_CHUNK_TYPE;
      DBUG_PRINT("info", ("type: %x  byte: %x", (uint) chunk_type,
                          (uint) scanner.page[scanner.page_offset]));
      while (chunk_type != TRANSLOG_CHUNK_LSN &&
             chunk_type != TRANSLOG_CHUNK_FIXED &&
             scanner.page != END_OF_LOG &&
             scanner.page[scanner.page_offset] != TRANSLOG_FILLER &&
             scanner.page_addr == page_addr)
      {
        if (translog_get_next_chunk(&scanner))
        {
          translog_destroy_scanner(&scanner);
          DBUG_RETURN(1);
        }
        if (scanner.page != END_OF_LOG)
        {
          chunk_type= scanner.page[scanner.page_offset] & TRANSLOG_CHUNK_TYPE;
          DBUG_PRINT("info", ("type: %x  byte: %x", (uint) chunk_type,
                              (uint) scanner.page[scanner.page_offset]));
        }
      }
      if (chunk_type == TRANSLOG_CHUNK_LSN ||
          chunk_type == TRANSLOG_CHUNK_FIXED)
      {
        last_lsn= scanner.page_addr + scanner.page_offset;
        if (translog_get_next_chunk(&scanner))
        {
          translog_destroy_scanner(&scanner);
          DBUG_RETURN(1);
        }
        if (scanner.page == END_OF_LOG)
          break; /* it was the last record */
        chunk_type= scanner.page[scanner.page_offset] & TRANSLOG_CHUNK_TYPE;
        DBUG_PRINT("info", ("type: %x  byte: %x", (uint) chunk_type,
                            (uint) scanner.page[scanner.page_offset]));
        continue; /* try to find other record on this page */
      }

      if (last_lsn != LSN_IMPOSSIBLE)
        break; /* there is no more records on the page */

      /* We have to make step back */
      if (unlikely(LSN_OFFSET(page_addr) == TRANSLOG_PAGE_SIZE))
      {
        uint32 file_no= LSN_FILE_NO(page_addr);
        bool last_page_ok;
        /* it is beginning of the current file */
        if (unlikely(file_no == 1))
        {
          /*
            It is beginning of the log => there is no LSNs in the log =>
            There is no harm in leaving it "as-is".
          */
          DBUG_RETURN(0);
        }
        file_no--;
        page_addr= MAKE_LSN(file_no, TRANSLOG_PAGE_SIZE);
        translog_get_last_page_addr(&page_addr, &last_page_ok);
        /* page should be OK as it is not the last file */
        DBUG_ASSERT(last_page_ok);
      }
      else
      {
         page_addr-= TRANSLOG_PAGE_SIZE;
      }
      translog_destroy_scanner(&scanner);
unknown's avatar
unknown committed
3249
      if (translog_scanner_init(page_addr, 1, &scanner, 1))
3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299
        DBUG_RETURN(1);
      scanner.page_offset= page_overhead[scanner.page[TRANSLOG_PAGE_FLAGS]];
    }
    translog_destroy_scanner(&scanner);

    /* Now scanner points to the last LSN chunk, lets check it */
    {
      TRANSLOG_HEADER_BUFFER rec;
      translog_size_t rec_len;
      int len;
      uchar buffer[1];
      DBUG_PRINT("info", ("going to check the last found record (%lu,0x%lx)",
                          LSN_IN_PARTS(last_lsn)));

      len=
        translog_read_record_header(last_lsn, &rec);
      if (unlikely (len == RECHEADER_READ_ERROR ||
                    len == RECHEADER_READ_EOF))
      {
        DBUG_PRINT("error", ("unexpected end of log or record during "
                             "reading record header: (%lu,0x%lx)  len: %d",
                             LSN_IN_PARTS(last_lsn), len));
        if (translog_truncate_log(last_lsn))
          DBUG_RETURN(1);
      }
      else
      {
        DBUG_ASSERT(last_lsn == rec.lsn);
        if (likely(rec.record_length != 0))
        {
          /*
            Reading the last byte of record will trigger scanning all
            record chunks for now
          */
          rec_len= translog_read_record(rec.lsn, rec.record_length - 1, 1,
                                        buffer, NULL);
          if (rec_len != 1)
          {
            DBUG_PRINT("error", ("unexpected end of log or record during "
                                 "reading record body: (%lu,0x%lx)  len: %d",
                                 LSN_IN_PARTS(rec.lsn),
                                 len));
            if (translog_truncate_log(last_lsn))
              DBUG_RETURN(1);
          }
        }
      }
    }
  }

3300 3301 3302 3303 3304
  DBUG_RETURN(0);
}


/*
unknown's avatar
unknown committed
3305
  @brief Free transaction log file buffer.
3306

unknown's avatar
unknown committed
3307
  @param buffer_no       The buffer to free
3308 3309 3310 3311 3312 3313
*/

static void translog_buffer_destroy(struct st_translog_buffer *buffer)
{
  DBUG_ENTER("translog_buffer_destroy");
  DBUG_PRINT("enter",
unknown's avatar
unknown committed
3314
             ("Buffer #%u: 0x%lx  file: %d  offset: (%lu,0x%lx)  size: %lu",
3315
              (uint) buffer->buffer_no, (ulong) buffer,
unknown's avatar
unknown committed
3316
              buffer->file,
unknown's avatar
unknown committed
3317
              LSN_IN_PARTS(buffer->offset),
3318
              (ulong) buffer->size));
unknown's avatar
unknown committed
3319
  if (buffer->file != -1)
3320 3321
  {
    /*
unknown's avatar
unknown committed
3322
       We ignore errors here, because we can't do something about it
3323 3324
       (it is shutting down)
    */
3325
    translog_buffer_lock(buffer);
3326
    translog_buffer_flush(buffer);
3327
    translog_buffer_unlock(buffer);
3328
  }
unknown's avatar
unknown committed
3329
  DBUG_PRINT("info", ("Destroy mutex: 0x%lx", (ulong) &buffer->mutex));
3330
  pthread_mutex_destroy(&buffer->mutex);
unknown's avatar
unknown committed
3331
  pthread_cond_destroy(&buffer->waiting_filling_buffer);
3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344
  DBUG_VOID_RETURN;
}


/*
  Free log handler resources

  SYNOPSIS
    translog_destroy()
*/

void translog_destroy()
{
unknown's avatar
unknown committed
3345
  uint i;
3346
  DBUG_ENTER("translog_destroy");
3347

3348 3349 3350 3351 3352 3353
  DBUG_ASSERT(translog_inited);
  translog_lock();
  translog_inited= 0;
  if (log_descriptor.bc.buffer->file != -1)
    translog_finish_page(&log_descriptor.horizon, &log_descriptor.bc);
  translog_unlock();
3354

3355 3356 3357 3358 3359
  for (i= 0; i < TRANSLOG_BUFFERS_NO; i++)
  {
    struct st_translog_buffer *buffer= log_descriptor.buffers + i;
    translog_buffer_destroy(buffer);
  }
3360

3361 3362 3363 3364 3365
  /* close files */
  for (i= 0; i < OPENED_FILES_NUM; i++)
  {
    if (log_descriptor.log_file_num[i] != -1)
      translog_close_log_file(log_descriptor.log_file_num[i]);
3366
  }
3367 3368 3369 3370 3371 3372 3373 3374 3375 3376
  pthread_mutex_destroy(&log_descriptor.sent_to_disk_lock);
  pthread_mutex_destroy(&log_descriptor.file_header_lock);
  pthread_mutex_destroy(&log_descriptor.unfinished_files_lock);
  pthread_mutex_destroy(&log_descriptor.purger_lock);
  pthread_mutex_destroy(&log_descriptor.log_flush_lock);
  delete_dynamic(&log_descriptor.unfinished_files);

  my_close(log_descriptor.directory_fd, MYF(MY_WME));
  my_atomic_rwlock_destroy(&LOCK_id_to_share);
  my_free((uchar*)(id_to_share + 1), MYF(MY_ALLOW_ZERO_PTR));
3377 3378 3379 3380
  DBUG_VOID_RETURN;
}


unknown's avatar
unknown committed
3381

3382

3383

3384
/**
  @brief Starts new page.

  @param horizon         \ Position in file and buffer where we are
  @param cursor          /
  @param prev_buffer     Buffer which should be flushed will be assigned here.
                         This is always set (to NULL if nothing to flush),
                         including when the function returns an error.

  @note We do not want to flush the buffer immediately because we want to
  let caller of this function first advance 'horizon' pointer and unlock the
  loghandler and only then flush the log which can take some time.

  @note The switch to the next buffer happens when either the current buffer
  has no room for one more page or the file offset of 'horizon' is beyond
  (log_file_max_size - TRANSLOG_PAGE_SIZE); otherwise the current page is
  finished and a new page header is written in the same buffer.

  @retval 0 OK
  @retval 1 Error
*/

static my_bool translog_page_next(TRANSLOG_ADDRESS *horizon,
                                  struct st_buffer_cursor *cursor,
                                  struct st_translog_buffer **prev_buffer)
{
  struct st_translog_buffer *buffer= cursor->buffer;
  my_bool no_room_in_buffer, no_room_in_file;
  DBUG_ENTER("translog_page_next");

  /*
    Assign the output right away: callers inspect it even if we fail below,
    so it must never be left indeterminate.
  */
  *prev_buffer= NULL;

  no_room_in_buffer= (cursor->ptr + TRANSLOG_PAGE_SIZE >
                      cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER);
  no_room_in_file= (LSN_OFFSET(*horizon) >
                    log_descriptor.log_file_max_size - TRANSLOG_PAGE_SIZE);

  if (no_room_in_buffer || no_room_in_file)
  {
    DBUG_PRINT("info", ("Switch to next buffer  Buffer Size: %lu (%lu) => %d  "
                        "File size: %lu  max: %lu => %d",
                        (ulong) cursor->buffer->size,
                        (ulong) (cursor->ptr - cursor->buffer->buffer),
                        (int) no_room_in_buffer,
                        (ulong) LSN_OFFSET(*horizon),
                        (ulong) log_descriptor.log_file_max_size,
                        (int) no_room_in_file));
    /* the last argument is the "file is exhausted" condition */
    if (translog_buffer_next(horizon, cursor, no_room_in_file))
      DBUG_RETURN(1);
    /* the buffer we have just left is the one the caller has to flush */
    *prev_buffer= buffer;
    DBUG_PRINT("info", ("Buffer #%u (0x%lu): have to be flushed",
                        (uint) buffer->buffer_no, (ulong) buffer));
  }
  else
  {
    DBUG_PRINT("info", ("Use the same buffer #%u (0x%lu): "
                        "Buffer Size: %lu (%lu)",
                        (uint) buffer->buffer_no,
                        (ulong) buffer,
                        (ulong) cursor->buffer->size,
                        (ulong) (cursor->ptr - cursor->buffer->buffer)));
    translog_finish_page(horizon, cursor);
    translog_new_page_header(horizon, cursor);
    /* nothing to flush: *prev_buffer stays NULL */
  }
  DBUG_RETURN(0);
}


/*
  Write data of given length to the current page

  SYNOPSIS
    translog_write_data_on_page()
    horizon              \ Pointers on file and buffer
    cursor               /
    length               IN     length of the chunk
    buffer               buffer with data

  RETURN
unknown's avatar
unknown committed
3459 3460
    0  OK
    1  Error
3461 3462
*/

3463 3464 3465
static my_bool translog_write_data_on_page(TRANSLOG_ADDRESS *horizon,
                                           struct st_buffer_cursor *cursor,
                                           translog_size_t length,
unknown's avatar
unknown committed
3466
                                           uchar *buffer)
3467 3468
{
  DBUG_ENTER("translog_write_data_on_page");
unknown's avatar
unknown committed
3469 3470
  DBUG_PRINT("enter", ("Chunk length: %lu  Page size %u",
                       (ulong) length, (uint) cursor->current_page_fill));
3471
  DBUG_ASSERT(length > 0);
unknown's avatar
unknown committed
3472
  DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
unknown's avatar
unknown committed
3473
  DBUG_ASSERT(length + cursor->ptr <= cursor->buffer->buffer +
3474 3475
              TRANSLOG_WRITE_BUFFER);

unknown's avatar
unknown committed
3476
  memcpy(cursor->ptr, buffer, length);
3477
  cursor->ptr+= length;
unknown's avatar
unknown committed
3478 3479
  (*horizon)+= length; /* adds offset */
  cursor->current_page_fill+= length;
3480 3481
  if (!cursor->chaser)
    cursor->buffer->size+= length;
unknown's avatar
unknown committed
3482 3483
  DBUG_PRINT("info", ("Write data buffer #%u: 0x%lx  "
                      "chaser: %d  Size: %lu (%lu)",
3484 3485
                      (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
                      cursor->chaser, (ulong) cursor->buffer->size,
unknown's avatar
unknown committed
3486
                      (ulong) (cursor->ptr - cursor->buffer->buffer)));
unknown's avatar
unknown committed
3487
  translog_check_cursor(cursor);
3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503

  DBUG_RETURN(0);
}


/*
  Write data from parts of given length to the current page

  SYNOPSIS
    translog_write_parts_on_page()
    horizon              \ Pointers on file and buffer
    cursor               /
    length               IN     length of the chunk
    parts                IN/OUT chunk source

  RETURN
unknown's avatar
unknown committed
3504 3505
    0  OK
    1  Error
3506 3507
*/

3508 3509 3510 3511
static my_bool translog_write_parts_on_page(TRANSLOG_ADDRESS *horizon,
                                            struct st_buffer_cursor *cursor,
                                            translog_size_t length,
                                            struct st_translog_parts *parts)
3512 3513 3514 3515
{
  translog_size_t left= length;
  uint cur= (uint) parts->current;
  DBUG_ENTER("translog_write_parts_on_page");
unknown's avatar
unknown committed
3516
  DBUG_PRINT("enter", ("Chunk length: %lu  parts: %u of %u. Page size: %u  "
3517 3518
                       "Buffer size: %lu (%lu)",
                       (ulong) length,
3519
                       (uint) (cur + 1), (uint) parts->elements,
unknown's avatar
unknown committed
3520
                       (uint) cursor->current_page_fill,
3521
                       (ulong) cursor->buffer->size,
unknown's avatar
unknown committed
3522
                       (ulong) (cursor->ptr - cursor->buffer->buffer)));
3523
  DBUG_ASSERT(length > 0);
unknown's avatar
unknown committed
3524
  DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
unknown's avatar
unknown committed
3525
  DBUG_ASSERT(length + cursor->ptr <= cursor->buffer->buffer +
3526 3527 3528 3529 3530
              TRANSLOG_WRITE_BUFFER);

  do
  {
    translog_size_t len;
3531
    LEX_STRING *part;
unknown's avatar
unknown committed
3532
    uchar *buff;
3533

3534 3535
    DBUG_ASSERT(cur < parts->elements);
    part= parts->parts + cur;
unknown's avatar
unknown committed
3536
    buff= (uchar*) part->str;
3537 3538 3539
    DBUG_PRINT("info", ("Part: %u  Length: %lu  left: %lu  buff: 0x%lx",
                        (uint) (cur + 1), (ulong) part->length, (ulong) left,
                        (ulong) buff));
3540

3541
    if (part->length > left)
3542 3543 3544
    {
      /* we should write less then the current part */
      len= left;
3545 3546
      part->length-= len;
      part->str+= len;
unknown's avatar
unknown committed
3547
      DBUG_PRINT("info", ("Set new part: %u  Length: %lu",
3548
                          (uint) (cur + 1), (ulong) part->length));
3549 3550 3551
    }
    else
    {
3552
      len= part->length;
3553 3554 3555 3556 3557
      cur++;
      DBUG_PRINT("info", ("moved to next part (len: %lu)", (ulong) len));
    }
    DBUG_PRINT("info", ("copy: 0x%lx <- 0x%lx  %u",
                        (ulong) cursor->ptr, (ulong)buff, (uint)len));
3558 3559 3560 3561 3562 3563
    if (likely(len))
    {
      memcpy(cursor->ptr, buff, len);
      left-= len;
      cursor->ptr+= len;
    }
3564 3565
  } while (left);

unknown's avatar
unknown committed
3566
  DBUG_PRINT("info", ("Horizon: (%lu,0x%lx)  Length %lu(0x%lx)",
unknown's avatar
unknown committed
3567
                      LSN_IN_PARTS(*horizon),
3568
                      (ulong) length, (ulong) length));
3569
  parts->current= cur;
unknown's avatar
unknown committed
3570 3571
  (*horizon)+= length; /* offset increasing */
  cursor->current_page_fill+= length;
3572 3573
  if (!cursor->chaser)
    cursor->buffer->size+= length;
unknown's avatar
unknown committed
3574 3575 3576 3577
  /*
    We do not not updating parts->total_record_length here because it is
    need only before writing record to have total length
  */
3578 3579
  DBUG_PRINT("info", ("Write parts buffer #%u: 0x%lx  "
                      "chaser: %d  Size: %lu (%lu)  "
unknown's avatar
unknown committed
3580
                      "Horizon: (%lu,0x%lx)  buff offset: 0x%lx",
3581 3582
                      (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
                      cursor->chaser, (ulong) cursor->buffer->size,
unknown's avatar
unknown committed
3583
                      (ulong) (cursor->ptr - cursor->buffer->buffer),
unknown's avatar
unknown committed
3584
                      LSN_IN_PARTS(*horizon),
3585 3586
                      (ulong) (LSN_OFFSET(cursor->buffer->offset) +
                               cursor->buffer->size)));
unknown's avatar
unknown committed
3587
  translog_check_cursor(cursor);
3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598

  DBUG_RETURN(0);
}


/*
  Put 1 group chunk type 0 header into parts array

  SYNOPSIS
    translog_write_variable_record_1group_header()
    parts                Descriptor of record source parts
unknown's avatar
unknown committed
3599
    type                 The log record type
3600
    short_trid           Short transaction ID or 0 if it has no sense
3601 3602 3603 3604 3605 3606 3607 3608 3609
    header_length        Calculated header length of chunk type 0
    chunk0_header        Buffer for the chunk header writing
*/

static void
translog_write_variable_record_1group_header(struct st_translog_parts *parts,
                                             enum translog_record_type type,
                                             SHORT_TRANSACTION_ID short_trid,
                                             uint16 header_length,
unknown's avatar
unknown committed
3610
                                             uchar *chunk0_header)
3611
{
3612
  LEX_STRING *part;
unknown's avatar
unknown committed
3613
  DBUG_ASSERT(parts->current != 0);     /* first part is left for header */
3614 3615 3616
  part= parts->parts + (--parts->current);
  parts->total_record_length+= (part->length= header_length);
  part->str= (char*)chunk0_header;
unknown's avatar
unknown committed
3617
  /* puts chunk type */
unknown's avatar
unknown committed
3618
  *chunk0_header= (uchar) (type | TRANSLOG_CHUNK_LSN);
3619
  int2store(chunk0_header + 1, short_trid);
unknown's avatar
unknown committed
3620
  /* puts record length */
3621 3622 3623
  translog_write_variable_record_1group_code_len(chunk0_header + 3,
                                                 parts->record_length,
                                                 header_length);
unknown's avatar
unknown committed
3624
  /* puts 0 as chunk length which indicate 1 group record */
3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636
  int2store(chunk0_header + header_length - 2, 0);
}


/*
  Increase number of writers for this buffer

  SYNOPSIS
    translog_buffer_increase_writers()
    buffer               target buffer
*/

unknown's avatar
unknown committed
3637 3638
static inline void
translog_buffer_increase_writers(struct st_translog_buffer *buffer)
3639 3640
{
  DBUG_ENTER("translog_buffer_increase_writers");
unknown's avatar
unknown committed
3641
  translog_buffer_lock_assert_owner(buffer);
3642
  buffer->copy_to_buffer_in_progress++;
3643
  DBUG_PRINT("info", ("copy_to_buffer_in_progress. Buffer #%u  0x%lx  progress: %d",
3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661
                      (uint) buffer->buffer_no, (ulong) buffer,
                      buffer->copy_to_buffer_in_progress));
  DBUG_VOID_RETURN;
}


/*
  Decrease number of writers for this buffer

  SYNOPSIS
    translog_buffer_decrease_writers()
    buffer               target buffer
*/


static void translog_buffer_decrease_writers(struct st_translog_buffer *buffer)
{
  DBUG_ENTER("translog_buffer_decrease_writers");
unknown's avatar
unknown committed
3662
  translog_buffer_lock_assert_owner(buffer);
3663
  buffer->copy_to_buffer_in_progress--;
3664 3665 3666 3667
  DBUG_PRINT("info",
             ("copy_to_buffer_in_progress. Buffer #%u  0x%lx  progress: %d",
              (uint) buffer->buffer_no, (ulong) buffer,
              buffer->copy_to_buffer_in_progress));
unknown's avatar
unknown committed
3668 3669
  if (buffer->copy_to_buffer_in_progress == 0)
    pthread_cond_broadcast(&buffer->waiting_filling_buffer);
3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683
  DBUG_VOID_RETURN;
}


/*
  Put chunk 2 from new page beginning

  SYNOPSIS
    translog_write_variable_record_chunk2_page()
    parts                Descriptor of record source parts
    horizon              \ Pointers on file position and buffer
    cursor               /

  NOTES
    Starts a new page; if this made the cursor leave a buffer, that buffer
    gets its writers counter decreased and is flushed. Then a one byte
    chunk header (TRANSLOG_CHUNK_NOHDR) followed by
    log_descriptor.page_capacity_chunk_2 bytes taken from 'parts' is
    written on the new page.

  RETURN
    0  OK
    1  Error
*/

static my_bool
translog_write_variable_record_chunk2_page(struct st_translog_parts *parts,
                                           TRANSLOG_ADDRESS *horizon,
                                           struct st_buffer_cursor *cursor)
{
  /*
    Must be NULL initially: it is examined below even if
    translog_page_next() returned an error before assigning it.
  */
  struct st_translog_buffer *buffer_to_flush= NULL;
  int rc;
  uchar chunk2_header[1];
  DBUG_ENTER("translog_write_variable_record_chunk2_page");
  chunk2_header[0]= TRANSLOG_CHUNK_NOHDR;

  rc= translog_page_next(horizon, cursor, &buffer_to_flush);
  if (buffer_to_flush != NULL)
  {
    rc|= translog_buffer_lock(buffer_to_flush);
    /* the writers counter is decreased even if something failed above */
    translog_buffer_decrease_writers(buffer_to_flush);
    if (!rc)
      rc= translog_buffer_flush(buffer_to_flush);
    rc|= translog_buffer_unlock(buffer_to_flush);
  }
  if (rc)
    DBUG_RETURN(1);

  /* Puts chunk type, then chunk body; report a failure of either write */
  if (translog_write_data_on_page(horizon, cursor, 1, chunk2_header) ||
      translog_write_parts_on_page(horizon, cursor,
                                   log_descriptor.page_capacity_chunk_2,
                                   parts))
    DBUG_RETURN(1);
  DBUG_RETURN(0);
}


/*
  Put chunk 3 of requested length in the buffer from new page beginning

  SYNOPSIS
    translog_write_variable_record_chunk3_page()
    parts                Descriptor of record source parts
    length               Length of this chunk
    horizon              \ Pointers on file position and buffer
    cursor               /

  NOTES
    Starts a new page; if this made the cursor leave a buffer, that buffer
    gets its writers counter decreased and is flushed. If 'length' is 0
    only the new page is started and nothing more is written. Otherwise a
    3 byte chunk header (type TRANSLOG_CHUNK_LNGTH + 2 bytes of length) is
    put into the part just before parts->current (the caller must have left
    one free, asserted) and header plus 'length' bytes of data are written
    on the page.

  RETURN
    0  OK
    1  Error
*/

static my_bool
translog_write_variable_record_chunk3_page(struct st_translog_parts *parts,
                                           uint16 length,
                                           TRANSLOG_ADDRESS *horizon,
                                           struct st_buffer_cursor *cursor)
{
  /*
    Must be NULL initially: it is examined below even if
    translog_page_next() returned an error before assigning it.
  */
  struct st_translog_buffer *buffer_to_flush= NULL;
  LEX_STRING *part;
  int rc;
  uchar chunk3_header[1 + 2];                   /* type + chunk length */
  DBUG_ENTER("translog_write_variable_record_chunk3_page");

  rc= translog_page_next(horizon, cursor, &buffer_to_flush);
  if (buffer_to_flush != NULL)
  {
    rc|= translog_buffer_lock(buffer_to_flush);
    /* the writers counter is decreased even if something failed above */
    translog_buffer_decrease_writers(buffer_to_flush);
    if (!rc)
      rc= translog_buffer_flush(buffer_to_flush);
    rc|= translog_buffer_unlock(buffer_to_flush);
  }
  if (rc)
    DBUG_RETURN(1);
  if (length == 0)
  {
    /* It was call to write page header only (no data for chunk 3) */
    DBUG_PRINT("info", ("It is a call to make page header only"));
    DBUG_RETURN(0);
  }

  DBUG_ASSERT(parts->current != 0);       /* first part is left for header */
  part= parts->parts + (--parts->current);
  parts->total_record_length+= (part->length= 1 + 2);
  /*
    The part refers to a local array; this is safe only because the header
    is consumed by the write below, before this function returns.
  */
  part->str= (char*)chunk3_header;
  /* Puts chunk type */
  *chunk3_header= (uchar) (TRANSLOG_CHUNK_LNGTH);
  /* Puts chunk length */
  int2store(chunk3_header + 1, length);

  /* header (1 + 2 bytes) and chunk data go out in one write */
  if (translog_write_parts_on_page(horizon, cursor, length + 1 + 2, parts))
    DBUG_RETURN(1);
  DBUG_RETURN(0);
}

/**
   @brief Moves the log pointer (horizon) over the rest of the current page,
   a number of full pages, and a given amount of data on the last page.

   The distance is consumed buffer by buffer: while it does not fit into
   both the current buffer and the current log file, the current buffer is
   filled up to the nearer of the two limits and the cursor is switched to
   the next buffer (starting a new log file when the file limit was the
   nearer one).

   @param  pages           Number of full pages starting from the next one
   @param  last_page_data  Plus this data on the last page (the page
                           overhead is added to it here)

   @note The loghandler should be locked (see translog_lock_assert_owner()).

   @return Operation status
     @retval 0      OK
     @retval 1      Error
*/

static my_bool translog_advance_pointer(uint pages, uint16 last_page_data)
{
  struct st_buffer_cursor *bc= &log_descriptor.bc;
  /* Fill of the final page once the pointer has been moved */
  translog_size_t last_page_offset= (log_descriptor.page_overhead +
                                     last_page_data);
  /* Total distance which is still to be covered */
  translog_size_t left= (TRANSLOG_PAGE_SIZE -
                         bc->current_page_fill +
                         pages * TRANSLOG_PAGE_SIZE + last_page_offset);
  DBUG_ENTER("translog_advance_pointer");
  DBUG_PRINT("enter", ("Pointer:  (%lu, 0x%lx) + %u + %u pages + %u + %u",
                       LSN_IN_PARTS(log_descriptor.horizon),
                       (uint) (TRANSLOG_PAGE_SIZE -
                               bc->current_page_fill),
                       pages, (uint) log_descriptor.page_overhead,
                       (uint) last_page_data));
  translog_lock_assert_owner();

  /*
    The loop will be executed 1-3 times. Usually we advance the pointer
    to fill only the current buffer (if more than 1/2 of the buffer is
    free) or 2 buffers (the rest of the current one and all of the next).
    In case of a really huge record end, where we write the last group
    with the "table of content" of all groups and ignore buffer borders,
    we can occupy 3 buffers.
  */
  for (;;)
  {
    uint8 next_no;
    struct st_translog_buffer *next;
    struct st_translog_buffer *prev;
    translog_size_t buffer_room, file_room, step;
    my_bool file_ends_first;

    buffer_room= TRANSLOG_WRITE_BUFFER - bc->buffer->size;
    file_room= (log_descriptor.log_file_max_size -
                LSN_OFFSET(log_descriptor.horizon));
    DBUG_PRINT("info", ("offset: %lu  buffer_end_offs: %lu, "
                        "file_end_offs:  %lu",
                        (ulong) left, (ulong) buffer_room,
                        (ulong) file_room));
    DBUG_PRINT("info", ("Buff #%u %u (0x%lx) offset 0x%lx + size 0x%lx = "
                        "0x%lx (0x%lx)",
                        (uint) bc->buffer->buffer_no,
                        (uint) bc->buffer_no,
                        (ulong) bc->buffer,
                        (ulong) LSN_OFFSET(bc->buffer->offset),
                        (ulong) bc->buffer->size,
                        (ulong) (LSN_OFFSET(bc->buffer->offset) +
                                 bc->buffer->size),
                        (ulong) LSN_OFFSET(log_descriptor.horizon)));
    DBUG_ASSERT(LSN_OFFSET(bc->buffer->offset) +
                bc->buffer->size ==
                LSN_OFFSET(log_descriptor.horizon));

    /* Everything that is left fits into this buffer and this file */
    if (left <= buffer_room && left <= file_room)
      break;

    prev= bc->buffer;
    next_no= (bc->buffer_no + 1) % TRANSLOG_BUFFERS_NO;
    next= log_descriptor.buffers + next_no;

    translog_buffer_lock(next);
    translog_wait_for_buffer_free(next);

    /* Consume the current buffer up to the nearer limit */
    file_ends_first= (file_room <= buffer_room);
    step= (buffer_room < file_room) ? buffer_room : file_room;
    /* TODO: check is it ptr or size enough */
    bc->buffer->size+= step;
    bc->ptr+= step;
    DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx  chaser: %d  Size: %lu (%lu)",
                        (uint) bc->buffer->buffer_no,
                        (ulong) bc->buffer,
                        bc->chaser,
                        (ulong) bc->buffer->size,
                        (ulong) (bc->ptr - bc->buffer->buffer)));
    DBUG_ASSERT((ulong) (bc->ptr - bc->buffer->buffer) ==
                bc->buffer->size);
    DBUG_ASSERT(bc->buffer->buffer_no == bc->buffer_no);
    translog_buffer_increase_writers(bc->buffer);

    if (file_ends_first)
    {
      /* Continue at the first data page offset of the next log file */
      log_descriptor.horizon+= LSN_ONE_FILE;
      log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
                                                 TRANSLOG_PAGE_SIZE);
      DBUG_PRINT("info", ("New file: %lu",
                          (ulong) LSN_FILE_NO(log_descriptor.horizon)));
      if (translog_create_new_file())
        DBUG_RETURN(1);
    }
    else
    {
      DBUG_PRINT("info", ("The same file"));
      log_descriptor.horizon+= step; /* offset increasing */
    }

    /* Make the next buffer current, then chain and release the old one */
    translog_start_buffer(next, bc, next_no);
    prev->next_buffer_offset= next->offset;
    if (translog_buffer_unlock(prev))
      DBUG_RETURN(1);
    left-= step;
  }

  /* The remainder is placed into the (now) current buffer */
  bc->ptr+= left;
  bc->buffer->size+= left;
  translog_buffer_increase_writers(bc->buffer);
  log_descriptor.horizon+= left; /* offset increasing */
  bc->current_page_fill= last_page_offset;
  DBUG_PRINT("info", ("drop write_counter"));
  bc->write_counter= 0;
  bc->previous_offset= 0;
  DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx  chaser: %d  Size: %lu (%lu)  "
                      "offset: %u  last page: %u",
                      (uint) bc->buffer->buffer_no,
                      (ulong) bc->buffer,
                      bc->chaser,
                      (ulong) bc->buffer->size,
                      (ulong) (bc->ptr - bc->buffer->buffer),
                      (uint) left,
                      (uint) last_page_offset));
  DBUG_PRINT("info",
             ("pointer moved to: (%lu, 0x%lx)",
              LSN_IN_PARTS(log_descriptor.horizon)));
  translog_check_cursor(bc);
  bc->protected= 0;
  DBUG_RETURN(0);
}



/*
  Get page rest

  SYNOPSIS
    translog_get_current_page_rest()

  NOTE loghandler should be locked

  RETURN
    number of bytes left on the current page
*/

unknown's avatar
unknown committed
3935 3936 3937 3938 3939
static uint translog_get_current_page_rest()
{
  return (TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill);
}

3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952

/*
  Get buffer rest in full pages

  SYNOPSIS
     translog_get_current_buffer_rest()

  NOTE loghandler should be locked

  RETURN
    number of full pages left on the current buffer
*/

unknown's avatar
unknown committed
3953 3954 3955 3956 3957 3958
static uint translog_get_current_buffer_rest()
{
  return ((log_descriptor.bc.buffer->buffer + TRANSLOG_WRITE_BUFFER -
           log_descriptor.bc.ptr) /
          TRANSLOG_PAGE_SIZE);
}
3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976

/*
  Calculate possible group size without first (current) page

  SYNOPSIS
    translog_get_current_group_size()

  NOTE loghandler should be locked

  RETURN
    group size without first (current) page
*/

static translog_size_t translog_get_current_group_size()
{
  /* buffer rest in full pages */
  translog_size_t buffer_rest= translog_get_current_buffer_rest();
  DBUG_ENTER("translog_get_current_group_size");
unknown's avatar
unknown committed
3977
  DBUG_PRINT("info", ("buffer_rest in pages: %u", buffer_rest));
3978 3979 3980 3981 3982

  buffer_rest*= log_descriptor.page_capacity_chunk_2;
  /* in case of only half of buffer free we can write this and next buffer */
  if (buffer_rest < log_descriptor.half_buffer_capacity_chunk_2)
  {
unknown's avatar
unknown committed
3983 3984
    DBUG_PRINT("info", ("buffer_rest: %lu -> add %lu",
                        (ulong) buffer_rest,
3985 3986 3987 3988
                        (ulong) log_descriptor.buffer_capacity_chunk_2));
    buffer_rest+= log_descriptor.buffer_capacity_chunk_2;
  }

unknown's avatar
unknown committed
3989
  DBUG_PRINT("info", ("buffer_rest: %lu", (ulong) buffer_rest));
3990 3991 3992 3993 3994

  DBUG_RETURN(buffer_rest);
}


unknown's avatar
unknown committed
3995 3996
/**
   @brief Write variable record in 1 group.
3997

unknown's avatar
unknown committed
3998 3999 4000 4001 4002 4003 4004 4005 4006 4007
   @param  lsn             LSN of the record will be written here
   @param  type            the log record type
   @param  short_trid      Short transaction ID or 0 if it has no sense
   @param  parts           Descriptor of record source parts
   @param  buffer_to_flush Buffer which have to be flushed if it is not 0
   @param  header_length   Calculated header length of chunk type 0
   @param  trn             Transaction structure pointer for hooks by
                           record log type, for short_id
   @param  hook_arg        Argument which will be passed to pre-write and
                           in-write hooks of this record.
4008

unknown's avatar
unknown committed
4009 4010 4011
   @return Operation status
     @retval 0      OK
     @retval 1      Error
4012 4013 4014 4015 4016
*/

static my_bool
translog_write_variable_record_1group(LSN *lsn,
                                      enum translog_record_type type,
unknown's avatar
unknown committed
4017
                                      MARIA_HA *tbl_info,
4018 4019 4020 4021
                                      SHORT_TRANSACTION_ID short_trid,
                                      struct st_translog_parts *parts,
                                      struct st_translog_buffer
                                      *buffer_to_flush, uint16 header_length,
unknown's avatar
unknown committed
4022
                                      TRN *trn, void *hook_arg)
4023 4024 4025 4026 4027 4028 4029
{
  TRANSLOG_ADDRESS horizon;
  struct st_buffer_cursor cursor;
  int rc= 0;
  uint i;
  translog_size_t record_rest, full_pages, first_page;
  uint additional_chunk3_page= 0;
unknown's avatar
unknown committed
4030
  uchar chunk0_header[1 + 2 + 5 + 2];
4031
  DBUG_ENTER("translog_write_variable_record_1group");
unknown's avatar
unknown committed
4032
  translog_lock_assert_owner();
4033 4034

  *lsn= horizon= log_descriptor.horizon;
4035 4036 4037 4038
  if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
                             *lsn, TRUE) ||
      (log_record_type_descriptor[type].inwrite_hook &&
       (*log_record_type_descriptor[type].inwrite_hook)(type, trn, tbl_info,
unknown's avatar
unknown committed
4039
                                                        lsn, hook_arg)))
4040
  {
unknown's avatar
unknown committed
4041
    translog_unlock();
4042 4043 4044 4045 4046
    DBUG_RETURN(1);
  }
  cursor= log_descriptor.bc;
  cursor.chaser= 1;

unknown's avatar
unknown committed
4047
  /* Advance pointer to be able unlock the loghandler */
4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060
  first_page= translog_get_current_page_rest();
  record_rest= parts->record_length - (first_page - header_length);
  full_pages= record_rest / log_descriptor.page_capacity_chunk_2;
  record_rest= (record_rest % log_descriptor.page_capacity_chunk_2);

  if (record_rest + 1 == log_descriptor.page_capacity_chunk_2)
  {
    DBUG_PRINT("info", ("2 chunks type 3 is needed"));
    /* We will write 2 chunks type 3 at the end of this group */
    additional_chunk3_page= 1;
    record_rest= 1;
  }

unknown's avatar
unknown committed
4061 4062
  DBUG_PRINT("info", ("first_page: %u (%u)  full_pages: %u (%lu)  "
                      "additional: %u (%u)  rest %u = %u",
4063 4064 4065 4066 4067 4068 4069 4070 4071
                      first_page, first_page - header_length,
                      full_pages,
                      (ulong) full_pages *
                      log_descriptor.page_capacity_chunk_2,
                      additional_chunk3_page,
                      additional_chunk3_page *
                      (log_descriptor.page_capacity_chunk_2 - 1),
                      record_rest, parts->record_length));
  /* record_rest + 3 is chunk type 3 overhead + record_rest */
unknown's avatar
unknown committed
4072 4073
  rc|= translog_advance_pointer(full_pages + additional_chunk3_page,
                                (record_rest ? record_rest + 3 : 0));
4074 4075 4076 4077 4078
  log_descriptor.bc.buffer->last_lsn= *lsn;

  rc|= translog_unlock();

  /*
unknown's avatar
unknown committed
4079
     Check if we switched buffer and need process it (current buffer is
4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097
     unlocked already => we will not delay other threads
  */
  if (buffer_to_flush != NULL)
  {
    if (!rc)
      rc= translog_buffer_flush(buffer_to_flush);
    rc|= translog_buffer_unlock(buffer_to_flush);
  }
  if (rc)
    DBUG_RETURN(1);

  translog_write_variable_record_1group_header(parts, type, short_trid,
                                               header_length, chunk0_header);

  /* fill the pages */
  translog_write_parts_on_page(&horizon, &cursor, first_page, parts);


unknown's avatar
unknown committed
4098
  DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx)",
unknown's avatar
unknown committed
4099 4100
                      LSN_IN_PARTS(log_descriptor.horizon),
                      LSN_IN_PARTS(horizon)));
4101 4102 4103 4104 4105 4106

  for (i= 0; i < full_pages; i++)
  {
    if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
      DBUG_RETURN(1);

unknown's avatar
unknown committed
4107
    DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx)",
unknown's avatar
unknown committed
4108 4109
                        LSN_IN_PARTS(log_descriptor.horizon),
                        LSN_IN_PARTS(horizon)));
4110 4111 4112 4113 4114 4115 4116 4117 4118
  }

  if (additional_chunk3_page)
  {
    if (translog_write_variable_record_chunk3_page(parts,
                                                   log_descriptor.
                                                   page_capacity_chunk_2 - 2,
                                                   &horizon, &cursor))
      DBUG_RETURN(1);
unknown's avatar
unknown committed
4119
    DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx)",
unknown's avatar
unknown committed
4120 4121
                        LSN_IN_PARTS(log_descriptor.horizon),
                        LSN_IN_PARTS(horizon)));
unknown's avatar
unknown committed
4122
    DBUG_ASSERT(cursor.current_page_fill == TRANSLOG_PAGE_SIZE);
4123 4124 4125 4126 4127 4128
  }

  if (translog_write_variable_record_chunk3_page(parts,
                                                 record_rest,
                                                 &horizon, &cursor))
    DBUG_RETURN(1);
unknown's avatar
unknown committed
4129
    DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx)",
4130 4131 4132 4133
                        (ulong) LSN_FILE_NO(log_descriptor.horizon),
                        (ulong) LSN_OFFSET(log_descriptor.horizon),
                        (ulong) LSN_FILE_NO(horizon),
                        (ulong) LSN_OFFSET(horizon)));
4134

unknown's avatar
unknown committed
4135
  if (!(rc= translog_buffer_lock(cursor.buffer)))
4136 4137 4138 4139 4140 4141 4142 4143
  {
    translog_buffer_decrease_writers(cursor.buffer);
  }
  rc|= translog_buffer_unlock(cursor.buffer);
  DBUG_RETURN(rc);
}


unknown's avatar
unknown committed
4144 4145
/**
   @brief Write variable record in 1 chunk.
4146

unknown's avatar
unknown committed
4147 4148 4149 4150 4151 4152 4153 4154 4155 4156
   @param  lsn             LSN of the record will be written here
   @param  type            the log record type
   @param  short_trid      Short transaction ID or 0 if it has no sense
   @param  parts           Descriptor of record source parts
   @param  buffer_to_flush Buffer which have to be flushed if it is not 0
   @param  header_length   Calculated header length of chunk type 0
   @param  trn             Transaction structure pointer for hooks by
                           record log type, for short_id
   @param  hook_arg        Argument which will be passed to pre-write and
                           in-write hooks of this record.
4157

unknown's avatar
unknown committed
4158 4159 4160
   @return Operation status
     @retval 0      OK
     @retval 1      Error
4161 4162 4163 4164 4165
*/

static my_bool
translog_write_variable_record_1chunk(LSN *lsn,
                                      enum translog_record_type type,
unknown's avatar
unknown committed
4166
                                      MARIA_HA *tbl_info,
4167 4168 4169 4170
                                      SHORT_TRANSACTION_ID short_trid,
                                      struct st_translog_parts *parts,
                                      struct st_translog_buffer
                                      *buffer_to_flush, uint16 header_length,
unknown's avatar
unknown committed
4171
                                      TRN *trn, void *hook_arg)
4172 4173
{
  int rc;
unknown's avatar
unknown committed
4174
  uchar chunk0_header[1 + 2 + 5 + 2];
4175
  DBUG_ENTER("translog_write_variable_record_1chunk");
unknown's avatar
unknown committed
4176
  translog_lock_assert_owner();
4177 4178 4179 4180 4181

  translog_write_variable_record_1group_header(parts, type, short_trid,
                                               header_length, chunk0_header);

  *lsn= log_descriptor.horizon;
4182 4183 4184 4185
  if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
                                 *lsn, TRUE) ||
      (log_record_type_descriptor[type].inwrite_hook &&
       (*log_record_type_descriptor[type].inwrite_hook)(type, trn, tbl_info,
unknown's avatar
unknown committed
4186
                                                        lsn, hook_arg)))
4187
  {
unknown's avatar
unknown committed
4188
    translog_unlock();
4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213
    DBUG_RETURN(1);
  }

  rc= translog_write_parts_on_page(&log_descriptor.horizon,
                                   &log_descriptor.bc,
                                   parts->total_record_length, parts);
  log_descriptor.bc.buffer->last_lsn= *lsn;
  rc|= translog_unlock();

  /*
     check if we switched buffer and need process it (current buffer is
     unlocked already => we will not delay other threads
  */
  if (buffer_to_flush != NULL)
  {
    if (!rc)
      rc= translog_buffer_flush(buffer_to_flush);
    rc|= translog_buffer_unlock(buffer_to_flush);
  }

  DBUG_RETURN(rc);
}


/*
unknown's avatar
unknown committed
4214
  @brief Calculates and write LSN difference (compressed LSN).
4215

unknown's avatar
unknown committed
4216 4217 4218
  @param base_lsn        LSN from which we calculate difference
  @param lsn             LSN for codding
  @param dst             Result will be written to dst[-pack_length] .. dst[-1]
4219

unknown's avatar
unknown committed
4220 4221
  @note To store an LSN in a compact way we will use the following compression:
    If a log record has LSN1, and it contains the LSN2 as a back reference,
unknown's avatar
unknown committed
4222
    Instead of LSN2 we write LSN1-LSN2, encoded as:
4223 4224 4225
     two bits     the number N (see below)
     14 bits
     N bytes
unknown's avatar
unknown committed
4226
     That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2
4227 4228
     is stored in the first two bits.

unknown's avatar
unknown committed
4229 4230 4231
  @note function made to write the result in backward direction with no
  special sense or tricks both directions are equal in complicity

unknown's avatar
unknown committed
4232
  @retval #    pointer on coded LSN
4233 4234
*/

unknown's avatar
unknown committed
4235
static uchar *translog_put_LSN_diff(LSN base_lsn, LSN lsn, uchar *dst)
4236
{
unknown's avatar
unknown committed
4237
  uint64 diff;
4238
  DBUG_ENTER("translog_put_LSN_diff");
unknown's avatar
unknown committed
4239
  DBUG_PRINT("enter", ("Base: (0x%lu,0x%lx)  val: (0x%lu,0x%lx)  dst: 0x%lx",
unknown's avatar
unknown committed
4240 4241
                       LSN_IN_PARTS(base_lsn), LSN_IN_PARTS(lsn),
                       (ulong) dst));
unknown's avatar
unknown committed
4242 4243 4244 4245
  DBUG_ASSERT(base_lsn > lsn);
  diff= base_lsn - lsn;
  DBUG_PRINT("info", ("Diff: 0x%llx", (ulonglong) diff));
  if (diff <= 0x3FFF)
4246
  {
unknown's avatar
unknown committed
4247 4248 4249 4250 4251 4252 4253
    dst-= 2;
    /*
      Note we store this high uchar first to ensure that first uchar has
      0 in the 3 upper bits.
    */
    dst[0]= diff >> 8;
    dst[1]= (diff & 0xFF);
4254
  }
unknown's avatar
unknown committed
4255
  else if (diff <= 0x3FFFFFL)
4256
  {
unknown's avatar
unknown committed
4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267
    dst-= 3;
    dst[0]= 0x40 | (diff >> 16);
    int2store(dst + 1, diff & 0xFFFF);
  }
  else if (diff <= 0x3FFFFFFFL)
  {
    dst-= 4;
    dst[0]= 0x80 | (diff >> 24);
    int3store(dst + 1, diff & 0xFFFFFFL);
  }
  else if (diff <= LL(0x3FFFFFFFFF))
unknown's avatar
unknown committed
4268

unknown's avatar
unknown committed
4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283
  {
    dst-= 5;
    dst[0]= 0xC0 | (diff >> 32);
    int4store(dst + 1, diff & 0xFFFFFFFFL);
  }
  else
  {
    /*
      It is full LSN after special 1 diff (which is impossible
      in real life)
    */
    dst-= 2 + LSN_STORE_SIZE;
    dst[0]= 0;
    dst[1]= 1;
    lsn_store(dst + 2, lsn);
4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299
  }
  DBUG_PRINT("info", ("new dst: 0x%lx", (ulong) dst));
  DBUG_RETURN(dst);
}


/*
  Get LSN from LSN-difference (compressed LSN)

  SYNOPSIS
    translog_get_LSN_from_diff()
    base_lsn             LSN from which we calculate difference
    src                  pointer to coded lsn
    dst                  pointer to buffer where to write 7byte LSN

  NOTE:
unknown's avatar
unknown committed
4300
    To store an LSN in a compact way we will use the following compression:
4301 4302

    If a log record has LSN1, and it contains the lSN2 as a back reference,
unknown's avatar
unknown committed
4303
    Instead of LSN2 we write LSN1-LSN2, encoded as:
4304 4305 4306 4307 4308

     two bits     the number N (see below)
     14 bits
     N bytes

unknown's avatar
unknown committed
4309 4310
    That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2
    is stored in the first two bits.
4311 4312 4313 4314 4315

  RETURN
    pointer to buffer after decoded LSN
*/

unknown's avatar
unknown committed
4316
static uchar *translog_get_LSN_from_diff(LSN base_lsn, uchar *src, uchar *dst)
4317 4318 4319 4320
{
  LSN lsn;
  uint32 diff;
  uint32 first_byte;
unknown's avatar
unknown committed
4321
  uint32 file_no, rec_offset;
4322 4323
  uint8 code;
  DBUG_ENTER("translog_get_LSN_from_diff");
unknown's avatar
unknown committed
4324
  DBUG_PRINT("enter", ("Base: (0x%lx,0x%lx)  src: 0x%lx  dst 0x%lx",
unknown's avatar
unknown committed
4325
                       LSN_IN_PARTS(base_lsn), (ulong) src, (ulong) dst));
4326
  first_byte= *((uint8*) src);
4327
  code= first_byte >> 6; /* Length is in 2 most significant bits */
unknown's avatar
unknown committed
4328 4329 4330 4331 4332
  first_byte&= 0x3F;
  src++;                                        /* Skip length + encode */
  file_no= LSN_FILE_NO(base_lsn);               /* Assume relative */
  DBUG_PRINT("info", ("code: %u  first byte: %lu",
                      (uint) code, (ulong) first_byte));
4333
  switch (code) {
unknown's avatar
unknown committed
4334
  case 0:
4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345
    if (first_byte == 0 && *((uint8*)src) == 1)
    {
      /*
        It is full LSN after special 1 diff (which is impossible
        in real life)
      */
      memcpy(dst, src + 1, LSN_STORE_SIZE);
      DBUG_PRINT("info", ("Special case of full LSN, new src: 0x%lx",
                          (ulong) (src + 1 + LSN_STORE_SIZE)));
      DBUG_RETURN(src + 1 + LSN_STORE_SIZE);
    }
unknown's avatar
unknown committed
4346
    rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 8) + *((uint8*)src));
4347
    break;
unknown's avatar
unknown committed
4348 4349 4350
  case 1:
    diff= uint2korr(src);
    rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 16) + diff);
4351
    break;
unknown's avatar
unknown committed
4352 4353 4354
  case 2:
    diff= uint3korr(src);
    rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 24) + diff);
4355
    break;
unknown's avatar
unknown committed
4356
  case 3:
4357
  {
4358
    ulonglong base_offset= LSN_OFFSET(base_lsn);
unknown's avatar
unknown committed
4359
    diff= uint4korr(src);
4360
    if (diff > LSN_OFFSET(base_lsn))
4361 4362 4363
    {
      /* take 1 from file offset */
      first_byte++;
unknown's avatar
unknown committed
4364
      base_offset+= LL(0x100000000);
4365
    }
unknown's avatar
unknown committed
4366 4367
    file_no= LSN_FILE_NO(base_lsn) - first_byte;
    rec_offset= base_offset - diff;
4368 4369 4370 4371 4372 4373
    break;
  }
  default:
    DBUG_ASSERT(0);
    DBUG_RETURN(NULL);
  }
unknown's avatar
unknown committed
4374 4375 4376
  lsn= MAKE_LSN(file_no, rec_offset);
  src+= code + 1;
  lsn_store(dst, lsn);
4377
  DBUG_PRINT("info", ("new src: 0x%lx", (ulong) src));
4378 4379 4380 4381
  DBUG_RETURN(src);
}


unknown's avatar
unknown committed
4382 4383
/**
  @brief Encodes relative LSNs listed in the parameters.
4384

unknown's avatar
unknown committed
4385 4386 4387 4388
  @param parts           Parts list with encoded LSN(s)
  @param base_lsn        LSN which is base for encoding
  @param lsns            number of LSN(s) to encode
  @param compressed_LSNs buffer which can be used for storing compressed LSN(s)
4389 4390
*/

unknown's avatar
unknown committed
4391 4392 4393
static void  translog_relative_LSN_encode(struct st_translog_parts *parts,
                                          LSN base_lsn,
                                          uint lsns, uchar *compressed_LSNs)
4394
{
4395
  LEX_STRING *part;
unknown's avatar
unknown committed
4396
  uint lsns_len= lsns * LSN_STORE_SIZE;
4397 4398
  char buffer_src[MAX_NUMBER_OF_LSNS_PER_RECORD * LSN_STORE_SIZE];
  char *buffer= buffer_src;
4399 4400 4401

  DBUG_ENTER("translog_relative_LSN_encode");

4402
  DBUG_ASSERT(parts->current != 0);
4403
  part= parts->parts + parts->current;
4404

4405
  /* collect all LSN(s) in one chunk if it (they) is (are) divided */
4406
  if (part->length < lsns_len)
4407
  {
4408 4409
    uint copied= part->length;
    LEX_STRING *next_part;
unknown's avatar
unknown committed
4410
    DBUG_PRINT("info", ("Using buffer: 0x%lx", (ulong) compressed_LSNs));
unknown's avatar
unknown committed
4411
    memcpy(buffer, (uchar*)part->str, part->length);
4412
    next_part= parts->parts + parts->current + 1;
4413 4414
    do
    {
4415 4416
      DBUG_ASSERT(next_part < parts->parts + parts->elements);
      if ((next_part->length + copied) < lsns_len)
4417
      {
unknown's avatar
unknown committed
4418
        memcpy(buffer + copied, (uchar*)next_part->str,
4419 4420 4421 4422 4423
               next_part->length);
        copied+= next_part->length;
        next_part->length= 0; next_part->str= 0;
        /* delete_dynamic_element(&parts->parts, parts->current + 1); */
        next_part++;
4424 4425
        parts->current++;
        part= parts->parts + parts->current;
4426 4427 4428 4429
      }
      else
      {
        uint len= lsns_len - copied;
unknown's avatar
unknown committed
4430
        memcpy(buffer + copied, (uchar*)next_part->str, len);
4431
        copied= lsns_len;
4432 4433
        next_part->str+= len;
        next_part->length-= len;
4434 4435 4436
      }
    } while (copied < lsns_len);
  }
4437 4438 4439 4440 4441 4442 4443 4444 4445
  else
  {
    buffer= part->str;
    part->str+= lsns_len;
    part->length-= lsns_len;
    parts->current--;
    part= parts->parts + parts->current;
  }

4446 4447 4448
  {
    /* Compress */
    LSN ref;
4449
    int economy;
unknown's avatar
unknown committed
4450 4451
    uchar *src_ptr;
    uchar *dst_ptr= compressed_LSNs + (MAX_NUMBER_OF_LSNS_PER_RECORD *
4452
                                      COMPRESSED_LSN_MAX_STORE_SIZE);
unknown's avatar
unknown committed
4453 4454 4455 4456
    /*
      We write the result in backward direction with no special sense or
      tricks both directions are equal in complicity
    */
4457
    for (src_ptr= buffer + lsns_len - LSN_STORE_SIZE;
4458
         src_ptr >= (uchar*) buffer;
4459
         src_ptr-= LSN_STORE_SIZE)
4460
    {
4461
      ref= lsn_korr(src_ptr);
unknown's avatar
unknown committed
4462
      dst_ptr= translog_put_LSN_diff(base_lsn, ref, dst_ptr);
4463
    }
4464 4465 4466 4467 4468
    part->length= (uint)((compressed_LSNs +
                          (MAX_NUMBER_OF_LSNS_PER_RECORD *
                           COMPRESSED_LSN_MAX_STORE_SIZE)) -
                         dst_ptr);
    parts->record_length-= (economy= lsns_len - part->length);
4469 4470
    DBUG_PRINT("info", ("new length of LSNs: %lu  economy: %d",
                        (ulong)part->length, economy));
4471
    parts->total_record_length-= economy;
4472
    part->str= (char*)dst_ptr;
4473
  }
unknown's avatar
unknown committed
4474
  DBUG_VOID_RETURN;
4475 4476 4477
}


unknown's avatar
unknown committed
4478 4479
/**
   @brief Write multi-group variable-size record.
4480

unknown's avatar
unknown committed
4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491
   @param  lsn             LSN of the record will be written here
   @param  type            the log record type
   @param  short_trid      Short transaction ID or 0 if it has no sense
   @param  parts           Descriptor of record source parts
   @param  buffer_to_flush Buffer which have to be flushed if it is not 0
   @param  header_length   Header length calculated for 1 group
   @param  buffer_rest     Beginning from which we plan to write in full pages
   @param  trn             Transaction structure pointer for hooks by
                           record log type, for short_id
   @param  hook_arg        Argument which will be passed to pre-write and
                           in-write hooks of this record.
4492

unknown's avatar
unknown committed
4493 4494 4495
   @return Operation status
     @retval 0      OK
     @retval 1      Error
4496 4497 4498 4499 4500
*/

static my_bool
translog_write_variable_record_mgroup(LSN *lsn,
                                      enum translog_record_type type,
unknown's avatar
unknown committed
4501
                                      MARIA_HA *tbl_info,
4502 4503 4504 4505 4506 4507
                                      SHORT_TRANSACTION_ID short_trid,
                                      struct st_translog_parts *parts,
                                      struct st_translog_buffer
                                      *buffer_to_flush,
                                      uint16 header_length,
                                      translog_size_t buffer_rest,
unknown's avatar
unknown committed
4508
                                      TRN *trn, void *hook_arg)
4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522
{
  TRANSLOG_ADDRESS horizon;
  struct st_buffer_cursor cursor;
  int rc= 0;
  uint i, chunk2_page, full_pages;
  uint curr_group= 0;
  translog_size_t record_rest, first_page, chunk3_pages, chunk0_pages= 1;
  translog_size_t done= 0;
  struct st_translog_group_descriptor group;
  DYNAMIC_ARRAY groups;
  uint16 chunk3_size;
  uint16 page_capacity= log_descriptor.page_capacity_chunk_2 + 1;
  uint16 last_page_capacity;
  my_bool new_page_before_chunk0= 1, first_chunk0= 1;
unknown's avatar
unknown committed
4523 4524
  uchar chunk0_header[1 + 2 + 5 + 2 + 2], group_desc[7 + 1];
  uchar chunk2_header[1];
4525 4526
  uint header_fixed_part= header_length + 2;
  uint groups_per_page= (page_capacity - header_fixed_part) / (7 + 1);
4527
  uint file_of_the_first_group;
4528
  DBUG_ENTER("translog_write_variable_record_mgroup");
unknown's avatar
unknown committed
4529
  translog_lock_assert_owner();
4530

unknown's avatar
unknown committed
4531 4532
  chunk2_header[0]= TRANSLOG_CHUNK_NOHDR;

unknown's avatar
unknown committed
4533 4534 4535
  if (my_init_dynamic_array(&groups,
                            sizeof(struct st_translog_group_descriptor),
                            10, 10))
4536
  {
unknown's avatar
unknown committed
4537
    translog_unlock();
4538 4539 4540 4541 4542 4543 4544 4545 4546 4547
    UNRECOVERABLE_ERROR(("init array failed"));
    DBUG_RETURN(1);
  }

  first_page= translog_get_current_page_rest();
  record_rest= parts->record_length - (first_page - 1);
  DBUG_PRINT("info", ("Record Rest: %lu", (ulong) record_rest));

  if (record_rest < buffer_rest)
  {
unknown's avatar
unknown committed
4548 4549 4550 4551 4552 4553 4554
    /*
      The record (group 1 type) is larger than the free space on the page
      - we need to split it in two. But when we split it in two, the first
      part is big enough to hold all the data of the record (because the
      header of the first part of the split is smaller than the header of
      the record as a whole when it takes only one chunk)
    */
4555 4556 4557 4558 4559
    DBUG_PRINT("info", ("too many free space because changing header"));
    buffer_rest-= log_descriptor.page_capacity_chunk_2;
    DBUG_ASSERT(record_rest >= buffer_rest);
  }

4560 4561
  file_of_the_first_group= LSN_FILE_NO(log_descriptor.horizon);
  translog_mark_file_unfinished(file_of_the_first_group);
4562 4563 4564 4565 4566 4567 4568
  do
  {
    group.addr= horizon= log_descriptor.horizon;
    cursor= log_descriptor.bc;
    cursor.chaser= 1;
    if ((full_pages= buffer_rest / log_descriptor.page_capacity_chunk_2) > 255)
    {
unknown's avatar
unknown committed
4569
      /* sizeof(uint8) == 256 is max number of chunk in multi-chunks group */
4570 4571 4572 4573 4574
      full_pages= 255;
      buffer_rest= full_pages * log_descriptor.page_capacity_chunk_2;
    }
    /*
       group chunks =
unknown's avatar
unknown committed
4575
       full pages + first page (which actually can be full, too).
4576 4577 4578
       But here we assign number of chunks - 1
    */
    group.num= full_pages;
unknown's avatar
unknown committed
4579
    if (insert_dynamic(&groups, (uchar*) &group))
4580 4581
    {
      UNRECOVERABLE_ERROR(("insert into array failed"));
unknown's avatar
unknown committed
4582
      goto err_unlock;
4583 4584
    }

unknown's avatar
unknown committed
4585 4586
    DBUG_PRINT("info", ("chunk: #%u  first_page: %u (%u)  "
                        "full_pages: %lu (%lu)  "
4587 4588 4589
                        "Left %lu",
                        groups.elements,
                        first_page, first_page - 1,
4590
                        (ulong) full_pages,
unknown's avatar
unknown committed
4591 4592 4593 4594 4595 4596
                        (ulong) (full_pages *
                                 log_descriptor.page_capacity_chunk_2),
                        (ulong)(parts->record_length - (first_page - 1 +
                                                        buffer_rest) -
                                done)));
    rc|= translog_advance_pointer(full_pages, 0);
4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611

    rc|= translog_unlock();

    if (buffer_to_flush != NULL)
    {
      rc|= translog_buffer_lock(buffer_to_flush);
      translog_buffer_decrease_writers(buffer_to_flush);
      if (!rc)
        rc= translog_buffer_flush(buffer_to_flush);
      rc|= translog_buffer_unlock(buffer_to_flush);
      buffer_to_flush= NULL;
    }
    if (rc)
    {
      UNRECOVERABLE_ERROR(("flush of unlock buffer failed"));
unknown's avatar
unknown committed
4612
      goto err;
4613 4614 4615 4616
    }

    translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header);
    translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts);
unknown's avatar
unknown committed
4617 4618
    DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx)  "
                        "Left  %lu",
unknown's avatar
unknown committed
4619 4620
                        LSN_IN_PARTS(log_descriptor.horizon),
                        LSN_IN_PARTS(horizon),
4621 4622 4623 4624 4625 4626
                        (ulong) (parts->record_length - (first_page - 1) -
                                 done)));

    for (i= 0; i < full_pages; i++)
    {
      if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
unknown's avatar
unknown committed
4627
        goto err;
4628

unknown's avatar
unknown committed
4629 4630
      DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  "
                          "local: (%lu,0x%lx)  "
4631
                          "Left: %lu",
unknown's avatar
unknown committed
4632 4633
                          LSN_IN_PARTS(log_descriptor.horizon),
                          LSN_IN_PARTS(horizon),
4634 4635 4636 4637 4638 4639 4640
                          (ulong) (parts->record_length - (first_page - 1) -
                                   i * log_descriptor.page_capacity_chunk_2 -
                                   done)));
    }

    done+= (first_page - 1 + buffer_rest);

unknown's avatar
unknown committed
4641
    /* TODO: make separate function for following */
4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654
    rc= translog_page_next(&horizon, &cursor, &buffer_to_flush);
    if (buffer_to_flush != NULL)
    {
      rc|= translog_buffer_lock(buffer_to_flush);
      translog_buffer_decrease_writers(buffer_to_flush);
      if (!rc)
        rc= translog_buffer_flush(buffer_to_flush);
      rc|= translog_buffer_unlock(buffer_to_flush);
      buffer_to_flush= NULL;
    }
    if (rc)
    {
      UNRECOVERABLE_ERROR(("flush of unlock buffer failed"));
unknown's avatar
unknown committed
4655
      goto err;
4656 4657 4658 4659 4660 4661
    }
    rc= translog_buffer_lock(cursor.buffer);
    if (!rc)
      translog_buffer_decrease_writers(cursor.buffer);
    rc|= translog_buffer_unlock(cursor.buffer);
    if (rc)
unknown's avatar
unknown committed
4662
      goto err;
4663 4664 4665 4666 4667 4668 4669 4670 4671 4672

    translog_lock();

    first_page= translog_get_current_page_rest();
    buffer_rest= translog_get_current_group_size();
  } while (first_page + buffer_rest < (uint) (parts->record_length - done));

  group.addr= horizon= log_descriptor.horizon;
  cursor= log_descriptor.bc;
  cursor.chaser= 1;
4673
  group.num= 0;                       /* 0 because it does not matter */
unknown's avatar
unknown committed
4674
  if (insert_dynamic(&groups, (uchar*) &group))
4675 4676
  {
    UNRECOVERABLE_ERROR(("insert into array failed"));
unknown's avatar
unknown committed
4677
    goto err_unlock;
4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726
  }
  record_rest= parts->record_length - done;
  DBUG_PRINT("info", ("Record rest: %lu", (ulong) record_rest));
  if (first_page <= record_rest + 1)
  {
    chunk2_page= 1;
    record_rest-= (first_page - 1);
    full_pages= record_rest / log_descriptor.page_capacity_chunk_2;
    record_rest= (record_rest % log_descriptor.page_capacity_chunk_2);
    last_page_capacity= page_capacity;
  }
  else
  {
    chunk2_page= full_pages= 0;
    last_page_capacity= first_page;
  }
  chunk3_size= 0;
  chunk3_pages= 0;
  if (last_page_capacity > record_rest + 1 && record_rest != 0)
  {
    if (last_page_capacity >
        record_rest + header_fixed_part + groups.elements * (7 + 1))
    {
      /* 1 record of type 0 */
      chunk3_pages= 0;
    }
    else
    {
      chunk3_pages= 1;
      if (record_rest + 2 == last_page_capacity)
      {
        chunk3_size= record_rest - 1;
        record_rest= 1;
      }
      else
      {
        chunk3_size= record_rest;
        record_rest= 0;
      }
    }
  }
  /*
     A first non-full page will hold type 0 chunk only if it fit in it with
     all its headers
  */
  while (page_capacity <
         record_rest + header_fixed_part +
         (groups.elements - groups_per_page * (chunk0_pages - 1)) * (7 + 1))
    chunk0_pages++;
unknown's avatar
unknown committed
4727 4728
  DBUG_PRINT("info", ("chunk0_pages: %u  groups %u  groups per full page: %u  "
                      "Group on last page: %u",
4729 4730 4731 4732 4733
                      chunk0_pages, groups.elements,
                      groups_per_page,
                      (groups.elements -
                       ((page_capacity - header_fixed_part) / (7 + 1)) *
                       (chunk0_pages - 1))));
unknown's avatar
unknown committed
4734 4735
  DBUG_PRINT("info", ("first_page: %u  chunk2: %u  full_pages: %u (%lu)  "
                      "chunk3: %u (%u)  rest: %u",
4736 4737 4738 4739 4740
                      first_page,
                      chunk2_page, full_pages,
                      (ulong) full_pages *
                      log_descriptor.page_capacity_chunk_2,
                      chunk3_pages, (uint) chunk3_size, (uint) record_rest));
unknown's avatar
unknown committed
4741 4742 4743 4744 4745 4746 4747 4748 4749 4750
  rc= translog_advance_pointer(full_pages + chunk3_pages +
                               (chunk0_pages - 1),
                               record_rest + header_fixed_part +
                               (groups.elements -
                                ((page_capacity -
                                  header_fixed_part) / (7 + 1)) *
                                (chunk0_pages - 1)) * (7 + 1));
  rc|= translog_unlock();
  if (rc)
    goto err;
4751 4752 4753 4754 4755 4756

  if (chunk2_page)
  {
    DBUG_PRINT("info", ("chunk 2 to finish first page"));
    translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header);
    translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts);
unknown's avatar
unknown committed
4757
    DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx) "
4758
                        "Left: %lu",
unknown's avatar
unknown committed
4759 4760
                        LSN_IN_PARTS(log_descriptor.horizon),
                        LSN_IN_PARTS(horizon),
4761 4762 4763 4764 4765 4766 4767
                        (ulong) (parts->record_length - (first_page - 1) -
                                 done)));
  }
  else if (chunk3_pages)
  {
    DBUG_PRINT("info", ("chunk 3"));
    DBUG_ASSERT(full_pages == 0);
unknown's avatar
unknown committed
4768
    uchar chunk3_header[3];
unknown's avatar
unknown committed
4769
    chunk3_pages= 0;
4770 4771 4772 4773
    chunk3_header[0]= TRANSLOG_CHUNK_LNGTH;
    int2store(chunk3_header + 1, chunk3_size);
    translog_write_data_on_page(&horizon, &cursor, 3, chunk3_header);
    translog_write_parts_on_page(&horizon, &cursor, chunk3_size, parts);
unknown's avatar
unknown committed
4774
    DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx) "
4775
                        "Left: %lu",
unknown's avatar
unknown committed
4776 4777
                        LSN_IN_PARTS(log_descriptor.horizon),
                        LSN_IN_PARTS(horizon),
4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789
                        (ulong) (parts->record_length - chunk3_size - done)));
  }
  else
  {
    DBUG_PRINT("info", ("no new_page_before_chunk0"));
    new_page_before_chunk0= 0;
  }

  for (i= 0; i < full_pages; i++)
  {
    DBUG_ASSERT(chunk2_page != 0);
    if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
unknown's avatar
unknown committed
4790
      goto err;
4791

unknown's avatar
unknown committed
4792
    DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx) "
4793
                        "Left: %lu",
unknown's avatar
unknown committed
4794 4795
                        LSN_IN_PARTS(log_descriptor.horizon),
                        LSN_IN_PARTS(horizon),
4796 4797 4798 4799 4800 4801 4802 4803 4804
                        (ulong) (parts->record_length - (first_page - 1) -
                                 i * log_descriptor.page_capacity_chunk_2 -
                                 done)));
  }

  if (chunk3_pages &&
      translog_write_variable_record_chunk3_page(parts,
                                                 chunk3_size,
                                                 &horizon, &cursor))
unknown's avatar
unknown committed
4805 4806
    goto err;
  DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx)",
unknown's avatar
unknown committed
4807 4808
                      LSN_IN_PARTS(log_descriptor.horizon),
                      LSN_IN_PARTS(horizon)));
4809

unknown's avatar
unknown committed
4810
  *chunk0_header= (uchar) (type |TRANSLOG_CHUNK_LSN);
4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832
  int2store(chunk0_header + 1, short_trid);
  translog_write_variable_record_1group_code_len(chunk0_header + 3,
                                                 parts->record_length,
                                                 header_length);
  do
  {
    int limit;
    if (new_page_before_chunk0)
    {
      rc= translog_page_next(&horizon, &cursor, &buffer_to_flush);
      if (buffer_to_flush != NULL)
      {
        rc|= translog_buffer_lock(buffer_to_flush);
        translog_buffer_decrease_writers(buffer_to_flush);
        if (!rc)
          rc= translog_buffer_flush(buffer_to_flush);
        rc|= translog_buffer_unlock(buffer_to_flush);
        buffer_to_flush= NULL;
      }
      if (rc)
      {
        UNRECOVERABLE_ERROR(("flush of unlock buffer failed"));
unknown's avatar
unknown committed
4833
        goto err;
4834 4835 4836 4837 4838 4839
      }
    }
    new_page_before_chunk0= 1;

    if (first_chunk0)
    {
unknown's avatar
unknown committed
4840
      first_chunk0= 0;
4841 4842
      *lsn= horizon;
      if (log_record_type_descriptor[type].inwrite_hook &&
unknown's avatar
unknown committed
4843 4844
          (*log_record_type_descriptor[type].inwrite_hook) (type, trn,
                                                            tbl_info,
unknown's avatar
unknown committed
4845
                                                            lsn, hook_arg))
unknown's avatar
unknown committed
4846
        goto err;
4847 4848 4849 4850 4851 4852 4853 4854 4855
    }

    /*
       A first non-full page will hold type 0 chunk only if it fit in it with
       all its headers => the fist page is full or number of groups less then
       possible number of full page.
    */
    limit= (groups_per_page < groups.elements - curr_group ?
            groups_per_page : groups.elements - curr_group);
unknown's avatar
unknown committed
4856
    DBUG_PRINT("info", ("Groups: %u  curr: %u  limit: %u",
4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878
                        (uint) groups.elements, (uint) curr_group,
                        (uint) limit));

    if (chunk0_pages == 1)
    {
      DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) + %u = %u",
                          (uint) limit, (uint) record_rest,
                          (uint) (2 + limit * (7 + 1) + record_rest)));
      int2store(chunk0_header + header_length - 2,
                2 + limit * (7 + 1) + record_rest);
    }
    else
    {
      DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) = %u",
                          (uint) limit, (uint) (2 + limit * (7 + 1))));
      int2store(chunk0_header + header_length - 2, 2 + limit * (7 + 1));
    }
    int2store(chunk0_header + header_length, groups.elements - curr_group);
    translog_write_data_on_page(&horizon, &cursor, header_fixed_part,
                                chunk0_header);
    for (i= curr_group; i < limit + curr_group; i++)
    {
unknown's avatar
unknown committed
4879 4880 4881 4882 4883
      struct st_translog_group_descriptor *grp_ptr;
      grp_ptr= dynamic_element(&groups, i,
                               struct st_translog_group_descriptor *);
      lsn_store(group_desc, grp_ptr->addr);
      group_desc[7]= grp_ptr->num;
4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899
      translog_write_data_on_page(&horizon, &cursor, (7 + 1), group_desc);
    }

    if (chunk0_pages == 1 && record_rest != 0)
      translog_write_parts_on_page(&horizon, &cursor, record_rest, parts);

    chunk0_pages--;
    curr_group+= limit;

  } while (chunk0_pages != 0);
  rc= translog_buffer_lock(cursor.buffer);
  if (cmp_translog_addr(cursor.buffer->last_lsn, *lsn) < 0)
    cursor.buffer->last_lsn= *lsn;
  translog_buffer_decrease_writers(cursor.buffer);
  rc|= translog_buffer_unlock(cursor.buffer);

4900 4901 4902 4903 4904 4905
  if (translog_set_lsn_for_files(file_of_the_first_group, LSN_FILE_NO(*lsn),
                                 *lsn, FALSE))
    goto err;
  translog_mark_file_finished(file_of_the_first_group);


4906 4907
  delete_dynamic(&groups);
  DBUG_RETURN(rc);
unknown's avatar
unknown committed
4908 4909 4910 4911 4912 4913

err_unlock:
  translog_unlock();
err:
  delete_dynamic(&groups);
  DBUG_RETURN(1);
4914 4915 4916
}


unknown's avatar
unknown committed
4917 4918
/**
   @brief Write the variable length log record.
4919

unknown's avatar
unknown committed
4920 4921 4922 4923 4924 4925 4926 4927
   @param  lsn             LSN of the record will be written here
   @param  type            the log record type
   @param  short_trid      Short transaction ID or 0 if it has no sense
   @param  parts           Descriptor of record source parts
   @param  trn             Transaction structure pointer for hooks by
                           record log type, for short_id
   @param  hook_arg        Argument which will be passed to pre-write and
                           in-write hooks of this record.
4928

unknown's avatar
unknown committed
4929 4930 4931
   @return Operation status
     @retval 0      OK
     @retval 1      Error
4932 4933 4934 4935
*/

static my_bool translog_write_variable_record(LSN *lsn,
                                              enum translog_record_type type,
unknown's avatar
unknown committed
4936
                                              MARIA_HA *tbl_info,
4937 4938
                                              SHORT_TRANSACTION_ID short_trid,
                                              struct st_translog_parts *parts,
unknown's avatar
unknown committed
4939
                                              TRN *trn, void *hook_arg)
4940 4941 4942 4943 4944 4945
{
  struct st_translog_buffer *buffer_to_flush= NULL;
  uint header_length1= 1 + 2 + 2 +
    translog_variable_record_length_bytes(parts->record_length);
  ulong buffer_rest;
  uint page_rest;
unknown's avatar
unknown committed
4946
  /* Max number of such LSNs per record is 2 */
unknown's avatar
unknown committed
4947
  uchar compressed_LSNs[MAX_NUMBER_OF_LSNS_PER_RECORD *
4948
    COMPRESSED_LSN_MAX_STORE_SIZE];
unknown's avatar
unknown committed
4949
  my_bool res;
4950 4951 4952
  DBUG_ENTER("translog_write_variable_record");

  translog_lock();
unknown's avatar
unknown committed
4953
  DBUG_PRINT("info", ("horizon: (%lu,0x%lx)",
unknown's avatar
unknown committed
4954
                      LSN_IN_PARTS(log_descriptor.horizon)));
unknown's avatar
unknown committed
4955 4956
  page_rest= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill;
  DBUG_PRINT("info", ("header length: %u  page_rest: %u",
4957 4958 4959
                      header_length1, page_rest));

  /*
4960 4961
    header and part which we should read have to fit in one chunk
    TODO: allow to divide readable header
4962 4963 4964 4965 4966
  */
  if (page_rest <
      (header_length1 + log_record_type_descriptor[type].read_header_len))
  {
    DBUG_PRINT("info",
unknown's avatar
unknown committed
4967 4968
               ("Next page, size: %u  header: %u + %u",
                log_descriptor.bc.current_page_fill,
4969 4970 4971 4972
                header_length1,
                log_record_type_descriptor[type].read_header_len));
    translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
                       &buffer_to_flush);
unknown's avatar
unknown committed
4973
    /* Chunk 2 header is 1 byte, so full page capacity will be one uchar more */
4974 4975 4976 4977 4978 4979 4980 4981
    page_rest= log_descriptor.page_capacity_chunk_2 + 1;
    DBUG_PRINT("info", ("page_rest: %u", page_rest));
  }

  /*
     To minimize compressed size we will compress always relative to
     very first chunk address (log_descriptor.horizon for now)
  */
unknown's avatar
unknown committed
4982
  if (log_record_type_descriptor[type].compressed_LSN > 0)
4983
  {
unknown's avatar
unknown committed
4984 4985 4986
    translog_relative_LSN_encode(parts, log_descriptor.horizon,
                                 log_record_type_descriptor[type].
                                 compressed_LSN, compressed_LSNs);
4987 4988 4989
    /* recalculate header length after compression */
    header_length1= 1 + 2 + 2 +
      translog_variable_record_length_bytes(parts->record_length);
unknown's avatar
unknown committed
4990 4991
    DBUG_PRINT("info", ("after compressing LSN(s) header length: %u  "
                        "record length: %lu",
4992
                        header_length1, (ulong)parts->record_length));
4993 4994 4995 4996 4997 4998
  }

  /* TODO: check space on current page for header + few bytes */
  if (page_rest >= parts->record_length + header_length1)
  {
    /* following function makes translog_unlock(); */
unknown's avatar
unknown committed
4999
    res= translog_write_variable_record_1chunk(lsn, type, tbl_info,
unknown's avatar
unknown committed
5000 5001
                                               short_trid,
                                               parts, buffer_to_flush,
unknown's avatar
unknown committed
5002
                                               header_length1, trn, hook_arg);
unknown's avatar
unknown committed
5003
    DBUG_RETURN(res);
5004 5005 5006 5007 5008 5009 5010
  }

  buffer_rest= translog_get_current_group_size();

  if (buffer_rest >= parts->record_length + header_length1 - page_rest)
  {
    /* following function makes translog_unlock(); */
unknown's avatar
unknown committed
5011
    res= translog_write_variable_record_1group(lsn, type, tbl_info,
unknown's avatar
unknown committed
5012 5013
                                               short_trid,
                                               parts, buffer_to_flush,
unknown's avatar
unknown committed
5014
                                               header_length1, trn, hook_arg);
unknown's avatar
unknown committed
5015
    DBUG_RETURN(res);
5016 5017
  }
  /* following function makes translog_unlock(); */
unknown's avatar
unknown committed
5018
  res= translog_write_variable_record_mgroup(lsn, type, tbl_info,
unknown's avatar
unknown committed
5019 5020 5021
                                             short_trid,
                                             parts, buffer_to_flush,
                                             header_length1,
unknown's avatar
unknown committed
5022
                                             buffer_rest, trn, hook_arg);
unknown's avatar
unknown committed
5023
  DBUG_RETURN(res);
5024 5025 5026
}


unknown's avatar
unknown committed
5027 5028
/**
   @brief Write the fixed and pseudo-fixed log record.
5029

unknown's avatar
unknown committed
5030 5031 5032 5033 5034 5035 5036 5037
   @param  lsn             LSN of the record will be written here
   @param  type            the log record type
   @param  short_trid      Short transaction ID or 0 if it has no sense
   @param  parts           Descriptor of record source parts
   @param  trn             Transaction structure pointer for hooks by
                           record log type, for short_id
   @param  hook_arg        Argument which will be passed to pre-write and
                           in-write hooks of this record.
5038

unknown's avatar
unknown committed
5039 5040 5041
   @return Operation status
     @retval 0      OK
     @retval 1      Error
5042 5043 5044 5045
*/

static my_bool translog_write_fixed_record(LSN *lsn,
                                           enum translog_record_type type,
unknown's avatar
unknown committed
5046
                                           MARIA_HA *tbl_info,
5047 5048
                                           SHORT_TRANSACTION_ID short_trid,
                                           struct st_translog_parts *parts,
unknown's avatar
unknown committed
5049
                                           TRN *trn, void *hook_arg)
5050 5051
{
  struct st_translog_buffer *buffer_to_flush= NULL;
unknown's avatar
unknown committed
5052
  uchar chunk1_header[1 + 2];
unknown's avatar
unknown committed
5053
  /* Max number of such LSNs per record is 2 */
unknown's avatar
unknown committed
5054
  uchar compressed_LSNs[MAX_NUMBER_OF_LSNS_PER_RECORD *
5055
    COMPRESSED_LSN_MAX_STORE_SIZE];
5056
  LEX_STRING *part;
5057 5058
  int rc;
  DBUG_ENTER("translog_write_fixed_record");
unknown's avatar
unknown committed
5059
  DBUG_ASSERT((log_record_type_descriptor[type].rclass ==
5060 5061 5062
               LOGRECTYPE_FIXEDLENGTH &&
               parts->record_length ==
               log_record_type_descriptor[type].fixed_length) ||
unknown's avatar
unknown committed
5063
              (log_record_type_descriptor[type].rclass ==
5064
               LOGRECTYPE_PSEUDOFIXEDLENGTH &&
5065
               parts->record_length ==
5066 5067 5068
               log_record_type_descriptor[type].fixed_length));

  translog_lock();
unknown's avatar
unknown committed
5069
  DBUG_PRINT("info", ("horizon: (%lu,0x%lx)",
unknown's avatar
unknown committed
5070
                      LSN_IN_PARTS(log_descriptor.horizon)));
5071

unknown's avatar
unknown committed
5072
  DBUG_ASSERT(log_descriptor.bc.current_page_fill <= TRANSLOG_PAGE_SIZE);
5073
  DBUG_PRINT("info",
unknown's avatar
unknown committed
5074 5075
             ("Page size: %u  record: %u  next cond: %d",
              log_descriptor.bc.current_page_fill,
5076
              (parts->record_length +
unknown's avatar
unknown committed
5077 5078
               log_record_type_descriptor[type].compressed_LSN * 2 + 3),
              ((((uint) log_descriptor.bc.current_page_fill) +
5079
                (parts->record_length +
unknown's avatar
unknown committed
5080
                 log_record_type_descriptor[type].compressed_LSN * 2 + 3)) >
5081 5082
               TRANSLOG_PAGE_SIZE)));
  /*
5083 5084
     check that there is enough place on current page.
     NOTE: compressing may increase page LSN size on two bytes for every LSN
5085
  */
unknown's avatar
unknown committed
5086
  if ((((uint) log_descriptor.bc.current_page_fill) +
5087
       (parts->record_length +
unknown's avatar
unknown committed
5088
        log_record_type_descriptor[type].compressed_LSN * 2 + 3)) >
5089 5090 5091 5092 5093 5094 5095 5096
      TRANSLOG_PAGE_SIZE)
  {
    DBUG_PRINT("info", ("Next page"));
    translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
                       &buffer_to_flush);
  }

  *lsn= log_descriptor.horizon;
5097 5098 5099 5100
  if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
                             *lsn, TRUE) ||
      (log_record_type_descriptor[type].inwrite_hook &&
       (*log_record_type_descriptor[type].inwrite_hook) (type, trn, tbl_info,
unknown's avatar
unknown committed
5101
                                                         lsn, hook_arg)))
5102
  {
unknown's avatar
unknown committed
5103 5104
    rc= 1;
    goto err;
5105 5106 5107
  }

  /* compress LSNs */
unknown's avatar
unknown committed
5108 5109
  if (log_record_type_descriptor[type].rclass ==
      LOGRECTYPE_PSEUDOFIXEDLENGTH)
5110
  {
unknown's avatar
unknown committed
5111
    DBUG_ASSERT(log_record_type_descriptor[type].compressed_LSN > 0);
unknown's avatar
unknown committed
5112 5113 5114
    translog_relative_LSN_encode(parts, *lsn,
                                 log_record_type_descriptor[type].
                                 compressed_LSN, compressed_LSNs);
5115 5116 5117
  }

  /*
unknown's avatar
unknown committed
5118 5119
    Write the whole record at once (we know that there is enough place on
    the destination page)
5120
  */
unknown's avatar
unknown committed
5121
  DBUG_ASSERT(parts->current != 0);       /* first part is left for header */
5122 5123 5124
  part= parts->parts + (--parts->current);
  parts->total_record_length+= (part->length= 1 + 2);
  part->str= (char*)chunk1_header;
unknown's avatar
unknown committed
5125
  *chunk1_header= (uchar) (type | TRANSLOG_CHUNK_FIXED);
5126 5127 5128 5129 5130 5131 5132
  int2store(chunk1_header + 1, short_trid);

  rc= translog_write_parts_on_page(&log_descriptor.horizon,
                                   &log_descriptor.bc,
                                   parts->total_record_length, parts);

  log_descriptor.bc.buffer->last_lsn= *lsn;
unknown's avatar
unknown committed
5133

5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151
err:
  rc|= translog_unlock();

  /*
     check if we switched buffer and need process it (current buffer is
     unlocked already => we will not delay other threads
  */
  if (buffer_to_flush != NULL)
  {
    if (!rc)
      rc= translog_buffer_flush(buffer_to_flush);
    rc|= translog_buffer_unlock(buffer_to_flush);
  }

  DBUG_RETURN(rc);
}


5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162
/**
   @brief Writes the log record

   If share has no 2-byte-id yet, gives an id to the share and logs
   LOGREC_FILE_ID. If transaction has not logged LOGREC_LONG_TRANSACTION_ID
   yet, logs it.

   @param  lsn             LSN of the record will be written here
   @param  type            the log record type
   @param  trn             Transaction structure pointer for hooks by
                           record log type, for short_id
unknown's avatar
unknown committed
5163
   @param  tbl_info        MARIA_HA of table or NULL
5164 5165 5166 5167 5168 5169
   @param  rec_len         record length or 0 (count it)
   @param  part_no         number of parts or 0 (count it)
   @param  parts_data      zero ended (in case of number of parts is 0)
                           array of LEX_STRINGs (parts), first
                           TRANSLOG_INTERNAL_PARTS positions in the log
                           should be unused (need for loghandler)
unknown's avatar
unknown committed
5170 5171 5172
   @param  store_share_id  if tbl_info!=NULL then share's id will
                           automatically be stored in the two first bytes
                           pointed (so pointer is assumed to be !=NULL)
unknown's avatar
unknown committed
5173 5174 5175
   @param  hook_arg        argument which will be passed to pre-write and
                           in-write hooks of this record.

5176 5177 5178
   @return Operation status
     @retval 0      OK
     @retval 1      Error
5179 5180 5181 5182
*/

my_bool translog_write_record(LSN *lsn,
                              enum translog_record_type type,
unknown's avatar
unknown committed
5183
                              TRN *trn, MARIA_HA *tbl_info,
5184 5185
                              translog_size_t rec_len,
                              uint part_no,
5186
                              LEX_STRING *parts_data,
unknown's avatar
unknown committed
5187 5188
                              uchar *store_share_id,
                              void *hook_arg)
5189 5190
{
  struct st_translog_parts parts;
5191
  LEX_STRING *part;
5192
  int rc;
5193
  uint short_trid= trn->short_id;
5194
  DBUG_ENTER("translog_write_record");
unknown's avatar
unknown committed
5195 5196
  DBUG_PRINT("enter", ("type: %u  ShortTrID: %u  rec_len: %lu",
                       (uint) type, (uint) short_trid, (ulong) rec_len));
unknown's avatar
unknown committed
5197
  DBUG_ASSERT(translog_inited == 1);
5198

unknown's avatar
unknown committed
5199
  if (tbl_info)
5200
  {
unknown's avatar
unknown committed
5201
    MARIA_SHARE *share= tbl_info->s;
unknown's avatar
unknown committed
5202
    DBUG_ASSERT(share->now_transactional);
5203 5204 5205 5206 5207 5208 5209 5210 5211
    if (unlikely(share->id == 0))
    {
      /*
        First log write for this MARIA_SHARE; give it a short id.
        When the lock manager is enabled and needs a short id, it should be
        assigned in the lock manager (because row locks will be taken before
        log records are written; for example SELECT FOR UPDATE takes locks but
        writes no log record.
      */
unknown's avatar
unknown committed
5212
      if (unlikely(translog_assign_id_to_share(tbl_info, trn)))
5213 5214 5215 5216 5217 5218
        DBUG_RETURN(1);
    }
    fileid_store(store_share_id, share->id);
  }
  if (unlikely(!(trn->first_undo_lsn & TRANSACTION_LOGGED_LONG_ID)))
  {
unknown's avatar
unknown committed
5219
    LSN dummy_lsn;
5220 5221 5222 5223 5224 5225
    LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
    uchar log_data[6];
    int6store(log_data, trn->trid);
    log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    (char*) log_data;
    log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
    trn->first_undo_lsn|= TRANSACTION_LOGGED_LONG_ID; /* no recursion */
unknown's avatar
unknown committed
5226
    if (unlikely(translog_write_record(&dummy_lsn, LOGREC_LONG_TRANSACTION_ID,
5227 5228
                                       trn, NULL, sizeof(log_data),
                                       sizeof(log_array)/sizeof(log_array[0]),
unknown's avatar
unknown committed
5229
                                       log_array, NULL, NULL)))
5230
      DBUG_RETURN(1);
5231
  }
unknown's avatar
unknown committed
5232

5233
  parts.parts= parts_data;
5234

5235 5236
  /* count parts if they are not counted by upper level */
  if (part_no == 0)
unknown's avatar
unknown committed
5237
  {
5238 5239 5240
    for (part_no= TRANSLOG_INTERNAL_PARTS;
         parts_data[part_no].length != 0;
         part_no++);
unknown's avatar
unknown committed
5241
  }
5242 5243
  parts.elements= part_no;
  parts.current= TRANSLOG_INTERNAL_PARTS;
5244

5245
  /* clear TRANSLOG_INTERNAL_PARTS */
unknown's avatar
unknown committed
5246
  compile_time_assert(TRANSLOG_INTERNAL_PARTS != 0);
5247 5248 5249 5250 5251
  parts_data[0].str= 0;
  parts_data[0].length= 0;

  /* count length of the record */
  if (rec_len == 0)
unknown's avatar
unknown committed
5252
  {
5253 5254 5255
    for(part= parts_data + TRANSLOG_INTERNAL_PARTS;\
        part < parts_data + part_no;
        part++)
5256
    {
5257
      rec_len+= part->length;
5258 5259
    }
  }
5260
  parts.record_length= rec_len;
unknown's avatar
unknown committed
5261

5262 5263 5264 5265
#ifndef DBUG_OFF
  {
    uint i;
    uint len= 0;
5266
#ifdef HAVE_purify
unknown's avatar
unknown committed
5267 5268
    ha_checksum checksum= 0;
#endif
5269
    for (i= TRANSLOG_INTERNAL_PARTS; i < part_no; i++)
unknown's avatar
unknown committed
5270
    {
5271
#ifdef HAVE_purify
unknown's avatar
unknown committed
5272 5273 5274 5275
      /* Find unitialized bytes early */
      checksum+= my_checksum(checksum, parts_data[i].str,
                             parts_data[i].length);
#endif
5276
      len+= parts_data[i].length;
unknown's avatar
unknown committed
5277
    }
5278 5279 5280
    DBUG_ASSERT(len == rec_len);
  }
#endif
unknown's avatar
unknown committed
5281 5282 5283 5284 5285
  /*
    Start total_record_length from record_length then overhead will
    be add
  */
  parts.total_record_length= parts.record_length;
unknown's avatar
unknown committed
5286
  DBUG_PRINT("info", ("record length: %lu", (ulong) parts.record_length));
5287 5288 5289

  /* process this parts */
  if (!(rc= (log_record_type_descriptor[type].prewrite_hook &&
5290
             (*log_record_type_descriptor[type].prewrite_hook) (type, trn,
unknown's avatar
unknown committed
5291
                                                                tbl_info,
unknown's avatar
unknown committed
5292
                                                                hook_arg))))
5293
  {
unknown's avatar
unknown committed
5294
    switch (log_record_type_descriptor[type].rclass) {
5295
    case LOGRECTYPE_VARIABLE_LENGTH:
unknown's avatar
unknown committed
5296
      rc= translog_write_variable_record(lsn, type, tbl_info,
unknown's avatar
unknown committed
5297
                                         short_trid, &parts, trn, hook_arg);
5298 5299 5300
      break;
    case LOGRECTYPE_PSEUDOFIXEDLENGTH:
    case LOGRECTYPE_FIXEDLENGTH:
unknown's avatar
unknown committed
5301
      rc= translog_write_fixed_record(lsn, type, tbl_info,
unknown's avatar
unknown committed
5302
                                      short_trid, &parts, trn, hook_arg);
5303 5304 5305 5306 5307 5308 5309 5310
      break;
    case LOGRECTYPE_NOT_ALLOWED:
    default:
      DBUG_ASSERT(0);
      rc= 1;
    }
  }

unknown's avatar
unknown committed
5311
  DBUG_PRINT("info", ("LSN: (%lu,0x%lx)", LSN_IN_PARTS(*lsn)));
5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329
  DBUG_RETURN(rc);
}


/*
  Decode compressed (relative) LSN(s)

  SYNOPSIS
   translog_relative_lsn_decode()
   base_lsn              LSN for encoding
   src                   Decode LSN(s) from here
   dst                   Put decoded LSNs here
   lsns                  number of LSN(s)

   RETURN
     position in sources after decoded LSN(s)
*/

unknown's avatar
unknown committed
5330 5331
static uchar *translog_relative_LSN_decode(LSN base_lsn,
                                          uchar *src, uchar *dst, uint lsns)
5332 5333
{
  uint i;
unknown's avatar
unknown committed
5334
  for (i= 0; i < lsns; i++, dst+= LSN_STORE_SIZE)
5335 5336 5337 5338 5339 5340
  {
    src= translog_get_LSN_from_diff(base_lsn, src, dst);
  }
  return src;
}

5341 5342 5343
/**
   @brief Get header of fixed/pseudo length record and call hook for
   it processing
5344

5345 5346 5347 5348
   @param page            Pointer to the buffer with page where LSN chunk is
                          placed
   @param page_offset     Offset of the first chunk in the page
   @param buff            Buffer to be filled with header data
5349

5350 5351 5352
   @return Length of header or operation status
     @retval #  number of bytes in TRANSLOG_HEADER_BUFFER::header where
                stored decoded part of the header
5353 5354
*/

5355 5356 5357
static int translog_fixed_length_header(uchar *page,
                                        translog_size_t page_offset,
                                        TRANSLOG_HEADER_BUFFER *buff)
5358 5359 5360
{
  struct st_log_record_type_descriptor *desc=
    log_record_type_descriptor + buff->type;
unknown's avatar
unknown committed
5361 5362 5363
  uchar *src= page + page_offset + 3;
  uchar *dst= buff->header;
  uchar *start= src;
unknown's avatar
unknown committed
5364
  uint lsns= desc->compressed_LSN;
5365
  uint length= desc->fixed_length;
5366 5367 5368 5369 5370

  DBUG_ENTER("translog_fixed_length_header");

  buff->record_length= length;

unknown's avatar
unknown committed
5371
  if (desc->rclass == LOGRECTYPE_PSEUDOFIXEDLENGTH)
5372 5373
  {
    DBUG_ASSERT(lsns > 0);
5374
    src= translog_relative_LSN_decode(buff->lsn, src, dst, lsns);
unknown's avatar
unknown committed
5375
    lsns*= LSN_STORE_SIZE;
5376 5377
    dst+= lsns;
    length-= lsns;
5378
    buff->compressed_LSN_economy= (lsns - (src - start));
5379 5380 5381 5382
  }
  else
    buff->compressed_LSN_economy= 0;

unknown's avatar
unknown committed
5383
  memcpy(dst, src, length);
5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400
  buff->non_header_data_start_offset= page_offset +
    ((src + length) - (page + page_offset));
  buff->non_header_data_len= 0;
  DBUG_RETURN(buff->record_length);
}


/*
  Free resources used by TRANSLOG_HEADER_BUFFER

  SYNOPSIS
    translog_free_record_header();
    buff                 Header buffer whose group array should be released

  NOTE
    Only the groups array is freed, and only if groups_no is not 0;
    groups_no is reset to 0 afterwards.
*/

void translog_free_record_header(TRANSLOG_HEADER_BUFFER *buff)
{
  DBUG_ENTER("translog_free_record_header");
  DBUG_ASSERT(translog_inited == 1);
  if (buff->groups_no == 0)
    DBUG_VOID_RETURN;                           /* nothing was allocated */
  my_free((uchar*) buff->groups, MYF(0));
  buff->groups_no= 0;
  DBUG_VOID_RETURN;
}


5411 5412
/**
   @brief Returns the current horizon at the end of the current log
5413

5414
   @return Horizon
5415 5416
*/

5417
TRANSLOG_ADDRESS translog_get_horizon()
5418
{
5419
  TRANSLOG_ADDRESS res;
unknown's avatar
unknown committed
5420
  DBUG_ASSERT(translog_inited == 1);
5421
  translog_lock();
5422
  res= log_descriptor.horizon;
5423
  translog_unlock();
5424
  return res;
5425 5426 5427
}


unknown's avatar
unknown committed
5428 5429 5430 5431 5432 5433 5434 5435 5436
/**
   @brief Returns the current horizon at the end of the current log, caller is
   assumed to already hold the lock

   @return Horizon
*/

TRANSLOG_ADDRESS translog_get_horizon_no_lock()
{
unknown's avatar
unknown committed
5437
  DBUG_ASSERT(translog_inited == 1);
unknown's avatar
unknown committed
5438 5439 5440 5441 5442
  translog_lock_assert_owner();
  return log_descriptor.horizon;
}


5443 5444 5445 5446 5447 5448 5449 5450
/*
  Set last page in the scanner data structure

  SYNOPSIS
    translog_scanner_set_last_page()
    scanner              Information about current chunk during scanning

  RETURN
unknown's avatar
unknown committed
5451 5452
    0  OK
    1  Error
5453 5454
*/

unknown's avatar
unknown committed
5455
static my_bool translog_scanner_set_last_page(TRANSLOG_SCANNER_DATA *scanner)
5456 5457
{
  my_bool page_ok;
5458 5459 5460 5461 5462 5463 5464 5465
  if (LSN_FILE_NO(scanner->page_addr) == LSN_FILE_NO(scanner->horizon))
  {
    /* It is last file => we can easy find last page address by horizon */
    uint pagegrest= LSN_OFFSET(scanner->horizon) % TRANSLOG_PAGE_SIZE;
    scanner->last_file_page= (scanner->horizon -
                              (pagegrest ? pagegrest : TRANSLOG_PAGE_SIZE));
    return (0);
  }
5466
  scanner->last_file_page= scanner->page_addr;
unknown's avatar
unknown committed
5467
  return (translog_get_last_page_addr(&scanner->last_file_page, &page_ok));
5468 5469 5470
}


5471 5472
/**
  @brief Get page from page cache according to requested method

  Reads the page at scanner->page_addr with translog_get_page() and stores
  the returned pointer in scanner->page. The direct link is passed to the
  reader only if scanner->use_direct_link is set.

  @param scanner         The scanner data

  @return operation status
  @retval 0 OK
  @retval 1 Error (translog_get_page() returned NULL)
*/

static my_bool
translog_scanner_get_page(TRANSLOG_SCANNER_DATA *scanner)
{
  TRANSLOG_VALIDATOR_DATA validator;
  DBUG_ENTER("translog_scanner_get_page");

  validator.addr= &scanner->page_addr;
  validator.was_recovered= 0;

  if (scanner->use_direct_link)
    scanner->page= translog_get_page(&validator, scanner->buffer,
                                     &scanner->direct_link);
  else
    scanner->page= translog_get_page(&validator, scanner->buffer, NULL);

  DBUG_RETURN(scanner->page == NULL);
}


/**
  @brief Initialize reader scanner.

  @param lsn             LSN with which it have to be inited
  @param fixed_horizon   true if it is OK do not read records which was written
5502
                         after scanning beginning
5503 5504 5505
  @param scanner         scanner which have to be inited
  @param use_direct      prefer using direct lings from page handler
                         where it is possible.
5506

5507 5508 5509 5510 5511 5512
  @note If direct link was used translog_destroy_scanner should be
        called after it using

  @return status of the operation
  @retval 0 OK
  @retval 1 Error
5513 5514
*/

unknown's avatar
unknown committed
5515
my_bool translog_scanner_init(LSN lsn,
unknown's avatar
unknown committed
5516
                              my_bool fixed_horizon,
5517 5518
                              TRANSLOG_SCANNER_DATA *scanner,
                              my_bool use_direct)
unknown's avatar
unknown committed
5519 5520
{
  TRANSLOG_VALIDATOR_DATA data;
unknown's avatar
unknown committed
5521
  DBUG_ENTER("translog_scanner_init");
5522 5523
  DBUG_PRINT("enter", ("Scanner: 0x%lx  LSN: (0x%lu,0x%lx)",
                       (ulong) scanner, LSN_IN_PARTS(lsn)));
unknown's avatar
unknown committed
5524
  DBUG_ASSERT(translog_inited == 1);
unknown's avatar
unknown committed
5525 5526 5527 5528

  data.addr= &scanner->page_addr;
  data.was_recovered= 0;

5529
  scanner->page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE;
5530 5531

  scanner->fixed_horizon= fixed_horizon;
5532 5533
  scanner->use_direct_link= use_direct;
  scanner->direct_link= NULL;
5534

5535
  scanner->horizon= translog_get_horizon();
unknown's avatar
unknown committed
5536
  DBUG_PRINT("info", ("horizon: (0x%lu,0x%lx)",
unknown's avatar
unknown committed
5537
                      LSN_IN_PARTS(scanner->horizon)));
5538 5539

  /* lsn < horizon */
5540
  DBUG_ASSERT(lsn < scanner->horizon);
5541

5542 5543
  scanner->page_addr= lsn;
  scanner->page_addr-= scanner->page_offset; /*decrease offset */
5544 5545 5546 5547

  if (translog_scanner_set_last_page(scanner))
    DBUG_RETURN(1);

5548
  if (translog_scanner_get_page(scanner))
5549 5550 5551 5552 5553
    DBUG_RETURN(1);
  DBUG_RETURN(0);
}


5554 5555 5556 5557 5558 5559 5560 5561
/**
  @brief Destroy scanner object;

  Hands the scanner's direct link to translog_free_link(); nothing else is
  released here.

  @param scanner         The scanner object to destroy
*/

void translog_destroy_scanner(TRANSLOG_SCANNER_DATA *scanner)
{
  DBUG_ENTER("translog_destroy_scanner");
  DBUG_PRINT("enter", ("Scanner: 0x%lx", (ulong)scanner));

  translog_free_link(scanner->direct_link);

  DBUG_VOID_RETURN;
}


5569 5570 5571 5572 5573 5574 5575 5576
/*
  Checks End of the Log

  SYNOPSIS
    translog_scanner_eol()
    scanner              Information about current chunk during scanning

  RETURN
unknown's avatar
unknown committed
5577 5578
    1  End of the Log
    0  OK
5579
*/
5580

unknown's avatar
unknown committed
5581
static my_bool translog_scanner_eol(TRANSLOG_SCANNER_DATA *scanner)
5582 5583 5584
{
  DBUG_ENTER("translog_scanner_eol");
  DBUG_PRINT("enter",
unknown's avatar
unknown committed
5585
             ("Horizon: (%lu, 0x%lx)  Current: (%lu, 0x%lx+0x%x=0x%lx)",
unknown's avatar
unknown committed
5586 5587
              LSN_IN_PARTS(scanner->horizon),
              LSN_IN_PARTS(scanner->page_addr),
5588
              (uint) scanner->page_offset,
5589 5590 5591
              (ulong) (LSN_OFFSET(scanner->page_addr) + scanner->page_offset)));
  if (scanner->horizon > (scanner->page_addr +
                          scanner->page_offset))
5592 5593 5594 5595 5596 5597 5598 5599 5600
  {
    DBUG_PRINT("info", ("Horizon is not reached"));
    DBUG_RETURN(0);
  }
  if (scanner->fixed_horizon)
  {
    DBUG_PRINT("info", ("Horizon is fixed and reached"));
    DBUG_RETURN(1);
  }
5601
  scanner->horizon= translog_get_horizon();
5602 5603
  DBUG_PRINT("info",
             ("Horizon is re-read, EOL: %d",
5604 5605 5606 5607
              scanner->horizon <= (scanner->page_addr +
                                   scanner->page_offset)));
  DBUG_RETURN(scanner->horizon <= (scanner->page_addr +
                                   scanner->page_offset));
5608 5609 5610
}


unknown's avatar
unknown committed
5611 5612
/**
  @brief Cheks End of the Page
5613

unknown's avatar
unknown committed
5614
  @param scanner         Information about current chunk during scanning
5615

unknown's avatar
unknown committed
5616 5617
  @retval 1  End of the Page
  @retval 0  OK
5618
*/
5619

unknown's avatar
unknown committed
5620
static my_bool translog_scanner_eop(TRANSLOG_SCANNER_DATA *scanner)
5621 5622 5623
{
  DBUG_ENTER("translog_scanner_eop");
  DBUG_RETURN(scanner->page_offset >= TRANSLOG_PAGE_SIZE ||
unknown's avatar
unknown committed
5624
              scanner->page[scanner->page_offset] == TRANSLOG_FILLER);
5625 5626 5627
}


unknown's avatar
unknown committed
5628 5629 5630
/**
  @brief Checks End of the File (i.e. we are scanning last page, which do not
    mean end of this page)
5631

unknown's avatar
unknown committed
5632
  @param scanner         Information about current chunk during scanning
5633

unknown's avatar
unknown committed
5634 5635
  @retval 1 End of the File
  @retval 0 OK
5636
*/
5637

unknown's avatar
unknown committed
5638
static my_bool translog_scanner_eof(TRANSLOG_SCANNER_DATA *scanner)
5639 5640
{
  DBUG_ENTER("translog_scanner_eof");
5641 5642
  DBUG_ASSERT(LSN_FILE_NO(scanner->page_addr) ==
              LSN_FILE_NO(scanner->last_file_page));
unknown's avatar
unknown committed
5643 5644
  DBUG_PRINT("enter", ("curr Page: 0x%lx  last page: 0x%lx  "
                       "normal EOF: %d",
5645 5646 5647 5648
                       (ulong) LSN_OFFSET(scanner->page_addr),
                       (ulong) LSN_OFFSET(scanner->last_file_page),
                       LSN_OFFSET(scanner->page_addr) ==
                       LSN_OFFSET(scanner->last_file_page)));
5649 5650 5651 5652
  /*
     TODO: detect damaged file EOF,
     TODO: issue warning if damaged file EOF detected
  */
5653 5654
  DBUG_RETURN(scanner->page_addr ==
              scanner->last_file_page);
5655 5656 5657 5658 5659 5660 5661 5662 5663 5664
}

/*
  Move scanner to the next chunk

  SYNOPSIS
    translog_get_next_chunk()
    scanner              Information about current chunk during scanning

  RETURN
unknown's avatar
unknown committed
5665 5666
    0  OK
    1  Error
5667 5668
*/

unknown's avatar
unknown committed
5669 5670
static my_bool
translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner)
5671
{
unknown's avatar
unknown committed
5672
  uint16 len;
5673
  DBUG_ENTER("translog_get_next_chunk");
unknown's avatar
unknown committed
5674

5675 5676 5677 5678
  if (translog_scanner_eop(scanner))
    len= TRANSLOG_PAGE_SIZE - scanner->page_offset;
  else if ((len= translog_get_total_chunk_length(scanner->page,
                                                 scanner->page_offset)) == 0)
5679 5680 5681 5682 5683
    DBUG_RETURN(1);
  scanner->page_offset+= len;

  if (translog_scanner_eol(scanner))
  {
unknown's avatar
unknown committed
5684
    scanner->page= END_OF_LOG;
5685 5686 5687 5688 5689
    scanner->page_offset= 0;
    DBUG_RETURN(0);
  }
  if (translog_scanner_eop(scanner))
  {
5690 5691
    /* before reading next page we should unpin current one if it was pinned */
    translog_free_link(scanner->direct_link);
5692 5693
    if (translog_scanner_eof(scanner))
    {
unknown's avatar
unknown committed
5694
      DBUG_PRINT("info", ("horizon: (%lu,0x%lx)  pageaddr: (%lu,0x%lx)",
unknown's avatar
unknown committed
5695 5696
                          LSN_IN_PARTS(scanner->horizon),
                          LSN_IN_PARTS(scanner->page_addr)));
5697
      /* if it is log end it have to be caught before */
5698 5699 5700 5701 5702
      DBUG_ASSERT(LSN_FILE_NO(scanner->horizon) >
                  LSN_FILE_NO(scanner->page_addr));
      scanner->page_addr+= LSN_ONE_FILE;
      scanner->page_addr= LSN_REPLACE_OFFSET(scanner->page_addr,
                                             TRANSLOG_PAGE_SIZE);
5703 5704 5705 5706 5707
      if (translog_scanner_set_last_page(scanner))
        DBUG_RETURN(1);
    }
    else
    {
5708
      scanner->page_addr+= TRANSLOG_PAGE_SIZE; /* offset increased */
5709
    }
unknown's avatar
unknown committed
5710

5711
    if (translog_scanner_get_page(scanner))
unknown's avatar
unknown committed
5712 5713
      DBUG_RETURN(1);

5714 5715 5716
    scanner->page_offset= translog_get_first_chunk_offset(scanner->page);
    if (translog_scanner_eol(scanner))
    {
unknown's avatar
unknown committed
5717
      scanner->page= END_OF_LOG;
5718 5719 5720
      scanner->page_offset= 0;
      DBUG_RETURN(0);
    }
unknown's avatar
unknown committed
5721
    DBUG_ASSERT(scanner->page[scanner->page_offset] != TRANSLOG_FILLER);
5722 5723 5724 5725 5726
  }
  DBUG_RETURN(0);
}


5727 5728
/**
   @brief Get header of variable length record and call hook for it processing
5729

5730 5731 5732 5733 5734
   @param page            Pointer to the buffer with page where LSN chunk is
                          placed
   @param page_offset     Offset of the first chunk in the page
   @param buff            Buffer to be filled with header data
   @param scanner         If present should be moved to the header page if
5735 5736 5737
                          it differ from LSN page

   @return                Length of header or operation status
5738
     @retval RECHEADER_READ_ERROR  error
unknown's avatar
unknown committed
5739
     @retval RECHEADER_READ_EOF    End of the log reached during the read
5740 5741 5742
     @retval #                     number of bytes in
                                   TRANSLOG_HEADER_BUFFER::header where
                                   stored decoded part of the header
5743 5744
*/

unknown's avatar
unknown committed
5745 5746 5747 5748
static int
translog_variable_length_header(uchar *page, translog_size_t page_offset,
                                TRANSLOG_HEADER_BUFFER *buff,
                                TRANSLOG_SCANNER_DATA *scanner)
5749
{
unknown's avatar
unknown committed
5750 5751
  struct st_log_record_type_descriptor *desc= (log_record_type_descriptor +
                                               buff->type);
unknown's avatar
unknown committed
5752 5753
  uchar *src= page + page_offset + 1 + 2;
  uchar *dst= buff->header;
5754
  LSN base_lsn;
unknown's avatar
unknown committed
5755
  uint lsns= desc->compressed_LSN;
5756
  uint16 chunk_len;
5757
  uint16 length= desc->read_header_len;
5758 5759
  uint16 buffer_length= length;
  uint16 body_len;
unknown's avatar
unknown committed
5760
  TRANSLOG_SCANNER_DATA internal_scanner;
5761 5762 5763 5764
  DBUG_ENTER("translog_variable_length_header");

  buff->record_length= translog_variable_record_1group_decode_len(&src);
  chunk_len= uint2korr(src);
unknown's avatar
unknown committed
5765
  DBUG_PRINT("info", ("rec len: %lu  chunk len: %u  length: %u  bufflen: %u",
5766 5767 5768 5769 5770 5771 5772 5773 5774 5775
                      (ulong) buff->record_length, (uint) chunk_len,
                      (uint) length, (uint) buffer_length));
  if (chunk_len == 0)
  {
    uint16 page_rest;
    DBUG_PRINT("info", ("1 group"));
    src+= 2;
    page_rest= TRANSLOG_PAGE_SIZE - (src - page);

    base_lsn= buff->lsn;
unknown's avatar
unknown committed
5776
    body_len= min(page_rest, buff->record_length);
5777 5778 5779 5780 5781 5782 5783 5784 5785
  }
  else
  {
    uint grp_no, curr;
    uint header_to_skip;
    uint16 page_rest;

    DBUG_PRINT("info", ("multi-group"));
    grp_no= buff->groups_no= uint2korr(src + 2);
unknown's avatar
unknown committed
5786 5787 5788
    if (!(buff->groups=
          (TRANSLOG_GROUP*) my_malloc(sizeof(TRANSLOG_GROUP) * grp_no,
                                      MYF(0))))
5789
      DBUG_RETURN(RECHEADER_READ_ERROR);
5790 5791 5792 5793 5794 5795 5796 5797 5798
    DBUG_PRINT("info", ("Groups: %u", (uint) grp_no));
    src+= (2 + 2);
    page_rest= TRANSLOG_PAGE_SIZE - (src - page);
    curr= 0;
    header_to_skip= src - (page + page_offset);
    buff->chunk0_pages= 0;

    for (;;)
    {
5799
      uint i, read_length= grp_no;
5800 5801 5802

      buff->chunk0_pages++;
      if (page_rest < grp_no * (7 + 1))
5803
        read_length= page_rest / (7 + 1);
unknown's avatar
unknown committed
5804 5805
      DBUG_PRINT("info", ("Read chunk0 page#%u  read: %u  left: %u  "
                          "start from: %u",
5806 5807
                          buff->chunk0_pages, read_length, grp_no, curr));
      for (i= 0; i < read_length; i++, curr++)
5808 5809
      {
        DBUG_ASSERT(curr < buff->groups_no);
unknown's avatar
unknown committed
5810
        buff->groups[curr].addr= lsn_korr(src + i * (7 + 1));
5811
        buff->groups[curr].num= src[i * (7 + 1) + 7];
unknown's avatar
unknown committed
5812
        DBUG_PRINT("info", ("group #%u (%lu,0x%lx)  chunks: %u",
5813
                            curr,
unknown's avatar
unknown committed
5814
                            LSN_IN_PARTS(buff->groups[curr].addr),
5815 5816
                            (uint) buff->groups[curr].num));
      }
5817
      grp_no-= read_length;
5818 5819 5820 5821 5822
      if (grp_no == 0)
      {
        if (scanner)
        {
          buff->chunk0_data_addr= scanner->page_addr;
5823
          /* offset increased */
5824
          buff->chunk0_data_addr+= (page_offset + header_to_skip +
5825
                                    read_length * (7 + 1));
5826 5827 5828 5829
        }
        else
        {
          buff->chunk0_data_addr= buff->lsn;
5830
          /* offset increased */
5831
          buff->chunk0_data_addr+= (header_to_skip + read_length * (7 + 1));
5832
        }
5833
        buff->chunk0_data_len= chunk_len - 2 - read_length * (7 + 1);
unknown's avatar
unknown committed
5834
        DBUG_PRINT("info", ("Data address: (%lu,0x%lx)  len: %u",
unknown's avatar
unknown committed
5835
                            LSN_IN_PARTS(buff->chunk0_data_addr),
5836 5837 5838 5839 5840
                            buff->chunk0_data_len));
        break;
      }
      if (scanner == NULL)
      {
unknown's avatar
unknown committed
5841
        DBUG_PRINT("info", ("use internal scanner for header reading"));
5842
        scanner= &internal_scanner;
unknown's avatar
unknown committed
5843
        if (translog_scanner_init(buff->lsn, 1, scanner, 0))
5844
          DBUG_RETURN(RECHEADER_READ_ERROR);
5845
      }
5846
      if (translog_get_next_chunk(scanner))
unknown's avatar
unknown committed
5847 5848 5849
      {
        if (scanner == &internal_scanner)
          translog_destroy_scanner(scanner);
5850
        DBUG_RETURN(RECHEADER_READ_ERROR);
unknown's avatar
unknown committed
5851 5852 5853 5854 5855 5856 5857
      }
      if (scanner->page == END_OF_LOG)
      {
        if (scanner == &internal_scanner)
          translog_destroy_scanner(scanner);
        DBUG_RETURN(RECHEADER_READ_EOF);
      }
5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870
      page= scanner->page;
      page_offset= scanner->page_offset;
      src= page + page_offset + header_to_skip;
      chunk_len= uint2korr(src - 2 - 2);
      DBUG_PRINT("info", ("Chunk len: %u", (uint) chunk_len));
      page_rest= TRANSLOG_PAGE_SIZE - (src - page);
    }

    if (scanner == NULL)
    {
      DBUG_PRINT("info", ("use internal scanner"));
      scanner= &internal_scanner;
    }
5871 5872 5873 5874
    else
    {
      translog_destroy_scanner(scanner);
    }
5875
    base_lsn= buff->groups[0].addr;
unknown's avatar
unknown committed
5876
    translog_scanner_init(base_lsn, 1, scanner, scanner == &internal_scanner);
5877 5878 5879 5880 5881 5882
    /* first group chunk is always chunk type 2 */
    page= scanner->page;
    page_offset= scanner->page_offset;
    src= page + page_offset + 1;
    page_rest= TRANSLOG_PAGE_SIZE - (src - page);
    body_len= page_rest;
5883 5884
    if (scanner == &internal_scanner)
      translog_destroy_scanner(scanner);
5885 5886 5887
  }
  if (lsns)
  {
unknown's avatar
unknown committed
5888
    uchar *start= src;
5889
    src= translog_relative_LSN_decode(base_lsn, src, dst, lsns);
unknown's avatar
unknown committed
5890
    lsns*= LSN_STORE_SIZE;
5891 5892 5893
    dst+= lsns;
    length-= lsns;
    buff->record_length+= (buff->compressed_LSN_economy=
5894 5895
                           (lsns - (src - start)));
    DBUG_PRINT("info", ("lsns: %u  length: %u  economy: %d  new length: %lu",
unknown's avatar
unknown committed
5896
                        lsns / LSN_STORE_SIZE, (uint) length,
5897
                        (int) buff->compressed_LSN_economy,
5898 5899 5900 5901 5902 5903 5904 5905
                        (ulong) buff->record_length));
    body_len-= (src - start);
  }
  else
    buff->compressed_LSN_economy= 0;

  DBUG_ASSERT(body_len >= length);
  body_len-= length;
unknown's avatar
unknown committed
5906
  memcpy(dst, src, length);
5907 5908
  buff->non_header_data_start_offset= src + length - page;
  buff->non_header_data_len= body_len;
unknown's avatar
unknown committed
5909
  DBUG_PRINT("info", ("non_header_data_start_offset: %u  len: %u  buffer: %u",
5910 5911 5912 5913 5914 5915
                      buff->non_header_data_start_offset,
                      buff->non_header_data_len, buffer_length));
  DBUG_RETURN(buffer_length);
}


5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928
/**
   @brief Read record header from the given buffer

   @param page            page content buffer
   @param page_offset     offset of the chunk in the page
   @param buff            destination buffer
   @param scanner         If this is set the scanner will be moved to the
                          record header page (differ from LSN page in case of
                          multi-group records)

   @return Length of header or operation status
     @retval RECHEADER_READ_ERROR  error
     @retval #                     number of bytes in
unknown's avatar
unknown committed
5929
                                   TRANSLOG_HEADER_BUFFER::header where
5930
                                   stored decoded part of the header
5931 5932
*/

5933 5934 5935 5936
int translog_read_record_header_from_buffer(uchar *page,
                                            uint16 page_offset,
                                            TRANSLOG_HEADER_BUFFER *buff,
                                            TRANSLOG_SCANNER_DATA *scanner)
5937
{
unknown's avatar
unknown committed
5938
  translog_size_t res;
5939 5940 5941 5942 5943
  DBUG_ENTER("translog_read_record_header_from_buffer");
  DBUG_ASSERT((page[page_offset] & TRANSLOG_CHUNK_TYPE) ==
              TRANSLOG_CHUNK_LSN ||
              (page[page_offset] & TRANSLOG_CHUNK_TYPE) ==
              TRANSLOG_CHUNK_FIXED);
unknown's avatar
unknown committed
5944
  DBUG_ASSERT(translog_inited == 1);
5945 5946
  buff->type= (page[page_offset] & TRANSLOG_REC_TYPE);
  buff->short_trid= uint2korr(page + page_offset + 1);
5947
  DBUG_PRINT("info", ("Type %u, Short TrID %u, LSN (%lu,0x%lx)",
5948
                      (uint) buff->type, (uint)buff->short_trid,
unknown's avatar
unknown committed
5949
                      LSN_IN_PARTS(buff->lsn)));
5950
  /* Read required bytes from the header and call hook */
unknown's avatar
unknown committed
5951
  switch (log_record_type_descriptor[buff->type].rclass) {
5952
  case LOGRECTYPE_VARIABLE_LENGTH:
unknown's avatar
unknown committed
5953 5954 5955
    res= translog_variable_length_header(page, page_offset, buff,
                                         scanner);
    break;
5956 5957
  case LOGRECTYPE_PSEUDOFIXEDLENGTH:
  case LOGRECTYPE_FIXEDLENGTH:
unknown's avatar
unknown committed
5958 5959
    res= translog_fixed_length_header(page, page_offset, buff);
    break;
5960
  default:
unknown's avatar
unknown committed
5961
    DBUG_ASSERT(0); /* we read some junk (got no LSN) */
5962
    res= RECHEADER_READ_ERROR;
5963
  }
unknown's avatar
unknown committed
5964
  DBUG_RETURN(res);
5965 5966 5967
}


5968 5969 5970
/**
   @brief Read record header and some fixed part of a record (the part depend
   on record type).
5971

5972 5973
   @param lsn             log record serial number (address of the record)
   @param buff            log record header buffer
5974

5975 5976 5977 5978 5979 5980 5981 5982 5983 5984
   @note Some type of record can be read completely by this call
   @note "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative
   LSN can be translated to absolute one), some fields can be added (like
   actual header length in the record if the header has variable length)

   @return Length of header or operation status
     @retval RECHEADER_READ_ERROR  error
     @retval #                     number of bytes in
                                   TRANSLOG_HEADER_BUFFER::header where
                                   stored decoded part of the header
5985 5986
*/

5987
int translog_read_record_header(LSN lsn, TRANSLOG_HEADER_BUFFER *buff)
5988
{
unknown's avatar
unknown committed
5989
  uchar buffer[TRANSLOG_PAGE_SIZE], *page;
unknown's avatar
unknown committed
5990
  translog_size_t res, page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE;
unknown's avatar
unknown committed
5991
  PAGECACHE_BLOCK_LINK *direct_link;
unknown's avatar
unknown committed
5992 5993
  TRANSLOG_ADDRESS addr;
  TRANSLOG_VALIDATOR_DATA data;
5994
  DBUG_ENTER("translog_read_record_header");
unknown's avatar
unknown committed
5995
  DBUG_PRINT("enter", ("LSN: (0x%lu,0x%lx)", LSN_IN_PARTS(lsn)));
5996
  DBUG_ASSERT(LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE != 0);
unknown's avatar
unknown committed
5997
  DBUG_ASSERT(translog_inited == 1);
5998

5999
  buff->lsn= lsn;
6000
  buff->groups_no= 0;
unknown's avatar
unknown committed
6001 6002 6003 6004
  data.addr= &addr;
  data.was_recovered= 0;
  addr= lsn;
  addr-= page_offset; /* offset decreasing */
6005 6006
  res= (!(page= translog_get_page(&data, buffer, &direct_link))) ?
    RECHEADER_READ_ERROR :
unknown's avatar
unknown committed
6007
    translog_read_record_header_from_buffer(page, page_offset, buff, 0);
6008
  translog_free_link(direct_link);
unknown's avatar
unknown committed
6009
  DBUG_RETURN(res);
6010 6011 6012
}


6013 6014 6015
/**
   @brief Read record header and some fixed part of a record (the part depend
   on record type).
6016

6017 6018 6019
   @param scan            scanner position to read
   @param buff            log record header buffer
   @param move_scanner    request to move scanner to the header position
6020

6021 6022 6023 6024 6025 6026 6027 6028 6029 6030
   @note Some type of record can be read completely by this call
   @note "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative
   LSN can be translated to absolute one), some fields can be added (like
   actual header length in the record if the header has variable length)

   @return Length of header or operation status
     @retval RECHEADER_READ_ERROR  error
     @retval #                     number of bytes in
                                   TRANSLOG_HEADER_BUFFER::header where stored
                                   decoded part of the header
6031 6032
*/

6033 6034 6035
int translog_read_record_header_scan(TRANSLOG_SCANNER_DATA *scanner,
                                     TRANSLOG_HEADER_BUFFER *buff,
                                     my_bool move_scanner)
6036
{
unknown's avatar
unknown committed
6037
  translog_size_t res;
6038
  DBUG_ENTER("translog_read_record_header_scan");
unknown's avatar
unknown committed
6039 6040
  DBUG_PRINT("enter", ("Scanner: Cur: (%lu,0x%lx)  Hrz: (%lu,0x%lx)  "
                       "Lst: (%lu,0x%lx)  Offset: %u(%x)  fixed %d",
unknown's avatar
unknown committed
6041 6042 6043
                       LSN_IN_PARTS(scanner->page_addr),
                       LSN_IN_PARTS(scanner->horizon),
                       LSN_IN_PARTS(scanner->last_file_page),
6044 6045
                       (uint) scanner->page_offset,
                       (uint) scanner->page_offset, scanner->fixed_horizon));
unknown's avatar
unknown committed
6046
  DBUG_ASSERT(translog_inited == 1);
6047 6048
  buff->groups_no= 0;
  buff->lsn= scanner->page_addr;
6049
  buff->lsn+= scanner->page_offset; /* offset increasing */
unknown's avatar
unknown committed
6050 6051 6052 6053 6054 6055
  res= translog_read_record_header_from_buffer(scanner->page,
                                               scanner->page_offset,
                                               buff,
                                               (move_scanner ?
                                                scanner : 0));
  DBUG_RETURN(res);
6056 6057 6058
}


6059 6060 6061
/**
   @brief Read record header and some fixed part of the next record (the part
   depend on record type).
6062

6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074
   @param scanner         data for scanning if lsn is NULL scanner data
                          will be used for continue scanning.
                          The scanner can be NULL.

   @param buff            log record header buffer

   @return Length of header or operation status
     @retval RECHEADER_READ_ERROR  error
     @retval RECHEADER_READ_EOF    EOF
     @retval #                     number of bytes in
                                   TRANSLOG_HEADER_BUFFER::header where
                                   stored decoded part of the header
6075
*/
6076

6077 6078
int translog_read_next_record_header(TRANSLOG_SCANNER_DATA *scanner,
                                     TRANSLOG_HEADER_BUFFER *buff)
6079 6080
{
  uint8 chunk_type;
unknown's avatar
unknown committed
6081
  translog_size_t res;
6082
  buff->groups_no= 0;        /* to be sure that we will free it right */
6083 6084 6085

  DBUG_ENTER("translog_read_next_record_header");
  DBUG_PRINT("enter", ("scanner: 0x%lx", (ulong) scanner));
unknown's avatar
unknown committed
6086 6087
  DBUG_PRINT("info", ("Scanner: Cur: (%lu,0x%lx)  Hrz: (%lu,0x%lx)  "
                      "Lst: (%lu,0x%lx)  Offset: %u(%x)  fixed: %d",
unknown's avatar
unknown committed
6088 6089 6090
                      LSN_IN_PARTS(scanner->page_addr),
                      LSN_IN_PARTS(scanner->horizon),
                      LSN_IN_PARTS(scanner->last_file_page),
6091 6092
                      (uint) scanner->page_offset,
                      (uint) scanner->page_offset, scanner->fixed_horizon));
unknown's avatar
unknown committed
6093
  DBUG_ASSERT(translog_inited == 1);
6094 6095 6096 6097

  do
  {
    if (translog_get_next_chunk(scanner))
6098
      DBUG_RETURN(RECHEADER_READ_ERROR);
unknown's avatar
unknown committed
6099 6100 6101 6102 6103 6104 6105
    if (scanner->page == END_OF_LOG)
    {
       DBUG_PRINT("info", ("End of file from the scanner"));
       /* Last record was read */
       buff->lsn= LSN_IMPOSSIBLE;
       DBUG_RETURN(RECHEADER_READ_EOF);
    }
6106
    chunk_type= scanner->page[scanner->page_offset] & TRANSLOG_CHUNK_TYPE;
unknown's avatar
unknown committed
6107 6108 6109 6110
    DBUG_PRINT("info", ("Page: (%lu,0x%lx)  offset: %lu  type: %x  byte: %x",
                        LSN_IN_PARTS(scanner->page_addr),
                        (ulong) scanner->page_offset,
                        (uint) chunk_type,
6111
                        (uint) scanner->page[scanner->page_offset]));
unknown's avatar
unknown committed
6112 6113 6114
  } while (chunk_type != TRANSLOG_CHUNK_LSN &&
           chunk_type != TRANSLOG_CHUNK_FIXED &&
           scanner->page[scanner->page_offset] != TRANSLOG_FILLER);
6115

unknown's avatar
unknown committed
6116
  if (scanner->page[scanner->page_offset] == TRANSLOG_FILLER)
6117
  {
unknown's avatar
unknown committed
6118
    DBUG_PRINT("info", ("End of file"));
6119
    /* Last record was read */
unknown's avatar
unknown committed
6120
    buff->lsn= LSN_IMPOSSIBLE;
unknown's avatar
unknown committed
6121
    /* Return 'end of log' marker */
6122
    res= RECHEADER_READ_EOF;
6123
  }
unknown's avatar
unknown committed
6124 6125 6126
  else
    res= translog_read_record_header_scan(scanner, buff, 0);
  DBUG_RETURN(res);
6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138
}


/*
  Moves record data reader to the next chunk and fill the data reader
  information about that chunk.

  SYNOPSIS
    translog_record_read_next_chunk()
    data                 data cursor

  RETURN
unknown's avatar
unknown committed
6139 6140
    0  OK
    1  Error
6141
*/
6142

unknown's avatar
unknown committed
6143
static my_bool translog_record_read_next_chunk(TRANSLOG_READER_DATA *data)
6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162
{
  translog_size_t new_current_offset= data->current_offset + data->chunk_size;
  uint16 chunk_header_len, chunk_len;
  uint8 type;
  DBUG_ENTER("translog_record_read_next_chunk");

  if (data->eor)
  {
    DBUG_PRINT("info", ("end of the record flag set"));
    DBUG_RETURN(1);
  }

  if (data->header.groups_no &&
      data->header.groups_no - 1 != data->current_group &&
      data->header.groups[data->current_group].num == data->current_chunk)
  {
    /* Goto next group */
    data->current_group++;
    data->current_chunk= 0;
unknown's avatar
unknown committed
6163
    DBUG_PRINT("info", ("skip to group: #%u", data->current_group));
6164
    translog_destroy_scanner(&data->scanner);
unknown's avatar
unknown committed
6165
    translog_scanner_init(data->header.groups[data->current_group].addr,
6166
                          1, &data->scanner, 1);
6167 6168 6169 6170 6171 6172
  }
  else
  {
    data->current_chunk++;
    if (translog_get_next_chunk(&data->scanner))
      DBUG_RETURN(1);
unknown's avatar
unknown committed
6173 6174 6175
     if (data->scanner.page == END_OF_LOG)
     {
       /*
unknown's avatar
unknown committed
6176 6177
         Actually it should not happened, but we want to quit nicely in case
         of a truncated log
unknown's avatar
unknown committed
6178 6179 6180
       */
       DBUG_RETURN(1);
     }
6181 6182 6183 6184 6185 6186
  }
  type= data->scanner.page[data->scanner.page_offset] & TRANSLOG_CHUNK_TYPE;

  if (type == TRANSLOG_CHUNK_LSN && data->header.groups_no)
  {
    DBUG_PRINT("info",
unknown's avatar
unknown committed
6187
               ("Last chunk: data len: %u  offset: %u  group: %u of %u",
6188 6189 6190
                data->header.chunk0_data_len, data->scanner.page_offset,
                data->current_group, data->header.groups_no - 1));
    DBUG_ASSERT(data->header.groups_no - 1 == data->current_group);
6191 6192
    DBUG_ASSERT(data->header.lsn ==
                data->scanner.page_addr + data->scanner.page_offset);
6193
    translog_destroy_scanner(&data->scanner);
unknown's avatar
unknown committed
6194
    translog_scanner_init(data->header.chunk0_data_addr, 1, &data->scanner, 1);
6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208
    data->chunk_size= data->header.chunk0_data_len;
    data->body_offset= data->scanner.page_offset;
    data->current_offset= new_current_offset;
    data->eor= 1;
    DBUG_RETURN(0);
  }

  if (type == TRANSLOG_CHUNK_LSN || type == TRANSLOG_CHUNK_FIXED)
  {
    data->eor= 1;
    DBUG_RETURN(1);                             /* End of record */
  }

  chunk_header_len=
unknown's avatar
unknown committed
6209
    translog_get_chunk_header_length(data->scanner.page +
6210 6211 6212 6213 6214 6215
                                     data->scanner.page_offset);
  chunk_len= translog_get_total_chunk_length(data->scanner.page,
                                             data->scanner.page_offset);
  data->chunk_size= chunk_len - chunk_header_len;
  data->body_offset= data->scanner.page_offset + chunk_header_len;
  data->current_offset= new_current_offset;
unknown's avatar
unknown committed
6216 6217
  DBUG_PRINT("info", ("grp: %u  chunk: %u  body_offset: %u  chunk_size: %u  "
                      "current_offset: %lu",
6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234
                      (uint) data->current_group,
                      (uint) data->current_chunk,
                      (uint) data->body_offset,
                      (uint) data->chunk_size, (ulong) data->current_offset));
  DBUG_RETURN(0);
}


/*
  Initialize record reader data from LSN

  SYNOPSIS
    translog_init_reader_data()
    lsn                  reference to LSN we should start from
    data                 reader data to initialize

  RETURN
unknown's avatar
unknown committed
6235 6236
    0  OK
    1  Error
6237 6238
*/

6239
static my_bool translog_init_reader_data(LSN lsn,
unknown's avatar
unknown committed
6240
                                         TRANSLOG_READER_DATA *data)
6241
{
unknown's avatar
unknown committed
6242
  int read_header;
6243
  DBUG_ENTER("translog_init_reader_data");
unknown's avatar
unknown committed
6244
  if (translog_scanner_init(lsn, 1, &data->scanner, 1) ||
unknown's avatar
unknown committed
6245 6246 6247
      ((read_header=
        translog_read_record_header_scan(&data->scanner, &data->header, 1))
       == RECHEADER_READ_ERROR))
6248
    DBUG_RETURN(1);
unknown's avatar
unknown committed
6249
  data->read_header= read_header;
6250 6251 6252 6253 6254 6255
  data->body_offset= data->header.non_header_data_start_offset;
  data->chunk_size= data->header.non_header_data_len;
  data->current_offset= data->read_header;
  data->current_group= 0;
  data->current_chunk= 0;
  data->eor= 0;
unknown's avatar
unknown committed
6256 6257
  DBUG_PRINT("info", ("read_header: %u  "
                      "body_offset: %u  chunk_size: %u  current_offset: %lu",
6258 6259 6260 6261 6262 6263 6264
                      (uint) data->read_header,
                      (uint) data->body_offset,
                      (uint) data->chunk_size, (ulong) data->current_offset));
  DBUG_RETURN(0);
}


6265 6266 6267 6268
/**
  @brief Destroy reader data object
*/

unknown's avatar
unknown committed
6269
static void translog_destroy_reader_data(TRANSLOG_READER_DATA *data)
6270 6271 6272 6273 6274
{
  translog_destroy_scanner(&data->scanner);
}


6275 6276 6277 6278 6279 6280
/*
  Read a part of the record.

  SYNOPSIS
    translog_read_record_header()
    lsn                  log record serial number (address of the record)
unknown's avatar
unknown committed
6281
    offset               From the beginning of the record beginning (read
6282
                         by translog_read_record_header).
unknown's avatar
unknown committed
6283 6284
    length               Length of record part which have to be read.
    buffer               Buffer where to read the record part (have to be at
6285 6286 6287 6288 6289 6290
                         least 'length' bytes length)

  RETURN
    length of data actually read
*/

6291
translog_size_t translog_read_record(LSN lsn,
6292 6293
                                     translog_size_t offset,
                                     translog_size_t length,
unknown's avatar
unknown committed
6294
                                     uchar *buffer,
unknown's avatar
unknown committed
6295
                                     TRANSLOG_READER_DATA *data)
6296 6297 6298
{
  translog_size_t requested_length= length;
  translog_size_t end= offset + length;
unknown's avatar
unknown committed
6299
  TRANSLOG_READER_DATA internal_data;
6300
  DBUG_ENTER("translog_read_record");
unknown's avatar
unknown committed
6301
  DBUG_ASSERT(translog_inited == 1);
6302 6303 6304

  if (data == NULL)
  {
unknown's avatar
unknown committed
6305
    DBUG_ASSERT(lsn != LSN_IMPOSSIBLE);
6306 6307 6308 6309 6310 6311 6312 6313 6314
    data= &internal_data;
  }
  if (lsn ||
      (offset < data->current_offset &&
       !(offset < data->read_header && offset + length < data->read_header)))
  {
    if (translog_init_reader_data(lsn, data))
      DBUG_RETURN(0);
  }
unknown's avatar
unknown committed
6315 6316 6317
  DBUG_PRINT("info", ("Offset: %lu  length: %lu  "
                      "Scanner: Cur: (%lu,0x%lx)  Hrz: (%lu,0x%lx)  "
                      "Lst: (%lu,0x%lx)  Offset: %u(%x)  fixed: %d",
6318
                      (ulong) offset, (ulong) length,
unknown's avatar
unknown committed
6319 6320 6321
                      LSN_IN_PARTS(data->scanner.page_addr),
                      LSN_IN_PARTS(data->scanner.horizon),
                      LSN_IN_PARTS(data->scanner.last_file_page),
6322 6323 6324 6325 6326
                      (uint) data->scanner.page_offset,
                      (uint) data->scanner.page_offset,
                      data->scanner.fixed_horizon));
  if (offset < data->read_header)
  {
unknown's avatar
unknown committed
6327
    uint16 len= min(data->read_header, end) - offset;
6328
    DBUG_PRINT("info",
unknown's avatar
unknown committed
6329
               ("enter header offset: %lu  length: %lu",
6330
                (ulong) offset, (ulong) length));
unknown's avatar
unknown committed
6331
    memcpy(buffer, data->header.header + offset, len);
6332 6333
    length-= len;
    if (length == 0)
6334 6335
    {
      translog_destroy_reader_data(data);
6336
      DBUG_RETURN(requested_length);
6337
    }
6338 6339 6340
    offset+= len;
    buffer+= len;
    DBUG_PRINT("info",
unknown's avatar
unknown committed
6341
               ("len: %u  offset: %lu   curr: %lu  length: %lu",
6342 6343 6344 6345 6346 6347
                len, (ulong) offset, (ulong) data->current_offset,
                (ulong) length));
  }
  /* TODO: find first page which we should read by offset */

  /* read the record chunk by chunk */
unknown's avatar
unknown committed
6348
  for(;;)
6349 6350 6351
  {
    uint page_end= data->current_offset + data->chunk_size;
    DBUG_PRINT("info",
unknown's avatar
unknown committed
6352 6353
               ("enter body offset: %lu  curr: %lu  "
                "length: %lu  page_end: %lu",
6354 6355 6356 6357 6358
                (ulong) offset, (ulong) data->current_offset, (ulong) length,
                (ulong) page_end));
    if (offset < page_end)
    {
      uint len= page_end - offset;
unknown's avatar
unknown committed
6359
      set_if_smaller(len, length); /* in case we read beyond record's end */
unknown's avatar
unknown committed
6360 6361
      DBUG_ASSERT(offset >= data->current_offset);
      memcpy(buffer,
6362 6363 6364 6365
              data->scanner.page + data->body_offset +
              (offset - data->current_offset), len);
      length-= len;
      if (length == 0)
6366 6367
      {
        translog_destroy_reader_data(data);
6368
        DBUG_RETURN(requested_length);
6369
      }
6370 6371 6372
      offset+= len;
      buffer+= len;
      DBUG_PRINT("info",
unknown's avatar
unknown committed
6373
                 ("len: %u  offset: %lu  curr: %lu  length: %lu",
6374 6375 6376 6377
                  len, (ulong) offset, (ulong) data->current_offset,
                  (ulong) length));
    }
    if (translog_record_read_next_chunk(data))
6378 6379
    {
      translog_destroy_reader_data(data);
6380
      DBUG_RETURN(requested_length - length);
6381
    }
unknown's avatar
unknown committed
6382
  }
6383 6384 6385 6386
}


/*
6387
  @brief Force skipping to the next buffer
6388

6389 6390
  @todo Do not copy old page content if all page protections are switched off
  (because we do not need calculate something or change old parts of the page)
6391 6392 6393 6394
*/

static void translog_force_current_buffer_to_finish()
{
unknown's avatar
unknown committed
6395
  TRANSLOG_ADDRESS new_buff_beginning;
unknown's avatar
unknown committed
6396 6397 6398 6399
  uint16 old_buffer_no= log_descriptor.bc.buffer_no;
  uint16 new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
  struct st_translog_buffer *new_buffer= (log_descriptor.buffers +
                                          new_buffer_no);
6400
  struct st_translog_buffer *old_buffer= log_descriptor.bc.buffer;
6401
  uchar *data= log_descriptor.bc.ptr - log_descriptor.bc.current_page_fill;
unknown's avatar
unknown committed
6402 6403
  uint16 left= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill;
  uint16 current_page_fill, write_counter, previous_offset;
6404
  DBUG_ENTER("translog_force_current_buffer_to_finish");
unknown's avatar
unknown committed
6405 6406 6407 6408
  DBUG_PRINT("enter", ("Buffer #%u 0x%lx  "
                       "Buffer addr: (%lu,0x%lx)  "
                       "Page addr: (%lu,0x%lx)  "
                       "size: %lu (%lu)  Pg: %u  left: %u",
6409 6410
                       (uint) log_descriptor.bc.buffer_no,
                       (ulong) log_descriptor.bc.buffer,
unknown's avatar
unknown committed
6411
                       LSN_IN_PARTS(log_descriptor.bc.buffer->offset),
6412 6413
                       (ulong) LSN_FILE_NO(log_descriptor.horizon),
                       (ulong) (LSN_OFFSET(log_descriptor.horizon) -
unknown's avatar
unknown committed
6414
                                log_descriptor.bc.current_page_fill),
6415 6416 6417
                       (ulong) log_descriptor.bc.buffer->size,
                       (ulong) (log_descriptor.bc.ptr -log_descriptor.bc.
                                buffer->buffer),
unknown's avatar
unknown committed
6418
                       (uint) log_descriptor.bc.current_page_fill,
6419
                       (uint) left));
unknown's avatar
unknown committed
6420

unknown's avatar
unknown committed
6421 6422 6423
  LINT_INIT(current_page_fill);
  new_buff_beginning= log_descriptor.bc.buffer->offset;
  new_buff_beginning+= log_descriptor.bc.buffer->size; /* increase offset */
unknown's avatar
unknown committed
6424

6425
  DBUG_ASSERT(log_descriptor.bc.ptr !=NULL);
6426 6427
  DBUG_ASSERT(LSN_FILE_NO(log_descriptor.horizon) ==
              LSN_FILE_NO(log_descriptor.bc.buffer->offset));
unknown's avatar
unknown committed
6428
  translog_check_cursor(&log_descriptor.bc);
unknown's avatar
unknown committed
6429 6430
  DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE);
  if (left != 0)
6431 6432 6433 6434 6435
  {
    /*
       TODO: if 'left' is so small that can't hold any other record
       then do not move the page
    */
unknown's avatar
unknown committed
6436
    DBUG_PRINT("info", ("left: %u", (uint) left));
6437

6438
    /* decrease offset */
unknown's avatar
unknown committed
6439
    new_buff_beginning-= log_descriptor.bc.current_page_fill;
unknown's avatar
unknown committed
6440
    current_page_fill= log_descriptor.bc.current_page_fill;
6441

unknown's avatar
unknown committed
6442
    memset(log_descriptor.bc.ptr, TRANSLOG_FILLER, left);
6443
    log_descriptor.bc.buffer->size+= left;
unknown's avatar
unknown committed
6444
    DBUG_PRINT("info", ("Finish Page buffer #%u: 0x%lx  "
6445 6446 6447 6448 6449 6450 6451 6452 6453
                        "Size: %lu",
                        (uint) log_descriptor.bc.buffer->buffer_no,
                        (ulong) log_descriptor.bc.buffer,
                        (ulong) log_descriptor.bc.buffer->size));
    DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no ==
                log_descriptor.bc.buffer_no);
  }
  else
  {
unknown's avatar
unknown committed
6454
    log_descriptor.bc.current_page_fill= 0;
6455 6456 6457 6458 6459
  }

  translog_buffer_lock(new_buffer);
  translog_wait_for_buffer_free(new_buffer);

unknown's avatar
unknown committed
6460 6461 6462
  write_counter= log_descriptor.bc.write_counter;
  previous_offset= log_descriptor.bc.previous_offset;
  translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no);
unknown's avatar
unknown committed
6463
  /* Fix buffer offset (which was incorrectly set to horizon) */
unknown's avatar
unknown committed
6464
  log_descriptor.bc.buffer->offset= new_buff_beginning;
unknown's avatar
unknown committed
6465 6466
  log_descriptor.bc.write_counter= write_counter;
  log_descriptor.bc.previous_offset= previous_offset;
6467

unknown's avatar
unknown committed
6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490
  /*
    Advances this log pointer, increases writers and let other threads to
    write to the log while we process old page content
  */
  if (left)
  {
    log_descriptor.bc.ptr+= current_page_fill;
    log_descriptor.bc.buffer->size= log_descriptor.bc.current_page_fill=
      current_page_fill;
    new_buffer->overlay= old_buffer;
  }
  else
    translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
  translog_buffer_increase_writers(new_buffer);
  translog_buffer_unlock(new_buffer);

  /*
    We have to wait until all writers finish before start changing the
    pages by applying protection and copying the page content in the
    new buffer.
  */
  translog_wait_for_writers(old_buffer);

unknown's avatar
unknown committed
6491
  if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
6492 6493 6494 6495 6496
  {
    translog_put_sector_protection(data, &log_descriptor.bc);
    if (left)
    {
      log_descriptor.bc.write_counter++;
unknown's avatar
unknown committed
6497
      log_descriptor.bc.previous_offset= current_page_fill;
6498 6499 6500 6501 6502 6503 6504 6505 6506
    }
    else
    {
      DBUG_PRINT("info", ("drop write_counter"));
      log_descriptor.bc.write_counter= 0;
      log_descriptor.bc.previous_offset= 0;
    }
  }

unknown's avatar
unknown committed
6507
  if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
6508
  {
unknown's avatar
unknown committed
6509 6510 6511
    uint32 crc= translog_crc(data + log_descriptor.page_overhead,
                             TRANSLOG_PAGE_SIZE -
                             log_descriptor.page_overhead);
6512 6513 6514 6515 6516 6517
    DBUG_PRINT("info", ("CRC: 0x%lx", (ulong) crc));
    int4store(data + 3 + 3 + 1, crc);
  }

  if (left)
  {
6518
    /*
unknown's avatar
unknown committed
6519
      TODO: do not copy beginning of the page if we have no CRC or sector
6520 6521
      checks on
    */
unknown's avatar
unknown committed
6522
    memcpy(new_buffer->buffer, data, current_page_fill);
6523
  }
6524
  old_buffer->next_buffer_offset= new_buffer->offset;
6525

unknown's avatar
unknown committed
6526 6527 6528 6529
  translog_buffer_lock(new_buffer);
  translog_buffer_decrease_writers(new_buffer);
  translog_buffer_unlock(new_buffer);

6530 6531 6532
  DBUG_VOID_RETURN;
}

unknown's avatar
unknown committed
6533

6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558
/**
   @brief Flush the log up to given LSN (included)

   @param  lsn             log record serial number up to which (inclusive)
                           the log has to be flushed

   @return Operation status
     @retval 0      OK
     @retval 1      Error

  @todo LOG: when a log write fails, we should not write to this log anymore
  (if we add more log records to this log they will be unreadable: we will hit
  the broken log record): all translog_flush() should be made to fail (because
  translog_flush() is when a a transaction wants something durable and we
  cannot make anything durable as log is corrupted). For that, a "my_bool
  st_translog_descriptor::write_error" could be set to 1 when a
  translog_write_record() or translog_flush() fails, and translog_flush()
  would test this var (and translog_write_record() could also test this var if
  it wants, though it's not absolutely needed).
  Then, either shut Maria down immediately, or switch to a new log (but if we
  get write error after write error, that would create too many logs).
  A popular open-source transactional engine intentionally crashes as soon as
  a log flush fails (we however don't want to crash the entire mysqld, but
  stopping all engine's operations immediately would make sense).
  Same applies to translog_write_record().
6559 6560

  @todo: remove serialization and make group commit.
6561 6562
*/

unknown's avatar
unknown committed
6563
my_bool translog_flush(TRANSLOG_ADDRESS lsn)
6564
{
6565
  LSN old_flushed, sent_to_disk;
6566
  TRANSLOG_ADDRESS flush_horizon;
6567
  int rc= 0;
6568 6569 6570 6571
  /* We can't have more different files then buffers */
  File file_handlers[TRANSLOG_BUFFERS_NO];
  int current_file_handler= -1;
  uint32 prev_file= 0;
6572 6573
  my_bool full_circle= 0;
  DBUG_ENTER("translog_flush");
unknown's avatar
unknown committed
6574
  DBUG_PRINT("enter", ("Flush up to LSN: (%lu,0x%lx)", LSN_IN_PARTS(lsn)));
unknown's avatar
unknown committed
6575
  DBUG_ASSERT(translog_inited == 1);
6576
  LINT_INIT(sent_to_disk);
6577

6578
  pthread_mutex_lock(&log_descriptor.log_flush_lock);
6579
  translog_lock();
6580
  flush_horizon= LSN_IMPOSSIBLE;
6581 6582 6583
  old_flushed= log_descriptor.flushed;
  for (;;)
  {
unknown's avatar
unknown committed
6584 6585
    uint16 buffer_no= log_descriptor.bc.buffer_no;
    uint16 buffer_start= buffer_no;
6586 6587
    struct st_translog_buffer *buffer_unlock= log_descriptor.bc.buffer;
    struct st_translog_buffer *buffer= log_descriptor.bc.buffer;
6588
    if (cmp_translog_addr(log_descriptor.flushed, lsn) >= 0)
6589
    {
unknown's avatar
unknown committed
6590
      DBUG_PRINT("info", ("already flushed: (%lu,0x%lx)",
unknown's avatar
unknown committed
6591
                          LSN_IN_PARTS(log_descriptor.flushed)));
6592 6593
      translog_unlock();
      goto out;
6594 6595
    }
    /* send to the file if it is not sent */
6596
    sent_to_disk= translog_get_sent_to_disk();
6597
    if (cmp_translog_addr(sent_to_disk, lsn) >= 0 || full_circle)
6598 6599 6600 6601 6602 6603 6604 6605 6606
      break;

    do
    {
      buffer_no= (buffer_no + 1) % TRANSLOG_BUFFERS_NO;
      buffer= log_descriptor.buffers + buffer_no;
      translog_buffer_lock(buffer);
      translog_buffer_unlock(buffer_unlock);
      buffer_unlock= buffer;
unknown's avatar
unknown committed
6607
      if (buffer->file != -1)
6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618
      {
        buffer_unlock= NULL;
        if (buffer_start == buffer_no)
        {
          /* we made a circle */
          full_circle= 1;
          translog_force_current_buffer_to_finish();
        }
        break;
      }
    } while ((buffer_start != buffer_no) &&
6619
             cmp_translog_addr(log_descriptor.flushed, lsn) < 0);
6620
    if (buffer_unlock != NULL && buffer_unlock != buffer)
6621
      translog_buffer_unlock(buffer_unlock);
6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636

    if (prev_file != LSN_FILE_NO(buffer->offset))
    {
      uint cache_index;
      uint32 fn= LSN_FILE_NO(buffer->offset);
      prev_file= fn;
      if ((cache_index= LSN_FILE_NO(log_descriptor.horizon) - fn) <
          OPENED_FILES_NUM)
      {
        /* file in the cache */
        if (log_descriptor.log_file_num[cache_index] == -1)
        {
          if ((log_descriptor.log_file_num[cache_index]=
               open_logfile_by_number_no_cache(fn)) == -1)
          {
6637 6638
            /* We don't need translog_unlock() here */
            translog_buffer_unlock(buffer);
6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650
            rc= 1;
            goto out;
          }
        }
        current_file_handler++;
        file_handlers[current_file_handler]=
          log_descriptor.log_file_num[cache_index];
      }
      /* We sync file when we are closing it => do nothing if file closed */
    }
    DBUG_ASSERT(flush_horizon <= buffer->offset + buffer->size);
    flush_horizon= buffer->offset + buffer->size;
unknown's avatar
unknown committed
6651
    rc= translog_buffer_flush(buffer);
6652
    translog_buffer_unlock(buffer);
unknown's avatar
unknown committed
6653
    if (rc)
6654
      goto out;                                 /* rc is 1 */
unknown's avatar
unknown committed
6655
    translog_lock();
6656
  }
unknown's avatar
unknown committed
6657
  translog_unlock();
6658 6659

  {
6660 6661
    File *handler= file_handlers;
    File *end= file_handlers + current_file_handler;
unknown's avatar
unknown committed
6662
    for (; handler <= end; handler++)
6663
      rc|= my_sync(*handler, MYF(MY_WME));
6664
  }
6665
  log_descriptor.flushed= sent_to_disk;
6666 6667 6668 6669 6670 6671 6672 6673 6674 6675
  /*
    If we should flush (due to directory flush mode) and
    previous flush horizon was not within one page border with this one.
  */
  if (sync_log_dir >= TRANSLOG_SYNC_DIR_ALWAYS &&
      (LSN_FILE_NO(log_descriptor.previous_flush_horizon) !=
       LSN_FILE_NO(flush_horizon) ||
       ((LSN_OFFSET(log_descriptor.previous_flush_horizon) - 1) /
        TRANSLOG_PAGE_SIZE) !=
       ((LSN_OFFSET(flush_horizon) - 1) / TRANSLOG_PAGE_SIZE)))
6676
    rc|= my_sync(log_descriptor.directory_fd, MYF(MY_WME | MY_IGNORE_BADFD));
6677
  log_descriptor.previous_flush_horizon= flush_horizon;
6678
out:
6679
  pthread_mutex_unlock(&log_descriptor.log_flush_lock);
6680 6681
  DBUG_RETURN(rc);
}
6682 6683


6684 6685 6686 6687 6688 6689 6690
/**
   @brief Gives a 2-byte-id to MARIA_SHARE and logs this fact

   If a MARIA_SHARE does not yet have a 2-byte-id (unique over all currently
   open MARIA_SHAREs), give it one and record this assignment in the log
   (LOGREC_FILE_ID log record).

unknown's avatar
unknown committed
6691
   @param  tbl_info        table
6692 6693 6694 6695 6696 6697 6698 6699 6700
   @param  trn             calling transaction

   @return Operation status
     @retval 0      OK
     @retval 1      Error

   @note Can be called even if share already has an id (then will do nothing)
*/

unknown's avatar
unknown committed
6701
int translog_assign_id_to_share(MARIA_HA *tbl_info, TRN *trn)
6702
{
unknown's avatar
unknown committed
6703
  MARIA_SHARE *share= tbl_info->s;
6704 6705 6706 6707 6708 6709
  /*
    If you give an id to a non-BLOCK_RECORD table, you also need to release
    this id somewhere. Then you can change the assertion.
  */
  DBUG_ASSERT(share->data_file_type == BLOCK_RECORD);
  /* re-check under mutex to avoid having 2 ids for the same share */
6710
  pthread_mutex_lock(&share->intern_lock);
6711 6712 6713
  if (likely(share->id == 0))
  {
    /* Inspired by set_short_trid() of trnman.c */
unknown's avatar
unknown committed
6714 6715
    uint i= share->kfile.file % SHARE_ID_MAX + 1;
    do
6716
    {
unknown's avatar
unknown committed
6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730
      my_atomic_rwlock_wrlock(&LOCK_id_to_share);
      for ( ; i <= SHARE_ID_MAX ; i++) /* the range is [1..SHARE_ID_MAX] */
      {
        void *tmp= NULL;
        if (id_to_share[i] == NULL &&
            my_atomic_casptr((void **)&id_to_share[i], &tmp, share))
        {
          share->id= (uint16)i;
          break;
        }
      }
      my_atomic_rwlock_wrunlock(&LOCK_id_to_share);
      i= 1; /* scan the whole array */
    } while (share->id == 0);
unknown's avatar
unknown committed
6731
    DBUG_PRINT("info", ("id_to_share: 0x%lx -> %u", (ulong)share, share->id));
6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747
    LSN lsn;
    LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
    uchar log_data[FILEID_STORE_SIZE];
    log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    (char*) log_data;
    log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
    /*
      open_file_name is an unresolved name (symlinks are not resolved, datadir
      is not realpath-ed, etc) which is good: the log can be moved to another
      directory and continue working.
    */
    log_array[TRANSLOG_INTERNAL_PARTS + 1].str= share->open_file_name;
    /**
       @todo if we had the name's length in MARIA_SHARE we could avoid this
       strlen()
    */
    log_array[TRANSLOG_INTERNAL_PARTS + 1].length=
unknown's avatar
unknown committed
6748
      strlen(share->open_file_name) + 1;
unknown's avatar
unknown committed
6749
    if (unlikely(translog_write_record(&lsn, LOGREC_FILE_ID, trn, tbl_info,
6750 6751 6752 6753
                                       sizeof(log_data) +
                                       log_array[TRANSLOG_INTERNAL_PARTS +
                                                 1].length,
                                       sizeof(log_array)/sizeof(log_array[0]),
unknown's avatar
unknown committed
6754
                                       log_array, log_data, NULL)))
6755 6756
      return 1;
  }
6757
  pthread_mutex_unlock(&share->intern_lock);
6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775
  return 0;
}


/**
   @brief Recycles a MARIA_SHARE's short id.

   Clears the share's slot in id_to_share[] and resets the id stored in the
   share itself.

   @param  share           table

   @note Must be called only if share has an id (i.e. id != 0)
   @note The caller must own share->intern_lock (asserted below)
*/

void translog_deassign_id_from_share(MARIA_SHARE *share)
{
  /* slot of id_to_share[] which currently refers to this share */
  void **slot= (void **)&id_to_share[share->id];
  DBUG_PRINT("info", ("id_to_share: 0x%lx id %u -> 0",
                      (ulong)share, share->id));
  /*
    We are called only when closing the last instance of the table or at
    the end of REPAIR, so no writes can be happening. A Checkpoint may
    however be reading share->id, and that is why this mutex is required:
  */
  safe_mutex_assert_owner(&share->intern_lock);

  my_atomic_rwlock_rdlock(&LOCK_id_to_share);
  my_atomic_storeptr(slot, 0);
  my_atomic_rwlock_rdunlock(&LOCK_id_to_share);

  /* useless but safety: */
  share->lsn_of_file_id= LSN_IMPOSSIBLE;
  share->id= 0;
}
unknown's avatar
unknown committed
6788 6789


unknown's avatar
unknown committed
6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800
/**
   @brief Gives the specified short id to a MARIA_SHARE

   Stores the id in the share and makes id_to_share[id] refer to the share.
   No mutex is taken and nothing is written to the log here.

   @param  share           table
   @param  id              short id to give to the share

   @note Asserts that Maria is in recovery and not multi-threaded, that the
         share has no id yet and that the slot of the id is free.
*/

void translog_assign_id_to_share_from_recovery(MARIA_SHARE *share,
                                               uint16 id)
{
  DBUG_ASSERT(maria_in_recovery && !maria_multi_threaded);
  DBUG_ASSERT(share->data_file_type == BLOCK_RECORD);
  DBUG_ASSERT(share->id == 0);
  DBUG_ASSERT(id_to_share[id] == NULL);
  share->id= id;
  id_to_share[id]= share;
}


6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814
/**
   @brief Checks if a log file with the given number exists

   The name of the file is built by translog_filename_by_fileno() and the
   file is then looked up with my_stat().

   @param file_no number of the file to test

   @retval 0 no such file
   @retval 1 there is file with such number
*/

my_bool translog_is_file(uint file_no)
{
  MY_STAT stat_buff;
  char path[FN_REFLEN];
  char *file_name= translog_filename_by_fileno(file_no, path);
  return test(my_stat(file_name, &stat_buff, MYF(0)));
}


unknown's avatar
unknown committed
6819
/**
6820
  @brief returns minimum log file number
unknown's avatar
unknown committed
6821

6822 6823 6824 6825 6826
  @param horizon         the end of the log
  @param is_protected    true if it is under purge_log protection

  @retval minimum file number
  @retval 0 no files found
unknown's avatar
unknown committed
6827 6828
*/

6829
static uint32 translog_first_file(TRANSLOG_ADDRESS horizon, int is_protected)
unknown's avatar
unknown committed
6830
{
6831
  uint min_file= 0, max_file;
6832 6833
  DBUG_ENTER("translog_first_file");
  if (!is_protected)
6834
    pthread_mutex_lock(&log_descriptor.purger_lock);
6835 6836 6837 6838 6839 6840
  if (log_descriptor.min_file_number &&
      translog_is_file(log_descriptor.min_file_number))
  {
    DBUG_PRINT("info", ("cached %lu",
                        (ulong) log_descriptor.min_file_number));
    if (!is_protected)
6841
      pthread_mutex_unlock(&log_descriptor.purger_lock);
6842 6843
    DBUG_RETURN(log_descriptor.min_file_number);
  }
6844

6845 6846 6847
  max_file= LSN_FILE_NO(horizon);

  if (MAKE_LSN(1, TRANSLOG_PAGE_SIZE) >= horizon)
6848 6849
  {
    /* there is no first page yet */
6850
    DBUG_RETURN(0);
6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865
  }

  /* binary search for last file */
  while (min_file != max_file && min_file != (max_file - 1))
  {
    uint test= (min_file + max_file) / 2;
    DBUG_PRINT("info", ("min_file: %u  test: %u  max_file: %u",
                        min_file, test, max_file));
    if (test == max_file)
      test--;
    if (translog_is_file(test))
      max_file= test;
    else
      min_file= test;
  }
6866 6867
  log_descriptor.min_file_number= max_file;
  if (!is_protected)
6868
    pthread_mutex_unlock(&log_descriptor.purger_lock);
6869 6870 6871 6872
  DBUG_RETURN(max_file);
}


6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887
/**
  @brief returns the closest LSN found when scanning from the given chunk
         address

  Chunks are scanned starting at addr until a chunk of type
  TRANSLOG_CHUNK_LSN or TRANSLOG_CHUNK_FIXED, a page filler or the end of
  the log is met.

  @param addr the chunk address to start from
  @param horizon the horizon if it is known or LSN_IMPOSSIBLE

  @retval LSN_ERROR Error
  @retval LSN_IMPOSSIBLE no LSNs after the address
  @retval # LSN of the closest record found from the given chunk address
*/

LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon)
{
  TRANSLOG_SCANNER_DATA scanner;
  LSN result;
  uint chunk_type;
  DBUG_ENTER("translog_next_LSN");

  if (horizon == LSN_IMPOSSIBLE)
    horizon= translog_get_horizon();

  /* nothing can be stored at the horizon itself */
  if (addr == horizon)
    DBUG_RETURN(LSN_IMPOSSIBLE);

  translog_scanner_init(addr, 0, &scanner, 1);

  if (addr % TRANSLOG_PAGE_SIZE == 0)
  {
    /*
      addr is a page border, i.e. not a chunk beginning but the end of the
      previous page. Put the scanner at the very end of that previous page
      so that translog_scanner_eop() triggers below.

      addr is not simply advanced by the page header overhead because it
      can be the end of the file; translog_get_next_chunk() is left to
      move to the next page in the correct way.
    */
    scanner.page_addr-= TRANSLOG_PAGE_SIZE;
    scanner.page_offset= TRANSLOG_PAGE_SIZE;
#ifndef DBUG_OFF
    scanner.page= NULL; /* prevent using incorrect page content */
#endif
  }

  /* addr can point not to a chunk beginning but to a page end */
  if (translog_scanner_eop(&scanner))
  {
    if (translog_get_next_chunk(&scanner))
    {
      result= LSN_ERROR;
      goto out;
    }
    if (scanner.page == END_OF_LOG)
    {
      result= LSN_IMPOSSIBLE;
      goto out;
    }
  }

  /* step over chunks until one which begins a record, or a filler */
  for (;;)
  {
    chunk_type= scanner.page[scanner.page_offset] & TRANSLOG_CHUNK_TYPE;
    DBUG_PRINT("info", ("type: %x  byte: %x", (uint) chunk_type,
                        (uint) scanner.page[scanner.page_offset]));
    if (chunk_type == TRANSLOG_CHUNK_LSN ||
        chunk_type == TRANSLOG_CHUNK_FIXED ||
        scanner.page[scanner.page_offset] == TRANSLOG_FILLER)
      break;

    if (translog_get_next_chunk(&scanner))
    {
      result= LSN_ERROR;
      goto out;
    }
    if (scanner.page == END_OF_LOG)
    {
      result= LSN_IMPOSSIBLE;
      goto out;
    }
  }

  /* a filler means that the rest of the page holds no records */
  result= ((scanner.page[scanner.page_offset] == TRANSLOG_FILLER) ?
           LSN_IMPOSSIBLE :
           scanner.page_addr + scanner.page_offset);

out:
  translog_destroy_scanner(&scanner);
  DBUG_RETURN(result);
}

6964

6965 6966 6967 6968
/**
   @brief returns the LSN of the first record starting in this log

   @retval LSN_ERROR Error
unknown's avatar
unknown committed
6969
   @retval LSN_IMPOSSIBLE no log or the log is empty
6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980
   @retval # LSN of the first record
*/

LSN translog_first_lsn_in_log()
{
  TRANSLOG_ADDRESS addr, horizon= translog_get_horizon();
  TRANSLOG_VALIDATOR_DATA data;
  uint file;
  uint16 chunk_offset;
  uchar *page;
  DBUG_ENTER("translog_first_lsn_in_log");
unknown's avatar
unknown committed
6981
  DBUG_PRINT("info", ("Horizon: (%lu,0x%lx)", LSN_IN_PARTS(addr)));
unknown's avatar
unknown committed
6982
  DBUG_ASSERT(translog_inited == 1);
6983 6984 6985 6986 6987 6988

  if (!(file= translog_first_file(horizon, 0)))
  {
    /* log has no records yet */
    DBUG_RETURN(LSN_IMPOSSIBLE);
  }
6989

6990
  addr= MAKE_LSN(file, TRANSLOG_PAGE_SIZE); /* the first page of the file */
6991 6992
  data.addr= &addr;
  {
6993
    uchar buffer[TRANSLOG_PAGE_SIZE];
6994
    if ((page= translog_get_page(&data, buffer, NULL)) == NULL ||
6995
        (chunk_offset= translog_get_first_chunk_offset(page)) == 0)
6996 6997
      DBUG_RETURN(LSN_ERROR);
  }
6998 6999 7000
  addr+= chunk_offset;

  DBUG_RETURN(translog_next_LSN(addr, horizon));
7001 7002 7003 7004
}


/**
7005
   @brief Returns theoretical first LSN if first log is present
7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017

   @retval LSN_ERROR Error
   @retval LSN_IMPOSSIBLE no log
   @retval # LSN of the first record
*/

LSN translog_first_theoretical_lsn()
{
  TRANSLOG_ADDRESS addr= translog_get_horizon();
  uchar buffer[TRANSLOG_PAGE_SIZE], *page;
  TRANSLOG_VALIDATOR_DATA data;
  DBUG_ENTER("translog_first_theoretical_lsn");
unknown's avatar
unknown committed
7018
  DBUG_PRINT("info", ("Horizon: (%lu,0x%lx)", LSN_IN_PARTS(addr)));
unknown's avatar
unknown committed
7019
  DBUG_ASSERT(translog_inited == 1);
7020 7021 7022 7023 7024

  if (!translog_is_file(1))
    DBUG_RETURN(LSN_IMPOSSIBLE);
  if (addr == MAKE_LSN(1, TRANSLOG_PAGE_SIZE))
  {
7025
    /* log has no records yet */
7026 7027 7028 7029 7030 7031
    DBUG_RETURN(MAKE_LSN(1, TRANSLOG_PAGE_SIZE +
                         log_descriptor.page_overhead));
  }

  addr= MAKE_LSN(1, TRANSLOG_PAGE_SIZE); /* the first page of the file */
  data.addr= &addr;
7032
  if ((page= translog_get_page(&data, buffer, NULL)) == NULL)
7033 7034 7035 7036
    DBUG_RETURN(LSN_ERROR);

  DBUG_RETURN(MAKE_LSN(1, TRANSLOG_PAGE_SIZE +
                       page_overhead[page[TRANSLOG_PAGE_FLAGS]]));
unknown's avatar
unknown committed
7037
}
7038 7039 7040


/**
7041
  @brief Checks given low water mark and purge files if it is need
7042

unknown's avatar
unknown committed
7043
  @param low the last (minimum) address which is need
7044 7045 7046 7047 7048

  @retval 0 OK
  @retval 1 Error
*/

unknown's avatar
unknown committed
7049
my_bool translog_purge(TRANSLOG_ADDRESS low)
7050 7051 7052 7053 7054
{
  uint32 last_need_file= LSN_FILE_NO(low);
  TRANSLOG_ADDRESS horizon= translog_get_horizon();
  int rc= 0;
  DBUG_ENTER("translog_purge");
unknown's avatar
unknown committed
7055
  DBUG_PRINT("enter", ("low: (%lu,0x%lx)", LSN_IN_PARTS(low)));
unknown's avatar
unknown committed
7056
  DBUG_ASSERT(translog_inited == 1);
7057

7058
  pthread_mutex_lock(&log_descriptor.purger_lock);
7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076
  if (LSN_FILE_NO(log_descriptor.last_lsn_checked) < last_need_file)
  {
    uint32 i;
    uint32 min_file= translog_first_file(horizon, 1);
    DBUG_ASSERT(min_file != 0); /* log is already started */
    for(i= min_file; i < last_need_file && rc == 0; i++)
    {
      LSN lsn= translog_get_file_max_lsn_stored(i);
      if (lsn == LSN_IMPOSSIBLE)
        break;   /* files are still in writing */
      if (lsn == LSN_ERROR)
      {
        rc= 1;
        break;
      }
      if (cmp_translog_addr(lsn, low) >= 0)
        break;
      DBUG_PRINT("info", ("purge file %lu", (ulong) i));
7077
      if (log_purge_type == TRANSLOG_PURGE_IMMIDIATE)
7078 7079 7080 7081 7082 7083
      {
        char path[FN_REFLEN], *file_name;
        file_name= translog_filename_by_fileno(i, path);
        rc= test(my_delete(file_name, MYF(MY_WME)));
      }
    }
7084 7085 7086 7087 7088 7089
    if (unlikely(rc == 1))
      log_descriptor.min_need_file= 0; /* impossible value */
    else
      log_descriptor.min_need_file= i;
  }

unknown's avatar
unknown committed
7090
  pthread_mutex_unlock(&log_descriptor.purger_lock);
7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119
  DBUG_RETURN(rc);
}


/**
  @brief Purges files by stored min need file in case of
    "ondemend" purge type

  @note This function do real work only if it is "ondemend" purge type
    and translog_purge() was called at least once and last time without
    errors

  @retval 0 OK
  @retval 1 Error
*/

my_bool translog_purge_at_flush()
{
  uint32 i, min_file;
  int rc= 0;
  DBUG_ENTER("translog_purge_at_flush");
  DBUG_ASSERT(translog_inited == 1);

  if (log_purge_type != TRANSLOG_PURGE_ONDEMAND)
  {
    DBUG_PRINT("info", ("It is not \"at_flush\" => exit"));
    DBUG_RETURN(0);
  }

unknown's avatar
unknown committed
7120
  pthread_mutex_lock(&log_descriptor.purger_lock);
7121 7122 7123 7124

  if (unlikely(log_descriptor.min_need_file == 0))
  {
    DBUG_PRINT("info", ("No info about min need file => exit"));
unknown's avatar
unknown committed
7125
    pthread_mutex_unlock(&log_descriptor.purger_lock);
7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136
    DBUG_RETURN(0);
  }

  min_file= translog_first_file(translog_get_horizon(), 1);
  DBUG_ASSERT(min_file != 0); /* log is already started */
  for(i= min_file; i < log_descriptor.min_need_file && rc == 0; i++)
  {
    char path[FN_REFLEN], *file_name;
    DBUG_PRINT("info", ("purge file %lu\n", (ulong) i));
    file_name= translog_filename_by_fileno(i, path);
    rc= test(my_delete(file_name, MYF(MY_WME)));
7137 7138
  }

7139
  pthread_mutex_unlock(&log_descriptor.purger_lock);
7140 7141
  DBUG_RETURN(rc);
}
7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168


/**
  @brief Gets min file number

  Public wrapper of translog_first_file() for callers which are not under
  the purger protection (is_protected is passed as 0).

  @param horizon         the end of the log

  @retval minimum file number
  @retval 0 no files found
*/

uint32 translog_get_first_file(TRANSLOG_ADDRESS horizon)
{
  uint32 first_file= translog_first_file(horizon, 0);
  return first_file;
}


/**
  @brief Gets min file number which is needed

  @retval minimum file number
  @retval 0 unknown
*/

uint32 translog_get_first_needed_file()
{
  uint32 file_no;
unknown's avatar
unknown committed
7169
  pthread_mutex_lock(&log_descriptor.purger_lock);
7170
  file_no= log_descriptor.min_need_file;
unknown's avatar
unknown committed
7171
  pthread_mutex_unlock(&log_descriptor.purger_lock);
7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197
  return file_no;
}


/**
  @brief Gets transaction log file size

  Reads log_descriptor.log_file_max_size between translog_lock() and
  translog_unlock().

  @return transaction log file size
*/

uint32 translog_get_file_size()
{
  uint32 max_size;

  translog_lock();
  max_size= log_descriptor.log_file_max_size;
  translog_unlock();

  return max_size;
}


/**
  @brief Sets transaction log file size

  @return Returns actually set transaction log size
*/

unknown's avatar
unknown committed
7198
void translog_set_file_size(uint32 size)
7199
{
7200
  struct st_translog_buffer *old_buffer= NULL;
unknown's avatar
unknown committed
7201
  DBUG_ENTER("translog_set_file_size");
7202
  translog_lock();
unknown's avatar
unknown committed
7203 7204
  DBUG_PRINT("enter", ("Size: %lu", (ulong) size));
  DBUG_ASSERT(size % TRANSLOG_PAGE_SIZE == 0 &&
unknown's avatar
unknown committed
7205
              size >= TRANSLOG_MIN_FILE_SIZE);
unknown's avatar
unknown committed
7206
  log_descriptor.log_file_max_size= size;
7207 7208 7209
  /* if current file longer then finish it*/
  if (LSN_OFFSET(log_descriptor.horizon) >=  log_descriptor.log_file_max_size)
  {
7210
    old_buffer= log_descriptor.bc.buffer;
7211 7212 7213 7214
    translog_buffer_next(&log_descriptor.horizon, &log_descriptor.bc, 1);
    translog_buffer_unlock(old_buffer);
  }
  translog_unlock();
7215 7216 7217 7218 7219 7220
  if (old_buffer)
  {
    translog_buffer_lock(old_buffer);
    translog_buffer_flush(old_buffer);
    translog_buffer_unlock(old_buffer);
  }
unknown's avatar
unknown committed
7221
  DBUG_VOID_RETURN;
7222 7223
}