ma_loghandler.c 218 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/* Copyright (C) 2007 MySQL AB & Sanja Belkin

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

16
#include "maria_def.h"
unknown's avatar
unknown committed
17 18
#include "ma_blockrec.h" /* for some constants and in-write hooks */
#include "trnman.h"      /* for access to members of TRN */
19

20 21 22 23 24 25 26 27
/**
   @file
   @brief Module which writes and reads to a transaction log

   @todo LOG: in functions where the log's lock is required, a
   translog_assert_owner() could be added.
*/

unknown's avatar
unknown committed
28
/* number of opened log files in the pagecache (should be at least 2) */
#define OPENED_FILES_NUM 3

/* records buffer size (should be TRANSLOG_PAGE_SIZE * n) */
#define TRANSLOG_WRITE_BUFFER (1024*1024)
/* min chunk length */
#define TRANSLOG_MIN_CHUNK 3
/*
  Number of buffers used by loghandler

  Should be at least 4, because one thread can block up to 2 buffers in
  normal circumstances (less than half of one and full other, or just
  switched one and other). But if we meet the end of the file in the middle
  and have to switch buffer it will be 3.  + 1 buffer for flushing/writing.
  We have a bigger number here for higher concurrency.
*/
#define TRANSLOG_BUFFERS_NO 5
/* number of bytes (+ header) which can be unused on first page in sequence */
#define TRANSLOG_MINCHUNK_CONTENT 1
/* version of log file */
#define TRANSLOG_VERSION_ID 10000               /* 1.00.00 */

#define TRANSLOG_PAGE_FLAGS 6 /* transaction log page flags offset */

/*
  QQ:  For temporary debugging

  Report an unrecoverable error: the message is sent to the DBUG trace under
  the "error" keyword and printed, followed by a newline, on stdout.

  E must be a complete, parenthesized printf() argument list, for example
    UNRECOVERABLE_ERROR(("Error %d during opening file '%s'", errno, path));

  The expansion is one statement WITHOUT a trailing semicolon (the caller
  supplies it), so the macro can be the body of an unbraced if/else.
*/
#define UNRECOVERABLE_ERROR(E) \
  do { \
    DBUG_PRINT("error", E); \
    printf E; \
    putchar('\n'); \
  } while(0)

60 61 62
/* Maximum length of compressed LSNs (the worst case of whole LSN storing) */
#define COMPRESSED_LSN_MAX_STORE_SIZE (2 + LSN_STORE_SIZE)
/*
  NOTE(review): presumably the upper bound on the number of LSNs a single
  record header can carry (the description table check in this file accepts
  compressed_LSN values 0..2) - confirm
*/
#define MAX_NUMBER_OF_LSNS_PER_RECORD 2
unknown's avatar
unknown committed
63

64 65 66 67 68 69
/* log write buffer descriptor */
struct st_translog_buffer
{
  LSN last_lsn;
  /* This buffer offset in the file */
  TRANSLOG_ADDRESS offset;
70 71 72 73 74
  /*
    Next buffer offset in the file (it is not always offset + size,
    in case of flush by LSN it can be offset + size - TRANSLOG_PAGE_SIZE)
  */
  TRANSLOG_ADDRESS next_buffer_offset;
75 76 77 78
  /*
     How much written (or will be written when copy_to_buffer_in_progress
     become 0) to this buffer
  */
unknown's avatar
unknown committed
79 80
  translog_size_t size;
  /* File handler for this buffer */
81 82 83 84
  File file;
  /* Threads which are waiting for buffer filling/freeing */
  WQUEUE waiting_filling_buffer;
  /* Number of record which are in copy progress */
unknown's avatar
unknown committed
85
  uint copy_to_buffer_in_progress;
86 87 88 89
  /* list of waiting buffer ready threads */
  struct st_my_thread_var *waiting_flush;
  struct st_translog_buffer *overlay;
#ifndef DBUG_OFF
unknown's avatar
unknown committed
90
  uint buffer_no;
91
#endif
unknown's avatar
unknown committed
92 93
  /* lock for the buffer. Current buffer also lock the handler */
  pthread_mutex_t mutex;
94
  /* IO cache for current log */
unknown's avatar
unknown committed
95
  uchar buffer[TRANSLOG_WRITE_BUFFER];
96 97 98 99 100 101
};


struct st_buffer_cursor
{
  /* pointer on the buffer */
unknown's avatar
unknown committed
102
  uchar *ptr;
unknown's avatar
unknown committed
103 104
  /* current buffer */
  struct st_translog_buffer *buffer;
105
  /* current page fill */
unknown's avatar
unknown committed
106
  uint16 current_page_fill;
unknown's avatar
unknown committed
107
  /* how many times we finish this page to write it (for sector protection) */
108 109 110
  uint16 write_counter;
  /* previous write offset */
  uint16 previous_offset;
unknown's avatar
unknown committed
111
  /* Number of current buffer */
112 113 114 115 116 117 118 119 120
  uint8 buffer_no;
  my_bool chaser, protected;
};


struct st_translog_descriptor
{
  /* *** Parameters of the log handler *** */

unknown's avatar
unknown committed
121 122 123 124
  /* Page cache for the log reads */
  PAGECACHE *pagecache;
  /* Flags */
  uint flags;
125 126 127 128 129 130 131 132 133 134
  /* max size of one log size (for new logs creation) */
  uint32 log_file_max_size;
  /* server version */
  uint32 server_version;
  /* server ID */
  uint32 server_id;
  /* Loghandler's buffer capacity in case of chunk 2 filling */
  uint32 buffer_capacity_chunk_2;
  /* Half of the buffer capacity in case of chunk 2 filling */
  uint32 half_buffer_capacity_chunk_2;
unknown's avatar
unknown committed
135 136 137 138 139 140
  /* Page overhead calculated by flags */
  uint16 page_overhead;
  /* Page capacity calculated by flags (TRANSLOG_PAGE_SIZE-page_overhead-1) */
  uint16 page_capacity_chunk_2;
  /* Directory to store files */
  char directory[FN_REFLEN];
141 142 143 144

  /* *** Current state of the log handler *** */
  /* Current and (OPENED_FILES_NUM-1) last logs number in page cache */
  File log_file_num[OPENED_FILES_NUM];
unknown's avatar
unknown committed
145
  File directory_fd;
146 147 148 149 150 151 152 153 154
  /* buffers for log writing */
  struct st_translog_buffer buffers[TRANSLOG_BUFFERS_NO];
  /*
     horizon - visible end of the log (here is absolute end of the log:
     position where next chunk can start
  */
  TRANSLOG_ADDRESS horizon;
  /* horizon buffer cursor */
  struct st_buffer_cursor bc;
155 156
  /* maximum LSN of the current (not finished) file */
  LSN max_lsn;
157

unknown's avatar
unknown committed
158
  /* Last flushed LSN (protected by log_flush_lock) */
159
  LSN flushed;
160
  /* Last LSN sent to the disk (but maybe not written yet) */
161
  LSN sent_to_file;
162
  /* All what is after this address is not sent to disk yet */
163
  TRANSLOG_ADDRESS in_buffers_only;
164
  pthread_mutex_t sent_to_file_lock;
165
  pthread_mutex_t log_flush_lock;
166 167 168 169 170 171 172 173 174 175

  /* Protects changing of headers of finished files (max_lsn) */
  pthread_mutex_t file_header_lock;

  /*
    Sorted array (with protection) of files where we started writing process
    and so we can't give last LSN yet
  */
  pthread_mutex_t unfinished_files_lock;
  DYNAMIC_ARRAY unfinished_files;
176 177 178 179 180 181 182

  /* Purger data: minimum file in the log (or 0 if unknown) */
  uint32 min_file_number;
  /* Protect purger from many calls and it's data */
  pthread_mutex_t purger_lock;
  /* last low water mark checked */
  LSN last_lsn_checked;
183 184 185 186
};

static struct st_translog_descriptor log_descriptor;

unknown's avatar
unknown committed
187
/* Marker for end of log */
unknown's avatar
unknown committed
188
static uchar end_of_log= 0;
189

190
my_bool translog_inited= 0;
191

192
/* chunk types (stored in the two highest bits, see TRANSLOG_CHUNK_TYPE) */
#define TRANSLOG_CHUNK_LSN   0x00      /* 0 chunk refer as LSN (head or tail) */
#define TRANSLOG_CHUNK_FIXED (1 << 6)  /* 1 (pseudo)fixed record (also LSN) */
#define TRANSLOG_CHUNK_NOHDR (2 << 6)  /* 2 no head chunk (till page end) */
#define TRANSLOG_CHUNK_LNGTH (3 << 6)  /* 3 chunk with chunk length */
#define TRANSLOG_CHUNK_TYPE  (3 << 6)  /* Mask to get chunk type */
#define TRANSLOG_REC_TYPE    0x3F               /* Mask to get record type */

/* compressed (relative) LSN constants */
#define TRANSLOG_CLSN_LEN_BITS 0xC0    /* Mask to get compressed LSN length */
202 203


unknown's avatar
unknown committed
204

205 206 207 208 209 210
#include <my_atomic.h>
/* an array that maps id of a MARIA_SHARE to this MARIA_SHARE */
static MARIA_SHARE **id_to_share= NULL;
/* lock for id_to_share */
static my_atomic_rwlock_t LOCK_id_to_share;

211 212
static my_bool translog_page_validator(uchar *page_addr, uchar* data_ptr);

213 214 215 216 217 218
/*
  Initialize log_record_type_descriptors

  NOTE that after first public Maria release, these can NOT be changed
*/

unknown's avatar
unknown committed
219
LOG_DESC log_record_type_descriptor[LOGREC_NUMBER_OF_TYPES];
220

221 222

#ifndef DBUG_OFF
unknown's avatar
unknown committed
223 224 225 226 227 228 229 230

#define translog_buffer_lock_assert_owner(B) \
  safe_mutex_assert_owner(&B->mutex);
void translog_lock_assert_owner()
{
  translog_buffer_lock_assert_owner(log_descriptor.bc.buffer);
}

231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304
/**
  @brief check the description table validity

  @param num             how many records should be filled
*/

static void check_translog_description_table(int num)
{
  int i;
  DBUG_ENTER("check_translog_description_table");
  DBUG_PRINT("enter", ("last record: %d", num));
  DBUG_ASSERT(num > 0);
  /* last is reserved for extending the table */
  DBUG_ASSERT(num < LOGREC_NUMBER_OF_TYPES - 1);
  DBUG_PRINT("info", ("records number: OK"));
  DBUG_PRINT("info",
             ("record type: %d  class: %d  fixed: %u  header: %u  LSNs: %u  "
              "name: %s",
              0,
              log_record_type_descriptor[0].class,
              (uint)log_record_type_descriptor[0].fixed_length,
              (uint)log_record_type_descriptor[0].read_header_len,
              (uint)log_record_type_descriptor[0].compressed_LSN,
              log_record_type_descriptor[0].name));
  DBUG_ASSERT(log_record_type_descriptor[0].class == LOGRECTYPE_NOT_ALLOWED);
  DBUG_PRINT("info", ("record type 0: OK"));
  for (i= 1; i <= num; i++)
  {
    DBUG_PRINT("info",
               ("record type: %d  class: %d  fixed: %u  header: %u  LSNs: %u  "
                "name: %s",
                i, log_record_type_descriptor[i].class,
                (uint)log_record_type_descriptor[i].fixed_length,
                (uint)log_record_type_descriptor[i].read_header_len,
                (uint)log_record_type_descriptor[i].compressed_LSN,
                log_record_type_descriptor[i].name));
    switch (log_record_type_descriptor[i].class) {
    case LOGRECTYPE_NOT_ALLOWED:
      DBUG_ASSERT(0);
      break;
    case LOGRECTYPE_VARIABLE_LENGTH:
      DBUG_ASSERT(log_record_type_descriptor[i].fixed_length == 0);
      DBUG_ASSERT((log_record_type_descriptor[i].compressed_LSN == 0) ||
                  ((log_record_type_descriptor[i].compressed_LSN == 1) &&
                   (log_record_type_descriptor[i].read_header_len >=
                    LSN_STORE_SIZE)) ||
                  ((log_record_type_descriptor[i].compressed_LSN == 2) &&
                   (log_record_type_descriptor[i].read_header_len >=
                    LSN_STORE_SIZE * 2)));
      break;
    case LOGRECTYPE_PSEUDOFIXEDLENGTH:
      DBUG_ASSERT(log_record_type_descriptor[i].fixed_length ==
                  log_record_type_descriptor[i].read_header_len);
      DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN > 0);
      DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN <= 2);
      break;
    case LOGRECTYPE_FIXEDLENGTH:
      DBUG_ASSERT(log_record_type_descriptor[i].fixed_length ==
                  log_record_type_descriptor[i].read_header_len);
      DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN == 0);
      break;
    default:
      DBUG_ASSERT(0);
    }
    DBUG_PRINT("info", ("record type %d: OK", i));
  }
  DBUG_PRINT("info", ("All filled records are OK"));
  for (i= num + 1; i < LOGREC_NUMBER_OF_TYPES; i++)
  {
    DBUG_ASSERT(log_record_type_descriptor[i].class == LOGRECTYPE_NOT_ALLOWED);
    DBUG_PRINT("info", ("record type %d: OK", i));
  }
  DBUG_VOID_RETURN;
}
unknown's avatar
unknown committed
305 306 307
#else
/* Non-debug builds: the lock ownership assertions expand to nothing */
#define translog_buffer_lock_assert_owner(B)
#define translog_lock_assert_owner()
#endif

310
static LOG_DESC INIT_LOGREC_FIXED_RECORD_0LSN_EXAMPLE=
unknown's avatar
unknown committed
311
{LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
312
 "fixed0example", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
313 314

static LOG_DESC INIT_LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE=
unknown's avatar
unknown committed
315
{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
316
"variable0example", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
317 318

static LOG_DESC INIT_LOGREC_FIXED_RECORD_1LSN_EXAMPLE=
unknown's avatar
unknown committed
319
{LOGRECTYPE_PSEUDOFIXEDLENGTH, 7, 7, NULL, NULL, NULL, 1,
unknown's avatar
unknown committed
320
"fixed1example", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
321 322

static LOG_DESC INIT_LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE=
unknown's avatar
unknown committed
323
{LOGRECTYPE_VARIABLE_LENGTH, 0, 12, NULL, NULL, NULL, 1,
unknown's avatar
unknown committed
324
"variable1example", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
325 326

static LOG_DESC INIT_LOGREC_FIXED_RECORD_2LSN_EXAMPLE=
unknown's avatar
unknown committed
327
{LOGRECTYPE_PSEUDOFIXEDLENGTH, 23, 23, NULL, NULL, NULL, 2,
unknown's avatar
unknown committed
328
"fixed2example", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
329 330

static LOG_DESC INIT_LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE=
unknown's avatar
unknown committed
331
{LOGRECTYPE_VARIABLE_LENGTH, 0, 19, NULL, NULL, NULL, 2,
unknown's avatar
unknown committed
332
"variable2example", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
333 334 335 336


void example_loghandler_init()
{
337
  int i;
338 339 340 341 342 343 344 345 346 347 348 349
  log_record_type_descriptor[LOGREC_FIXED_RECORD_0LSN_EXAMPLE]=
    INIT_LOGREC_FIXED_RECORD_0LSN_EXAMPLE;
  log_record_type_descriptor[LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE]=
    INIT_LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE;
  log_record_type_descriptor[LOGREC_FIXED_RECORD_1LSN_EXAMPLE]=
    INIT_LOGREC_FIXED_RECORD_1LSN_EXAMPLE;
  log_record_type_descriptor[LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE]=
    INIT_LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE;
  log_record_type_descriptor[LOGREC_FIXED_RECORD_2LSN_EXAMPLE]=
    INIT_LOGREC_FIXED_RECORD_2LSN_EXAMPLE;
  log_record_type_descriptor[LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE]=
    INIT_LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE;
350 351 352 353 354 355
  for (i= LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE + 1;
       i < LOGREC_NUMBER_OF_TYPES;
       i++)
    log_record_type_descriptor[i].class= LOGRECTYPE_NOT_ALLOWED;
  DBUG_EXECUTE("info",
               check_translog_description_table(LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE););
356 357 358
}


359
static LOG_DESC INIT_LOGREC_RESERVED_FOR_CHUNKS23=
unknown's avatar
unknown committed
360
{LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
361
 "reserved", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL };
362 363 364

static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_HEAD=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
365
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
unknown's avatar
unknown committed
366
 write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
367
 "redo_insert_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
368 369

static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_TAIL=
370 371
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
unknown's avatar
unknown committed
372
 write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
373
 "redo_insert_row_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
374

unknown's avatar
unknown committed
375
/** @todo RECOVERY BUG unused, remove? */
376
static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_BLOB=
unknown's avatar
unknown committed
377
{LOGRECTYPE_VARIABLE_LENGTH, 0, 8, NULL, write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
378
 "redo_insert_row_blob", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
379

unknown's avatar
unknown committed
380
/** @todo RECOVERY BUG handle it in recovery */
381 382
/*QQQ:TODO:header???*/
static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_BLOBS=
unknown's avatar
unknown committed
383 384
{LOGRECTYPE_VARIABLE_LENGTH, 0, FILEID_STORE_SIZE, NULL,
 write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
385
 "redo_insert_row_blobs", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
386 387 388 389 390

static LOG_DESC INIT_LOGREC_REDO_PURGE_ROW_HEAD=
{LOGRECTYPE_FIXEDLENGTH,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
unknown's avatar
unknown committed
391
 NULL, write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
392
 "redo_purge_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
393 394 395 396 397

static LOG_DESC INIT_LOGREC_REDO_PURGE_ROW_TAIL=
{LOGRECTYPE_FIXEDLENGTH,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
unknown's avatar
unknown committed
398
 NULL, write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
399
 "redo_purge_row_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
400 401

static LOG_DESC INIT_LOGREC_REDO_PURGE_BLOCKS=
402 403
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE,
unknown's avatar
unknown committed
404
 NULL, write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
405
 "redo_purge_blocks", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
406

unknown's avatar
unknown committed
407
/* not yet used; for when we have versioning */
408
static LOG_DESC INIT_LOGREC_REDO_DELETE_ROW=
unknown's avatar
unknown committed
409
{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
410
 "redo_delete_row", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
411

unknown's avatar
unknown committed
412
/** @todo RECOVERY BUG unused, remove? */
413
static LOG_DESC INIT_LOGREC_REDO_UPDATE_ROW_HEAD=
unknown's avatar
unknown committed
414
{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
415
 "redo_update_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
416 417

static LOG_DESC INIT_LOGREC_REDO_INDEX=
unknown's avatar
unknown committed
418
{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
419
 "redo_index", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
420 421

static LOG_DESC INIT_LOGREC_REDO_UNDELETE_ROW=
unknown's avatar
unknown committed
422
{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
423
 "redo_undelete_row", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
424 425

static LOG_DESC INIT_LOGREC_CLR_END=
unknown's avatar
unknown committed
426 427
{LOGRECTYPE_VARIABLE_LENGTH, 0, LSN_STORE_SIZE + FILEID_STORE_SIZE +
 CLR_TYPE_STORE_SIZE, NULL, write_hook_for_clr_end, NULL, 1,
unknown's avatar
unknown committed
428
 "clr_end", LOGREC_LAST_IN_GROUP, NULL, NULL};
429 430

static LOG_DESC INIT_LOGREC_PURGE_END=
unknown's avatar
unknown committed
431
{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1,
unknown's avatar
unknown committed
432
 "purge_end", LOGREC_LAST_IN_GROUP, NULL, NULL};
433 434

static LOG_DESC INIT_LOGREC_UNDO_ROW_INSERT=
unknown's avatar
unknown committed
435
{LOGRECTYPE_VARIABLE_LENGTH, 0,
436
 LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
unknown's avatar
unknown committed
437
 NULL, write_hook_for_undo_row_insert, NULL, 1,
unknown's avatar
unknown committed
438
 "undo_row_insert", LOGREC_LAST_IN_GROUP, NULL, NULL};
439 440 441 442

static LOG_DESC INIT_LOGREC_UNDO_ROW_DELETE=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
unknown's avatar
unknown committed
443
 NULL, write_hook_for_undo_row_delete, NULL, 1,
unknown's avatar
unknown committed
444
 "undo_row_delete", LOGREC_LAST_IN_GROUP, NULL, NULL};
445 446 447 448

static LOG_DESC INIT_LOGREC_UNDO_ROW_UPDATE=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
unknown's avatar
unknown committed
449
 NULL, write_hook_for_undo_row_update, NULL, 1,
unknown's avatar
unknown committed
450
 "undo_row_update", LOGREC_LAST_IN_GROUP, NULL, NULL};
451 452

static LOG_DESC INIT_LOGREC_UNDO_KEY_INSERT=
unknown's avatar
unknown committed
453
{LOGRECTYPE_VARIABLE_LENGTH, 0, 10, NULL, write_hook_for_undo, NULL, 1,
unknown's avatar
unknown committed
454
 "undo_key_insert", LOGREC_LAST_IN_GROUP, NULL, NULL};
455 456

static LOG_DESC INIT_LOGREC_UNDO_KEY_DELETE=
unknown's avatar
unknown committed
457 458
{LOGRECTYPE_VARIABLE_LENGTH, 0, 15, NULL, write_hook_for_undo, NULL, 1,
 "undo_key_delete", LOGREC_LAST_IN_GROUP, NULL, NULL};
459 460

static LOG_DESC INIT_LOGREC_PREPARE=
unknown's avatar
unknown committed
461
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
462
 "prepare", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
463 464

static LOG_DESC INIT_LOGREC_PREPARE_WITH_UNDO_PURGE=
465
{LOGRECTYPE_VARIABLE_LENGTH, 0, LSN_STORE_SIZE, NULL, NULL, NULL, 1,
unknown's avatar
unknown committed
466
 "prepare_with_undo_purge", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
467 468

static LOG_DESC INIT_LOGREC_COMMIT=
unknown's avatar
unknown committed
469 470 471
{LOGRECTYPE_FIXEDLENGTH, 0, 0, NULL,
 NULL, NULL, 0, "commit", LOGREC_IS_GROUP_ITSELF, NULL,
 NULL};
472 473

static LOG_DESC INIT_LOGREC_COMMIT_WITH_UNDO_PURGE=
unknown's avatar
unknown committed
474
{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1,
unknown's avatar
unknown committed
475
 "commit_with_undo_purge", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
476

477
static LOG_DESC INIT_LOGREC_CHECKPOINT=
unknown's avatar
unknown committed
478
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
479
 "checkpoint", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
480 481

static LOG_DESC INIT_LOGREC_REDO_CREATE_TABLE=
unknown's avatar
unknown committed
482
{LOGRECTYPE_VARIABLE_LENGTH, 0, 1 + 2, NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
483
"redo_create_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
484 485

static LOG_DESC INIT_LOGREC_REDO_RENAME_TABLE=
unknown's avatar
unknown committed
486
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
487
 "redo_rename_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
488 489

static LOG_DESC INIT_LOGREC_REDO_DROP_TABLE=
490
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
491
 "redo_drop_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
492

493 494
static LOG_DESC INIT_LOGREC_REDO_DELETE_ALL=
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE, FILEID_STORE_SIZE,
unknown's avatar
unknown committed
495
 NULL, write_hook_for_redo_delete_all, NULL, 0,
unknown's avatar
unknown committed
496
 "redo_delete_all", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
497 498 499

static LOG_DESC INIT_LOGREC_REDO_REPAIR_TABLE=
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + 4, FILEID_STORE_SIZE + 4,
unknown's avatar
unknown committed
500
 NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
501
 "redo_repair_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
502 503

static LOG_DESC INIT_LOGREC_FILE_ID=
unknown's avatar
unknown committed
504
{LOGRECTYPE_VARIABLE_LENGTH, 0, 2, NULL, write_hook_for_file_id, NULL, 0,
unknown's avatar
unknown committed
505
 "file_id", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
506 507

static LOG_DESC INIT_LOGREC_LONG_TRANSACTION_ID=
unknown's avatar
unknown committed
508
{LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
509
 "long_transaction_id", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
510

511
const myf log_write_flags= MY_WME | MY_NABP | MY_WAIT_IF_FULL;
512

513
static void loghandler_init()
514
{
515
  int i;
516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561
  log_record_type_descriptor[LOGREC_RESERVED_FOR_CHUNKS23]=
    INIT_LOGREC_RESERVED_FOR_CHUNKS23;
  log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_HEAD]=
    INIT_LOGREC_REDO_INSERT_ROW_HEAD;
  log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_TAIL]=
    INIT_LOGREC_REDO_INSERT_ROW_TAIL;
  log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_BLOB]=
    INIT_LOGREC_REDO_INSERT_ROW_BLOB;
  log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_BLOBS]=
    INIT_LOGREC_REDO_INSERT_ROW_BLOBS;
  log_record_type_descriptor[LOGREC_REDO_PURGE_ROW_HEAD]=
    INIT_LOGREC_REDO_PURGE_ROW_HEAD;
  log_record_type_descriptor[LOGREC_REDO_PURGE_ROW_TAIL]=
    INIT_LOGREC_REDO_PURGE_ROW_TAIL;
  log_record_type_descriptor[LOGREC_REDO_PURGE_BLOCKS]=
    INIT_LOGREC_REDO_PURGE_BLOCKS;
  log_record_type_descriptor[LOGREC_REDO_DELETE_ROW]=
    INIT_LOGREC_REDO_DELETE_ROW;
  log_record_type_descriptor[LOGREC_REDO_UPDATE_ROW_HEAD]=
    INIT_LOGREC_REDO_UPDATE_ROW_HEAD;
  log_record_type_descriptor[LOGREC_REDO_INDEX]=
    INIT_LOGREC_REDO_INDEX;
  log_record_type_descriptor[LOGREC_REDO_UNDELETE_ROW]=
    INIT_LOGREC_REDO_UNDELETE_ROW;
  log_record_type_descriptor[LOGREC_CLR_END]=
    INIT_LOGREC_CLR_END;
  log_record_type_descriptor[LOGREC_PURGE_END]=
    INIT_LOGREC_PURGE_END;
  log_record_type_descriptor[LOGREC_UNDO_ROW_INSERT]=
    INIT_LOGREC_UNDO_ROW_INSERT;
  log_record_type_descriptor[LOGREC_UNDO_ROW_DELETE]=
    INIT_LOGREC_UNDO_ROW_DELETE;
  log_record_type_descriptor[LOGREC_UNDO_ROW_UPDATE]=
    INIT_LOGREC_UNDO_ROW_UPDATE;
  log_record_type_descriptor[LOGREC_UNDO_KEY_INSERT]=
    INIT_LOGREC_UNDO_KEY_INSERT;
  log_record_type_descriptor[LOGREC_UNDO_KEY_DELETE]=
    INIT_LOGREC_UNDO_KEY_DELETE;
  log_record_type_descriptor[LOGREC_PREPARE]=
    INIT_LOGREC_PREPARE;
  log_record_type_descriptor[LOGREC_PREPARE_WITH_UNDO_PURGE]=
    INIT_LOGREC_PREPARE_WITH_UNDO_PURGE;
  log_record_type_descriptor[LOGREC_COMMIT]=
    INIT_LOGREC_COMMIT;
  log_record_type_descriptor[LOGREC_COMMIT_WITH_UNDO_PURGE]=
    INIT_LOGREC_COMMIT_WITH_UNDO_PURGE;
562 563
  log_record_type_descriptor[LOGREC_CHECKPOINT]=
    INIT_LOGREC_CHECKPOINT;
564 565 566 567 568 569
  log_record_type_descriptor[LOGREC_REDO_CREATE_TABLE]=
    INIT_LOGREC_REDO_CREATE_TABLE;
  log_record_type_descriptor[LOGREC_REDO_RENAME_TABLE]=
    INIT_LOGREC_REDO_RENAME_TABLE;
  log_record_type_descriptor[LOGREC_REDO_DROP_TABLE]=
    INIT_LOGREC_REDO_DROP_TABLE;
570 571 572 573
  log_record_type_descriptor[LOGREC_REDO_DELETE_ALL]=
    INIT_LOGREC_REDO_DELETE_ALL;
  log_record_type_descriptor[LOGREC_REDO_REPAIR_TABLE]=
    INIT_LOGREC_REDO_REPAIR_TABLE;
574 575 576 577
  log_record_type_descriptor[LOGREC_FILE_ID]=
    INIT_LOGREC_FILE_ID;
  log_record_type_descriptor[LOGREC_LONG_TRANSACTION_ID]=
    INIT_LOGREC_LONG_TRANSACTION_ID;
578 579 580 581 582 583
  for (i= LOGREC_LONG_TRANSACTION_ID + 1;
       i < LOGREC_NUMBER_OF_TYPES;
       i++)
    log_record_type_descriptor[i].class= LOGRECTYPE_NOT_ALLOWED;
  DBUG_EXECUTE("info",
               check_translog_description_table(LOGREC_LONG_TRANSACTION_ID););
584 585
};

586

unknown's avatar
unknown committed
587 588
/*
  all possible flags page overheads
  NOTE(review): presumably indexed by the page flags value, one entry per
  flag combination (TRANSLOG_FLAGS_NUM entries) - confirm
*/
static uint page_overhead[TRANSLOG_FLAGS_NUM];
589 590 591 592 593 594 595 596 597 598 599

/*
  NOTE(review): by its name this is the data handed to the log page
  validator (see the translog_page_validator() prototype in this file);
  its users are outside this part of the file - confirm
*/
typedef struct st_translog_validator_data
{
  /* NOTE(review): presumably the address of the page being validated */
  TRANSLOG_ADDRESS *addr;
  /* NOTE(review): presumably set when the page had to be recovered */
  my_bool was_recovered;
} TRANSLOG_VALIDATOR_DATA;


/* NOTE(review): presumably the root directory of Maria data files - confirm */
const char *maria_data_root;


unknown's avatar
unknown committed
600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620
/*
  Check cursor/buffer consistence

  SYNOPSIS
    translog_check_cursor
    cursor               cursor which will be checked
*/

#ifndef DBUG_OFF
static void translog_check_cursor(struct st_buffer_cursor *cursor)
{
  DBUG_ASSERT(cursor->chaser ||
              ((ulong) (cursor->ptr - cursor->buffer->buffer) ==
               cursor->buffer->size));
  DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no);
  DBUG_ASSERT((cursor->ptr -cursor->buffer->buffer) %TRANSLOG_PAGE_SIZE ==
              cursor->current_page_fill % TRANSLOG_PAGE_SIZE);
  DBUG_ASSERT(cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
}
#endif

621 622 623 624 625 626 627
/*
  Get file name of the log by log number

  SYNOPSIS
    translog_filename_by_fileno()
    file_no              Number of the log we want to open
    path                 Pointer to buffer where file name will be
unknown's avatar
unknown committed
628
                         stored (must be FN_REFLEN bytes at least)
629 630 631 632
  RETURN
    pointer to path
*/

633
static char *translog_filename_by_fileno(uint32 file_no, char *path)
634
{
unknown's avatar
unknown committed
635 636
  char buff[11], *end;
  uint length;
637
  DBUG_ENTER("translog_filename_by_fileno");
unknown's avatar
unknown committed
638
  DBUG_ASSERT(file_no <= 0xfffffff);
unknown's avatar
unknown committed
639 640 641 642 643 644

  /* log_descriptor.directory is already formated */
  end= strxmov(path, log_descriptor.directory, "maria_log.0000000", NullS);
  length= (uint) (int10_to_str(file_no, buff, 10) - buff);
  strmov(end-length+1, buff);

unknown's avatar
unknown committed
645
  DBUG_PRINT("info", ("Path: '%s'  path: 0x%lx", path, (ulong) path));
unknown's avatar
unknown committed
646
  DBUG_RETURN(path);
647 648 649 650 651 652 653 654 655 656 657
}


/*
  Open log file with given number without cache

  SYNOPSIS
    open_logfile_by_number_no_cache()
    file_no              Number of the log we want to open

  RETURN
unknown's avatar
unknown committed
658 659
    -1  error
    #   file descriptor number
660 661
*/

662
static File open_logfile_by_number_no_cache(uint32 file_no)
663 664 665 666 667
{
  File file;
  char path[FN_REFLEN];
  DBUG_ENTER("open_logfile_by_number_no_cache");

668
  /* TODO: add O_DIRECT to open flags (when buffer is aligned) */
669
  /* TODO: use my_create() */
unknown's avatar
unknown committed
670 671
  if ((file= my_open(translog_filename_by_fileno(file_no, path),
                     O_CREAT | O_BINARY | O_RDWR,
672 673 674
                     MYF(MY_WME))) < 0)
  {
    UNRECOVERABLE_ERROR(("Error %d during opening file '%s'", errno, path));
unknown's avatar
unknown committed
675
    DBUG_RETURN(-1);
676
  }
unknown's avatar
unknown committed
677
  DBUG_PRINT("info", ("File: '%s'  handler: %d", path, file));
678 679 680 681 682 683 684 685 686 687
  DBUG_RETURN(file);
}


/*
  Write log file page header in the just opened new log file

  SYNOPSIS
    translog_write_file_header();

unknown's avatar
unknown committed
688 689 690
   NOTES
    First page is just a marker page; We don't store any real log data in it.

691 692 693 694 695
  RETURN
    0 OK
    1 ERROR
*/

unknown's avatar
unknown committed
696 697 698 699
uchar	NEAR maria_trans_file_magic[]=
{ (uchar) 254, (uchar) 254, (uchar) 11, '\001', 'M', 'A', 'R', 'I', 'A',
 'L', 'O', 'G' };

700
static my_bool translog_write_file_header()
701 702
{
  ulonglong timestamp;
unknown's avatar
unknown committed
703
  uchar page_buff[TRANSLOG_PAGE_SIZE], *page= page_buff;
704 705 706
  DBUG_ENTER("translog_write_file_header");

  /* file tag */
unknown's avatar
unknown committed
707 708
  memcpy(page, maria_trans_file_magic, sizeof(maria_trans_file_magic));
  page+= sizeof(maria_trans_file_magic);
709 710
  /* timestamp */
  timestamp= my_getsystime();
unknown's avatar
unknown committed
711 712
  int8store(page, timestamp);
  page+= 8;
713
  /* maria version */
unknown's avatar
unknown committed
714 715
  int4store(page, TRANSLOG_VERSION_ID);
  page+= 4;
716
  /* mysql version (MYSQL_VERSION_ID) */
unknown's avatar
unknown committed
717 718
  int4store(page, log_descriptor.server_version);
  page+= 4;
719
  /* server ID */
unknown's avatar
unknown committed
720 721 722 723 724
  int4store(page, log_descriptor.server_id);
  page+= 4;
  /* loghandler page_size/DISK_DRIVE_SECTOR_SIZE */
  int2store(page, TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE);
  page+= 2;
725
  /* file number */
unknown's avatar
unknown committed
726 727
  int3store(page, LSN_FILE_NO(log_descriptor.horizon));
  page+= 3;
728 729 730 731 732 733
  /*
    Here should be max lsn storing for current file (which is LSN_IPOSSIBLE):
  lsn_store(page, LSN_IPOSSIBLE);
  page+= LSN_STORE_SIZE;
    But it is zeros so we can rely on bzero() in this case
  */
unknown's avatar
unknown committed
734
  bzero(page, sizeof(page_buff) - (page- page_buff));
735

unknown's avatar
unknown committed
736
  DBUG_RETURN(my_pwrite(log_descriptor.log_file_num[0], page_buff,
737
                        sizeof(page_buff), 0, log_write_flags) != 0);
738 739
}

740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756
/**
  @brief Write the new max LSN into the header of the given file and sync
  the file

  @param file            The file descriptor
  @param lsn             The LSN which should be written

  @retval 0 OK
  @retval 1 Error
*/

static my_bool translog_max_lsn_to_header(File file, LSN lsn)
{
  uchar encoded_lsn[LSN_STORE_SIZE];
  my_bool failed;
  DBUG_ENTER("translog_max_lsn_to_header");
  DBUG_PRINT("enter", ("File descriptor: %ld  "
                       "lsn: (%lu,0x%lx)",
                       (long) file,
                       LSN_IN_PARTS(lsn)));

  lsn_store(encoded_lsn, lsn);

  /*
    The LSN field follows the file tag and the fixed size fields written by
    translog_write_file_header(): timestamp (8), maria version (4),
    mysql version (4), server id (4), page size (2) and file number (3).
    The file is synced only if the write succeeded.
  */
  failed= (my_pwrite(file, encoded_lsn,
                     LSN_STORE_SIZE,
                     (sizeof(maria_trans_file_magic) +
                      8 + 4 + 4 + 4 + 2 + 3),
                     log_write_flags) != 0 ||
           my_sync(file, MYF(MY_WME)) != 0);
  DBUG_RETURN(failed);
}

769

770 771 772 773 774 775
/*
  Information from transaction log file header
*/

typedef struct st_loghandler_file_info
{
776
  /*
777
    LSN_IMPOSSIBLE for current file and max LSN which parts stored in the
778 779 780
    file for all other (finished) files.
  */
  LSN max_lsn;
781 782 783 784 785 786 787 788 789
  ulonglong timestamp;   /* Time stamp */
  ulong maria_version;   /* Version of maria loghandler */
  ulong mysql_versiob;   /* Version of mysql server */
  ulong server_id;       /* Server ID */
  uint page_size;        /* Loghandler page size */
  uint file_number;      /* Number of the file (from the file header) */
} LOGHANDLER_FILE_INFO;

/*
790
  @brief Read hander file information from loghandler file
791 792

  @param desc header information descriptor to be filled with information
793
  @param file file descriptor to read
794 795 796 797 798

  @retval 0 OK
  @retval 1 Error
*/

799 800 801 802 803
#define LOG_HEADER_DATA_SIZE (sizeof(maria_trans_file_magic) + \
                              8 + 4 + 4 + 4 + 2 + 3 + \
                              LSN_STORE_SIZE)

my_bool translog_read_file_header(LOGHANDLER_FILE_INFO *desc, File file)
804
{
805
  uchar page_buff[LOG_HEADER_DATA_SIZE], *ptr;
806 807
  DBUG_ENTER("translog_read_file_header");

808
  if (my_pread(file, page_buff,
809 810 811 812 813 814 815 816 817 818 819 820 821
               sizeof(page_buff), 0, MYF(MY_FNABP | MY_WME)))
  {
    DBUG_PRINT("info", ("log read fail error: %d", my_errno));
    DBUG_RETURN(1);
  }
  ptr= page_buff + sizeof(maria_trans_file_magic);
  desc->timestamp= uint8korr(ptr);
  ptr+= 8;
  desc->maria_version= uint4korr(ptr);
  ptr+= 4;
  desc->mysql_versiob= uint4korr(ptr);
  ptr+= 4;
  desc->server_id= uint4korr(ptr);
822
  ptr+= 4;
823 824 825
  desc->page_size= uint2korr(ptr);
  ptr+= 2;
  desc->file_number= uint3korr(ptr);
826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844
  ptr+=3;
  desc->max_lsn= lsn_korr(ptr);
  DBUG_RETURN(0);
}


/*
  @brief set the lsn to the files from_file - to_file if it is greater
  then written in the file

  @param from_file       first file number (min)
  @param to_file         last file number (max)
  @param lsn             the lsn for writing
  @param is_locked       true if current thread locked the log handler

  @retval 0 OK
  @retval 1 Error
*/

845
static my_bool translog_set_lsn_for_files(uint32 from_file, uint32 to_file,
846 847
                                          LSN lsn, my_bool is_locked)
{
848
  uint32 file;
849 850
  DBUG_ENTER("translog_set_lsn_for_files");
  DBUG_PRINT("enter", ("From: %lu  to: %lu  lsn: (%lu,0x%lx)  locked: %d",
851
                       (ulong) from_file, (ulong) to_file,
unknown's avatar
unknown committed
852
                       LSN_IN_PARTS(lsn),
853 854 855 856 857 858 859
                       is_locked));
  DBUG_ASSERT(from_file <= to_file);
  DBUG_ASSERT(from_file > 0); /* we have not file 0 */

  /* Checks the current file (not finished yet file) */
  if (!is_locked)
    translog_lock();
860
  if (to_file == (uint32) LSN_FILE_NO(log_descriptor.horizon))
861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882
  {
    if (likely(cmp_translog_addr(lsn, log_descriptor.max_lsn) > 0))
      log_descriptor.max_lsn= lsn;
    to_file--;
  }
  if (!is_locked)
    translog_unlock();

  /* Checks finished files if they are */
  pthread_mutex_lock(&log_descriptor.file_header_lock);
  for (file= from_file; file <= to_file; file++)
  {
    LOGHANDLER_FILE_INFO info;
    File fd= open_logfile_by_number_no_cache(file);
    if (fd < 0 ||
        translog_read_file_header(&info, fd) ||
        (cmp_translog_addr(lsn, info.max_lsn) > 0 &&
         translog_max_lsn_to_header(fd, lsn)))
      DBUG_RETURN(1);
  }
  pthread_mutex_unlock(&log_descriptor.file_header_lock);

883 884 885 886
  DBUG_RETURN(0);
}


887 888 889
/* descriptor of file in unfinished_files */
struct st_file_counter
{
890 891
  uint32 file;            /* file number */
  uint32 counter;         /* counter for started writes */
892 893 894 895 896 897 898 899 900
};


/*
  @brief mark file "in progress" (for multi-group records)

  @param file            log file number
*/

901
static void translog_mark_file_unfinished(uint32 file)
902 903 904 905 906 907
{
  int place, i;
  struct st_file_counter fc, *fc_ptr;
  fc.file= file; fc.counter= 1;

  DBUG_ENTER("translog_mark_file_unfinished");
908
  DBUG_PRINT("enter", ("file: %lu", (ulong) file));
909 910 911 912 913 914 915 916 917 918

  pthread_mutex_lock(&log_descriptor.unfinished_files_lock);

  if (log_descriptor.unfinished_files.elements == 0)
  {
    insert_dynamic(&log_descriptor.unfinished_files, (uchar*) &fc);
    DBUG_PRINT("info", ("The first element inserted"));
    goto end;
  }

919
  for (place= log_descriptor.unfinished_files.elements - 1;
920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972
       place >= 0;
       place--)
  {
    fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
                            place, struct st_file_counter *);
    if (fc_ptr->file <= file)
      break;
  }

  if (place >= 0 && fc_ptr->file == file)
  {
     fc_ptr->counter++;
     DBUG_PRINT("info", ("counter increased"));
     goto end;
  }

  if (place == (int)log_descriptor.unfinished_files.elements)
  {
    insert_dynamic(&log_descriptor.unfinished_files, (uchar*) &fc);
    DBUG_PRINT("info", ("The last element inserted"));
    goto end;
  }
  /* shift and assign new element */
  insert_dynamic(&log_descriptor.unfinished_files,
                 (uchar*)
                 dynamic_element(&log_descriptor.unfinished_files,
                                 log_descriptor.unfinished_files.elements- 1,
                                 struct st_file_counter *));
  for(i= log_descriptor.unfinished_files.elements - 1; i > place; i--)
  {
    /* we do not use set_dynamic() to avoid unneeded checks */
    memcpy(dynamic_element(&log_descriptor.unfinished_files,
                           i, struct st_file_counter *),
           dynamic_element(&log_descriptor.unfinished_files,
                           i + 1, struct st_file_counter *),
           sizeof(struct st_file_counter));
  }
  memcpy(dynamic_element(&log_descriptor.unfinished_files,
                         place + 1, struct st_file_counter *),
         &fc, sizeof(struct st_file_counter));
end:
  pthread_mutex_unlock(&log_descriptor.unfinished_files_lock);
  DBUG_VOID_RETURN;
}



/*
  @brief remove file mark "in progress" (for multi-group records)

  @param file            log file number
*/

973
static void translog_mark_file_finished(uint32 file)
974 975 976 977
{
  int i;
  struct st_file_counter *fc_ptr;
  DBUG_ENTER("translog_mark_file_finished");
978
  DBUG_PRINT("enter", ("file: %lu", (ulong) file));
979

unknown's avatar
unknown committed
980 981
  LINT_INIT(fc_ptr);

982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015
  pthread_mutex_lock(&log_descriptor.unfinished_files_lock);

  DBUG_ASSERT(log_descriptor.unfinished_files.elements > 0);
  for (i= 0;
       i < (int) log_descriptor.unfinished_files.elements;
       i++)
  {
    fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
                            i, struct st_file_counter *);
    if (fc_ptr->file == file)
    {
      break;
    }
  }
  DBUG_ASSERT(i < (int) log_descriptor.unfinished_files.elements);

  if (! --fc_ptr->counter)
    delete_dynamic_element(&log_descriptor.unfinished_files, i);
  pthread_mutex_unlock(&log_descriptor.unfinished_files_lock);
  DBUG_VOID_RETURN;
}


/*
  @brief get max LSN of the record which parts stored in this file

  @param file            file number

  @return requested LSN or LSN_IMPOSSIBLE/LSN_ERROR
    @retval LSN_IMPOSSIBLE File is still not finished
    @retval LSN_ERROR Error opening file
    @retval # LSN of the record which parts stored in this file
*/

1016
LSN translog_get_file_max_lsn_stored(uint32 file)
1017
{
1018
  uint32 limit= FILENO_IMPOSSIBLE;
1019
  DBUG_ENTER("translog_get_file_max_lsn_stored");
1020
  DBUG_PRINT("enter", ("file: %lu", (ulong)file));
unknown's avatar
unknown committed
1021
  DBUG_ASSERT(translog_inited == 1);
1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060

  pthread_mutex_lock(&log_descriptor.unfinished_files_lock);

  /* find file with minimum file number "in progress" */
  if (log_descriptor.unfinished_files.elements > 0)
  {
    struct st_file_counter *fc_ptr;
    fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
                            0, struct st_file_counter *);
    limit= fc_ptr->file; /* minimal file number "in progress" */
  }
  pthread_mutex_unlock(&log_descriptor.unfinished_files_lock);

  /*
    if there is no "in progress file" then unfinished file is in progress
    for sure
  */
  if (limit == FILENO_IMPOSSIBLE)
  {
    TRANSLOG_ADDRESS horizon= translog_get_horizon();
    limit= LSN_FILE_NO(horizon);
  }

  if (file >= limit)
  {
    DBUG_PRINT("info", ("The file in in progress"));
    DBUG_RETURN(LSN_IMPOSSIBLE);
  }

  {
    LOGHANDLER_FILE_INFO info;
    File fd= open_logfile_by_number_no_cache(file);
    if (fd < 0 ||
        translog_read_file_header(&info, fd))
    {
      DBUG_PRINT("error", ("Can't read file header"));
      DBUG_RETURN(LSN_ERROR);
    }
    DBUG_PRINT("error", ("Max lsn: (%lu,0x%lx)",
unknown's avatar
unknown committed
1061
                         LSN_IN_PARTS(info.max_lsn)));
1062 1063 1064 1065
    DBUG_RETURN(info.max_lsn);
  }
}

1066 1067 1068 1069 1070 1071 1072 1073
/*
  Initialize transaction log file buffer

  SYNOPSIS
    translog_buffer_init()
    buffer               The buffer to initialize

  RETURN
unknown's avatar
unknown committed
1074 1075
    0  OK
    1  Error
1076 1077
*/

1078
static my_bool translog_buffer_init(struct st_translog_buffer *buffer)
1079 1080
{
  DBUG_ENTER("translog_buffer_init");
unknown's avatar
unknown committed
1081
  buffer->last_lsn= LSN_IMPOSSIBLE;
1082
  /* This Buffer File */
unknown's avatar
unknown committed
1083
  buffer->file= -1;
1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109
  buffer->overlay= 0;
  /* IO cache for current log */
  bzero(buffer->buffer, TRANSLOG_WRITE_BUFFER);
  /* Buffer size */
  buffer->size= 0;
  /* cond of thread which is waiting for buffer filling */
  buffer->waiting_filling_buffer.last_thread= 0;
  /* Number of record which are in copy progress */
  buffer->copy_to_buffer_in_progress= 0;
  /* list of waiting buffer ready threads */
  buffer->waiting_flush= 0;
  /* lock for the buffer. Current buffer also lock the handler */
  if (pthread_mutex_init(&buffer->mutex, MY_MUTEX_INIT_FAST))
    DBUG_RETURN(1);
  DBUG_RETURN(0);
}


/**
  @brief Close transaction log file by descriptor

  Flushes the page cache blocks of the file (FLUSH_RELEASE), syncs the
  file and closes it. The file is closed even if the sync failed.

  @param file            file descriptor

  @retval 0  OK
  @retval 1  Error (sync or close failed)
*/

static my_bool translog_close_log_file(File file)
{
  PAGECACHE_FILE cache_file;
  int sync_res, close_res;

  cache_file.file= file;
  /* NOTE(review): the result of the flush is not checked here */
  flush_pagecache_blocks(log_descriptor.pagecache, &cache_file, FLUSH_RELEASE);

  /*
    Sync file when we close it
    TODO: sync only we have changed the log
  */
  sync_res= my_sync(file, MYF(MY_WME));
  close_res= my_close(file, MYF(MY_WME));
  return test(sync_res | close_res);
}


/*
  Create and fill header of new file

  SYNOPSIS
    translog_create_new_file()

  RETURN
    0 OK
    1 Error
*/

1141
static my_bool translog_create_new_file()
1142 1143
{
  int i;
1144
  uint32 file_no= LSN_FILE_NO(log_descriptor.horizon);
1145 1146
  DBUG_ENTER("translog_create_new_file");

1147 1148 1149 1150 1151 1152 1153 1154
  /*
    Writes max_lsn to the file header before finishing it (it is no need to
    lock file header buffer because it is still unfinished file)
  */
  translog_max_lsn_to_header(log_descriptor.log_file_num[0],
                             log_descriptor.max_lsn);
  log_descriptor.max_lsn= LSN_IMPOSSIBLE;

unknown's avatar
unknown committed
1155
  if (log_descriptor.log_file_num[OPENED_FILES_NUM - 1] != -1 &&
1156 1157 1158 1159 1160 1161 1162
      translog_close_log_file(log_descriptor.log_file_num[OPENED_FILES_NUM -
                                                          1]))
    DBUG_RETURN(1);
  for (i= OPENED_FILES_NUM - 1; i > 0; i--)
    log_descriptor.log_file_num[i]= log_descriptor.log_file_num[i - 1];

  if ((log_descriptor.log_file_num[0]=
unknown's avatar
unknown committed
1163
       open_logfile_by_number_no_cache(file_no)) == -1 ||
1164 1165 1166
      translog_write_file_header())
    DBUG_RETURN(1);

unknown's avatar
unknown committed
1167
  if (ma_control_file_write_and_force(LSN_IMPOSSIBLE, file_no,
1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182
                                      CONTROL_FILE_UPDATE_ONLY_LOGNO))
    DBUG_RETURN(1);

  DBUG_RETURN(0);
}


/*
  Lock the loghandler buffer

  SYNOPSIS
    translog_buffer_lock()
    buffer               This buffer which should be locked

  RETURN
unknown's avatar
unknown committed
1183 1184
    0  OK
    1  Error
1185 1186 1187 1188 1189 1190 1191
*/

#ifndef DBUG_OFF
static my_bool translog_buffer_lock(struct st_translog_buffer *buffer)
{
  int res;
  DBUG_ENTER("translog_buffer_lock");
unknown's avatar
unknown committed
1192 1193 1194 1195
  DBUG_PRINT("enter",
             ("Lock buffer #%u: (0x%lx)  mutex: 0x%lx",
              (uint) buffer->buffer_no, (ulong) buffer,
              (ulong) &buffer->mutex));
1196 1197 1198 1199 1200
  res= (pthread_mutex_lock(&buffer->mutex) != 0);
  DBUG_RETURN(res);
}
#else
#define translog_buffer_lock(B) \
unknown's avatar
unknown committed
1201
  pthread_mutex_lock(&B->mutex)
1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212
#endif


/*
  Unlock the loghandler buffer

  SYNOPSIS
    translog_buffer_unlock()
    buffer               This buffer which should be unlocked

  RETURN
unknown's avatar
unknown committed
1213 1214
    0  OK
    1  Error
1215 1216 1217 1218 1219 1220 1221
*/

#ifndef DBUG_OFF
static my_bool translog_buffer_unlock(struct st_translog_buffer *buffer)
{
  int res;
  DBUG_ENTER("translog_buffer_unlock");
unknown's avatar
unknown committed
1222 1223
  DBUG_PRINT("enter", ("Unlock buffer... #%u (0x%lx)  "
                       "mutex: 0x%lx",
1224 1225 1226 1227
                       (uint) buffer->buffer_no, (ulong) buffer,
                       (ulong) &buffer->mutex));

  res= (pthread_mutex_unlock(&buffer->mutex) != 0);
unknown's avatar
unknown committed
1228
  DBUG_PRINT("enter", ("Unlocked buffer... #%u: 0x%lx  mutex: 0x%lx",
1229 1230 1231 1232 1233 1234
                       (uint) buffer->buffer_no, (ulong) buffer,
                       (ulong) &buffer->mutex));
  DBUG_RETURN(res);
}
#else
#define translog_buffer_unlock(B) \
unknown's avatar
unknown committed
1235
  pthread_mutex_unlock(&B->mutex)
1236 1237 1238 1239
#endif


/**
  @brief Write a header on the page

  The header is written at cursor->ptr: page number (3 bytes), file
  number (3 bytes), flags (1 byte), then - depending on the flags - room
  for the CRC and the sector protection table. The horizon, the cursor and
  (for a non-chaser cursor) the buffer size are advanced by the header
  length.

  @param horizon         Where to write the page
  @param cursor          Where to write the page

  @note space for page header should be checked before
*/

static void translog_new_page_header(TRANSLOG_ADDRESS *horizon,
                                     struct st_buffer_cursor *cursor)
{
  uchar *pos;
  uint header_len;

  DBUG_ENTER("translog_new_page_header");
  DBUG_ASSERT(cursor->ptr);

  /* A new page is started: it is not finished/protected yet */
  cursor->protected= 0;

  pos= cursor->ptr;

  /* Page number */
  int3store(pos, LSN_OFFSET(*horizon) / TRANSLOG_PAGE_SIZE);
  pos+= 3;

  /* File number */
  int3store(pos, LSN_FILE_NO(*horizon));
  pos+= 3;

  /* Page flags */
  *pos= (uchar) log_descriptor.flags;
  pos++;

  if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
  {
#ifndef DBUG_OFF
    DBUG_PRINT("info", ("write  0x11223344 CRC to (%lu,0x%lx)",
                        LSN_IN_PARTS(*horizon)));
    /* This will be overwritten by real CRC; This is just for debugging */
    int4store(pos, 0x11223344);
#endif
    /* CRC will be put when page is finished */
    pos+= CRC_LENGTH;
  }

  if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
  {
    time_t now;
    uint16 tmp_time= time(&now);
    /* Low 16 bits of the current time start the sector protection table */
    int2store(pos, tmp_time);
    pos+= (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2;
  }

  header_len= (pos - cursor->ptr);
  (*horizon)+= header_len; /* it is increasing of offset part of the address */
  cursor->current_page_fill= header_len;
  if (!cursor->chaser)
    cursor->buffer->size+= header_len;

  cursor->ptr= pos;
  DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx  chaser: %d  Size: %lu (%lu)",
                      (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
                      cursor->chaser, (ulong) cursor->buffer->size,
                      (ulong) (cursor->ptr - cursor->buffer->buffer)));
  DBUG_EXECUTE("info", translog_check_cursor(cursor););
  DBUG_VOID_RETURN;
}


/*
  Put sector protection on the page image

  SYNOPSIS
    translog_put_sector_protection()
    page                 reference on the page content
    cursor               cursor of the buffer
unknown's avatar
unknown committed
1311 1312 1313 1314

  NOTES
    We put a sector protection on all following sectors on the page,
    except the first sector that is protected by page header.
1315 1316
*/

unknown's avatar
unknown committed
1317
static void translog_put_sector_protection(uchar *page,
1318 1319
                                           struct st_buffer_cursor *cursor)
{
unknown's avatar
unknown committed
1320
  uchar *table= page + log_descriptor.page_overhead -
unknown's avatar
unknown committed
1321
    (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2;
1322
  uint16 value= uint2korr(table) + cursor->write_counter;
unknown's avatar
unknown committed
1323 1324 1325
  uint16 last_protected_sector= ((cursor->previous_offset - 1) /
                                 DISK_DRIVE_SECTOR_SIZE);
  uint16 start_sector= cursor->previous_offset / DISK_DRIVE_SECTOR_SIZE;
1326 1327
  uint i, offset;
  DBUG_ENTER("translog_put_sector_protection");
unknown's avatar
unknown committed
1328

1329
  if (start_sector == 0)
unknown's avatar
unknown committed
1330
    start_sector= 1;                            /* First sector is protected */
1331

unknown's avatar
unknown committed
1332 1333
  DBUG_PRINT("enter", ("Write counter:%u  value:%u  offset:%u, "
                       "last protected:%u  start sector:%u",
1334 1335 1336 1337 1338 1339 1340
                       (uint) cursor->write_counter,
                       (uint) value,
                       (uint) cursor->previous_offset,
                       (uint) last_protected_sector, (uint) start_sector));
  if (last_protected_sector == start_sector)
  {
    i= last_protected_sector * 2;
unknown's avatar
unknown committed
1341
    offset= last_protected_sector * DISK_DRIVE_SECTOR_SIZE;
1342 1343 1344 1345 1346 1347 1348
    /* restore data, because we modified sector which was protected */
    if (offset < cursor->previous_offset)
      page[offset]= table[i];
    offset++;
    if (offset < cursor->previous_offset)
      page[offset]= table[i + 1];
  }
unknown's avatar
unknown committed
1349 1350 1351
  for (i= start_sector * 2, offset= start_sector * DISK_DRIVE_SECTOR_SIZE;
       i < (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2;
       (i+= 2), (offset+= DISK_DRIVE_SECTOR_SIZE))
1352
  {
unknown's avatar
unknown committed
1353
    DBUG_PRINT("info", ("sector:%u  offset:%u  data 0x%x%x",
1354 1355 1356 1357 1358
                        i / 2, offset, (uint) page[offset],
                        (uint) page[offset + 1]));
    table[i]= page[offset];
    table[i + 1]= page[offset + 1];
    int2store(page + offset, value);
unknown's avatar
unknown committed
1359
    DBUG_PRINT("info", ("sector:%u  offset:%u  data 0x%x%x",
1360 1361 1362 1363 1364 1365 1366 1367
                        i / 2, offset, (uint) page[offset],
                        (uint) page[offset + 1]));
  }
  DBUG_VOID_RETURN;
}


/*
unknown's avatar
unknown committed
1368
  Calculate CRC32 of given area
1369 1370

  SYNOPSIS
unknown's avatar
unknown committed
1371
    translog_crc()
1372 1373 1374 1375
    area                 Pointer of the area beginning
    length               The Area length

  RETURN
unknown's avatar
unknown committed
1376
    CRC32
1377 1378
*/

unknown's avatar
unknown committed
1379
static uint32 translog_crc(uchar *area, uint length)
1380
{
1381 1382
  DBUG_ENTER("translog_crc");
  DBUG_RETURN(crc32(0L, (unsigned char*) area, length));
1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397
}


/**
  @brief Finish current page with zeros

  Zero fills the unused tail of the current page, advances the horizon and
  the cursor to the page end and then, depending on the flags stored in
  the page, puts the sector protection and the CRC on the page. Does
  nothing if the cursor is already marked as protected.

  @param horizon         \ horizon & buffer pointers
  @param cursor          /
*/

static void translog_finish_page(TRANSLOG_ADDRESS *horizon,
                                 struct st_buffer_cursor *cursor)
{
  uint16 unused= TRANSLOG_PAGE_SIZE - cursor->current_page_fill;
  uchar *page_start= cursor->ptr -cursor->current_page_fill;
  DBUG_ENTER("translog_finish_page");
  DBUG_PRINT("enter", ("Buffer: #%u 0x%lx  "
                       "Buffer addr: (%lu,0x%lx)  "
                       "Page addr: (%lu,0x%lx)  "
                       "size:%lu (%lu)  Pg:%u  left:%u",
                       (uint) cursor->buffer_no, (ulong) cursor->buffer,
                       LSN_IN_PARTS(cursor->buffer->offset),
                       (ulong) LSN_FILE_NO(*horizon),
                       (ulong) (LSN_OFFSET(*horizon) -
                                cursor->current_page_fill),
                       (ulong) cursor->buffer->size,
                       (ulong) (cursor->ptr -cursor->buffer->buffer),
                       (uint) cursor->current_page_fill, (uint) unused));
  DBUG_ASSERT(LSN_FILE_NO(*horizon) == LSN_FILE_NO(cursor->buffer->offset));
  DBUG_EXECUTE("info", translog_check_cursor(cursor););

  if (cursor->protected)
  {
    DBUG_PRINT("info", ("Already protected and finished"));
    DBUG_VOID_RETURN;
  }
  cursor->protected= 1;

  DBUG_ASSERT(unused < TRANSLOG_PAGE_SIZE);
  if (unused)
  {
    DBUG_PRINT("info", ("left: %u", (uint) unused));
    /* Pad the page and move all positions to the page end */
    bzero(cursor->ptr, unused);
    cursor->ptr+= unused;
    (*horizon)+= unused; /* offset increasing */
    if (!cursor->chaser)
      cursor->buffer->size+= unused;
    cursor->current_page_fill= 0;
    DBUG_PRINT("info", ("Finish Page buffer #%u: 0x%lx  "
                        "chaser: %d  Size: %lu (%lu)",
                        (uint) cursor->buffer->buffer_no,
                        (ulong) cursor->buffer, cursor->chaser,
                        (ulong) cursor->buffer->size,
                        (ulong) (cursor->ptr - cursor->buffer->buffer)));
    DBUG_EXECUTE("info", translog_check_cursor(cursor););
  }

  /* The flags of the page itself decide which protections are applied */
  if (page_start[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION)
  {
    translog_put_sector_protection(page_start, cursor);
    DBUG_PRINT("info", ("drop write_counter"));
    cursor->write_counter= 0;
    cursor->previous_offset= 0;
  }

  if (page_start[TRANSLOG_PAGE_FLAGS] & TRANSLOG_PAGE_CRC)
  {
    uint32 checksum= translog_crc(page_start + log_descriptor.page_overhead,
                                  TRANSLOG_PAGE_SIZE -
                                  log_descriptor.page_overhead);
    DBUG_PRINT("info", ("CRC: %lx", (ulong) checksum));
    /* We have page number, file number and flag before crc */
    int4store(page_start + 3 + 3 + 1, checksum);
  }
  DBUG_VOID_RETURN;
}


/*
unknown's avatar
unknown committed
1461
  Wait until all thread finish filling this buffer
1462 1463 1464 1465 1466 1467 1468 1469

  SYNOPSIS
    translog_wait_for_writers()
    buffer               This buffer should be check

  NOTE
    This buffer should be locked
*/
1470

1471 1472
static void translog_wait_for_writers(struct st_translog_buffer *buffer)
{
unknown's avatar
unknown committed
1473
  struct st_my_thread_var *thread= my_thread_var;
1474
  DBUG_ENTER("translog_wait_for_writers");
unknown's avatar
unknown committed
1475
  DBUG_PRINT("enter", ("Buffer #%u 0x%lx  copies in progress: %u",
1476 1477 1478
                       (uint) buffer->buffer_no, (ulong) buffer,
                       (int) buffer->copy_to_buffer_in_progress));

unknown's avatar
unknown committed
1479
  while (buffer->copy_to_buffer_in_progress)
1480
  {
unknown's avatar
unknown committed
1481 1482
    DBUG_PRINT("info", ("wait for writers... "
                        "buffer: #%u 0x%lx  "
1483
                        "mutex: 0x%lx",
1484
                        (uint) buffer->buffer_no, (ulong) buffer,
1485
                        (ulong) &buffer->mutex));
unknown's avatar
unknown committed
1486
    DBUG_ASSERT(buffer->file != -1);
1487 1488
    wqueue_add_and_wait(&buffer->waiting_filling_buffer, thread,
                        &buffer->mutex);
unknown's avatar
unknown committed
1489 1490
    DBUG_PRINT("info", ("wait for writers done  "
                        "buffer: #%u 0x%lx  "
1491
                        "mutex: 0x%lx",
1492
                        (uint) buffer->buffer_no, (ulong) buffer,
1493
                        (ulong) &buffer->mutex));
unknown's avatar
unknown committed
1494
  }
1495 1496 1497 1498 1499 1500 1501

  DBUG_VOID_RETURN;
}


/*

unknown's avatar
unknown committed
1502
  Wait for buffer to become free
1503 1504 1505

  SYNOPSIS
    translog_wait_for_buffer_free()
unknown's avatar
unknown committed
1506
    buffer               The buffer we are waiting for
1507 1508 1509 1510 1511 1512 1513 1514 1515

  NOTE
    - this buffer should be locked
*/

static void translog_wait_for_buffer_free(struct st_translog_buffer *buffer)
{
  struct st_my_thread_var *thread= my_thread_var;
  DBUG_ENTER("translog_wait_for_buffer_free");
unknown's avatar
unknown committed
1516 1517
  DBUG_PRINT("enter", ("Buffer: #%u 0x%lx  copies in progress: %u  "
                       "File: %d  size: 0x%lu",
1518 1519
                       (uint) buffer->buffer_no, (ulong) buffer,
                       (int) buffer->copy_to_buffer_in_progress,
unknown's avatar
unknown committed
1520
                       buffer->file, (ulong) buffer->size));
1521 1522 1523

  translog_wait_for_writers(buffer);

unknown's avatar
unknown committed
1524
  while (buffer->file != -1)
1525
  {
unknown's avatar
unknown committed
1526 1527
    DBUG_PRINT("info", ("wait for writers...  "
                        "buffer: #%u 0x%lx  "
1528
                        "mutex: 0x%lx",
1529
                        (uint) buffer->buffer_no, (ulong) buffer,
1530 1531 1532
                        (ulong) &buffer->mutex));
    wqueue_add_and_wait(&buffer->waiting_filling_buffer, thread,
                        &buffer->mutex);
unknown's avatar
unknown committed
1533 1534
    DBUG_PRINT("info", ("wait for writers done.  "
                        "buffer: #%u 0x%lx  "
1535
                        "mutex: 0x%lx",
1536
                        (uint) buffer->buffer_no, (ulong) buffer,
1537
                        (ulong) &buffer->mutex));
unknown's avatar
unknown committed
1538 1539
  }
  DBUG_ASSERT(buffer->copy_to_buffer_in_progress == 0);
1540 1541 1542 1543 1544
  DBUG_VOID_RETURN;
}


/**
  @brief Initialize the cursor for a buffer

  Points the cursor to the beginning of the buffer data and resets its
  page and protection state. Every cursor other than log_descriptor.bc is
  marked as a chaser.

  @param cursor          The cursor to initialize
  @param buffer          The buffer the cursor will point to
  @param buffer_no       Number of buffer
*/

static void translog_cursor_init(struct st_buffer_cursor *cursor,
                                 struct st_translog_buffer *buffer,
                                 uint8 buffer_no)
{
  DBUG_ENTER("translog_cursor_init");

  /* Buffer the cursor is attached to */
  cursor->buffer= buffer;
  cursor->buffer_no= buffer_no;
  cursor->ptr= buffer->buffer;

  /* Only the main cursor of the log descriptor is not a chaser */
  cursor->chaser= (cursor != &log_descriptor.bc);

  /* Nothing is written on the current page yet */
  cursor->current_page_fill= 0;
  cursor->write_counter= 0;
  cursor->previous_offset= 0;
  cursor->protected= 0;

  DBUG_VOID_RETURN;
}


/*
  Initialize buffer for current file

  SYNOPSIS
    translog_start_buffer()
    buffer               The buffer
    cursor               It's cursor
    buffer_no            Number of buffer
*/
1580

1581 1582
static void translog_start_buffer(struct st_translog_buffer *buffer,
                                  struct st_buffer_cursor *cursor,
unknown's avatar
unknown committed
1583
                                  uint buffer_no)
1584 1585 1586
{
  DBUG_ENTER("translog_start_buffer");
  DBUG_PRINT("enter",
unknown's avatar
unknown committed
1587
             ("Assign buffer: #%u (0x%lx)  to file: %d  offset: 0x%lx(%lu)",
1588
              (uint) buffer->buffer_no, (ulong) buffer,
unknown's avatar
unknown committed
1589
              log_descriptor.log_file_num[0],
1590 1591
              (ulong) LSN_OFFSET(log_descriptor.horizon),
              (ulong) LSN_OFFSET(log_descriptor.horizon)));
1592
  DBUG_ASSERT(buffer_no == buffer->buffer_no);
unknown's avatar
unknown committed
1593
  buffer->last_lsn= LSN_IMPOSSIBLE;
1594
  buffer->offset= log_descriptor.horizon;
1595
  buffer->next_buffer_offset= LSN_IMPOSSIBLE;
1596 1597 1598 1599
  buffer->file= log_descriptor.log_file_num[0];
  buffer->overlay= 0;
  buffer->size= 0;
  translog_cursor_init(cursor, buffer, buffer_no);
unknown's avatar
unknown committed
1600
  DBUG_PRINT("info", ("init cursor #%u: 0x%lx  chaser: %d  Size: %lu (%lu)",
1601 1602
                      (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
                      cursor->chaser, (ulong) cursor->buffer->size,
unknown's avatar
unknown committed
1603
                      (ulong) (cursor->ptr - cursor->buffer->buffer)));
unknown's avatar
unknown committed
1604
  DBUG_EXECUTE("info", translog_check_cursor(cursor););
1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622
  DBUG_VOID_RETURN;
}


/*
  Switch to the next buffer in a chain

  SYNOPSIS
    translog_buffer_next()
    horizon              \ Pointers on current position in file and buffer
    cursor               /
    new_file             Also start new file

  NOTE:
   - loghandler should be locked
   - after return new and old buffer still are locked

  RETURN
    0  OK
    1  Error
*/

static my_bool translog_buffer_next(TRANSLOG_ADDRESS *horizon,
                                    struct st_buffer_cursor *cursor,
                                    my_bool new_file)
{
  uint prev_no= cursor->buffer_no;
  /* buffers are used in a ring */
  uint next_no= (prev_no + 1) % TRANSLOG_BUFFERS_NO;
  struct st_translog_buffer *next= log_descriptor.buffers + next_no;
  my_bool is_chaser= cursor->chaser;
  DBUG_ENTER("translog_buffer_next");

  DBUG_PRINT("info", ("horizon: (%lu,0x%lx)  chasing: %d",
                      LSN_IN_PARTS(log_descriptor.horizon), is_chaser));

  DBUG_ASSERT(cmp_translog_addr(log_descriptor.horizon, *horizon) >= 0);

  translog_finish_page(horizon, cursor);

  if (is_chaser)
  {
    DBUG_ASSERT(next->file != 0);
  }
  else
  {
    /* the main cursor has to own the next buffer and get it free */
    translog_buffer_lock(next);
    translog_wait_for_buffer_free(next);
  }

  if (new_file)
  {
    /* move the horizon to the next file and its header page */
    (*horizon)+= LSN_ONE_FILE;
    (*horizon)= LSN_REPLACE_OFFSET(*horizon, TRANSLOG_PAGE_SIZE);
    /* only the main (non-chasing) cursor creates the file */
    if (!is_chaser && translog_create_new_file())
    {
      DBUG_RETURN(1);
    }
  }

  /* prepare next page */
  if (!is_chaser)
    translog_start_buffer(next, cursor, next_no);
  else
    translog_cursor_init(cursor, next, next_no);
  /* chain the old buffer to the new one */
  log_descriptor.buffers[prev_no].next_buffer_offset= next->offset;
  translog_new_page_header(horizon, cursor);
  DBUG_RETURN(0);
}


/*
1677
  Sets max LSN sent to file, and address from which data is only in the buffer
1678 1679 1680 1681

  SYNOPSIS
    translog_set_sent_to_file()
    lsn                  LSN to assign
1682 1683 1684
    in_buffers           to assign to in_buffers_only

  TODO: use atomic operations if possible (64bit architectures?)
1685 1686
*/

1687
static void translog_set_sent_to_file(LSN lsn, TRANSLOG_ADDRESS in_buffers)
1688 1689 1690
{
  DBUG_ENTER("translog_set_sent_to_file");
  pthread_mutex_lock(&log_descriptor.sent_to_file_lock);
1691 1692
  DBUG_PRINT("enter", ("lsn: (%lu,0x%lx) in_buffers: (%lu,0x%lx)  "
                       "in_buffers_only: (%lu,0x%lx)",
unknown's avatar
unknown committed
1693 1694 1695
                       LSN_IN_PARTS(lsn),
                       LSN_IN_PARTS(in_buffers),
                       LSN_IN_PARTS(log_descriptor.in_buffers_only)));
1696 1697 1698 1699 1700 1701 1702 1703
  DBUG_ASSERT(cmp_translog_addr(lsn, log_descriptor.sent_to_file) >= 0);
  log_descriptor.sent_to_file= lsn;
  /* LSN_IMPOSSIBLE == 0 => it will work for very first time */
  if (cmp_translog_addr(in_buffers, log_descriptor.in_buffers_only) > 0)
  {
    log_descriptor.in_buffers_only= in_buffers;
    DBUG_PRINT("info", ("set new in_buffers_only"));
  }
1704 1705 1706 1707 1708 1709
  pthread_mutex_unlock(&log_descriptor.sent_to_file_lock);
  DBUG_VOID_RETURN;
}


/*
  Sets address from which data is only in the buffer

  SYNOPSIS
    translog_set_only_in_buffers()
    in_buffers           candidate for in_buffers_only; it is stored only
                         if it is greater than the current value

  NOTE
    The value is changed under log_descriptor.sent_to_file_lock.
*/

static void translog_set_only_in_buffers(TRANSLOG_ADDRESS in_buffers)
{
  DBUG_ENTER("translog_set_only_in_buffers");
  pthread_mutex_lock(&log_descriptor.sent_to_file_lock);
  DBUG_PRINT("enter", ("in_buffers: (%lu,0x%lx)  "
                       "in_buffers_only: (%lu,0x%lx)",
                       LSN_IN_PARTS(in_buffers),
                       LSN_IN_PARTS(log_descriptor.in_buffers_only)));

  /* LSN_IMPOSSIBLE == 0 => it will work for very first time */
  if (cmp_translog_addr(in_buffers, log_descriptor.in_buffers_only) > 0)
  {
    log_descriptor.in_buffers_only= in_buffers;
    DBUG_PRINT("info", ("set new in_buffers_only"));
  }

  pthread_mutex_unlock(&log_descriptor.sent_to_file_lock);
  DBUG_VOID_RETURN;
}


/*
  Gets address from which data is only in the buffer

  SYNOPSIS
    translog_only_in_buffers()

  NOTE
    The value is read under log_descriptor.sent_to_file_lock.

  RETURN
    address from which data is only in the buffer
*/

static TRANSLOG_ADDRESS translog_only_in_buffers()
{
  TRANSLOG_ADDRESS snapshot;
  DBUG_ENTER("translog_only_in_buffers");

  pthread_mutex_lock(&log_descriptor.sent_to_file_lock);
  snapshot= log_descriptor.in_buffers_only;
  pthread_mutex_unlock(&log_descriptor.sent_to_file_lock);

  DBUG_RETURN(snapshot);
}


/*
  Get max LSN sent to file
1760 1761 1762

  SYNOPSIS
    translog_get_sent_to_file()
1763 1764 1765

  RETURN
    max LSN send to file
1766 1767
*/

1768
static LSN translog_get_sent_to_file()
1769
{
1770
  register LSN lsn;
1771 1772
  DBUG_ENTER("translog_get_sent_to_file");
  pthread_mutex_lock(&log_descriptor.sent_to_file_lock);
1773
  lsn= log_descriptor.sent_to_file;
1774
  pthread_mutex_unlock(&log_descriptor.sent_to_file_lock);
1775
  DBUG_RETURN(lsn);
1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789
}


/*
  Get first chunk address on the given page

  SYNOPSIS
    translog_get_first_chunk_offset()
    page                 The page where to find first chunk

  RETURN
    first chunk offset
*/

unknown's avatar
unknown committed
1790
static my_bool translog_get_first_chunk_offset(uchar *page)
1791 1792 1793 1794
{
  uint16 page_header= 7;
  DBUG_ENTER("translog_get_first_chunk_offset");

unknown's avatar
unknown committed
1795
  if (page[TRANSLOG_PAGE_FLAGS] & TRANSLOG_PAGE_CRC)
1796
    page_header+= 4;
unknown's avatar
unknown committed
1797 1798
  if (page[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION)
    page_header+= (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2;
1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813
  DBUG_RETURN(page_header);
}


/*
  Write coded length of record

  SYNOPSIS
    translog_write_variable_record_1group_code_len
    dst                  Destination buffer pointer
    length               Length which should be coded
    header_len           Calculated total header length
*/

static void
unknown's avatar
unknown committed
1814
translog_write_variable_record_1group_code_len(uchar *dst,
1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828
                                               translog_size_t length,
                                               uint16 header_len)
{
  switch (header_len) {
  case 6:                                      /* (5 + 1) */
    DBUG_ASSERT(length <= 250);
    *dst= (uint8) length;
    return;
  case 8:                                      /* (5 + 3) */
    DBUG_ASSERT(length <= 0xFFFF);
    *dst= 251;
    int2store(dst + 1, length);
    return;
  case 9:                                      /* (5 + 4) */
unknown's avatar
unknown committed
1829
    DBUG_ASSERT(length <= (ulong) 0xFFFFFF);
1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854
    *dst= 252;
    int3store(dst + 1, length);
    return;
  case 10:                                     /* (5 + 5) */
    *dst= 253;
    int4store(dst + 1, length);
    return;
  default:
    DBUG_ASSERT(0);
  }
  return;
}


/*
  Decode record data length and advance given pointer to the next field

  SYNOPSIS
    translog_variable_record_1group_decode_len()
    src                  The pointer to the pointer to the length beginning

  RETURN
    decoded length
*/

unknown's avatar
unknown committed
1855
static translog_size_t translog_variable_record_1group_decode_len(uchar **src)
1856 1857 1858 1859
{
  uint8 first= (uint8) (**src);
  switch (first) {
  case 251:
unknown's avatar
unknown committed
1860
    (*src)+= 3;
1861 1862
    return (uint2korr((*src) - 2));
  case 252:
unknown's avatar
unknown committed
1863
    (*src)+= 4;
1864 1865
    return (uint3korr((*src) - 3));
  case 253:
unknown's avatar
unknown committed
1866
    (*src)+= 5;
1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890
    return (uint4korr((*src) - 4));
  case 254:
  case 255:
    DBUG_ASSERT(0);                             /* reserved for future use */
    return (0);
  default:
    (*src)++;
    return (first);
  }
}


/*
  Get total length of this chunk (not only body)

  SYNOPSIS
    translog_get_total_chunk_length()
    page                 The page where chunk placed
    offset               Offset of the chunk on this place

  RETURN
    total length of the chunk
*/

unknown's avatar
unknown committed
1891
static uint16 translog_get_total_chunk_length(uchar *page, uint16 offset)
1892 1893 1894
{
  DBUG_ENTER("translog_get_total_chunk_length");
  switch (page[offset] & TRANSLOG_CHUNK_TYPE) {
unknown's avatar
unknown committed
1895
  case TRANSLOG_CHUNK_LSN:
1896
  {
unknown's avatar
unknown committed
1897
    /* 0 chunk referred as LSN (head or tail) */
1898
    translog_size_t rec_len;
unknown's avatar
unknown committed
1899 1900
    uchar *start= page + offset;
    uchar *ptr= start + 1 + 2;
1901 1902 1903 1904
    uint16 chunk_len, header_len, page_rest;
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN"));
    rec_len= translog_variable_record_1group_decode_len(&ptr);
    chunk_len= uint2korr(ptr);
unknown's avatar
unknown committed
1905 1906
    header_len= (ptr -start) + 2;
    DBUG_PRINT("info", ("rec len: %lu  chunk len: %u  header len: %u",
1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920
                        (ulong) rec_len, (uint) chunk_len, (uint) header_len));
    if (chunk_len)
    {
      DBUG_PRINT("info", ("chunk len: %u + %u = %u",
                          (uint) header_len, (uint) chunk_len,
                          (uint) (chunk_len + header_len)));
      DBUG_RETURN(chunk_len + header_len);
    }
    page_rest= TRANSLOG_PAGE_SIZE - offset;
    DBUG_PRINT("info", ("page_rest %u", (uint) page_rest));
    if (rec_len + header_len < page_rest)
      DBUG_RETURN(rec_len + header_len);
    DBUG_RETURN(page_rest);
  }
unknown's avatar
unknown committed
1921
  case TRANSLOG_CHUNK_FIXED:
1922
  {
unknown's avatar
unknown committed
1923
    uchar *ptr;
1924
    uint type= page[offset] & TRANSLOG_REC_TYPE;
unknown's avatar
unknown committed
1925 1926 1927 1928
    uint length;
    int i;
    /* 1 (pseudo)fixed record (also LSN) */
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED"));
1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939
    DBUG_ASSERT(log_record_type_descriptor[type].class ==
                LOGRECTYPE_FIXEDLENGTH ||
                log_record_type_descriptor[type].class ==
                LOGRECTYPE_PSEUDOFIXEDLENGTH);
    if (log_record_type_descriptor[type].class == LOGRECTYPE_FIXEDLENGTH)
    {
      DBUG_PRINT("info",
                 ("Fixed length: %u",
                  (uint) (log_record_type_descriptor[type].fixed_length + 3)));
      DBUG_RETURN(log_record_type_descriptor[type].fixed_length + 3);
    }
unknown's avatar
unknown committed
1940 1941 1942 1943

    ptr= page + offset + 3;            /* first compressed LSN */
    length= log_record_type_descriptor[type].fixed_length + 3;
    for (i= 0; i < log_record_type_descriptor[type].compressed_LSN; i++)
1944
    {
unknown's avatar
unknown committed
1945 1946
      /* first 2 bits is length - 2 */
      uint len= ((((uint8) (*ptr)) & TRANSLOG_CLSN_LEN_BITS) >> 6) + 2;
1947 1948
      if (ptr[0] == 0 && ((uint8) ptr[1]) == 1)
        len+= LSN_STORE_SIZE; /* case of full LSN storing */
unknown's avatar
unknown committed
1949 1950
      ptr+= len;
      /* subtract economized bytes */
1951
      length-= (LSN_STORE_SIZE - len);
1952
    }
unknown's avatar
unknown committed
1953 1954
    DBUG_PRINT("info", ("Pseudo-fixed length: %u", length));
    DBUG_RETURN(length);
1955
  }
unknown's avatar
unknown committed
1956 1957 1958
  case TRANSLOG_CHUNK_NOHDR:
    /* 2 no header chunk (till page end) */
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR  length: %u",
1959 1960 1961 1962 1963
                        (uint) (TRANSLOG_PAGE_SIZE - offset)));
    DBUG_RETURN(TRANSLOG_PAGE_SIZE - offset);
  case TRANSLOG_CHUNK_LNGTH:                   /* 3 chunk with chunk length */
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH"));
    DBUG_ASSERT(TRANSLOG_PAGE_SIZE - offset >= 3);
unknown's avatar
unknown committed
1964
    DBUG_PRINT("info", ("length: %u", uint2korr(page + offset + 1) + 3));
1965 1966 1967
    DBUG_RETURN(uint2korr(page + offset + 1) + 3);
  default:
    DBUG_ASSERT(0);
unknown's avatar
unknown committed
1968
    DBUG_RETURN(0);
1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980
  }
}


/*
  Flush given buffer

  SYNOPSIS
    translog_buffer_flush()
    buffer               This buffer should be flushed

  RETURN
unknown's avatar
unknown committed
1981 1982
    0  OK
    1  Error
1983 1984 1985 1986 1987
*/

static my_bool translog_buffer_flush(struct st_translog_buffer *buffer)
{
  uint32 i;
1988
  PAGECACHE_FILE file;
1989 1990
  DBUG_ENTER("translog_buffer_flush");
  DBUG_PRINT("enter",
unknown's avatar
unknown committed
1991 1992
             ("Buffer: #%u 0x%lx: "
              "file: %d  offset: (%lu,0x%lx)  size: %lu",
1993
              (uint) buffer->buffer_no, (ulong) buffer,
unknown's avatar
unknown committed
1994
              buffer->file,
unknown's avatar
unknown committed
1995
              LSN_IN_PARTS(buffer->offset),
1996 1997
              (ulong) buffer->size));

unknown's avatar
unknown committed
1998
  DBUG_ASSERT(buffer->file != -1);
1999 2000

  translog_wait_for_writers(buffer);
unknown's avatar
unknown committed
2001
  if (buffer->overlay && buffer->overlay->file != -1)
2002 2003 2004 2005 2006 2007 2008 2009 2010
  {
    struct st_translog_buffer *overlay= buffer->overlay;
    translog_buffer_unlock(buffer);
    translog_buffer_lock(overlay);
    translog_wait_for_buffer_free(overlay);
    translog_buffer_unlock(overlay);
    translog_buffer_lock(buffer);
  }

2011
  file.file= buffer->file;
2012 2013
  for (i= 0; i < buffer->size; i+= TRANSLOG_PAGE_SIZE)
  {
2014 2015 2016
    TRANSLOG_ADDRESS addr= (buffer->offset + i);
    TRANSLOG_VALIDATOR_DATA data;
    data.addr= &addr;
unknown's avatar
unknown committed
2017
    DBUG_ASSERT(log_descriptor.pagecache->block_size == TRANSLOG_PAGE_SIZE);
2018
    DBUG_ASSERT(i + TRANSLOG_PAGE_SIZE <= buffer->size);
2019
    if (pagecache_inject(log_descriptor.pagecache,
2020
                        &file,
2021
                        (LSN_OFFSET(buffer->offset) + i) / TRANSLOG_PAGE_SIZE,
2022 2023 2024 2025
                        3,
                        buffer->buffer + i,
                        PAGECACHE_PLAIN_PAGE,
                        PAGECACHE_LOCK_LEFT_UNLOCKED,
2026 2027
                        PAGECACHE_PIN_LEFT_UNPINNED, 0,
                        &translog_page_validator, (uchar*) &data))
2028
    {
unknown's avatar
unknown committed
2029
      UNRECOVERABLE_ERROR(("Can't write page (%lu,0x%lx) to pagecache",
2030
                           (ulong) buffer->file,
2031
                           (ulong) (LSN_OFFSET(buffer->offset)+ i)));
2032 2033 2034
    }
  }
  if (my_pwrite(buffer->file, (char*) buffer->buffer,
2035
                buffer->size, LSN_OFFSET(buffer->offset),
2036
                log_write_flags))
2037
  {
unknown's avatar
unknown committed
2038 2039
    UNRECOVERABLE_ERROR(("Can't write buffer (%lu,0x%lx) size %lu "
                         "to the disk (%d)",
2040
                         (ulong) buffer->file,
2041
                         (ulong) LSN_OFFSET(buffer->offset),
2042 2043 2044
                         (ulong) buffer->size, errno));
    DBUG_RETURN(1);
  }
unknown's avatar
unknown committed
2045

2046 2047 2048 2049 2050
  if (LSN_OFFSET(buffer->last_lsn) != 0)    /* if buffer->last_lsn is set */
    translog_set_sent_to_file(buffer->last_lsn,
                              buffer->next_buffer_offset);
  else
    translog_set_only_in_buffers(buffer->next_buffer_offset);
2051
  /* Free buffer */
unknown's avatar
unknown committed
2052
  buffer->file= -1;
2053
  buffer->overlay= 0;
unknown's avatar
unknown committed
2054
  if (buffer->waiting_filling_buffer.last_thread)
2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070
  {
    wqueue_release_queue(&buffer->waiting_filling_buffer);
  }
  DBUG_RETURN(0);
}


/*
  Recover page with sector protection (wipe out failed chunks)

  SYNOPSYS
    translog_recover_page_up_to_sector()
    page                 reference on the page
    offset               offset of failed sector

  RETURN
unknown's avatar
unknown committed
2071 2072
    0  OK
    1  Error
2073 2074
*/

unknown's avatar
unknown committed
2075
static my_bool translog_recover_page_up_to_sector(uchar *page, uint16 offset)
2076 2077 2078
{
  uint16 chunk_offset= translog_get_first_chunk_offset(page), valid_chunk_end;
  DBUG_ENTER("translog_recover_page_up_to_sector");
unknown's avatar
unknown committed
2079
  DBUG_PRINT("enter", ("offset: %u  first chunk: %u",
2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091
                       (uint) offset, (uint) chunk_offset));

  while (page[chunk_offset] != '\0' && chunk_offset < offset)
  {
    uint16 chunk_length;
    if ((chunk_length=
         translog_get_total_chunk_length(page, chunk_offset)) == 0)
    {
      UNRECOVERABLE_ERROR(("cant get chunk length (offset %u)",
                           (uint) chunk_offset));
      DBUG_RETURN(1);
    }
unknown's avatar
unknown committed
2092
    DBUG_PRINT("info", ("chunk: offset: %u  length %u",
2093 2094 2095
                        (uint) chunk_offset, (uint) chunk_length));
    if (((ulong) chunk_offset) + ((ulong) chunk_length) > TRANSLOG_PAGE_SIZE)
    {
unknown's avatar
unknown committed
2096
      UNRECOVERABLE_ERROR(("damaged chunk (offset %u) in trusted area",
2097 2098 2099 2100 2101 2102 2103
                           (uint) chunk_offset));
      DBUG_RETURN(1);
    }
    chunk_offset+= chunk_length;
  }

  valid_chunk_end= chunk_offset;
unknown's avatar
unknown committed
2104
  /* end of trusted area - sector parsing */
2105 2106 2107 2108 2109 2110
  while (page[chunk_offset] != '\0')
  {
    uint16 chunk_length;
    if ((chunk_length=
         translog_get_total_chunk_length(page, chunk_offset)) == 0)
      break;
unknown's avatar
unknown committed
2111 2112

    DBUG_PRINT("info", ("chunk: offset: %u  length %u",
2113
                        (uint) chunk_offset, (uint) chunk_length));
unknown's avatar
unknown committed
2114 2115
    if (((ulong) chunk_offset) + ((ulong) chunk_length) >
        (uint) (offset + DISK_DRIVE_SECTOR_SIZE))
2116
      break;
unknown's avatar
unknown committed
2117

2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137
    chunk_offset+= chunk_length;
    valid_chunk_end= chunk_offset;
  }
  DBUG_PRINT("info", ("valid chunk end offset: %u", (uint) valid_chunk_end));

  bzero(page + valid_chunk_end, TRANSLOG_PAGE_SIZE - valid_chunk_end);

  DBUG_RETURN(0);
}


/*
  Log page validator

  SYNOPSIS
    translog_page_validator()
    page_addr            The page to check
    data                 data, need for validation (address in this case)

  RETURN
unknown's avatar
unknown committed
2138 2139
    0  OK
    1  Error
2140
*/
unknown's avatar
unknown committed
2141
static my_bool translog_page_validator(uchar *page_addr, uchar* data_ptr)
2142
{
unknown's avatar
unknown committed
2143 2144
  uint this_page_page_overhead;
  uint flags;
unknown's avatar
unknown committed
2145
  uchar *page= (uchar*) page_addr, *page_pos;
unknown's avatar
unknown committed
2146 2147
  TRANSLOG_VALIDATOR_DATA *data= (TRANSLOG_VALIDATOR_DATA *) data_ptr;
  TRANSLOG_ADDRESS addr= *(data->addr);
2148 2149
  DBUG_ENTER("translog_page_validator");

unknown's avatar
unknown committed
2150
  data->was_recovered= 0;
2151

2152 2153
  if (uint3korr(page) != LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE ||
      uint3korr(page + 3) != LSN_FILE_NO(addr))
2154 2155
  {
    UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): "
unknown's avatar
unknown committed
2156
                         "page address written in the page is incorrect: "
2157
                         "File %lu instead of %lu or page %lu instead of %lu",
unknown's avatar
unknown committed
2158
                         LSN_IN_PARTS(addr),
2159
                         (ulong) uint3korr(page + 3), (ulong) LSN_FILE_NO(addr),
2160
                         (ulong) uint3korr(page),
2161
                         (ulong) LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE));
2162 2163
    DBUG_RETURN(1);
  }
unknown's avatar
unknown committed
2164 2165
  flags= (uint)(page[TRANSLOG_PAGE_FLAGS]);
  this_page_page_overhead= page_overhead[flags];
2166 2167 2168 2169 2170
  if (flags & ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION |
                TRANSLOG_RECORD_CRC))
  {
    UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): "
                         "Garbage in the page flags field detected : %x",
unknown's avatar
unknown committed
2171
                         LSN_IN_PARTS(addr), (uint) flags));
2172 2173
    DBUG_RETURN(1);
  }
unknown's avatar
unknown committed
2174
  page_pos= page + (3 + 3 + 1);
2175 2176
  if (flags & TRANSLOG_PAGE_CRC)
  {
unknown's avatar
unknown committed
2177 2178 2179 2180
    uint32 crc= translog_crc(page + this_page_page_overhead,
                             TRANSLOG_PAGE_SIZE -
                             this_page_page_overhead);
    if (crc != uint4korr(page_pos))
2181 2182 2183
    {
      UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): "
                           "CRC mismatch: calculated: %lx on the page %lx",
unknown's avatar
unknown committed
2184
                           LSN_IN_PARTS(addr),
unknown's avatar
unknown committed
2185
                           (ulong) crc, (ulong) uint4korr(page_pos)));
2186 2187
      DBUG_RETURN(1);
    }
unknown's avatar
unknown committed
2188
    page_pos+= CRC_LENGTH;                      /* Skip crc */
2189 2190 2191 2192
  }
  if (flags & TRANSLOG_SECTOR_PROTECTION)
  {
    uint i, offset;
unknown's avatar
unknown committed
2193
    uchar *table= page_pos;
2194
    uint16 current= uint2korr(table);
unknown's avatar
unknown committed
2195 2196 2197
    for (i= 2, offset= DISK_DRIVE_SECTOR_SIZE;
         i < (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2;
         i+= 2, offset+= DISK_DRIVE_SECTOR_SIZE)
2198 2199
    {
      /*
unknown's avatar
unknown committed
2200 2201
         TODO: add chunk counting for "suspecting" sectors (difference is
         more than 1-2)
2202 2203
      */
      uint16 test= uint2korr(page + offset);
unknown's avatar
unknown committed
2204 2205
      DBUG_PRINT("info", ("sector: #%u  offset: %u  current: %lx "
                          "read: 0x%x  stored: 0x%x%x",
2206
                          i / 2, offset, (ulong) current,
2207 2208
                          (uint) uint2korr(page + offset), (uint) table[i],
                          (uint) table[i + 1]));
unknown's avatar
unknown committed
2209 2210 2211 2212
      if (((test < current) &&
           (LL(0xFFFF) - current + test > DISK_DRIVE_SECTOR_SIZE / 3)) ||
          ((test >= current) &&
           (test - current > DISK_DRIVE_SECTOR_SIZE / 3)))
2213 2214 2215
      {
        if (translog_recover_page_up_to_sector(page, offset))
          DBUG_RETURN(1);
unknown's avatar
unknown committed
2216
        data->was_recovered= 1;
2217 2218 2219 2220 2221 2222 2223
        DBUG_RETURN(0);
      }

      /* Return value on the page */
      page[offset]= table[i];
      page[offset + 1]= table[i + 1];
      current= test;
unknown's avatar
unknown committed
2224 2225
      DBUG_PRINT("info", ("sector: #%u  offset: %u  current: %lx  "
                          "read: 0x%x  stored: 0x%x%x",
2226
                          i / 2, offset, (ulong) current,
2227 2228 2229 2230 2231 2232 2233
                          (uint) uint2korr(page + offset), (uint) table[i],
                          (uint) table[i + 1]));
    }
  }
  DBUG_RETURN(0);
}

2234

2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287
/*
  Lock the loghandler

  SYNOPSIS
    translog_lock()

  NOTE
    Locking the loghandler means locking the current buffer
    (log_descriptor.bc.buffer). The current buffer can change while we are
    waiting for its lock, so after the lock is taken it is checked that the
    buffer is still the current one; if not, the lock is released and the
    new current buffer is tried.

  RETURN
    0  OK
    1  Error
*/

my_bool translog_lock()
{
  struct st_translog_buffer *candidate;
  DBUG_ENTER("translog_lock");

  candidate= log_descriptor.bc.buffer;
  if (translog_buffer_lock(candidate))
    DBUG_RETURN(1);
  while (log_descriptor.bc.buffer != candidate)
  {
    /* the current buffer was switched meanwhile: retry with the new one */
    translog_buffer_unlock(candidate);
    candidate= log_descriptor.bc.buffer;
    if (translog_buffer_lock(candidate))
      DBUG_RETURN(1);
  }
  DBUG_RETURN(0);
}


/*
  Unlock the loghandler

  SYNOPSIS
    translog_unlock()

  NOTE
    Unlocks the current buffer (log_descriptor.bc.buffer).

  RETURN
    0  OK (the only value returned)
*/

my_bool translog_unlock()
{
  struct st_translog_buffer *current_buffer= log_descriptor.bc.buffer;
  DBUG_ENTER("translog_unlock");

  translog_buffer_unlock(current_buffer);
  DBUG_RETURN(0);
}


2288 2289
/**
  @brief Get log page by file number and offset of the beginning of the page
2290

2291 2292
  @param data            validator data, which contains the page address
  @param buffer          buffer for page placing
2293
                         (might not be used in some cache implementations)
2294 2295
  @param direct_link     if it is not NULL then caller can accept direct
                         link to the page cache
2296

2297 2298
  @retval NULL Error
  @retval #    pointer to the page cache which should be used to read this page
2299 2300
*/

2301
static uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer,
unknown's avatar
unknown committed
2302
                                PAGECACHE_BLOCK_LINK **direct_link)
2303
{
2304
  TRANSLOG_ADDRESS addr= *(data->addr), in_buffers;
2305
  uint cache_index;
2306
  uint32 file_no= LSN_FILE_NO(addr);
2307
  DBUG_ENTER("translog_get_page");
unknown's avatar
unknown committed
2308
  DBUG_PRINT("enter", ("File: %lu  Offset: %lu(0x%lx)",
2309 2310 2311
                       (ulong) file_no,
                       (ulong) LSN_OFFSET(addr),
                       (ulong) LSN_OFFSET(addr)));
2312 2313

  /* it is really page address */
2314
  DBUG_ASSERT(LSN_OFFSET(addr) % TRANSLOG_PAGE_SIZE == 0);
2315

2316 2317 2318
  if (direct_link)
    *direct_link= NULL;

2319 2320
  in_buffers= translog_only_in_buffers();
  DBUG_PRINT("info", ("in_buffers: (%lu,0x%lx)",
unknown's avatar
unknown committed
2321
                      LSN_IN_PARTS(in_buffers)));
2322 2323 2324 2325 2326 2327 2328 2329 2330
  if (in_buffers != LSN_IMPOSSIBLE &&
      cmp_translog_addr(addr, in_buffers) >= 0)
  {
    translog_lock();
    /* recheck with locked loghandler */
    in_buffers= translog_only_in_buffers();
    if (cmp_translog_addr(addr, in_buffers) >= 0)
    {
      uint16 buffer_no= log_descriptor.bc.buffer_no;
2331
#ifndef DBUG_OFF
2332
      uint16 buffer_start= buffer_no;
2333
#endif
2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350
      struct st_translog_buffer *buffer_unlock= log_descriptor.bc.buffer;
      struct st_translog_buffer *curr_buffer= log_descriptor.bc.buffer;
      for (;;)
      {
        /*
          if the page is in the buffer and it is the last version of the
          page (in case of devision the page bu buffer flush
        */
        if (curr_buffer->file != -1 &&
            cmp_translog_addr(addr, curr_buffer->offset) >= 0 &&
            cmp_translog_addr(addr,
                              (curr_buffer->next_buffer_offset ?
                               curr_buffer->next_buffer_offset:
                               curr_buffer->offset + curr_buffer->size)) < 0)
        {
          int is_last_unfinished_page;
          uint last_protected_sector= 0;
2351
          uchar *from, *table= NULL;
2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420
          translog_wait_for_writers(curr_buffer);
          DBUG_ASSERT(LSN_FILE_NO(addr) ==  LSN_FILE_NO(curr_buffer->offset));
          from= curr_buffer->buffer + (addr - curr_buffer->offset);
          memcpy(buffer, from, TRANSLOG_PAGE_SIZE);
          is_last_unfinished_page= ((log_descriptor.bc.buffer ==
                                     curr_buffer) &&
                                    (log_descriptor.bc.ptr >= from) &&
                                    (log_descriptor.bc.ptr <
                                     from + TRANSLOG_PAGE_SIZE));
          if (is_last_unfinished_page &&
              (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION))
          {
            last_protected_sector= ((log_descriptor.bc.previous_offset - 1) /
                                    DISK_DRIVE_SECTOR_SIZE);
            table= buffer + log_descriptor.page_overhead -
              (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2;
          }

          DBUG_ASSERT(buffer_unlock == curr_buffer);
          translog_buffer_unlock(buffer_unlock);
          if (is_last_unfinished_page)
          {
            uint i;
            /*
              This is last unfinished page => we should not check CRC and
              remove only that protection which already installed (no need
              to check it)

              We do not check the flag of sector protection, because if
              (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION) is
              not set then last_protected_sector will be 0 so following loop
              will be never executed
            */
            DBUG_PRINT("info", ("This is last unfinished page, "
                                "last protected sector %u",
                                last_protected_sector));
            for (i= 1; i <= last_protected_sector; i++)
            {
              uint index= i * 2;
              uint offset= i * DISK_DRIVE_SECTOR_SIZE;
              DBUG_PRINT("info", ("Sector %u: 0x%02x%02x <- 0x%02x%02x",
                                  i, buffer[offset],  buffer[offset + 1],
                                  table[index], table[index + 1]));
              buffer[offset]= table[index];
              buffer[offset + 1]= table[index + 1];
            }
          }
          else
          {
            /*
              This IF should be true because we use in-memory data which
              supposed to be correct.
            */
            if (translog_page_validator((uchar*) buffer, (uchar*) data))
              buffer= NULL;
          }
          DBUG_RETURN(buffer);
        }
        buffer_no= (buffer_no + 1) % TRANSLOG_BUFFERS_NO;
        curr_buffer= log_descriptor.buffers + buffer_no;
        translog_buffer_lock(curr_buffer);
        translog_buffer_unlock(buffer_unlock);
        buffer_unlock= curr_buffer;
        /* we can't make full circle */
        DBUG_ASSERT(buffer_start != buffer_no);
      }
    }
    translog_unlock();
  }
2421
  if ((cache_index= LSN_FILE_NO(log_descriptor.horizon) - file_no) <
2422 2423 2424 2425
      OPENED_FILES_NUM)
  {
    PAGECACHE_FILE file;
    /* file in the cache */
unknown's avatar
unknown committed
2426
    if (log_descriptor.log_file_num[cache_index] == -1)
2427 2428
    {
      if ((log_descriptor.log_file_num[cache_index]=
unknown's avatar
unknown committed
2429
           open_logfile_by_number_no_cache(file_no)) == -1)
2430 2431 2432 2433
        DBUG_RETURN(NULL);
    }
    file.file= log_descriptor.log_file_num[cache_index];

2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450
    buffer=
      (uchar*) (direct_link ?
                pagecache_valid_read(log_descriptor.pagecache, &file,
                                     LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE,
                                     3, NULL,
                                     PAGECACHE_PLAIN_PAGE,
                                     PAGECACHE_LOCK_READ, direct_link,
                                     &translog_page_validator, (uchar*) data) :
                pagecache_valid_read(log_descriptor.pagecache, &file,
                                     LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE,
                                     3, (char*) buffer,
                                     PAGECACHE_PLAIN_PAGE,
                                     PAGECACHE_LOCK_LEFT_UNLOCKED, direct_link,
                                     &translog_page_validator, (uchar*) data));
    DBUG_PRINT("info", ("Direct link is assigned to : 0x%lx * 0x%lx",
                        (ulong) direct_link,
                        (ulong)(direct_link ? *direct_link : NULL)));
2451 2452 2453
  }
  else
  {
unknown's avatar
unknown committed
2454 2455 2456 2457 2458 2459 2460 2461
    /*
      TODO: WE KEEP THE LAST OPENED_FILES_NUM FILES IN THE LOG CACHE, NOT
      THE LAST USED FILES.  THIS WILL BE A NOTABLE PROBLEM IF WE ARE
      FOLLOWING AN UNDO CHAIN THAT GOES OVER MANY OLD LOG FILES.  WE WILL
      PROBABLY NEED SPECIAL HANDLING OF THIS OR HAVE A FILO FOR THE LOG
      FILES.
    */

2462
    File file= open_logfile_by_number_no_cache(file_no);
unknown's avatar
unknown committed
2463 2464
    if (file == -1)
        DBUG_RETURN(NULL);
2465
    if (my_pread(file, (char*) buffer, TRANSLOG_PAGE_SIZE,
2466
                 LSN_OFFSET(addr), MYF(MY_FNABP | MY_WME)))
2467
      buffer= NULL;
unknown's avatar
unknown committed
2468
    else if (translog_page_validator((uchar*) buffer, (uchar*) data))
2469 2470 2471 2472 2473 2474
      buffer= NULL;
    my_close(file, MYF(MY_WME));
  }
  DBUG_RETURN(buffer);
}

2475 2476 2477 2478 2479 2480 2481
/**
  @brief free direct log page link

  @param direct_link the direct log page link to be freed

*/

unknown's avatar
unknown committed
2482
static void translog_free_link(PAGECACHE_BLOCK_LINK *direct_link)
2483 2484 2485 2486 2487 2488 2489 2490 2491 2492
{
  DBUG_ENTER("translog_free_link");
  DBUG_PRINT("info", ("Direct link: 0x%lx",
                      (ulong) direct_link));
  if (direct_link)
    pagecache_unlock_by_link(log_descriptor.pagecache, direct_link,
                             PAGECACHE_LOCK_READ_UNLOCK, PAGECACHE_UNPIN,
                             LSN_IMPOSSIBLE, LSN_IMPOSSIBLE);
  DBUG_VOID_RETURN;
}
2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503

/*
  Finds last page of the given log file

  SYNOPSIS
    translog_get_last_page_addr()
    addr                 address structure to fill with data, which contain
                         file number of the log file
    last_page_ok         assigned 1 if last page was OK (the file size is a
                         whole number of pages), 0 otherwise

  NOTE
    The address of the last page is derived from the file size only.
    If the file is not bigger than one page the offset is set to 0 and
    *last_page_ok to 0.
    On error neither *addr nor *last_page_ok is changed.

  RETURN
    0  OK
    1  Error
*/

static my_bool translog_get_last_page_addr(TRANSLOG_ADDRESS *addr,
                                           my_bool *last_page_ok)
{
  MY_STAT stat_buff, *file_stat;
  char path[FN_REFLEN];
  uint32 rec_offset;
  uint32 file_no= LSN_FILE_NO(*addr);
  DBUG_ENTER("translog_get_last_page_addr");

  if (!(file_stat= my_stat(translog_filename_by_fileno(file_no, path),
                           &stat_buff, MYF(MY_WME))))
    DBUG_RETURN(1);
  DBUG_PRINT("info", ("File size: %lu", (ulong) file_stat->st_size));
  if (file_stat->st_size > TRANSLOG_PAGE_SIZE)
  {
    /* Offset of the last page which is fully present in the file */
    rec_offset= (((file_stat->st_size / TRANSLOG_PAGE_SIZE) - 1) *
                 TRANSLOG_PAGE_SIZE);
    /* The page is OK only if nothing follows it in the file */
    *last_page_ok= (file_stat->st_size == rec_offset + TRANSLOG_PAGE_SIZE);
  }
  else
  {
    *last_page_ok= 0;
    rec_offset= 0;
  }
  *addr= MAKE_LSN(file_no, rec_offset);
  DBUG_PRINT("info", ("Last page: 0x%lx  ok: %d", (ulong) rec_offset,
                      *last_page_ok));
  DBUG_RETURN(0);
}


/*
  Get number bytes for record length storing

  SYNOPSIS
    translog_variable_record_length_bytes()
    length              Record length wich will be codded

  RETURN
    1,3,4,5 - number of bytes to store given length
*/
2549

2550 2551 2552 2553
static uint translog_variable_record_length_bytes(translog_size_t length)
{
  if (length < 250)
    return 1;
unknown's avatar
unknown committed
2554
  if (length < 0xFFFF)
2555
    return 3;
unknown's avatar
unknown committed
2556
  if (length < (ulong) 0xFFFFFF)
2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570
    return 4;
  return 5;
}


/*
  Get header of this chunk

  SYNOPSIS
    translog_get_chunk_header_length()
    page                 The page where chunk placed
    offset               Offset of the chunk on this place

  RETURN
unknown's avatar
unknown committed
2571 2572
    #  total length of the chunk
    0  Error
2573 2574
*/

unknown's avatar
unknown committed
2575
static uint16 translog_get_chunk_header_length(uchar *page, uint16 offset)
2576 2577
{
  DBUG_ENTER("translog_get_chunk_header_length");
unknown's avatar
unknown committed
2578 2579 2580
  page+= offset;
  switch (*page & TRANSLOG_CHUNK_TYPE) {
  case TRANSLOG_CHUNK_LSN:
2581
  {
unknown's avatar
unknown committed
2582
    /* 0 chunk referred as LSN (head or tail) */
2583
    translog_size_t rec_len;
unknown's avatar
unknown committed
2584 2585
    uchar *start= page;
    uchar *ptr= start + 1 + 2;
2586 2587 2588 2589
    uint16 chunk_len, header_len;
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN"));
    rec_len= translog_variable_record_1group_decode_len(&ptr);
    chunk_len= uint2korr(ptr);
unknown's avatar
unknown committed
2590 2591
    header_len= (ptr - start) +2;
    DBUG_PRINT("info", ("rec len: %lu  chunk len: %u  header len: %u",
2592 2593 2594
                        (ulong) rec_len, (uint) chunk_len, (uint) header_len));
    if (chunk_len)
    {
unknown's avatar
unknown committed
2595
      /* TODO: fine header end */
2596
      DBUG_ASSERT(0);
unknown's avatar
unknown committed
2597
      DBUG_RETURN(0);                               /* Keep compiler happy */
2598 2599 2600
    }
    DBUG_RETURN(header_len);
  }
unknown's avatar
unknown committed
2601
  case TRANSLOG_CHUNK_FIXED:
2602
  {
unknown's avatar
unknown committed
2603
    /* 1 (pseudo)fixed record (also LSN) */
2604 2605 2606
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED = 3"));
    DBUG_RETURN(3);
  }
unknown's avatar
unknown committed
2607 2608
  case TRANSLOG_CHUNK_NOHDR:
    /* 2 no header chunk (till page end) */
2609 2610 2611
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR = 1"));
    DBUG_RETURN(1);
    break;
unknown's avatar
unknown committed
2612 2613
  case TRANSLOG_CHUNK_LNGTH:
    /* 3 chunk with chunk length */
2614 2615 2616 2617 2618
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH = 3"));
    DBUG_RETURN(3);
    break;
  default:
    DBUG_ASSERT(0);
unknown's avatar
unknown committed
2619
    DBUG_RETURN(0);                               /* Keep compiler happy */
2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636
  }
}


/*
  Initialize transaction log

  SYNOPSIS
    translog_init()
    directory            Directory where log files are put
    log_file_max_size    max size of one log size (for new logs creation);
                         it is rounded down to a whole number of pages
    server_version       version of MySQL server (MYSQL_VERSION_ID)
    server_id            server ID (replication & Co)
    pagecache            Page cache for the log reads
    flags                flags (TRANSLOG_PAGE_CRC, TRANSLOG_SECTOR_PROTECTION
                           TRANSLOG_RECORD_CRC)

  DESCRIPTION
    Sets up mutexes, the log descriptor and the write buffers, then:
     - if no log is known (last_logno) or log file #1 has no pages, starts
       a new log system from file #1;
     - otherwise scans and validates pages from the page of the last
       checkpoint (or from the first page of file #1 if there were no
       checkpoints) up to the last page, and either continues the old log
       on its last valid page, or starts a new log file if a page had to
       be recovered, the page flags differ from 'flags', or the version
       stored in the file header differs.

  TODO
    Free used resources in case of error.

  RETURN
    0  OK
    1  Error
*/

my_bool translog_init(const char *directory,
                      uint32 log_file_max_size,
                      uint32 server_version,
                      uint32 server_id, PAGECACHE *pagecache, uint flags)
{
  int i;
  int old_log_was_recovered= 0, logs_found= 0;
  uint old_flags= flags;
  TRANSLOG_ADDRESS sure_page, last_page, last_valid_page;
  my_bool version_changed= 0;
  DBUG_ENTER("translog_init");
  DBUG_ASSERT(translog_inited == 0);

  loghandler_init();                            /* Safe to do many times */

  if (pthread_mutex_init(&log_descriptor.sent_to_file_lock,
                         MY_MUTEX_INIT_FAST) ||
      pthread_mutex_init(&log_descriptor.file_header_lock,
                         MY_MUTEX_INIT_FAST) ||
      pthread_mutex_init(&log_descriptor.unfinished_files_lock,
                         MY_MUTEX_INIT_FAST) ||
      pthread_mutex_init(&log_descriptor.purger_lock,
                         MY_MUTEX_INIT_FAST) ||
      pthread_mutex_init(&log_descriptor.log_flush_lock,
                         MY_MUTEX_INIT_FAST) ||
      init_dynamic_array(&log_descriptor.unfinished_files,
                         sizeof(struct st_file_counter),
                         10, 10 CALLER_INFO))
    DBUG_RETURN(1);
  log_descriptor.min_file_number= 0;
  log_descriptor.last_lsn_checked= LSN_IMPOSSIBLE;

  /* Directory to store files */
  unpack_dirname(log_descriptor.directory, directory);

  if ((log_descriptor.directory_fd= my_open(log_descriptor.directory,
                                            O_RDONLY, MYF(MY_WME))) < 0)
  {
    UNRECOVERABLE_ERROR(("Error %d during opening directory '%s'",
                         errno, log_descriptor.directory));
    DBUG_RETURN(1);
  }

  log_descriptor.in_buffers_only= LSN_IMPOSSIBLE;
  /* max size of one log size (for new logs creation) */
  log_descriptor.log_file_max_size=
    log_file_max_size - (log_file_max_size % TRANSLOG_PAGE_SIZE);
  /* server version */
  log_descriptor.server_version= server_version;
  /* server ID */
  log_descriptor.server_id= server_id;
  /* Page cache for the log reads */
  log_descriptor.pagecache= pagecache;
  /* Flags */
  DBUG_ASSERT((flags &
               ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION |
                 TRANSLOG_RECORD_CRC)) == 0);
  log_descriptor.flags= flags;
  /* Page overhead for every possible combination of flags */
  for (i= 0; i < TRANSLOG_FLAGS_NUM; i++)
  {
     page_overhead[i]= 7;
     if (i & TRANSLOG_PAGE_CRC)
       page_overhead[i]+= CRC_LENGTH;
     if (i & TRANSLOG_SECTOR_PROTECTION)
       page_overhead[i]+= (TRANSLOG_PAGE_SIZE /
                           DISK_DRIVE_SECTOR_SIZE) * 2;
  }
  log_descriptor.page_overhead= page_overhead[flags];
  log_descriptor.page_capacity_chunk_2=
    TRANSLOG_PAGE_SIZE - log_descriptor.page_overhead - 1;
  DBUG_ASSERT(TRANSLOG_WRITE_BUFFER % TRANSLOG_PAGE_SIZE == 0);
  log_descriptor.buffer_capacity_chunk_2=
    (TRANSLOG_WRITE_BUFFER / TRANSLOG_PAGE_SIZE) *
    log_descriptor.page_capacity_chunk_2;
  log_descriptor.half_buffer_capacity_chunk_2=
    log_descriptor.buffer_capacity_chunk_2 / 2;
  DBUG_PRINT("info",
             ("Overhead: %u  pc2: %u  bc2: %u,  bc2/2: %u",
              log_descriptor.page_overhead,
              log_descriptor.page_capacity_chunk_2,
              log_descriptor.buffer_capacity_chunk_2,
              log_descriptor.half_buffer_capacity_chunk_2));

  /* *** Current state of the log handler *** */

  /* Init log handler file handlers cache */
  for (i= 0; i < OPENED_FILES_NUM; i++)
    log_descriptor.log_file_num[i]= -1;

  /* just to init it somehow */
  translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);

  /* Buffers for log writing */
  for (i= 0; i < TRANSLOG_BUFFERS_NO; i++)
  {
    if (translog_buffer_init(log_descriptor.buffers + i))
      DBUG_RETURN(1);
#ifndef DBUG_OFF
    log_descriptor.buffers[i].buffer_no= (uint8) i;
#endif
    /* The cast has to be applied to the address of buffer i as a whole */
    DBUG_PRINT("info", ("translog_buffer buffer #%u: 0x%lx",
                        (uint) i, (ulong) (log_descriptor.buffers + i)));
  }

  logs_found= (last_logno != FILENO_IMPOSSIBLE);

  if (logs_found)
  {
    my_bool pageok;
    /*
      TODO: scan directory for maria_log.XXXXXXXX files and find
       highest XXXXXXXX & set logs_found
      TODO: check that last checkpoint within present log addresses space

      find the log end
    */
    if (LSN_FILE_NO(last_checkpoint_lsn) == FILENO_IMPOSSIBLE)
    {
      DBUG_ASSERT(LSN_OFFSET(last_checkpoint_lsn) == 0);
      /* there was no checkpoints we will read from the beginning */
      sure_page= (LSN_ONE_FILE | TRANSLOG_PAGE_SIZE);
    }
    else
    {
      /* Start from the beginning of the page with the last checkpoint */
      sure_page= last_checkpoint_lsn;
      DBUG_ASSERT(LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE != 0);
      sure_page-= LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE;
    }
    log_descriptor.horizon= last_page= MAKE_LSN(last_logno,0);
    if (translog_get_last_page_addr(&last_page, &pageok))
      DBUG_RETURN(1);
    if (LSN_OFFSET(last_page) == 0)
    {
      if (LSN_FILE_NO(last_page) == 1)
      {
        logs_found= 0;                          /* file #1 has no pages */
      }
      else
      {
        /* The last file has no pages: take the last page of previous file */
        last_page-= LSN_ONE_FILE;
        if (translog_get_last_page_addr(&last_page, &pageok))
          DBUG_RETURN(1);
      }
    }
  }
  if (logs_found)
  {
    TRANSLOG_ADDRESS current_page= sure_page;
    my_bool pageok;

    DBUG_ASSERT(sure_page <= last_page);

    /* TODO: check page size */

    last_valid_page= LSN_IMPOSSIBLE;
    /* scan and validate pages */
    do
    {
      TRANSLOG_ADDRESS current_file_last_page;
      current_file_last_page= current_page;
      if (translog_get_last_page_addr(&current_file_last_page, &pageok))
        DBUG_RETURN(1);
      if (!pageok)
      {
        DBUG_PRINT("error", ("File %lu have no complete last page",
                             (ulong) LSN_FILE_NO(current_file_last_page)));
        old_log_was_recovered= 1;
        /* This file is not written till the end so it should be last */
        last_page= current_file_last_page;
        /* TODO: issue warning */
      }
      /* Go through the pages of the current file */
      do
      {
        TRANSLOG_VALIDATOR_DATA data;
        uchar buffer[TRANSLOG_PAGE_SIZE], *page;
        data.addr= &current_page;
        if ((page= translog_get_page(&data, buffer, NULL)) == NULL)
          DBUG_RETURN(1);
        if (data.was_recovered)
        {
          DBUG_PRINT("error", ("file no: %lu (%d)  "
                               "rec_offset: 0x%lx (%lu) (%d)",
                               (ulong) LSN_FILE_NO(current_page),
                               (uint3korr(page + 3) !=
                                LSN_FILE_NO(current_page)),
                               (ulong) LSN_OFFSET(current_page),
                               (ulong) (LSN_OFFSET(current_page) /
                                        TRANSLOG_PAGE_SIZE),
                               (uint3korr(page) !=
                                LSN_OFFSET(current_page) /
                                TRANSLOG_PAGE_SIZE)));
          old_log_was_recovered= 1;
          break;
        }
        old_flags= page[TRANSLOG_PAGE_FLAGS];
        last_valid_page= current_page;
        current_page+= TRANSLOG_PAGE_SIZE; /* increase offset */
      } while (current_page <= current_file_last_page);
      /* Continue with the first page after the header of the next file */
      current_page+= LSN_ONE_FILE;
      current_page= LSN_REPLACE_OFFSET(current_page, TRANSLOG_PAGE_SIZE);
    } while (LSN_FILE_NO(current_page) <= LSN_FILE_NO(last_page) &&
             !old_log_was_recovered);
    if (last_valid_page == LSN_IMPOSSIBLE)
    {
      /* Panic!!! Even page which should be valid is invalid */
      /* TODO: issue error */
      DBUG_RETURN(1);
    }
    DBUG_PRINT("info", ("Last valid page is in file: %lu  "
                        "offset: %lu (0x%lx)  "
                        "Logs found: %d  was recovered: %d  "
                        "flags match: %d",
                        (ulong) LSN_FILE_NO(last_valid_page),
                        (ulong) LSN_OFFSET(last_valid_page),
                        (ulong) LSN_OFFSET(last_valid_page),
                        logs_found, old_log_was_recovered,
                        (old_flags == flags)));

    /* TODO: check server ID */
    if (logs_found && !old_log_was_recovered && old_flags == flags)
    {
      TRANSLOG_VALIDATOR_DATA data;
      uchar buffer[TRANSLOG_PAGE_SIZE], *page;
      uint16 chunk_offset;
      data.addr= &last_valid_page;
      /* continue old log */
      DBUG_ASSERT(LSN_FILE_NO(last_valid_page)==
                  LSN_FILE_NO(log_descriptor.horizon));
      if ((page= translog_get_page(&data, buffer, NULL)) == NULL ||
          (chunk_offset= translog_get_first_chunk_offset(page)) == 0)
        DBUG_RETURN(1);

      /* Puts filled part of old page in the buffer */
      log_descriptor.horizon= last_valid_page;
      translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
      /*
         Free space if filled with 0 and first uchar of
         real chunk can't be 0
      */
      while (chunk_offset < TRANSLOG_PAGE_SIZE && page[chunk_offset] != '\0')
      {
        uint16 chunk_length;
        if ((chunk_length=
             translog_get_total_chunk_length(page, chunk_offset)) == 0)
          DBUG_RETURN(1);
        DBUG_PRINT("info", ("chunk: offset: %u  length: %u",
                            (uint) chunk_offset, (uint) chunk_length));
        chunk_offset+= chunk_length;

        /* chunk can't cross the page border */
        DBUG_ASSERT(chunk_offset <= TRANSLOG_PAGE_SIZE);
      }
      /* chunk_offset is now the length of the used part of the page */
      memcpy(log_descriptor.buffers->buffer, page, chunk_offset);
      log_descriptor.bc.buffer->size+= chunk_offset;
      log_descriptor.bc.ptr+= chunk_offset;
      log_descriptor.bc.current_page_fill= chunk_offset;
      log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
                                                 (chunk_offset +
                                                  LSN_OFFSET(last_valid_page)));
      DBUG_PRINT("info", ("Move Page #%u: 0x%lx  chaser: %d  Size: %lu (%lu)",
                          (uint) log_descriptor.bc.buffer_no,
                          (ulong) log_descriptor.bc.buffer,
                          log_descriptor.bc.chaser,
                          (ulong) log_descriptor.bc.buffer->size,
                          (ulong) (log_descriptor.bc.ptr -
                                   log_descriptor.bc.buffer->buffer)));
      DBUG_EXECUTE("info", translog_check_cursor(&log_descriptor.bc););
    }
    if (!old_log_was_recovered && old_flags == flags)
    {
      LOGHANDLER_FILE_INFO info;
      if (translog_read_file_header(&info, log_descriptor.log_file_num[0]))
        DBUG_RETURN(1);
      version_changed= (info.maria_version != TRANSLOG_VERSION_ID);
    }
  }
  DBUG_PRINT("info", ("Logs found: %d  was recovered: %d",
                      logs_found, old_log_was_recovered));
  if (!logs_found)
  {
    /* Start new log system from scratch */
    /* Used space */
    log_descriptor.horizon= MAKE_LSN(1, TRANSLOG_PAGE_SIZE); /* header page */
    /* Current logs file number in page cache */
    if ((log_descriptor.log_file_num[0]=
         open_logfile_by_number_no_cache(1)) == -1 ||
        translog_write_file_header())
      DBUG_RETURN(1);
    if (ma_control_file_write_and_force(LSN_IMPOSSIBLE, 1,
                                        CONTROL_FILE_UPDATE_ONLY_LOGNO))
      DBUG_RETURN(1);
    /* assign buffer 0 */
    translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
    translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
  }
  else if (old_log_was_recovered || old_flags != flags || version_changed)
  {
    /* leave the damaged file untouched */
    log_descriptor.horizon+= LSN_ONE_FILE;
    /* header page */
    log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
                                               TRANSLOG_PAGE_SIZE);
    if (translog_create_new_file())
      DBUG_RETURN(1);
    /*
      Buffer system left untouched after recovery => we should init it
      (starting from buffer 0)
    */
    translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
    translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
  }

  /* all LSNs that are on disk are flushed */
  log_descriptor.sent_to_file=
    log_descriptor.flushed= log_descriptor.horizon;
  log_descriptor.in_buffers_only= log_descriptor.bc.buffer->offset;
  log_descriptor.max_lsn= LSN_IMPOSSIBLE; /* set to 0 */
  /*
    horizon is (potentially) address of the next LSN we need decrease
    it to signal that all LSNs before it are flushed
  */
  log_descriptor.flushed--; /* offset decreased */
  log_descriptor.sent_to_file--; /* offset decreased */
  /*
    Log records will refer to a MARIA_SHARE by a unique 2-byte id; set up
    structures for generating 2-byte ids:
  */
  my_atomic_rwlock_init(&LOCK_id_to_share);
  id_to_share= (MARIA_SHARE **) my_malloc(SHARE_ID_MAX * sizeof(MARIA_SHARE*),
                                          MYF(MY_WME | MY_ZEROFILL));
  if (unlikely(!id_to_share))
    DBUG_RETURN(1);
  /* The pointer is shifted by one element so that it can be indexed by id */
  id_to_share--; /* min id is 1 */
  translog_inited= 1;
  DBUG_RETURN(0);
}


/*
  Free transaction log file buffer

  SYNOPSIS
    translog_buffer_destroy()
    buffer_no            The buffer to free

  NOTE
unknown's avatar
unknown committed
2991
    This buffer should be locked
2992 2993 2994 2995 2996 2997
*/

static void translog_buffer_destroy(struct st_translog_buffer *buffer)
{
  DBUG_ENTER("translog_buffer_destroy");
  DBUG_PRINT("enter",
unknown's avatar
unknown committed
2998
             ("Buffer #%u: 0x%lx  file: %d  offset: (%lu,0x%lx)  size: %lu",
2999
              (uint) buffer->buffer_no, (ulong) buffer,
unknown's avatar
unknown committed
3000
              buffer->file,
unknown's avatar
unknown committed
3001
              LSN_IN_PARTS(buffer->offset),
3002 3003
              (ulong) buffer->size));
  DBUG_ASSERT(buffer->waiting_filling_buffer.last_thread == 0);
unknown's avatar
unknown committed
3004
  if (buffer->file != -1)
3005 3006
  {
    /*
unknown's avatar
unknown committed
3007
       We ignore errors here, because we can't do something about it
3008 3009 3010 3011
       (it is shutting down)
    */
    translog_buffer_flush(buffer);
  }
unknown's avatar
unknown committed
3012
  DBUG_PRINT("info", ("Destroy mutex: 0x%lx", (ulong) &buffer->mutex));
3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026
  pthread_mutex_destroy(&buffer->mutex);
  DBUG_VOID_RETURN;
}


/*
  Free log handler resources

  SYNOPSIS
    translog_destroy()
*/

void translog_destroy()
{
unknown's avatar
unknown committed
3027
  uint i;
3028
  DBUG_ENTER("translog_destroy");
3029

3030
  if (translog_inited)
3031
  {
3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047
    if (log_descriptor.bc.buffer->file != -1)
      translog_finish_page(&log_descriptor.horizon, &log_descriptor.bc);

    for (i= 0; i < TRANSLOG_BUFFERS_NO; i++)
    {
      struct st_translog_buffer *buffer= log_descriptor.buffers + i;
      translog_buffer_destroy(buffer);
    }

    /* close files */
    for (i= 0; i < OPENED_FILES_NUM; i++)
    {
      if (log_descriptor.log_file_num[i] != -1)
        translog_close_log_file(log_descriptor.log_file_num[i]);
    }
    pthread_mutex_destroy(&log_descriptor.sent_to_file_lock);
3048 3049
    pthread_mutex_destroy(&log_descriptor.file_header_lock);
    pthread_mutex_destroy(&log_descriptor.unfinished_files_lock);
3050
    pthread_mutex_destroy(&log_descriptor.purger_lock);
3051
    pthread_mutex_destroy(&log_descriptor.log_flush_lock);
3052 3053
    delete_dynamic(&log_descriptor.unfinished_files);

3054
    my_close(log_descriptor.directory_fd, MYF(MY_WME));
3055
    my_atomic_rwlock_destroy(&LOCK_id_to_share);
unknown's avatar
unknown committed
3056
    my_free((uchar*)(id_to_share + 1), MYF(MY_ALLOW_ZERO_PTR));
3057
    translog_inited= 0;
3058 3059 3060 3061 3062
  }
  DBUG_VOID_RETURN;
}


unknown's avatar
unknown committed
3063

3064

3065

3066 3067 3068 3069 3070 3071 3072 3073
/*
  Start new page

  SYNOPSIS
    translog_page_next()
    horizon              \ Position in file and buffer where we are
    cursor               /
    prev_buffer          OUT  Buffer which has to be flushed by the caller
                         (the buffer we switched away from), or NULL if
                         we stayed in the same buffer or an error
                         occurred. This is always set.

  DESCRIPTION
    If there is no room for one more page in the current buffer or in the
    current log file, switches to the next buffer via
    translog_buffer_next(). Otherwise finishes the current page and writes
    the header of the next page in the same buffer.

  NOTE
    handler should be locked

  RETURN
    0  OK
    1  Error
*/

static my_bool translog_page_next(TRANSLOG_ADDRESS *horizon,
                                  struct st_buffer_cursor *cursor,
                                  struct st_translog_buffer **prev_buffer)
{
  struct st_translog_buffer *buffer= cursor->buffer;
  my_bool buffer_is_full, file_is_full;
  DBUG_ENTER("translog_page_next");

  /*
    Give the out parameter a defined value right away: callers test it
    before they look at our return value, so it must be valid on the error
    path too.
  */
  *prev_buffer= NULL;

  /* Would one more page run past the end of the write buffer? */
  buffer_is_full= (cursor->ptr + TRANSLOG_PAGE_SIZE >
                   buffer->buffer + TRANSLOG_WRITE_BUFFER);
  /* Would one more page run past the maximum size of the log file? */
  file_is_full= (LSN_OFFSET(*horizon) >
                 log_descriptor.log_file_max_size - TRANSLOG_PAGE_SIZE);

  if (buffer_is_full || file_is_full)
  {
    DBUG_PRINT("info", ("Switch to next buffer  Buffer Size: %lu (%lu) => %d  "
                        "File size: %lu  max: %lu => %d",
                        (ulong) buffer->size,
                        (ulong) (cursor->ptr - buffer->buffer),
                        (int) buffer_is_full,
                        (ulong) LSN_OFFSET(*horizon),
                        (ulong) log_descriptor.log_file_max_size,
                        (int) file_is_full));
    /* The last argument tells translog_buffer_next() to start a new file */
    if (translog_buffer_next(horizon, cursor, file_is_full))
      DBUG_RETURN(1);
    /* The cursor now points into another buffer; hand the old one back */
    *prev_buffer= buffer;
    DBUG_PRINT("info", ("Buffer #%u (0x%lx): have to be flushed",
                        (uint) buffer->buffer_no, (ulong) buffer));
  }
  else
  {
    DBUG_PRINT("info", ("Use the same buffer #%u (0x%lu): "
                        "Buffer Size: %lu (%lu)",
                        (uint) buffer->buffer_no,
                        (ulong) buffer,
                        (ulong) cursor->buffer->size,
                        (ulong) (cursor->ptr - cursor->buffer->buffer)));
    translog_finish_page(horizon, cursor);
    translog_new_page_header(horizon, cursor);
  }
  DBUG_RETURN(0);
}


/*
  Write data of given length to the current page

  SYNOPSIS
    translog_write_data_on_page()
    horizon              \ Pointers on file and buffer
    cursor               /
    length               IN     length of the chunk
    buffer               buffer with data

  RETURN
unknown's avatar
unknown committed
3143 3144
    0  OK
    1  Error
3145 3146
*/

3147 3148 3149
static my_bool translog_write_data_on_page(TRANSLOG_ADDRESS *horizon,
                                           struct st_buffer_cursor *cursor,
                                           translog_size_t length,
unknown's avatar
unknown committed
3150
                                           uchar *buffer)
3151 3152
{
  DBUG_ENTER("translog_write_data_on_page");
unknown's avatar
unknown committed
3153 3154
  DBUG_PRINT("enter", ("Chunk length: %lu  Page size %u",
                       (ulong) length, (uint) cursor->current_page_fill));
3155
  DBUG_ASSERT(length > 0);
unknown's avatar
unknown committed
3156
  DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
3157 3158 3159
  DBUG_ASSERT(length + cursor->ptr <=cursor->buffer->buffer +
              TRANSLOG_WRITE_BUFFER);

unknown's avatar
unknown committed
3160
  memcpy(cursor->ptr, buffer, length);
3161
  cursor->ptr+= length;
unknown's avatar
unknown committed
3162 3163
  (*horizon)+= length; /* adds offset */
  cursor->current_page_fill+= length;
3164 3165
  if (!cursor->chaser)
    cursor->buffer->size+= length;
unknown's avatar
unknown committed
3166 3167
  DBUG_PRINT("info", ("Write data buffer #%u: 0x%lx  "
                      "chaser: %d  Size: %lu (%lu)",
3168 3169
                      (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
                      cursor->chaser, (ulong) cursor->buffer->size,
unknown's avatar
unknown committed
3170
                      (ulong) (cursor->ptr - cursor->buffer->buffer)));
unknown's avatar
unknown committed
3171
  DBUG_EXECUTE("info", translog_check_cursor(cursor););
3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187

  DBUG_RETURN(0);
}


/*
  Write data from parts of given length to the current page

  SYNOPSIS
    translog_write_parts_on_page()
    horizon              \ Pointers on file and buffer
    cursor               /
    length               IN     length of the chunk
    parts                IN/OUT chunk source

  RETURN
unknown's avatar
unknown committed
3188 3189
    0  OK
    1  Error
3190 3191
*/

3192 3193 3194 3195
static my_bool translog_write_parts_on_page(TRANSLOG_ADDRESS *horizon,
                                            struct st_buffer_cursor *cursor,
                                            translog_size_t length,
                                            struct st_translog_parts *parts)
3196 3197 3198 3199
{
  translog_size_t left= length;
  uint cur= (uint) parts->current;
  DBUG_ENTER("translog_write_parts_on_page");
unknown's avatar
unknown committed
3200
  DBUG_PRINT("enter", ("Chunk length: %lu  parts: %u of %u. Page size: %u  "
3201 3202
                       "Buffer size: %lu (%lu)",
                       (ulong) length,
3203
                       (uint) (cur + 1), (uint) parts->elements,
unknown's avatar
unknown committed
3204
                       (uint) cursor->current_page_fill,
3205
                       (ulong) cursor->buffer->size,
unknown's avatar
unknown committed
3206
                       (ulong) (cursor->ptr - cursor->buffer->buffer)));
3207
  DBUG_ASSERT(length > 0);
unknown's avatar
unknown committed
3208
  DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
3209 3210 3211 3212 3213 3214
  DBUG_ASSERT(length + cursor->ptr <=cursor->buffer->buffer +
              TRANSLOG_WRITE_BUFFER);

  do
  {
    translog_size_t len;
3215
    LEX_STRING *part;
unknown's avatar
unknown committed
3216
    uchar *buff;
3217

3218 3219
    DBUG_ASSERT(cur < parts->elements);
    part= parts->parts + cur;
unknown's avatar
unknown committed
3220
    buff= (uchar*) part->str;
3221 3222 3223
    DBUG_PRINT("info", ("Part: %u  Length: %lu  left: %lu  buff: 0x%lx",
                        (uint) (cur + 1), (ulong) part->length, (ulong) left,
                        (ulong) buff));
3224

3225
    if (part->length > left)
3226 3227 3228
    {
      /* we should write less then the current part */
      len= left;
3229 3230
      part->length-= len;
      part->str+= len;
unknown's avatar
unknown committed
3231
      DBUG_PRINT("info", ("Set new part: %u  Length: %lu",
3232
                          (uint) (cur + 1), (ulong) part->length));
3233 3234 3235
    }
    else
    {
3236
      len= part->length;
3237 3238 3239 3240 3241
      cur++;
      DBUG_PRINT("info", ("moved to next part (len: %lu)", (ulong) len));
    }
    DBUG_PRINT("info", ("copy: 0x%lx <- 0x%lx  %u",
                        (ulong) cursor->ptr, (ulong)buff, (uint)len));
3242 3243 3244 3245 3246 3247
    if (likely(len))
    {
      memcpy(cursor->ptr, buff, len);
      left-= len;
      cursor->ptr+= len;
    }
3248 3249
  } while (left);

unknown's avatar
unknown committed
3250
  DBUG_PRINT("info", ("Horizon: (%lu,0x%lx)  Length %lu(0x%lx)",
unknown's avatar
unknown committed
3251
                      LSN_IN_PARTS(*horizon),
3252
                      (ulong) length, (ulong) length));
3253
  parts->current= cur;
unknown's avatar
unknown committed
3254 3255
  (*horizon)+= length; /* offset increasing */
  cursor->current_page_fill+= length;
3256 3257
  if (!cursor->chaser)
    cursor->buffer->size+= length;
3258 3259
  DBUG_PRINT("info", ("Write parts buffer #%u: 0x%lx  "
                      "chaser: %d  Size: %lu (%lu)  "
unknown's avatar
unknown committed
3260
                      "Horizon: (%lu,0x%lx)  buff offset: 0x%lx",
3261 3262
                      (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
                      cursor->chaser, (ulong) cursor->buffer->size,
unknown's avatar
unknown committed
3263
                      (ulong) (cursor->ptr - cursor->buffer->buffer),
unknown's avatar
unknown committed
3264
                      LSN_IN_PARTS(*horizon),
3265 3266
                      (ulong) (LSN_OFFSET(cursor->buffer->offset) +
                               cursor->buffer->size)));
unknown's avatar
unknown committed
3267
  DBUG_EXECUTE("info", translog_check_cursor(cursor););
3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278

  DBUG_RETURN(0);
}


/*
  Put 1 group chunk type 0 header into parts array

  SYNOPSIS
    translog_write_variable_record_1group_header()
    parts                Descriptor of record source parts
unknown's avatar
unknown committed
3279
    type                 The log record type
3280
    short_trid           Short transaction ID or 0 if it has no sense
3281 3282 3283 3284 3285 3286 3287 3288 3289
    header_length        Calculated header length of chunk type 0
    chunk0_header        Buffer for the chunk header writing
*/

static void
translog_write_variable_record_1group_header(struct st_translog_parts *parts,
                                             enum translog_record_type type,
                                             SHORT_TRANSACTION_ID short_trid,
                                             uint16 header_length,
unknown's avatar
unknown committed
3290
                                             uchar *chunk0_header)
3291
{
3292
  LEX_STRING *part;
unknown's avatar
unknown committed
3293
  DBUG_ASSERT(parts->current != 0);     /* first part is left for header */
3294 3295 3296
  part= parts->parts + (--parts->current);
  parts->total_record_length+= (part->length= header_length);
  part->str= (char*)chunk0_header;
unknown's avatar
unknown committed
3297
  /* puts chunk type */
unknown's avatar
unknown committed
3298
  *chunk0_header= (uchar) (type | TRANSLOG_CHUNK_LSN);
3299
  int2store(chunk0_header + 1, short_trid);
unknown's avatar
unknown committed
3300
  /* puts record length */
3301 3302 3303
  translog_write_variable_record_1group_code_len(chunk0_header + 3,
                                                 parts->record_length,
                                                 header_length);
unknown's avatar
unknown committed
3304
  /* puts 0 as chunk length which indicate 1 group record */
3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316
  int2store(chunk0_header + header_length - 2, 0);
}


/*
  Increase number of writers for this buffer

  SYNOPSIS
    translog_buffer_increase_writers()
    buffer               target buffer
*/

unknown's avatar
unknown committed
3317 3318
static inline void
translog_buffer_increase_writers(struct st_translog_buffer *buffer)
3319 3320
{
  DBUG_ENTER("translog_buffer_increase_writers");
unknown's avatar
unknown committed
3321
  translog_buffer_lock_assert_owner(buffer);
3322
  buffer->copy_to_buffer_in_progress++;
unknown's avatar
unknown committed
3323
  DBUG_PRINT("info", ("copy_to_buffer_in_progress. Buffer #%u 0x%lx: %d",
3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341
                      (uint) buffer->buffer_no, (ulong) buffer,
                      buffer->copy_to_buffer_in_progress));
  DBUG_VOID_RETURN;
}


/*
  Decrease number of writers for this buffer

  SYNOPSIS
    translog_buffer_decrease_writers()
    buffer               target buffer
*/


static void translog_buffer_decrease_writers(struct st_translog_buffer *buffer)
{
  DBUG_ENTER("translog_buffer_decrease_writers");
unknown's avatar
unknown committed
3342
  translog_buffer_lock_assert_owner(buffer);
3343
  buffer->copy_to_buffer_in_progress--;
unknown's avatar
unknown committed
3344
  DBUG_PRINT("info", ("copy_to_buffer_in_progress. Buffer #%u 0x%lx: %d",
3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363
                      (uint) buffer->buffer_no, (ulong) buffer,
                      buffer->copy_to_buffer_in_progress));
  if (buffer->copy_to_buffer_in_progress == 0 &&
      buffer->waiting_filling_buffer.last_thread != NULL)
    wqueue_release_queue(&buffer->waiting_filling_buffer);
  DBUG_VOID_RETURN;
}


/*
  Put chunk 2 from new page beginning

  SYNOPSIS
    translog_write_variable_record_chunk2_page()
    parts                Descriptor of record source parts
    horizon              \ Pointers on file position and buffer
    cursor               /

  DESCRIPTION
    Moves to the next page (flushing the previous buffer if the page
    switch also switched buffers), writes the one byte chunk type 2
    header and fills the rest of the page
    (log_descriptor.page_capacity_chunk_2 bytes) from 'parts'.

  RETURN
    0  OK
    1  Error
*/

static my_bool
translog_write_variable_record_chunk2_page(struct st_translog_parts *parts,
                                           TRANSLOG_ADDRESS *horizon,
                                           struct st_buffer_cursor *cursor)
{
  /*
    Start from NULL so that the test below is well defined even if
    translog_page_next() fails without assigning the out parameter.
  */
  struct st_translog_buffer *buffer_to_flush= NULL;
  int rc;
  uchar chunk2_header[1];
  DBUG_ENTER("translog_write_variable_record_chunk2_page");
  chunk2_header[0]= TRANSLOG_CHUNK_NOHDR;

  rc= translog_page_next(horizon, cursor, &buffer_to_flush);
  if (buffer_to_flush != NULL)
  {
    /* We left a buffer behind: drop our writer mark on it and flush it */
    rc|= translog_buffer_lock(buffer_to_flush);
    translog_buffer_decrease_writers(buffer_to_flush);
    if (!rc)
      rc= translog_buffer_flush(buffer_to_flush);
    rc|= translog_buffer_unlock(buffer_to_flush);
  }
  if (rc)
    DBUG_RETURN(1);

  /* Puts chunk type */
  translog_write_data_on_page(horizon, cursor, 1, chunk2_header);
  /* Puts chunk body */
  translog_write_parts_on_page(horizon, cursor,
                               log_descriptor.page_capacity_chunk_2, parts);
  DBUG_RETURN(0);
}


/*
  Put chunk 3 of requested length in the buffer from new page beginning

  SYNOPSIS
    translog_write_variable_record_chunk3_page()
    parts                Descriptor of record source parts
    length               Length of this chunk; 0 means "only start the
                         new page, write no chunk"
    horizon              \ Pointers on file position and buffer
    cursor               /

  DESCRIPTION
    Moves to the next page (flushing the previous buffer if the page
    switch also switched buffers). If length is not 0, puts a 3 byte
    chunk type 3 header (type + 2 bytes of length) in front of the parts
    and writes header plus 'length' bytes of data to the page.

  RETURN
    0  OK
    1  Error
*/

static my_bool
translog_write_variable_record_chunk3_page(struct st_translog_parts *parts,
                                           uint16 length,
                                           TRANSLOG_ADDRESS *horizon,
                                           struct st_buffer_cursor *cursor)
{
  /*
    Start from NULL so that the test below is well defined even if
    translog_page_next() fails without assigning the out parameter.
  */
  struct st_translog_buffer *buffer_to_flush= NULL;
  LEX_STRING *part;
  int rc;
  uchar chunk3_header[1 + 2];
  DBUG_ENTER("translog_write_variable_record_chunk3_page");

  rc= translog_page_next(horizon, cursor, &buffer_to_flush);
  if (buffer_to_flush != NULL)
  {
    /* We left a buffer behind: drop our writer mark on it and flush it */
    rc|= translog_buffer_lock(buffer_to_flush);
    translog_buffer_decrease_writers(buffer_to_flush);
    if (!rc)
      rc= translog_buffer_flush(buffer_to_flush);
    rc|= translog_buffer_unlock(buffer_to_flush);
  }
  if (rc)
    DBUG_RETURN(1);
  if (length == 0)
  {
    /* It was call to write page header only (no data for chunk 3) */
    DBUG_PRINT("info", ("It is a call to make page header only"));
    DBUG_RETURN(0);
  }

  DBUG_ASSERT(parts->current != 0);       /* first part is left for header */
  /* Take the slot before the current part for the chunk header */
  part= parts->parts + (--parts->current);
  parts->total_record_length+= (part->length= 1 + 2);
  part->str= (char*)chunk3_header;
  /* Puts chunk type */
  *chunk3_header= (uchar) (TRANSLOG_CHUNK_LNGTH);
  /* Puts chunk length */
  int2store(chunk3_header + 1, length);

  /* Header (1 + 2 bytes) and data are written in one go */
  translog_write_parts_on_page(horizon, cursor, length + 1 + 2, parts);
  DBUG_RETURN(0);
}

/*
  Move log pointer (horizon) on given number pages starting from next page,
  and given offset on the last page

  SYNOPSIS
    translog_advance_pointer()
    pages                Number of full pages starting from the next one
    last_page_data       Plus this data on the last page

  DESCRIPTION
    The distance to move is: the rest of the current page + 'pages' full
    pages + page overhead and 'last_page_data' bytes on the last page.
    While this distance does not fit into the current buffer or the
    current file, the current buffer is filled up to the nearest of those
    two limits, marked as having one more writer, and the cursor
    log_descriptor.bc is switched to the next buffer (a new log file is
    created when the file limit was the one reached). Finally the
    remaining distance is applied to the last buffer, which also gets one
    more writer.

  RETURN
    0  OK
    1  Error
*/

static my_bool translog_advance_pointer(uint pages, uint16 last_page_data)
{
  struct st_buffer_cursor *bc= &log_descriptor.bc;
  translog_size_t last_page_offset= (log_descriptor.page_overhead +
                                     last_page_data);
  translog_size_t offset= (TRANSLOG_PAGE_SIZE -
                           bc->current_page_fill +
                           pages * TRANSLOG_PAGE_SIZE + last_page_offset);
  translog_size_t buffer_end_offset, file_end_offset, step;
  DBUG_ENTER("translog_advance_pointer");
  DBUG_PRINT("enter", ("Pointer:  (%lu, 0x%lx) + %u + %u pages + %u + %u",
                       LSN_IN_PARTS(log_descriptor.horizon),
                       (uint) (TRANSLOG_PAGE_SIZE -
                               bc->current_page_fill),
                       pages, (uint) log_descriptor.page_overhead,
                       (uint) last_page_data));

  for (;;)
  {
    uint8 next_buffer_no;
    struct st_translog_buffer *next_buffer;
    struct st_translog_buffer *filled_buffer;

    /* Free space left in the current buffer and in the current file */
    buffer_end_offset= TRANSLOG_WRITE_BUFFER - bc->buffer->size;
    file_end_offset= (log_descriptor.log_file_max_size -
                      LSN_OFFSET(log_descriptor.horizon));
    DBUG_PRINT("info", ("offset: %lu  buffer_end_offs: %lu, "
                        "file_end_offs:  %lu",
                        (ulong) offset, (ulong) buffer_end_offset,
                        (ulong) file_end_offset));
    DBUG_PRINT("info", ("Buff #%u %u (0x%lx) offset 0x%lx + size 0x%lx = "
                        "0x%lx (0x%lx)",
                        (uint) bc->buffer->buffer_no,
                        (uint) bc->buffer_no,
                        (ulong) bc->buffer,
                        (ulong) LSN_OFFSET(bc->buffer->offset),
                        (ulong) bc->buffer->size,
                        (ulong) (LSN_OFFSET(bc->buffer->offset) +
                                 bc->buffer->size),
                        (ulong) LSN_OFFSET(log_descriptor.horizon)));
    DBUG_ASSERT(LSN_OFFSET(bc->buffer->offset) + bc->buffer->size ==
                LSN_OFFSET(log_descriptor.horizon));

    /* Everything that is left fits where we are: leave the loop */
    if (offset <= buffer_end_offset && offset <= file_end_offset)
      break;

    filled_buffer= bc->buffer;
    next_buffer_no= (bc->buffer_no + 1) % TRANSLOG_BUFFERS_NO;
    next_buffer= log_descriptor.buffers + next_buffer_no;

    translog_buffer_lock(next_buffer);
    translog_wait_for_buffer_free(next_buffer);

    /* Advance up to whichever limit (buffer end or file end) comes first */
    step= (buffer_end_offset < file_end_offset ?
           buffer_end_offset : file_end_offset);
    /* TODO: check is it ptr or size enough */
    bc->buffer->size+= step;
    bc->ptr+= step;
    DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx  chaser: %d  Size: %lu (%lu)",
                        (uint) bc->buffer->buffer_no,
                        (ulong) bc->buffer,
                        bc->chaser,
                        (ulong) bc->buffer->size,
                        (ulong) (bc->ptr - bc->buffer->buffer)));
    DBUG_ASSERT((ulong) (bc->ptr - bc->buffer->buffer) == bc->buffer->size);
    DBUG_ASSERT(bc->buffer->buffer_no == bc->buffer_no);
    translog_buffer_increase_writers(bc->buffer);

    if (file_end_offset <= buffer_end_offset)
    {
      /* The file limit was hit: continue on the first page of a new file */
      log_descriptor.horizon+= LSN_ONE_FILE;
      log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
                                                 TRANSLOG_PAGE_SIZE);
      DBUG_PRINT("info", ("New file: %lu",
                          (ulong) LSN_FILE_NO(log_descriptor.horizon)));
      if (translog_create_new_file())
      {
        DBUG_RETURN(1);
      }
    }
    else
    {
      DBUG_PRINT("info", ("The same file"));
      log_descriptor.horizon+= step;            /* offset increasing */
    }

    /* Make the cursor point to the next buffer and link the old one to it */
    translog_start_buffer(next_buffer, bc, next_buffer_no);
    filled_buffer->next_buffer_offset= next_buffer->offset;
    if (translog_buffer_unlock(filled_buffer))
      DBUG_RETURN(1);
    offset-= step;
  }

  /* Apply what is left to the buffer we ended up in */
  bc->ptr+= offset;
  bc->buffer->size+= offset;
  translog_buffer_increase_writers(bc->buffer);
  log_descriptor.horizon+= offset;              /* offset increasing */
  bc->current_page_fill= last_page_offset;
  DBUG_PRINT("info", ("drop write_counter"));
  bc->write_counter= 0;
  bc->previous_offset= 0;
  DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx  chaser: %d  Size: %lu (%lu)  "
                      "offset: %u  last page: %u",
                      (uint) bc->buffer->buffer_no,
                      (ulong) bc->buffer,
                      bc->chaser,
                      (ulong) bc->buffer->size,
                      (ulong) (bc->ptr - bc->buffer->buffer),
                      (uint) offset,
                      (uint) last_page_offset));
  DBUG_PRINT("info",
             ("pointer moved to: (%lu, 0x%lx)",
              LSN_IN_PARTS(log_descriptor.horizon)));
  DBUG_EXECUTE("info", translog_check_cursor(bc););
  bc->protected= 0;
  DBUG_RETURN(0);
}



/*
  Get the free space of the current page

  SYNOPSIS
    translog_get_current_page_rest()

  NOTE loghandler should be locked

  RETURN
    number of bytes left on the current page, i.e. the page size minus
    the current fill of the page under the main cursor (log_descriptor.bc)
*/

#define translog_get_current_page_rest() \
  (TRANSLOG_PAGE_SIZE - (log_descriptor.bc.current_page_fill))
3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642

/*
  Get the free space of the current buffer in full pages

  SYNOPSIS
     translog_get_current_buffer_rest()

  NOTE loghandler should be locked

  RETURN
    number of full pages left on the current buffer, i.e. the distance
    from the main cursor's write position to the end of its buffer,
    divided (rounding down) by the page size
*/

#define translog_get_current_buffer_rest() \
  (((log_descriptor.bc.buffer->buffer + TRANSLOG_WRITE_BUFFER) - \
    log_descriptor.bc.ptr) / TRANSLOG_PAGE_SIZE)

/*
  Calculate possible group size without first (current) page

  SYNOPSIS
    translog_get_current_group_size()

  NOTE loghandler should be locked

  RETURN
    group size without first (current) page
*/

static translog_size_t translog_get_current_group_size()
{
  /* buffer rest in full pages */
  translog_size_t buffer_rest= translog_get_current_buffer_rest();
  DBUG_ENTER("translog_get_current_group_size");
unknown's avatar
unknown committed
3643
  DBUG_PRINT("info", ("buffer_rest in pages: %u", buffer_rest));
3644 3645 3646 3647 3648

  buffer_rest*= log_descriptor.page_capacity_chunk_2;
  /* in case of only half of buffer free we can write this and next buffer */
  if (buffer_rest < log_descriptor.half_buffer_capacity_chunk_2)
  {
unknown's avatar
unknown committed
3649 3650
    DBUG_PRINT("info", ("buffer_rest: %lu -> add %lu",
                        (ulong) buffer_rest,
3651 3652 3653 3654
                        (ulong) log_descriptor.buffer_capacity_chunk_2));
    buffer_rest+= log_descriptor.buffer_capacity_chunk_2;
  }

unknown's avatar
unknown committed
3655
  DBUG_PRINT("info", ("buffer_rest: %lu", (ulong) buffer_rest));
3656 3657 3658 3659 3660

  DBUG_RETURN(buffer_rest);
}


unknown's avatar
unknown committed
3661 3662
/**
   @brief Write variable record in 1 group.
3663

unknown's avatar
unknown committed
3664 3665 3666 3667 3668 3669 3670 3671 3672 3673
   @param  lsn             LSN of the record will be written here
   @param  type            the log record type
   @param  short_trid      Short transaction ID or 0 if it has no sense
   @param  parts           Descriptor of record source parts
   @param  buffer_to_flush Buffer which have to be flushed if it is not 0
   @param  header_length   Calculated header length of chunk type 0
   @param  trn             Transaction structure pointer for hooks by
                           record log type, for short_id
   @param  hook_arg        Argument which will be passed to pre-write and
                           in-write hooks of this record.
3674

unknown's avatar
unknown committed
3675 3676 3677
   @return Operation status
     @retval 0      OK
     @retval 1      Error
3678 3679 3680 3681 3682
*/

static my_bool
translog_write_variable_record_1group(LSN *lsn,
                                      enum translog_record_type type,
unknown's avatar
unknown committed
3683
                                      MARIA_HA *tbl_info,
3684 3685 3686 3687
                                      SHORT_TRANSACTION_ID short_trid,
                                      struct st_translog_parts *parts,
                                      struct st_translog_buffer
                                      *buffer_to_flush, uint16 header_length,
unknown's avatar
unknown committed
3688
                                      TRN *trn, void *hook_arg)
3689 3690 3691 3692 3693 3694 3695
{
  TRANSLOG_ADDRESS horizon;
  struct st_buffer_cursor cursor;
  int rc= 0;
  uint i;
  translog_size_t record_rest, full_pages, first_page;
  uint additional_chunk3_page= 0;
unknown's avatar
unknown committed
3696
  uchar chunk0_header[1 + 2 + 5 + 2];
3697 3698 3699
  DBUG_ENTER("translog_write_variable_record_1group");

  *lsn= horizon= log_descriptor.horizon;
3700 3701 3702 3703
  if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
                             *lsn, TRUE) ||
      (log_record_type_descriptor[type].inwrite_hook &&
       (*log_record_type_descriptor[type].inwrite_hook)(type, trn, tbl_info,
unknown's avatar
unknown committed
3704
                                                        lsn, hook_arg)))
3705
  {
unknown's avatar
unknown committed
3706
    translog_unlock();
3707 3708 3709 3710 3711
    DBUG_RETURN(1);
  }
  cursor= log_descriptor.bc;
  cursor.chaser= 1;

unknown's avatar
unknown committed
3712
  /* Advance pointer To be able unlock the loghandler */
3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725
  first_page= translog_get_current_page_rest();
  record_rest= parts->record_length - (first_page - header_length);
  full_pages= record_rest / log_descriptor.page_capacity_chunk_2;
  record_rest= (record_rest % log_descriptor.page_capacity_chunk_2);

  if (record_rest + 1 == log_descriptor.page_capacity_chunk_2)
  {
    DBUG_PRINT("info", ("2 chunks type 3 is needed"));
    /* We will write 2 chunks type 3 at the end of this group */
    additional_chunk3_page= 1;
    record_rest= 1;
  }

unknown's avatar
unknown committed
3726 3727
  DBUG_PRINT("info", ("first_page: %u (%u)  full_pages: %u (%lu)  "
                      "additional: %u (%u)  rest %u = %u",
3728 3729 3730 3731 3732 3733 3734 3735 3736
                      first_page, first_page - header_length,
                      full_pages,
                      (ulong) full_pages *
                      log_descriptor.page_capacity_chunk_2,
                      additional_chunk3_page,
                      additional_chunk3_page *
                      (log_descriptor.page_capacity_chunk_2 - 1),
                      record_rest, parts->record_length));
  /* record_rest + 3 is chunk type 3 overhead + record_rest */
unknown's avatar
unknown committed
3737 3738
  rc|= translog_advance_pointer(full_pages + additional_chunk3_page,
                                (record_rest ? record_rest + 3 : 0));
3739 3740 3741 3742 3743
  log_descriptor.bc.buffer->last_lsn= *lsn;

  rc|= translog_unlock();

  /*
unknown's avatar
unknown committed
3744
     Check if we switched buffer and need process it (current buffer is
3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762
     unlocked already => we will not delay other threads
  */
  if (buffer_to_flush != NULL)
  {
    if (!rc)
      rc= translog_buffer_flush(buffer_to_flush);
    rc|= translog_buffer_unlock(buffer_to_flush);
  }
  if (rc)
    DBUG_RETURN(1);

  translog_write_variable_record_1group_header(parts, type, short_trid,
                                               header_length, chunk0_header);

  /* fill the pages */
  translog_write_parts_on_page(&horizon, &cursor, first_page, parts);


unknown's avatar
unknown committed
3763
  DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx)",
unknown's avatar
unknown committed
3764 3765
                      LSN_IN_PARTS(log_descriptor.horizon),
                      LSN_IN_PARTS(horizon)));
3766 3767 3768 3769 3770 3771

  for (i= 0; i < full_pages; i++)
  {
    if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
      DBUG_RETURN(1);

unknown's avatar
unknown committed
3772
    DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx)",
unknown's avatar
unknown committed
3773 3774
                        LSN_IN_PARTS(log_descriptor.horizon),
                        LSN_IN_PARTS(horizon)));
3775 3776 3777 3778 3779 3780 3781 3782 3783
  }

  if (additional_chunk3_page)
  {
    if (translog_write_variable_record_chunk3_page(parts,
                                                   log_descriptor.
                                                   page_capacity_chunk_2 - 2,
                                                   &horizon, &cursor))
      DBUG_RETURN(1);
unknown's avatar
unknown committed
3784
    DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx)",
unknown's avatar
unknown committed
3785 3786
                        LSN_IN_PARTS(log_descriptor.horizon),
                        LSN_IN_PARTS(horizon)));
unknown's avatar
unknown committed
3787
    DBUG_ASSERT(cursor.current_page_fill == TRANSLOG_PAGE_SIZE);
3788 3789 3790 3791 3792 3793
  }

  if (translog_write_variable_record_chunk3_page(parts,
                                                 record_rest,
                                                 &horizon, &cursor))
    DBUG_RETURN(1);
unknown's avatar
unknown committed
3794
    DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx)",
3795 3796 3797 3798
                        (ulong) LSN_FILE_NO(log_descriptor.horizon),
                        (ulong) LSN_OFFSET(log_descriptor.horizon),
                        (ulong) LSN_FILE_NO(horizon),
                        (ulong) LSN_OFFSET(horizon)));
3799

unknown's avatar
unknown committed
3800
  if (!(rc= translog_buffer_lock(cursor.buffer)))
3801 3802
  {
    /*
unknown's avatar
unknown committed
3803
       Check if we wrote something on 1:st not full page and need to reconstruct
3804 3805 3806 3807 3808 3809 3810 3811 3812
       CRC and sector protection
    */
    translog_buffer_decrease_writers(cursor.buffer);
  }
  rc|= translog_buffer_unlock(cursor.buffer);
  DBUG_RETURN(rc);
}


unknown's avatar
unknown committed
3813 3814
/**
   @brief Write variable record in 1 chunk.
3815

unknown's avatar
unknown committed
3816 3817 3818 3819 3820 3821 3822 3823 3824 3825
   @param  lsn             LSN of the record will be written here
   @param  type            the log record type
   @param  short_trid      Short transaction ID or 0 if it has no sense
   @param  parts           Descriptor of record source parts
   @param  buffer_to_flush Buffer which have to be flushed if it is not 0
   @param  header_length   Calculated header length of chunk type 0
   @param  trn             Transaction structure pointer for hooks by
                           record log type, for short_id
   @param  hook_arg        Argument which will be passed to pre-write and
                           in-write hooks of this record.
3826

unknown's avatar
unknown committed
3827 3828 3829
   @return Operation status
     @retval 0      OK
     @retval 1      Error
3830 3831 3832 3833 3834
*/

static my_bool
translog_write_variable_record_1chunk(LSN *lsn,
                                      enum translog_record_type type,
unknown's avatar
unknown committed
3835
                                      MARIA_HA *tbl_info,
3836 3837 3838 3839
                                      SHORT_TRANSACTION_ID short_trid,
                                      struct st_translog_parts *parts,
                                      struct st_translog_buffer
                                      *buffer_to_flush, uint16 header_length,
unknown's avatar
unknown committed
3840
                                      TRN *trn, void *hook_arg)
3841 3842
{
  int rc;
unknown's avatar
unknown committed
3843
  uchar chunk0_header[1 + 2 + 5 + 2];
3844 3845 3846 3847 3848 3849
  DBUG_ENTER("translog_write_variable_record_1chunk");

  translog_write_variable_record_1group_header(parts, type, short_trid,
                                               header_length, chunk0_header);

  *lsn= log_descriptor.horizon;
3850 3851 3852 3853
  if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
                                 *lsn, TRUE) ||
      (log_record_type_descriptor[type].inwrite_hook &&
       (*log_record_type_descriptor[type].inwrite_hook)(type, trn, tbl_info,
unknown's avatar
unknown committed
3854
                                                        lsn, hook_arg)))
3855
  {
unknown's avatar
unknown committed
3856
    translog_unlock();
3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887
    DBUG_RETURN(1);
  }

  rc= translog_write_parts_on_page(&log_descriptor.horizon,
                                   &log_descriptor.bc,
                                   parts->total_record_length, parts);
  log_descriptor.bc.buffer->last_lsn= *lsn;
  rc|= translog_unlock();

  /*
     check if we switched buffer and need process it (current buffer is
     unlocked already => we will not delay other threads
  */
  if (buffer_to_flush != NULL)
  {
    if (!rc)
      rc= translog_buffer_flush(buffer_to_flush);
    rc|= translog_buffer_unlock(buffer_to_flush);
  }

  DBUG_RETURN(rc);
}


/*
  Calculate and write LSN difference (compressed LSN)

  SYNOPSIS
    translog_put_LSN_diff()
    base_lsn             LSN from which we calculate difference
    lsn                  LSN for codding
unknown's avatar
unknown committed
3888
    dst                  Result will be written to dst[-pack_length] .. dst[-1]
3889 3890

  NOTE:
unknown's avatar
unknown committed
3891
    To store an LSN in a compact way we will use the following compression:
3892

unknown's avatar
unknown committed
3893 3894
    If a log record has LSN1, and it contains the lSN2 as a back reference,
    Instead of LSN2 we write LSN1-LSN2, encoded as:
3895 3896 3897 3898 3899

     two bits     the number N (see below)
     14 bits
     N bytes

unknown's avatar
unknown committed
3900
     That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2
3901 3902 3903
     is stored in the first two bits.

  RETURN
unknown's avatar
unknown committed
3904 3905
    #     pointer on coded LSN
    NULL  Error
3906 3907
*/

unknown's avatar
unknown committed
3908
static uchar *translog_put_LSN_diff(LSN base_lsn, LSN lsn, uchar *dst)
3909 3910
{
  DBUG_ENTER("translog_put_LSN_diff");
unknown's avatar
unknown committed
3911
  DBUG_PRINT("enter", ("Base: (0x%lu,0x%lx)  val: (0x%lu,0x%lx)  dst: 0x%lx",
unknown's avatar
unknown committed
3912 3913
                       LSN_IN_PARTS(base_lsn), LSN_IN_PARTS(lsn),
                       (ulong) dst));
3914
  if (LSN_FILE_NO(base_lsn) == LSN_FILE_NO(lsn))
3915 3916
  {
    uint32 diff;
3917 3918
    DBUG_ASSERT(base_lsn > lsn);
    diff= base_lsn - lsn;
unknown's avatar
unknown committed
3919
    DBUG_PRINT("info", ("File is the same. Diff: 0x%lx", (ulong) diff));
3920 3921 3922
    if (diff <= 0x3FFF)
    {
      dst-= 2;
unknown's avatar
unknown committed
3923
      /*
unknown's avatar
unknown committed
3924
        Note we store this high uchar first to ensure that first uchar has
unknown's avatar
unknown committed
3925 3926
        0 in the 3 upper bits.
      */
3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952
      dst[0]= diff >> 8;
      dst[1]= (diff & 0xFF);
    }
    else if (diff <= 0x3FFFFF)
    {
      dst-= 3;
      dst[0]= 0x40 | (diff >> 16);
      int2store(dst + 1, diff & 0xFFFF);
    }
    else if (diff <= 0x3FFFFFFF)
    {
      dst-= 4;
      dst[0]= 0x80 | (diff >> 24);
      int3store(dst + 1, diff & 0xFFFFFF);
    }
    else
    {
      dst-= 5;
      dst[0]= 0xC0;
      int4store(dst + 1, diff);
    }
  }
  else
  {
    uint32 diff;
    uint32 offset_diff;
3953 3954 3955
    ulonglong base_offset= LSN_OFFSET(base_lsn);
    DBUG_ASSERT(base_lsn > lsn);
    diff= LSN_FILE_NO(base_lsn) - LSN_FILE_NO(lsn);
unknown's avatar
unknown committed
3956 3957
    DBUG_PRINT("info", ("File is different. Diff: 0x%lx", (ulong) diff));

3958
    if (base_offset < LSN_OFFSET(lsn))
3959 3960 3961
    {
      /* take 1 from file offset */
      diff--;
unknown's avatar
unknown committed
3962
      base_offset+= LL(0x100000000);
3963
    }
3964
    offset_diff= base_offset - LSN_OFFSET(lsn);
3965 3966
    if (diff > 0x3f)
    {
3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980
      /*
        It is full LSN after special 1 diff (which is impossible
        in real life)
      */
      dst-= 2 + LSN_STORE_SIZE;
      dst[0]= 0;
      dst[1]= 1;
      lsn_store(dst + 2, lsn);
    }
    else
    {
      dst-= 5;
      *dst= (0xC0 | diff);
      int4store(dst + 1, offset_diff);
3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997
    }
  }
  DBUG_PRINT("info", ("new dst: 0x%lx", (ulong) dst));
  DBUG_RETURN(dst);
}


/*
  Get LSN from LSN-difference (compressed LSN)

  SYNOPSIS
    translog_get_LSN_from_diff()
    base_lsn             LSN from which we calculate difference
    src                  pointer to coded lsn
    dst                  pointer to buffer where to write 7byte LSN

  NOTE:
unknown's avatar
unknown committed
3998
    To store an LSN in a compact way we will use the following compression:
3999 4000

    If a log record has LSN1, and it contains the lSN2 as a back reference,
unknown's avatar
unknown committed
4001
    Instead of LSN2 we write LSN1-LSN2, encoded as:
4002 4003 4004 4005 4006

     two bits     the number N (see below)
     14 bits
     N bytes

unknown's avatar
unknown committed
4007 4008
    That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2
    is stored in the first two bits.
4009 4010 4011 4012 4013

  RETURN
    pointer to buffer after decoded LSN
*/

unknown's avatar
unknown committed
4014
static uchar *translog_get_LSN_from_diff(LSN base_lsn, uchar *src, uchar *dst)
4015 4016 4017 4018
{
  LSN lsn;
  uint32 diff;
  uint32 first_byte;
unknown's avatar
unknown committed
4019
  uint32 file_no, rec_offset;
4020 4021
  uint8 code;
  DBUG_ENTER("translog_get_LSN_from_diff");
unknown's avatar
unknown committed
4022
  DBUG_PRINT("enter", ("Base: (0x%lx,0x%lx)  src: 0x%lx  dst 0x%lx",
unknown's avatar
unknown committed
4023
                       LSN_IN_PARTS(base_lsn), (ulong) src, (ulong) dst));
4024
  first_byte= *((uint8*) src);
4025
  code= first_byte >> 6; /* Length is in 2 most significant bits */
unknown's avatar
unknown committed
4026 4027 4028 4029 4030
  first_byte&= 0x3F;
  src++;                                        /* Skip length + encode */
  file_no= LSN_FILE_NO(base_lsn);               /* Assume relative */
  DBUG_PRINT("info", ("code: %u  first byte: %lu",
                      (uint) code, (ulong) first_byte));
4031
  switch (code) {
unknown's avatar
unknown committed
4032
  case 0:
4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043
    if (first_byte == 0 && *((uint8*)src) == 1)
    {
      /*
        It is full LSN after special 1 diff (which is impossible
        in real life)
      */
      memcpy(dst, src + 1, LSN_STORE_SIZE);
      DBUG_PRINT("info", ("Special case of full LSN, new src: 0x%lx",
                          (ulong) (src + 1 + LSN_STORE_SIZE)));
      DBUG_RETURN(src + 1 + LSN_STORE_SIZE);
    }
unknown's avatar
unknown committed
4044
    rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 8) + *((uint8*)src));
4045
    break;
unknown's avatar
unknown committed
4046 4047 4048
  case 1:
    diff= uint2korr(src);
    rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 16) + diff);
4049
    break;
unknown's avatar
unknown committed
4050 4051 4052
  case 2:
    diff= uint3korr(src);
    rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 24) + diff);
4053
    break;
unknown's avatar
unknown committed
4054
  case 3:
4055
  {
4056
    ulonglong base_offset= LSN_OFFSET(base_lsn);
unknown's avatar
unknown committed
4057
    diff= uint4korr(src);
4058
    if (diff > LSN_OFFSET(base_lsn))
4059 4060 4061
    {
      /* take 1 from file offset */
      first_byte++;
unknown's avatar
unknown committed
4062
      base_offset+= LL(0x100000000);
4063
    }
unknown's avatar
unknown committed
4064 4065
    file_no= LSN_FILE_NO(base_lsn) - first_byte;
    rec_offset= base_offset - diff;
4066 4067 4068 4069 4070 4071
    break;
  }
  default:
    DBUG_ASSERT(0);
    DBUG_RETURN(NULL);
  }
unknown's avatar
unknown committed
4072 4073 4074
  lsn= MAKE_LSN(file_no, rec_offset);
  src+= code + 1;
  lsn_store(dst, lsn);
4075
  DBUG_PRINT("info", ("new src: 0x%lx", (ulong) src));
4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090
  DBUG_RETURN(src);
}


/*
  Encode relative LSNs listed in the parameters

  SYNOPSIS
    translog_relative_LSN_encode()
    parts                Parts list with encoded LSN(s)
    base_lsn             LSN which is base for encoding
    lsns                 number of LSN(s) to encode
    compressed_LSNs      buffer which can be used for storing compressed LSN(s)

  RETURN
unknown's avatar
unknown committed
4091 4092
    0  OK
    1  Error
4093 4094 4095
*/

static my_bool translog_relative_LSN_encode(struct st_translog_parts *parts,
4096
                                            LSN base_lsn,
unknown's avatar
unknown committed
4097
                                            uint lsns, uchar *compressed_LSNs)
4098
{
4099
  LEX_STRING *part;
unknown's avatar
unknown committed
4100
  uint lsns_len= lsns * LSN_STORE_SIZE;
4101 4102
  char buffer_src[MAX_NUMBER_OF_LSNS_PER_RECORD * LSN_STORE_SIZE];
  char *buffer= buffer_src;
4103 4104 4105

  DBUG_ENTER("translog_relative_LSN_encode");

4106
  DBUG_ASSERT(parts->current != 0);
4107
  part= parts->parts + parts->current;
4108

4109
  /* collect all LSN(s) in one chunk if it (they) is (are) divided */
4110
  if (part->length < lsns_len)
4111
  {
4112 4113
    uint copied= part->length;
    LEX_STRING *next_part;
unknown's avatar
unknown committed
4114
    DBUG_PRINT("info", ("Using buffer: 0x%lx", (ulong) compressed_LSNs));
unknown's avatar
unknown committed
4115
    memcpy(buffer, (uchar*)part->str, part->length);
4116
    next_part= parts->parts + parts->current + 1;
4117 4118
    do
    {
4119 4120
      DBUG_ASSERT(next_part < parts->parts + parts->elements);
      if ((next_part->length + copied) < lsns_len)
4121
      {
unknown's avatar
unknown committed
4122
        memcpy(buffer + copied, (uchar*)next_part->str,
4123 4124 4125 4126 4127
               next_part->length);
        copied+= next_part->length;
        next_part->length= 0; next_part->str= 0;
        /* delete_dynamic_element(&parts->parts, parts->current + 1); */
        next_part++;
4128 4129
        parts->current++;
        part= parts->parts + parts->current;
4130 4131 4132 4133
      }
      else
      {
        uint len= lsns_len - copied;
unknown's avatar
unknown committed
4134
        memcpy(buffer + copied, (uchar*)next_part->str, len);
4135
        copied= lsns_len;
4136 4137
        next_part->str+= len;
        next_part->length-= len;
4138 4139 4140
      }
    } while (copied < lsns_len);
  }
4141 4142 4143 4144 4145 4146 4147 4148 4149
  else
  {
    buffer= part->str;
    part->str+= lsns_len;
    part->length-= lsns_len;
    parts->current--;
    part= parts->parts + parts->current;
  }

4150 4151 4152
  {
    /* Compress */
    LSN ref;
4153
    int economy;
unknown's avatar
unknown committed
4154 4155
    uchar *src_ptr;
    uchar *dst_ptr= compressed_LSNs + (MAX_NUMBER_OF_LSNS_PER_RECORD *
4156 4157
                                      COMPRESSED_LSN_MAX_STORE_SIZE);
    for (src_ptr= buffer + lsns_len - LSN_STORE_SIZE;
4158
         src_ptr >= (uchar*) buffer;
4159
         src_ptr-= LSN_STORE_SIZE)
4160
    {
4161
      ref= lsn_korr(src_ptr);
4162
      if ((dst_ptr= translog_put_LSN_diff(base_lsn, ref, dst_ptr)) == NULL)
4163 4164
        DBUG_RETURN(1);
    }
4165 4166 4167 4168 4169
    part->length= (uint)((compressed_LSNs +
                          (MAX_NUMBER_OF_LSNS_PER_RECORD *
                           COMPRESSED_LSN_MAX_STORE_SIZE)) -
                         dst_ptr);
    parts->record_length-= (economy= lsns_len - part->length);
4170 4171
    DBUG_PRINT("info", ("new length of LSNs: %lu  economy: %d",
                        (ulong)part->length, economy));
4172
    parts->total_record_length-= economy;
4173
    part->str= (char*)dst_ptr;
4174 4175 4176 4177 4178
  }
  DBUG_RETURN(0);
}


unknown's avatar
unknown committed
4179 4180
/**
   @brief Write multi-group variable-size record.
4181

unknown's avatar
unknown committed
4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192
   @param  lsn             LSN of the record will be written here
   @param  type            the log record type
   @param  short_trid      Short transaction ID or 0 if it has no sense
   @param  parts           Descriptor of record source parts
   @param  buffer_to_flush Buffer which have to be flushed if it is not 0
   @param  header_length   Header length calculated for 1 group
   @param  buffer_rest     Beginning from which we plan to write in full pages
   @param  trn             Transaction structure pointer for hooks by
                           record log type, for short_id
   @param  hook_arg        Argument which will be passed to pre-write and
                           in-write hooks of this record.
4193

unknown's avatar
unknown committed
4194 4195 4196
   @return Operation status
     @retval 0      OK
     @retval 1      Error
4197 4198 4199 4200 4201
*/

static my_bool
translog_write_variable_record_mgroup(LSN *lsn,
                                      enum translog_record_type type,
unknown's avatar
unknown committed
4202
                                      MARIA_HA *tbl_info,
4203 4204 4205 4206 4207 4208
                                      SHORT_TRANSACTION_ID short_trid,
                                      struct st_translog_parts *parts,
                                      struct st_translog_buffer
                                      *buffer_to_flush,
                                      uint16 header_length,
                                      translog_size_t buffer_rest,
unknown's avatar
unknown committed
4209
                                      TRN *trn, void *hook_arg)
4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223
{
  TRANSLOG_ADDRESS horizon;
  struct st_buffer_cursor cursor;
  int rc= 0;
  uint i, chunk2_page, full_pages;
  uint curr_group= 0;
  translog_size_t record_rest, first_page, chunk3_pages, chunk0_pages= 1;
  translog_size_t done= 0;
  struct st_translog_group_descriptor group;
  DYNAMIC_ARRAY groups;
  uint16 chunk3_size;
  uint16 page_capacity= log_descriptor.page_capacity_chunk_2 + 1;
  uint16 last_page_capacity;
  my_bool new_page_before_chunk0= 1, first_chunk0= 1;
unknown's avatar
unknown committed
4224 4225
  uchar chunk0_header[1 + 2 + 5 + 2 + 2], group_desc[7 + 1];
  uchar chunk2_header[1];
4226 4227
  uint header_fixed_part= header_length + 2;
  uint groups_per_page= (page_capacity - header_fixed_part) / (7 + 1);
4228
  uint file_of_the_first_group;
4229 4230
  DBUG_ENTER("translog_write_variable_record_mgroup");

unknown's avatar
unknown committed
4231 4232
  chunk2_header[0]= TRANSLOG_CHUNK_NOHDR;

4233 4234 4235
  if (init_dynamic_array(&groups, sizeof(struct st_translog_group_descriptor),
                         10, 10 CALLER_INFO))
  {
unknown's avatar
unknown committed
4236
    translog_unlock();
4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251
    UNRECOVERABLE_ERROR(("init array failed"));
    DBUG_RETURN(1);
  }

  first_page= translog_get_current_page_rest();
  record_rest= parts->record_length - (first_page - 1);
  DBUG_PRINT("info", ("Record Rest: %lu", (ulong) record_rest));

  if (record_rest < buffer_rest)
  {
    DBUG_PRINT("info", ("too many free space because changing header"));
    buffer_rest-= log_descriptor.page_capacity_chunk_2;
    DBUG_ASSERT(record_rest >= buffer_rest);
  }

4252 4253
  file_of_the_first_group= LSN_FILE_NO(log_descriptor.horizon);
  translog_mark_file_unfinished(file_of_the_first_group);
4254 4255 4256 4257 4258 4259 4260
  do
  {
    group.addr= horizon= log_descriptor.horizon;
    cursor= log_descriptor.bc;
    cursor.chaser= 1;
    if ((full_pages= buffer_rest / log_descriptor.page_capacity_chunk_2) > 255)
    {
unknown's avatar
unknown committed
4261
      /* sizeof(uint8) == 256 is max number of chunk in multi-chunks group */
4262 4263 4264 4265 4266
      full_pages= 255;
      buffer_rest= full_pages * log_descriptor.page_capacity_chunk_2;
    }
    /*
       group chunks =
unknown's avatar
unknown committed
4267
       full pages + first page (which actually can be full, too).
4268 4269 4270
       But here we assign number of chunks - 1
    */
    group.num= full_pages;
unknown's avatar
unknown committed
4271
    if (insert_dynamic(&groups, (uchar*) &group))
4272 4273
    {
      UNRECOVERABLE_ERROR(("insert into array failed"));
unknown's avatar
unknown committed
4274
      goto err_unlock;
4275 4276
    }

unknown's avatar
unknown committed
4277 4278
    DBUG_PRINT("info", ("chunk: #%u  first_page: %u (%u)  "
                        "full_pages: %lu (%lu)  "
4279 4280 4281
                        "Left %lu",
                        groups.elements,
                        first_page, first_page - 1,
4282
                        (ulong) full_pages,
unknown's avatar
unknown committed
4283 4284 4285 4286 4287 4288
                        (ulong) (full_pages *
                                 log_descriptor.page_capacity_chunk_2),
                        (ulong)(parts->record_length - (first_page - 1 +
                                                        buffer_rest) -
                                done)));
    rc|= translog_advance_pointer(full_pages, 0);
4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303

    rc|= translog_unlock();

    if (buffer_to_flush != NULL)
    {
      rc|= translog_buffer_lock(buffer_to_flush);
      translog_buffer_decrease_writers(buffer_to_flush);
      if (!rc)
        rc= translog_buffer_flush(buffer_to_flush);
      rc|= translog_buffer_unlock(buffer_to_flush);
      buffer_to_flush= NULL;
    }
    if (rc)
    {
      UNRECOVERABLE_ERROR(("flush of unlock buffer failed"));
unknown's avatar
unknown committed
4304
      goto err;
4305 4306 4307 4308
    }

    translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header);
    translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts);
unknown's avatar
unknown committed
4309 4310
    DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx)  "
                        "Left  %lu",
unknown's avatar
unknown committed
4311 4312
                        LSN_IN_PARTS(log_descriptor.horizon),
                        LSN_IN_PARTS(horizon),
4313 4314 4315 4316 4317 4318
                        (ulong) (parts->record_length - (first_page - 1) -
                                 done)));

    for (i= 0; i < full_pages; i++)
    {
      if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
unknown's avatar
unknown committed
4319
        goto err;
4320

unknown's avatar
unknown committed
4321 4322
      DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  "
                          "local: (%lu,0x%lx)  "
4323
                          "Left: %lu",
unknown's avatar
unknown committed
4324 4325
                          LSN_IN_PARTS(log_descriptor.horizon),
                          LSN_IN_PARTS(horizon),
4326 4327 4328 4329 4330 4331 4332
                          (ulong) (parts->record_length - (first_page - 1) -
                                   i * log_descriptor.page_capacity_chunk_2 -
                                   done)));
    }

    done+= (first_page - 1 + buffer_rest);

unknown's avatar
unknown committed
4333
    /* TODO: make separate function for following */
4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346
    rc= translog_page_next(&horizon, &cursor, &buffer_to_flush);
    if (buffer_to_flush != NULL)
    {
      rc|= translog_buffer_lock(buffer_to_flush);
      translog_buffer_decrease_writers(buffer_to_flush);
      if (!rc)
        rc= translog_buffer_flush(buffer_to_flush);
      rc|= translog_buffer_unlock(buffer_to_flush);
      buffer_to_flush= NULL;
    }
    if (rc)
    {
      UNRECOVERABLE_ERROR(("flush of unlock buffer failed"));
unknown's avatar
unknown committed
4347
      goto err;
4348 4349 4350 4351 4352 4353
    }
    rc= translog_buffer_lock(cursor.buffer);
    if (!rc)
      translog_buffer_decrease_writers(cursor.buffer);
    rc|= translog_buffer_unlock(cursor.buffer);
    if (rc)
unknown's avatar
unknown committed
4354
      goto err;
4355 4356 4357 4358 4359 4360 4361 4362 4363 4364

    translog_lock();

    first_page= translog_get_current_page_rest();
    buffer_rest= translog_get_current_group_size();
  } while (first_page + buffer_rest < (uint) (parts->record_length - done));

  group.addr= horizon= log_descriptor.horizon;
  cursor= log_descriptor.bc;
  cursor.chaser= 1;
4365
  group.num= 0;                       /* 0 because it does not matter */
unknown's avatar
unknown committed
4366
  if (insert_dynamic(&groups, (uchar*) &group))
4367 4368
  {
    UNRECOVERABLE_ERROR(("insert into array failed"));
unknown's avatar
unknown committed
4369
    goto err_unlock;
4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418
  }
  record_rest= parts->record_length - done;
  DBUG_PRINT("info", ("Record rest: %lu", (ulong) record_rest));
  if (first_page <= record_rest + 1)
  {
    chunk2_page= 1;
    record_rest-= (first_page - 1);
    full_pages= record_rest / log_descriptor.page_capacity_chunk_2;
    record_rest= (record_rest % log_descriptor.page_capacity_chunk_2);
    last_page_capacity= page_capacity;
  }
  else
  {
    chunk2_page= full_pages= 0;
    last_page_capacity= first_page;
  }
  chunk3_size= 0;
  chunk3_pages= 0;
  if (last_page_capacity > record_rest + 1 && record_rest != 0)
  {
    if (last_page_capacity >
        record_rest + header_fixed_part + groups.elements * (7 + 1))
    {
      /* 1 record of type 0 */
      chunk3_pages= 0;
    }
    else
    {
      chunk3_pages= 1;
      if (record_rest + 2 == last_page_capacity)
      {
        chunk3_size= record_rest - 1;
        record_rest= 1;
      }
      else
      {
        chunk3_size= record_rest;
        record_rest= 0;
      }
    }
  }
  /*
     A first non-full page will hold type 0 chunk only if it fit in it with
     all its headers
  */
  while (page_capacity <
         record_rest + header_fixed_part +
         (groups.elements - groups_per_page * (chunk0_pages - 1)) * (7 + 1))
    chunk0_pages++;
unknown's avatar
unknown committed
4419 4420
  DBUG_PRINT("info", ("chunk0_pages: %u  groups %u  groups per full page: %u  "
                      "Group on last page: %u",
4421 4422 4423 4424 4425
                      chunk0_pages, groups.elements,
                      groups_per_page,
                      (groups.elements -
                       ((page_capacity - header_fixed_part) / (7 + 1)) *
                       (chunk0_pages - 1))));
unknown's avatar
unknown committed
4426 4427
  DBUG_PRINT("info", ("first_page: %u  chunk2: %u  full_pages: %u (%lu)  "
                      "chunk3: %u (%u)  rest: %u",
4428 4429 4430 4431 4432
                      first_page,
                      chunk2_page, full_pages,
                      (ulong) full_pages *
                      log_descriptor.page_capacity_chunk_2,
                      chunk3_pages, (uint) chunk3_size, (uint) record_rest));
unknown's avatar
unknown committed
4433 4434 4435 4436 4437 4438 4439 4440 4441 4442
  rc= translog_advance_pointer(full_pages + chunk3_pages +
                               (chunk0_pages - 1),
                               record_rest + header_fixed_part +
                               (groups.elements -
                                ((page_capacity -
                                  header_fixed_part) / (7 + 1)) *
                                (chunk0_pages - 1)) * (7 + 1));
  rc|= translog_unlock();
  if (rc)
    goto err;
4443 4444 4445 4446 4447 4448

  if (chunk2_page)
  {
    DBUG_PRINT("info", ("chunk 2 to finish first page"));
    translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header);
    translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts);
unknown's avatar
unknown committed
4449
    DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx) "
4450
                        "Left: %lu",
unknown's avatar
unknown committed
4451 4452
                        LSN_IN_PARTS(log_descriptor.horizon),
                        LSN_IN_PARTS(horizon),
4453 4454 4455 4456 4457 4458 4459
                        (ulong) (parts->record_length - (first_page - 1) -
                                 done)));
  }
  else if (chunk3_pages)
  {
    DBUG_PRINT("info", ("chunk 3"));
    DBUG_ASSERT(full_pages == 0);
unknown's avatar
unknown committed
4460
    uchar chunk3_header[3];
unknown's avatar
unknown committed
4461
    chunk3_pages= 0;
4462 4463 4464 4465
    chunk3_header[0]= TRANSLOG_CHUNK_LNGTH;
    int2store(chunk3_header + 1, chunk3_size);
    translog_write_data_on_page(&horizon, &cursor, 3, chunk3_header);
    translog_write_parts_on_page(&horizon, &cursor, chunk3_size, parts);
unknown's avatar
unknown committed
4466
    DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx) "
4467
                        "Left: %lu",
unknown's avatar
unknown committed
4468 4469
                        LSN_IN_PARTS(log_descriptor.horizon),
                        LSN_IN_PARTS(horizon),
4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481
                        (ulong) (parts->record_length - chunk3_size - done)));
  }
  else
  {
    DBUG_PRINT("info", ("no new_page_before_chunk0"));
    new_page_before_chunk0= 0;
  }

  for (i= 0; i < full_pages; i++)
  {
    DBUG_ASSERT(chunk2_page != 0);
    if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
unknown's avatar
unknown committed
4482
      goto err;
4483

unknown's avatar
unknown committed
4484
    DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx) "
4485
                        "Left: %lu",
unknown's avatar
unknown committed
4486 4487
                        LSN_IN_PARTS(log_descriptor.horizon),
                        LSN_IN_PARTS(horizon),
4488 4489 4490 4491 4492 4493 4494 4495 4496
                        (ulong) (parts->record_length - (first_page - 1) -
                                 i * log_descriptor.page_capacity_chunk_2 -
                                 done)));
  }

  if (chunk3_pages &&
      translog_write_variable_record_chunk3_page(parts,
                                                 chunk3_size,
                                                 &horizon, &cursor))
unknown's avatar
unknown committed
4497 4498
    goto err;
  DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx)",
unknown's avatar
unknown committed
4499 4500
                      LSN_IN_PARTS(log_descriptor.horizon),
                      LSN_IN_PARTS(horizon)));
4501

unknown's avatar
unknown committed
4502
  *chunk0_header= (uchar) (type |TRANSLOG_CHUNK_LSN);
4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524
  int2store(chunk0_header + 1, short_trid);
  translog_write_variable_record_1group_code_len(chunk0_header + 3,
                                                 parts->record_length,
                                                 header_length);
  do
  {
    int limit;
    if (new_page_before_chunk0)
    {
      rc= translog_page_next(&horizon, &cursor, &buffer_to_flush);
      if (buffer_to_flush != NULL)
      {
        rc|= translog_buffer_lock(buffer_to_flush);
        translog_buffer_decrease_writers(buffer_to_flush);
        if (!rc)
          rc= translog_buffer_flush(buffer_to_flush);
        rc|= translog_buffer_unlock(buffer_to_flush);
        buffer_to_flush= NULL;
      }
      if (rc)
      {
        UNRECOVERABLE_ERROR(("flush of unlock buffer failed"));
unknown's avatar
unknown committed
4525
        goto err;
4526 4527 4528 4529 4530 4531
      }
    }
    new_page_before_chunk0= 1;

    if (first_chunk0)
    {
unknown's avatar
unknown committed
4532
      first_chunk0= 0;
4533 4534
      *lsn= horizon;
      if (log_record_type_descriptor[type].inwrite_hook &&
unknown's avatar
unknown committed
4535 4536
          (*log_record_type_descriptor[type].inwrite_hook) (type, trn,
                                                            tbl_info,
unknown's avatar
unknown committed
4537
                                                            lsn, hook_arg))
unknown's avatar
unknown committed
4538
        goto err;
4539 4540 4541 4542 4543 4544 4545 4546 4547
    }

    /*
       A first non-full page will hold type 0 chunk only if it fit in it with
       all its headers => the fist page is full or number of groups less then
       possible number of full page.
    */
    limit= (groups_per_page < groups.elements - curr_group ?
            groups_per_page : groups.elements - curr_group);
unknown's avatar
unknown committed
4548
    DBUG_PRINT("info", ("Groups: %u  curr: %u  limit: %u",
4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570
                        (uint) groups.elements, (uint) curr_group,
                        (uint) limit));

    if (chunk0_pages == 1)
    {
      DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) + %u = %u",
                          (uint) limit, (uint) record_rest,
                          (uint) (2 + limit * (7 + 1) + record_rest)));
      int2store(chunk0_header + header_length - 2,
                2 + limit * (7 + 1) + record_rest);
    }
    else
    {
      DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) = %u",
                          (uint) limit, (uint) (2 + limit * (7 + 1))));
      int2store(chunk0_header + header_length - 2, 2 + limit * (7 + 1));
    }
    int2store(chunk0_header + header_length, groups.elements - curr_group);
    translog_write_data_on_page(&horizon, &cursor, header_fixed_part,
                                chunk0_header);
    for (i= curr_group; i < limit + curr_group; i++)
    {
unknown's avatar
unknown committed
4571 4572 4573 4574 4575
      struct st_translog_group_descriptor *grp_ptr;
      grp_ptr= dynamic_element(&groups, i,
                               struct st_translog_group_descriptor *);
      lsn_store(group_desc, grp_ptr->addr);
      group_desc[7]= grp_ptr->num;
4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591
      translog_write_data_on_page(&horizon, &cursor, (7 + 1), group_desc);
    }

    if (chunk0_pages == 1 && record_rest != 0)
      translog_write_parts_on_page(&horizon, &cursor, record_rest, parts);

    chunk0_pages--;
    curr_group+= limit;

  } while (chunk0_pages != 0);
  rc= translog_buffer_lock(cursor.buffer);
  if (cmp_translog_addr(cursor.buffer->last_lsn, *lsn) < 0)
    cursor.buffer->last_lsn= *lsn;
  translog_buffer_decrease_writers(cursor.buffer);
  rc|= translog_buffer_unlock(cursor.buffer);

4592 4593 4594 4595 4596 4597
  if (translog_set_lsn_for_files(file_of_the_first_group, LSN_FILE_NO(*lsn),
                                 *lsn, FALSE))
    goto err;
  translog_mark_file_finished(file_of_the_first_group);


4598 4599
  delete_dynamic(&groups);
  DBUG_RETURN(rc);
unknown's avatar
unknown committed
4600 4601 4602 4603 4604 4605

err_unlock:
  translog_unlock();
err:
  delete_dynamic(&groups);
  DBUG_RETURN(1);
4606 4607 4608
}


unknown's avatar
unknown committed
4609 4610
/**
   @brief Write the variable length log record.
4611

unknown's avatar
unknown committed
4612 4613 4614 4615 4616 4617 4618 4619
   @param  lsn             LSN of the record will be written here
   @param  type            the log record type
   @param  short_trid      Short transaction ID or 0 if it has no sense
   @param  parts           Descriptor of record source parts
   @param  trn             Transaction structure pointer for hooks by
                           record log type, for short_id
   @param  hook_arg        Argument which will be passed to pre-write and
                           in-write hooks of this record.
4620

unknown's avatar
unknown committed
4621 4622 4623
   @return Operation status
     @retval 0      OK
     @retval 1      Error
4624 4625 4626 4627
*/

static my_bool translog_write_variable_record(LSN *lsn,
                                              enum translog_record_type type,
unknown's avatar
unknown committed
4628
                                              MARIA_HA *tbl_info,
4629 4630
                                              SHORT_TRANSACTION_ID short_trid,
                                              struct st_translog_parts *parts,
unknown's avatar
unknown committed
4631
                                              TRN *trn, void *hook_arg)
4632 4633 4634 4635 4636 4637
{
  struct st_translog_buffer *buffer_to_flush= NULL;
  uint header_length1= 1 + 2 + 2 +
    translog_variable_record_length_bytes(parts->record_length);
  ulong buffer_rest;
  uint page_rest;
unknown's avatar
unknown committed
4638
  /* Max number of such LSNs per record is 2 */
unknown's avatar
unknown committed
4639
  uchar compressed_LSNs[MAX_NUMBER_OF_LSNS_PER_RECORD *
4640
    COMPRESSED_LSN_MAX_STORE_SIZE];
unknown's avatar
unknown committed
4641
  my_bool res;
4642 4643 4644
  DBUG_ENTER("translog_write_variable_record");

  translog_lock();
unknown's avatar
unknown committed
4645
  DBUG_PRINT("info", ("horizon: (%lu,0x%lx)",
unknown's avatar
unknown committed
4646
                      LSN_IN_PARTS(log_descriptor.horizon)));
unknown's avatar
unknown committed
4647 4648
  page_rest= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill;
  DBUG_PRINT("info", ("header length: %u  page_rest: %u",
4649 4650 4651
                      header_length1, page_rest));

  /*
4652 4653
    header and part which we should read have to fit in one chunk
    TODO: allow to divide readable header
4654 4655 4656 4657 4658
  */
  if (page_rest <
      (header_length1 + log_record_type_descriptor[type].read_header_len))
  {
    DBUG_PRINT("info",
unknown's avatar
unknown committed
4659 4660
               ("Next page, size: %u  header: %u + %u",
                log_descriptor.bc.current_page_fill,
4661 4662 4663 4664
                header_length1,
                log_record_type_descriptor[type].read_header_len));
    translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
                       &buffer_to_flush);
unknown's avatar
unknown committed
4665
    /* Chunk 2 header is 1 byte, so full page capacity will be one uchar more */
4666 4667 4668 4669 4670 4671 4672 4673
    page_rest= log_descriptor.page_capacity_chunk_2 + 1;
    DBUG_PRINT("info", ("page_rest: %u", page_rest));
  }

  /*
     To minimize compressed size we will compress always relative to
     very first chunk address (log_descriptor.horizon for now)
  */
unknown's avatar
unknown committed
4674
  if (log_record_type_descriptor[type].compressed_LSN > 0)
4675
  {
4676
    if (translog_relative_LSN_encode(parts, log_descriptor.horizon,
4677
                                     log_record_type_descriptor[type].
unknown's avatar
unknown committed
4678
                                     compressed_LSN, compressed_LSNs))
4679
    {
unknown's avatar
unknown committed
4680
      translog_unlock();
4681 4682
      if (buffer_to_flush != NULL)
      {
unknown's avatar
unknown committed
4683 4684 4685 4686 4687 4688 4689
        /*
          It is just try to finish log in nice way in case of error, so we
          do not check result of the following functions, because we are
          going return error state in any case
        */
        translog_buffer_flush(buffer_to_flush);
        translog_buffer_unlock(buffer_to_flush);
4690 4691 4692 4693 4694 4695
      }
      DBUG_RETURN(1);
    }
    /* recalculate header length after compression */
    header_length1= 1 + 2 + 2 +
      translog_variable_record_length_bytes(parts->record_length);
unknown's avatar
unknown committed
4696 4697
    DBUG_PRINT("info", ("after compressing LSN(s) header length: %u  "
                        "record length: %lu",
4698
                        header_length1, (ulong)parts->record_length));
4699 4700 4701 4702 4703 4704
  }

  /* TODO: check space on current page for header + few bytes */
  if (page_rest >= parts->record_length + header_length1)
  {
    /* following function makes translog_unlock(); */
unknown's avatar
unknown committed
4705
    res= translog_write_variable_record_1chunk(lsn, type, tbl_info,
unknown's avatar
unknown committed
4706 4707
                                               short_trid,
                                               parts, buffer_to_flush,
unknown's avatar
unknown committed
4708
                                               header_length1, trn, hook_arg);
unknown's avatar
unknown committed
4709
    DBUG_RETURN(res);
4710 4711 4712 4713 4714 4715 4716
  }

  buffer_rest= translog_get_current_group_size();

  if (buffer_rest >= parts->record_length + header_length1 - page_rest)
  {
    /* following function makes translog_unlock(); */
unknown's avatar
unknown committed
4717
    res= translog_write_variable_record_1group(lsn, type, tbl_info,
unknown's avatar
unknown committed
4718 4719
                                               short_trid,
                                               parts, buffer_to_flush,
unknown's avatar
unknown committed
4720
                                               header_length1, trn, hook_arg);
unknown's avatar
unknown committed
4721
    DBUG_RETURN(res);
4722 4723
  }
  /* following function makes translog_unlock(); */
unknown's avatar
unknown committed
4724
  res= translog_write_variable_record_mgroup(lsn, type, tbl_info,
unknown's avatar
unknown committed
4725 4726 4727
                                             short_trid,
                                             parts, buffer_to_flush,
                                             header_length1,
unknown's avatar
unknown committed
4728
                                             buffer_rest, trn, hook_arg);
unknown's avatar
unknown committed
4729
  DBUG_RETURN(res);
4730 4731 4732
}


unknown's avatar
unknown committed
4733 4734
/**
   @brief Write the fixed and pseudo-fixed log record.
4735

unknown's avatar
unknown committed
4736 4737 4738 4739 4740 4741 4742 4743
   @param  lsn             LSN of the record will be written here
   @param  type            the log record type
   @param  short_trid      Short transaction ID or 0 if it has no sense
   @param  parts           Descriptor of record source parts
   @param  trn             Transaction structure pointer for hooks by
                           record log type, for short_id
   @param  hook_arg        Argument which will be passed to pre-write and
                           in-write hooks of this record.
4744

unknown's avatar
unknown committed
4745 4746 4747
   @return Operation status
     @retval 0      OK
     @retval 1      Error
4748 4749 4750 4751
*/

static my_bool translog_write_fixed_record(LSN *lsn,
                                           enum translog_record_type type,
unknown's avatar
unknown committed
4752
                                           MARIA_HA *tbl_info,
4753 4754
                                           SHORT_TRANSACTION_ID short_trid,
                                           struct st_translog_parts *parts,
unknown's avatar
unknown committed
4755
                                           TRN *trn, void *hook_arg)
4756 4757
{
  struct st_translog_buffer *buffer_to_flush= NULL;
unknown's avatar
unknown committed
4758
  uchar chunk1_header[1 + 2];
unknown's avatar
unknown committed
4759
  /* Max number of such LSNs per record is 2 */
unknown's avatar
unknown committed
4760
  uchar compressed_LSNs[MAX_NUMBER_OF_LSNS_PER_RECORD *
4761
    COMPRESSED_LSN_MAX_STORE_SIZE];
4762
  LEX_STRING *part;
4763 4764 4765 4766 4767 4768 4769 4770
  int rc;
  DBUG_ENTER("translog_write_fixed_record");
  DBUG_ASSERT((log_record_type_descriptor[type].class ==
               LOGRECTYPE_FIXEDLENGTH &&
               parts->record_length ==
               log_record_type_descriptor[type].fixed_length) ||
              (log_record_type_descriptor[type].class ==
               LOGRECTYPE_PSEUDOFIXEDLENGTH &&
4771
               parts->record_length ==
4772 4773 4774
               log_record_type_descriptor[type].fixed_length));

  translog_lock();
unknown's avatar
unknown committed
4775
  DBUG_PRINT("info", ("horizon: (%lu,0x%lx)",
unknown's avatar
unknown committed
4776
                      LSN_IN_PARTS(log_descriptor.horizon)));
4777

unknown's avatar
unknown committed
4778
  DBUG_ASSERT(log_descriptor.bc.current_page_fill <= TRANSLOG_PAGE_SIZE);
4779
  DBUG_PRINT("info",
unknown's avatar
unknown committed
4780 4781
             ("Page size: %u  record: %u  next cond: %d",
              log_descriptor.bc.current_page_fill,
4782
              (parts->record_length +
unknown's avatar
unknown committed
4783 4784
               log_record_type_descriptor[type].compressed_LSN * 2 + 3),
              ((((uint) log_descriptor.bc.current_page_fill) +
4785
                (parts->record_length +
unknown's avatar
unknown committed
4786
                 log_record_type_descriptor[type].compressed_LSN * 2 + 3)) >
4787 4788
               TRANSLOG_PAGE_SIZE)));
  /*
4789 4790
     check that there is enough place on current page.
     NOTE: compressing may increase page LSN size on two bytes for every LSN
4791
  */
unknown's avatar
unknown committed
4792
  if ((((uint) log_descriptor.bc.current_page_fill) +
4793
       (parts->record_length +
unknown's avatar
unknown committed
4794
        log_record_type_descriptor[type].compressed_LSN * 2 + 3)) >
4795 4796 4797 4798 4799 4800 4801 4802
      TRANSLOG_PAGE_SIZE)
  {
    DBUG_PRINT("info", ("Next page"));
    translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
                       &buffer_to_flush);
  }

  *lsn= log_descriptor.horizon;
4803 4804 4805 4806
  if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
                             *lsn, TRUE) ||
      (log_record_type_descriptor[type].inwrite_hook &&
       (*log_record_type_descriptor[type].inwrite_hook) (type, trn, tbl_info,
unknown's avatar
unknown committed
4807
                                                         lsn, hook_arg)))
4808
  {
unknown's avatar
unknown committed
4809 4810
    rc= 1;
    goto err;
4811 4812 4813 4814 4815
  }

  /* compress LSNs */
  if (log_record_type_descriptor[type].class == LOGRECTYPE_PSEUDOFIXEDLENGTH)
  {
unknown's avatar
unknown committed
4816
    DBUG_ASSERT(log_record_type_descriptor[type].compressed_LSN > 0);
4817
    if (translog_relative_LSN_encode(parts, *lsn,
4818
                                     log_record_type_descriptor[type].
unknown's avatar
unknown committed
4819
                                     compressed_LSN, compressed_LSNs))
4820 4821 4822 4823 4824 4825 4826
    {
      rc= 1;
      goto err;
    }
  }

  /*
unknown's avatar
unknown committed
4827 4828
    Write the whole record at once (we know that there is enough place on
    the destination page)
4829
  */
unknown's avatar
unknown committed
4830
  DBUG_ASSERT(parts->current != 0);       /* first part is left for header */
4831 4832 4833
  part= parts->parts + (--parts->current);
  parts->total_record_length+= (part->length= 1 + 2);
  part->str= (char*)chunk1_header;
unknown's avatar
unknown committed
4834
  *chunk1_header= (uchar) (type | TRANSLOG_CHUNK_FIXED);
4835 4836 4837 4838 4839 4840 4841
  int2store(chunk1_header + 1, short_trid);

  rc= translog_write_parts_on_page(&log_descriptor.horizon,
                                   &log_descriptor.bc,
                                   parts->total_record_length, parts);

  log_descriptor.bc.buffer->last_lsn= *lsn;
unknown's avatar
unknown committed
4842

4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860
err:
  rc|= translog_unlock();

  /*
     check if we switched buffer and need process it (current buffer is
     unlocked already => we will not delay other threads
  */
  if (buffer_to_flush != NULL)
  {
    if (!rc)
      rc= translog_buffer_flush(buffer_to_flush);
    rc|= translog_buffer_unlock(buffer_to_flush);
  }

  DBUG_RETURN(rc);
}


4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871
/**
   @brief Writes the log record

   If share has no 2-byte-id yet, gives an id to the share and logs
   LOGREC_FILE_ID. If transaction has not logged LOGREC_LONG_TRANSACTION_ID
   yet, logs it.

   @param  lsn             LSN of the record will be written here
   @param  type            the log record type
   @param  trn             Transaction structure pointer for hooks by
                           record log type, for short_id
unknown's avatar
unknown committed
4872
   @param  tbl_info        MARIA_HA of table or NULL
4873 4874 4875 4876 4877 4878
   @param  rec_len         record length or 0 (count it)
   @param  part_no         number of parts or 0 (count it)
   @param  parts_data      zero ended (in case of number of parts is 0)
                           array of LEX_STRINGs (parts), first
                           TRANSLOG_INTERNAL_PARTS positions in the log
                           should be unused (need for loghandler)
unknown's avatar
unknown committed
4879 4880 4881
   @param  store_share_id  if tbl_info!=NULL then share's id will
                           automatically be stored in the two first bytes
                           pointed (so pointer is assumed to be !=NULL)
unknown's avatar
unknown committed
4882 4883 4884
   @param  hook_arg        argument which will be passed to pre-write and
                           in-write hooks of this record.

4885 4886 4887
   @return Operation status
     @retval 0      OK
     @retval 1      Error
4888 4889 4890 4891
*/

my_bool translog_write_record(LSN *lsn,
                              enum translog_record_type type,
unknown's avatar
unknown committed
4892
                              TRN *trn, MARIA_HA *tbl_info,
4893 4894
                              translog_size_t rec_len,
                              uint part_no,
4895
                              LEX_STRING *parts_data,
unknown's avatar
unknown committed
4896 4897
                              uchar *store_share_id,
                              void *hook_arg)
4898 4899
{
  struct st_translog_parts parts;
4900
  LEX_STRING *part;
4901
  int rc;
4902
  uint short_trid= trn->short_id;
4903
  DBUG_ENTER("translog_write_record");
unknown's avatar
unknown committed
4904 4905
  DBUG_PRINT("enter", ("type: %u  ShortTrID: %u  rec_len: %lu",
                       (uint) type, (uint) short_trid, (ulong) rec_len));
unknown's avatar
unknown committed
4906
  DBUG_ASSERT(translog_inited == 1);
4907

unknown's avatar
unknown committed
4908
  if (tbl_info)
4909
  {
unknown's avatar
unknown committed
4910
    MARIA_SHARE *share= tbl_info->s;
unknown's avatar
unknown committed
4911
    if (!share->now_transactional)
4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924
    {
      DBUG_PRINT("info", ("It is not transactional table"));
      DBUG_RETURN(0);
    }
    if (unlikely(share->id == 0))
    {
      /*
        First log write for this MARIA_SHARE; give it a short id.
        When the lock manager is enabled and needs a short id, it should be
        assigned in the lock manager (because row locks will be taken before
        log records are written; for example SELECT FOR UPDATE takes locks but
        writes no log record.
      */
unknown's avatar
unknown committed
4925
      if (unlikely(translog_assign_id_to_share(tbl_info, trn)))
4926 4927 4928 4929 4930 4931
        DBUG_RETURN(1);
    }
    fileid_store(store_share_id, share->id);
  }
  if (unlikely(!(trn->first_undo_lsn & TRANSACTION_LOGGED_LONG_ID)))
  {
unknown's avatar
unknown committed
4932
    LSN dummy_lsn;
4933 4934 4935 4936 4937 4938
    LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
    uchar log_data[6];
    int6store(log_data, trn->trid);
    log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    (char*) log_data;
    log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
    trn->first_undo_lsn|= TRANSACTION_LOGGED_LONG_ID; /* no recursion */
unknown's avatar
unknown committed
4939
    if (unlikely(translog_write_record(&dummy_lsn, LOGREC_LONG_TRANSACTION_ID,
4940 4941
                                       trn, NULL, sizeof(log_data),
                                       sizeof(log_array)/sizeof(log_array[0]),
unknown's avatar
unknown committed
4942
                                       log_array, NULL, NULL)))
4943
      DBUG_RETURN(1);
4944
  }
unknown's avatar
unknown committed
4945

4946
  parts.parts= parts_data;
4947

4948 4949
  /* count parts if they are not counted by upper level */
  if (part_no == 0)
unknown's avatar
unknown committed
4950
  {
4951 4952 4953
    for (part_no= TRANSLOG_INTERNAL_PARTS;
         parts_data[part_no].length != 0;
         part_no++);
unknown's avatar
unknown committed
4954
  }
4955 4956
  parts.elements= part_no;
  parts.current= TRANSLOG_INTERNAL_PARTS;
4957

4958
  /* clear TRANSLOG_INTERNAL_PARTS */
4959
  DBUG_ASSERT(TRANSLOG_INTERNAL_PARTS != 0);
4960 4961 4962 4963 4964
  parts_data[0].str= 0;
  parts_data[0].length= 0;

  /* count length of the record */
  if (rec_len == 0)
unknown's avatar
unknown committed
4965
  {
4966 4967 4968
    for(part= parts_data + TRANSLOG_INTERNAL_PARTS;\
        part < parts_data + part_no;
        part++)
4969
    {
4970
      rec_len+= part->length;
4971 4972
    }
  }
4973
  parts.record_length= rec_len;
unknown's avatar
unknown committed
4974

4975 4976 4977 4978
#ifndef DBUG_OFF
  {
    uint i;
    uint len= 0;
4979
#ifdef HAVE_purify
unknown's avatar
unknown committed
4980 4981
    ha_checksum checksum= 0;
#endif
4982
    for (i= TRANSLOG_INTERNAL_PARTS; i < part_no; i++)
unknown's avatar
unknown committed
4983
    {
4984
#ifdef HAVE_purify
unknown's avatar
unknown committed
4985 4986 4987 4988
      /* Find unitialized bytes early */
      checksum+= my_checksum(checksum, parts_data[i].str,
                             parts_data[i].length);
#endif
4989
      len+= parts_data[i].length;
unknown's avatar
unknown committed
4990
    }
4991 4992 4993
    DBUG_ASSERT(len == rec_len);
  }
#endif
unknown's avatar
unknown committed
4994 4995 4996 4997 4998
  /*
    Start total_record_length from record_length then overhead will
    be add
  */
  parts.total_record_length= parts.record_length;
unknown's avatar
unknown committed
4999
  DBUG_PRINT("info", ("record length: %lu", (ulong) parts.record_length));
5000 5001 5002

  /* process this parts */
  if (!(rc= (log_record_type_descriptor[type].prewrite_hook &&
5003
             (*log_record_type_descriptor[type].prewrite_hook) (type, trn,
unknown's avatar
unknown committed
5004
                                                                tbl_info,
unknown's avatar
unknown committed
5005
                                                                hook_arg))))
5006
  {
unknown's avatar
unknown committed
5007
    switch (log_record_type_descriptor[type].class) {
5008
    case LOGRECTYPE_VARIABLE_LENGTH:
unknown's avatar
unknown committed
5009
      rc= translog_write_variable_record(lsn, type, tbl_info,
unknown's avatar
unknown committed
5010
                                         short_trid, &parts, trn, hook_arg);
5011 5012 5013
      break;
    case LOGRECTYPE_PSEUDOFIXEDLENGTH:
    case LOGRECTYPE_FIXEDLENGTH:
unknown's avatar
unknown committed
5014
      rc= translog_write_fixed_record(lsn, type, tbl_info,
unknown's avatar
unknown committed
5015
                                      short_trid, &parts, trn, hook_arg);
5016 5017 5018 5019 5020 5021 5022 5023
      break;
    case LOGRECTYPE_NOT_ALLOWED:
    default:
      DBUG_ASSERT(0);
      rc= 1;
    }
  }

unknown's avatar
unknown committed
5024
  DBUG_PRINT("info", ("LSN: (%lu,0x%lx)", LSN_IN_PARTS(*lsn)));
5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042
  DBUG_RETURN(rc);
}


/*
  Decode compressed (relative) LSN(s)

  SYNOPSIS
   translog_relative_lsn_decode()
   base_lsn              LSN for encoding
   src                   Decode LSN(s) from here
   dst                   Put decoded LSNs here
   lsns                  number of LSN(s)

   RETURN
     position in sources after decoded LSN(s)
*/

unknown's avatar
unknown committed
5043 5044
static uchar *translog_relative_LSN_decode(LSN base_lsn,
                                          uchar *src, uchar *dst, uint lsns)
5045 5046
{
  uint i;
unknown's avatar
unknown committed
5047
  for (i= 0; i < lsns; i++, dst+= LSN_STORE_SIZE)
5048 5049 5050 5051 5052 5053
  {
    src= translog_get_LSN_from_diff(base_lsn, src, dst);
  }
  return src;
}

5054 5055 5056
/**
   @brief Get header of fixed/pseudo length record and call hook for
   it processing
5057

5058 5059 5060 5061
   @param page            Pointer to the buffer with page where LSN chunk is
                          placed
   @param page_offset     Offset of the first chunk in the page
   @param buff            Buffer to be filled with header data
5062

5063 5064 5065
   @return Length of header or operation status
     @retval #  number of bytes in TRANSLOG_HEADER_BUFFER::header where
                stored decoded part of the header
5066 5067
*/

5068 5069 5070
static int translog_fixed_length_header(uchar *page,
                                        translog_size_t page_offset,
                                        TRANSLOG_HEADER_BUFFER *buff)
5071 5072 5073
{
  struct st_log_record_type_descriptor *desc=
    log_record_type_descriptor + buff->type;
unknown's avatar
unknown committed
5074 5075 5076
  uchar *src= page + page_offset + 3;
  uchar *dst= buff->header;
  uchar *start= src;
unknown's avatar
unknown committed
5077
  uint lsns= desc->compressed_LSN;
5078
  uint length= desc->fixed_length;
5079 5080 5081 5082 5083 5084 5085 5086

  DBUG_ENTER("translog_fixed_length_header");

  buff->record_length= length;

  if (desc->class == LOGRECTYPE_PSEUDOFIXEDLENGTH)
  {
    DBUG_ASSERT(lsns > 0);
5087
    src= translog_relative_LSN_decode(buff->lsn, src, dst, lsns);
unknown's avatar
unknown committed
5088
    lsns*= LSN_STORE_SIZE;
5089 5090
    dst+= lsns;
    length-= lsns;
5091
    buff->compressed_LSN_economy= (lsns - (src - start));
5092 5093 5094 5095
  }
  else
    buff->compressed_LSN_economy= 0;

unknown's avatar
unknown committed
5096
  memcpy(dst, src, length);
5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113
  buff->non_header_data_start_offset= page_offset +
    ((src + length) - (page + page_offset));
  buff->non_header_data_len= 0;
  DBUG_RETURN(buff->record_length);
}


/*
  Free resources used by TRANSLOG_HEADER_BUFFER

  SYNOPSIS
    translog_free_record_header();
    buff                 Header buffer; its groups array is freed and
                         groups_no is reset if groups_no is not 0
*/

void translog_free_record_header(TRANSLOG_HEADER_BUFFER *buff)
{
  DBUG_ENTER("translog_free_record_header");
  DBUG_ASSERT(translog_inited == 1);
  /* nothing to release when the record has no groups */
  if (buff->groups_no == 0)
    DBUG_VOID_RETURN;

  my_free((uchar*) buff->groups, MYF(0));
  buff->groups_no= 0;
  DBUG_VOID_RETURN;
}


5124 5125
/**
   @brief Returns the current horizon at the end of the current log
5126

5127
   @return Horizon
5128 5129
*/

5130
TRANSLOG_ADDRESS translog_get_horizon()
5131
{
5132
  TRANSLOG_ADDRESS res;
unknown's avatar
unknown committed
5133
  DBUG_ASSERT(translog_inited == 1);
5134
  translog_lock();
5135
  res= log_descriptor.horizon;
5136
  translog_unlock();
5137
  return res;
5138 5139 5140
}


unknown's avatar
unknown committed
5141 5142 5143 5144 5145 5146 5147 5148 5149
/**
   @brief Returns the current horizon at the end of the current log, caller is
   assumed to already hold the lock

   @return Horizon
*/

TRANSLOG_ADDRESS translog_get_horizon_no_lock()
{
unknown's avatar
unknown committed
5150
  DBUG_ASSERT(translog_inited == 1);
unknown's avatar
unknown committed
5151 5152 5153 5154 5155
  translog_lock_assert_owner();
  return log_descriptor.horizon;
}


5156 5157 5158 5159 5160 5161 5162 5163
/*
  Set last page in the scanner data structure

  SYNOPSIS
    translog_scanner_set_last_page()
    scanner              Information about current chunk during scanning

  RETURN
unknown's avatar
unknown committed
5164 5165
    0  OK
    1  Error
5166 5167
*/

unknown's avatar
unknown committed
5168
static my_bool translog_scanner_set_last_page(TRANSLOG_SCANNER_DATA
5169 5170 5171
                                              *scanner)
{
  my_bool page_ok;
5172 5173 5174 5175 5176 5177 5178 5179
  if (LSN_FILE_NO(scanner->page_addr) == LSN_FILE_NO(scanner->horizon))
  {
    /* It is last file => we can easy find last page address by horizon */
    uint pagegrest= LSN_OFFSET(scanner->horizon) % TRANSLOG_PAGE_SIZE;
    scanner->last_file_page= (scanner->horizon -
                              (pagegrest ? pagegrest : TRANSLOG_PAGE_SIZE));
    return (0);
  }
5180
  scanner->last_file_page= scanner->page_addr;
unknown's avatar
unknown committed
5181
  return (translog_get_last_page_addr(&scanner->last_file_page, &page_ok));
5182 5183 5184
}


5185 5186
/**
  @brief Get page from page cache according to requested method

  Reads the page at scanner->page_addr with translog_get_page() and stores
  the result in scanner->page. The address of scanner->direct_link is
  passed only when scanner->use_direct_link is set, otherwise NULL.

  @param scanner         The scanner data

  @return operation status
  @retval 0 OK
  @retval 1 Error
*/

static my_bool
translog_scanner_get_page(TRANSLOG_SCANNER_DATA *scanner)
{
  TRANSLOG_VALIDATOR_DATA data;
  DBUG_ENTER("translog_scanner_get_page");
  data.addr= &scanner->page_addr;
  data.was_recovered= 0;

  if (scanner->use_direct_link)
    scanner->page= translog_get_page(&data, scanner->buffer,
                                     &scanner->direct_link);
  else
    scanner->page= translog_get_page(&data, scanner->buffer, NULL);

  /* a NULL page means the read failed */
  DBUG_RETURN(scanner->page == NULL);
}


/**
  @brief Initialize reader scanner.

  @param lsn             LSN with which it have to be inited
  @param fixed_horizon   true if it is OK do not read records which was written
5216
                         after scanning beginning
5217 5218 5219
  @param scanner         scanner which have to be inited
  @param use_direct      prefer using direct lings from page handler
                         where it is possible.
5220

5221 5222 5223 5224 5225 5226
  @note If direct link was used translog_destroy_scanner should be
        called after it using

  @return status of the operation
  @retval 0 OK
  @retval 1 Error
5227 5228
*/

unknown's avatar
unknown committed
5229 5230
my_bool translog_init_scanner(LSN lsn,
                              my_bool fixed_horizon,
5231 5232
                              TRANSLOG_SCANNER_DATA *scanner,
                              my_bool use_direct)
unknown's avatar
unknown committed
5233 5234
{
  TRANSLOG_VALIDATOR_DATA data;
5235
  DBUG_ENTER("translog_init_scanner");
5236 5237
  DBUG_PRINT("enter", ("Scanner: 0x%lx  LSN: (0x%lu,0x%lx)",
                       (ulong) scanner, LSN_IN_PARTS(lsn)));
5238
  DBUG_ASSERT(LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE != 0);
unknown's avatar
unknown committed
5239
  DBUG_ASSERT(translog_inited == 1);
unknown's avatar
unknown committed
5240 5241 5242 5243

  data.addr= &scanner->page_addr;
  data.was_recovered= 0;

5244
  scanner->page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE;
5245 5246

  scanner->fixed_horizon= fixed_horizon;
5247 5248
  scanner->use_direct_link= use_direct;
  scanner->direct_link= NULL;
5249

5250
  scanner->horizon= translog_get_horizon();
unknown's avatar
unknown committed
5251
  DBUG_PRINT("info", ("horizon: (0x%lu,0x%lx)",
unknown's avatar
unknown committed
5252
                      LSN_IN_PARTS(scanner->horizon)));
5253 5254

  /* lsn < horizon */
5255
  DBUG_ASSERT(lsn < scanner->horizon);
5256

5257 5258
  scanner->page_addr= lsn;
  scanner->page_addr-= scanner->page_offset; /*decrease offset */
5259 5260 5261 5262

  if (translog_scanner_set_last_page(scanner))
    DBUG_RETURN(1);

5263
  if (translog_scanner_get_page(scanner))
5264 5265 5266 5267 5268
    DBUG_RETURN(1);
  DBUG_RETURN(0);
}


5269 5270 5271 5272 5273 5274 5275 5276
/**
  @brief Destroy scanner object;

  Passes the scanner's direct link to translog_free_link(); nothing else
  is released here.

  @param scanner         The scanner object to destroy
*/

void translog_destroy_scanner(TRANSLOG_SCANNER_DATA *scanner)
{
  DBUG_ENTER("translog_destroy_scanner");
  DBUG_PRINT("enter", ("Scanner: 0x%lx", (ulong)scanner));

  translog_free_link(scanner->direct_link);

  DBUG_VOID_RETURN;
}


5284 5285 5286 5287 5288 5289 5290 5291
/*
  Checks End of the Log

  SYNOPSIS
    translog_scanner_eol()
    scanner              Information about current chunk during scanning

  RETURN
unknown's avatar
unknown committed
5292 5293
    1  End of the Log
    0  OK
5294
*/
5295

unknown's avatar
unknown committed
5296
static my_bool translog_scanner_eol(TRANSLOG_SCANNER_DATA *scanner)
5297 5298 5299
{
  DBUG_ENTER("translog_scanner_eol");
  DBUG_PRINT("enter",
unknown's avatar
unknown committed
5300
             ("Horizon: (%lu, 0x%lx)  Current: (%lu, 0x%lx+0x%x=0x%lx)",
unknown's avatar
unknown committed
5301 5302
              LSN_IN_PARTS(scanner->horizon),
              LSN_IN_PARTS(scanner->page_addr),
5303
              (uint) scanner->page_offset,
5304 5305 5306
              (ulong) (LSN_OFFSET(scanner->page_addr) + scanner->page_offset)));
  if (scanner->horizon > (scanner->page_addr +
                          scanner->page_offset))
5307 5308 5309 5310 5311 5312 5313 5314 5315
  {
    DBUG_PRINT("info", ("Horizon is not reached"));
    DBUG_RETURN(0);
  }
  if (scanner->fixed_horizon)
  {
    DBUG_PRINT("info", ("Horizon is fixed and reached"));
    DBUG_RETURN(1);
  }
5316
  scanner->horizon= translog_get_horizon();
5317 5318
  DBUG_PRINT("info",
             ("Horizon is re-read, EOL: %d",
5319 5320 5321 5322
              scanner->horizon <= (scanner->page_addr +
                                   scanner->page_offset)));
  DBUG_RETURN(scanner->horizon <= (scanner->page_addr +
                                   scanner->page_offset));
5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333
}


/*
  Cheks End of the Page

  SYNOPSIS
    translog_scanner_eop()
    scanner              Information about current chunk during scanning

  RETURN
unknown's avatar
unknown committed
5334 5335
    1  End of the Page
    0  OK
5336
*/
5337

unknown's avatar
unknown committed
5338
static my_bool translog_scanner_eop(TRANSLOG_SCANNER_DATA *scanner)
5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354
{
  DBUG_ENTER("translog_scanner_eop");
  DBUG_RETURN(scanner->page_offset >= TRANSLOG_PAGE_SIZE ||
              scanner->page[scanner->page_offset] == 0);
}


/*
  Checks End of the File (I.e. we are scanning last page, which do not
  mean end of this page)

  SYNOPSIS
    translog_scanner_eof()
    scanner              Information about current chunk during scanning

  RETURN
unknown's avatar
unknown committed
5355 5356
    1  End of the File
    0  OK
5357
*/
5358

unknown's avatar
unknown committed
5359
static my_bool translog_scanner_eof(TRANSLOG_SCANNER_DATA *scanner)
5360 5361
{
  DBUG_ENTER("translog_scanner_eof");
5362 5363
  DBUG_ASSERT(LSN_FILE_NO(scanner->page_addr) ==
              LSN_FILE_NO(scanner->last_file_page));
unknown's avatar
unknown committed
5364 5365
  DBUG_PRINT("enter", ("curr Page: 0x%lx  last page: 0x%lx  "
                       "normal EOF: %d",
5366 5367 5368 5369
                       (ulong) LSN_OFFSET(scanner->page_addr),
                       (ulong) LSN_OFFSET(scanner->last_file_page),
                       LSN_OFFSET(scanner->page_addr) ==
                       LSN_OFFSET(scanner->last_file_page)));
5370 5371 5372 5373
  /*
     TODO: detect damaged file EOF,
     TODO: issue warning if damaged file EOF detected
  */
5374 5375
  DBUG_RETURN(scanner->page_addr ==
              scanner->last_file_page);
5376 5377 5378 5379 5380 5381 5382 5383 5384 5385
}

/*
  Move scanner to the next chunk

  SYNOPSIS
    translog_get_next_chunk()
    scanner              Information about current chunk during scanning

  RETURN
unknown's avatar
unknown committed
5386 5387
    0  OK
    1  Error
5388 5389
*/

unknown's avatar
unknown committed
5390 5391
static my_bool
translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner)
5392
{
unknown's avatar
unknown committed
5393
  uint16 len;
5394
  DBUG_ENTER("translog_get_next_chunk");
unknown's avatar
unknown committed
5395 5396 5397

  if ((len= translog_get_total_chunk_length(scanner->page,
                                            scanner->page_offset)) == 0)
5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408
    DBUG_RETURN(1);
  scanner->page_offset+= len;

  if (translog_scanner_eol(scanner))
  {
    scanner->page= &end_of_log;
    scanner->page_offset= 0;
    DBUG_RETURN(0);
  }
  if (translog_scanner_eop(scanner))
  {
5409 5410
    /* before reading next page we should unpin current one if it was pinned */
    translog_free_link(scanner->direct_link);
5411 5412
    if (translog_scanner_eof(scanner))
    {
unknown's avatar
unknown committed
5413
      DBUG_PRINT("info", ("horizon: (%lu,0x%lx)  pageaddr: (%lu,0x%lx)",
unknown's avatar
unknown committed
5414 5415
                          LSN_IN_PARTS(scanner->horizon),
                          LSN_IN_PARTS(scanner->page_addr)));
5416
      /* if it is log end it have to be caught before */
5417 5418 5419 5420 5421
      DBUG_ASSERT(LSN_FILE_NO(scanner->horizon) >
                  LSN_FILE_NO(scanner->page_addr));
      scanner->page_addr+= LSN_ONE_FILE;
      scanner->page_addr= LSN_REPLACE_OFFSET(scanner->page_addr,
                                             TRANSLOG_PAGE_SIZE);
5422 5423 5424 5425 5426
      if (translog_scanner_set_last_page(scanner))
        DBUG_RETURN(1);
    }
    else
    {
5427
      scanner->page_addr+= TRANSLOG_PAGE_SIZE; /* offset increased */
5428
    }
unknown's avatar
unknown committed
5429

5430
    if (translog_scanner_get_page(scanner))
unknown's avatar
unknown committed
5431 5432
      DBUG_RETURN(1);

5433 5434 5435 5436 5437 5438 5439
    scanner->page_offset= translog_get_first_chunk_offset(scanner->page);
    if (translog_scanner_eol(scanner))
    {
      scanner->page= &end_of_log;
      scanner->page_offset= 0;
      DBUG_RETURN(0);
    }
unknown's avatar
unknown committed
5440
    DBUG_ASSERT(scanner->page[scanner->page_offset]);
5441 5442 5443 5444 5445
  }
  DBUG_RETURN(0);
}


5446 5447
/**
   @brief Get header of variable length record and call hook for it processing
5448

5449 5450 5451 5452 5453
   @param page            Pointer to the buffer with page where LSN chunk is
                          placed
   @param page_offset     Offset of the first chunk in the page
   @param buff            Buffer to be filled with header data
   @param scanner         If present should be moved to the header page if
5454 5455 5456
                          it differ from LSN page

   @return                Length of header or operation status
5457 5458 5459 5460
     @retval RECHEADER_READ_ERROR  error
     @retval #                     number of bytes in
                                   TRANSLOG_HEADER_BUFFER::header where
                                   stored decoded part of the header
5461 5462
*/

unknown's avatar
unknown committed
5463 5464 5465 5466
static int
translog_variable_length_header(uchar *page, translog_size_t page_offset,
                                TRANSLOG_HEADER_BUFFER *buff,
                                TRANSLOG_SCANNER_DATA *scanner)
5467
{
unknown's avatar
unknown committed
5468 5469
  struct st_log_record_type_descriptor *desc= (log_record_type_descriptor +
                                               buff->type);
unknown's avatar
unknown committed
5470 5471
  uchar *src= page + page_offset + 1 + 2;
  uchar *dst= buff->header;
5472
  LSN base_lsn;
unknown's avatar
unknown committed
5473
  uint lsns= desc->compressed_LSN;
5474
  uint16 chunk_len;
5475
  uint16 length= desc->read_header_len;
5476 5477
  uint16 buffer_length= length;
  uint16 body_len;
unknown's avatar
unknown committed
5478
  TRANSLOG_SCANNER_DATA internal_scanner;
5479 5480 5481 5482
  DBUG_ENTER("translog_variable_length_header");

  buff->record_length= translog_variable_record_1group_decode_len(&src);
  chunk_len= uint2korr(src);
unknown's avatar
unknown committed
5483
  DBUG_PRINT("info", ("rec len: %lu  chunk len: %u  length: %u  bufflen: %u",
5484 5485 5486 5487 5488 5489 5490 5491 5492 5493
                      (ulong) buff->record_length, (uint) chunk_len,
                      (uint) length, (uint) buffer_length));
  if (chunk_len == 0)
  {
    uint16 page_rest;
    DBUG_PRINT("info", ("1 group"));
    src+= 2;
    page_rest= TRANSLOG_PAGE_SIZE - (src - page);

    base_lsn= buff->lsn;
unknown's avatar
unknown committed
5494
    body_len= min(page_rest, buff->record_length);
5495 5496 5497 5498 5499 5500 5501 5502 5503
  }
  else
  {
    uint grp_no, curr;
    uint header_to_skip;
    uint16 page_rest;

    DBUG_PRINT("info", ("multi-group"));
    grp_no= buff->groups_no= uint2korr(src + 2);
unknown's avatar
unknown committed
5504 5505 5506
    if (!(buff->groups=
          (TRANSLOG_GROUP*) my_malloc(sizeof(TRANSLOG_GROUP) * grp_no,
                                      MYF(0))))
5507
      DBUG_RETURN(RECHEADER_READ_ERROR);
5508 5509 5510 5511 5512 5513 5514 5515 5516
    DBUG_PRINT("info", ("Groups: %u", (uint) grp_no));
    src+= (2 + 2);
    page_rest= TRANSLOG_PAGE_SIZE - (src - page);
    curr= 0;
    header_to_skip= src - (page + page_offset);
    buff->chunk0_pages= 0;

    for (;;)
    {
unknown's avatar
unknown committed
5517
      uint i, read= grp_no;
5518 5519 5520 5521

      buff->chunk0_pages++;
      if (page_rest < grp_no * (7 + 1))
        read= page_rest / (7 + 1);
unknown's avatar
unknown committed
5522 5523
      DBUG_PRINT("info", ("Read chunk0 page#%u  read: %u  left: %u  "
                          "start from: %u",
5524 5525 5526 5527
                          buff->chunk0_pages, read, grp_no, curr));
      for (i= 0; i < read; i++, curr++)
      {
        DBUG_ASSERT(curr < buff->groups_no);
unknown's avatar
unknown committed
5528
        buff->groups[curr].addr= lsn_korr(src + i * (7 + 1));
5529
        buff->groups[curr].num= src[i * (7 + 1) + 7];
unknown's avatar
unknown committed
5530
        DBUG_PRINT("info", ("group #%u (%lu,0x%lx)  chunks: %u",
5531
                            curr,
unknown's avatar
unknown committed
5532
                            LSN_IN_PARTS(buff->groups[curr].addr),
5533 5534 5535 5536 5537 5538 5539 5540
                            (uint) buff->groups[curr].num));
      }
      grp_no-= read;
      if (grp_no == 0)
      {
        if (scanner)
        {
          buff->chunk0_data_addr= scanner->page_addr;
5541
          buff->chunk0_data_addr+= (page_offset + header_to_skip +
unknown's avatar
unknown committed
5542
                                    read * (7 + 1)); /* offset increased */
5543 5544 5545 5546
        }
        else
        {
          buff->chunk0_data_addr= buff->lsn;
5547
          /* offset increased */
unknown's avatar
unknown committed
5548
          buff->chunk0_data_addr+= (header_to_skip + read * (7 + 1));
5549
        }
unknown's avatar
unknown committed
5550 5551
        buff->chunk0_data_len= chunk_len - 2 - read * (7 + 1);
        DBUG_PRINT("info", ("Data address: (%lu,0x%lx)  len: %u",
unknown's avatar
unknown committed
5552
                            LSN_IN_PARTS(buff->chunk0_data_addr),
5553 5554 5555 5556 5557
                            buff->chunk0_data_len));
        break;
      }
      if (scanner == NULL)
      {
unknown's avatar
unknown committed
5558
        DBUG_PRINT("info", ("use internal scanner for header reading"));
5559
        scanner= &internal_scanner;
5560
        if (translog_init_scanner(buff->lsn, 1, scanner, 0))
5561
          DBUG_RETURN(RECHEADER_READ_ERROR);
5562
      }
5563 5564
      if (translog_get_next_chunk(scanner))
        DBUG_RETURN(RECHEADER_READ_ERROR);
5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577
      page= scanner->page;
      page_offset= scanner->page_offset;
      src= page + page_offset + header_to_skip;
      chunk_len= uint2korr(src - 2 - 2);
      DBUG_PRINT("info", ("Chunk len: %u", (uint) chunk_len));
      page_rest= TRANSLOG_PAGE_SIZE - (src - page);
    }

    if (scanner == NULL)
    {
      DBUG_PRINT("info", ("use internal scanner"));
      scanner= &internal_scanner;
    }
5578 5579 5580 5581
    else
    {
      translog_destroy_scanner(scanner);
    }
5582
    base_lsn= buff->groups[0].addr;
5583
    translog_init_scanner(base_lsn, 1, scanner, scanner == &internal_scanner);
5584 5585 5586 5587 5588 5589
    /* first group chunk is always chunk type 2 */
    page= scanner->page;
    page_offset= scanner->page_offset;
    src= page + page_offset + 1;
    page_rest= TRANSLOG_PAGE_SIZE - (src - page);
    body_len= page_rest;
5590 5591
    if (scanner == &internal_scanner)
      translog_destroy_scanner(scanner);
5592 5593 5594
  }
  if (lsns)
  {
unknown's avatar
unknown committed
5595
    uchar *start= src;
5596
    src= translog_relative_LSN_decode(base_lsn, src, dst, lsns);
unknown's avatar
unknown committed
5597
    lsns*= LSN_STORE_SIZE;
5598 5599 5600
    dst+= lsns;
    length-= lsns;
    buff->record_length+= (buff->compressed_LSN_economy=
5601 5602
                           (lsns - (src - start)));
    DBUG_PRINT("info", ("lsns: %u  length: %u  economy: %d  new length: %lu",
unknown's avatar
unknown committed
5603
                        lsns / LSN_STORE_SIZE, (uint) length,
5604
                        (int) buff->compressed_LSN_economy,
5605 5606 5607 5608 5609 5610 5611 5612
                        (ulong) buff->record_length));
    body_len-= (src - start);
  }
  else
    buff->compressed_LSN_economy= 0;

  DBUG_ASSERT(body_len >= length);
  body_len-= length;
unknown's avatar
unknown committed
5613
  memcpy(dst, src, length);
5614 5615
  buff->non_header_data_start_offset= src + length - page;
  buff->non_header_data_len= body_len;
unknown's avatar
unknown committed
5616
  DBUG_PRINT("info", ("non_header_data_start_offset: %u  len: %u  buffer: %u",
5617 5618 5619 5620 5621 5622
                      buff->non_header_data_start_offset,
                      buff->non_header_data_len, buffer_length));
  DBUG_RETURN(buffer_length);
}


5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637
/**
   @brief Read record header from the given buffer

   @param page            page content buffer
   @param page_offset     offset of the chunk in the page
   @param buff            destination buffer
   @param scanner         If this is set the scanner will be moved to the
                          record header page (differ from LSN page in case of
                          multi-group records)

   @return Length of header or operation status
     @retval RECHEADER_READ_ERROR  error
     @retval #                     number of bytes in
                                   TRANSLOG_HEADER_BUFFER::header where 
                                   stored decoded part of the header
5638 5639
*/

5640 5641 5642 5643
int translog_read_record_header_from_buffer(uchar *page,
                                            uint16 page_offset,
                                            TRANSLOG_HEADER_BUFFER *buff,
                                            TRANSLOG_SCANNER_DATA *scanner)
5644
{
unknown's avatar
unknown committed
5645
  translog_size_t res;
5646 5647 5648 5649 5650
  DBUG_ENTER("translog_read_record_header_from_buffer");
  DBUG_ASSERT((page[page_offset] & TRANSLOG_CHUNK_TYPE) ==
              TRANSLOG_CHUNK_LSN ||
              (page[page_offset] & TRANSLOG_CHUNK_TYPE) ==
              TRANSLOG_CHUNK_FIXED);
unknown's avatar
unknown committed
5651
  DBUG_ASSERT(translog_inited == 1);
5652 5653
  buff->type= (page[page_offset] & TRANSLOG_REC_TYPE);
  buff->short_trid= uint2korr(page + page_offset + 1);
5654
  DBUG_PRINT("info", ("Type %u, Short TrID %u, LSN (%lu,0x%lx)",
5655
                      (uint) buff->type, (uint)buff->short_trid,
unknown's avatar
unknown committed
5656
                      LSN_IN_PARTS(buff->lsn)));
5657
  /* Read required bytes from the header and call hook */
unknown's avatar
unknown committed
5658
  switch (log_record_type_descriptor[buff->type].class) {
5659
  case LOGRECTYPE_VARIABLE_LENGTH:
unknown's avatar
unknown committed
5660 5661 5662
    res= translog_variable_length_header(page, page_offset, buff,
                                         scanner);
    break;
5663 5664
  case LOGRECTYPE_PSEUDOFIXEDLENGTH:
  case LOGRECTYPE_FIXEDLENGTH:
unknown's avatar
unknown committed
5665 5666
    res= translog_fixed_length_header(page, page_offset, buff);
    break;
5667
  default:
unknown's avatar
unknown committed
5668
    DBUG_ASSERT(0); /* we read some junk (got no LSN) */
5669
    res= RECHEADER_READ_ERROR;
5670
  }
unknown's avatar
unknown committed
5671
  DBUG_RETURN(res);
5672 5673 5674
}


5675 5676 5677
/**
   @brief Read record header and some fixed part of a record (the part depend
   on record type).
5678

5679 5680
   @param lsn             log record serial number (address of the record)
   @param buff            log record header buffer
5681

5682 5683 5684 5685 5686 5687 5688 5689 5690 5691
   @note Some type of record can be read completely by this call
   @note "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative
   LSN can be translated to absolute one), some fields can be added (like
   actual header length in the record if the header has variable length)

   @return Length of header or operation status
     @retval RECHEADER_READ_ERROR  error
     @retval #                     number of bytes in
                                   TRANSLOG_HEADER_BUFFER::header where
                                   stored decoded part of the header
5692 5693
*/

5694
int translog_read_record_header(LSN lsn, TRANSLOG_HEADER_BUFFER *buff)
5695
{
unknown's avatar
unknown committed
5696
  uchar buffer[TRANSLOG_PAGE_SIZE], *page;
unknown's avatar
unknown committed
5697
  translog_size_t res, page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE;
unknown's avatar
unknown committed
5698
  PAGECACHE_BLOCK_LINK *direct_link;
unknown's avatar
unknown committed
5699 5700
  TRANSLOG_ADDRESS addr;
  TRANSLOG_VALIDATOR_DATA data;
5701
  DBUG_ENTER("translog_read_record_header");
unknown's avatar
unknown committed
5702
  DBUG_PRINT("enter", ("LSN: (0x%lu,0x%lx)", LSN_IN_PARTS(lsn)));
5703
  DBUG_ASSERT(LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE != 0);
unknown's avatar
unknown committed
5704
  DBUG_ASSERT(translog_inited == 1);
5705

5706
  buff->lsn= lsn;
5707
  buff->groups_no= 0;
unknown's avatar
unknown committed
5708 5709 5710 5711
  data.addr= &addr;
  data.was_recovered= 0;
  addr= lsn;
  addr-= page_offset; /* offset decreasing */
5712 5713
  res= (!(page= translog_get_page(&data, buffer, &direct_link))) ?
    RECHEADER_READ_ERROR :
unknown's avatar
unknown committed
5714
    translog_read_record_header_from_buffer(page, page_offset, buff, 0);
5715
  translog_free_link(direct_link);
unknown's avatar
unknown committed
5716
  DBUG_RETURN(res);
5717 5718 5719
}


5720 5721 5722
/**
   @brief Read record header and some fixed part of a record (the part depend
   on record type).
5723

5724 5725 5726
   @param scan            scanner position to read
   @param buff            log record header buffer
   @param move_scanner    request to move scanner to the header position
5727

5728 5729 5730 5731 5732 5733 5734 5735 5736 5737
   @note Some type of record can be read completely by this call
   @note "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative
   LSN can be translated to absolute one), some fields can be added (like
   actual header length in the record if the header has variable length)

   @return Length of header or operation status
     @retval RECHEADER_READ_ERROR  error
     @retval #                     number of bytes in
                                   TRANSLOG_HEADER_BUFFER::header where stored
                                   decoded part of the header
5738 5739
*/

5740 5741 5742
int translog_read_record_header_scan(TRANSLOG_SCANNER_DATA *scanner,
                                     TRANSLOG_HEADER_BUFFER *buff,
                                     my_bool move_scanner)
5743
{
unknown's avatar
unknown committed
5744
  translog_size_t res;
5745
  DBUG_ENTER("translog_read_record_header_scan");
unknown's avatar
unknown committed
5746 5747
  DBUG_PRINT("enter", ("Scanner: Cur: (%lu,0x%lx)  Hrz: (%lu,0x%lx)  "
                       "Lst: (%lu,0x%lx)  Offset: %u(%x)  fixed %d",
unknown's avatar
unknown committed
5748 5749 5750
                       LSN_IN_PARTS(scanner->page_addr),
                       LSN_IN_PARTS(scanner->horizon),
                       LSN_IN_PARTS(scanner->last_file_page),
5751 5752
                       (uint) scanner->page_offset,
                       (uint) scanner->page_offset, scanner->fixed_horizon));
unknown's avatar
unknown committed
5753
  DBUG_ASSERT(translog_inited == 1);
5754 5755
  buff->groups_no= 0;
  buff->lsn= scanner->page_addr;
5756
  buff->lsn+= scanner->page_offset; /* offset increasing */
unknown's avatar
unknown committed
5757 5758 5759 5760 5761 5762
  res= translog_read_record_header_from_buffer(scanner->page,
                                               scanner->page_offset,
                                               buff,
                                               (move_scanner ?
                                                scanner : 0));
  DBUG_RETURN(res);
5763 5764 5765
}


5766 5767 5768
/**
   @brief Read record header and some fixed part of the next record (the part
   depend on record type).
5769

5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781
   @param scanner         data for scanning if lsn is NULL scanner data
                          will be used for continue scanning.
                          The scanner can be NULL.

   @param buff            log record header buffer

   @return Length of header or operation status
     @retval RECHEADER_READ_ERROR  error
     @retval RECHEADER_READ_EOF    EOF
     @retval #                     number of bytes in
                                   TRANSLOG_HEADER_BUFFER::header where
                                   stored decoded part of the header
5782
*/
5783

5784 5785
int translog_read_next_record_header(TRANSLOG_SCANNER_DATA *scanner,
                                     TRANSLOG_HEADER_BUFFER *buff)
5786 5787
{
  uint8 chunk_type;
unknown's avatar
unknown committed
5788
  translog_size_t res;
5789
  buff->groups_no= 0;        /* to be sure that we will free it right */
5790 5791 5792

  DBUG_ENTER("translog_read_next_record_header");
  DBUG_PRINT("enter", ("scanner: 0x%lx", (ulong) scanner));
unknown's avatar
unknown committed
5793 5794
  DBUG_PRINT("info", ("Scanner: Cur: (%lu,0x%lx)  Hrz: (%lu,0x%lx)  "
                      "Lst: (%lu,0x%lx)  Offset: %u(%x)  fixed: %d",
unknown's avatar
unknown committed
5795 5796 5797
                      LSN_IN_PARTS(scanner->page_addr),
                      LSN_IN_PARTS(scanner->horizon),
                      LSN_IN_PARTS(scanner->last_file_page),
5798 5799
                      (uint) scanner->page_offset,
                      (uint) scanner->page_offset, scanner->fixed_horizon));
unknown's avatar
unknown committed
5800
  DBUG_ASSERT(translog_inited == 1);
5801 5802 5803 5804

  do
  {
    if (translog_get_next_chunk(scanner))
5805
      DBUG_RETURN(RECHEADER_READ_ERROR);
5806
    chunk_type= scanner->page[scanner->page_offset] & TRANSLOG_CHUNK_TYPE;
unknown's avatar
unknown committed
5807
    DBUG_PRINT("info", ("type: %x  byte: %x", (uint) chunk_type,
5808 5809 5810 5811 5812 5813 5814
                        (uint) scanner->page[scanner->page_offset]));
  } while (chunk_type != TRANSLOG_CHUNK_LSN && chunk_type !=
           TRANSLOG_CHUNK_FIXED && scanner->page[scanner->page_offset] != 0);

  if (scanner->page[scanner->page_offset] == 0)
  {
    /* Last record was read */
unknown's avatar
unknown committed
5815
    buff->lsn= LSN_IMPOSSIBLE;
unknown's avatar
unknown committed
5816
    /* Return 'end of log' marker */
5817
    res= RECHEADER_READ_EOF;
5818
  }
unknown's avatar
unknown committed
5819 5820 5821
  else
    res= translog_read_record_header_scan(scanner, buff, 0);
  DBUG_RETURN(res);
5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833
}


/*
  Moves record data reader to the next chunk and fill the data reader
  information about that chunk.

  SYNOPSIS
    translog_record_read_next_chunk()
    data                 data cursor

  RETURN
unknown's avatar
unknown committed
5834 5835
    0  OK
    1  Error
5836
*/
5837

5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858
static my_bool translog_record_read_next_chunk(struct st_translog_reader_data
                                               *data)
{
  translog_size_t new_current_offset= data->current_offset + data->chunk_size;
  uint16 chunk_header_len, chunk_len;
  uint8 type;
  DBUG_ENTER("translog_record_read_next_chunk");

  if (data->eor)
  {
    DBUG_PRINT("info", ("end of the record flag set"));
    DBUG_RETURN(1);
  }

  if (data->header.groups_no &&
      data->header.groups_no - 1 != data->current_group &&
      data->header.groups[data->current_group].num == data->current_chunk)
  {
    /* Goto next group */
    data->current_group++;
    data->current_chunk= 0;
unknown's avatar
unknown committed
5859
    DBUG_PRINT("info", ("skip to group: #%u", data->current_group));
5860
    translog_destroy_scanner(&data->scanner);
5861
    translog_init_scanner(data->header.groups[data->current_group].addr,
5862
                          1, &data->scanner, 1);
5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874
  }
  else
  {
    data->current_chunk++;
    if (translog_get_next_chunk(&data->scanner))
      DBUG_RETURN(1);
  }
  type= data->scanner.page[data->scanner.page_offset] & TRANSLOG_CHUNK_TYPE;

  if (type == TRANSLOG_CHUNK_LSN && data->header.groups_no)
  {
    DBUG_PRINT("info",
unknown's avatar
unknown committed
5875
               ("Last chunk: data len: %u  offset: %u  group: %u of %u",
5876 5877 5878
                data->header.chunk0_data_len, data->scanner.page_offset,
                data->current_group, data->header.groups_no - 1));
    DBUG_ASSERT(data->header.groups_no - 1 == data->current_group);
5879 5880
    DBUG_ASSERT(data->header.lsn ==
                data->scanner.page_addr + data->scanner.page_offset);
5881 5882
    translog_destroy_scanner(&data->scanner);
    translog_init_scanner(data->header.chunk0_data_addr, 1, &data->scanner, 1);
5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903
    data->chunk_size= data->header.chunk0_data_len;
    data->body_offset= data->scanner.page_offset;
    data->current_offset= new_current_offset;
    data->eor= 1;
    DBUG_RETURN(0);
  }

  if (type == TRANSLOG_CHUNK_LSN || type == TRANSLOG_CHUNK_FIXED)
  {
    data->eor= 1;
    DBUG_RETURN(1);                             /* End of record */
  }

  chunk_header_len=
    translog_get_chunk_header_length(data->scanner.page,
                                     data->scanner.page_offset);
  chunk_len= translog_get_total_chunk_length(data->scanner.page,
                                             data->scanner.page_offset);
  data->chunk_size= chunk_len - chunk_header_len;
  data->body_offset= data->scanner.page_offset + chunk_header_len;
  data->current_offset= new_current_offset;
unknown's avatar
unknown committed
5904 5905
  DBUG_PRINT("info", ("grp: %u  chunk: %u  body_offset: %u  chunk_size: %u  "
                      "current_offset: %lu",
5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922
                      (uint) data->current_group,
                      (uint) data->current_chunk,
                      (uint) data->body_offset,
                      (uint) data->chunk_size, (ulong) data->current_offset));
  DBUG_RETURN(0);
}


/*
  Initialize record reader data from LSN

  SYNOPSIS
    translog_init_reader_data()
    lsn                  reference to LSN we should start from
    data                 reader data to initialize

  RETURN
unknown's avatar
unknown committed
5923 5924
    0  OK
    1  Error
5925 5926
*/

5927
static my_bool translog_init_reader_data(LSN lsn,
5928 5929
                                         struct st_translog_reader_data *data)
{
unknown's avatar
unknown committed
5930
  int read_header;
5931
  DBUG_ENTER("translog_init_reader_data");
5932
  if (translog_init_scanner(lsn, 1, &data->scanner, 1) ||
unknown's avatar
unknown committed
5933 5934 5935
      ((read_header=
        translog_read_record_header_scan(&data->scanner, &data->header, 1))
       == RECHEADER_READ_ERROR))
5936
    DBUG_RETURN(1);
unknown's avatar
unknown committed
5937
  data->read_header= read_header;
5938 5939 5940 5941 5942 5943
  data->body_offset= data->header.non_header_data_start_offset;
  data->chunk_size= data->header.non_header_data_len;
  data->current_offset= data->read_header;
  data->current_group= 0;
  data->current_chunk= 0;
  data->eor= 0;
unknown's avatar
unknown committed
5944 5945
  DBUG_PRINT("info", ("read_header: %u  "
                      "body_offset: %u  chunk_size: %u  current_offset: %lu",
5946 5947 5948 5949 5950 5951 5952
                      (uint) data->read_header,
                      (uint) data->body_offset,
                      (uint) data->chunk_size, (ulong) data->current_offset));
  DBUG_RETURN(0);
}


5953 5954 5955 5956 5957 5958 5959 5960 5961 5962
/**
  @brief Destroy reader data object
*/

static void translog_destroy_reader_data(struct st_translog_reader_data *data)
{
  translog_destroy_scanner(&data->scanner);
}


5963 5964 5965 5966 5967 5968
/*
  Read a part of the record.

  SYNOPSIS
    translog_read_record_header()
    lsn                  log record serial number (address of the record)
unknown's avatar
unknown committed
5969
    offset               From the beginning of the record beginning (read
5970
                         by translog_read_record_header).
unknown's avatar
unknown committed
5971 5972
    length               Length of record part which have to be read.
    buffer               Buffer where to read the record part (have to be at
5973 5974 5975 5976 5977 5978
                         least 'length' bytes length)

  RETURN
    length of data actually read
*/

5979
translog_size_t translog_read_record(LSN lsn,
5980 5981
                                     translog_size_t offset,
                                     translog_size_t length,
unknown's avatar
unknown committed
5982
                                     uchar *buffer,
5983 5984 5985 5986 5987 5988
                                     struct st_translog_reader_data *data)
{
  translog_size_t requested_length= length;
  translog_size_t end= offset + length;
  struct st_translog_reader_data internal_data;
  DBUG_ENTER("translog_read_record");
unknown's avatar
unknown committed
5989
  DBUG_ASSERT(translog_inited == 1);
5990 5991 5992

  if (data == NULL)
  {
unknown's avatar
unknown committed
5993
    DBUG_ASSERT(lsn != LSN_IMPOSSIBLE);
5994 5995 5996 5997 5998 5999 6000 6001 6002
    data= &internal_data;
  }
  if (lsn ||
      (offset < data->current_offset &&
       !(offset < data->read_header && offset + length < data->read_header)))
  {
    if (translog_init_reader_data(lsn, data))
      DBUG_RETURN(0);
  }
unknown's avatar
unknown committed
6003 6004 6005
  DBUG_PRINT("info", ("Offset: %lu  length: %lu  "
                      "Scanner: Cur: (%lu,0x%lx)  Hrz: (%lu,0x%lx)  "
                      "Lst: (%lu,0x%lx)  Offset: %u(%x)  fixed: %d",
6006
                      (ulong) offset, (ulong) length,
unknown's avatar
unknown committed
6007 6008 6009
                      LSN_IN_PARTS(data->scanner.page_addr),
                      LSN_IN_PARTS(data->scanner.horizon),
                      LSN_IN_PARTS(data->scanner.last_file_page),
6010 6011 6012 6013 6014
                      (uint) data->scanner.page_offset,
                      (uint) data->scanner.page_offset,
                      data->scanner.fixed_horizon));
  if (offset < data->read_header)
  {
unknown's avatar
unknown committed
6015
    uint16 len= min(data->read_header, end) - offset;
6016
    DBUG_PRINT("info",
unknown's avatar
unknown committed
6017
               ("enter header offset: %lu  length: %lu",
6018
                (ulong) offset, (ulong) length));
unknown's avatar
unknown committed
6019
    memcpy(buffer, data->header.header + offset, len);
6020 6021
    length-= len;
    if (length == 0)
6022 6023
    {
      translog_destroy_reader_data(data);
6024
      DBUG_RETURN(requested_length);
6025
    }
6026 6027 6028
    offset+= len;
    buffer+= len;
    DBUG_PRINT("info",
unknown's avatar
unknown committed
6029
               ("len: %u  offset: %lu   curr: %lu  length: %lu",
6030 6031 6032 6033 6034 6035
                len, (ulong) offset, (ulong) data->current_offset,
                (ulong) length));
  }
  /* TODO: find first page which we should read by offset */

  /* read the record chunk by chunk */
unknown's avatar
unknown committed
6036
  for(;;)
6037 6038 6039
  {
    uint page_end= data->current_offset + data->chunk_size;
    DBUG_PRINT("info",
unknown's avatar
unknown committed
6040 6041
               ("enter body offset: %lu  curr: %lu  "
                "length: %lu  page_end: %lu",
6042 6043 6044 6045 6046
                (ulong) offset, (ulong) data->current_offset, (ulong) length,
                (ulong) page_end));
    if (offset < page_end)
    {
      uint len= page_end - offset;
unknown's avatar
unknown committed
6047
      set_if_smaller(len, length); /* in case we read beyond record's end */
unknown's avatar
unknown committed
6048 6049
      DBUG_ASSERT(offset >= data->current_offset);
      memcpy(buffer,
6050 6051 6052 6053
              data->scanner.page + data->body_offset +
              (offset - data->current_offset), len);
      length-= len;
      if (length == 0)
6054 6055
      {
        translog_destroy_reader_data(data);
6056
        DBUG_RETURN(requested_length);
6057
      }
6058 6059 6060
      offset+= len;
      buffer+= len;
      DBUG_PRINT("info",
unknown's avatar
unknown committed
6061
                 ("len: %u  offset: %lu  curr: %lu  length: %lu",
6062 6063 6064 6065
                  len, (ulong) offset, (ulong) data->current_offset,
                  (ulong) length));
    }
    if (translog_record_read_next_chunk(data))
6066 6067
    {
      translog_destroy_reader_data(data);
6068
      DBUG_RETURN(requested_length - length);
6069
    }
unknown's avatar
unknown committed
6070
  }
6071 6072 6073 6074
}


/*
  @brief Force skipping to the next buffer

  @todo Do not copy old page content if all page protections are switched off
  (because we do not need to calculate anything or change old parts of the
  page)
*/

static void translog_force_current_buffer_to_finish()
{
unknown's avatar
unknown committed
6083
  TRANSLOG_ADDRESS new_buff_beginning;
unknown's avatar
unknown committed
6084 6085 6086 6087
  uint16 old_buffer_no= log_descriptor.bc.buffer_no;
  uint16 new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
  struct st_translog_buffer *new_buffer= (log_descriptor.buffers +
                                          new_buffer_no);
6088
  struct st_translog_buffer *old_buffer= log_descriptor.bc.buffer;
6089
  uchar *data= log_descriptor.bc.ptr - log_descriptor.bc.current_page_fill;
unknown's avatar
unknown committed
6090 6091
  uint16 left= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill;
  uint16 current_page_fill, write_counter, previous_offset;
6092
  DBUG_ENTER("translog_force_current_buffer_to_finish");
unknown's avatar
unknown committed
6093 6094 6095 6096
  DBUG_PRINT("enter", ("Buffer #%u 0x%lx  "
                       "Buffer addr: (%lu,0x%lx)  "
                       "Page addr: (%lu,0x%lx)  "
                       "size: %lu (%lu)  Pg: %u  left: %u",
6097 6098
                       (uint) log_descriptor.bc.buffer_no,
                       (ulong) log_descriptor.bc.buffer,
unknown's avatar
unknown committed
6099
                       LSN_IN_PARTS(log_descriptor.bc.buffer->offset),
6100 6101
                       (ulong) LSN_FILE_NO(log_descriptor.horizon),
                       (ulong) (LSN_OFFSET(log_descriptor.horizon) -
unknown's avatar
unknown committed
6102
                                log_descriptor.bc.current_page_fill),
6103 6104 6105
                       (ulong) log_descriptor.bc.buffer->size,
                       (ulong) (log_descriptor.bc.ptr -log_descriptor.bc.
                                buffer->buffer),
unknown's avatar
unknown committed
6106
                       (uint) log_descriptor.bc.current_page_fill,
6107
                       (uint) left));
unknown's avatar
unknown committed
6108

unknown's avatar
unknown committed
6109 6110 6111
  LINT_INIT(current_page_fill);
  new_buff_beginning= log_descriptor.bc.buffer->offset;
  new_buff_beginning+= log_descriptor.bc.buffer->size; /* increase offset */
unknown's avatar
unknown committed
6112

6113
  DBUG_ASSERT(log_descriptor.bc.ptr !=NULL);
6114 6115
  DBUG_ASSERT(LSN_FILE_NO(log_descriptor.horizon) ==
              LSN_FILE_NO(log_descriptor.bc.buffer->offset));
unknown's avatar
unknown committed
6116
  DBUG_EXECUTE("info", translog_check_cursor(&log_descriptor.bc););
unknown's avatar
unknown committed
6117 6118
  DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE);
  if (left != 0)
6119 6120 6121 6122 6123
  {
    /*
       TODO: if 'left' is so small that can't hold any other record
       then do not move the page
    */
unknown's avatar
unknown committed
6124
    DBUG_PRINT("info", ("left: %u", (uint) left));
6125

6126
    /* decrease offset */
unknown's avatar
unknown committed
6127
    new_buff_beginning-= log_descriptor.bc.current_page_fill;
unknown's avatar
unknown committed
6128
    current_page_fill= log_descriptor.bc.current_page_fill;
6129 6130 6131

    bzero(log_descriptor.bc.ptr, left);
    log_descriptor.bc.buffer->size+= left;
unknown's avatar
unknown committed
6132
    DBUG_PRINT("info", ("Finish Page buffer #%u: 0x%lx  "
6133 6134 6135 6136 6137 6138 6139 6140 6141
                        "Size: %lu",
                        (uint) log_descriptor.bc.buffer->buffer_no,
                        (ulong) log_descriptor.bc.buffer,
                        (ulong) log_descriptor.bc.buffer->size));
    DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no ==
                log_descriptor.bc.buffer_no);
  }
  else
  {
unknown's avatar
unknown committed
6142
    log_descriptor.bc.current_page_fill= 0;
6143 6144 6145 6146 6147
  }

  translog_buffer_lock(new_buffer);
  translog_wait_for_buffer_free(new_buffer);

unknown's avatar
unknown committed
6148 6149 6150
  write_counter= log_descriptor.bc.write_counter;
  previous_offset= log_descriptor.bc.previous_offset;
  translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no);
unknown's avatar
unknown committed
6151
  /* Fix buffer offset (which was incorrectly set to horizon) */
unknown's avatar
unknown committed
6152
  log_descriptor.bc.buffer->offset= new_buff_beginning;
unknown's avatar
unknown committed
6153 6154
  log_descriptor.bc.write_counter= write_counter;
  log_descriptor.bc.previous_offset= previous_offset;
6155

unknown's avatar
unknown committed
6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179
  /*
    Advances this log pointer, increases writers and let other threads to
    write to the log while we process old page content
  */
  if (left)
  {
    log_descriptor.bc.ptr+= current_page_fill;
    log_descriptor.bc.buffer->size= log_descriptor.bc.current_page_fill=
      current_page_fill;
    new_buffer->overlay= old_buffer;
  }
  else
    translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
  translog_buffer_increase_writers(new_buffer);
  translog_buffer_unlock(new_buffer);

  /*
    We have to wait until all writers finish before start changing the
    pages by applying protection and copying the page content in the
    new buffer.
  */
  translog_wait_for_writers(old_buffer);


unknown's avatar
unknown committed
6180
  if (data[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION)
6181 6182 6183 6184 6185
  {
    translog_put_sector_protection(data, &log_descriptor.bc);
    if (left)
    {
      log_descriptor.bc.write_counter++;
unknown's avatar
unknown committed
6186
      log_descriptor.bc.previous_offset= current_page_fill;
6187 6188 6189 6190 6191 6192 6193 6194 6195
    }
    else
    {
      DBUG_PRINT("info", ("drop write_counter"));
      log_descriptor.bc.write_counter= 0;
      log_descriptor.bc.previous_offset= 0;
    }
  }

unknown's avatar
unknown committed
6196
  if (data[TRANSLOG_PAGE_FLAGS] & TRANSLOG_PAGE_CRC)
6197
  {
unknown's avatar
unknown committed
6198 6199 6200
    uint32 crc= translog_crc(data + log_descriptor.page_overhead,
                             TRANSLOG_PAGE_SIZE -
                             log_descriptor.page_overhead);
6201 6202 6203 6204 6205 6206
    DBUG_PRINT("info", ("CRC: 0x%lx", (ulong) crc));
    int4store(data + 3 + 3 + 1, crc);
  }

  if (left)
  {
6207
    /*
unknown's avatar
unknown committed
6208
      TODO: do not copy beginning of the page if we have no CRC or sector
6209 6210
      checks on
    */
unknown's avatar
unknown committed
6211
    memcpy(new_buffer->buffer, data, current_page_fill);
6212
  }
6213
  old_buffer->next_buffer_offset= new_buffer->offset;
6214

unknown's avatar
unknown committed
6215 6216 6217 6218
  translog_buffer_lock(new_buffer);
  translog_buffer_decrease_writers(new_buffer);
  translog_buffer_unlock(new_buffer);

6219 6220 6221
  DBUG_VOID_RETURN;
}

unknown's avatar
unknown committed
6222

6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247
/**
   @brief Flush the log up to given LSN (included)

   @param  lsn             log record serial number up to which (inclusive)
                           the log has to be flushed

   @return Operation status
     @retval 0      OK
     @retval 1      Error

  @todo LOG: when a log write fails, we should not write to this log anymore
  (if we add more log records to this log they will be unreadable: we will hit
  the broken log record): all translog_flush() should be made to fail (because
  translog_flush() is when a a transaction wants something durable and we
  cannot make anything durable as log is corrupted). For that, a "my_bool
  st_translog_descriptor::write_error" could be set to 1 when a
  translog_write_record() or translog_flush() fails, and translog_flush()
  would test this var (and translog_write_record() could also test this var if
  it wants, though it's not absolutely needed).
  Then, either shut Maria down immediately, or switch to a new log (but if we
  get write error after write error, that would create too many logs).
  A popular open-source transactional engine intentionally crashes as soon as
  a log flush fails (we however don't want to crash the entire mysqld, but
  stopping all engine's operations immediately would make sense).
  Same applies to translog_write_record().
6248 6249

  @todo: remove serialization and make group commit.
6250 6251
*/

6252
my_bool translog_flush(LSN lsn)
6253 6254 6255 6256 6257 6258
{
  LSN old_flushed, sent_to_file;
  int rc= 0;
  uint i;
  my_bool full_circle= 0;
  DBUG_ENTER("translog_flush");
unknown's avatar
unknown committed
6259
  DBUG_PRINT("enter", ("Flush up to LSN: (%lu,0x%lx)", LSN_IN_PARTS(lsn)));
unknown's avatar
unknown committed
6260
  DBUG_ASSERT(translog_inited == 1);
6261

6262
  pthread_mutex_lock(&log_descriptor.log_flush_lock);
6263 6264 6265 6266
  translog_lock();
  old_flushed= log_descriptor.flushed;
  for (;;)
  {
unknown's avatar
unknown committed
6267 6268
    uint16 buffer_no= log_descriptor.bc.buffer_no;
    uint16 buffer_start= buffer_no;
6269 6270 6271
    struct st_translog_buffer *buffer_unlock= log_descriptor.bc.buffer;
    struct st_translog_buffer *buffer= log_descriptor.bc.buffer;
    /* we can't flush in future */
6272 6273
    DBUG_ASSERT(cmp_translog_addr(log_descriptor.horizon, lsn) >= 0);
    if (cmp_translog_addr(log_descriptor.flushed, lsn) >= 0)
6274
    {
unknown's avatar
unknown committed
6275
      DBUG_PRINT("info", ("already flushed: (%lu,0x%lx)",
unknown's avatar
unknown committed
6276
                          LSN_IN_PARTS(log_descriptor.flushed)));
unknown's avatar
unknown committed
6277
      translog_unlock();
6278
      goto out;
6279 6280
    }
    /* send to the file if it is not sent */
6281
    sent_to_file= translog_get_sent_to_file();
6282
    if (cmp_translog_addr(sent_to_file, lsn) >= 0)
6283 6284 6285 6286 6287 6288 6289 6290 6291
      break;

    do
    {
      buffer_no= (buffer_no + 1) % TRANSLOG_BUFFERS_NO;
      buffer= log_descriptor.buffers + buffer_no;
      translog_buffer_lock(buffer);
      translog_buffer_unlock(buffer_unlock);
      buffer_unlock= buffer;
unknown's avatar
unknown committed
6292
      if (buffer->file != -1)
6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303
      {
        buffer_unlock= NULL;
        if (buffer_start == buffer_no)
        {
          /* we made a circle */
          full_circle= 1;
          translog_force_current_buffer_to_finish();
        }
        break;
      }
    } while ((buffer_start != buffer_no) &&
6304
             cmp_translog_addr(log_descriptor.flushed, lsn) < 0);
6305
    if (buffer_unlock != NULL && buffer_unlock != buffer)
6306
      translog_buffer_unlock(buffer_unlock);
unknown's avatar
unknown committed
6307
    rc= translog_buffer_flush(buffer);
6308
    translog_buffer_unlock(buffer);
unknown's avatar
unknown committed
6309
    if (rc)
6310 6311 6312 6313
    {
      rc= 1;
      goto out;
    }
unknown's avatar
unknown committed
6314
    translog_lock();
6315
  }
unknown's avatar
unknown committed
6316
  translog_unlock();
6317

6318
  for (i= LSN_FILE_NO(old_flushed); i <= LSN_FILE_NO(lsn); i++)
6319 6320 6321 6322
  {
    uint cache_index;
    File file;

6323 6324
    if ((cache_index= LSN_FILE_NO(log_descriptor.horizon) - i) <
        OPENED_FILES_NUM)
6325 6326
    {
      /* file in the cache */
unknown's avatar
unknown committed
6327
      if (log_descriptor.log_file_num[cache_index] == -1)
6328 6329
      {
        if ((log_descriptor.log_file_num[cache_index]=
unknown's avatar
unknown committed
6330
             open_logfile_by_number_no_cache(i)) == -1)
6331
        {
6332 6333
          rc= 1;
          goto out;
6334 6335 6336 6337 6338
        }
      }
      file= log_descriptor.log_file_num[cache_index];
      rc|= my_sync(file, MYF(MY_WME));
    }
unknown's avatar
unknown committed
6339
    /* We sync file when we are closing it => do nothing if file closed */
6340 6341
  }
  log_descriptor.flushed= sent_to_file;
6342
  /** @todo LOG decide if syncing of directory is needed */
6343
  rc|= my_sync(log_descriptor.directory_fd, MYF(MY_WME | MY_IGNORE_BADFD));
6344 6345
out:
  pthread_mutex_unlock(&log_descriptor.log_flush_lock);
6346 6347
  DBUG_RETURN(rc);
}
6348 6349


6350 6351 6352 6353 6354 6355 6356
/**
   @brief Gives a 2-byte-id to MARIA_SHARE and logs this fact

   If a MARIA_SHARE does not yet have a 2-byte-id (unique over all currently
   open MARIA_SHAREs), give it one and record this assignment in the log
   (LOGREC_FILE_ID log record).

unknown's avatar
unknown committed
6357
   @param  tbl_info        table
6358 6359 6360 6361 6362 6363 6364 6365 6366
   @param  trn             calling transaction

   @return Operation status
     @retval 0      OK
     @retval 1      Error

   @note Can be called even if share already has an id (then will do nothing)
*/

unknown's avatar
unknown committed
6367
int translog_assign_id_to_share(MARIA_HA *tbl_info, TRN *trn)
6368
{
unknown's avatar
unknown committed
6369
  MARIA_SHARE *share= tbl_info->s;
6370 6371 6372 6373 6374 6375 6376 6377 6378 6379
  /*
    If you give an id to a non-BLOCK_RECORD table, you also need to release
    this id somewhere. Then you can change the assertion.
  */
  DBUG_ASSERT(share->data_file_type == BLOCK_RECORD);
  /* re-check under mutex to avoid having 2 ids for the same share */
  pthread_mutex_lock(&share->intern_lock);
  if (likely(share->id == 0))
  {
    /* Inspired by set_short_trid() of trnman.c */
unknown's avatar
unknown committed
6380 6381
    uint i= share->kfile.file % SHARE_ID_MAX + 1;
    do
6382
    {
unknown's avatar
unknown committed
6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396
      my_atomic_rwlock_wrlock(&LOCK_id_to_share);
      for ( ; i <= SHARE_ID_MAX ; i++) /* the range is [1..SHARE_ID_MAX] */
      {
        void *tmp= NULL;
        if (id_to_share[i] == NULL &&
            my_atomic_casptr((void **)&id_to_share[i], &tmp, share))
        {
          share->id= (uint16)i;
          break;
        }
      }
      my_atomic_rwlock_wrunlock(&LOCK_id_to_share);
      i= 1; /* scan the whole array */
    } while (share->id == 0);
unknown's avatar
unknown committed
6397
    DBUG_PRINT("info", ("id_to_share: 0x%lx -> %u", (ulong)share, share->id));
6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413
    LSN lsn;
    LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
    uchar log_data[FILEID_STORE_SIZE];
    log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    (char*) log_data;
    log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
    /*
      open_file_name is an unresolved name (symlinks are not resolved, datadir
      is not realpath-ed, etc) which is good: the log can be moved to another
      directory and continue working.
    */
    log_array[TRANSLOG_INTERNAL_PARTS + 1].str= share->open_file_name;
    /**
       @todo if we had the name's length in MARIA_SHARE we could avoid this
       strlen()
    */
    log_array[TRANSLOG_INTERNAL_PARTS + 1].length=
unknown's avatar
unknown committed
6414
      strlen(share->open_file_name) + 1;
unknown's avatar
unknown committed
6415
    if (unlikely(translog_write_record(&lsn, LOGREC_FILE_ID, trn, tbl_info,
6416 6417 6418 6419
                                       sizeof(log_data) +
                                       log_array[TRANSLOG_INTERNAL_PARTS +
                                                 1].length,
                                       sizeof(log_array)/sizeof(log_array[0]),
unknown's avatar
unknown committed
6420
                                       log_array, log_data, NULL)))
6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441
      return 1;
  }
  pthread_mutex_unlock(&share->intern_lock);
  return 0;
}


/**
   @brief Recycles a MARIA_SHARE's short id.

   Empties the share's slot in id_to_share[] and resets share->id and
   share->lsn_of_file_id.

   @param  share           table

   @note Must be called only if share has an id (i.e. id != 0)
   @note The caller must own share->intern_lock (asserted below).
*/

void translog_deassign_id_from_share(MARIA_SHARE *share)
{
  /* the slot of id_to_share[] currently pointing to this share */
  void **slot= (void **)&id_to_share[share->id];
  DBUG_PRINT("info", ("id_to_share: 0x%lx id %u -> 0",
                      (ulong)share, share->id));
  /*
    No writes can be happening, as we are called only when closing the last
    instance of the table or at the end of REPAIR. Still, a Checkpoint may
    be reading share->id at this moment, and this is why the caller has to
    hold the share's mutex:
  */
  safe_mutex_assert_owner(&share->intern_lock);
  /*
    NOTE(review): the store is done under the read side of LOCK_id_to_share
    while translog_assign_id_to_share() uses the write side - confirm that
    this is intended.
  */
  my_atomic_rwlock_rdlock(&LOCK_id_to_share);
  my_atomic_storeptr(slot, 0);
  my_atomic_rwlock_rdunlock(&LOCK_id_to_share);
  /* useless but safety: */
  share->lsn_of_file_id= LSN_IMPOSSIBLE;
  share->id= 0;
}
unknown's avatar
unknown committed
6454 6455


unknown's avatar
unknown committed
6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466
/**
   @brief Gives the share the short id dictated by recovery

   Unlike translog_assign_id_to_share() no id is searched for and nothing is
   written to the log: the given id is stored in the share and the share is
   registered in id_to_share[] under it, without any locking.

   @param  share           table (must be BLOCK_RECORD and have no id yet)
   @param  id              id to assign; its slot must be free

   @note Only for single-threaded recovery (see assertions).
*/

void translog_assign_id_to_share_from_recovery(MARIA_SHARE *share,
                                               uint16 id)
{
  DBUG_ASSERT(maria_in_recovery && !maria_multi_threaded);
  DBUG_ASSERT(share->data_file_type == BLOCK_RECORD);
  DBUG_ASSERT(share->id == 0);
  DBUG_ASSERT(id_to_share[id] == NULL);
  share->id= id;
  id_to_share[id]= share;
}


6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480
/**
   @brief Checks if the log file with the given number exists

   The file name is built by translog_filename_by_fileno() and probed with
   my_stat(); no error message is requested on failure (MYF(0)).

   @param file_no          number of the file to test

   @retval 0 no such file
   @retval 1 there is file with such number
*/

my_bool translog_is_file(uint file_no)
{
  MY_STAT stat_buff;
  char path[FN_REFLEN];
  char *file_name= translog_filename_by_fileno(file_no, path);
  /* my_stat() returns NULL when it cannot stat the file */
  MY_STAT *found= my_stat(file_name, &stat_buff, MYF(0));
  return (found != NULL);
}


unknown's avatar
unknown committed
6485
/**
6486
  @brief returns minimum log file number
unknown's avatar
unknown committed
6487

6488 6489 6490 6491 6492
  @param horizon         the end of the log
  @param is_protected    true if it is under purge_log protection

  @retval minimum file number
  @retval 0 no files found
unknown's avatar
unknown committed
6493 6494
*/

6495
static uint32 translog_first_file(TRANSLOG_ADDRESS horizon, int is_protected)
unknown's avatar
unknown committed
6496
{
6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509
  uint min_file= 1, max_file;
  DBUG_ENTER("translog_first_file");
  if (!is_protected)
    pthread_mutex_lock(&log_descriptor.purger_lock);
  if (log_descriptor.min_file_number &&
      translog_is_file(log_descriptor.min_file_number))
  {
    DBUG_PRINT("info", ("cached %lu",
                        (ulong) log_descriptor.min_file_number));
    if (!is_protected)
      pthread_mutex_unlock(&log_descriptor.purger_lock);
    DBUG_RETURN(log_descriptor.min_file_number);
  }
6510

6511 6512 6513
  max_file= LSN_FILE_NO(horizon);

  if (MAKE_LSN(1, TRANSLOG_PAGE_SIZE) >= horizon)
6514 6515
  {
    /* there is no first page yet */
6516
    DBUG_RETURN(0);
6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531
  }

  /* binary search for last file */
  while (min_file != max_file && min_file != (max_file - 1))
  {
    uint test= (min_file + max_file) / 2;
    DBUG_PRINT("info", ("min_file: %u  test: %u  max_file: %u",
                        min_file, test, max_file));
    if (test == max_file)
      test--;
    if (translog_is_file(test))
      max_file= test;
    else
      min_file= test;
  }
6532 6533 6534 6535 6536 6537 6538
  log_descriptor.min_file_number= max_file;
  if (!is_protected)
    pthread_mutex_unlock(&log_descriptor.purger_lock);
  DBUG_RETURN(max_file);
}


6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553
/**
  @brief returns the most close LSN higher the given chunk address

  Scans the chunks starting at 'addr' until a chunk which begins a record
  (TRANSLOG_CHUNK_LSN or TRANSLOG_CHUNK_FIXED) or a zero byte (page filler)
  is met. The scanner is destroyed on every path taken after it was
  initialized, including the error one.

  @param addr the chunk address to start from
  @param horizon the horizon if it is known or LSN_IMPOSSIBLE

  @retval LSN_ERROR Error
  @retval LSN_IMPOSSIBLE no LSNs after the address
  @retval # LSN of the most close LSN higher the given chunk address
*/

LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon)
{
  uint chunk_type;
  TRANSLOG_SCANNER_DATA scanner;
  LSN result;
  DBUG_ENTER("translog_next_LSN");

  if (horizon == LSN_IMPOSSIBLE)
    horizon= translog_get_horizon();

  /* nothing can follow the end of the log */
  if (addr == horizon)
    DBUG_RETURN(LSN_IMPOSSIBLE);

  translog_init_scanner(addr, 0, &scanner, 1);

  chunk_type= scanner.page[scanner.page_offset] & TRANSLOG_CHUNK_TYPE;
  DBUG_PRINT("info", ("type: %x  byte: %x", (uint) chunk_type,
                      (uint) scanner.page[scanner.page_offset]));
  while (chunk_type != TRANSLOG_CHUNK_LSN &&
         chunk_type != TRANSLOG_CHUNK_FIXED &&
         scanner.page[scanner.page_offset] != 0)
  {
    if (translog_get_next_chunk(&scanner))
    {
      /* do not return directly: the scanner has to be destroyed */
      result= LSN_ERROR;
      goto out;
    }
    chunk_type= scanner.page[scanner.page_offset] & TRANSLOG_CHUNK_TYPE;
    DBUG_PRINT("info", ("type: %x  byte: %x", (uint) chunk_type,
                        (uint) scanner.page[scanner.page_offset]));
  }

  if (scanner.page[scanner.page_offset] == 0)
    result= LSN_IMPOSSIBLE; /* reached page filler */
  else
    result= scanner.page_addr + scanner.page_offset;
out:
  translog_destroy_scanner(&scanner);
  DBUG_RETURN(result);
}

6587 6588 6589 6590
/**
   @brief returns the LSN of the first record starting in this log

   @retval LSN_ERROR Error
unknown's avatar
unknown committed
6591
   @retval LSN_IMPOSSIBLE no log or the log is empty
6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602
   @retval # LSN of the first record
*/

LSN translog_first_lsn_in_log()
{
  TRANSLOG_ADDRESS addr, horizon= translog_get_horizon();
  TRANSLOG_VALIDATOR_DATA data;
  uint file;
  uint16 chunk_offset;
  uchar *page;
  DBUG_ENTER("translog_first_lsn_in_log");
unknown's avatar
unknown committed
6603
  DBUG_PRINT("info", ("Horizon: (%lu,0x%lx)", LSN_IN_PARTS(addr)));
unknown's avatar
unknown committed
6604
  DBUG_ASSERT(translog_inited == 1);
6605 6606 6607 6608 6609 6610

  if (!(file= translog_first_file(horizon, 0)))
  {
    /* log has no records yet */
    DBUG_RETURN(LSN_IMPOSSIBLE);
  }
6611

6612
  addr= MAKE_LSN(file, TRANSLOG_PAGE_SIZE); /* the first page of the file */
6613 6614
  data.addr= &addr;
  {
6615
    uchar buffer[TRANSLOG_PAGE_SIZE];
6616
    if ((page= translog_get_page(&data, buffer, NULL)) == NULL ||
6617
        (chunk_offset= translog_get_first_chunk_offset(page)) == 0)
6618 6619
      DBUG_RETURN(LSN_ERROR);
  }
6620 6621 6622
  addr+= chunk_offset;

  DBUG_RETURN(translog_next_LSN(addr, horizon));
6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639
}


/**
   @brief returns theoretical first LSN if first log is present

   @retval LSN_ERROR Error
   @retval LSN_IMPOSSIBLE no log
   @retval # LSN of the first record
*/

LSN translog_first_theoretical_lsn()
{
  TRANSLOG_ADDRESS addr= translog_get_horizon();
  uchar buffer[TRANSLOG_PAGE_SIZE], *page;
  TRANSLOG_VALIDATOR_DATA data;
  DBUG_ENTER("translog_first_theoretical_lsn");
unknown's avatar
unknown committed
6640
  DBUG_PRINT("info", ("Horizon: (%lu,0x%lx)", LSN_IN_PARTS(addr)));
unknown's avatar
unknown committed
6641
  DBUG_ASSERT(translog_inited == 1);
6642 6643 6644 6645 6646

  if (!translog_is_file(1))
    DBUG_RETURN(LSN_IMPOSSIBLE);
  if (addr == MAKE_LSN(1, TRANSLOG_PAGE_SIZE))
  {
6647
    /* log has no records yet */
6648 6649 6650 6651 6652 6653
    DBUG_RETURN(MAKE_LSN(1, TRANSLOG_PAGE_SIZE +
                         log_descriptor.page_overhead));
  }

  addr= MAKE_LSN(1, TRANSLOG_PAGE_SIZE); /* the first page of the file */
  data.addr= &addr;
6654
  if ((page= translog_get_page(&data, buffer, NULL)) == NULL)
6655 6656 6657 6658
    DBUG_RETURN(LSN_ERROR);

  DBUG_RETURN(MAKE_LSN(1, TRANSLOG_PAGE_SIZE +
                       page_overhead[page[TRANSLOG_PAGE_FLAGS]]));
unknown's avatar
unknown committed
6659
}
6660 6661 6662 6663 6664


/**
  @brief Check given low water mark and purge files if it is need

unknown's avatar
unknown committed
6665
  @param low the last (minimum) address which is need
6666 6667 6668 6669 6670

  @retval 0 OK
  @retval 1 Error
*/

unknown's avatar
unknown committed
6671
my_bool translog_purge(TRANSLOG_ADDRESS low)
6672 6673 6674 6675 6676
{
  uint32 last_need_file= LSN_FILE_NO(low);
  TRANSLOG_ADDRESS horizon= translog_get_horizon();
  int rc= 0;
  DBUG_ENTER("translog_purge");
unknown's avatar
unknown committed
6677
  DBUG_PRINT("enter", ("low: (%lu,0x%lx)", LSN_IN_PARTS(low)));
unknown's avatar
unknown committed
6678
  DBUG_ASSERT(translog_inited == 1);
6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710

  pthread_mutex_lock(&log_descriptor.purger_lock);
  if (LSN_FILE_NO(log_descriptor.last_lsn_checked) < last_need_file)
  {
    uint32 i;
    uint32 min_file= translog_first_file(horizon, 1);
    DBUG_ASSERT(min_file != 0); /* log is already started */

    for(i= min_file; i < last_need_file && rc == 0; i++)
    {
      LSN lsn= translog_get_file_max_lsn_stored(i);
      if (lsn == LSN_IMPOSSIBLE)
        break;   /* files are still in writing */
      if (lsn == LSN_ERROR)
      {
        rc= 1;
        break;
      }
      if (cmp_translog_addr(lsn, low) >= 0)
        break;
      DBUG_PRINT("info", ("purge file %lu", (ulong) i));
      {
        char path[FN_REFLEN], *file_name;
        file_name= translog_filename_by_fileno(i, path);
        rc= test(my_delete(file_name, MYF(MY_WME)));
      }
    }
  }

  pthread_mutex_unlock(&log_descriptor.purger_lock);
  DBUG_RETURN(rc);
}