trnman.c 12.2 KB
Newer Older
unknown's avatar
unknown committed
1
/* Copyright (C) 2006 MySQL AB
unknown's avatar
unknown committed
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */


#include <my_global.h>
#include <my_sys.h>
#include <lf.h>
#include "trnman.h"

unknown's avatar
unknown committed
23
/* status variables */
unknown's avatar
unknown committed
24 25
uint trnman_active_transactions, trnman_allocated_transactions;

unknown's avatar
unknown committed
26 27 28 29
/* list of active transactions in the trid order */
static TRN active_list_min, active_list_max;
/* list of committed transactions in the trid order */
static TRN committed_list_min, committed_list_max;
unknown's avatar
unknown committed
30

unknown's avatar
unknown committed
31
/* a counter, used to generate transaction ids */
unknown's avatar
unknown committed
32 33
static TrID global_trid_generator;

unknown's avatar
unknown committed
34 35 36 37 38 39 40 41
/* the mutex for everything above */
static pthread_mutex_t LOCK_trn_list;

/* LIFO pool of unused TRN structured for reuse */
static TRN *pool;

/* a hash for committed transactions that maps trid to a TRN structure */
static LF_HASH trid_to_committed_trn;
unknown's avatar
unknown committed
42

unknown's avatar
unknown committed
43 44 45 46
/* an array that maps short_trid of an active transaction to a TRN structure */
static TRN **short_trid_to_active_trn;

/* locks for short_trid_to_active_trn and pool */
unknown's avatar
unknown committed
47 48
static my_atomic_rwlock_t LOCK_short_trid_to_trn, LOCK_pool;

unknown's avatar
unknown committed
49 50
static LOCKMAN maria_lockman;

51 52 53 54 55
/*
  short transaction id is at the same time its identifier
  for a lock manager - its lock owner identifier (loid)
*/
#define short_id locks.loid
unknown's avatar
unknown committed
56

57 58 59 60 61 62
/*
  NOTE
    Just as short_id doubles as loid, this function doubles as
    short_trid_to_LOCK_OWNER. See the compile-time assert below.
*/
static TRN *short_trid_to_TRN(uint16 short_trid)
unknown's avatar
unknown committed
63 64
{
  TRN *trn;
65
  compile_time_assert(offsetof(TRN, locks) == 0);
unknown's avatar
unknown committed
66
  my_atomic_rwlock_rdlock(&LOCK_short_trid_to_trn);
unknown's avatar
unknown committed
67
  trn= my_atomic_loadptr((void **)&short_trid_to_active_trn[short_trid]);
unknown's avatar
unknown committed
68
  my_atomic_rwlock_rdunlock(&LOCK_short_trid_to_trn);
69
  return (TRN *)trn;
unknown's avatar
unknown committed
70 71
}

72
static byte *trn_get_hash_key(const byte *trn, uint* len, my_bool unused)
unknown's avatar
unknown committed
73
{
74 75 76
  *len= sizeof(TrID);
  return (byte *) & ((*((TRN **)trn))->trid);
}
unknown's avatar
unknown committed
77

78 79
int trnman_init()
{
unknown's avatar
unknown committed
80 81 82 83 84 85 86 87 88 89
  /*
    Initialize lists.
    active_list_max.min_read_from must be larger than any trid,
    so that when an active list is empty we would could free
    all committed list.
    And  committed_list_max itself can not be freed so
    committed_list_max.commit_trid must not be smaller that
    active_list_max.min_read_from
  */

unknown's avatar
unknown committed
90 91 92 93 94 95 96 97 98 99 100
  active_list_max.trid= active_list_min.trid= 0;
  active_list_max.min_read_from= ~0;
  active_list_max.next= active_list_min.prev= 0;
  active_list_max.prev= &active_list_min;
  active_list_min.next= &active_list_max;

  committed_list_max.commit_trid= ~0;
  committed_list_max.next= committed_list_min.prev= 0;
  committed_list_max.prev= &committed_list_min;
  committed_list_min.next= &committed_list_max;

unknown's avatar
unknown committed
101 102 103
  trnman_active_transactions= 0;
  trnman_allocated_transactions= 0;

unknown's avatar
unknown committed
104
  pool= 0;
unknown's avatar
unknown committed
105 106
  global_trid_generator= 0; /* set later by the recovery code */
  lf_hash_init(&trid_to_committed_trn, sizeof(TRN*), LF_HASH_UNIQUE,
unknown's avatar
unknown committed
107
               0, 0, trn_get_hash_key, 0);
108
  pthread_mutex_init(&LOCK_trn_list, MY_MUTEX_INIT_FAST);
unknown's avatar
unknown committed
109 110
  my_atomic_rwlock_init(&LOCK_short_trid_to_trn);
  my_atomic_rwlock_init(&LOCK_pool);
unknown's avatar
unknown committed
111
  short_trid_to_active_trn= (TRN **)my_malloc(SHORT_TRID_MAX*sizeof(TRN*),
unknown's avatar
unknown committed
112
                                     MYF(MY_WME|MY_ZEROFILL));
unknown's avatar
unknown committed
113
  if (unlikely(!short_trid_to_active_trn))
unknown's avatar
unknown committed
114
    return 1;
unknown's avatar
unknown committed
115
  short_trid_to_active_trn--; /* min short_trid is 1 */
unknown's avatar
unknown committed
116

117
  lockman_init(&maria_lockman, (loid_to_lo_func *)&short_trid_to_TRN, 10000);
unknown's avatar
unknown committed
118 119 120 121

  return 0;
}

unknown's avatar
unknown committed
122 123 124 125 126
/*
  NOTE
    this could only be called in the "idle" state - no transaction can be
    running. See asserts below.
*/
127
void trnman_destroy()
unknown's avatar
unknown committed
128
{
unknown's avatar
unknown committed
129
  DBUG_ASSERT(trid_to_committed_trn.count == 0);
unknown's avatar
unknown committed
130 131 132 133 134 135 136 137 138 139 140 141 142
  DBUG_ASSERT(trnman_active_transactions == 0);
  DBUG_ASSERT(active_list_max.prev == &active_list_min);
  DBUG_ASSERT(active_list_min.next == &active_list_max);
  DBUG_ASSERT(committed_list_max.prev == &committed_list_min);
  DBUG_ASSERT(committed_list_min.next == &committed_list_max);
  while (pool)
  {
    TRN *trn= pool;
    pool= pool->next;
    DBUG_ASSERT(trn->locks.mutex == 0);
    DBUG_ASSERT(trn->locks.cond == 0);
    my_free((void *)trn, MYF(0));
  }
unknown's avatar
unknown committed
143
  lf_hash_destroy(&trid_to_committed_trn);
unknown's avatar
unknown committed
144 145 146
  pthread_mutex_destroy(&LOCK_trn_list);
  my_atomic_rwlock_destroy(&LOCK_short_trid_to_trn);
  my_atomic_rwlock_destroy(&LOCK_pool);
unknown's avatar
unknown committed
147
  my_free((void *)(short_trid_to_active_trn+1), MYF(0));
unknown's avatar
unknown committed
148 149 150
  lockman_destroy(&maria_lockman);
}

unknown's avatar
unknown committed
151 152 153 154 155 156
/*
  NOTE
    TrID is limited to 6 bytes. Initial value of the generator
    is set by the recovery code - being read from the last checkpoint
    (or 1 on a first run).
*/
unknown's avatar
unknown committed
157 158 159 160 161 162 163 164 165
static TrID new_trid()
{
  DBUG_ASSERT(global_trid_generator < 0xffffffffffffLL);
  safe_mutex_assert_owner(&LOCK_trn_list);
  return ++global_trid_generator;
}

static void set_short_trid(TRN *trn)
{
166
  int i= (global_trid_generator + (intptr)trn) * 312089 % SHORT_TRID_MAX + 1;
unknown's avatar
unknown committed
167 168 169 170
  my_atomic_rwlock_wrlock(&LOCK_short_trid_to_trn);
  for ( ; ; i= i % SHORT_TRID_MAX + 1) /* the range is [1..SHORT_TRID_MAX] */
  {
    void *tmp= NULL;
unknown's avatar
unknown committed
171 172
    if (short_trid_to_active_trn[i] == NULL &&
        my_atomic_casptr((void **)&short_trid_to_active_trn[i], &tmp, trn))
unknown's avatar
unknown committed
173 174 175
      break;
  }
  my_atomic_rwlock_wrunlock(&LOCK_short_trid_to_trn);
176
  trn->short_id= i;
unknown's avatar
unknown committed
177 178
}

unknown's avatar
unknown committed
179 180 181 182 183
/*
  DESCRIPTION
    start a new transaction, allocate and initialize transaction object
    mutex and cond will be used for lock waits
*/
unknown's avatar
unknown committed
184 185 186 187 188
TRN *trnman_new_trn(pthread_mutex_t *mutex, pthread_cond_t *cond)
{
  TRN *trn;

  /*
unknown's avatar
unknown committed
189 190
    we have a mutex, to do simple things under it - allocate a TRN,
    increment trnman_active_transactions, set trn->min_read_from.
unknown's avatar
unknown committed
191 192

    Note that all the above is fast. generating short_trid may be slow,
unknown's avatar
unknown committed
193 194
    as it involves scanning a large array - so it's done outside of the
    mutex.
unknown's avatar
unknown committed
195 196 197 198
  */

  pthread_mutex_lock(&LOCK_trn_list);

unknown's avatar
unknown committed
199
  /* Allocating a new TRN structure */
unknown's avatar
unknown committed
200
  trn= pool;
unknown's avatar
unknown committed
201 202 203 204
  /*
    Popping an unused TRN from the pool
    (ABA isn't possible, we're behind a mutex
  */
unknown's avatar
unknown committed
205 206 207 208 209 210
  my_atomic_rwlock_wrlock(&LOCK_pool);
  while (trn && !my_atomic_casptr((void **)&pool, (void **)&trn,
                                  (void *)trn->next))
    /* no-op */;
  my_atomic_rwlock_wrunlock(&LOCK_pool);

unknown's avatar
unknown committed
211
  /* Nothing in the pool ? Allocate a new one */
unknown's avatar
unknown committed
212 213 214
  if (!trn)
  {
    trn= (TRN *)my_malloc(sizeof(TRN), MYF(MY_WME));
unknown's avatar
unknown committed
215
    if (unlikely(!trn))
unknown's avatar
unknown committed
216 217 218 219 220 221
    {
      pthread_mutex_unlock(&LOCK_trn_list);
      return 0;
    }
    trnman_allocated_transactions++;
  }
unknown's avatar
unknown committed
222
  trnman_active_transactions++;
unknown's avatar
unknown committed
223 224 225 226

  trn->min_read_from= active_list_min.next->trid;

  trn->trid= new_trid();
227
  trn->short_id= 0;
unknown's avatar
unknown committed
228 229 230 231 232 233

  trn->next= &active_list_max;
  trn->prev= active_list_max.prev;
  active_list_max.prev= trn->prev->next= trn;
  pthread_mutex_unlock(&LOCK_trn_list);

unknown's avatar
unknown committed
234
  trn->pins= lf_hash_get_pins(&trid_to_committed_trn);
unknown's avatar
unknown committed
235

unknown's avatar
unknown committed
236
  if (unlikely(!trn->min_read_from))
unknown's avatar
unknown committed
237 238
    trn->min_read_from= trn->trid;

unknown's avatar
unknown committed
239 240
  trn->commit_trid= 0;

unknown's avatar
unknown committed
241 242 243 244 245 246
  trn->locks.mutex= mutex;
  trn->locks.cond= cond;
  trn->locks.waiting_for= 0;
  trn->locks.all_locks= 0;
  trn->locks.pins= lf_alloc_get_pins(&maria_lockman.alloc);

unknown's avatar
unknown committed
247 248 249 250 251
  /*
    only after the following function TRN is considered initialized,
    so it must be done the last
  */
  set_short_trid(trn);
unknown's avatar
unknown committed
252 253 254 255 256

  return trn;
}

/*
unknown's avatar
unknown committed
257 258
  remove a trn from the active list.
  if necessary - move to committed list and set commit_trid
259 260 261 262 263 264 265 266 267

  NOTE
    Locks are released at the end. In particular, after placing the
    transaction in commit list, and after setting commit_trid. It's
    important, as commit_trid affects visibility.  Locks don't affect
    anything they simply delay execution of other threads - they could be
    released arbitrarily late. In other words, when locks are released it
    serves as a start banner for other threads, they start to run. So
    everything they may need must be ready at that point.
unknown's avatar
unknown committed
268 269 270 271 272 273 274
*/
void trnman_end_trn(TRN *trn, my_bool commit)
{
  TRN *free_me= 0;
  LF_PINS *pins= trn->pins;

  pthread_mutex_lock(&LOCK_trn_list);
unknown's avatar
unknown committed
275 276

  /* remove from active list */
unknown's avatar
unknown committed
277 278 279
  trn->next->prev= trn->prev;
  trn->prev->next= trn->next;

unknown's avatar
unknown committed
280 281 282 283 284
  /*
    if trn was the oldest active transaction, now that it goes away there
    may be committed transactions in the list which no active transaction
    needs to bother about - clean up the committed list
  */
unknown's avatar
unknown committed
285 286 287 288 289 290 291
  if (trn->prev == &active_list_min)
  {
    TRN *t;
    for (t= committed_list_min.next;
         t->commit_trid < active_list_min.next->min_read_from;
         t= t->next) /* no-op */;

unknown's avatar
unknown committed
292
    /* found transactions committed before the oldest active one */
unknown's avatar
unknown committed
293 294 295 296 297 298 299 300 301
    if (t != committed_list_min.next)
    {
      free_me= committed_list_min.next;
      committed_list_min.next= t;
      t->prev->next= 0;
      t->prev= &committed_list_min;
    }
  }

unknown's avatar
unknown committed
302 303 304 305
  /*
    if transaction is committed and it was not the only active transaction -
    add it to the committed list (which is used for read-from relation)
  */
unknown's avatar
unknown committed
306 307
  if (commit && active_list_min.next != &active_list_max)
  {
308
    int res;
unknown's avatar
unknown committed
309

310
    trn->commit_trid= global_trid_generator;
unknown's avatar
unknown committed
311 312 313 314
    trn->next= &committed_list_max;
    trn->prev= committed_list_max.prev;
    committed_list_max.prev= trn->prev->next= trn;

unknown's avatar
unknown committed
315
    res= lf_hash_insert(&trid_to_committed_trn, pins, &trn);
unknown's avatar
unknown committed
316 317
    DBUG_ASSERT(res == 0);
  }
unknown's avatar
unknown committed
318
  else /* otherwise free it right away */
unknown's avatar
unknown committed
319 320 321 322 323 324 325
  {
    trn->next= free_me;
    free_me= trn;
  }
  trnman_active_transactions--;
  pthread_mutex_unlock(&LOCK_trn_list);

unknown's avatar
unknown committed
326
  /* the rest is done outside of a critical section */
unknown's avatar
unknown committed
327 328 329 330
  lockman_release_locks(&maria_lockman, &trn->locks);
  trn->locks.mutex= 0;
  trn->locks.cond= 0;
  my_atomic_rwlock_rdlock(&LOCK_short_trid_to_trn);
331
  my_atomic_storeptr((void **)&short_trid_to_active_trn[trn->short_id], 0);
unknown's avatar
unknown committed
332 333
  my_atomic_rwlock_rdunlock(&LOCK_short_trid_to_trn);

unknown's avatar
unknown committed
334 335 336 337 338 339
  /*
    we, under the mutex, removed going-in-free_me transactions from the
    active and committed lists, thus nobody else may see them when it scans
    those lists, and thus nobody may want to free them. Now we don't
    need a mutex to access free_me list
  */
unknown's avatar
unknown committed
340 341 342 343 344
  while (free_me) // XXX send them to the purge thread
  {
    TRN *t= free_me;
    free_me= free_me->next;

345
    lf_hash_delete(&trid_to_committed_trn, pins, &t->trid, sizeof(TrID));
unknown's avatar
unknown committed
346 347 348 349 350 351 352 353 354 355

    trnman_free_trn(t);
  }

  lf_hash_put_pins(pins);
  lf_pinbox_put_pins(trn->locks.pins);
}

/*
  free a trn (add to the pool, that is)
356 357 358 359 360 361 362
  note - we can never really free() a TRN if there's at least one other
  running transaction - see, e.g., how lock waits are implemented in
  lockman.c
  The same is true for other lock-free data structures too. We may need some
  kind of FLUSH command to reset them all - ensuring that no transactions are
  running. It may even be called automatically on checkpoints if no
  transactions are running.
unknown's avatar
unknown committed
363 364 365 366 367 368 369 370 371
*/
void trnman_free_trn(TRN *trn)
{
  TRN *tmp= pool;

  my_atomic_rwlock_wrlock(&LOCK_pool);
  do
  {
    /*
unknown's avatar
unknown committed
372
      without this volatile cast gcc-3.4.4 moved the assignment
unknown's avatar
unknown committed
373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400
      down after the loop at -O2
    */
    *(TRN * volatile *)&(trn->next)= tmp;
  } while (!my_atomic_casptr((void **)&pool, (void **)&tmp, trn));
  my_atomic_rwlock_wrunlock(&LOCK_pool);
}

/*
  NOTE
    here we access the hash in a lock-free manner.
    It's safe, a 'found' TRN can never be freed/reused before we access it.
    In fact, it cannot be freed before 'trn' ends, because a 'found' TRN
    can only be removed from the hash when:
                found->commit_trid < ALL (trn->min_read_from)
    that is, at least
                found->commit_trid < trn->min_read_from
    but
                found->trid >= trn->min_read_from
    and
                found->commit_trid > found->trid
*/
my_bool trnman_can_read_from(TRN *trn, TrID trid)
{
  TRN **found;
  my_bool can;
  LF_REQUIRE_PINS(3);

  if (trid < trn->min_read_from)
unknown's avatar
unknown committed
401
    return TRUE; /* can read */
unknown's avatar
unknown committed
402
  if (trid > trn->trid)
unknown's avatar
unknown committed
403
    return FALSE; /* cannot read */
unknown's avatar
unknown committed
404

unknown's avatar
unknown committed
405
  found= lf_hash_search(&trid_to_committed_trn, trn->pins, &trid, sizeof(trid));
unknown's avatar
unknown committed
406
  if (!found)
unknown's avatar
unknown committed
407
    return FALSE; /* not in the hash of committed transactions = cannot read */
unknown's avatar
unknown committed
408 409 410 411 412 413

  can= (*found)->commit_trid < trn->trid;
  lf_unpin(trn->pins, 2);
  return can;
}