ha_ndbcluster.cc 312 KB
Newer Older
1
/* Copyright (C) 2000-2003 MySQL AB
2 3 4 5 6 7 8 9 10 11 12 13 14

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
15
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16 17 18 19 20 21 22
*/

/*
  This file defines the NDB Cluster handler: the interface between MySQL and
  NDB Cluster
*/

23
#ifdef USE_PRAGMA_IMPLEMENTATION
24
#pragma implementation				// gcc: Class implementation
25 26 27 28 29
#endif

#include "mysql_priv.h"

#include <my_dir.h>
acurtis@xiphis.org's avatar
acurtis@xiphis.org committed
30
#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
31 32 33
#include "ha_ndbcluster.h"
#include <ndbapi/NdbApi.hpp>
#include <ndbapi/NdbScanFilter.hpp>
34
#include <../util/Bitmask.hpp>
35
#include <ndbapi/NdbIndexStat.hpp>
36

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
37
#include "ha_ndbcluster_binlog.h"
38
#include "ha_ndbcluster_tables.h"
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
39

acurtis@xiphis.org's avatar
acurtis@xiphis.org committed
40 41
#include <mysql/plugin.h>

42 43 44 45 46
#ifdef ndb_dynamite
#undef assert
#define assert(x) do { if(x) break; ::printf("%s %d: assert failed: %s\n", __FILE__, __LINE__, #x); ::fflush(stdout); ::signal(SIGABRT,SIG_DFL); ::abort(); ::kill(::getpid(),6); ::kill(::getpid(),9); } while (0)
#endif

47 48 49
// options from from mysqld.cc
extern my_bool opt_ndb_optimized_node_selection;
extern const char *opt_ndbcluster_connectstring;
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
50
extern ulong opt_ndb_cache_check_time;
51

52 53 54 55 56 57 58 59 60 61
// ndb interface initialization/cleanup
#ifdef  __cplusplus
extern "C" {
#endif
extern void ndb_init_internal();
extern void ndb_end_internal();
#ifdef  __cplusplus
}
#endif

62 63 64 65 66 67
const char *ndb_distribution_names[]= {"KEYHASH", "LINHASH", NullS};
TYPELIB ndb_distribution_typelib= { array_elements(ndb_distribution_names)-1,
                                    "", ndb_distribution_names, NULL };
const char *opt_ndb_distribution= ndb_distribution_names[ND_KEYHASH];
enum ndb_distribution opt_ndb_distribution_id= ND_KEYHASH;

68
// Default value for parallelism
69
static const int parallelism= 0;
70

71 72
// Default value for max number of transactions
// createable against NDB from this handler
tulin@dl145b.mysql.com's avatar
tulin@dl145b.mysql.com committed
73
static const int max_transactions= 3; // should really be 2 but there is a transaction to much allocated when loch table is used
74

75 76
static uint ndbcluster_partition_flags();
static uint ndbcluster_alter_table_flags(uint flags);
77
static int ndbcluster_init(void *);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
78 79
static int ndbcluster_end(ha_panic_function flag);
static bool ndbcluster_show_status(THD*,stat_print_fn *,enum ha_stat_type);
80
static int ndbcluster_alter_tablespace(THD* thd, st_alter_tablespace *info);
81
static int ndbcluster_fill_files_table(THD *thd, TABLE_LIST *tables, COND *cond);
82

83
handlerton *ndbcluster_hton;
84

85 86
static handler *ndbcluster_create_handler(TABLE_SHARE *table,
                                          MEM_ROOT *mem_root)
87
{
88
  return new (mem_root) ha_ndbcluster(table);
89 90
}

91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
static uint ndbcluster_partition_flags()
{
  return (HA_CAN_PARTITION | HA_CAN_UPDATE_PARTITION_KEY |
          HA_CAN_PARTITION_UNIQUE | HA_USE_AUTO_PARTITION);
}

static uint ndbcluster_alter_table_flags(uint flags)
{
  if (flags & ALTER_DROP_PARTITION)
    return 0;
  else
    return (HA_ONLINE_ADD_INDEX | HA_ONLINE_DROP_INDEX |
            HA_ONLINE_ADD_UNIQUE_INDEX | HA_ONLINE_DROP_UNIQUE_INDEX |
            HA_PARTITION_FUNCTION_SUPPORTED);

}

108
#define NDB_AUTO_INCREMENT_RETRIES 10
109 110

#define ERR_PRINT(err) \
111
  DBUG_PRINT("error", ("%d  message: %s", err.code, err.message))
112

113 114
#define ERR_RETURN(err)                  \
{                                        \
115
  const NdbError& tmp= err;              \
116
  ERR_PRINT(tmp);                        \
117
  DBUG_RETURN(ndb_to_mysql_error(&tmp)); \
118 119
}

120 121 122 123 124 125 126 127
#define ERR_BREAK(err, code)             \
{                                        \
  const NdbError& tmp= err;              \
  ERR_PRINT(tmp);                        \
  code= ndb_to_mysql_error(&tmp);        \
  break;                                 \
}

128
static int ndbcluster_inited= 0;
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
129
int ndbcluster_util_inited= 0;
130

131
static Ndb* g_ndb= NULL;
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
132
Ndb_cluster_connection* g_ndb_cluster_connection= NULL;
monty@mysql.com's avatar
monty@mysql.com committed
133
uchar g_node_id_map[max_ndb_nodes];
134

135 136 137 138
// Handler synchronization
pthread_mutex_t ndbcluster_mutex;

// Table lock handling
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
139
HASH ndbcluster_open_tables;
140 141 142

static byte *ndbcluster_get_key(NDB_SHARE *share,uint *length,
                                my_bool not_used __attribute__((unused)));
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
143 144 145
#ifdef HAVE_NDB_BINLOG
static int rename_share(NDB_SHARE *share, const char *new_key);
#endif
146
static void ndb_set_fragmentation(NDBTAB &tab, TABLE *table, uint pk_len);
147

148
static int ndb_get_table_statistics(Ndb*, const NDBTAB *, 
149
                                    struct Ndb_statistics *);
150

151

mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
152
// Util thread variables
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
153
pthread_t ndb_util_thread;
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
154 155
pthread_mutex_t LOCK_ndb_util_thread;
pthread_cond_t COND_ndb_util_thread;
156
pthread_handler_t ndb_util_thread_func(void *arg);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
157
ulong ndb_cache_check_time;
158

159 160 161 162
/*
  Dummy buffer to read zero pack_length fields
  which are mapped to 1 char
*/
163
static uint32 dummy_buf;
164

165 166 167 168 169 170 171 172 173 174 175
/*
  Stats that can be retrieved from ndb
*/

struct Ndb_statistics {
  Uint64 row_count;
  Uint64 commit_count;
  Uint64 row_size;
  Uint64 fragment_memory;
};

176 177 178 179 180 181
/* Status variables shown with 'show status like 'Ndb%' */

static long ndb_cluster_node_id= 0;
static const char * ndb_connected_host= 0;
static long ndb_connected_port= 0;
static long ndb_number_of_replicas= 0;
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
182
long ndb_number_of_storage_nodes= 0;
183 184
long ndb_number_of_ready_storage_nodes= 0;
long ndb_connect_count= 0;
185 186 187 188 189 190 191 192

static int update_status_variables(Ndb_cluster_connection *c)
{
  ndb_cluster_node_id=         c->node_id();
  ndb_connected_port=          c->get_connected_port();
  ndb_connected_host=          c->get_connected_host();
  ndb_number_of_replicas=      0;
  ndb_number_of_storage_nodes= c->no_db_nodes();
193 194
  ndb_number_of_ready_storage_nodes= c->get_no_ready();
  ndb_connect_count= c->get_connect_count();
195 196 197
  return 0;
}

serg@serg.mylan's avatar
serg@serg.mylan committed
198
SHOW_VAR ndb_status_variables[]= {
199
  {"cluster_node_id",        (char*) &ndb_cluster_node_id,         SHOW_LONG},
200 201
  {"config_from_host",         (char*) &ndb_connected_host,      SHOW_CHAR_PTR},
  {"config_from_port",         (char*) &ndb_connected_port,          SHOW_LONG},
202 203 204 205 206
//  {"number_of_replicas",     (char*) &ndb_number_of_replicas,      SHOW_LONG},
  {"number_of_storage_nodes",(char*) &ndb_number_of_storage_nodes, SHOW_LONG},
  {NullS, NullS, SHOW_LONG}
};

207 208 209 210
/*
  Error handling functions
*/

211
/* Note for merge: old mapping table, moved to storage/ndb/ndberror.c */
212

213
static int ndb_to_mysql_error(const NdbError *ndberr)
214
{
215 216
  /* read the mysql mapped error code */
  int error= ndberr->mysql_code;
217

218 219 220 221 222 223 224 225 226 227 228 229 230
  switch (error)
  {
    /* errors for which we do not add warnings, just return mapped error code
    */
  case HA_ERR_NO_SUCH_TABLE:
  case HA_ERR_KEY_NOT_FOUND:
  case HA_ERR_FOUND_DUPP_KEY:
    return error;

    /* Mapping missing, go with the ndb error code*/
  case -1:
    error= ndberr->code;
    break;
231

232 233 234 235
    /* Mapping exists, go with the mapped code */
  default:
    break;
  }
236

237 238 239 240 241 242
  /*
    Push the NDB error message as warning
    - Used to be able to use SHOW WARNINGS toget more info on what the error is
    - Used by replication to see if the error was temporary
  */
  if (ndberr->status == NdbError::TemporaryError)
243
    push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
244 245 246 247 248 249 250
			ER_GET_TEMPORARY_ERRMSG, ER(ER_GET_TEMPORARY_ERRMSG),
			ndberr->code, ndberr->message, "NDB");
  else
    push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
			ER_GET_ERRMSG, ER(ER_GET_ERRMSG),
			ndberr->code, ndberr->message, "NDB");
  return error;
251 252
}

253 254 255 256 257 258 259 260 261 262 263 264
int execute_no_commit_ignore_no_key(ha_ndbcluster *h, NdbTransaction *trans)
{
  int res= trans->execute(NdbTransaction::NoCommit,
                          NdbTransaction::AO_IgnoreError,
                          h->m_force_send);
  if (res == 0)
    return 0;

  const NdbError &err= trans->getNdbError();
  if (err.classification != NdbError::ConstraintViolation &&
      err.classification != NdbError::NoDataFound)
    return res;
265

266 267
  return 0;
}
268 269

inline
270
int execute_no_commit(ha_ndbcluster *h, NdbTransaction *trans,
271
		      bool force_release)
272
{
273
#ifdef NOT_USED
274
  int m_batch_execute= 0;
275
  if (m_batch_execute)
276
    return 0;
277
#endif
278
  h->release_completed_operations(trans, force_release);
279 280 281 282 283
  return h->m_ignore_no_key ?
    execute_no_commit_ignore_no_key(h,trans) :
    trans->execute(NdbTransaction::NoCommit,
		   NdbTransaction::AbortOnError,
		   h->m_force_send);
284 285 286
}

inline
287
int execute_commit(ha_ndbcluster *h, NdbTransaction *trans)
288
{
289
#ifdef NOT_USED
290
  int m_batch_execute= 0;
291
  if (m_batch_execute)
292
    return 0;
293
#endif
294
  return trans->execute(NdbTransaction::Commit,
295 296
                        NdbTransaction::AbortOnError,
                        h->m_force_send);
297 298 299
}

inline
300
int execute_commit(THD *thd, NdbTransaction *trans)
301 302
{
#ifdef NOT_USED
303
  int m_batch_execute= 0;
304 305 306
  if (m_batch_execute)
    return 0;
#endif
307
  return trans->execute(NdbTransaction::Commit,
308 309
                        NdbTransaction::AbortOnError,
                        thd->variables.ndb_force_send);
310 311 312
}

inline
313
int execute_no_commit_ie(ha_ndbcluster *h, NdbTransaction *trans,
314
			 bool force_release)
315
{
316
#ifdef NOT_USED
317
  int m_batch_execute= 0;
318
  if (m_batch_execute)
319
    return 0;
320
#endif
321
  h->release_completed_operations(trans, force_release);
322
  return trans->execute(NdbTransaction::NoCommit,
323 324
                        NdbTransaction::AO_IgnoreError,
                        h->m_force_send);
325 326
}

327 328 329
/*
  Place holder for ha_ndbcluster thread specific data
*/
330 331 332 333 334
static
byte *thd_ndb_share_get_key(THD_NDB_SHARE *thd_ndb_share, uint *length,
                            my_bool not_used __attribute__((unused)))
{
  *length= sizeof(thd_ndb_share->key);
335
  return (byte*) &thd_ndb_share->key;
336 337
}

338 339
Thd_ndb::Thd_ndb()
{
340
  ndb= new Ndb(g_ndb_cluster_connection, "");
341 342
  lock_count= 0;
  count= 0;
343 344
  all= NULL;
  stmt= NULL;
345
  error= 0;
346
  query_state&= NDB_QUERY_NORMAL;
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
347
  options= 0;
348
  (void) hash_init(&open_tables, &my_charset_bin, 5, 0, 0,
349
                   (hash_get_key)thd_ndb_share_get_key, 0, 0);
350 351 352 353
}

Thd_ndb::~Thd_ndb()
{
354
  if (ndb)
355 356
  {
#ifndef DBUG_OFF
357 358
    Ndb::Free_list_usage tmp;
    tmp.m_name= 0;
359 360 361 362 363 364 365 366 367 368
    while (ndb->get_free_list_usage(&tmp))
    {
      uint leaked= (uint) tmp.m_created - tmp.m_free;
      if (leaked)
        fprintf(stderr, "NDB: Found %u %s%s that %s not been released\n",
                leaked, tmp.m_name,
                (leaked == 1)?"":"'s",
                (leaked == 1)?"has":"have");
    }
#endif
369
    delete ndb;
370
    ndb= NULL;
371
  }
372
  changed_tables.empty();
373 374 375 376 377 378
  hash_free(&open_tables);
}

void
Thd_ndb::init_open_tables()
{
379 380
  count= 0;
  error= 0;
381 382 383 384 385 386 387
  my_hash_reset(&open_tables);
}

THD_NDB_SHARE *
Thd_ndb::get_open_table(THD *thd, const void *key)
{
  DBUG_ENTER("Thd_ndb::get_open_table");
388
  HASH_SEARCH_STATE state;
389
  THD_NDB_SHARE *thd_ndb_share=
390
    (THD_NDB_SHARE*)hash_first(&open_tables, (byte *)&key, sizeof(key), &state);
391
  while (thd_ndb_share && thd_ndb_share->key != key)
392
    thd_ndb_share= (THD_NDB_SHARE*)hash_next(&open_tables, (byte *)&key, sizeof(key), &state);
393 394 395 396 397
  if (thd_ndb_share == 0)
  {
    thd_ndb_share= (THD_NDB_SHARE *) alloc_root(&thd->transaction.mem_root,
                                                sizeof(THD_NDB_SHARE));
    thd_ndb_share->key= key;
398 399
    thd_ndb_share->stat.last_count= count;
    thd_ndb_share->stat.no_uncommitted_rows_count= 0;
400
    thd_ndb_share->stat.records= ~(ha_rows)0;
401 402
    my_hash_insert(&open_tables, (byte *)thd_ndb_share);
  }
403 404 405 406
  else if (thd_ndb_share->stat.last_count != count)
  {
    thd_ndb_share->stat.last_count= count;
    thd_ndb_share->stat.no_uncommitted_rows_count= 0;
407
    thd_ndb_share->stat.records= ~(ha_rows)0;
408
  }
409
  DBUG_PRINT("exit", ("thd_ndb_share: 0x%x  key: 0x%x", thd_ndb_share, key));
410
  DBUG_RETURN(thd_ndb_share);
411 412
}

413 414 415
inline
Ndb *ha_ndbcluster::get_ndb()
{
416
  return get_thd_ndb(current_thd)->ndb;
417 418 419 420 421 422
}

/*
 * manage uncommitted insert/deletes during transactio to get records correct
 */

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
423 424 425
void ha_ndbcluster::set_rec_per_key()
{
  DBUG_ENTER("ha_ndbcluster::get_status_const");
426
  for (uint i=0 ; i < table_share->keys ; i++)
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
427 428 429 430 431 432
  {
    table->key_info[i].rec_per_key[table->key_info[i].key_parts-1]= 1;
  }
  DBUG_VOID_RETURN;
}

433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448
ha_rows ha_ndbcluster::records()
{
  ha_rows retval;
  DBUG_ENTER("ha_ndbcluster::records");
  struct Ndb_local_table_statistics *info= m_table_info;
  DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
                      ((const NDBTAB *)m_table)->getTableId(),
                      info->no_uncommitted_rows_count));

  Ndb *ndb= get_ndb();
  ndb->setDatabaseName(m_dbname);
  struct Ndb_statistics stat;
  if (ndb_get_table_statistics(ndb, m_table, &stat) == 0)
  {
    retval= stat.row_count;
  }
449 450 451 452 453 454 455
  else
  {
    /**
     * Be consistent with BUG#19914 until we fix it properly
     */
    DBUG_RETURN(-1);
  }
456 457 458 459 460 461 462 463

  THD *thd= current_thd;
  if (get_thd_ndb(thd)->error)
    info->no_uncommitted_rows_count= 0;

  DBUG_RETURN(retval + info->no_uncommitted_rows_count);
}

464 465
void ha_ndbcluster::records_update()
{
466 467
  if (m_ha_not_exact_count)
    return;
468
  DBUG_ENTER("ha_ndbcluster::records_update");
469
  struct Ndb_local_table_statistics *info= m_table_info;
470
  DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
471 472
                      ((const NDBTAB *)m_table)->getTableId(),
                      info->no_uncommitted_rows_count));
473
  //  if (info->records == ~(ha_rows)0)
474
  {
475
    Ndb *ndb= get_ndb();
476
    struct Ndb_statistics stat;
477
    ndb->setDatabaseName(m_dbname);
478 479 480 481
    if (ndb_get_table_statistics(ndb, m_table, &stat) == 0)
    {
      stats.mean_rec_length= stat.row_size;
      stats.data_file_length= stat.fragment_memory;
482
      info->records= stat.row_count;
483 484
    }
  }
485 486
  {
    THD *thd= current_thd;
487
    if (get_thd_ndb(thd)->error)
488 489
      info->no_uncommitted_rows_count= 0;
  }
490
  stats.records= info->records+ info->no_uncommitted_rows_count;
491 492 493
  DBUG_VOID_RETURN;
}

494 495
void ha_ndbcluster::no_uncommitted_rows_execute_failure()
{
496 497
  if (m_ha_not_exact_count)
    return;
498
  DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_execute_failure");
499
  get_thd_ndb(current_thd)->error= 1;
500 501 502
  DBUG_VOID_RETURN;
}

503 504
void ha_ndbcluster::no_uncommitted_rows_update(int c)
{
505 506
  if (m_ha_not_exact_count)
    return;
507
  DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_update");
508
  struct Ndb_local_table_statistics *info= m_table_info;
509 510
  info->no_uncommitted_rows_count+= c;
  DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
511 512
                      ((const NDBTAB *)m_table)->getTableId(),
                      info->no_uncommitted_rows_count));
513 514 515 516 517
  DBUG_VOID_RETURN;
}

void ha_ndbcluster::no_uncommitted_rows_reset(THD *thd)
{
518 519
  if (m_ha_not_exact_count)
    return;
520
  DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_reset");
521 522 523
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  thd_ndb->count++;
  thd_ndb->error= 0;
524 525 526
  DBUG_VOID_RETURN;
}

527
int ha_ndbcluster::ndb_err(NdbTransaction *trans)
528
{
529
  int res;
530
  NdbError err= trans->getNdbError();
531 532 533 534 535
  DBUG_ENTER("ndb_err");
  
  ERR_PRINT(err);
  switch (err.classification) {
  case NdbError::SchemaError:
536
  {
537 538
    // TODO perhaps we need to do more here, invalidate also in the cache
    m_table->setStatusInvalid();
539 540 541 542 543 544
    /* Close other open handlers not used by any thread */
    TABLE_LIST table_list;
    bzero((char*) &table_list,sizeof(table_list));
    table_list.db= m_dbname;
    table_list.alias= table_list.table_name= m_tabname;
    close_cached_tables(current_thd, 0, &table_list);
545
    break;
546
  }
547 548 549
  default:
    break;
  }
550 551
  res= ndb_to_mysql_error(&err);
  DBUG_PRINT("info", ("transformed ndbcluster error %d to mysql error %d", 
552
                      err.code, res));
553
  if (res == HA_ERR_FOUND_DUPP_KEY)
554 555
  {
    if (m_rows_to_insert == 1)
556
      m_dupkey= table_share->primary_key;
557
    else
monty@mishka.local's avatar
monty@mishka.local committed
558 559
    {
      /* We are batching inserts, offending key is not available */
560
      m_dupkey= (uint) -1;
monty@mishka.local's avatar
monty@mishka.local committed
561
    }
562
  }
563
  DBUG_RETURN(res);
564 565 566
}


567
/*
568
  Override the default get_error_message in order to add the 
569 570 571
  error message of NDB 
 */

572
bool ha_ndbcluster::get_error_message(int error, 
573
                                      String *buf)
574
{
575
  DBUG_ENTER("ha_ndbcluster::get_error_message");
576
  DBUG_PRINT("enter", ("error: %d", error));
577

578
  Ndb *ndb= get_ndb();
579
  if (!ndb)
580
    DBUG_RETURN(FALSE);
581

582
  const NdbError err= ndb->getNdbError(error);
583 584 585 586
  bool temporary= err.status==NdbError::TemporaryError;
  buf->set(err.message, strlen(err.message), &my_charset_bin);
  DBUG_PRINT("exit", ("message: %s, temporary: %d", buf->ptr(), temporary));
  DBUG_RETURN(temporary);
587 588 589
}


tulin@dl145c.mysql.com's avatar
tulin@dl145c.mysql.com committed
590
#ifndef DBUG_OFF
pekka@mysql.com's avatar
pekka@mysql.com committed
591 592 593 594
/*
  Check if type is supported by NDB.
*/

tulin@dl145c.mysql.com's avatar
tulin@dl145c.mysql.com committed
595
static bool ndb_supported_type(enum_field_types type)
pekka@mysql.com's avatar
pekka@mysql.com committed
596 597
{
  switch (type) {
pekka@mysql.com's avatar
pekka@mysql.com committed
598 599 600 601 602 603 604
  case MYSQL_TYPE_TINY:        
  case MYSQL_TYPE_SHORT:
  case MYSQL_TYPE_LONG:
  case MYSQL_TYPE_INT24:       
  case MYSQL_TYPE_LONGLONG:
  case MYSQL_TYPE_FLOAT:
  case MYSQL_TYPE_DOUBLE:
605 606
  case MYSQL_TYPE_DECIMAL:    
  case MYSQL_TYPE_NEWDECIMAL:
pekka@mysql.com's avatar
pekka@mysql.com committed
607 608 609 610 611 612 613 614
  case MYSQL_TYPE_TIMESTAMP:
  case MYSQL_TYPE_DATETIME:    
  case MYSQL_TYPE_DATE:
  case MYSQL_TYPE_NEWDATE:
  case MYSQL_TYPE_TIME:        
  case MYSQL_TYPE_YEAR:        
  case MYSQL_TYPE_STRING:      
  case MYSQL_TYPE_VAR_STRING:
pekka@mysql.com's avatar
pekka@mysql.com committed
615
  case MYSQL_TYPE_VARCHAR:
pekka@mysql.com's avatar
pekka@mysql.com committed
616 617 618 619 620 621
  case MYSQL_TYPE_TINY_BLOB:
  case MYSQL_TYPE_BLOB:    
  case MYSQL_TYPE_MEDIUM_BLOB:   
  case MYSQL_TYPE_LONG_BLOB:  
  case MYSQL_TYPE_ENUM:
  case MYSQL_TYPE_SET:         
622
  case MYSQL_TYPE_BIT:
623
  case MYSQL_TYPE_GEOMETRY:
624
    return TRUE;
pekka@mysql.com's avatar
pekka@mysql.com committed
625
  case MYSQL_TYPE_NULL:   
pekka@mysql.com's avatar
pekka@mysql.com committed
626
    break;
pekka@mysql.com's avatar
pekka@mysql.com committed
627
  }
628
  return FALSE;
pekka@mysql.com's avatar
pekka@mysql.com committed
629
}
tulin@dl145c.mysql.com's avatar
tulin@dl145c.mysql.com committed
630
#endif /* !DBUG_OFF */
pekka@mysql.com's avatar
pekka@mysql.com committed
631 632


633 634 635 636 637
/*
  Instruct NDB to set the value of the hidden primary key
*/

bool ha_ndbcluster::set_hidden_key(NdbOperation *ndb_op,
638
                                   uint fieldnr, const byte *field_ptr)
639 640
{
  DBUG_ENTER("set_hidden_key");
641
  DBUG_RETURN(ndb_op->equal(fieldnr, (char*)field_ptr) != 0);
642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658
}


/*
  Instruct NDB to set the value of one primary key attribute
*/

int ha_ndbcluster::set_ndb_key(NdbOperation *ndb_op, Field *field,
                               uint fieldnr, const byte *field_ptr)
{
  uint32 pack_len= field->pack_length();
  DBUG_ENTER("set_ndb_key");
  DBUG_PRINT("enter", ("%d: %s, ndb_type: %u, len=%d", 
                       fieldnr, field->field_name, field->type(),
                       pack_len));
  DBUG_DUMP("key", (char*)field_ptr, pack_len);
  
tulin@dl145c.mysql.com's avatar
tulin@dl145c.mysql.com committed
659 660 661 662
  DBUG_ASSERT(ndb_supported_type(field->type()));
  DBUG_ASSERT(! (field->flags & BLOB_FLAG));
  // Common implementation for most field types
  DBUG_RETURN(ndb_op->equal(fieldnr, (char*) field_ptr, pack_len) != 0);
663 664 665 666 667 668 669 670
}


/*
 Instruct NDB to set the value of one attribute
*/

int ha_ndbcluster::set_ndb_value(NdbOperation *ndb_op, Field *field, 
671 672
                                 uint fieldnr, int row_offset,
                                 bool *set_blob_value)
673
{
674 675
  const byte* field_ptr= field->ptr + row_offset;
  uint32 pack_len= field->pack_length();
676
  DBUG_ENTER("set_ndb_value");
677
  DBUG_PRINT("enter", ("%d: %s  type: %u  len=%d  is_null=%s", 
678
                       fieldnr, field->field_name, field->type(), 
679
                       pack_len, field->is_null(row_offset) ? "Y" : "N"));
680
  DBUG_DUMP("value", (char*) field_ptr, pack_len);
pekka@mysql.com's avatar
pekka@mysql.com committed
681

tulin@dl145c.mysql.com's avatar
tulin@dl145c.mysql.com committed
682
  DBUG_ASSERT(ndb_supported_type(field->type()));
683
  {
684
    // ndb currently does not support size 0
685
    uint32 empty_field;
686 687
    if (pack_len == 0)
    {
688 689
      pack_len= sizeof(empty_field);
      field_ptr= (byte *)&empty_field;
690
      if (field->is_null(row_offset))
691
        empty_field= 0;
692
      else
693
        empty_field= 1;
694
    }
pekka@mysql.com's avatar
pekka@mysql.com committed
695 696
    if (! (field->flags & BLOB_FLAG))
    {
697 698
      if (field->type() != MYSQL_TYPE_BIT)
      {
699 700 701
        if (field->is_null(row_offset))
        {
          DBUG_PRINT("info", ("field is NULL"));
702
          // Set value to NULL
703
          DBUG_RETURN((ndb_op->setValue(fieldnr, (char*)NULL) != 0));
704
	}
705
        // Common implementation for most field types
706
        DBUG_RETURN(ndb_op->setValue(fieldnr, (char*)field_ptr) != 0);
707 708 709
      }
      else // if (field->type() == MYSQL_TYPE_BIT)
      {
710
        longlong bits= field->val_int();
711
 
712 713
        // Round up bit field length to nearest word boundry
        pack_len= ((pack_len + 3) >> 2) << 2;
714
        DBUG_ASSERT(pack_len <= 8);
715
        if (field->is_null(row_offset))
716
          // Set value to NULL
717
          DBUG_RETURN((ndb_op->setValue(fieldnr, (char*)NULL) != 0));
718
        DBUG_PRINT("info", ("bit field"));
719
        DBUG_DUMP("value", (char*)&bits, pack_len);
720
#ifdef WORDS_BIGENDIAN
721 722
        if (pack_len < 5)
        {
723
          DBUG_RETURN(ndb_op->setValue(fieldnr, ((char*)&bits)+4) != 0);
724
        }
725
#endif
726
        DBUG_RETURN(ndb_op->setValue(fieldnr, (char*)&bits) != 0);
727
      }
pekka@mysql.com's avatar
pekka@mysql.com committed
728 729
    }
    // Blob type
730
    NdbBlob *ndb_blob= ndb_op->getBlobHandle(fieldnr);
pekka@mysql.com's avatar
pekka@mysql.com committed
731 732
    if (ndb_blob != NULL)
    {
733
      if (field->is_null(row_offset))
pekka@mysql.com's avatar
pekka@mysql.com committed
734 735 736 737 738 739 740 741 742
        DBUG_RETURN(ndb_blob->setNull() != 0);

      Field_blob *field_blob= (Field_blob*)field;

      // Get length and pointer to data
      uint32 blob_len= field_blob->get_length(field_ptr);
      char* blob_ptr= NULL;
      field_blob->get_ptr(&blob_ptr);

743 744 745
      // Looks like NULL ptr signals length 0 blob
      if (blob_ptr == NULL) {
        DBUG_ASSERT(blob_len == 0);
746
        blob_ptr= (char*)"";
747
      }
pekka@mysql.com's avatar
pekka@mysql.com committed
748

elliot@mysql.com's avatar
elliot@mysql.com committed
749 750
      DBUG_PRINT("value", ("set blob ptr=%p len=%u",
                           blob_ptr, blob_len));
pekka@mysql.com's avatar
pekka@mysql.com committed
751 752
      DBUG_DUMP("value", (char*)blob_ptr, min(blob_len, 26));

753
      if (set_blob_value)
754
        *set_blob_value= TRUE;
pekka@mysql.com's avatar
pekka@mysql.com committed
755 756 757 758
      // No callback needed to write value
      DBUG_RETURN(ndb_blob->setValue(blob_ptr, blob_len) != 0);
    }
    DBUG_RETURN(1);
759
  }
pekka@mysql.com's avatar
pekka@mysql.com committed
760 761 762 763 764 765 766 767 768 769 770 771 772 773 774
}


/*
  Callback to read all blob values.
  - not done in unpack_record because unpack_record is valid
    after execute(Commit) but reading blobs is not
  - may only generate read operations; they have to be executed
    somewhere before the data is available
  - due to single buffer for all blobs, we let the last blob
    process all blobs (last so that all are active)
  - null bit is still set in unpack_record
  - TODO allocate blob part aligned buffers
*/

775
NdbBlob::ActiveHook g_get_ndb_blobs_value;
pekka@mysql.com's avatar
pekka@mysql.com committed
776

777
int g_get_ndb_blobs_value(NdbBlob *ndb_blob, void *arg)
pekka@mysql.com's avatar
pekka@mysql.com committed
778
{
779
  DBUG_ENTER("g_get_ndb_blobs_value");
pekka@mysql.com's avatar
pekka@mysql.com committed
780 781 782
  if (ndb_blob->blobsNextBlob() != NULL)
    DBUG_RETURN(0);
  ha_ndbcluster *ha= (ha_ndbcluster *)arg;
783 784
  int ret= get_ndb_blobs_value(ha->table, ha->m_value,
                               ha->m_blobs_buffer, ha->m_blobs_buffer_size,
785
                               ha->m_blobs_offset);
786
  DBUG_RETURN(ret);
pekka@mysql.com's avatar
pekka@mysql.com committed
787 788
}

789 790 791 792 793 794 795 796
/*
  This routine is shared by injector.  There is no common blobs buffer
  so the buffer and length are passed by reference.  Injector also
  passes a record pointer diff.
 */
int get_ndb_blobs_value(TABLE* table, NdbValue* value_array,
                        byte*& buffer, uint& buffer_size,
                        my_ptrdiff_t ptrdiff)
pekka@mysql.com's avatar
pekka@mysql.com committed
797 798 799 800 801 802 803 804
{
  DBUG_ENTER("get_ndb_blobs_value");

  // Field has no field number so cannot use TABLE blob_field
  // Loop twice, first only counting total buffer size
  for (int loop= 0; loop <= 1; loop++)
  {
    uint32 offset= 0;
805
    for (uint i= 0; i < table->s->fields; i++)
pekka@mysql.com's avatar
pekka@mysql.com committed
806 807
    {
      Field *field= table->field[i];
808
      NdbValue value= value_array[i];
809 810 811
      if (! (field->flags & BLOB_FLAG))
        continue;
      if (value.blob == NULL)
pekka@mysql.com's avatar
pekka@mysql.com committed
812
      {
813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829
        DBUG_PRINT("info",("[%u] skipped", i));
        continue;
      }
      Field_blob *field_blob= (Field_blob *)field;
      NdbBlob *ndb_blob= value.blob;
      int isNull;
      if (ndb_blob->getNull(isNull) != 0)
        ERR_RETURN(ndb_blob->getNdbError());
      if (isNull == 0) {
        Uint64 len64= 0;
        if (ndb_blob->getLength(len64) != 0)
          ERR_RETURN(ndb_blob->getNdbError());
        // Align to Uint64
        uint32 size= len64;
        if (size % 8 != 0)
          size+= 8 - size % 8;
        if (loop == 1)
830
        {
831 832 833 834 835 836 837 838 839 840 841
          char *buf= buffer + offset;
          uint32 len= 0xffffffff;  // Max uint32
          if (ndb_blob->readData(buf, len) != 0)
            ERR_RETURN(ndb_blob->getNdbError());
          DBUG_PRINT("info", ("[%u] offset=%u buf=%p len=%u [ptrdiff=%d]",
                              i, offset, buf, len, (int)ptrdiff));
          DBUG_ASSERT(len == len64);
          // Ugly hack assumes only ptr needs to be changed
          field_blob->ptr+= ptrdiff;
          field_blob->set_ptr(len, buf);
          field_blob->ptr-= ptrdiff;
842
        }
843 844 845 846 847 848 849 850 851 852 853
        offset+= size;
      }
      else if (loop == 1) // undefined or null
      {
        // have to set length even in this case
        char *buf= buffer + offset; // or maybe NULL
        uint32 len= 0;
        field_blob->ptr+= ptrdiff;
        field_blob->set_ptr(len, buf);
        field_blob->ptr-= ptrdiff;
        DBUG_PRINT("info", ("[%u] isNull=%d", i, isNull));
pekka@mysql.com's avatar
pekka@mysql.com committed
854 855
      }
    }
856
    if (loop == 0 && offset > buffer_size)
pekka@mysql.com's avatar
pekka@mysql.com committed
857
    {
858 859 860 861 862
      my_free(buffer, MYF(MY_ALLOW_ZERO_PTR));
      buffer_size= 0;
      DBUG_PRINT("info", ("allocate blobs buffer size %u", offset));
      buffer= my_malloc(offset, MYF(MY_WME));
      if (buffer == NULL)
pekka@mysql.com's avatar
pekka@mysql.com committed
863
        DBUG_RETURN(-1);
864
      buffer_size= offset;
pekka@mysql.com's avatar
pekka@mysql.com committed
865
    }
866
  }
pekka@mysql.com's avatar
pekka@mysql.com committed
867
  DBUG_RETURN(0);
868 869 870 871 872
}


/*
  Instruct NDB to fetch one field
pekka@mysql.com's avatar
pekka@mysql.com committed
873 874
  - data is read directly into buffer provided by field
    if field is NULL, data is read into memory provided by NDBAPI
875 876
*/

pekka@mysql.com's avatar
pekka@mysql.com committed
877
int ha_ndbcluster::get_ndb_value(NdbOperation *ndb_op, Field *field,
878
                                 uint fieldnr, byte* buf)
879 880
{
  DBUG_ENTER("get_ndb_value");
pekka@mysql.com's avatar
pekka@mysql.com committed
881 882 883 884 885
  DBUG_PRINT("enter", ("fieldnr: %d flags: %o", fieldnr,
                       (int)(field != NULL ? field->flags : 0)));

  if (field != NULL)
  {
tulin@dl145c.mysql.com's avatar
tulin@dl145c.mysql.com committed
886 887
      DBUG_ASSERT(buf);
      DBUG_ASSERT(ndb_supported_type(field->type()));
pekka@mysql.com's avatar
pekka@mysql.com committed
888 889
      DBUG_ASSERT(field->ptr != NULL);
      if (! (field->flags & BLOB_FLAG))
890
      { 
891 892
        if (field->type() != MYSQL_TYPE_BIT)
        {
893 894 895 896 897 898 899 900
          byte *field_buf;
          if (field->pack_length() != 0)
            field_buf= buf + (field->ptr - table->record[0]);
          else
            field_buf= (byte *)&dummy_buf;
          m_value[fieldnr].rec= ndb_op->getValue(fieldnr, 
                                                 field_buf);
        }
901 902 903 904
        else // if (field->type() == MYSQL_TYPE_BIT)
        {
          m_value[fieldnr].rec= ndb_op->getValue(fieldnr);
        }
pekka@mysql.com's avatar
pekka@mysql.com committed
905 906 907 908 909 910 911 912 913
        DBUG_RETURN(m_value[fieldnr].rec == NULL);
      }

      // Blob type
      NdbBlob *ndb_blob= ndb_op->getBlobHandle(fieldnr);
      m_value[fieldnr].blob= ndb_blob;
      if (ndb_blob != NULL)
      {
        // Set callback
914
	m_blobs_offset= buf - (byte*) table->record[0];
pekka@mysql.com's avatar
pekka@mysql.com committed
915
        void *arg= (void *)this;
916
        DBUG_RETURN(ndb_blob->setActiveHook(g_get_ndb_blobs_value, arg) != 0);
pekka@mysql.com's avatar
pekka@mysql.com committed
917 918 919 920 921
      }
      DBUG_RETURN(1);
  }

  // Used for hidden key only
922
  m_value[fieldnr].rec= ndb_op->getValue(fieldnr, m_ref);
pekka@mysql.com's avatar
pekka@mysql.com committed
923 924 925
  DBUG_RETURN(m_value[fieldnr].rec == NULL);
}

926 927 928 929 930 931
/*
  Instruct NDB to fetch the partition id (fragment id)
*/
int ha_ndbcluster::get_ndb_partition_id(NdbOperation *ndb_op)
{
  DBUG_ENTER("get_ndb_partition_id");
932 933
  DBUG_RETURN(ndb_op->getValue(NdbDictionary::Column::FRAGMENT, 
                               (char *)&m_part_id) == NULL);
934
}
pekka@mysql.com's avatar
pekka@mysql.com committed
935 936 937 938

/*
  Check if any set or get of blob value in current query.
*/
939

940
bool ha_ndbcluster::uses_blob_value()
pekka@mysql.com's avatar
pekka@mysql.com committed
941
{
942 943 944
  uint blob_fields;
  MY_BITMAP *bitmap;
  uint *blob_index, *blob_index_end;
945
  if (table_share->blob_fields == 0)
946
    return FALSE;
947 948 949 950 951

  bitmap= m_write_op ? table->write_set : table->read_set;
  blob_index=     table_share->blob_field;
  blob_index_end= blob_index + table_share->blob_fields;
  do
pekka@mysql.com's avatar
pekka@mysql.com committed
952
  {
953 954 955 956
    if (bitmap_is_set(table->write_set,
                      table->field[*blob_index]->field_index))
      return TRUE;
  } while (++blob_index != blob_index_end);
957
  return FALSE;
958 959 960 961 962 963 964 965 966
}


/*
  Get metadata for this table from NDB 

  IMPLEMENTATION
    - check that frm-file on disk is equal to frm-file
      of table accessed in NDB
967 968 969 970

  RETURN
    0    ok
    -2   Meta data has changed; Re-read data and try again
971 972
*/

973 974
int cmp_frm(const NDBTAB *ndbtab, const void *pack_data,
            uint pack_length)
975 976 977 978 979 980 981 982 983 984 985
{
  DBUG_ENTER("cmp_frm");
  /*
    Compare FrmData in NDB with frm file from disk.
  */
  if ((pack_length != ndbtab->getFrmLength()) || 
      (memcmp(pack_data, ndbtab->getFrmData(), pack_length)))
    DBUG_RETURN(1);
  DBUG_RETURN(0);
}

986 987
int ha_ndbcluster::get_metadata(const char *path)
{
988 989
  Ndb *ndb= get_ndb();
  NDBDICT *dict= ndb->getDictionary();
990 991 992 993 994
  const NDBTAB *tab;
  int error;
  DBUG_ENTER("get_metadata");
  DBUG_PRINT("enter", ("m_tabname: %s, path: %s", m_tabname, path));

995 996
  DBUG_ASSERT(m_table == NULL);
  DBUG_ASSERT(m_table_info == NULL);
997

998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011
  const void *data, *pack_data;
  uint length, pack_length;

  /*
    Compare FrmData in NDB with frm file from disk.
  */
  error= 0;
  if (readfrm(path, &data, &length) ||
      packfrm(data, length, &pack_data, &pack_length))
  {
    my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
    my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR));
    DBUG_RETURN(1);
  }
1012
    
1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029
  Ndb_table_guard ndbtab_g(dict, m_tabname);
  if (!(tab= ndbtab_g.get_table()))
    ERR_RETURN(dict->getNdbError());

  if (get_ndb_share_state(m_share) != NSS_ALTERED 
      && cmp_frm(tab, pack_data, pack_length))
  {
    DBUG_PRINT("error", 
               ("metadata, pack_length: %d  getFrmLength: %d  memcmp: %d",
                pack_length, tab->getFrmLength(),
                memcmp(pack_data, tab->getFrmData(), pack_length)));
    DBUG_DUMP("pack_data", (char*)pack_data, pack_length);
    DBUG_DUMP("frm", (char*)tab->getFrmData(), tab->getFrmLength());
    error= HA_ERR_TABLE_DEF_CHANGED;
  }
  my_free((char*)data, MYF(0));
  my_free((char*)pack_data, MYF(0));
1030

1031
  if (error)
1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044
    goto err;

  DBUG_PRINT("info", ("fetched table %s", tab->getName()));
  m_table= tab;
  if ((error= open_indexes(ndb, table, FALSE)) == 0)
  {
    ndbtab_g.release();
    DBUG_RETURN(0);
  }
err:
  ndbtab_g.invalidate();
  m_table= NULL;
  DBUG_RETURN(error);
1045
}
1046

1047
static int fix_unique_index_attr_order(NDB_INDEX_DATA &data,
1048 1049
                                       const NDBINDEX *index,
                                       KEY *key_info)
1050 1051 1052 1053 1054 1055
{
  DBUG_ENTER("fix_unique_index_attr_order");
  unsigned sz= index->getNoOfIndexColumns();

  if (data.unique_index_attrid_map)
    my_free((char*)data.unique_index_attrid_map, MYF(0));
monty@mysql.com's avatar
monty@mysql.com committed
1056
  data.unique_index_attrid_map= (uchar*)my_malloc(sz,MYF(MY_WME));
1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068

  KEY_PART_INFO* key_part= key_info->key_part;
  KEY_PART_INFO* end= key_part+key_info->key_parts;
  DBUG_ASSERT(key_info->key_parts == sz);
  for (unsigned i= 0; key_part != end; key_part++, i++) 
  {
    const char *field_name= key_part->field->field_name;
#ifndef DBUG_OFF
   data.unique_index_attrid_map[i]= 255;
#endif
    for (unsigned j= 0; j < sz; j++)
    {
1069
      const NDBCOL *c= index->getColumn(j);
msvensson@neptunus.(none)'s avatar
msvensson@neptunus.(none) committed
1070
      if (strcmp(field_name, c->getName()) == 0)
1071
      {
1072 1073
        data.unique_index_attrid_map[i]= j;
        break;
1074 1075 1076 1077 1078 1079
      }
    }
    DBUG_ASSERT(data.unique_index_attrid_map[i] != 255);
  }
  DBUG_RETURN(0);
}
1080

1081 1082 1083 1084 1085 1086
/*
  Create all the indexes for a table.
  If any index should fail to be created,
  the error is returned immediately
*/
int ha_ndbcluster::create_indexes(Ndb *ndb, TABLE *tab)
1087
{
1088
  uint i;
1089
  int error= 0;
1090
  const char *index_name;
1091
  KEY* key_info= tab->key_info;
1092
  const char **key_name= tab->s->keynames.type_names;
1093
  NDBDICT *dict= ndb->getDictionary();
1094
  DBUG_ENTER("ha_ndbcluster::create_indexes");
1095
  
1096
  for (i= 0; i < tab->s->keys; i++, key_info++, key_name++)
1097
  {
1098
    index_name= *key_name;
1099
    NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
1100 1101
    error= create_index(index_name, key_info, idx_type, i);
    if (error)
1102
    {
1103 1104
      DBUG_PRINT("error", ("Failed to create index %u", i));
      break;
1105
    }
1106 1107 1108 1109 1110
  }

  DBUG_RETURN(error);
}

tomas@poseidon.ndb.mysql.com's avatar
ndb:  
tomas@poseidon.ndb.mysql.com committed
1111
static void ndb_init_index(NDB_INDEX_DATA &data)
1112
{
tomas@poseidon.ndb.mysql.com's avatar
ndb:  
tomas@poseidon.ndb.mysql.com committed
1113 1114 1115 1116 1117 1118 1119 1120 1121
  data.type= UNDEFINED_INDEX;
  data.status= UNDEFINED;
  data.unique_index= NULL;
  data.index= NULL;
  data.unique_index_attrid_map= NULL;
  data.index_stat=NULL;
  data.index_stat_cache_entries=0;
  data.index_stat_update_freq=0;
  data.index_stat_query_count=0;
1122 1123
}

tomas@poseidon.ndb.mysql.com's avatar
ndb:  
tomas@poseidon.ndb.mysql.com committed
1124
static void ndb_clear_index(NDB_INDEX_DATA &data)
1125
{
tomas@poseidon.ndb.mysql.com's avatar
ndb:  
tomas@poseidon.ndb.mysql.com committed
1126 1127 1128 1129 1130 1131 1132 1133 1134
  if (data.unique_index_attrid_map)
  {
    my_free((char*)data.unique_index_attrid_map, MYF(0));
  }
  if (data.index_stat)
  {
    delete data.index_stat;
  }
  ndb_init_index(data);
1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146
}

/*
  Associate a direct reference to an index handle
  with an index (for faster access)
 */
int ha_ndbcluster::add_index_handle(THD *thd, NDBDICT *dict, KEY *key_info,
                                    const char *index_name, uint index_no)
{
  int error= 0;
  NDB_INDEX_TYPE idx_type= get_index_type_from_table(index_no);
  m_index[index_no].type= idx_type;
1147 1148
  DBUG_ENTER("ha_ndbcluster::add_index_handle");
  DBUG_PRINT("enter", ("table %s", m_tabname));
1149 1150 1151 1152

  if (idx_type != PRIMARY_KEY_INDEX && idx_type != UNIQUE_INDEX)
  {
    DBUG_PRINT("info", ("Get handle to index %s", index_name));
1153 1154 1155
    const NDBINDEX *index;
    do
    {
1156
      index= dict->getIndexGlobal(index_name, *m_table);
1157 1158 1159 1160 1161 1162 1163 1164
      if (!index)
        ERR_RETURN(dict->getNdbError());
      DBUG_PRINT("info", ("index: 0x%x  id: %d  version: %d.%d  status: %d",
                          index,
                          index->getObjectId(),
                          index->getObjectVersion() & 0xFFFFFF,
                          index->getObjectVersion() >> 24,
                          index->getObjectStatus()));
1165 1166
      DBUG_ASSERT(index->getObjectStatus() ==
                  NdbDictionary::Object::Retrieved);
1167 1168
      break;
    } while (1);
1169
    m_index[index_no].index= index;
1170 1171 1172 1173 1174
    // ordered index - add stats
    NDB_INDEX_DATA& d=m_index[index_no];
    delete d.index_stat;
    d.index_stat=NULL;
    if (thd->variables.ndb_index_stat_enable)
1175
    {
1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193
      d.index_stat=new NdbIndexStat(index);
      d.index_stat_cache_entries=thd->variables.ndb_index_stat_cache_entries;
      d.index_stat_update_freq=thd->variables.ndb_index_stat_update_freq;
      d.index_stat_query_count=0;
      d.index_stat->alloc_cache(d.index_stat_cache_entries);
      DBUG_PRINT("info", ("index %s stat=on cache_entries=%u update_freq=%u",
                          index->getName(),
                          d.index_stat_cache_entries,
                          d.index_stat_update_freq));
    } else
    {
      DBUG_PRINT("info", ("index %s stat=off", index->getName()));
    }
  }
  if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX)
  {
    char unique_index_name[FN_LEN];
    static const char* unique_suffix= "$unique";
1194
    m_has_unique_index= TRUE;
1195 1196
    strxnmov(unique_index_name, FN_LEN, index_name, unique_suffix, NullS);
    DBUG_PRINT("info", ("Get handle to unique_index %s", unique_index_name));
1197 1198 1199
    const NDBINDEX *index;
    do
    {
1200
      index= dict->getIndexGlobal(unique_index_name, *m_table);
1201 1202 1203 1204 1205 1206 1207 1208
      if (!index)
        ERR_RETURN(dict->getNdbError());
      DBUG_PRINT("info", ("index: 0x%x  id: %d  version: %d.%d  status: %d",
                          index,
                          index->getObjectId(),
                          index->getObjectVersion() & 0xFFFFFF,
                          index->getObjectVersion() >> 24,
                          index->getObjectStatus()));
1209 1210
      DBUG_ASSERT(index->getObjectStatus() ==
                  NdbDictionary::Object::Retrieved);
1211 1212
      break;
    } while (1);
1213
    m_index[index_no].unique_index= index;
1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224
    error= fix_unique_index_attr_order(m_index[index_no], index, key_info);
  }
  if (!error)
    m_index[index_no].status= ACTIVE;
  
  DBUG_RETURN(error);
}

/*
  Associate index handles for each index of a table
*/
1225
int ha_ndbcluster::open_indexes(Ndb *ndb, TABLE *tab, bool ignore_error)
1226 1227 1228 1229 1230 1231 1232 1233 1234
{
  uint i;
  int error= 0;
  THD *thd=current_thd;
  NDBDICT *dict= ndb->getDictionary();
  const char *index_name;
  KEY* key_info= tab->key_info;
  const char **key_name= tab->s->keynames.type_names;
  DBUG_ENTER("ha_ndbcluster::open_indexes");
1235
  m_has_unique_index= FALSE;
1236 1237 1238
  for (i= 0; i < tab->s->keys; i++, key_info++, key_name++)
  {
    if ((error= add_index_handle(thd, dict, key_info, *key_name, i)))
1239 1240 1241 1242
      if (ignore_error)
        m_index[i].index= m_index[i].unique_index= NULL;
      else
        break;
1243
  }
1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264

  if (error && !ignore_error)
  {
    while (i > 0)
    {
      i--;
      if (m_index[i].index)
      {
         dict->removeIndexGlobal(*m_index[i].index, 1);
         m_index[i].index= NULL;
      }
      if (m_index[i].unique_index)
      {
         dict->removeIndexGlobal(*m_index[i].unique_index, 1);
         m_index[i].unique_index= NULL;
      }
    }
  }

  DBUG_ASSERT(error == 0 || error == 4243);

1265 1266 1267 1268 1269 1270 1271
  DBUG_RETURN(error);
}

/*
  Renumber indexes in index list by shifting out
  indexes that are to be dropped
 */
1272
void ha_ndbcluster::renumber_indexes(Ndb *ndb, TABLE *tab)
1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293
{
  uint i;
  const char *index_name;
  KEY* key_info= tab->key_info;
  const char **key_name= tab->s->keynames.type_names;
  NDBDICT *dict= ndb->getDictionary();
  DBUG_ENTER("ha_ndbcluster::renumber_indexes");
  
  for (i= 0; i < tab->s->keys; i++, key_info++, key_name++)
  {
    index_name= *key_name;
    NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
    m_index[i].type= idx_type;
    if (m_index[i].status == TO_BE_DROPPED) 
    {
      DBUG_PRINT("info", ("Shifting index %s(%i) out of the list", 
                          index_name, i));
      NDB_INDEX_DATA tmp;
      uint j= i + 1;
      // Shift index out of list
      while(j != MAX_KEY && m_index[j].status != UNDEFINED)
1294
      {
1295 1296 1297 1298
        tmp=  m_index[j - 1];
        m_index[j - 1]= m_index[j];
        m_index[j]= tmp;
        j++;
1299 1300
      }
    }
1301 1302
  }

1303
  DBUG_VOID_RETURN;
1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317
}

/*
  Drop all indexes that are marked for deletion
*/
int ha_ndbcluster::drop_indexes(Ndb *ndb, TABLE *tab)
{
  uint i;
  int error= 0;
  const char *index_name;
  KEY* key_info= tab->key_info;
  NDBDICT *dict= ndb->getDictionary();
  DBUG_ENTER("ha_ndbcluster::drop_indexes");
  
1318
  for (i= 0; i < tab->s->keys; i++, key_info++)
1319 1320 1321 1322
  {
    NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
    m_index[i].type= idx_type;
    if (m_index[i].status == TO_BE_DROPPED)
1323
    {
1324 1325
      const NdbDictionary::Index *index= m_index[i].index;
      const NdbDictionary::Index *unique_index= m_index[i].unique_index;
1326 1327
      
      if (index)
1328
      {
1329 1330 1331
        index_name= index->getName();
        DBUG_PRINT("info", ("Dropping index %u: %s", i, index_name));  
        // Drop ordered index from ndb
1332 1333 1334 1335 1336 1337
        error= dict->dropIndexGlobal(*index);
        if (!error)
        {
          dict->removeIndexGlobal(*index, 1);
          m_index[i].index= NULL;
        }
1338 1339
      }
      if (!error && unique_index)
1340
      {
1341 1342
        index_name= unique_index->getName();
        DBUG_PRINT("info", ("Dropping unique index %u: %s", i, index_name));
1343
        // Drop unique index from ndb
1344 1345 1346 1347 1348 1349
        error= dict->dropIndexGlobal(*unique_index);
        if (!error)
        {
          dict->removeIndexGlobal(*unique_index, 1);
          m_index[i].unique_index= NULL;
        }
1350
      }
1351 1352
      if (error)
        DBUG_RETURN(error);
tomas@poseidon.ndb.mysql.com's avatar
ndb:  
tomas@poseidon.ndb.mysql.com committed
1353
      ndb_clear_index(m_index[i]);
1354
      continue;
1355
    }
1356
  }
1357 1358
  
  DBUG_RETURN(error);
1359 1360
}

1361 1362 1363 1364
/*
  Decode the type of an index from information 
  provided in table object
*/
1365
NDB_INDEX_TYPE ha_ndbcluster::get_index_type_from_table(uint inx) const
1366
{
1367 1368
  return get_index_type_from_key(inx, table_share->key_info,
                                 inx == table_share->primary_key);
1369 1370 1371
}

NDB_INDEX_TYPE ha_ndbcluster::get_index_type_from_key(uint inx,
1372 1373
                                                      KEY *key_info,
                                                      bool primary) const
1374 1375
{
  bool is_hash_index=  (key_info[inx].algorithm == 
1376
                        HA_KEY_ALG_HASH);
1377
  if (primary)
1378
    return is_hash_index ? PRIMARY_KEY_INDEX : PRIMARY_KEY_ORDERED_INDEX;
1379 1380
  
  return ((key_info[inx].flags & HA_NOSAME) ? 
1381 1382
          (is_hash_index ? UNIQUE_INDEX : UNIQUE_ORDERED_INDEX) :
          ORDERED_INDEX);
1383
} 
1384

1385 1386 1387 1388 1389
int ha_ndbcluster::check_index_fields_not_null(uint inx)
{
  KEY* key_info= table->key_info + inx;
  KEY_PART_INFO* key_part= key_info->key_part;
  KEY_PART_INFO* end= key_part+key_info->key_parts;
1390
  DBUG_ENTER("ha_ndbcluster::check_index_fields_not_null");
1391 1392 1393 1394 1395 1396
  
  for (; key_part != end; key_part++) 
    {
      Field* field= key_part->field;
      if (field->maybe_null())
      {
1397 1398 1399
        my_printf_error(ER_NULL_COLUMN_IN_INDEX,ER(ER_NULL_COLUMN_IN_INDEX),
                        MYF(0),field->field_name);
        DBUG_RETURN(ER_NULL_COLUMN_IN_INDEX);
1400 1401 1402 1403 1404
      }
    }
  
  DBUG_RETURN(0);
}
1405

1406
void ha_ndbcluster::release_metadata(THD *thd, Ndb *ndb)
1407
{
1408
  uint i;
1409

1410 1411 1412
  DBUG_ENTER("release_metadata");
  DBUG_PRINT("enter", ("m_tabname: %s", m_tabname));

1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426
  NDBDICT *dict= ndb->getDictionary();
  int invalidate_indexes= 0;
  if (thd && thd->lex && thd->lex->sql_command == SQLCOM_FLUSH)
  {
    invalidate_indexes = 1;
  }
  if (m_table != NULL)
  {
    if (m_table->getObjectStatus() == NdbDictionary::Object::Invalid)
      invalidate_indexes= 1;
    dict->removeTableGlobal(*m_table, invalidate_indexes);
  }
  // TODO investigate
  DBUG_ASSERT(m_table_info == NULL);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
1427
  m_table_info= NULL;
1428

1429
  // Release index list 
1430 1431
  for (i= 0; i < MAX_KEY; i++)
  {
1432 1433 1434 1435 1436 1437 1438 1439 1440 1441
    if (m_index[i].unique_index)
    {
      DBUG_ASSERT(m_table != NULL);
      dict->removeIndexGlobal(*m_index[i].unique_index, invalidate_indexes);
    }
    if (m_index[i].index)
    {
      DBUG_ASSERT(m_table != NULL);
      dict->removeIndexGlobal(*m_index[i].index, invalidate_indexes);
    }
tomas@poseidon.ndb.mysql.com's avatar
ndb:  
tomas@poseidon.ndb.mysql.com committed
1442
    ndb_clear_index(m_index[i]);
1443 1444
  }

1445
  m_table= NULL;
1446 1447 1448
  DBUG_VOID_RETURN;
}

pekka@mysql.com's avatar
pekka@mysql.com committed
1449
int ha_ndbcluster::get_ndb_lock_type(enum thr_lock_type type)
1450
{
1451
  if (type >= TL_WRITE_ALLOW_WRITE)
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
1452
    return NdbOperation::LM_Exclusive;
mskold@mysql.com's avatar
mskold@mysql.com committed
1453 1454
  if (type ==  TL_READ_WITH_SHARED_LOCKS ||
      uses_blob_value())
1455
    return NdbOperation::LM_Read;
1456
  return NdbOperation::LM_CommittedRead;
1457 1458
}

1459 1460 1461 1462 1463 1464
static const ulong index_type_flags[]=
{
  /* UNDEFINED_INDEX */
  0,                         

  /* PRIMARY_KEY_INDEX */
1465
  HA_ONLY_WHOLE_INDEX, 
1466 1467

  /* PRIMARY_KEY_ORDERED_INDEX */
1468
  /* 
mskold@mysql.com's avatar
mskold@mysql.com committed
1469
     Enable HA_KEYREAD_ONLY when "sorted" indexes are supported, 
1470 1471 1472
     thus ORDERD BY clauses can be optimized by reading directly 
     through the index.
  */
mskold@mysql.com's avatar
mskold@mysql.com committed
1473
  // HA_KEYREAD_ONLY | 
1474
  HA_READ_NEXT |
1475
  HA_READ_PREV |
1476 1477
  HA_READ_RANGE |
  HA_READ_ORDER,
1478 1479

  /* UNIQUE_INDEX */
1480
  HA_ONLY_WHOLE_INDEX,
1481

1482
  /* UNIQUE_ORDERED_INDEX */
1483
  HA_READ_NEXT |
1484
  HA_READ_PREV |
1485 1486
  HA_READ_RANGE |
  HA_READ_ORDER,
1487

1488
  /* ORDERED_INDEX */
1489
  HA_READ_NEXT |
1490
  HA_READ_PREV |
1491 1492
  HA_READ_RANGE |
  HA_READ_ORDER
1493 1494 1495 1496 1497 1498 1499
};

static const int index_flags_size= sizeof(index_type_flags)/sizeof(ulong);

inline NDB_INDEX_TYPE ha_ndbcluster::get_index_type(uint idx_no) const
{
  DBUG_ASSERT(idx_no < MAX_KEY);
1500
  return m_index[idx_no].type;
1501 1502 1503 1504 1505 1506 1507 1508 1509 1510
}


/*
  Get the flags for an index

  RETURN
    flags depending on the type of the index.
*/

1511 1512
inline ulong ha_ndbcluster::index_flags(uint idx_no, uint part,
                                        bool all_parts) const 
1513
{ 
1514
  DBUG_ENTER("ha_ndbcluster::index_flags");
1515
  DBUG_PRINT("enter", ("idx_no: %u", idx_no));
1516
  DBUG_ASSERT(get_index_type_from_table(idx_no) < index_flags_size);
1517 1518
  DBUG_RETURN(index_type_flags[get_index_type_from_table(idx_no)] | 
              HA_KEY_SCAN_NOT_ROR);
1519 1520
}

pekka@mysql.com's avatar
pekka@mysql.com committed
1521 1522
static void shrink_varchar(Field* field, const byte* & ptr, char* buf)
{
1523
  if (field->type() == MYSQL_TYPE_VARCHAR && ptr != NULL) {
pekka@mysql.com's avatar
pekka@mysql.com committed
1524
    Field_varstring* f= (Field_varstring*)field;
pekka@mysql.com's avatar
pekka@mysql.com committed
1525
    if (f->length_bytes == 1) {
pekka@mysql.com's avatar
pekka@mysql.com committed
1526 1527 1528 1529 1530
      uint pack_len= field->pack_length();
      DBUG_ASSERT(1 <= pack_len && pack_len <= 256);
      if (ptr[1] == 0) {
        buf[0]= ptr[0];
      } else {
1531
        DBUG_ASSERT(FALSE);
pekka@mysql.com's avatar
pekka@mysql.com committed
1532 1533 1534 1535 1536 1537 1538
        buf[0]= 255;
      }
      memmove(buf + 1, ptr + 2, pack_len - 1);
      ptr= buf;
    }
  }
}
1539 1540 1541

int ha_ndbcluster::set_primary_key(NdbOperation *op, const byte *key)
{
1542
  KEY* key_info= table->key_info + table_share->primary_key;
1543 1544 1545 1546 1547 1548 1549
  KEY_PART_INFO* key_part= key_info->key_part;
  KEY_PART_INFO* end= key_part+key_info->key_parts;
  DBUG_ENTER("set_primary_key");

  for (; key_part != end; key_part++) 
  {
    Field* field= key_part->field;
pekka@mysql.com's avatar
pekka@mysql.com committed
1550 1551 1552
    const byte* ptr= key;
    char buf[256];
    shrink_varchar(field, ptr, buf);
1553
    if (set_ndb_key(op, field, 
1554
                    key_part->fieldnr-1, ptr))
1555
      ERR_RETURN(op->getNdbError());
pekka@mysql.com's avatar
pekka@mysql.com committed
1556
    key += key_part->store_length;
1557 1558 1559 1560 1561
  }
  DBUG_RETURN(0);
}


1562
int ha_ndbcluster::set_primary_key_from_record(NdbOperation *op, const byte *record)
1563
{
1564
  KEY* key_info= table->key_info + table_share->primary_key;
1565 1566
  KEY_PART_INFO* key_part= key_info->key_part;
  KEY_PART_INFO* end= key_part+key_info->key_parts;
1567
  DBUG_ENTER("set_primary_key_from_record");
1568 1569 1570 1571 1572

  for (; key_part != end; key_part++) 
  {
    Field* field= key_part->field;
    if (set_ndb_key(op, field, 
1573
		    key_part->fieldnr-1, record+key_part->offset))
1574 1575 1576 1577 1578
      ERR_RETURN(op->getNdbError());
  }
  DBUG_RETURN(0);
}

1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597
int ha_ndbcluster::set_index_key_from_record(NdbOperation *op, 
                                             const byte *record, uint keyno)
{
  KEY* key_info= table->key_info + keyno;
  KEY_PART_INFO* key_part= key_info->key_part;
  KEY_PART_INFO* end= key_part+key_info->key_parts;
  uint i;
  DBUG_ENTER("set_index_key_from_record");
                                                                                
  for (i= 0; key_part != end; key_part++, i++)
  {
    Field* field= key_part->field;
    if (set_ndb_key(op, field, m_index[keyno].unique_index_attrid_map[i],
                    record+key_part->offset))
      ERR_RETURN(m_active_trans->getNdbError());
  }
  DBUG_RETURN(0);
}

1598 1599
int 
ha_ndbcluster::set_index_key(NdbOperation *op, 
1600 1601
                             const KEY *key_info, 
                             const byte * key_ptr)
1602
{
1603
  DBUG_ENTER("ha_ndbcluster::set_index_key");
1604 1605 1606 1607 1608 1609
  uint i;
  KEY_PART_INFO* key_part= key_info->key_part;
  KEY_PART_INFO* end= key_part+key_info->key_parts;
  
  for (i= 0; key_part != end; key_part++, i++) 
  {
pekka@mysql.com's avatar
pekka@mysql.com committed
1610 1611 1612 1613
    Field* field= key_part->field;
    const byte* ptr= key_part->null_bit ? key_ptr + 1 : key_ptr;
    char buf[256];
    shrink_varchar(field, ptr, buf);
tomas@poseidon.ndb.mysql.com's avatar
Merge  
tomas@poseidon.ndb.mysql.com committed
1614
    if (set_ndb_key(op, field, m_index[active_index].unique_index_attrid_map[i], ptr))
1615 1616 1617 1618 1619
      ERR_RETURN(m_active_trans->getNdbError());
    key_ptr+= key_part->store_length;
  }
  DBUG_RETURN(0);
}
1620

1621 1622 1623 1624 1625 1626 1627
inline 
int ha_ndbcluster::define_read_attrs(byte* buf, NdbOperation* op)
{
  uint i;
  DBUG_ENTER("define_read_attrs");  

  // Define attributes to read
1628
  for (i= 0; i < table_share->fields; i++) 
1629 1630
  {
    Field *field= table->field[i];
1631
    if (bitmap_is_set(table->read_set, i) ||
1632
        ((field->flags & PRI_KEY_FLAG)))
1633 1634
    {      
      if (get_ndb_value(op, field, i, buf))
1635
        ERR_RETURN(op->getNdbError());
1636
    } 
1637
    else
1638 1639 1640 1641 1642
    {
      m_value[i].ptr= NULL;
    }
  }
    
1643
  if (table_share->primary_key == MAX_KEY) 
1644 1645 1646
  {
    DBUG_PRINT("info", ("Getting hidden key"));
    // Scanning table with no primary key
1647
    int hidden_no= table_share->fields;      
1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658
#ifndef DBUG_OFF
    const NDBTAB *tab= (const NDBTAB *) m_table;    
    if (!tab->getColumn(hidden_no))
      DBUG_RETURN(1);
#endif
    if (get_ndb_value(op, NULL, hidden_no, NULL))
      ERR_RETURN(op->getNdbError());
  }
  DBUG_RETURN(0);
} 

tomas@poseidon.ndb.mysql.com's avatar
Merge  
tomas@poseidon.ndb.mysql.com committed
1659

1660 1661 1662 1663
/*
  Read one record from NDB using primary key
*/

1664 1665
int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf,
                           uint32 part_id)
1666
{
1667
  uint no_fields= table_share->fields;
1668 1669
  NdbConnection *trans= m_active_trans;
  NdbOperation *op;
1670

1671 1672 1673 1674
  int res;
  DBUG_ENTER("pk_read");
  DBUG_PRINT("enter", ("key_len: %u", key_len));
  DBUG_DUMP("key", (char*)key, key_len);
1675
  m_write_op= FALSE;
1676

1677 1678
  NdbOperation::LockMode lm=
    (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
joreland@mysql.com's avatar
joreland@mysql.com committed
1679
  if (!(op= trans->getNdbOperation((const NDBTAB *) m_table)) || 
1680
      op->readTuple(lm) != 0)
1681
    ERR_RETURN(trans->getNdbError());
1682
  
1683
  if (table_share->primary_key == MAX_KEY) 
1684 1685 1686 1687 1688
  {
    // This table has no primary key, use "hidden" primary key
    DBUG_PRINT("info", ("Using hidden key"));
    DBUG_DUMP("key", (char*)key, 8);    
    if (set_hidden_key(op, no_fields, key))
1689
      ERR_RETURN(trans->getNdbError());
1690
    
1691
    // Read key at the same time, for future reference
1692
    if (get_ndb_value(op, NULL, no_fields, NULL))
1693
      ERR_RETURN(trans->getNdbError());
1694 1695 1696 1697 1698 1699 1700
  } 
  else 
  {
    if ((res= set_primary_key(op, key)))
      return res;
  }
  
1701
  if ((res= define_read_attrs(buf, op)))
1702
    DBUG_RETURN(res);
1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714

  if (m_use_partition_function)
  {
    op->setPartitionId(part_id);
    // If table has user defined partitioning
    // and no indexes, we need to read the partition id
    // to support ORDER BY queries
    if (table_share->primary_key == MAX_KEY &&
        get_ndb_partition_id(op))
      ERR_RETURN(trans->getNdbError());
  }

1715
  if (execute_no_commit_ie(this,trans,false) != 0) 
1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726
  {
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(ndb_err(trans));
  }

  // The value have now been fetched from NDB  
  unpack_record(buf);
  table->status= 0;     
  DBUG_RETURN(0);
}

1727 1728
/*
  Read one complementing record from NDB using primary key from old_data
1729
  or hidden key
1730 1731
*/

1732 1733
int ha_ndbcluster::complemented_read(const byte *old_data, byte *new_data,
                                     uint32 old_part_id)
1734
{
1735
  uint no_fields= table_share->fields, i;
1736
  NdbTransaction *trans= m_active_trans;
1737
  NdbOperation *op;
1738
  DBUG_ENTER("complemented_read");
1739
  m_write_op= FALSE;
1740

1741
  if (bitmap_is_set_all(table->read_set))
1742
  {
1743 1744
    // We have allready retrieved all fields, nothing to complement
    DBUG_RETURN(0);
1745
  }
1746

1747 1748
  NdbOperation::LockMode lm=
    (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
joreland@mysql.com's avatar
joreland@mysql.com committed
1749
  if (!(op= trans->getNdbOperation((const NDBTAB *) m_table)) || 
1750
      op->readTuple(lm) != 0)
1751
    ERR_RETURN(trans->getNdbError());
1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762
  if (table_share->primary_key != MAX_KEY) 
  {
    if (set_primary_key_from_record(op, old_data))
      ERR_RETURN(trans->getNdbError());
  } 
  else 
  {
    // This table has no primary key, use "hidden" primary key
    if (set_hidden_key(op, table->s->fields, m_ref))
      ERR_RETURN(op->getNdbError());
  }
1763 1764 1765 1766

  if (m_use_partition_function)
    op->setPartitionId(old_part_id);
  
1767 1768 1769 1770
  // Read all unreferenced non-key field(s)
  for (i= 0; i < no_fields; i++) 
  {
    Field *field= table->field[i];
1771
    if (!((field->flags & PRI_KEY_FLAG) ||
1772 1773
          bitmap_is_set(table->read_set, i)) &&
        !bitmap_is_set(table->write_set, i))
1774
    {
1775
      if (get_ndb_value(op, field, i, new_data))
1776
        ERR_RETURN(trans->getNdbError());
1777 1778 1779
    }
  }
  
1780
  if (execute_no_commit(this,trans,false) != 0) 
1781 1782 1783 1784 1785 1786 1787 1788
  {
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(ndb_err(trans));
  }

  // The value have now been fetched from NDB  
  unpack_record(new_data);
  table->status= 0;     
1789 1790 1791 1792 1793 1794 1795 1796

  /**
   * restore m_value
   */
  for (i= 0; i < no_fields; i++) 
  {
    Field *field= table->field[i];
    if (!((field->flags & PRI_KEY_FLAG) ||
1797
          bitmap_is_set(table->read_set, i)))
1798 1799 1800 1801 1802
    {
      m_value[i].ptr= NULL;
    }
  }
  
1803 1804 1805
  DBUG_RETURN(0);
}

1806
/*
1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866
 * Check that all operations between first and last all
 * have gotten the errcode
 * If checking for HA_ERR_KEY_NOT_FOUND then update m_dupkey
 * for all succeeding operations
 */
bool ha_ndbcluster::check_all_operations_for_error(NdbTransaction *trans,
                                                   const NdbOperation *first,
                                                   const NdbOperation *last,
                                                   uint errcode)
{
  const NdbOperation *op= first;
  DBUG_ENTER("ha_ndbcluster::check_all_operations_for_error");

  while(op)
  {
    NdbError err= op->getNdbError();
    if (err.status != NdbError::Success)
    {
      if (ndb_to_mysql_error(&err) != (int) errcode)
        DBUG_RETURN(false);
      if (op == last) break;
      op= trans->getNextCompletedOperation(op);
    }
    else
    {
      // We found a duplicate
      if (op->getType() == NdbOperation::UniqueIndexAccess)
      {
        if (errcode == HA_ERR_KEY_NOT_FOUND)
        {
          NdbIndexOperation *iop= (NdbIndexOperation *) op;
          const NDBINDEX *index= iop->getIndex();
          // Find the key_no of the index
          for(uint i= 0; i<table->s->keys; i++)
          {
            if (m_index[i].unique_index == index)
            {
              m_dupkey= i;
              break;
            }
          }
        }
      }
      else
      {
        // Must have been primary key access
        DBUG_ASSERT(op->getType() == NdbOperation::PrimaryKeyAccess);
        if (errcode == HA_ERR_KEY_NOT_FOUND)
          m_dupkey= table->s->primary_key;
      }
      DBUG_RETURN(false);      
    }
  }
  DBUG_RETURN(true);
}


/*
 * Peek to check if any rows already exist with conflicting
 * primary key or unique index values
1867 1868
*/

1869
int ha_ndbcluster::peek_indexed_rows(const byte *record)
1870
{
1871
  NdbTransaction *trans= m_active_trans;
1872
  NdbOperation *op;
1873 1874
  const NdbOperation *first, *last;
  uint i;
1875
  int res;
1876
  DBUG_ENTER("peek_indexed_rows");
1877

1878 1879 1880
  NdbOperation::LockMode lm= NdbOperation::LM_Read;
  first= NULL;
  if (table->s->primary_key != MAX_KEY)
1881
  {
1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893
    /*
     * Fetch any row with colliding primary key
     */
    if (!(op= trans->getNdbOperation((const NDBTAB *) m_table)) ||
        op->readTuple(lm) != 0)
      ERR_RETURN(trans->getNdbError());
    
    first= op;
    if ((res= set_primary_key_from_record(op, record)))
      ERR_RETURN(trans->getNdbError());

    if (m_use_partition_function)
1894
    {
1895 1896 1897
      uint32 part_id;
      int error;
      longlong func_value;
1898 1899 1900 1901
      my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
      error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value);
      dbug_tmp_restore_column_map(table->read_set, old_map);
      if (error)
1902 1903
        DBUG_RETURN(error);
      op->setPartitionId(part_id);
1904 1905
    }
  }
1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917
  /*
   * Fetch any rows with colliding unique indexes
   */
  KEY* key_info;
  KEY_PART_INFO *key_part, *end;
  for (i= 0, key_info= table->key_info; i < table->s->keys; i++, key_info++)
  {
    if (i != table->s->primary_key &&
        key_info->flags & HA_NOSAME)
    {
      // A unique index is defined on table
      NdbIndexOperation *iop;
1918
      const NDBINDEX *unique_index = m_index[i].unique_index;
1919 1920
      key_part= key_info->key_part;
      end= key_part + key_info->key_parts;
1921
      if (!(iop= trans->getNdbIndexOperation(unique_index, m_table)) ||
1922 1923
          iop->readTuple(lm) != 0)
        ERR_RETURN(trans->getNdbError());
1924

1925 1926 1927 1928 1929 1930 1931 1932
      if (!first)
        first= iop;
      if ((res= set_index_key_from_record(iop, record, i)))
        ERR_RETURN(trans->getNdbError());
    }
  }
  last= trans->getLastDefinedOperation();
  if (first)
1933
    res= execute_no_commit_ie(this,trans,false);
1934 1935 1936 1937 1938 1939 1940 1941
  else
  {
    // Table has no keys
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
  }
  if (check_all_operations_for_error(trans, first, last, 
                                     HA_ERR_KEY_NOT_FOUND))
1942 1943 1944 1945
  {
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(ndb_err(trans));
  } 
1946 1947 1948 1949
  else
  {
    DBUG_PRINT("info", ("m_dupkey %d", m_dupkey));
  }
1950 1951
  DBUG_RETURN(0);
}
1952

1953

1954 1955 1956 1957 1958
/*
  Read one record from NDB using unique secondary index
*/

int ha_ndbcluster::unique_index_read(const byte *key,
1959
                                     uint key_len, byte *buf)
1960
{
1961
  int res;
1962
  NdbTransaction *trans= m_active_trans;
1963
  NdbIndexOperation *op;
1964
  DBUG_ENTER("ha_ndbcluster::unique_index_read");
1965 1966 1967
  DBUG_PRINT("enter", ("key_len: %u, index: %u", key_len, active_index));
  DBUG_DUMP("key", (char*)key, key_len);
  
1968 1969
  NdbOperation::LockMode lm=
    (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
1970 1971
  if (!(op= trans->getNdbIndexOperation(m_index[active_index].unique_index, 
                                        m_table)) ||
1972
      op->readTuple(lm) != 0)
1973 1974 1975
    ERR_RETURN(trans->getNdbError());
  
  // Set secondary index key(s)
1976
  if ((res= set_index_key(op, table->key_info + active_index, key)))
1977 1978
    DBUG_RETURN(res);
  
1979
  if ((res= define_read_attrs(buf, op)))
1980
    DBUG_RETURN(res);
1981

1982
  if (execute_no_commit_ie(this,trans,false) != 0) 
1983 1984 1985 1986 1987 1988 1989 1990 1991 1992
  {
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(ndb_err(trans));
  }
  // The value have now been fetched from NDB
  unpack_record(buf);
  table->status= 0;
  DBUG_RETURN(0);
}

1993
inline int ha_ndbcluster::fetch_next(NdbScanOperation* cursor)
1994 1995
{
  DBUG_ENTER("fetch_next");
1996
  int check;
1997
  NdbTransaction *trans= m_active_trans;
1998
  
mskold@mysql.com's avatar
mskold@mysql.com committed
1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022
  if (m_lock_tuple)
  {
    /*
      Lock level m_lock.type either TL_WRITE_ALLOW_WRITE
      (SELECT FOR UPDATE) or TL_READ_WITH_SHARED_LOCKS (SELECT
      LOCK WITH SHARE MODE) and row was not explictly unlocked 
      with unlock_row() call
    */
      NdbConnection *trans= m_active_trans;
      NdbOperation *op;
      // Lock row
      DBUG_PRINT("info", ("Keeping lock on scanned row"));
      
      if (!(op= m_active_cursor->lockCurrentTuple()))
      {
	m_lock_tuple= false;
	ERR_RETURN(trans->getNdbError());
      }
      m_ops_pending++;
  }
  m_lock_tuple= false;
  
  bool contact_ndb= m_lock.type < TL_WRITE_ALLOW_WRITE &&
                    m_lock.type != TL_READ_WITH_SHARED_LOCKS;;
2023 2024
  do {
    DBUG_PRINT("info", ("Call nextResult, contact_ndb: %d", contact_ndb));
pekka@mysql.com's avatar
pekka@mysql.com committed
2025 2026 2027
    /*
      We can only handle one tuple with blobs at a time.
    */
2028
    if (m_ops_pending && m_blobs_pending)
pekka@mysql.com's avatar
pekka@mysql.com committed
2029
    {
2030
      if (execute_no_commit(this,trans,false) != 0)
2031
        DBUG_RETURN(ndb_err(trans));
2032 2033
      m_ops_pending= 0;
      m_blobs_pending= FALSE;
pekka@mysql.com's avatar
pekka@mysql.com committed
2034
    }
2035 2036
    
    if ((check= cursor->nextResult(contact_ndb, m_force_send)) == 0)
2037
    {
mskold@mysql.com's avatar
mskold@mysql.com committed
2038 2039 2040 2041 2042 2043 2044
      /*
	Explicitly lock tuple if "select for update" or
	"select lock in share mode"
      */
      m_lock_tuple= (m_lock.type == TL_WRITE_ALLOW_WRITE
		     || 
		     m_lock.type == TL_READ_WITH_SHARED_LOCKS);
2045 2046 2047 2048 2049 2050
      DBUG_RETURN(0);
    } 
    else if (check == 1 || check == 2)
    {
      // 1: No more records
      // 2: No more cached records
2051
      
2052
      /*
2053 2054 2055
        Before fetching more rows and releasing lock(s),
        all pending update or delete operations should 
        be sent to NDB
2056
      */
2057 2058
      DBUG_PRINT("info", ("ops_pending: %d", m_ops_pending));    
      if (m_ops_pending)
2059
      {
2060 2061
        if (m_transaction_on)
        {
2062
          if (execute_no_commit(this,trans,false) != 0)
2063 2064 2065 2066 2067 2068
            DBUG_RETURN(-1);
        }
        else
        {
          if  (execute_commit(this,trans) != 0)
            DBUG_RETURN(-1);
2069
          if (trans->restart() != 0)
2070 2071 2072 2073 2074 2075
          {
            DBUG_ASSERT(0);
            DBUG_RETURN(-1);
          }
        }
        m_ops_pending= 0;
2076
      }
2077 2078
      contact_ndb= (check == 2);
    }
2079 2080 2081 2082
    else
    {
      DBUG_RETURN(-1);
    }
2083
  } while (check == 2);
2084

2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095
  DBUG_RETURN(1);
}

/*
  Get the next record of a started scan. Try to fetch
  it locally from NdbApi cached records if possible, 
  otherwise ask NDB for more.

  NOTE
  If this is a update/delete make sure to not contact 
  NDB before any pending ops have been sent to NDB.
2096

2097 2098 2099 2100 2101 2102 2103
*/

inline int ha_ndbcluster::next_result(byte *buf)
{  
  int res;
  DBUG_ENTER("next_result");
    
2104 2105 2106
  if (!m_active_cursor)
    DBUG_RETURN(HA_ERR_END_OF_FILE);
  
2107
  if ((res= fetch_next(m_active_cursor)) == 0)
2108 2109 2110 2111 2112 2113 2114
  {
    DBUG_PRINT("info", ("One more record found"));    
    
    unpack_record(buf);
    table->status= 0;
    DBUG_RETURN(0);
  }
2115
  else if (res == 1)
2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126
  {
    // No more records
    table->status= STATUS_NOT_FOUND;
    
    DBUG_PRINT("info", ("No more records"));
    DBUG_RETURN(HA_ERR_END_OF_FILE);
  }
  else
  {
    DBUG_RETURN(ndb_err(m_active_trans));
  }
2127 2128
}

2129
/*
2130
  Set bounds for ordered index scan.
2131 2132
*/

joreland@mysql.com's avatar
joreland@mysql.com committed
2133
int ha_ndbcluster::set_bounds(NdbIndexScanOperation *op,
2134 2135
                              uint inx,
                              bool rir,
2136 2137
                              const key_range *keys[2],
                              uint range_no)
2138
{
2139
  const KEY *const key_info= table->key_info + inx;
2140 2141 2142
  const uint key_parts= key_info->key_parts;
  uint key_tot_len[2];
  uint tot_len;
2143
  uint i, j;
2144 2145

  DBUG_ENTER("set_bounds");
2146
  DBUG_PRINT("info", ("key_parts=%d", key_parts));
2147

2148
  for (j= 0; j <= 1; j++)
2149
  {
2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162
    const key_range *key= keys[j];
    if (key != NULL)
    {
      // for key->flag see ha_rkey_function
      DBUG_PRINT("info", ("key %d length=%d flag=%d",
                          j, key->length, key->flag));
      key_tot_len[j]= key->length;
    }
    else
    {
      DBUG_PRINT("info", ("key %d not present", j));
      key_tot_len[j]= 0;
    }
2163 2164
  }
  tot_len= 0;
2165

2166 2167 2168 2169
  for (i= 0; i < key_parts; i++)
  {
    KEY_PART_INFO *key_part= &key_info->key_part[i];
    Field *field= key_part->field;
2170
#ifndef DBUG_OFF
2171
    uint part_len= key_part->length;
2172
#endif
2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186
    uint part_store_len= key_part->store_length;
    // Info about each key part
    struct part_st {
      bool part_last;
      const key_range *key;
      const byte *part_ptr;
      bool part_null;
      int bound_type;
      const char* bound_ptr;
    };
    struct part_st part[2];

    for (j= 0; j <= 1; j++)
    {
2187
      struct part_st &p= part[j];
2188 2189 2190 2191 2192 2193 2194
      p.key= NULL;
      p.bound_type= -1;
      if (tot_len < key_tot_len[j])
      {
        p.part_last= (tot_len + part_store_len >= key_tot_len[j]);
        p.key= keys[j];
        p.part_ptr= &p.key->key[tot_len];
joreland@mysql.com's avatar
joreland@mysql.com committed
2195
        p.part_null= key_part->null_bit && *p.part_ptr;
2196
        p.bound_ptr= (const char *)
joreland@mysql.com's avatar
joreland@mysql.com committed
2197
          p.part_null ? 0 : key_part->null_bit ? p.part_ptr + 1 : p.part_ptr;
2198 2199 2200 2201 2202 2203

        if (j == 0)
        {
          switch (p.key->flag)
          {
            case HA_READ_KEY_EXACT:
2204 2205 2206 2207
              if (! rir)
                p.bound_type= NdbIndexScanOperation::BoundEQ;
              else // differs for records_in_range
                p.bound_type= NdbIndexScanOperation::BoundLE;
2208
              break;
2209
            // ascending
2210 2211 2212 2213 2214 2215 2216 2217 2218
            case HA_READ_KEY_OR_NEXT:
              p.bound_type= NdbIndexScanOperation::BoundLE;
              break;
            case HA_READ_AFTER_KEY:
              if (! p.part_last)
                p.bound_type= NdbIndexScanOperation::BoundLE;
              else
                p.bound_type= NdbIndexScanOperation::BoundLT;
              break;
2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231
            // descending
            case HA_READ_PREFIX_LAST:           // weird
              p.bound_type= NdbIndexScanOperation::BoundEQ;
              break;
            case HA_READ_PREFIX_LAST_OR_PREV:   // weird
              p.bound_type= NdbIndexScanOperation::BoundGE;
              break;
            case HA_READ_BEFORE_KEY:
              if (! p.part_last)
                p.bound_type= NdbIndexScanOperation::BoundGE;
              else
                p.bound_type= NdbIndexScanOperation::BoundGT;
              break;
2232 2233 2234 2235 2236 2237 2238
            default:
              break;
          }
        }
        if (j == 1) {
          switch (p.key->flag)
          {
2239
            // ascending
2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250
            case HA_READ_BEFORE_KEY:
              if (! p.part_last)
                p.bound_type= NdbIndexScanOperation::BoundGE;
              else
                p.bound_type= NdbIndexScanOperation::BoundGT;
              break;
            case HA_READ_AFTER_KEY:     // weird
              p.bound_type= NdbIndexScanOperation::BoundGE;
              break;
            default:
              break;
2251
            // descending strangely sets no end key
2252 2253
          }
        }
2254

2255 2256 2257
        if (p.bound_type == -1)
        {
          DBUG_PRINT("error", ("key %d unknown flag %d", j, p.key->flag));
2258
          DBUG_ASSERT(FALSE);
2259
          // Stop setting bounds but continue with what we have
2260
          op->end_of_bound(range_no);
2261 2262 2263 2264
          DBUG_RETURN(0);
        }
      }
    }
2265

2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282
    // Seen with e.g. b = 1 and c > 1
    if (part[0].bound_type == NdbIndexScanOperation::BoundLE &&
        part[1].bound_type == NdbIndexScanOperation::BoundGE &&
        memcmp(part[0].part_ptr, part[1].part_ptr, part_store_len) == 0)
    {
      DBUG_PRINT("info", ("replace LE/GE pair by EQ"));
      part[0].bound_type= NdbIndexScanOperation::BoundEQ;
      part[1].bound_type= -1;
    }
    // Not seen but was in previous version
    if (part[0].bound_type == NdbIndexScanOperation::BoundEQ &&
        part[1].bound_type == NdbIndexScanOperation::BoundGE &&
        memcmp(part[0].part_ptr, part[1].part_ptr, part_store_len) == 0)
    {
      DBUG_PRINT("info", ("remove GE from EQ/GE pair"));
      part[1].bound_type= -1;
    }
2283

2284 2285
    for (j= 0; j <= 1; j++)
    {
2286
      struct part_st &p= part[j];
2287 2288 2289 2290 2291 2292 2293 2294 2295
      // Set bound if not done with this key
      if (p.key != NULL)
      {
        DBUG_PRINT("info", ("key %d:%d offset=%d length=%d last=%d bound=%d",
                            j, i, tot_len, part_len, p.part_last, p.bound_type));
        DBUG_DUMP("info", (const char*)p.part_ptr, part_store_len);

        // Set bound if not cancelled via type -1
        if (p.bound_type != -1)
2296
        {
pekka@mysql.com's avatar
pekka@mysql.com committed
2297 2298 2299
          const char* ptr= p.bound_ptr;
          char buf[256];
          shrink_varchar(field, ptr, buf);
tomas@poseidon.ndb.mysql.com's avatar
Merge  
tomas@poseidon.ndb.mysql.com committed
2300
          if (op->setBound(i, p.bound_type, ptr))
2301
            ERR_RETURN(op->getNdbError());
2302
        }
2303 2304 2305 2306
      }
    }

    tot_len+= part_store_len;
2307
  }
2308
  op->end_of_bound(range_no);
2309 2310 2311
  DBUG_RETURN(0);
}

2312
/*
2313
  Start ordered index scan in NDB
2314 2315
*/

2316
int ha_ndbcluster::ordered_index_scan(const key_range *start_key,
2317
                                      const key_range *end_key,
2318 2319
                                      bool sorted, bool descending,
                                      byte* buf, part_id_range *part_spec)
2320
{  
2321
  int res;
joreland@mysql.com's avatar
joreland@mysql.com committed
2322
  bool restart;
2323
  NdbTransaction *trans= m_active_trans;
joreland@mysql.com's avatar
joreland@mysql.com committed
2324
  NdbIndexScanOperation *op;
2325

2326 2327 2328
  DBUG_ENTER("ha_ndbcluster::ordered_index_scan");
  DBUG_PRINT("enter", ("index: %u, sorted: %d, descending: %d",
             active_index, sorted, descending));  
2329
  DBUG_PRINT("enter", ("Starting new ordered scan on %s", m_tabname));
2330
  m_write_op= FALSE;
pekka@mysql.com's avatar
pekka@mysql.com committed
2331

2332 2333
  // Check that sorted seems to be initialised
  DBUG_ASSERT(sorted == 0 || sorted == 1);
2334
  
2335
  if (m_active_cursor == 0)
joreland@mysql.com's avatar
joreland@mysql.com committed
2336
  {
2337
    restart= FALSE;
joreland@mysql.com's avatar
joreland@mysql.com committed
2338 2339
    NdbOperation::LockMode lm=
      (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
mskold@mysql.com's avatar
mskold@mysql.com committed
2340
   bool need_pk = (lm == NdbOperation::LM_Read);
2341 2342
    if (!(op= trans->getNdbIndexScanOperation(m_index[active_index].index, 
                                              m_table)) ||
mskold@mysql.com's avatar
mskold@mysql.com committed
2343
        op->readTuples(lm, 0, parallelism, sorted, descending, false, need_pk))
joreland@mysql.com's avatar
joreland@mysql.com committed
2344
      ERR_RETURN(trans->getNdbError());
2345 2346 2347
    if (m_use_partition_function && part_spec != NULL &&
        part_spec->start_part == part_spec->end_part)
      op->setPartitionId(part_spec->start_part);
2348
    m_active_cursor= op;
joreland@mysql.com's avatar
joreland@mysql.com committed
2349
  } else {
2350
    restart= TRUE;
2351
    op= (NdbIndexScanOperation*)m_active_cursor;
joreland@mysql.com's avatar
joreland@mysql.com committed
2352
    
2353 2354 2355
    if (m_use_partition_function && part_spec != NULL &&
        part_spec->start_part == part_spec->end_part)
      op->setPartitionId(part_spec->start_part);
joreland@mysql.com's avatar
joreland@mysql.com committed
2356 2357
    DBUG_ASSERT(op->getSorted() == sorted);
    DBUG_ASSERT(op->getLockMode() == 
2358
                (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type));
2359
    if (op->reset_bounds(m_force_send))
joreland@mysql.com's avatar
joreland@mysql.com committed
2360 2361
      DBUG_RETURN(ndb_err(m_active_trans));
  }
2362
  
2363
  {
2364
    const key_range *keys[2]= { start_key, end_key };
2365
    res= set_bounds(op, active_index, false, keys);
2366 2367
    if (res)
      DBUG_RETURN(res);
2368
  }
2369

2370
  if (!restart)
2371
  {
2372 2373 2374
    if (generate_scan_filter(m_cond_stack, op))
      DBUG_RETURN(ndb_err(trans));

2375
    if ((res= define_read_attrs(buf, op)))
2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386
    {
      DBUG_RETURN(res);
    }
    
    // If table has user defined partitioning
    // and no primary key, we need to read the partition id
    // to support ORDER BY queries
    if (m_use_partition_function &&
        (table_share->primary_key == MAX_KEY) && 
        (get_ndb_partition_id(op)))
      ERR_RETURN(trans->getNdbError());
joreland@mysql.com's avatar
joreland@mysql.com committed
2387
  }
2388

2389
  if (execute_no_commit(this,trans,false) != 0)
2390 2391 2392 2393
    DBUG_RETURN(ndb_err(trans));
  
  DBUG_RETURN(next_result(buf));
}
2394 2395

/*
2396
  Start full table scan in NDB
2397 2398 2399 2400
 */

int ha_ndbcluster::full_table_scan(byte *buf)
{
2401
  int res;
2402
  NdbScanOperation *op;
2403
  NdbTransaction *trans= m_active_trans;
2404
  part_id_range part_spec;
2405 2406 2407

  DBUG_ENTER("full_table_scan");  
  DBUG_PRINT("enter", ("Starting new scan on %s", m_tabname));
2408
  m_write_op= FALSE;
2409

2410 2411
  NdbOperation::LockMode lm=
    (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
mskold@mysql.com's avatar
mskold@mysql.com committed
2412
  bool need_pk = (lm == NdbOperation::LM_Read);
2413
  if (!(op=trans->getNdbScanOperation(m_table)) ||
mskold@mysql.com's avatar
mskold@mysql.com committed
2414 2415 2416
      op->readTuples(lm, 
		     (need_pk)?NdbScanOperation::SF_KeyInfo:0, 
		     parallelism))
2417
    ERR_RETURN(trans->getNdbError());
2418
  m_active_cursor= op;
2419 2420 2421 2422

  if (m_use_partition_function)
  {
    part_spec.start_part= 0;
2423
    part_spec.end_part= m_part_info->get_tot_partitions() - 1;
2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445
    prune_partition_set(table, &part_spec);
    DBUG_PRINT("info", ("part_spec.start_part = %u, part_spec.end_part = %u",
                        part_spec.start_part, part_spec.end_part));
    /*
      If partition pruning has found no partition in set
      we can return HA_ERR_END_OF_FILE
      If partition pruning has found exactly one partition in set
      we can optimize scan to run towards that partition only.
    */
    if (part_spec.start_part > part_spec.end_part)
    {
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    else if (part_spec.start_part == part_spec.end_part)
    {
      /*
        Only one partition is required to scan, if sorted is required we
        don't need it any more since output from one ordered partitioned
        index is always sorted.
      */
      m_active_cursor->setPartitionId(part_spec.start_part);
    }
2446 2447 2448 2449 2450 2451
    // If table has user defined partitioning
    // and no primary key, we need to read the partition id
    // to support ORDER BY queries
    if ((table_share->primary_key == MAX_KEY) && 
        (get_ndb_partition_id(op)))
      ERR_RETURN(trans->getNdbError());
2452 2453
  }

2454 2455
  if (generate_scan_filter(m_cond_stack, op))
    DBUG_RETURN(ndb_err(trans));
2456
  if ((res= define_read_attrs(buf, op)))
2457 2458
    DBUG_RETURN(res);

2459
  if (execute_no_commit(this,trans,false) != 0)
2460 2461 2462
    DBUG_RETURN(ndb_err(trans));
  DBUG_PRINT("exit", ("Scan started successfully"));
  DBUG_RETURN(next_result(buf));
2463 2464
}

2465 2466 2467 2468 2469
/*
  Insert one record into NDB
*/
int ha_ndbcluster::write_row(byte *record)
{
mskold@mysql.com's avatar
mskold@mysql.com committed
2470
  bool has_auto_increment;
2471
  uint i;
2472
  NdbTransaction *trans= m_active_trans;
2473 2474
  NdbOperation *op;
  int res;
2475
  THD *thd= current_thd;
2476 2477
  longlong func_value= 0;
  DBUG_ENTER("ha_ndbcluster::write_row");
2478

2479
  m_write_op= TRUE;
2480 2481 2482 2483 2484 2485 2486 2487 2488
  has_auto_increment= (table->next_number_field && record == table->record[0]);
  if (table_share->primary_key != MAX_KEY)
  {
    /*
     * Increase any auto_incremented primary key
     */
    if (has_auto_increment) 
    {
      THD *thd= table->in_use;
2489
      int error;
2490 2491

      m_skip_auto_increment= FALSE;
2492 2493
      if ((error= update_auto_increment()))
        DBUG_RETURN(error);
2494
      m_skip_auto_increment= (insert_id_for_cur_row == 0);
2495 2496 2497 2498 2499 2500 2501
    }
  }

  /*
   * If IGNORE the ignore constraint violations on primary and unique keys
   */
  if (!m_use_write && m_ignore_dup_key)
2502
  {
2503 2504 2505 2506 2507
    /*
      compare if expression with that in start_bulk_insert()
      start_bulk_insert will set parameters to ensure that each
      write_row is committed individually
    */
2508
    int peek_res= peek_indexed_rows(record);
2509 2510 2511 2512 2513 2514 2515
    
    if (!peek_res) 
    {
      DBUG_RETURN(HA_ERR_FOUND_DUPP_KEY);
    }
    if (peek_res != HA_ERR_KEY_NOT_FOUND)
      DBUG_RETURN(peek_res);
2516
  }
2517

2518
  statistic_increment(thd->status_var.ha_write_count, &LOCK_status);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2519 2520
  if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
    table->timestamp_field->set_time();
2521

2522
  if (!(op= trans->getNdbOperation(m_table)))
2523 2524 2525 2526 2527 2528
    ERR_RETURN(trans->getNdbError());

  res= (m_use_write) ? op->writeTuple() :op->insertTuple(); 
  if (res != 0)
    ERR_RETURN(trans->getNdbError());  
 
2529 2530 2531 2532
  if (m_use_partition_function)
  {
    uint32 part_id;
    int error;
2533 2534 2535 2536
    my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
    error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value);
    dbug_tmp_restore_column_map(table->read_set, old_map);
    if (error)
2537 2538 2539 2540
      DBUG_RETURN(error);
    op->setPartitionId(part_id);
  }

2541
  if (table_share->primary_key == MAX_KEY) 
2542 2543
  {
    // Table has hidden primary key
2544
    Ndb *ndb= get_ndb();
2545 2546
    int ret;
    Uint64 auto_value;
2547 2548
    uint retries= NDB_AUTO_INCREMENT_RETRIES;
    do {
2549 2550
      Ndb_tuple_id_range_guard g(m_share);
      ret= ndb->getAutoIncrementValue(m_table, g.range, auto_value, 1);
2551
    } while (ret == -1 && 
2552 2553
             --retries &&
             ndb->getNdbError().status == NdbError::TemporaryError);
2554
    if (ret == -1)
2555
      ERR_RETURN(ndb->getNdbError());
2556
    if (set_hidden_key(op, table_share->fields, (const byte*)&auto_value))
2557 2558 2559 2560
      ERR_RETURN(op->getNdbError());
  } 
  else 
  {
2561 2562 2563
    int error;
    if ((error= set_primary_key_from_record(op, record)))
      DBUG_RETURN(error);
2564 2565 2566
  }

  // Set non-key attribute(s)
2567
  bool set_blob_value= FALSE;
2568
  my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
2569
  for (i= 0; i < table_share->fields; i++) 
2570 2571 2572
  {
    Field *field= table->field[i];
    if (!(field->flags & PRI_KEY_FLAG) &&
2573
	(bitmap_is_set(table->write_set, i) || !m_use_write) &&
2574
        set_ndb_value(op, field, i, record-table->record[0], &set_blob_value))
2575
    {
2576
      m_skip_auto_increment= TRUE;
2577
      dbug_tmp_restore_column_map(table->read_set, old_map);
2578
      ERR_RETURN(op->getNdbError());
2579
    }
2580
  }
2581
  dbug_tmp_restore_column_map(table->read_set, old_map);
2582

2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598
  if (m_use_partition_function)
  {
    /*
      We need to set the value of the partition function value in
      NDB since the NDB kernel doesn't have easy access to the function
      to calculate the value.
    */
    if (func_value >= INT_MAX32)
      func_value= INT_MAX32;
    uint32 part_func_value= (uint32)func_value;
    uint no_fields= table_share->fields;
    if (table_share->primary_key == MAX_KEY)
      no_fields++;
    op->setValue(no_fields, part_func_value);
  }

2599 2600
  m_rows_changed++;

2601 2602 2603 2604 2605 2606 2607
  /*
    Execute write operation
    NOTE When doing inserts with many values in 
    each INSERT statement it should not be necessary
    to NoCommit the transaction between each row.
    Find out how this is detected!
  */
2608
  m_rows_inserted++;
2609
  no_uncommitted_rows_update(1);
2610
  m_bulk_insert_not_flushed= TRUE;
2611
  if ((m_rows_to_insert == (ha_rows) 1) || 
2612
      ((m_rows_inserted % m_bulk_insert_rows) == 0) ||
2613
      m_primary_key_update ||
2614
      set_blob_value)
2615 2616 2617
  {
    // Send rows to NDB
    DBUG_PRINT("info", ("Sending inserts to NDB, "\
2618 2619
                        "rows_inserted:%d, bulk_insert_rows: %d", 
                        (int)m_rows_inserted, (int)m_bulk_insert_rows));
2620

2621
    m_bulk_insert_not_flushed= FALSE;
2622
    if (m_transaction_on)
2623
    {
2624
      if (execute_no_commit(this,trans,false) != 0)
2625
      {
2626 2627 2628
        m_skip_auto_increment= TRUE;
        no_uncommitted_rows_execute_failure();
        DBUG_RETURN(ndb_err(trans));
2629
      }
2630 2631
    }
    else
2632
    {
2633
      if (execute_commit(this,trans) != 0)
2634
      {
2635 2636 2637
        m_skip_auto_increment= TRUE;
        no_uncommitted_rows_execute_failure();
        DBUG_RETURN(ndb_err(trans));
2638
      }
2639
      if (trans->restart() != 0)
2640
      {
2641 2642
        DBUG_ASSERT(0);
        DBUG_RETURN(-1);
2643
      }
2644
    }
2645
  }
2646
  if ((has_auto_increment) && (m_skip_auto_increment))
mskold@mysql.com's avatar
mskold@mysql.com committed
2647
  {
2648
    Ndb *ndb= get_ndb();
2649
    Uint64 next_val= (Uint64) table->next_number_field->val_int() + 1;
2650
    char buff[22];
mskold@mysql.com's avatar
mskold@mysql.com committed
2651
    DBUG_PRINT("info", 
2652 2653
               ("Trying to set next auto increment value to %s",
                llstr(next_val, buff)));
2654 2655
    Ndb_tuple_id_range_guard g(m_share);
    if (ndb->setAutoIncrementValue(m_table, g.range, next_val, TRUE)
2656
        == -1)
2657
      ERR_RETURN(ndb->getNdbError());
2658
  }
2659
  m_skip_auto_increment= TRUE;
2660

2661
  DBUG_PRINT("exit",("ok"));
2662 2663 2664 2665 2666 2667 2668
  DBUG_RETURN(0);
}


/* Compare if a key in a row has changed */

int ha_ndbcluster::key_cmp(uint keynr, const byte * old_row,
2669
                           const byte * new_row)
2670 2671 2672 2673 2674 2675 2676 2677 2678
{
  KEY_PART_INFO *key_part=table->key_info[keynr].key_part;
  KEY_PART_INFO *end=key_part+table->key_info[keynr].key_parts;

  for (; key_part != end ; key_part++)
  {
    if (key_part->null_bit)
    {
      if ((old_row[key_part->null_offset] & key_part->null_bit) !=
2679 2680
          (new_row[key_part->null_offset] & key_part->null_bit))
        return 1;
2681
    }
2682
    if (key_part->key_part_flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
2683 2684 2685
    {

      if (key_part->field->cmp_binary((char*) (old_row + key_part->offset),
2686 2687 2688
                                      (char*) (new_row + key_part->offset),
                                      (ulong) key_part->length))
        return 1;
2689 2690 2691 2692
    }
    else
    {
      if (memcmp(old_row+key_part->offset, new_row+key_part->offset,
2693 2694
                 key_part->length))
        return 1;
2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706
    }
  }
  return 0;
}

/*
  Update one record in NDB using primary key
*/

int ha_ndbcluster::update_row(const byte *old_data, byte *new_data)
{
  THD *thd= current_thd;
2707
  NdbTransaction *trans= m_active_trans;
2708
  NdbScanOperation* cursor= m_active_cursor;
2709 2710
  NdbOperation *op;
  uint i;
2711 2712
  uint32 old_part_id= 0, new_part_id= 0;
  int error;
2713
  longlong func_value;
2714
  DBUG_ENTER("update_row");
2715
  m_write_op= TRUE;
2716
  
2717
  statistic_increment(thd->status_var.ha_update_count, &LOCK_status);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2718
  if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
2719
  {
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2720
    table->timestamp_field->set_time();
2721
    bitmap_set_bit(table->write_set, table->timestamp_field->field_index);
2722
  }
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2723

2724 2725
  if (m_use_partition_function &&
      (error= get_parts_for_update(old_data, new_data, table->record[0],
2726 2727
                                   m_part_info, &old_part_id, &new_part_id,
                                   &func_value)))
2728 2729 2730 2731
  {
    DBUG_RETURN(error);
  }

2732 2733 2734 2735 2736 2737
  /*
   * Check for update of primary key or partition change
   * for special handling
   */  
  if (((table_share->primary_key != MAX_KEY) &&
       key_cmp(table_share->primary_key, old_data, new_data)) ||
2738
      (old_part_id != new_part_id))
2739
  {
2740
    int read_res, insert_res, delete_res, undo_res;
2741

2742 2743
    DBUG_PRINT("info", ("primary key update or partition change, "
                        "doing read+delete+insert"));
2744
    // Get all old fields, since we optimize away fields not in query
2745
    read_res= complemented_read(old_data, new_data, old_part_id);
2746 2747
    if (read_res)
    {
2748
      DBUG_PRINT("info", ("read failed"));
2749 2750
      DBUG_RETURN(read_res);
    }
2751
    // Delete old row
2752
    m_primary_key_update= TRUE;
2753
    delete_res= delete_row(old_data);
2754
    m_primary_key_update= FALSE;
2755 2756 2757
    if (delete_res)
    {
      DBUG_PRINT("info", ("delete failed"));
2758
      DBUG_RETURN(delete_res);
2759
    }     
2760 2761
    // Insert new row
    DBUG_PRINT("info", ("delete succeded"));
2762
    m_primary_key_update= TRUE;
2763
    insert_res= write_row(new_data);
2764
    m_primary_key_update= FALSE;
2765 2766 2767 2768 2769
    if (insert_res)
    {
      DBUG_PRINT("info", ("insert failed"));
      if (trans->commitStatus() == NdbConnection::Started)
      {
2770
        // Undo delete_row(old_data)
2771
        m_primary_key_update= TRUE;
2772 2773 2774 2775 2776 2777
        undo_res= write_row((byte *)old_data);
        if (undo_res)
          push_warning(current_thd, 
                       MYSQL_ERROR::WARN_LEVEL_WARN, 
                       undo_res, 
                       "NDB failed undoing delete at primary key update");
2778 2779 2780 2781 2782
        m_primary_key_update= FALSE;
      }
      DBUG_RETURN(insert_res);
    }
    DBUG_PRINT("info", ("delete+insert succeeded"));
2783
    DBUG_RETURN(0);
2784
  }
2785

2786
  if (cursor)
2787
  {
2788 2789 2790 2791 2792 2793 2794 2795
    /*
      We are scanning records and want to update the record
      that was just found, call updateTuple on the cursor 
      to take over the lock to a new update operation
      And thus setting the primary key of the record from 
      the active record in cursor
    */
    DBUG_PRINT("info", ("Calling updateTuple on cursor"));
2796
    if (!(op= cursor->updateCurrentTuple()))
2797
      ERR_RETURN(trans->getNdbError());
mskold@mysql.com's avatar
mskold@mysql.com committed
2798
    m_lock_tuple= false;
2799
    m_ops_pending++;
2800
    if (uses_blob_value())
2801
      m_blobs_pending= TRUE;
2802 2803
    if (m_use_partition_function)
      cursor->setPartitionId(new_part_id);
2804 2805 2806
  }
  else
  {  
2807
    if (!(op= trans->getNdbOperation(m_table)) ||
2808
        op->updateTuple() != 0)
2809 2810
      ERR_RETURN(trans->getNdbError());  
    
2811 2812
    if (m_use_partition_function)
      op->setPartitionId(new_part_id);
2813
    if (table_share->primary_key == MAX_KEY) 
2814 2815 2816 2817 2818
    {
      // This table has no primary key, use "hidden" primary key
      DBUG_PRINT("info", ("Using hidden key"));
      
      // Require that the PK for this record has previously been 
2819 2820
      // read into m_ref
      DBUG_DUMP("key", m_ref, NDB_HIDDEN_PRIMARY_KEY_LENGTH);
2821
      
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2822
      if (set_hidden_key(op, table->s->fields, m_ref))
2823
        ERR_RETURN(op->getNdbError());
2824 2825 2826 2827
    } 
    else 
    {
      int res;
2828
      if ((res= set_primary_key_from_record(op, old_data)))
2829
        DBUG_RETURN(res);
2830
    }
2831 2832
  }

2833 2834
  m_rows_changed++;

2835
  // Set non-key attribute(s)
2836
  my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
2837
  for (i= 0; i < table_share->fields; i++) 
2838 2839
  {
    Field *field= table->field[i];
2840
    if (bitmap_is_set(table->write_set, i) &&
2841
        (!(field->flags & PRI_KEY_FLAG)) &&
2842
        set_ndb_value(op, field, i, new_data - table->record[0]))
2843 2844
    {
      dbug_tmp_restore_column_map(table->read_set, old_map);
2845
      ERR_RETURN(op->getNdbError());
2846
    }
2847
  }
2848
  dbug_tmp_restore_column_map(table->read_set, old_map);
2849

2850 2851 2852 2853 2854 2855 2856 2857 2858 2859
  if (m_use_partition_function)
  {
    if (func_value >= INT_MAX32)
      func_value= INT_MAX32;
    uint32 part_func_value= (uint32)func_value;
    uint no_fields= table_share->fields;
    if (table_share->primary_key == MAX_KEY)
      no_fields++;
    op->setValue(no_fields, part_func_value);
  }
2860
  // Execute update operation
2861
  if (!cursor && execute_no_commit(this,trans,false) != 0) {
2862
    no_uncommitted_rows_execute_failure();
2863
    DBUG_RETURN(ndb_err(trans));
2864
  }
2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875
  
  DBUG_RETURN(0);
}


/*
  Delete one record from NDB, using primary key 
*/

int ha_ndbcluster::delete_row(const byte *record)
{
2876
  THD *thd= current_thd;
2877
  NdbTransaction *trans= m_active_trans;
2878
  NdbScanOperation* cursor= m_active_cursor;
2879
  NdbOperation *op;
2880 2881
  uint32 part_id;
  int error;
2882
  DBUG_ENTER("delete_row");
2883
  m_write_op= TRUE;
2884

2885
  statistic_increment(thd->status_var.ha_delete_count,&LOCK_status);
2886
  m_rows_changed++;
2887

2888 2889 2890 2891 2892 2893 2894
  if (m_use_partition_function &&
      (error= get_part_for_delete(record, table->record[0], m_part_info,
                                  &part_id)))
  {
    DBUG_RETURN(error);
  }

2895
  if (cursor)
2896
  {
2897
    /*
2898
      We are scanning records and want to delete the record
2899
      that was just found, call deleteTuple on the cursor 
2900
      to take over the lock to a new delete operation
2901 2902 2903 2904
      And thus setting the primary key of the record from 
      the active record in cursor
    */
    DBUG_PRINT("info", ("Calling deleteTuple on cursor"));
2905
    if (cursor->deleteCurrentTuple() != 0)
2906
      ERR_RETURN(trans->getNdbError());     
mskold@mysql.com's avatar
mskold@mysql.com committed
2907
    m_lock_tuple= false;
2908
    m_ops_pending++;
2909

2910 2911 2912
    if (m_use_partition_function)
      cursor->setPartitionId(part_id);

2913 2914
    no_uncommitted_rows_update(-1);

2915 2916 2917
    if (!m_primary_key_update)
      // If deleting from cursor, NoCommit will be handled in next_result
      DBUG_RETURN(0);
2918 2919
  }
  else
2920
  {
2921
    
2922
    if (!(op=trans->getNdbOperation(m_table)) || 
2923
        op->deleteTuple() != 0)
2924 2925
      ERR_RETURN(trans->getNdbError());
    
2926 2927 2928
    if (m_use_partition_function)
      op->setPartitionId(part_id);

2929 2930
    no_uncommitted_rows_update(-1);
    
2931
    if (table_share->primary_key == MAX_KEY) 
2932 2933 2934 2935
    {
      // This table has no primary key, use "hidden" primary key
      DBUG_PRINT("info", ("Using hidden key"));
      
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2936
      if (set_hidden_key(op, table->s->fields, m_ref))
2937
        ERR_RETURN(op->getNdbError());
2938 2939 2940
    } 
    else 
    {
2941 2942
      if ((error= set_primary_key_from_record(op, record)))
        DBUG_RETURN(error);
2943
    }
2944
  }
2945

2946
  // Execute delete operation
2947
  if (execute_no_commit(this,trans,false) != 0) {
2948
    no_uncommitted_rows_execute_failure();
2949
    DBUG_RETURN(ndb_err(trans));
2950
  }
2951 2952
  DBUG_RETURN(0);
}
2953
  
2954 2955 2956 2957 2958
/*
  Unpack a record read from NDB 

  SYNOPSIS
    unpack_record()
2959
    buf                 Buffer to store read row
2960 2961 2962 2963 2964 2965 2966 2967

  NOTE
    The data for each row is read directly into the
    destination buffer. This function is primarily 
    called in order to check if any fields should be 
    set to null.
*/

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2968 2969
void ndb_unpack_record(TABLE *table, NdbValue *value,
                       MY_BITMAP *defined, byte *buf)
2970
{
2971
  Field **p_field= table->field, *field= *p_field;
2972
  my_ptrdiff_t row_offset= (my_ptrdiff_t) (buf - table->record[0]);
2973
  my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->write_set);
2974
  DBUG_ENTER("ndb_unpack_record");
2975

2976
  // Set null flag(s)
2977
  bzero(buf, table->s->null_bytes);
2978 2979
  for ( ; field;
       p_field++, value++, field= *p_field)
2980
  {
pekka@mysql.com's avatar
pekka@mysql.com committed
2981 2982
    if ((*value).ptr)
    {
2983
      if (!(field->flags & BLOB_FLAG))
pekka@mysql.com's avatar
pekka@mysql.com committed
2984
      {
2985 2986
        int is_null= (*value).rec->isNULL();
        if (is_null)
2987
        {
2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003
          if (is_null > 0)
          {
            DBUG_PRINT("info",("[%u] NULL",
                               (*value).rec->getColumn()->getColumnNo()));
            field->set_null(row_offset);
          }
          else
          {
            DBUG_PRINT("info",("[%u] UNDEFINED",
                               (*value).rec->getColumn()->getColumnNo()));
            bitmap_clear_bit(defined,
                             (*value).rec->getColumn()->getColumnNo());
          }
        }
        else if (field->type() == MYSQL_TYPE_BIT)
        {
3004 3005 3006 3007 3008 3009 3010 3011
          Field_bit *field_bit= static_cast<Field_bit*>(field);

          /*
            Move internal field pointer to point to 'buf'.  Calling
            the correct member function directly since we know the
            type of the object.
           */
          field_bit->Field_bit::move_field_offset(row_offset);
3012
          if (field->pack_length() < 5)
3013 3014
          {
            DBUG_PRINT("info", ("bit field H'%.8X", 
3015
                                (*value).rec->u_32_value()));
3016 3017
            field_bit->Field_bit::store((longlong) (*value).rec->u_32_value(),
                                        FALSE);
3018 3019 3020 3021
          }
          else
          {
            DBUG_PRINT("info", ("bit field H'%.8X%.8X",
3022 3023
                                *(Uint32*) (*value).rec->aRef(),
                                *((Uint32*) (*value).rec->aRef()+1)));
3024 3025
            field_bit->Field_bit::store((longlong) (*value).rec->u_64_value(), 
                                        TRUE);
3026
          }
3027 3028 3029 3030 3031
          /*
            Move back internal field pointer to point to original
            value (usually record[0]).
           */
          field_bit->Field_bit::move_field_offset(-row_offset);
3032 3033
          DBUG_PRINT("info",("[%u] SET",
                             (*value).rec->getColumn()->getColumnNo()));
3034
          DBUG_DUMP("info", (const char*) field->ptr, field->pack_length());
3035 3036 3037 3038 3039
        }
        else
        {
          DBUG_PRINT("info",("[%u] SET",
                             (*value).rec->getColumn()->getColumnNo()));
3040
          DBUG_DUMP("info", (const char*) field->ptr, field->pack_length());
3041
        }
pekka@mysql.com's avatar
pekka@mysql.com committed
3042 3043 3044
      }
      else
      {
3045
        NdbBlob *ndb_blob= (*value).blob;
3046
        uint col_no = ndb_blob->getColumn()->getColumnNo();
3047 3048
        int isNull;
        ndb_blob->getDefined(isNull);
3049
        if (isNull == 1)
3050
        {
serg@serg.mylan's avatar
serg@serg.mylan committed
3051
          DBUG_PRINT("info",("[%u] NULL", col_no));
3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068
          field->set_null(row_offset);
        }
        else if (isNull == -1)
        {
          DBUG_PRINT("info",("[%u] UNDEFINED", col_no));
          bitmap_clear_bit(defined, col_no);
        }
        else
        {
#ifndef DBUG_OFF
          // pointer vas set in get_ndb_blobs_value
          Field_blob *field_blob= (Field_blob*)field;
          char* ptr;
          field_blob->get_ptr(&ptr, row_offset);
          uint32 len= field_blob->get_length(row_offset);
          DBUG_PRINT("info",("[%u] SET ptr=%p len=%u", col_no, ptr, len));
#endif
3069
        }
pekka@mysql.com's avatar
pekka@mysql.com committed
3070 3071
      }
    }
3072
  }
3073
  dbug_tmp_restore_column_map(table->write_set, old_map);
3074 3075 3076 3077 3078 3079
  DBUG_VOID_RETURN;
}

void ha_ndbcluster::unpack_record(byte *buf)
{
  ndb_unpack_record(table, m_value, 0, buf);
3080 3081
#ifndef DBUG_OFF
  // Read and print all values that was fetched
3082
  if (table_share->primary_key == MAX_KEY)
3083 3084
  {
    // Table with hidden primary key
3085
    int hidden_no= table_share->fields;
3086
    const NDBTAB *tab= m_table;
3087
    char buff[22];
3088
    const NDBCOL *hidden_col= tab->getColumn(hidden_no);
3089
    const NdbRecAttr* rec= m_value[hidden_no].rec;
3090
    DBUG_ASSERT(rec);
3091
    DBUG_PRINT("hidden", ("%d: %s \"%s\"", hidden_no,
3092
			  hidden_col->getName(),
3093
                          llstr(rec->u_64_value(), buff)));
serg@serg.mylan's avatar
serg@serg.mylan committed
3094 3095
  }
  //DBUG_EXECUTE("value", print_results(););
3096 3097 3098 3099 3100
#endif
}

/*
  Utility function to print/dump the fetched field
serg@serg.mylan's avatar
serg@serg.mylan committed
3101 3102 3103
  to avoid unnecessary work, wrap in DBUG_EXECUTE as in:

    DBUG_EXECUTE("value", print_results(););
3104 3105 3106 3107 3108 3109 3110
 */

void ha_ndbcluster::print_results()
{
  DBUG_ENTER("print_results");

#ifndef DBUG_OFF
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
3111

3112
  char buf_type[MAX_FIELD_WIDTH], buf_val[MAX_FIELD_WIDTH];
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
3113
  String type(buf_type, sizeof(buf_type), &my_charset_bin);
3114
  String val(buf_val, sizeof(buf_val), &my_charset_bin);
3115
  for (uint f= 0; f < table_share->fields; f++)
3116
  {
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
3117
    /* Use DBUG_PRINT since DBUG_FILE cannot be filtered out */
3118
    char buf[2000];
3119
    Field *field;
3120
    void* ptr;
pekka@mysql.com's avatar
pekka@mysql.com committed
3121
    NdbValue value;
3122

3123
    buf[0]= 0;
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
3124
    field= table->field[f];
pekka@mysql.com's avatar
pekka@mysql.com committed
3125
    if (!(value= m_value[f]).ptr)
3126
    {
3127
      strmov(buf, "not read");
3128
      goto print_value;
3129
    }
3130

3131
    ptr= field->ptr;
pekka@mysql.com's avatar
pekka@mysql.com committed
3132 3133

    if (! (field->flags & BLOB_FLAG))
3134
    {
pekka@mysql.com's avatar
pekka@mysql.com committed
3135 3136
      if (value.rec->isNULL())
      {
3137
        strmov(buf, "NULL");
3138
        goto print_value;
pekka@mysql.com's avatar
pekka@mysql.com committed
3139
      }
3140 3141 3142 3143 3144
      type.length(0);
      val.length(0);
      field->sql_type(type);
      field->val_str(&val);
      my_snprintf(buf, sizeof(buf), "%s %s", type.c_ptr(), val.c_ptr());
pekka@mysql.com's avatar
pekka@mysql.com committed
3145 3146 3147
    }
    else
    {
3148
      NdbBlob *ndb_blob= value.blob;
3149
      bool isNull= TRUE;
pekka@mysql.com's avatar
pekka@mysql.com committed
3150
      ndb_blob->getNull(isNull);
3151 3152
      if (isNull)
        strmov(buf, "NULL");
3153
    }
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
3154

3155
print_value:
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
3156
    DBUG_PRINT("value", ("%u,%s: %s", f, field->field_name, buf));
3157 3158 3159 3160 3161 3162
  }
#endif
  DBUG_VOID_RETURN;
}


3163
int ha_ndbcluster::index_init(uint index, bool sorted)
3164
{
3165
  DBUG_ENTER("ha_ndbcluster::index_init");
3166 3167 3168
  DBUG_PRINT("enter", ("index: %u  sorted: %d", index, sorted));
  active_index= index;
  m_sorted= sorted;
mskold@mysql.com's avatar
mskold@mysql.com committed
3169 3170 3171 3172 3173 3174 3175
  /*
    Locks are are explicitly released in scan
    unless m_lock.type == TL_READ_HIGH_PRIORITY
    and no sub-sequent call to unlock_row()
  */
  m_lock_tuple= false;
    m_lock_tuple= false;
3176
  DBUG_RETURN(0);
3177 3178 3179 3180 3181
}


int ha_ndbcluster::index_end()
{
3182
  DBUG_ENTER("ha_ndbcluster::index_end");
3183
  DBUG_RETURN(close_scan());
3184 3185
}

3186 3187 3188 3189 3190 3191 3192 3193
/**
 * Check if key contains null
 */
static
int
check_null_in_key(const KEY* key_info, const byte *key, uint key_len)
{
  KEY_PART_INFO *curr_part, *end_part;
3194
  const byte* end_ptr= key + key_len;
3195 3196 3197 3198 3199 3200
  curr_part= key_info->key_part;
  end_part= curr_part + key_info->key_parts;
  

  for (; curr_part != end_part && key < end_ptr; curr_part++)
  {
3201
    if (curr_part->null_bit && *key)
3202 3203 3204 3205 3206 3207
      return 1;

    key += curr_part->store_length;
  }
  return 0;
}
3208 3209

int ha_ndbcluster::index_read(byte *buf,
3210 3211
                              const byte *key, uint key_len, 
                              enum ha_rkey_function find_flag)
3212
{
3213 3214
  key_range start_key;
  bool descending= FALSE;
3215
  DBUG_ENTER("ha_ndbcluster::index_read");
3216 3217 3218
  DBUG_PRINT("enter", ("active_index: %u, key_len: %u, find_flag: %d", 
                       active_index, key_len, find_flag));

3219 3220 3221
  start_key.key= key;
  start_key.length= key_len;
  start_key.flag= find_flag;
3222
  descending= FALSE;
3223 3224 3225 3226 3227 3228 3229 3230 3231 3232
  switch (find_flag) {
  case HA_READ_KEY_OR_PREV:
  case HA_READ_BEFORE_KEY:
  case HA_READ_PREFIX_LAST:
  case HA_READ_PREFIX_LAST_OR_PREV:
    descending= TRUE;
    break;
  default:
    break;
  }
3233 3234
  DBUG_RETURN(read_range_first_to_buf(&start_key, 0, descending,
                                      m_sorted, buf));
3235 3236 3237 3238
}


int ha_ndbcluster::index_read_idx(byte *buf, uint index_no, 
3239 3240
                              const byte *key, uint key_len, 
                              enum ha_rkey_function find_flag)
3241
{
3242
  statistic_increment(current_thd->status_var.ha_read_key_count, &LOCK_status);
3243
  DBUG_ENTER("ha_ndbcluster::index_read_idx");
3244
  DBUG_PRINT("enter", ("index_no: %u, key_len: %u", index_no, key_len));  
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
3245
  close_scan();
3246
  index_init(index_no, 0);  
3247 3248 3249 3250 3251 3252
  DBUG_RETURN(index_read(buf, key, key_len, find_flag));
}


int ha_ndbcluster::index_next(byte *buf)
{
3253
  DBUG_ENTER("ha_ndbcluster::index_next");
3254
  statistic_increment(current_thd->status_var.ha_read_next_count,
3255
                      &LOCK_status);
3256
  DBUG_RETURN(next_result(buf));
3257 3258 3259 3260 3261
}


int ha_ndbcluster::index_prev(byte *buf)
{
3262
  DBUG_ENTER("ha_ndbcluster::index_prev");
3263
  statistic_increment(current_thd->status_var.ha_read_prev_count,
3264
                      &LOCK_status);
3265
  DBUG_RETURN(next_result(buf));
3266 3267 3268 3269 3270
}


int ha_ndbcluster::index_first(byte *buf)
{
3271
  DBUG_ENTER("ha_ndbcluster::index_first");
3272
  statistic_increment(current_thd->status_var.ha_read_first_count,
3273
                      &LOCK_status);
3274 3275 3276
  // Start the ordered index scan and fetch the first row

  // Only HA_READ_ORDER indexes get called by index_first
3277
  DBUG_RETURN(ordered_index_scan(0, 0, TRUE, FALSE, buf, NULL));
3278 3279 3280 3281 3282
}


int ha_ndbcluster::index_last(byte *buf)
{
3283
  DBUG_ENTER("ha_ndbcluster::index_last");
3284
  statistic_increment(current_thd->status_var.ha_read_last_count,&LOCK_status);
3285
  DBUG_RETURN(ordered_index_scan(0, 0, TRUE, TRUE, buf, NULL));
3286 3287
}

3288 3289 3290 3291 3292
int ha_ndbcluster::index_read_last(byte * buf, const byte * key, uint key_len)
{
  DBUG_ENTER("ha_ndbcluster::index_read_last");
  DBUG_RETURN(index_read(buf, key, key_len, HA_READ_PREFIX_LAST));
}
3293

3294
int ha_ndbcluster::read_range_first_to_buf(const key_range *start_key,
3295
                                           const key_range *end_key,
3296
                                           bool desc, bool sorted,
3297
                                           byte* buf)
3298
{
3299 3300 3301 3302
  part_id_range part_spec;
  ndb_index_type type= get_index_type(active_index);
  const KEY* key_info= table->key_info+active_index;
  int error; 
3303
  DBUG_ENTER("ha_ndbcluster::read_range_first_to_buf");
3304
  DBUG_PRINT("info", ("desc: %d, sorted: %d", desc, sorted));
3305

3306 3307 3308
  if (m_use_partition_function)
  {
    get_partition_set(table, buf, active_index, start_key, &part_spec);
3309 3310 3311 3312 3313 3314 3315 3316
    DBUG_PRINT("info", ("part_spec.start_part = %u, part_spec.end_part = %u",
                        part_spec.start_part, part_spec.end_part));
    /*
      If partition pruning has found no partition in set
      we can return HA_ERR_END_OF_FILE
      If partition pruning has found exactly one partition in set
      we can optimize scan to run towards that partition only.
    */
3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330
    if (part_spec.start_part > part_spec.end_part)
    {
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    else if (part_spec.start_part == part_spec.end_part)
    {
      /*
        Only one partition is required to scan, if sorted is required we
        don't need it any more since output from one ordered partitioned
        index is always sorted.
      */
      sorted= FALSE;
    }
  }
3331

3332 3333
  m_write_op= FALSE;
  switch (type){
3334
  case PRIMARY_KEY_ORDERED_INDEX:
3335
  case PRIMARY_KEY_INDEX:
3336
    if (start_key && 
3337 3338
        start_key->length == key_info->key_length &&
        start_key->flag == HA_READ_KEY_EXACT)
3339
    {
3340
      if (m_active_cursor && (error= close_scan()))
3341
        DBUG_RETURN(error);
3342 3343
      DBUG_RETURN(pk_read(start_key->key, start_key->length, buf,
                          part_spec.start_part));
3344
    }
3345
    break;
3346
  case UNIQUE_ORDERED_INDEX:
3347
  case UNIQUE_INDEX:
3348
    if (start_key && start_key->length == key_info->key_length &&
3349 3350
        start_key->flag == HA_READ_KEY_EXACT && 
        !check_null_in_key(key_info, start_key->key, start_key->length))
3351
    {
3352
      if (m_active_cursor && (error= close_scan()))
3353
        DBUG_RETURN(error);
3354
      DBUG_RETURN(unique_index_read(start_key->key, start_key->length, buf));
3355
    }
3356 3357 3358 3359
    break;
  default:
    break;
  }
3360
  // Start the ordered index scan and fetch the first row
3361 3362
  DBUG_RETURN(ordered_index_scan(start_key, end_key, sorted, desc, buf,
                                 &part_spec));
3363 3364
}

joreland@mysql.com's avatar
joreland@mysql.com committed
3365
int ha_ndbcluster::read_range_first(const key_range *start_key,
3366 3367
                                    const key_range *end_key,
                                    bool eq_r, bool sorted)
joreland@mysql.com's avatar
joreland@mysql.com committed
3368 3369 3370
{
  byte* buf= table->record[0];
  DBUG_ENTER("ha_ndbcluster::read_range_first");
3371 3372
  DBUG_RETURN(read_range_first_to_buf(start_key, end_key, FALSE,
                                      sorted, buf));
joreland@mysql.com's avatar
joreland@mysql.com committed
3373 3374
}

3375
int ha_ndbcluster::read_range_next()
3376 3377 3378 3379 3380 3381
{
  DBUG_ENTER("ha_ndbcluster::read_range_next");
  DBUG_RETURN(next_result(table->record[0]));
}


3382 3383
int ha_ndbcluster::rnd_init(bool scan)
{
3384
  NdbScanOperation *cursor= m_active_cursor;
3385 3386
  DBUG_ENTER("rnd_init");
  DBUG_PRINT("enter", ("scan: %d", scan));
3387
  // Check if scan is to be restarted
mskold@mysql.com's avatar
mskold@mysql.com committed
3388 3389 3390 3391
  if (cursor)
  {
    if (!scan)
      DBUG_RETURN(1);
3392
    if (cursor->restart(m_force_send) != 0)
3393 3394 3395 3396
    {
      DBUG_ASSERT(0);
      DBUG_RETURN(-1);
    }
mskold@mysql.com's avatar
mskold@mysql.com committed
3397
  }
3398
  index_init(table_share->primary_key, 0);
3399 3400 3401
  DBUG_RETURN(0);
}

3402 3403
int ha_ndbcluster::close_scan()
{
3404
  NdbTransaction *trans= m_active_trans;
3405 3406
  DBUG_ENTER("close_scan");

3407 3408
  m_multi_cursor= 0;
  if (!m_active_cursor && !m_multi_cursor)
3409 3410
    DBUG_RETURN(1);

3411
  NdbScanOperation *cursor= m_active_cursor ? m_active_cursor : m_multi_cursor;
3412
  
3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424
  if (m_lock_tuple)
  {
    /*
      Lock level m_lock.type either TL_WRITE_ALLOW_WRITE
      (SELECT FOR UPDATE) or TL_READ_WITH_SHARED_LOCKS (SELECT
      LOCK WITH SHARE MODE) and row was not explictly unlocked 
      with unlock_row() call
    */
      NdbOperation *op;
      // Lock row
      DBUG_PRINT("info", ("Keeping lock on scanned row"));
      
3425
      if (!(op= cursor->lockCurrentTuple()))
3426 3427 3428 3429 3430 3431
      {
	m_lock_tuple= false;
	ERR_RETURN(trans->getNdbError());
      }
      m_ops_pending++;      
  }
3432
  m_lock_tuple= false;
3433
  if (m_ops_pending)
3434 3435 3436 3437 3438
  {
    /*
      Take over any pending transactions to the 
      deleteing/updating transaction before closing the scan    
    */
3439
    DBUG_PRINT("info", ("ops_pending: %d", m_ops_pending));    
3440
    if (execute_no_commit(this,trans,false) != 0) {
3441
      no_uncommitted_rows_execute_failure();
3442
      DBUG_RETURN(ndb_err(trans));
3443
    }
3444
    m_ops_pending= 0;
3445 3446
  }
  
3447
  cursor->close(m_force_send, TRUE);
3448
  m_active_cursor= m_multi_cursor= NULL;
mskold@mysql.com's avatar
mskold@mysql.com committed
3449
  DBUG_RETURN(0);
3450
}
3451 3452 3453 3454

int ha_ndbcluster::rnd_end()
{
  DBUG_ENTER("rnd_end");
3455
  DBUG_RETURN(close_scan());
3456 3457 3458 3459 3460 3461
}


int ha_ndbcluster::rnd_next(byte *buf)
{
  DBUG_ENTER("rnd_next");
3462
  statistic_increment(current_thd->status_var.ha_read_rnd_next_count,
3463
                      &LOCK_status);
3464

3465
  if (!m_active_cursor)
3466 3467
    DBUG_RETURN(full_table_scan(buf));
  DBUG_RETURN(next_result(buf));
3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480
}


/*
  An "interesting" record has been found and it's pk 
  retrieved by calling position
  Now it's time to read the record from db once 
  again
*/

int ha_ndbcluster::rnd_pos(byte *buf, byte *pos)
{
  DBUG_ENTER("rnd_pos");
3481
  statistic_increment(current_thd->status_var.ha_read_rnd_count,
3482
                      &LOCK_status);
3483 3484
  // The primary key for the record is stored in pos
  // Perform a pk_read using primary key "index"
3485 3486
  {
    part_id_range part_spec;
3487
    uint key_length= ref_length;
3488 3489
    if (m_use_partition_function)
    {
3490 3491 3492 3493 3494 3495
      if (table_share->primary_key == MAX_KEY)
      {
        /*
          The partition id has been fetched from ndb
          and has been stored directly after the hidden key
        */
3496
        DBUG_DUMP("key+part", (char *)pos, key_length);
3497
        key_length= ref_length - sizeof(m_part_id);
3498
        part_spec.start_part= part_spec.end_part= *(uint32 *)(pos + key_length);
3499 3500 3501 3502
      }
      else
      {
        key_range key_spec;
3503
        KEY *key_info= table->key_info + table_share->primary_key;
3504 3505 3506 3507 3508 3509 3510 3511
        key_spec.key= pos;
        key_spec.length= key_length;
        key_spec.flag= HA_READ_KEY_EXACT;
        get_full_part_id_from_key(table, buf, key_info, 
                                  &key_spec, &part_spec);
        DBUG_ASSERT(part_spec.start_part == part_spec.end_part);
      }
      DBUG_PRINT("info", ("partition id %u", part_spec.start_part));
3512
    }
3513
    DBUG_DUMP("key", (char *)pos, key_length);
3514
    DBUG_RETURN(pk_read(pos, key_length, buf, part_spec.start_part));
3515
  }
3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530
}


/*
  Store the primary key of this record in ref 
  variable, so that the row can be retrieved again later
  using "reference" in rnd_pos
*/

void ha_ndbcluster::position(const byte *record)
{
  KEY *key_info;
  KEY_PART_INFO *key_part;
  KEY_PART_INFO *end;
  byte *buff;
3531 3532
  uint key_length;

3533 3534
  DBUG_ENTER("position");

3535
  if (table_share->primary_key != MAX_KEY) 
3536
  {
3537
    key_length= ref_length;
3538
    key_info= table->key_info + table_share->primary_key;
3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553
    key_part= key_info->key_part;
    end= key_part + key_info->key_parts;
    buff= ref;
    
    for (; key_part != end; key_part++) 
    {
      if (key_part->null_bit) {
        /* Store 0 if the key part is a NULL part */      
        if (record[key_part->null_offset]
            & key_part->null_bit) {
          *buff++= 1;
          continue;
        }      
        *buff++= 0;
      }
3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573

      size_t len = key_part->length;
      const byte * ptr = record + key_part->offset;
      Field *field = key_part->field;
      if ((field->type() ==  MYSQL_TYPE_VARCHAR) &&
	  ((Field_varstring*)field)->length_bytes == 1)
      {
	/** 
	 * Keys always use 2 bytes length
	 */
	buff[0] = ptr[0];
	buff[1] = 0;
	memcpy(buff+2, ptr + 1, len);	
	len += 2;
      }
      else
      {
	memcpy(buff, ptr, len);
      }
      buff += len;
3574 3575 3576 3577 3578 3579
    }
  } 
  else 
  {
    // No primary key, get hidden key
    DBUG_PRINT("info", ("Getting hidden key"));
3580 3581 3582
    // If table has user defined partition save the partition id as well
    if(m_use_partition_function)
    {
3583
      DBUG_PRINT("info", ("Saving partition id %u", m_part_id));
3584 3585 3586
      key_length= ref_length - sizeof(m_part_id);
      memcpy(ref+key_length, (void *)&m_part_id, sizeof(m_part_id));
    }
3587 3588
    else
      key_length= ref_length;
3589
#ifndef DBUG_OFF
3590
    int hidden_no= table->s->fields;
3591
    const NDBTAB *tab= m_table;  
3592 3593 3594
    const NDBCOL *hidden_col= tab->getColumn(hidden_no);
    DBUG_ASSERT(hidden_col->getPrimaryKey() && 
                hidden_col->getAutoIncrement() &&
3595
                key_length == NDB_HIDDEN_PRIMARY_KEY_LENGTH);
3596
#endif
3597
    memcpy(ref, m_ref, key_length);
3598
  }
3599 3600 3601 3602
#ifndef DBUG_OFF
  if (table_share->primary_key == MAX_KEY && m_use_partition_function) 
    DBUG_DUMP("key+part", (char*)ref, key_length+sizeof(m_part_id));
#endif
3603
  DBUG_DUMP("ref", (char*)ref, key_length);
3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619
  DBUG_VOID_RETURN;
}


void ha_ndbcluster::info(uint flag)
{
  DBUG_ENTER("info");
  DBUG_PRINT("enter", ("flag: %d", flag));
  
  if (flag & HA_STATUS_POS)
    DBUG_PRINT("info", ("HA_STATUS_POS"));
  if (flag & HA_STATUS_NO_LOCK)
    DBUG_PRINT("info", ("HA_STATUS_NO_LOCK"));
  if (flag & HA_STATUS_TIME)
    DBUG_PRINT("info", ("HA_STATUS_TIME"));
  if (flag & HA_STATUS_VARIABLE)
3620
  {
3621
    DBUG_PRINT("info", ("HA_STATUS_VARIABLE"));
3622 3623
    if (m_table_info)
    {
3624
      if (m_ha_not_exact_count)
3625
        stats.records= 100;
3626
      else
3627
        records_update();
3628 3629 3630
    }
    else
    {
3631 3632 3633
      if ((my_errno= check_ndb_connection()))
        DBUG_VOID_RETURN;
      Ndb *ndb= get_ndb();
3634
      ndb->setDatabaseName(m_dbname);
3635
      struct Ndb_statistics stat;
3636
      ndb->setDatabaseName(m_dbname);
3637
      if (current_thd->variables.ndb_use_exact_count &&
3638
          ndb_get_table_statistics(ndb, m_table, &stat) == 0)
3639
      {
3640 3641 3642
        stats.mean_rec_length= stat.row_size;
        stats.data_file_length= stat.fragment_memory;
        stats.records= stat.row_count;
3643 3644 3645
      }
      else
      {
3646 3647
        stats.mean_rec_length= 0;
        stats.records= 100;
3648
      }
3649
    }
3650
  }
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
3651 3652 3653 3654 3655
  if (flag & HA_STATUS_CONST)
  {
    DBUG_PRINT("info", ("HA_STATUS_CONST"));
    set_rec_per_key();
  }
3656
  if (flag & HA_STATUS_ERRKEY)
3657
  {
3658
    DBUG_PRINT("info", ("HA_STATUS_ERRKEY"));
3659
    errkey= m_dupkey;
3660
  }
3661
  if (flag & HA_STATUS_AUTO)
3662
  {
3663
    DBUG_PRINT("info", ("HA_STATUS_AUTO"));
3664 3665 3666
    if (m_table)
    {
      Ndb *ndb= get_ndb();
3667
      Ndb_tuple_id_range_guard g(m_share);
3668
      
3669
      Uint64 auto_increment_value64;
3670
      if (ndb->readAutoIncrementValue(m_table, g.range,
3671
                                      auto_increment_value64) == -1)
3672 3673 3674 3675
      {
        const NdbError err= ndb->getNdbError();
        sql_print_error("Error %lu in readAutoIncrementValue(): %s",
                        (ulong) err.code, err.message);
3676
        stats.auto_increment_value= ~(ulonglong)0;
3677
      }
3678
      else
3679
        stats.auto_increment_value= (ulonglong)auto_increment_value64;
3680 3681
    }
  }
3682 3683 3684
  DBUG_VOID_RETURN;
}

3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698

void ha_ndbcluster::get_dynamic_partition_info(PARTITION_INFO *stat_info,
                                               uint part_id)
{
  /* 
     This functions should be fixed. Suggested fix: to
     implement ndb function which retrives the statistics
     about ndb partitions.
  */
  bzero((char*) stat_info, sizeof(PARTITION_INFO));
  return;
}


3699 3700 3701 3702 3703 3704
int ha_ndbcluster::extra(enum ha_extra_function operation)
{
  DBUG_ENTER("extra");
  switch (operation) {
  case HA_EXTRA_IGNORE_DUP_KEY:       /* Dup keys don't rollback everything*/
    DBUG_PRINT("info", ("HA_EXTRA_IGNORE_DUP_KEY"));
3705 3706
    DBUG_PRINT("info", ("Ignoring duplicate key"));
    m_ignore_dup_key= TRUE;
3707 3708 3709
    break;
  case HA_EXTRA_NO_IGNORE_DUP_KEY:
    DBUG_PRINT("info", ("HA_EXTRA_NO_IGNORE_DUP_KEY"));
3710
    m_ignore_dup_key= FALSE;
3711
    break;
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
3712 3713 3714 3715 3716 3717 3718 3719 3720 3721
  case HA_EXTRA_IGNORE_NO_KEY:
    DBUG_PRINT("info", ("HA_EXTRA_IGNORE_NO_KEY"));
    DBUG_PRINT("info", ("Turning on AO_IgnoreError at Commit/NoCommit"));
    m_ignore_no_key= TRUE;
    break;
  case HA_EXTRA_NO_IGNORE_NO_KEY:
    DBUG_PRINT("info", ("HA_EXTRA_NO_IGNORE_NO_KEY"));
    DBUG_PRINT("info", ("Turning on AO_IgnoreError at Commit/NoCommit"));
    m_ignore_no_key= FALSE;
    break;
3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734
  case HA_EXTRA_WRITE_CAN_REPLACE:
    DBUG_PRINT("info", ("HA_EXTRA_WRITE_CAN_REPLACE"));
    if (!m_has_unique_index)
    {
      DBUG_PRINT("info", ("Turning ON use of write instead of insert"));
      m_use_write= TRUE;
    }
    break;
  case HA_EXTRA_WRITE_CANNOT_REPLACE:
    DBUG_PRINT("info", ("HA_EXTRA_WRITE_CANNOT_REPLACE"));
    DBUG_PRINT("info", ("Turning OFF use of write instead of insert"));
    m_use_write= FALSE;
    break;
3735 3736
  default:
    break;
3737 3738 3739 3740 3741
  }
  
  DBUG_RETURN(0);
}

3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757

int ha_ndbcluster::reset()
{
  DBUG_ENTER("ha_ndbcluster::reset");
  cond_clear();
  /*
    Regular partition pruning will set the bitmap appropriately.
    Some queries like ALTER TABLE doesn't use partition pruning and
    thus the 'used_partitions' bitmap needs to be initialized
  */
  if (m_part_info)
    bitmap_set_all(&m_part_info->used_partitions);
  DBUG_RETURN(0);
}


3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770
/* 
   Start of an insert, remember number of rows to be inserted, it will
   be used in write_row and get_autoincrement to send an optimal number
   of rows in each roundtrip to the server

   SYNOPSIS
   rows     number of rows to insert, 0 if unknown

*/

void ha_ndbcluster::start_bulk_insert(ha_rows rows)
{
  int bytes, batch;
3771
  const NDBTAB *tab= m_table;    
3772 3773

  DBUG_ENTER("start_bulk_insert");
pekka@mysql.com's avatar
pekka@mysql.com committed
3774
  DBUG_PRINT("enter", ("rows: %d", (int)rows));
3775
  
3776
  m_rows_inserted= (ha_rows) 0;
3777
  if (!m_use_write && m_ignore_dup_key)
3778 3779 3780
  {
    /*
      compare if expression with that in write_row
3781
      we have a situation where peek_indexed_rows() will be called
3782 3783 3784 3785 3786 3787 3788 3789
      so we cannot batch
    */
    DBUG_PRINT("info", ("Batching turned off as duplicate key is "
                        "ignored by using peek_row"));
    m_rows_to_insert= 1;
    m_bulk_insert_rows= 1;
    DBUG_VOID_RETURN;
  }
3790
  if (rows == (ha_rows) 0)
3791
  {
3792 3793
    /* We don't know how many will be inserted, guess */
    m_rows_to_insert= m_autoincrement_prefetch;
3794
  }
3795 3796
  else
    m_rows_to_insert= rows; 
3797 3798 3799 3800 3801 3802 3803 3804

  /* 
    Calculate how many rows that should be inserted
    per roundtrip to NDB. This is done in order to minimize the 
    number of roundtrips as much as possible. However performance will 
    degrade if too many bytes are inserted, thus it's limited by this 
    calculation.   
  */
3805
  const int bytesperbatch= 8192;
3806
  bytes= 12 + tab->getRowSizeInBytes() + 4 * tab->getNoOfColumns();
3807
  batch= bytesperbatch/bytes;
3808 3809
  batch= batch == 0 ? 1 : batch;
  DBUG_PRINT("info", ("batch: %d, bytes: %d", batch, bytes));
3810
  m_bulk_insert_rows= batch;
3811 3812 3813 3814 3815 3816 3817 3818 3819

  DBUG_VOID_RETURN;
}

/*
  End of an insert
 */
int ha_ndbcluster::end_bulk_insert()
{
3820 3821
  int error= 0;

3822
  DBUG_ENTER("end_bulk_insert");
3823
  // Check if last inserts need to be flushed
3824
  if (m_bulk_insert_not_flushed)
3825
  {
3826
    NdbTransaction *trans= m_active_trans;
3827 3828 3829
    // Send rows to NDB
    DBUG_PRINT("info", ("Sending inserts to NDB, "\
                        "rows_inserted:%d, bulk_insert_rows: %d", 
3830
                        (int) m_rows_inserted, (int) m_bulk_insert_rows)); 
3831
    m_bulk_insert_not_flushed= FALSE;
3832 3833
    if (m_transaction_on)
    {
3834
      if (execute_no_commit(this, trans,false) != 0)
3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851
      {
        no_uncommitted_rows_execute_failure();
        my_errno= error= ndb_err(trans);
      }
    }
    else
    {
      if (execute_commit(this, trans) != 0)
      {
        no_uncommitted_rows_execute_failure();
        my_errno= error= ndb_err(trans);
      }
      else
      {
        int res= trans->restart();
        DBUG_ASSERT(res == 0);
      }
3852
    }
3853 3854
  }

3855 3856
  m_rows_inserted= (ha_rows) 0;
  m_rows_to_insert= (ha_rows) 1;
3857
  DBUG_RETURN(error);
3858 3859
}

3860 3861 3862 3863

int ha_ndbcluster::extra_opt(enum ha_extra_function operation, ulong cache_size)
{
  DBUG_ENTER("extra_opt");
pekka@mysql.com's avatar
pekka@mysql.com committed
3864
  DBUG_PRINT("enter", ("cache_size: %lu", cache_size));
3865 3866 3867
  DBUG_RETURN(extra(operation));
}

3868 3869 3870 3871
static const char *ha_ndbcluster_exts[] = {
 ha_ndb_ext,
 NullS
};
3872

3873
const char** ha_ndbcluster::bas_ext() const
3874 3875 3876
{
  return ha_ndbcluster_exts;
}
3877 3878 3879 3880 3881 3882 3883 3884 3885

/*
  How many seeks it will take to read through the table
  This is to be comparable to the number returned by records_in_range so
  that we can decide if we should scan the table or use keys.
*/

double ha_ndbcluster::scan_time()
{
3886
  DBUG_ENTER("ha_ndbcluster::scan_time()");
3887
  double res= rows2double(stats.records*1000);
3888
  DBUG_PRINT("exit", ("table: %s value: %f", 
3889
                      m_tabname, res));
3890
  DBUG_RETURN(res);
3891 3892
}

3893 3894 3895 3896 3897 3898 3899
/*
  Convert MySQL table locks into locks supported by Ndb Cluster.
  Note that MySQL Cluster does currently not support distributed
  table locks, so to be safe one should set cluster in Single
  User Mode, before relying on table locks when updating tables
  from several MySQL servers
*/
3900 3901 3902 3903 3904 3905 3906 3907

THR_LOCK_DATA **ha_ndbcluster::store_lock(THD *thd,
                                          THR_LOCK_DATA **to,
                                          enum thr_lock_type lock_type)
{
  DBUG_ENTER("store_lock");
  if (lock_type != TL_IGNORE && m_lock.type == TL_UNLOCK) 
  {
3908

3909 3910 3911
    /* If we are not doing a LOCK TABLE, then allow multiple
       writers */
    
3912 3913 3914
    /* Since NDB does not currently have table locks
       this is treated as a ordinary lock */

3915
    if ((lock_type >= TL_WRITE_CONCURRENT_INSERT &&
3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930
         lock_type <= TL_WRITE) && !thd->in_lock_tables)      
      lock_type= TL_WRITE_ALLOW_WRITE;
    
    /* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
       MySQL would use the lock TL_READ_NO_INSERT on t2, and that
       would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
       to t2. Convert the lock to a normal read lock to allow
       concurrent inserts to t2. */
    
    if (lock_type == TL_READ_NO_INSERT && !thd->in_lock_tables)
      lock_type= TL_READ;
    
    m_lock.type=lock_type;
  }
  *to++= &m_lock;
3931 3932

  DBUG_PRINT("exit", ("lock_type: %d", lock_type));
3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954
  
  DBUG_RETURN(to);
}

#ifndef DBUG_OFF
#define PRINT_OPTION_FLAGS(t) { \
      if (t->options & OPTION_NOT_AUTOCOMMIT) \
        DBUG_PRINT("thd->options", ("OPTION_NOT_AUTOCOMMIT")); \
      if (t->options & OPTION_BEGIN) \
        DBUG_PRINT("thd->options", ("OPTION_BEGIN")); \
      if (t->options & OPTION_TABLE_LOCK) \
        DBUG_PRINT("thd->options", ("OPTION_TABLE_LOCK")); \
}
#else
#define PRINT_OPTION_FLAGS(t)
#endif


/*
  As MySQL will execute an external lock for every new table it uses
  we can use this to start the transactions.
  If we are in auto_commit mode we just need to start a transaction
3955
  for the statement, this will be stored in thd_ndb.stmt.
3956
  If not, we have to start a master transaction if there doesn't exist
3957
  one from before, this will be stored in thd_ndb.all
3958 3959 3960
 
  When a table lock is held one transaction will be started which holds
  the table lock and for each statement a hupp transaction will be started  
3961
  If we are locking the table then:
3962
  - save the NdbDictionary::Table for easy access
3963 3964
  - save reference to table statistics
  - refresh list of the indexes for the table if needed (if altered)
3965 3966 3967 3968 3969
 */

int ha_ndbcluster::external_lock(THD *thd, int lock_type)
{
  int error=0;
3970
  NdbTransaction* trans= NULL;
3971
  DBUG_ENTER("external_lock");
3972

3973 3974 3975 3976
  /*
    Check that this handler instance has a connection
    set up to the Ndb object of thd
   */
3977
  if (check_ndb_connection(thd))
3978
    DBUG_RETURN(1);
3979

3980
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
3981
  Ndb *ndb= thd_ndb->ndb;
3982

3983
  DBUG_PRINT("enter", ("this: 0x%lx  thd: 0x%lx  thd_ndb: %lx  "
3984
                       "thd_ndb->lock_count: %d",
3985 3986
                       (long) this, (long) thd, (long) thd_ndb,
                       thd_ndb->lock_count));
3987

3988 3989
  if (lock_type != F_UNLCK)
  {
3990
    DBUG_PRINT("info", ("lock_type != F_UNLCK"));
3991 3992 3993 3994 3995 3996 3997 3998
    if (thd->lex->sql_command == SQLCOM_LOAD)
    {
      m_transaction_on= FALSE;
      /* Would be simpler if has_transactions() didn't always say "yes" */
      thd->options|= OPTION_STATUS_NO_TRANS_UPDATE;
      thd->no_trans_update= TRUE;
    }
    else if (!thd->transaction.on)
3999 4000 4001
      m_transaction_on= FALSE;
    else
      m_transaction_on= thd->variables.ndb_use_transactions;
4002
    if (!thd_ndb->lock_count++)
4003 4004
    {
      PRINT_OPTION_FLAGS(thd);
4005
      if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) 
4006 4007
      {
        // Autocommit transaction
4008
        DBUG_ASSERT(!thd_ndb->stmt);
4009 4010
        DBUG_PRINT("trans",("Starting transaction stmt"));      

4011
        trans= ndb->startTransaction();
4012
        if (trans == NULL)
4013
          ERR_RETURN(ndb->getNdbError());
4014
        thd_ndb->init_open_tables();
4015
        thd_ndb->stmt= trans;
4016
	thd_ndb->query_state&= NDB_QUERY_NORMAL;
4017
        trans_register_ha(thd, FALSE, ndbcluster_hton);
4018 4019 4020
      } 
      else 
      { 
4021
        if (!thd_ndb->all)
4022
        {
4023 4024 4025 4026
          // Not autocommit transaction
          // A "master" transaction ha not been started yet
          DBUG_PRINT("trans",("starting transaction, all"));
          
4027
          trans= ndb->startTransaction();
4028
          if (trans == NULL)
4029
            ERR_RETURN(ndb->getNdbError());
4030
          thd_ndb->init_open_tables();
4031
          thd_ndb->all= trans; 
4032
	  thd_ndb->query_state&= NDB_QUERY_NORMAL;
4033
          trans_register_ha(thd, TRUE, ndbcluster_hton);
4034 4035 4036 4037 4038 4039 4040 4041

          /*
            If this is the start of a LOCK TABLE, a table look 
            should be taken on the table in NDB
           
            Check if it should be read or write lock
           */
          if (thd->options & (OPTION_TABLE_LOCK))
4042
          {
4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061
            //lockThisTable();
            DBUG_PRINT("info", ("Locking the table..." ));
          }

        }
      }
    }
    /*
      This is the place to make sure this handler instance
      has a started transaction.
     
      The transaction is started by the first handler on which 
      MySQL Server calls external lock
     
      Other handlers in the same stmt or transaction should use 
      the same NDB transaction. This is done by setting up the m_active_trans
      pointer to point to the NDB transaction. 
     */

4062 4063 4064
    // store thread specific data first to set the right context
    m_force_send=          thd->variables.ndb_force_send;
    m_ha_not_exact_count= !thd->variables.ndb_use_exact_count;
4065 4066
    m_autoincrement_prefetch= 
      (ha_rows) thd->variables.ndb_autoincrement_prefetch_sz;
4067

4068
    m_active_trans= thd_ndb->all ? thd_ndb->all : thd_ndb->stmt;
4069
    DBUG_ASSERT(m_active_trans);
4070
    // Start of transaction
4071 4072
    m_rows_changed= 0;
    m_ops_pending= 0;
4073 4074

    // TODO remove double pointers...
4075 4076
    m_thd_ndb_share= thd_ndb->get_open_table(thd, m_table);
    m_table_info= &m_thd_ndb_share->stat;
4077 4078
  }
  else
4079
  {
4080
    DBUG_PRINT("info", ("lock_type == F_UNLCK"));
4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098

    if (ndb_cache_check_time && m_rows_changed)
    {
      DBUG_PRINT("info", ("Rows has changed and util thread is running"));
      if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
      {
        DBUG_PRINT("info", ("Add share to list of tables to be invalidated"));
        /* NOTE push_back allocates memory using transactions mem_root! */
        thd_ndb->changed_tables.push_back(m_share, &thd->transaction.mem_root);
      }

      pthread_mutex_lock(&m_share->mutex);
      DBUG_PRINT("info", ("Invalidating commit_count"));
      m_share->commit_count= 0;
      m_share->commit_count_lock++;
      pthread_mutex_unlock(&m_share->mutex);
    }

4099
    if (!--thd_ndb->lock_count)
4100 4101 4102 4103
    {
      DBUG_PRINT("trans", ("Last external_lock"));
      PRINT_OPTION_FLAGS(thd);

4104
      if (thd_ndb->stmt)
4105 4106 4107 4108 4109 4110 4111
      {
        /*
          Unlock is done without a transaction commit / rollback.
          This happens if the thread didn't update any rows
          We must in this case close the transaction to release resources
        */
        DBUG_PRINT("trans",("ending non-updating transaction"));
4112
        ndb->closeTransaction(m_active_trans);
4113
        thd_ndb->stmt= NULL;
4114 4115
      }
    }
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4116
    m_table_info= NULL;
4117

4118 4119 4120 4121 4122 4123 4124 4125 4126
    /*
      This is the place to make sure this handler instance
      no longer are connected to the active transaction.

      And since the handler is no longer part of the transaction 
      it can't have open cursors, ops or blobs pending.
    */
    m_active_trans= NULL;    

4127 4128
    if (m_active_cursor)
      DBUG_PRINT("warning", ("m_active_cursor != NULL"));
4129 4130
    m_active_cursor= NULL;

4131 4132 4133 4134
    if (m_multi_cursor)
      DBUG_PRINT("warning", ("m_multi_cursor != NULL"));
    m_multi_cursor= NULL;
    
4135
    if (m_blobs_pending)
4136
      DBUG_PRINT("warning", ("blobs_pending != 0"));
4137
    m_blobs_pending= 0;
4138
    
4139
    if (m_ops_pending)
4140
      DBUG_PRINT("warning", ("ops_pending != 0L"));
4141
    m_ops_pending= 0;
4142
  }
4143
  thd->set_current_stmt_binlog_row_based_if_mixed();
4144 4145 4146
  DBUG_RETURN(error);
}

mskold@mysql.com's avatar
mskold@mysql.com committed
4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162
/*
  Unlock the last row read in an open scan.
  Rows are unlocked by default in ndb, but
  for SELECT FOR UPDATE and SELECT LOCK WIT SHARE MODE
  locks are kept if unlock_row() is not called.
*/

void ha_ndbcluster::unlock_row() 
{
  DBUG_ENTER("unlock_row");

  DBUG_PRINT("info", ("Unlocking row"));
  m_lock_tuple= false;
  DBUG_VOID_RETURN;
}

4163
/*
4164 4165 4166 4167 4168
  Start a transaction for running a statement if one is not
  already running in a transaction. This will be the case in
  a BEGIN; COMMIT; block
  When using LOCK TABLE's external_lock will start a transaction
  since ndb does not currently does not support table locking
4169 4170
*/

serg@serg.mylan's avatar
serg@serg.mylan committed
4171
int ha_ndbcluster::start_stmt(THD *thd, thr_lock_type lock_type)
4172 4173 4174 4175 4176
{
  int error=0;
  DBUG_ENTER("start_stmt");
  PRINT_OPTION_FLAGS(thd);

4177
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
4178
  NdbTransaction *trans= (thd_ndb->stmt)?thd_ndb->stmt:thd_ndb->all;
4179
  if (!trans){
4180
    Ndb *ndb= thd_ndb->ndb;
4181
    DBUG_PRINT("trans",("Starting transaction stmt"));  
4182
    trans= ndb->startTransaction();
4183
    if (trans == NULL)
4184
      ERR_RETURN(ndb->getNdbError());
4185
    no_uncommitted_rows_reset(thd);
4186
    thd_ndb->stmt= trans;
4187
    trans_register_ha(thd, FALSE, ndbcluster_hton);
4188
  }
4189
  thd_ndb->query_state&= NDB_QUERY_NORMAL;
4190
  m_active_trans= trans;
4191

4192
  // Start of statement
4193
  m_ops_pending= 0;    
4194 4195
  thd->set_current_stmt_binlog_row_based_if_mixed();

4196 4197 4198 4199 4200
  DBUG_RETURN(error);
}


/*
4201
  Commit a transaction started in NDB
4202 4203
 */

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4204
static int ndbcluster_commit(THD *thd, bool all)
4205 4206
{
  int res= 0;
4207 4208 4209
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;
  NdbTransaction *trans= all ? thd_ndb->all : thd_ndb->stmt;
4210 4211 4212

  DBUG_ENTER("ndbcluster_commit");
  DBUG_PRINT("transaction",("%s",
4213
                            trans == thd_ndb->stmt ?
4214 4215 4216
                            "stmt" : "all"));
  DBUG_ASSERT(ndb && trans);

4217
  if (execute_commit(thd,trans) != 0)
4218 4219
  {
    const NdbError err= trans->getNdbError();
4220
    const NdbOperation *error_op= trans->getNdbErrorOperation();
4221
    ERR_PRINT(err);
4222
    res= ndb_to_mysql_error(&err);
4223
    if (res != -1)
4224
      ndbcluster_print_error(res, error_op);
4225
  }
4226
  ndb->closeTransaction(trans);
4227

4228
  if (all)
4229 4230 4231
    thd_ndb->all= NULL;
  else
    thd_ndb->stmt= NULL;
4232 4233 4234 4235 4236 4237 4238

  /* Clear commit_count for tables changed by transaction */
  NDB_SHARE* share;
  List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
  while ((share= it++))
  {
    pthread_mutex_lock(&share->mutex);
4239 4240
    DBUG_PRINT("info", ("Invalidate commit_count for %s, share->commit_count: %d ",
			share->key, share->commit_count));
4241 4242 4243 4244 4245 4246
    share->commit_count= 0;
    share->commit_count_lock++;
    pthread_mutex_unlock(&share->mutex);
  }
  thd_ndb->changed_tables.empty();

4247 4248 4249 4250 4251 4252 4253 4254
  DBUG_RETURN(res);
}


/*
  Rollback a transaction started in NDB
 */

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4255
static int ndbcluster_rollback(THD *thd, bool all)
4256 4257
{
  int res= 0;
4258 4259 4260
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;
  NdbTransaction *trans= all ? thd_ndb->all : thd_ndb->stmt;
4261 4262 4263

  DBUG_ENTER("ndbcluster_rollback");
  DBUG_PRINT("transaction",("%s",
4264
                            trans == thd_ndb->stmt ? 
4265 4266 4267
                            "stmt" : "all"));
  DBUG_ASSERT(ndb && trans);

4268
  if (trans->execute(NdbTransaction::Rollback) != 0)
4269 4270
  {
    const NdbError err= trans->getNdbError();
4271
    const NdbOperation *error_op= trans->getNdbErrorOperation();
4272 4273
    ERR_PRINT(err);     
    res= ndb_to_mysql_error(&err);
4274 4275
    if (res != -1) 
      ndbcluster_print_error(res, error_op);
4276 4277
  }
  ndb->closeTransaction(trans);
4278

4279
  if (all)
4280 4281 4282 4283
    thd_ndb->all= NULL;
  else
    thd_ndb->stmt= NULL;

4284 4285 4286
  /* Clear list of tables changed by transaction */
  thd_ndb->changed_tables.empty();

4287
  DBUG_RETURN(res);
4288 4289 4290 4291
}


/*
pekka@mysql.com's avatar
pekka@mysql.com committed
4292 4293 4294
  Define NDB column based on Field.
  Returns 0 or mysql error code.
  Not member of ha_ndbcluster because NDBCOL cannot be declared.
pekka@mysql.com's avatar
pekka@mysql.com committed
4295 4296 4297

  MySQL text types with character set "binary" are mapped to true
  NDB binary types without a character set.  This may change.
4298 4299
 */

pekka@mysql.com's avatar
pekka@mysql.com committed
4300 4301 4302
static int create_ndb_column(NDBCOL &col,
                             Field *field,
                             HA_CREATE_INFO *info)
4303
{
pekka@mysql.com's avatar
pekka@mysql.com committed
4304
  // Set name
msvensson@neptunus.(none)'s avatar
msvensson@neptunus.(none) committed
4305
  col.setName(field->field_name);
pekka@mysql.com's avatar
pekka@mysql.com committed
4306 4307
  // Get char set
  CHARSET_INFO *cs= field->charset();
pekka@mysql.com's avatar
pekka@mysql.com committed
4308 4309 4310 4311
  // Set type and sizes
  const enum enum_field_types mysql_type= field->real_type();
  switch (mysql_type) {
  // Numeric types
4312
  case MYSQL_TYPE_TINY:        
pekka@mysql.com's avatar
pekka@mysql.com committed
4313 4314 4315 4316 4317 4318
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Tinyunsigned);
    else
      col.setType(NDBCOL::Tinyint);
    col.setLength(1);
    break;
4319
  case MYSQL_TYPE_SHORT:
pekka@mysql.com's avatar
pekka@mysql.com committed
4320 4321 4322 4323 4324 4325
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Smallunsigned);
    else
      col.setType(NDBCOL::Smallint);
    col.setLength(1);
    break;
4326
  case MYSQL_TYPE_LONG:
pekka@mysql.com's avatar
pekka@mysql.com committed
4327 4328 4329 4330 4331 4332
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Unsigned);
    else
      col.setType(NDBCOL::Int);
    col.setLength(1);
    break;
4333
  case MYSQL_TYPE_INT24:       
pekka@mysql.com's avatar
pekka@mysql.com committed
4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Mediumunsigned);
    else
      col.setType(NDBCOL::Mediumint);
    col.setLength(1);
    break;
  case MYSQL_TYPE_LONGLONG:
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Bigunsigned);
    else
      col.setType(NDBCOL::Bigint);
    col.setLength(1);
4346 4347
    break;
  case MYSQL_TYPE_FLOAT:
pekka@mysql.com's avatar
pekka@mysql.com committed
4348 4349 4350
    col.setType(NDBCOL::Float);
    col.setLength(1);
    break;
4351
  case MYSQL_TYPE_DOUBLE:
pekka@mysql.com's avatar
pekka@mysql.com committed
4352 4353 4354
    col.setType(NDBCOL::Double);
    col.setLength(1);
    break;
4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374
  case MYSQL_TYPE_DECIMAL:    
    {
      Field_decimal *f= (Field_decimal*)field;
      uint precision= f->pack_length();
      uint scale= f->decimals();
      if (field->flags & UNSIGNED_FLAG)
      {
        col.setType(NDBCOL::Olddecimalunsigned);
        precision-= (scale > 0);
      }
      else
      {
        col.setType(NDBCOL::Olddecimal);
        precision-= 1 + (scale > 0);
      }
      col.setPrecision(precision);
      col.setScale(scale);
      col.setLength(1);
    }
    break;
4375 4376 4377
  case MYSQL_TYPE_NEWDECIMAL:    
    {
      Field_new_decimal *f= (Field_new_decimal*)field;
4378
      uint precision= f->precision;
4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392
      uint scale= f->decimals();
      if (field->flags & UNSIGNED_FLAG)
      {
        col.setType(NDBCOL::Decimalunsigned);
      }
      else
      {
        col.setType(NDBCOL::Decimal);
      }
      col.setPrecision(precision);
      col.setScale(scale);
      col.setLength(1);
    }
    break;
pekka@mysql.com's avatar
pekka@mysql.com committed
4393 4394 4395 4396 4397
  // Date types
  case MYSQL_TYPE_DATETIME:    
    col.setType(NDBCOL::Datetime);
    col.setLength(1);
    break;
4398 4399 4400 4401
  case MYSQL_TYPE_DATE: // ?
    col.setType(NDBCOL::Char);
    col.setLength(field->pack_length());
    break;
pekka@mysql.com's avatar
pekka@mysql.com committed
4402
  case MYSQL_TYPE_NEWDATE:
4403 4404 4405
    col.setType(NDBCOL::Date);
    col.setLength(1);
    break;
pekka@mysql.com's avatar
pekka@mysql.com committed
4406
  case MYSQL_TYPE_TIME:        
4407 4408 4409
    col.setType(NDBCOL::Time);
    col.setLength(1);
    break;
4410 4411 4412 4413 4414 4415 4416
  case MYSQL_TYPE_YEAR:
    col.setType(NDBCOL::Year);
    col.setLength(1);
    break;
  case MYSQL_TYPE_TIMESTAMP:
    col.setType(NDBCOL::Timestamp);
    col.setLength(1);
pekka@mysql.com's avatar
pekka@mysql.com committed
4417 4418 4419
    break;
  // Char types
  case MYSQL_TYPE_STRING:      
4420
    if (field->pack_length() == 0)
4421 4422 4423 4424
    {
      col.setType(NDBCOL::Bit);
      col.setLength(1);
    }
pekka@mysql.com's avatar
pekka@mysql.com committed
4425
    else if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
4426
    {
pekka@mysql.com's avatar
pekka@mysql.com committed
4427
      col.setType(NDBCOL::Binary);
4428
      col.setLength(field->pack_length());
pekka@mysql.com's avatar
pekka@mysql.com committed
4429
    }
4430
    else
4431 4432 4433
    {
      col.setType(NDBCOL::Char);
      col.setCharset(cs);
4434
      col.setLength(field->pack_length());
4435
    }
pekka@mysql.com's avatar
pekka@mysql.com committed
4436
    break;
pekka@mysql.com's avatar
pekka@mysql.com committed
4437 4438 4439 4440 4441 4442
  case MYSQL_TYPE_VAR_STRING: // ?
  case MYSQL_TYPE_VARCHAR:
    {
      Field_varstring* f= (Field_varstring*)field;
      if (f->length_bytes == 1)
      {
pekka@mysql.com's avatar
pekka@mysql.com committed
4443
        if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
pekka@mysql.com's avatar
pekka@mysql.com committed
4444 4445 4446 4447 4448 4449 4450 4451
          col.setType(NDBCOL::Varbinary);
        else {
          col.setType(NDBCOL::Varchar);
          col.setCharset(cs);
        }
      }
      else if (f->length_bytes == 2)
      {
pekka@mysql.com's avatar
pekka@mysql.com committed
4452
        if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
pekka@mysql.com's avatar
pekka@mysql.com committed
4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463
          col.setType(NDBCOL::Longvarbinary);
        else {
          col.setType(NDBCOL::Longvarchar);
          col.setCharset(cs);
        }
      }
      else
      {
        return HA_ERR_UNSUPPORTED;
      }
      col.setLength(field->field_length);
pekka@mysql.com's avatar
pekka@mysql.com committed
4464
    }
pekka@mysql.com's avatar
pekka@mysql.com committed
4465 4466 4467 4468
    break;
  // Blob types (all come in as MYSQL_TYPE_BLOB)
  mysql_type_tiny_blob:
  case MYSQL_TYPE_TINY_BLOB:
pekka@mysql.com's avatar
pekka@mysql.com committed
4469
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
pekka@mysql.com's avatar
pekka@mysql.com committed
4470
      col.setType(NDBCOL::Blob);
pekka@mysql.com's avatar
pekka@mysql.com committed
4471
    else {
pekka@mysql.com's avatar
pekka@mysql.com committed
4472
      col.setType(NDBCOL::Text);
pekka@mysql.com's avatar
pekka@mysql.com committed
4473 4474
      col.setCharset(cs);
    }
pekka@mysql.com's avatar
pekka@mysql.com committed
4475 4476 4477 4478 4479
    col.setInlineSize(256);
    // No parts
    col.setPartSize(0);
    col.setStripeSize(0);
    break;
4480
  //mysql_type_blob:
4481
  case MYSQL_TYPE_GEOMETRY:
pekka@mysql.com's avatar
pekka@mysql.com committed
4482
  case MYSQL_TYPE_BLOB:    
pekka@mysql.com's avatar
pekka@mysql.com committed
4483
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
pekka@mysql.com's avatar
pekka@mysql.com committed
4484
      col.setType(NDBCOL::Blob);
pekka@mysql.com's avatar
pekka@mysql.com committed
4485
    else {
pekka@mysql.com's avatar
pekka@mysql.com committed
4486
      col.setType(NDBCOL::Text);
pekka@mysql.com's avatar
pekka@mysql.com committed
4487 4488
      col.setCharset(cs);
    }
pekka@mysql.com's avatar
pekka@mysql.com committed
4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504
    // Use "<=" even if "<" is the exact condition
    if (field->max_length() <= (1 << 8))
      goto mysql_type_tiny_blob;
    else if (field->max_length() <= (1 << 16))
    {
      col.setInlineSize(256);
      col.setPartSize(2000);
      col.setStripeSize(16);
    }
    else if (field->max_length() <= (1 << 24))
      goto mysql_type_medium_blob;
    else
      goto mysql_type_long_blob;
    break;
  mysql_type_medium_blob:
  case MYSQL_TYPE_MEDIUM_BLOB:   
pekka@mysql.com's avatar
pekka@mysql.com committed
4505
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
pekka@mysql.com's avatar
pekka@mysql.com committed
4506
      col.setType(NDBCOL::Blob);
pekka@mysql.com's avatar
pekka@mysql.com committed
4507
    else {
pekka@mysql.com's avatar
pekka@mysql.com committed
4508
      col.setType(NDBCOL::Text);
pekka@mysql.com's avatar
pekka@mysql.com committed
4509 4510
      col.setCharset(cs);
    }
pekka@mysql.com's avatar
pekka@mysql.com committed
4511 4512 4513 4514 4515 4516
    col.setInlineSize(256);
    col.setPartSize(4000);
    col.setStripeSize(8);
    break;
  mysql_type_long_blob:
  case MYSQL_TYPE_LONG_BLOB:  
pekka@mysql.com's avatar
pekka@mysql.com committed
4517
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
pekka@mysql.com's avatar
pekka@mysql.com committed
4518
      col.setType(NDBCOL::Blob);
pekka@mysql.com's avatar
pekka@mysql.com committed
4519
    else {
pekka@mysql.com's avatar
pekka@mysql.com committed
4520
      col.setType(NDBCOL::Text);
pekka@mysql.com's avatar
pekka@mysql.com committed
4521 4522
      col.setCharset(cs);
    }
pekka@mysql.com's avatar
pekka@mysql.com committed
4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535
    col.setInlineSize(256);
    col.setPartSize(8000);
    col.setStripeSize(4);
    break;
  // Other types
  case MYSQL_TYPE_ENUM:
    col.setType(NDBCOL::Char);
    col.setLength(field->pack_length());
    break;
  case MYSQL_TYPE_SET:         
    col.setType(NDBCOL::Char);
    col.setLength(field->pack_length());
    break;
4536 4537
  case MYSQL_TYPE_BIT:
  {
4538
    int no_of_bits= field->field_length;
4539 4540 4541 4542 4543 4544 4545
    col.setType(NDBCOL::Bit);
    if (!no_of_bits)
      col.setLength(1);
      else
        col.setLength(no_of_bits);
    break;
  }
pekka@mysql.com's avatar
pekka@mysql.com committed
4546 4547 4548 4549 4550
  case MYSQL_TYPE_NULL:        
    goto mysql_type_unsupported;
  mysql_type_unsupported:
  default:
    return HA_ERR_UNSUPPORTED;
4551
  }
pekka@mysql.com's avatar
pekka@mysql.com committed
4552 4553 4554 4555 4556 4557
  // Set nullable and pk
  col.setNullable(field->maybe_null());
  col.setPrimaryKey(field->flags & PRI_KEY_FLAG);
  // Set autoincrement
  if (field->flags & AUTO_INCREMENT_FLAG) 
  {
4558
    char buff[22];
pekka@mysql.com's avatar
pekka@mysql.com committed
4559 4560
    col.setAutoIncrement(TRUE);
    ulonglong value= info->auto_increment_value ?
4561
      info->auto_increment_value : (ulonglong) 1;
4562
    DBUG_PRINT("info", ("Autoincrement key, initial: %s", llstr(value, buff)));
pekka@mysql.com's avatar
pekka@mysql.com committed
4563
    col.setAutoIncrementInitialValue(value);
4564
  }
pekka@mysql.com's avatar
pekka@mysql.com committed
4565
  else
4566
    col.setAutoIncrement(FALSE);
pekka@mysql.com's avatar
pekka@mysql.com committed
4567
  return 0;
4568 4569
}

4570 4571 4572 4573
/*
  Create a table in NDB Cluster
*/

4574
int ha_ndbcluster::create(const char *name, 
4575 4576
                          TABLE *form, 
                          HA_CREATE_INFO *info)
4577
{
4578
  THD *thd= current_thd;
4579 4580
  NDBTAB tab;
  NDBCOL col;
joreland@mysql.com's avatar
joreland@mysql.com committed
4581
  uint pack_length, length, i, pk_length= 0;
4582
  const void *data, *pack_data;
4583
  bool create_from_engine= (info->table_options & HA_OPTION_CREATE_FROM_ENGINE);
4584
  bool is_truncate= (thd->lex->sql_command == SQLCOM_TRUNCATE);
4585

pekka@mysql.com's avatar
pekka@mysql.com committed
4586
  DBUG_ENTER("ha_ndbcluster::create");
4587
  DBUG_PRINT("enter", ("name: %s", name));
4588

4589 4590 4591
  DBUG_ASSERT(*fn_rext((char*)name) == 0);
  set_dbname(name);
  set_tabname(name);
4592

mskold@mysql.com's avatar
mskold@mysql.com committed
4593 4594 4595 4596 4597 4598
  if (is_truncate)
  {
    DBUG_PRINT("info", ("Dropping and re-creating table for TRUNCATE"));
    if ((my_errno= delete_table(name)))
      DBUG_RETURN(my_errno);
  }
4599
  table= form;
4600 4601 4602
  if (create_from_engine)
  {
    /*
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4603
      Table already exists in NDB and frm file has been created by 
4604 4605 4606
      caller.
      Do Ndb specific stuff, such as create a .ndb file
    */
4607
    if ((my_errno= write_ndb_file(name)))
4608
      DBUG_RETURN(my_errno);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4609
#ifdef HAVE_NDB_BINLOG
4610
    ndbcluster_create_binlog_setup(get_ndb(), name, strlen(name),
4611
                                   m_dbname, m_tabname, FALSE);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4612
#endif /* HAVE_NDB_BINLOG */
4613 4614
    DBUG_RETURN(my_errno);
  }
4615

4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630
#ifdef HAVE_NDB_BINLOG
  /*
    Don't allow table creation unless
    schema distribution table is setup
    ( unless it is a creation of the schema dist table itself )
  */
  if (!schema_share &&
      !(strcmp(m_dbname, NDB_REP_DB) == 0 &&
        strcmp(m_tabname, NDB_SCHEMA_TABLE) == 0))
  {
    DBUG_PRINT("info", ("Schema distribution table not setup"));
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  }
#endif /* HAVE_NDB_BINLOG */

4631 4632 4633 4634 4635 4636 4637 4638
  DBUG_PRINT("table", ("name: %s", m_tabname));  
  tab.setName(m_tabname);
  tab.setLogging(!(info->options & HA_LEX_CREATE_TMP_TABLE));    
   
  // Save frm data for this table
  if (readfrm(name, &data, &length))
    DBUG_RETURN(1);
  if (packfrm(data, length, &pack_data, &pack_length))
4639 4640
  {
    my_free((char*)data, MYF(0));
4641
    DBUG_RETURN(2);
4642 4643
  }

4644
  DBUG_PRINT("info", ("setFrm data=%lx  len=%d", pack_data, pack_length));
4645 4646 4647 4648
  tab.setFrm(pack_data, pack_length);      
  my_free((char*)data, MYF(0));
  my_free((char*)pack_data, MYF(0));
  
4649
  for (i= 0; i < form->s->fields; i++) 
4650 4651 4652 4653
  {
    Field *field= form->field[i];
    DBUG_PRINT("info", ("name: %s, type: %u, pack_length: %d", 
                        field->field_name, field->real_type(),
4654
                        field->pack_length()));
4655
    if ((my_errno= create_ndb_column(col, field, info)))
pekka@mysql.com's avatar
pekka@mysql.com committed
4656
      DBUG_RETURN(my_errno);
4657 4658
 
    if (info->store_on_disk || getenv("NDB_DEFAULT_DISK"))
4659 4660 4661 4662
      col.setStorageType(NdbDictionary::Column::StorageTypeDisk);
    else
      col.setStorageType(NdbDictionary::Column::StorageTypeMemory);

4663
    tab.addColumn(col);
4664
    if (col.getPrimaryKey())
joreland@mysql.com's avatar
joreland@mysql.com committed
4665
      pk_length += (field->pack_length() + 3) / 4;
4666
  }
4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682

  KEY* key_info;
  for (i= 0, key_info= form->key_info; i < form->s->keys; i++, key_info++)
  {
    KEY_PART_INFO *key_part= key_info->key_part;
    KEY_PART_INFO *end= key_part + key_info->key_parts;
    for (; key_part != end; key_part++)
      tab.getColumn(key_part->fieldnr-1)->setStorageType(
                             NdbDictionary::Column::StorageTypeMemory);
  }

  if (info->store_on_disk)
    if (info->tablespace)
      tab.setTablespace(info->tablespace);
    else
      tab.setTablespace("DEFAULT-TS");
4683
  // No primary key, create shadow key as 64 bit, auto increment  
4684
  if (form->s->primary_key == MAX_KEY) 
4685 4686 4687 4688 4689
  {
    DBUG_PRINT("info", ("Generating shadow key"));
    col.setName("$PK");
    col.setType(NdbDictionary::Column::Bigunsigned);
    col.setLength(1);
4690
    col.setNullable(FALSE);
4691 4692 4693
    col.setPrimaryKey(TRUE);
    col.setAutoIncrement(TRUE);
    tab.addColumn(col);
joreland@mysql.com's avatar
joreland@mysql.com committed
4694 4695
    pk_length += 2;
  }
4696
 
joreland@mysql.com's avatar
joreland@mysql.com committed
4697
  // Make sure that blob tables don't have to big part size
4698
  for (i= 0; i < form->s->fields; i++) 
joreland@mysql.com's avatar
joreland@mysql.com committed
4699 4700 4701 4702 4703 4704 4705
  {
    /**
     * The extra +7 concists
     * 2 - words from pk in blob table
     * 5 - from extra words added by tup/dict??
     */
    switch (form->field[i]->real_type()) {
4706
    case MYSQL_TYPE_GEOMETRY:
joreland@mysql.com's avatar
joreland@mysql.com committed
4707 4708 4709 4710
    case MYSQL_TYPE_BLOB:    
    case MYSQL_TYPE_MEDIUM_BLOB:   
    case MYSQL_TYPE_LONG_BLOB: 
    {
4711 4712
      NdbDictionary::Column * col= tab.getColumn(i);
      int size= pk_length + (col->getPartSize()+3)/4 + 7;
4713
      if (size > NDB_MAX_TUPLE_SIZE_IN_WORDS && 
4714
         (pk_length+7) < NDB_MAX_TUPLE_SIZE_IN_WORDS)
joreland@mysql.com's avatar
joreland@mysql.com committed
4715
      {
4716 4717
        size= NDB_MAX_TUPLE_SIZE_IN_WORDS - pk_length - 7;
        col->setPartSize(4*size);
joreland@mysql.com's avatar
joreland@mysql.com committed
4718 4719 4720 4721 4722 4723 4724 4725 4726 4727
      }
      /**
       * If size > NDB_MAX and pk_length+7 >= NDB_MAX
       *   then the table can't be created anyway, so skip
       *   changing part size, and have error later
       */ 
    }
    default:
      break;
    }
4728
  }
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
4729

4730
  // Check partition info
4731
  partition_info *part_info= form->part_info;
4732
  if ((my_errno= set_up_partition_info(part_info, form, (void*)&tab)))
4733
  {
4734
    DBUG_RETURN(my_errno);
4735 4736
  }

4737
  if ((my_errno= check_ndb_connection()))
4738 4739 4740
    DBUG_RETURN(my_errno);
  
  // Create the table in NDB     
4741 4742
  Ndb *ndb= get_ndb();
  NDBDICT *dict= ndb->getDictionary();
4743
  if (dict->createTable(tab) != 0) 
4744 4745 4746 4747 4748 4749
  {
    const NdbError err= dict->getNdbError();
    ERR_PRINT(err);
    my_errno= ndb_to_mysql_error(&err);
    DBUG_RETURN(my_errno);
  }
4750 4751 4752 4753 4754 4755

  Ndb_table_guard ndbtab_g(dict, m_tabname);
  // temporary set m_table during create
  // reset at return
  m_table= ndbtab_g.get_table();
  // TODO check also that we have the same frm...
4756 4757 4758 4759 4760 4761 4762 4763 4764
  if (!m_table)
  {
    /* purecov: begin deadcode */
    const NdbError err= dict->getNdbError();
    ERR_PRINT(err);
    my_errno= ndb_to_mysql_error(&err);
    DBUG_RETURN(my_errno);
    /* purecov: end */
  }
4765

4766 4767
  DBUG_PRINT("info", ("Table %s/%s created successfully", 
                      m_dbname, m_tabname));
4768

4769
  // Create secondary indexes
4770
  my_errno= create_indexes(ndb, form);
4771

4772
  if (!my_errno)
4773
    my_errno= write_ndb_file(name);
4774 4775 4776 4777 4778 4779
  else
  {
    /*
      Failed to create an index,
      drop the table (and all it's indexes)
    */
4780
    while (dict->dropTableGlobal(*m_table))
4781
    {
4782 4783 4784 4785 4786 4787 4788 4789 4790
      switch (dict->getNdbError().status)
      {
        case NdbError::TemporaryError:
          if (!thd->killed) 
            continue; // retry indefinitly
          break;
        default:
          break;
      }
4791
      break;
4792
    }
4793 4794
    m_table = 0;
    DBUG_RETURN(my_errno);
4795
  }
4796

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4797 4798 4799 4800 4801 4802 4803 4804 4805
#ifdef HAVE_NDB_BINLOG
  if (!my_errno)
  {
    NDB_SHARE *share= 0;
    pthread_mutex_lock(&ndbcluster_mutex);
    /*
      First make sure we get a "fresh" share here, not an old trailing one...
    */
    {
4806
      uint length= (uint) strlen(name);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4807
      if ((share= (NDB_SHARE*) hash_search(&ndbcluster_open_tables,
4808
                                           (byte*) name, length)))
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4809 4810 4811 4812 4813
        handle_trailing_share(share);
    }
    /*
      get a new share
    */
4814 4815

    if (!(share= get_share(name, form, true, true)))
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4816
    {
4817
      sql_print_error("NDB: allocating table share for %s failed", name);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4818 4819 4820 4821 4822 4823 4824 4825
      /* my_errno is set */
    }
    pthread_mutex_unlock(&ndbcluster_mutex);

    while (!IS_TMP_PREFIX(m_tabname))
    {
      String event_name(INJECTOR_EVENT_LEN);
      ndb_rep_event_name(&event_name,m_dbname,m_tabname);
4826 4827 4828 4829 4830 4831
      int do_event_op= ndb_binlog_running;

      if (!schema_share &&
          strcmp(share->db, NDB_REP_DB) == 0 &&
          strcmp(share->table_name, NDB_SCHEMA_TABLE) == 0)
        do_event_op= 1;
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4832 4833 4834 4835 4836

      /*
        Always create an event for the table, as other mysql servers
        expect it to be there.
      */
4837
      if (!ndbcluster_create_event(ndb, m_table, event_name.c_ptr(), share,
4838
                                   share && do_event_op ? 2 : 1/* push warning */))
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4839
      {
4840 4841 4842 4843
        if (ndb_extra_logging)
          sql_print_information("NDB Binlog: CREATE TABLE Event: %s",
                                event_name.c_ptr());
        if (share && do_event_op &&
4844
            ndbcluster_create_event_ops(share, m_table, event_name.c_ptr()))
4845 4846 4847 4848 4849
        {
          sql_print_error("NDB Binlog: FAILED CREATE TABLE event operations."
                          " Event: %s", name);
          /* a warning has been issued to the client */
        }
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4850
      }
4851 4852 4853 4854
      /*
        warning has been issued if ndbcluster_create_event failed
        and (share && do_event_op)
      */
4855
      if (share && !do_event_op)
4856
        share->flags|= NSF_NO_BINLOG;
4857 4858
      ndbcluster_log_schema_op(thd, share,
                               thd->query, thd->query_length,
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4859
                               share->db, share->table_name,
4860 4861
                               m_table->getObjectId(),
                               m_table->getObjectVersion(),
mskold@mysql.com's avatar
mskold@mysql.com committed
4862 4863 4864
                               (is_truncate) ?
			       SOT_TRUNCATE_TABLE : SOT_CREATE_TABLE, 
			       0, 0, 1);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4865 4866 4867 4868 4869
      break;
    }
  }
#endif /* HAVE_NDB_BINLOG */

4870
  m_table= 0;
4871 4872 4873
  DBUG_RETURN(my_errno);
}

4874 4875
int ha_ndbcluster::create_handler_files(const char *file,
                                        const char *old_name,
4876 4877
                                        int action_flag,
                                        HA_CREATE_INFO *info) 
4878
{ 
4879
  char path[FN_REFLEN];
4880 4881 4882 4883 4884 4885 4886 4887 4888
  const char *name;
  Ndb* ndb;
  const NDBTAB *tab;
  const void *data, *pack_data;
  uint length, pack_length;
  int error= 0;

  DBUG_ENTER("create_handler_files");

4889
  if (action_flag != CHF_INDEX_FLAG)
4890 4891 4892
  {
    DBUG_RETURN(FALSE);
  }
4893
  DBUG_PRINT("enter", ("file: %s", file));
4894 4895 4896 4897
  if (!(ndb= get_ndb()))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);

  NDBDICT *dict= ndb->getDictionary();
4898
  if (!info->frm_only)
4899
    DBUG_RETURN(0); // Must be a create, ignore since frm is saved in create
4900 4901 4902 4903

  // TODO handle this
  DBUG_ASSERT(m_table != 0);

4904 4905
  set_dbname(file);
  set_tabname(file);
4906
  Ndb_table_guard ndbtab_g(dict, m_tabname);
4907
  DBUG_PRINT("info", ("m_dbname: %s, m_tabname: %s", m_dbname, m_tabname));
4908
  if (!(tab= ndbtab_g.get_table()))
4909 4910
    DBUG_RETURN(0); // Unkown table, must be temporary table

4911
  DBUG_ASSERT(get_ndb_share_state(m_share) == NSS_ALTERED);
4912
  if (readfrm(file, &data, &length) ||
4913 4914 4915 4916 4917
      packfrm(data, length, &pack_data, &pack_length))
  {
    DBUG_PRINT("info", ("Missing frm for %s", m_tabname));
    my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
    my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR));
4918
    error= 1;
4919
  }
4920 4921
  else
  {
4922 4923
    DBUG_PRINT("info", ("Table %s has changed, altering frm in ndb", 
                        m_tabname));
4924 4925 4926 4927 4928 4929
    NdbDictionary::Table new_tab= *tab;
    new_tab.setFrm(pack_data, pack_length);
    if (dict->alterTableGlobal(*tab, new_tab))
    {
      error= ndb_to_mysql_error(&dict->getNdbError());
    }
4930 4931
    my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
    my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR));
4932
  }
4933
  
4934
  set_ndb_share_state(m_share, NSS_INITIAL);
4935
  free_share(&m_share); // Decrease ref_count
4936 4937 4938 4939

  DBUG_RETURN(error);
}

4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980
int ha_ndbcluster::create_index(const char *name, KEY *key_info, 
                                NDB_INDEX_TYPE idx_type, uint idx_no)
{
  int error= 0;
  char unique_name[FN_LEN];
  static const char* unique_suffix= "$unique";
  DBUG_ENTER("ha_ndbcluster::create_ordered_index");
  DBUG_PRINT("info", ("Creating index %u: %s", idx_no, name));  

  if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX)
  {
    strxnmov(unique_name, FN_LEN, name, unique_suffix, NullS);
    DBUG_PRINT("info", ("Created unique index name \'%s\' for index %d",
                        unique_name, idx_no));
  }
    
  switch (idx_type){
  case PRIMARY_KEY_INDEX:
    // Do nothing, already created
    break;
  case PRIMARY_KEY_ORDERED_INDEX:
    error= create_ordered_index(name, key_info);
    break;
  case UNIQUE_ORDERED_INDEX:
    if (!(error= create_ordered_index(name, key_info)))
      error= create_unique_index(unique_name, key_info);
    break;
  case UNIQUE_INDEX:
    if (!(error= check_index_fields_not_null(idx_no)))
      error= create_unique_index(unique_name, key_info);
    break;
  case ORDERED_INDEX:
    error= create_ordered_index(name, key_info);
    break;
  default:
    DBUG_ASSERT(FALSE);
    break;
  }
  
  DBUG_RETURN(error);
}
4981

4982
int ha_ndbcluster::create_ordered_index(const char *name, 
4983
                                        KEY *key_info)
4984
{
4985
  DBUG_ENTER("ha_ndbcluster::create_ordered_index");
4986
  DBUG_RETURN(create_ndb_index(name, key_info, FALSE));
4987 4988 4989
}

int ha_ndbcluster::create_unique_index(const char *name, 
4990
                                       KEY *key_info)
4991 4992
{

4993
  DBUG_ENTER("ha_ndbcluster::create_unique_index");
4994
  DBUG_RETURN(create_ndb_index(name, key_info, TRUE));
4995 4996 4997
}


4998 4999 5000 5001
/*
  Create an index in NDB Cluster
 */

5002 5003 5004
int ha_ndbcluster::create_ndb_index(const char *name, 
                                     KEY *key_info,
                                     bool unique)
5005
{
5006 5007
  Ndb *ndb= get_ndb();
  NdbDictionary::Dictionary *dict= ndb->getDictionary();
5008 5009 5010
  KEY_PART_INFO *key_part= key_info->key_part;
  KEY_PART_INFO *end= key_part + key_info->key_parts;
  
5011
  DBUG_ENTER("ha_ndbcluster::create_index");
5012
  DBUG_PRINT("enter", ("name: %s ", name));
5013

5014
  NdbDictionary::Index ndb_index(name);
5015
  if (unique)
5016 5017 5018 5019 5020
    ndb_index.setType(NdbDictionary::Index::UniqueHashIndex);
  else 
  {
    ndb_index.setType(NdbDictionary::Index::OrderedIndex);
    // TODO Only temporary ordered indexes supported
5021
    ndb_index.setLogging(FALSE); 
5022 5023 5024 5025 5026 5027 5028
  }
  ndb_index.setTable(m_tabname);

  for (; key_part != end; key_part++) 
  {
    Field *field= key_part->field;
    DBUG_PRINT("info", ("attr: %s", field->field_name));
msvensson@neptunus.(none)'s avatar
msvensson@neptunus.(none) committed
5029
    ndb_index.addColumnName(field->field_name);
5030 5031
  }
  
5032
  if (dict->createIndex(ndb_index, *m_table))
5033 5034 5035 5036 5037 5038 5039
    ERR_RETURN(dict->getNdbError());

  // Success
  DBUG_PRINT("info", ("Created index %s", name));
  DBUG_RETURN(0);  
}

5040 5041 5042 5043 5044 5045 5046 5047 5048
/*
 Prepare for an on-line alter table
*/ 
void ha_ndbcluster::prepare_for_alter()
{
  ndbcluster_get_share(m_share); // Increase ref_count
  set_ndb_share_state(m_share, NSS_ALTERED);
}

5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060
/*
  Add an index on-line to a table
*/
int ha_ndbcluster::add_index(TABLE *table_arg, 
                             KEY *key_info, uint num_of_keys)
{
  DBUG_ENTER("ha_ndbcluster::add_index");
  DBUG_PRINT("info", ("ha_ndbcluster::add_index to table %s", 
                      table_arg->s->table_name));
  int error= 0;
  uint idx;

5061
  DBUG_ASSERT(m_share->state == NSS_ALTERED);
5062 5063 5064 5065 5066
  for (idx= 0; idx < num_of_keys; idx++)
  {
    KEY *key= key_info + idx;
    KEY_PART_INFO *key_part= key->key_part;
    KEY_PART_INFO *end= key_part + key->key_parts;
5067
    NDB_INDEX_TYPE idx_type= get_index_type_from_key(idx, key, false);
5068 5069 5070 5071 5072 5073 5074 5075 5076
    DBUG_PRINT("info", ("Adding index: '%s'", key_info[idx].name));
    // Add fields to key_part struct
    for (; key_part != end; key_part++)
      key_part->field= table->field[key_part->fieldnr];
    // Check index type
    // Create index in ndb
    if((error= create_index(key_info[idx].name, key, idx_type, idx)))
      break;
  }
5077
  if (error)
5078
  {
5079 5080
    set_ndb_share_state(m_share, NSS_INITIAL);
    free_share(&m_share); // Decrease ref_count
5081
  }
5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092
  DBUG_RETURN(error);  
}

/*
  Mark one or several indexes for deletion. and
  renumber the remaining indexes
*/
int ha_ndbcluster::prepare_drop_index(TABLE *table_arg, 
                                      uint *key_num, uint num_of_keys)
{
  DBUG_ENTER("ha_ndbcluster::prepare_drop_index");
5093
  DBUG_ASSERT(m_share->state == NSS_ALTERED);
5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104
  // Mark indexes for deletion
  uint idx;
  for (idx= 0; idx < num_of_keys; idx++)
  {
    DBUG_PRINT("info", ("ha_ndbcluster::prepare_drop_index %u", *key_num));
    m_index[*key_num++].status= TO_BE_DROPPED;
  }
  // Renumber indexes
  THD *thd= current_thd;
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;
5105 5106
  renumber_indexes(ndb, table_arg);
  DBUG_RETURN(0);
5107 5108 5109 5110 5111 5112 5113
}
 
/*
  Really drop all indexes marked for deletion
*/
int ha_ndbcluster::final_drop_index(TABLE *table_arg)
{
5114
  int error;
5115 5116 5117 5118 5119 5120
  DBUG_ENTER("ha_ndbcluster::final_drop_index");
  DBUG_PRINT("info", ("ha_ndbcluster::final_drop_index"));
  // Really drop indexes
  THD *thd= current_thd;
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;
5121 5122 5123 5124 5125 5126
  if((error= drop_indexes(ndb, table_arg)))
  {
    m_share->state= NSS_INITIAL;
    free_share(&m_share); // Decrease ref_count
  }
  DBUG_RETURN(error);
5127 5128
}

5129 5130 5131 5132 5133 5134
/*
  Rename a table in NDB Cluster
*/

int ha_ndbcluster::rename_table(const char *from, const char *to)
{
5135
  NDBDICT *dict;
5136
  char old_dbname[FN_HEADLEN];
5137
  char new_dbname[FN_HEADLEN];
5138
  char new_tabname[FN_HEADLEN];
5139 5140
  const NDBTAB *orig_tab;
  int result;
5141 5142
  bool recreate_indexes= FALSE;
  NDBDICT::List index_list;
5143 5144

  DBUG_ENTER("ha_ndbcluster::rename_table");
5145
  DBUG_PRINT("info", ("Renaming %s to %s", from, to));
5146
  set_dbname(from, old_dbname);
5147
  set_dbname(to, new_dbname);
5148 5149 5150
  set_tabname(from);
  set_tabname(to, new_tabname);

5151 5152 5153
  if (check_ndb_connection())
    DBUG_RETURN(my_errno= HA_ERR_NO_CONNECTION);

mskold@mysql.com's avatar
mskold@mysql.com committed
5154
  Ndb *ndb= get_ndb();
5155
  ndb->setDatabaseName(old_dbname);
mskold@mysql.com's avatar
mskold@mysql.com committed
5156
  dict= ndb->getDictionary();
5157 5158
  Ndb_table_guard ndbtab_g(dict, m_tabname);
  if (!(orig_tab= ndbtab_g.get_table()))
5159
    ERR_RETURN(dict->getNdbError());
5160

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5161
#ifdef HAVE_NDB_BINLOG
5162 5163 5164
  int ndb_table_id= orig_tab->getObjectId();
  int ndb_table_version= orig_tab->getObjectVersion();

5165 5166
  NDB_SHARE *share= get_share(from, 0, false);
  if (share)
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5167 5168 5169 5170 5171
  {
    int r= rename_share(share, to);
    DBUG_ASSERT(r == 0);
  }
#endif
5172 5173 5174 5175 5176
  if (my_strcasecmp(system_charset_info, new_dbname, old_dbname))
  {
    dict->listIndexes(index_list, *orig_tab);    
    recreate_indexes= TRUE;
  }
5177 5178
  // Change current database to that of target table
  set_dbname(to);
mskold@mysql.com's avatar
mskold@mysql.com committed
5179
  ndb->setDatabaseName(m_dbname);
5180

5181 5182 5183
  NdbDictionary::Table new_tab= *orig_tab;
  new_tab.setName(new_tabname);
  if (dict->alterTableGlobal(*orig_tab, new_tab) != 0)
5184
  {
5185
    NdbError ndb_error= dict->getNdbError();
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5186 5187 5188 5189 5190 5191 5192 5193
#ifdef HAVE_NDB_BINLOG
    if (share)
    {
      int r= rename_share(share, from);
      DBUG_ASSERT(r == 0);
      free_share(&share);
    }
#endif
5194
    ERR_RETURN(ndb_error);
5195 5196 5197 5198
  }
  
  // Rename .ndb file
  if ((result= handler::rename_table(from, to)))
5199
  {
5200
    // ToDo in 4.1 should rollback alter table...
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5201 5202 5203 5204
#ifdef HAVE_NDB_BINLOG
    if (share)
      free_share(&share);
#endif
5205
    DBUG_RETURN(result);
5206
  }
5207

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218
#ifdef HAVE_NDB_BINLOG
  int is_old_table_tmpfile= 1;
  if (share && share->op)
    dict->forceGCPWait();

  /* handle old table */
  if (!IS_TMP_PREFIX(m_tabname))
  {
    is_old_table_tmpfile= 0;
    String event_name(INJECTOR_EVENT_LEN);
    ndb_rep_event_name(&event_name, from + sizeof(share_prefix) - 1, 0);
5219 5220
    ndbcluster_handle_drop_table(ndb, event_name.c_ptr(), share,
                                 "rename table");
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5221 5222 5223 5224 5225 5226 5227
  }

  if (!result && !IS_TMP_PREFIX(new_tabname))
  {
    /* always create an event for the table */
    String event_name(INJECTOR_EVENT_LEN);
    ndb_rep_event_name(&event_name, to + sizeof(share_prefix) - 1, 0);
5228 5229
    Ndb_table_guard ndbtab_g2(dict, new_tabname);
    const NDBTAB *ndbtab= ndbtab_g2.get_table();
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5230

5231
    if (!ndbcluster_create_event(ndb, ndbtab, event_name.c_ptr(), share,
5232
                                 share && ndb_binlog_running ? 2 : 1/* push warning */))
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5233 5234 5235 5236
    {
      if (ndb_extra_logging)
        sql_print_information("NDB Binlog: RENAME Event: %s",
                              event_name.c_ptr());
5237 5238
      if (share && ndb_binlog_running &&
          ndbcluster_create_event_ops(share, ndbtab, event_name.c_ptr()))
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5239
      {
5240 5241 5242
        sql_print_error("NDB Binlog: FAILED create event operations "
                        "during RENAME. Event %s", event_name.c_ptr());
        /* a warning has been issued to the client */
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5243 5244
      }
    }
5245 5246 5247 5248
    /*
      warning has been issued if ndbcluster_create_event failed
      and (share && ndb_binlog_running)
    */
5249
    if (!is_old_table_tmpfile)
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5250 5251
      ndbcluster_log_schema_op(current_thd, share,
                               current_thd->query, current_thd->query_length,
5252 5253
                               old_dbname, m_tabname,
                               ndb_table_id, ndb_table_version,
5254
                               SOT_RENAME_TABLE,
5255
                               m_dbname, new_tabname, 1);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5256
  }
5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281

  // If we are moving tables between databases, we need to recreate
  // indexes
  if (recreate_indexes)
  {
    for (unsigned i = 0; i < index_list.count; i++) 
    {
        NDBDICT::List::Element& index_el = index_list.elements[i];
	// Recreate any indexes not stored in the system database
	if (my_strcasecmp(system_charset_info, 
			  index_el.database, NDB_SYSTEM_DATABASE))
	{
	  set_dbname(from);
	  ndb->setDatabaseName(m_dbname);
	  const NDBINDEX * index= dict->getIndexGlobal(index_el.name,  new_tab);
	  DBUG_PRINT("info", ("Creating index %s/%s",
			      index_el.database, index->getName()));
	  dict->createIndex(*index, new_tab);
	  DBUG_PRINT("info", ("Dropping index %s/%s",
			      index_el.database, index->getName()));
	  set_dbname(from);
	  ndb->setDatabaseName(m_dbname);
	  dict->dropIndexGlobal(*index);
	}
    }
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5282 5283 5284 5285 5286
  }
  if (share)
    free_share(&share);
#endif

5287 5288 5289 5290 5291
  DBUG_RETURN(result);
}


/*
5292 5293
  Delete table from NDB Cluster

5294 5295
 */

5296 5297 5298 5299 5300 5301 5302 5303
/* static version which does not need a handler */

int
ha_ndbcluster::delete_table(ha_ndbcluster *h, Ndb *ndb,
                            const char *path,
                            const char *db,
                            const char *table_name)
{
5304
  THD *thd= current_thd;
5305 5306
  DBUG_ENTER("ha_ndbcluster::ndbcluster_delete_table");
  NDBDICT *dict= ndb->getDictionary();
5307 5308
  int ndb_table_id= 0;
  int ndb_table_version= 0;
5309
#ifdef HAVE_NDB_BINLOG
5310 5311 5312 5313 5314 5315 5316 5317 5318
  /*
    Don't allow drop table unless
    schema distribution table is setup
  */
  if (!schema_share)
  {
    DBUG_PRINT("info", ("Schema distribution table not setup"));
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  }
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5319 5320
  NDB_SHARE *share= get_share(path, 0, false);
#endif
5321 5322 5323

  /* Drop the table from NDB */
  
5324
  int res= 0;
5325
  if (h && h->m_table)
5326
  {
5327 5328
retry_temporary_error1:
    if (dict->dropTableGlobal(*h->m_table) == 0)
5329 5330 5331 5332
    {
      ndb_table_id= h->m_table->getObjectId();
      ndb_table_version= h->m_table->getObjectVersion();
    }
5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345
    else
    {
      switch (dict->getNdbError().status)
      {
        case NdbError::TemporaryError:
          if (!thd->killed) 
            goto retry_temporary_error1; // retry indefinitly
          break;
        default:
          break;
      }
      res= ndb_to_mysql_error(&dict->getNdbError());
    }
5346
    h->release_metadata(thd, ndb);
5347 5348 5349 5350
  }
  else
  {
    ndb->setDatabaseName(db);
5351 5352 5353 5354 5355
    while (1)
    {
      Ndb_table_guard ndbtab_g(dict, table_name);
      if (ndbtab_g.get_table())
      {
5356
    retry_temporary_error2:
5357 5358 5359 5360 5361
        if (dict->dropTableGlobal(*ndbtab_g.get_table()) == 0)
        {
          ndb_table_id= ndbtab_g.get_table()->getObjectId();
          ndb_table_version= ndbtab_g.get_table()->getObjectVersion();
        }
5362
        else
5363
        {
5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377
          switch (dict->getNdbError().status)
          {
            case NdbError::TemporaryError:
              if (!thd->killed) 
                goto retry_temporary_error2; // retry indefinitly
              break;
            default:
              if (dict->getNdbError().code == NDB_INVALID_SCHEMA_OBJECT)
              {
                ndbtab_g.invalidate();
                continue;
              }
              break;
          }
5378 5379 5380 5381 5382 5383
        }
      }
      else
        res= ndb_to_mysql_error(&dict->getNdbError());
      break;
    }
5384 5385 5386 5387
  }

  if (res)
  {
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405
#ifdef HAVE_NDB_BINLOG
    /* the drop table failed for some reason, drop the share anyways */
    if (share)
    {
      pthread_mutex_lock(&ndbcluster_mutex);
      if (share->state != NSS_DROPPED)
      {
        /*
          The share kept by the server has not been freed, free it
        */
        share->state= NSS_DROPPED;
        free_share(&share, TRUE);
      }
      /* free the share taken above */
      free_share(&share, TRUE);
      pthread_mutex_unlock(&ndbcluster_mutex);
    }
#endif
5406 5407 5408
    DBUG_RETURN(res);
  }

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419
#ifdef HAVE_NDB_BINLOG
  /* stop the logging of the dropped table, and cleanup */

  /*
    drop table is successful even if table does not exist in ndb
    and in case table was actually not dropped, there is no need
    to force a gcp, and setting the event_name to null will indicate
    that there is no event to be dropped
  */
  int table_dropped= dict->getNdbError().code != 709;

mskold@mysql.com's avatar
mskold@mysql.com committed
5420 5421
  if (!IS_TMP_PREFIX(table_name) && share &&
      current_thd->lex->sql_command != SQLCOM_TRUNCATE)
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5422
  {
5423 5424
    ndbcluster_log_schema_op(thd, share,
                             thd->query, thd->query_length,
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5425
                             share->db, share->table_name,
5426
                             ndb_table_id, ndb_table_version,
5427
                             SOT_DROP_TABLE, 0, 0, 1);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438
  }
  else if (table_dropped && share && share->op) /* ndbcluster_log_schema_op
                                                   will do a force GCP */
    dict->forceGCPWait();

  if (!IS_TMP_PREFIX(table_name))
  {
    String event_name(INJECTOR_EVENT_LEN);
    ndb_rep_event_name(&event_name, path + sizeof(share_prefix) - 1, 0);
    ndbcluster_handle_drop_table(ndb,
                                 table_dropped ? event_name.c_ptr() : 0,
5439
                                 share, "delete table");
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457
  }

  if (share)
  {
    pthread_mutex_lock(&ndbcluster_mutex);
    if (share->state != NSS_DROPPED)
    {
      /*
        The share kept by the server has not been freed, free it
      */
      share->state= NSS_DROPPED;
      free_share(&share, TRUE);
    }
    /* free the share taken above */
    free_share(&share, TRUE);
    pthread_mutex_unlock(&ndbcluster_mutex);
  }
#endif
5458 5459 5460
  DBUG_RETURN(0);
}

5461 5462
int ha_ndbcluster::delete_table(const char *name)
{
5463
  DBUG_ENTER("ha_ndbcluster::delete_table");
5464 5465 5466
  DBUG_PRINT("enter", ("name: %s", name));
  set_dbname(name);
  set_tabname(name);
5467

5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479
#ifdef HAVE_NDB_BINLOG
  /*
    Don't allow drop table unless
    schema distribution table is setup
  */
  if (!schema_share)
  {
    DBUG_PRINT("info", ("Schema distribution table not setup"));
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  }
#endif

5480 5481
  if (check_ndb_connection())
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
5482 5483

  /* Call ancestor function to delete .ndb file */
5484
  handler::delete_table(name);
5485 5486

  DBUG_RETURN(delete_table(this, get_ndb(),name, m_dbname, m_tabname));
5487 5488 5489
}


5490 5491 5492 5493
void ha_ndbcluster::get_auto_increment(ulonglong offset, ulonglong increment,
                                       ulonglong nb_desired_values,
                                       ulonglong *first_value,
                                       ulonglong *nb_reserved_values)
5494
{  
5495 5496
  int cache_size;
  Uint64 auto_value;
5497 5498
  DBUG_ENTER("get_auto_increment");
  DBUG_PRINT("enter", ("m_tabname: %s", m_tabname));
5499
  Ndb *ndb= get_ndb();
5500
   
5501
  if (m_rows_inserted > m_rows_to_insert)
5502
  {
5503 5504
    /* We guessed too low */
    m_rows_to_insert+= m_autoincrement_prefetch;
5505
  }
serg@serg.mylan's avatar
serg@serg.mylan committed
5506
  cache_size= 
5507 5508 5509 5510
    (int) ((m_rows_to_insert - m_rows_inserted < m_autoincrement_prefetch) ?
           m_rows_to_insert - m_rows_inserted :
           ((m_rows_to_insert > m_autoincrement_prefetch) ?
            m_rows_to_insert : m_autoincrement_prefetch));
5511
  int ret;
5512 5513
  uint retries= NDB_AUTO_INCREMENT_RETRIES;
  do {
5514
    Ndb_tuple_id_range_guard g(m_share);
5515 5516
    ret=
      m_skip_auto_increment ? 
5517 5518
      ndb->readAutoIncrementValue(m_table, g.range, auto_value) :
      ndb->getAutoIncrementValue(m_table, g.range, auto_value, cache_size);
5519
  } while (ret == -1 && 
5520 5521
           --retries &&
           ndb->getNdbError().status == NdbError::TemporaryError);
5522
  if (ret == -1)
5523 5524 5525 5526
  {
    const NdbError err= ndb->getNdbError();
    sql_print_error("Error %lu in ::get_auto_increment(): %s",
                    (ulong) err.code, err.message);
5527 5528
    *first_value= ~(ulonglong) 0;
    DBUG_VOID_RETURN;
5529
  }
5530 5531 5532 5533
  *first_value= (longlong)auto_value;
  /* From the point of view of MySQL, NDB reserves one row at a time */
  *nb_reserved_values= 1;
  DBUG_VOID_RETURN;
5534 5535 5536 5537 5538 5539 5540
}


/*
  Constructor for the NDB Cluster table handler 
 */

5541 5542 5543 5544 5545 5546 5547
#define HA_NDBCLUSTER_TABLE_FLAGS \
                HA_REC_NOT_IN_SEQ | \
                HA_NULL_IN_KEY | \
                HA_AUTO_PART_KEY | \
                HA_NO_PREFIX_CHAR_KEYS | \
                HA_NEED_READ_RANGE_BUFFER | \
                HA_CAN_GEOMETRY | \
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5548
                HA_CAN_BIT_FIELD | \
5549 5550
                HA_PRIMARY_KEY_REQUIRED_FOR_POSITION | \
                HA_PRIMARY_KEY_REQUIRED_FOR_DELETE | \
5551
                HA_PARTIAL_COLUMN_READ | \
5552 5553
                HA_HAS_OWN_BINLOGGING | \
                HA_HAS_RECORDS
5554

5555
ha_ndbcluster::ha_ndbcluster(TABLE_SHARE *table_arg):
5556
  handler(ndbcluster_hton, table_arg),
5557 5558 5559
  m_active_trans(NULL),
  m_active_cursor(NULL),
  m_table(NULL),
5560
  m_table_info(NULL),
5561
  m_table_flags(HA_NDBCLUSTER_TABLE_FLAGS),
5562
  m_share(0),
5563 5564 5565
  m_part_info(NULL),
  m_use_partition_function(FALSE),
  m_sorted(FALSE),
5566
  m_use_write(FALSE),
5567
  m_ignore_dup_key(FALSE),
5568
  m_has_unique_index(FALSE),
5569
  m_primary_key_update(FALSE),
5570
  m_ignore_no_key(FALSE),
5571 5572 5573
  m_rows_to_insert((ha_rows) 1),
  m_rows_inserted((ha_rows) 0),
  m_bulk_insert_rows((ha_rows) 1024),
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
5574
  m_rows_changed((ha_rows) 0),
5575 5576 5577 5578
  m_bulk_insert_not_flushed(FALSE),
  m_ops_pending(0),
  m_skip_auto_increment(TRUE),
  m_blobs_pending(0),
5579
  m_blobs_offset(0),
5580 5581
  m_blobs_buffer(0),
  m_blobs_buffer_size(0),
5582 5583 5584
  m_dupkey((uint) -1),
  m_ha_not_exact_count(FALSE),
  m_force_send(TRUE),
5585
  m_autoincrement_prefetch((ha_rows) 32),
5586
  m_transaction_on(TRUE),
mskold@mysql.com's avatar
mskold@mysql.com committed
5587 5588
  m_cond_stack(NULL),
  m_multi_cursor(NULL)
5589
{
5590
  int i;
5591
 
5592 5593 5594 5595 5596
  DBUG_ENTER("ha_ndbcluster");

  m_tabname[0]= '\0';
  m_dbname[0]= '\0';

5597 5598
  stats.records= ~(ha_rows)0; // uninitialized
  stats.block_size= 1024;
5599

tomas@poseidon.ndb.mysql.com's avatar
ndb:  
tomas@poseidon.ndb.mysql.com committed
5600 5601
  for (i= 0; i < MAX_KEY; i++)
    ndb_init_index(m_index[i]);
5602

5603 5604 5605 5606
  DBUG_VOID_RETURN;
}


5607 5608 5609 5610 5611 5612 5613 5614 5615 5616
int ha_ndbcluster::ha_initialise()
{
  DBUG_ENTER("ha_ndbcluster::ha_initialise");
  if (check_ndb_in_thd(current_thd))
  {
    DBUG_RETURN(FALSE);
  }
  DBUG_RETURN(TRUE);
}

5617 5618 5619 5620 5621 5622
/*
  Destructor for NDB Cluster table handler
 */

ha_ndbcluster::~ha_ndbcluster() 
{
5623 5624
  THD *thd= current_thd;
  Ndb *ndb= thd ? check_ndb_in_thd(thd) : g_ndb;
5625 5626
  DBUG_ENTER("~ha_ndbcluster");

5627
  if (m_share)
5628 5629 5630
  {
    free_share(&m_share);
  }
5631
  release_metadata(thd, ndb);
5632 5633
  my_free(m_blobs_buffer, MYF(MY_ALLOW_ZERO_PTR));
  m_blobs_buffer= 0;
5634 5635

  // Check for open cursor/transaction
5636 5637
  if (m_active_cursor) {
  }
5638
  DBUG_ASSERT(m_active_cursor == NULL);
5639 5640
  if (m_active_trans) {
  }
5641 5642
  DBUG_ASSERT(m_active_trans == NULL);

5643 5644 5645 5646
  // Discard the condition stack
  DBUG_PRINT("info", ("Clearing condition stack"));
  cond_clear();

5647 5648 5649 5650
  DBUG_VOID_RETURN;
}


mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
5651

5652 5653 5654 5655
/*
  Open a table for further use
  - fetch metadata for this table from NDB
  - check that table exists
5656 5657 5658 5659

  RETURN
    0    ok
    < 0  Table has changed
5660 5661 5662 5663
*/

int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked)
{
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5664
  int res;
5665
  KEY *key;
5666 5667 5668
  DBUG_ENTER("ha_ndbcluster::open");
  DBUG_PRINT("enter", ("name: %s  mode: %d  test_if_locked: %d",
                       name, mode, test_if_locked));
5669
  
5670 5671 5672 5673
  /*
    Setup ref_length to make room for the whole 
    primary key to be written in the ref variable
  */
5674
  
5675
  if (table_share->primary_key != MAX_KEY) 
5676
  {
5677
    key= table->key_info+table_share->primary_key;
5678 5679
    ref_length= key->key_length;
  }
5680 5681 5682 5683 5684 5685 5686 5687 5688 5689
  else // (table_share->primary_key == MAX_KEY) 
  {
    if (m_use_partition_function)
    {
      ref_length+= sizeof(m_part_id);
    }
  }

  DBUG_PRINT("info", ("ref_length: %d", ref_length));

5690
  // Init table lock structure 
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5691
  if (!(m_share=get_share(name, table)))
5692 5693 5694 5695 5696 5697
    DBUG_RETURN(1);
  thr_lock_data_init(&m_share->lock,&m_lock,(void*) 0);
  
  set_dbname(name);
  set_tabname(name);
  
5698
  if (check_ndb_connection()) {
5699 5700
    free_share(&m_share);
    m_share= 0;
5701
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
5702
  }
5703
  
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5704 5705 5706
  res= get_metadata(name);
  if (!res)
    info(HA_STATUS_VARIABLE | HA_STATUS_CONST);
5707

5708 5709 5710 5711 5712
#ifdef HAVE_NDB_BINLOG
  if (!ndb_binlog_tables_inited && ndb_binlog_running)
    table->db_stat|= HA_READ_ONLY;
#endif

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5713
  DBUG_RETURN(res);
5714 5715
}

5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729
/*
  Set partition info

  SYNOPSIS
    set_part_info()
    part_info

  RETURN VALUE
    NONE

  DESCRIPTION
    Set up partition info when handler object created
*/

5730 5731 5732 5733 5734
void ha_ndbcluster::set_part_info(partition_info *part_info)
{
  m_part_info= part_info;
  if (!(m_part_info->part_type == HASH_PARTITION &&
        m_part_info->list_of_part_fields &&
5735
        !m_part_info->is_sub_partitioned()))
5736 5737
    m_use_partition_function= TRUE;
}
5738 5739 5740 5741 5742 5743 5744 5745

/*
  Close the table
  - release resources setup by open()
 */

int ha_ndbcluster::close(void)
{
5746 5747 5748
  DBUG_ENTER("close");
  THD *thd= current_thd;
  Ndb *ndb= thd ? check_ndb_in_thd(thd) : g_ndb;
5749 5750
  free_share(&m_share);
  m_share= 0;
5751
  release_metadata(thd, ndb);
5752 5753 5754 5755
  DBUG_RETURN(0);
}


5756
Thd_ndb* ha_ndbcluster::seize_thd_ndb()
5757
{
5758 5759
  Thd_ndb *thd_ndb;
  DBUG_ENTER("seize_thd_ndb");
5760

5761 5762
  thd_ndb= new Thd_ndb();
  if (thd_ndb->ndb->init(max_transactions) != 0)
5763
  {
5764
    ERR_PRINT(thd_ndb->ndb->getNdbError());
5765 5766 5767 5768 5769 5770
    /*
      TODO 
      Alt.1 If init fails because to many allocated Ndb 
      wait on condition for a Ndb object to be released.
      Alt.2 Seize/release from pool, wait until next release 
    */
5771 5772
    delete thd_ndb;
    thd_ndb= NULL;
5773
  }
5774
  DBUG_RETURN(thd_ndb);
5775 5776 5777
}


5778
void ha_ndbcluster::release_thd_ndb(Thd_ndb* thd_ndb)
5779
{
5780 5781
  DBUG_ENTER("release_thd_ndb");
  delete thd_ndb;
5782 5783 5784 5785 5786
  DBUG_VOID_RETURN;
}


/*
magnus@neptunus.(none)'s avatar
magnus@neptunus.(none) committed
5787
  If this thread already has a Thd_ndb object allocated
5788
  in current THD, reuse it. Otherwise
magnus@neptunus.(none)'s avatar
magnus@neptunus.(none) committed
5789
  seize a Thd_ndb object, assign it to current THD and use it.
5790 5791 5792
 
*/

5793
Ndb* check_ndb_in_thd(THD* thd)
5794
{
5795
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
5796
  if (!thd_ndb)
5797
  {
magnus@neptunus.(none)'s avatar
magnus@neptunus.(none) committed
5798
    if (!(thd_ndb= ha_ndbcluster::seize_thd_ndb()))
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
5799
      return NULL;
5800
    set_thd_ndb(thd, thd_ndb);
5801
  }
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
5802
  return thd_ndb->ndb;
5803 5804
}

magnus@neptunus.(none)'s avatar
magnus@neptunus.(none) committed
5805

5806

5807
int ha_ndbcluster::check_ndb_connection(THD* thd)
5808
{
5809
  Ndb *ndb;
5810 5811
  DBUG_ENTER("check_ndb_connection");
  
5812
  if (!(ndb= check_ndb_in_thd(thd)))
5813
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
5814
  ndb->setDatabaseName(m_dbname);
5815 5816 5817
  DBUG_RETURN(0);
}

magnus@neptunus.(none)'s avatar
magnus@neptunus.(none) committed
5818

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5819
static int ndbcluster_close_connection(THD *thd)
5820
{
5821
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
5822
  DBUG_ENTER("ndbcluster_close_connection");
5823 5824
  if (thd_ndb)
  {
5825
    ha_ndbcluster::release_thd_ndb(thd_ndb);
5826
    set_thd_ndb(thd, NULL); // not strictly required but does not hurt either
5827
  }
5828
  DBUG_RETURN(0);
5829 5830 5831 5832 5833 5834 5835
}


/*
  Try to discover one table from NDB
 */

5836
int ndbcluster_discover(THD* thd, const char *db, const char *name,
5837
                        const void** frmblob, uint* frmlen)
5838
{
5839 5840
  int error= 0;
  NdbError ndb_error;
5841 5842
  uint len;
  const void* data;
5843
  Ndb* ndb;
5844
  char key[FN_REFLEN];
5845
  DBUG_ENTER("ndbcluster_discover");
5846
  DBUG_PRINT("enter", ("db: %s, name: %s", db, name)); 
5847

5848 5849 5850 5851
  if (!(ndb= check_ndb_in_thd(thd)))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);  
  ndb->setDatabaseName(db);
  NDBDICT* dict= ndb->getDictionary();
5852
  build_table_filename(key, sizeof(key), db, name, "", 0);
5853
  NDB_SHARE *share= get_share(key, 0, false);
5854
  if (share && get_ndb_share_state(share) == NSS_ALTERED)
5855
  {
5856 5857 5858 5859
    // Frm has been altered on disk, but not yet written to ndb
    if (readfrm(key, &data, &len))
    {
      DBUG_PRINT("error", ("Could not read frm"));
5860 5861
      error= 1;
      goto err;
5862
    }
5863
  }
5864
  else
5865
  {
5866 5867 5868 5869
    Ndb_table_guard ndbtab_g(dict, name);
    const NDBTAB *tab= ndbtab_g.get_table();
    if (!tab)
    {
5870 5871
      const NdbError err= dict->getNdbError();
      if (err.code == 709 || err.code == 723)
5872 5873 5874 5875
        error= -1;
      else
        ndb_error= err;
      goto err;
5876 5877 5878 5879 5880 5881 5882
    }
    DBUG_PRINT("info", ("Found table %s", tab->getName()));
    
    len= tab->getFrmLength();  
    if (len == 0 || tab->getFrmData() == NULL)
    {
      DBUG_PRINT("error", ("No frm data found."));
5883 5884
      error= 1;
      goto err;
5885 5886 5887 5888 5889
    }
    
    if (unpackfrm(&data, &len, tab->getFrmData()))
    {
      DBUG_PRINT("error", ("Could not unpack table"));
5890 5891
      error= 1;
      goto err;
5892
    }
5893
  }
5894 5895 5896 5897

  *frmlen= len;
  *frmblob= data;
  
5898 5899 5900
  if (share)
    free_share(&share);

5901
  DBUG_RETURN(0);
5902 5903 5904 5905 5906 5907 5908 5909
err:
  if (share)
    free_share(&share);
  if (ndb_error.code)
  {
    ERR_RETURN(ndb_error);
  }
  DBUG_RETURN(error);
5910 5911 5912
}

/*
5913
  Check if a table exists in NDB
5914

5915
 */
5916

5917 5918
int ndbcluster_table_exists_in_engine(THD* thd, const char *db,
                                      const char *name)
5919 5920
{
  Ndb* ndb;
5921
  DBUG_ENTER("ndbcluster_table_exists_in_engine");
5922
  DBUG_PRINT("enter", ("db: %s  name: %s", db, name));
5923 5924

  if (!(ndb= check_ndb_in_thd(thd)))
5925
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
5926 5927

  NDBDICT* dict= ndb->getDictionary();
5928 5929 5930
  NdbDictionary::Dictionary::List list;
  if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0)
    ERR_RETURN(dict->getNdbError());
5931
  for (uint i= 0 ; i < list.count ; i++)
5932
  {
5933 5934 5935 5936 5937 5938 5939
    NdbDictionary::Dictionary::List::Element& elmt= list.elements[i];
    if (my_strcasecmp(system_charset_info, elmt.database, db))
      continue;
    if (my_strcasecmp(system_charset_info, elmt.name, name))
      continue;
    DBUG_PRINT("info", ("Found table"));
    DBUG_RETURN(1);
5940
  }
5941
  DBUG_RETURN(0);
5942 5943
}

5944 5945


5946
extern "C" byte* tables_get_key(const char *entry, uint *length,
5947
                                my_bool not_used __attribute__((unused)))
5948 5949 5950 5951 5952 5953
{
  *length= strlen(entry);
  return (byte*) entry;
}


5954 5955
/*
  Drop a database in NDB Cluster
5956 5957
  NOTE add a dummy void function, since stupid handlerton is returning void instead of int...
*/
5958

5959
int ndbcluster_drop_database_impl(const char *path)
5960 5961 5962 5963 5964 5965 5966 5967 5968
{
  DBUG_ENTER("ndbcluster_drop_database");
  THD *thd= current_thd;
  char dbname[FN_HEADLEN];
  Ndb* ndb;
  NdbDictionary::Dictionary::List list;
  uint i;
  char *tabname;
  List<char> drop_list;
5969
  int ret= 0;
5970 5971 5972 5973
  ha_ndbcluster::set_dbname(path, (char *)&dbname);
  DBUG_PRINT("enter", ("db: %s", dbname));
  
  if (!(ndb= check_ndb_in_thd(thd)))
5974
    DBUG_RETURN(-1);
5975 5976 5977 5978 5979
  
  // List tables in NDB
  NDBDICT *dict= ndb->getDictionary();
  if (dict->listObjects(list, 
                        NdbDictionary::Object::UserTable) != 0)
5980
    DBUG_RETURN(-1);
5981 5982
  for (i= 0 ; i < list.count ; i++)
  {
5983 5984
    NdbDictionary::Dictionary::List::Element& elmt= list.elements[i];
    DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name));     
5985 5986
    
    // Add only tables that belongs to db
5987
    if (my_strcasecmp(system_charset_info, elmt.database, dbname))
5988
      continue;
5989 5990
    DBUG_PRINT("info", ("%s must be dropped", elmt.name));     
    drop_list.push_back(thd->strdup(elmt.name));
5991 5992
  }
  // Drop any tables belonging to database
5993
  char full_path[FN_REFLEN];
5994
  char *tmp= full_path +
5995
    build_table_filename(full_path, sizeof(full_path), dbname, "", "", 0);
5996

5997 5998 5999
  ndb->setDatabaseName(dbname);
  List_iterator_fast<char> it(drop_list);
  while ((tabname=it++))
6000
  {
6001
    tablename_to_filename(tabname, tmp, FN_REFLEN - (tmp - full_path)-1);
6002
    VOID(pthread_mutex_lock(&LOCK_open));
6003
    if (ha_ndbcluster::delete_table(0, ndb, full_path, dbname, tabname))
6004 6005
    {
      const NdbError err= dict->getNdbError();
6006
      if (err.code != 709 && err.code != 723)
6007 6008
      {
        ERR_PRINT(err);
6009
        ret= ndb_to_mysql_error(&err);
6010
      }
6011
    }
6012
    VOID(pthread_mutex_unlock(&LOCK_open));
6013 6014
  }
  DBUG_RETURN(ret);      
6015 6016
}

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6017
static void ndbcluster_drop_database(char *path)
6018
{
6019
  THD *thd= current_thd;
6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032
  DBUG_ENTER("ndbcluster_drop_database");
#ifdef HAVE_NDB_BINLOG
  /*
    Don't allow drop database unless
    schema distribution table is setup
  */
  if (!schema_share)
  {
    DBUG_PRINT("info", ("Schema distribution table not setup"));
    DBUG_VOID_RETURN;
    //DBUG_RETURN(HA_ERR_NO_CONNECTION);
  }
#endif
6033
  ndbcluster_drop_database_impl(path);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6034 6035 6036
#ifdef HAVE_NDB_BINLOG
  char db[FN_REFLEN];
  ha_ndbcluster::set_dbname(path, db);
6037 6038
  ndbcluster_log_schema_op(thd, 0,
                           thd->query, thd->query_length,
6039
                           db, "", 0, 0, SOT_DROP_DB, 0, 0, 0);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6040
#endif
6041
  DBUG_VOID_RETURN;
6042
}
6043 6044 6045
/*
  find all tables in ndb and discover those needed
*/
6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057
int ndb_create_table_from_engine(THD *thd, const char *db,
                                 const char *table_name)
{
  LEX *old_lex= thd->lex, newlex;
  thd->lex= &newlex;
  newlex.current_select= NULL;
  lex_start(thd, (const uchar*) "", 0);
  int res= ha_create_table_from_engine(thd, db, table_name);
  thd->lex= old_lex;
  return res;
}

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6058
int ndbcluster_find_all_files(THD *thd)
6059 6060 6061 6062 6063 6064 6065 6066 6067 6068
{
  DBUG_ENTER("ndbcluster_find_all_files");
  Ndb* ndb;
  char key[FN_REFLEN];

  if (!(ndb= check_ndb_in_thd(thd)))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);

  NDBDICT *dict= ndb->getDictionary();

6069
  int unhandled, retries= 5, skipped;
6070 6071
  LINT_INIT(unhandled);
  LINT_INIT(skipped);
6072 6073
  do
  {
jonas@perch.ndb.mysql.com's avatar
ndb -  
jonas@perch.ndb.mysql.com committed
6074
    NdbDictionary::Dictionary::List list;
6075 6076 6077
    if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0)
      ERR_RETURN(dict->getNdbError());
    unhandled= 0;
6078 6079
    skipped= 0;
    retries--;
6080 6081 6082
    for (uint i= 0 ; i < list.count ; i++)
    {
      NDBDICT::List::Element& elmt= list.elements[i];
6083
      if (IS_TMP_PREFIX(elmt.name) || IS_NDB_BLOB_PREFIX(elmt.name))
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6084 6085 6086 6087
      {
        DBUG_PRINT("info", ("Skipping %s.%s in NDB", elmt.database, elmt.name));
        continue;
      }
6088
      DBUG_PRINT("info", ("Found %s.%s in NDB", elmt.database, elmt.name));
6089 6090 6091
      if (elmt.state != NDBOBJ::StateOnline &&
          elmt.state != NDBOBJ::StateBackup &&
          elmt.state != NDBOBJ::StateBuilding)
6092 6093 6094
      {
        sql_print_information("NDB: skipping setup table %s.%s, in state %d",
                              elmt.database, elmt.name, elmt.state);
6095
        skipped++;
6096 6097 6098 6099
        continue;
      }

      ndb->setDatabaseName(elmt.database);
6100 6101 6102
      Ndb_table_guard ndbtab_g(dict, elmt.name);
      const NDBTAB *ndbtab= ndbtab_g.get_table();
      if (!ndbtab)
6103
      {
6104
        if (retries == 0)
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6105 6106 6107 6108
          sql_print_error("NDB: failed to setup table %s.%s, error: %d, %s",
                          elmt.database, elmt.name,
                          dict->getNdbError().code,
                          dict->getNdbError().message);
6109 6110 6111 6112 6113 6114 6115
        unhandled++;
        continue;
      }

      if (ndbtab->getFrmLength() == 0)
        continue;
    
6116
      /* check if database exists */
6117
      char *end= key +
6118
        build_table_filename(key, sizeof(key), elmt.database, "", "", 0);
6119 6120 6121 6122 6123
      if (my_access(key, F_OK))
      {
        /* no such database defined, skip table */
        continue;
      }
6124 6125 6126
      /* finalize construction of path */
      end+= tablename_to_filename(elmt.name, end,
                                  sizeof(key)-(end-key));
6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138
      const void *data= 0, *pack_data= 0;
      uint length, pack_length;
      int discover= 0;
      if (readfrm(key, &data, &length) ||
          packfrm(data, length, &pack_data, &pack_length))
      {
        discover= 1;
        sql_print_information("NDB: missing frm for %s.%s, discovering...",
                              elmt.database, elmt.name);
      }
      else if (cmp_frm(ndbtab, pack_data, pack_length))
      {
6139
        NDB_SHARE *share= get_share(key, 0, false);
6140
        if (!share || get_ndb_share_state(share) != NSS_ALTERED)
6141 6142 6143 6144 6145
        {
          discover= 1;
          sql_print_information("NDB: mismatch in frm for %s.%s, discovering...",
                                elmt.database, elmt.name);
        }
6146 6147
        if (share)
          free_share(&share);
6148 6149 6150 6151
      }
      my_free((char*) data, MYF(MY_ALLOW_ZERO_PTR));
      my_free((char*) pack_data, MYF(MY_ALLOW_ZERO_PTR));

6152
      pthread_mutex_lock(&LOCK_open);
6153 6154 6155
      if (discover)
      {
        /* ToDo 4.1 database needs to be created if missing */
6156
        if (ndb_create_table_from_engine(thd, elmt.database, elmt.name))
6157 6158 6159 6160
        {
          /* ToDo 4.1 handle error */
        }
      }
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6161
#ifdef HAVE_NDB_BINLOG
6162
      else
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6163 6164
      {
        /* set up replication for this table */
6165 6166 6167
        ndbcluster_create_binlog_setup(ndb, key, end-key,
                                       elmt.database, elmt.name,
                                       TRUE);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6168 6169
      }
#endif
6170
      pthread_mutex_unlock(&LOCK_open);
6171 6172
    }
  }
6173
  while (unhandled && retries);
6174

6175
  DBUG_RETURN(-(skipped + unhandled));
6176
}
6177

6178
int ndbcluster_find_files(THD *thd,const char *db,const char *path,
6179
                          const char *wild, bool dir, List<char> *files)
6180
{
6181 6182 6183
  DBUG_ENTER("ndbcluster_find_files");
  DBUG_PRINT("enter", ("db: %s", db));
  { // extra bracket to avoid gcc 2.95.3 warning
6184
  uint i;
6185
  Ndb* ndb;
6186
  char name[FN_REFLEN];
6187
  HASH ndb_tables, ok_tables;
6188
  NDBDICT::List list;
6189 6190 6191 6192

  if (!(ndb= check_ndb_in_thd(thd)))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);

6193
  if (dir)
6194
    DBUG_RETURN(0); // Discover of databases not yet supported
6195

6196
  // List tables in NDB
6197
  NDBDICT *dict= ndb->getDictionary();
6198
  if (dict->listObjects(list, 
6199
                        NdbDictionary::Object::UserTable) != 0)
6200
    ERR_RETURN(dict->getNdbError());
6201

6202
  if (hash_init(&ndb_tables, system_charset_info,list.count,0,0,
6203
                (hash_get_key)tables_get_key,0,0))
6204 6205 6206 6207 6208 6209
  {
    DBUG_PRINT("error", ("Failed to init HASH ndb_tables"));
    DBUG_RETURN(-1);
  }

  if (hash_init(&ok_tables, system_charset_info,32,0,0,
6210
                (hash_get_key)tables_get_key,0,0))
6211 6212 6213 6214 6215 6216
  {
    DBUG_PRINT("error", ("Failed to init HASH ok_tables"));
    hash_free(&ndb_tables);
    DBUG_RETURN(-1);
  }  

6217 6218
  for (i= 0 ; i < list.count ; i++)
  {
6219
    NDBDICT::List::Element& elmt= list.elements[i];
6220
    if (IS_TMP_PREFIX(elmt.name) || IS_NDB_BLOB_PREFIX(elmt.name))
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6221 6222 6223 6224
    {
      DBUG_PRINT("info", ("Skipping %s.%s in NDB", elmt.database, elmt.name));
      continue;
    }
6225
    DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name));
6226

6227
    // Add only tables that belongs to db
6228
    if (my_strcasecmp(system_charset_info, elmt.database, db))
6229
      continue;
6230

6231
    // Apply wildcard to list of tables in NDB
6232
    if (wild)
6233
    {
6234 6235
      if (lower_case_table_names)
      {
6236
        if (wild_case_compare(files_charset_info, elmt.name, wild))
6237
          continue;
6238
      }
6239
      else if (wild_compare(elmt.name,wild,0))
6240
        continue;
6241
    }
6242 6243
    DBUG_PRINT("info", ("Inserting %s into ndb_tables hash", elmt.name));     
    my_hash_insert(&ndb_tables, (byte*)thd->strdup(elmt.name));
6244 6245
  }

6246 6247 6248 6249 6250
  char *file_name;
  List_iterator<char> it(*files);
  List<char> delete_list;
  while ((file_name=it++))
  {
6251
    bool file_on_disk= false;
6252 6253 6254 6255
    DBUG_PRINT("info", ("%s", file_name));     
    if (hash_search(&ndb_tables, file_name, strlen(file_name)))
    {
      DBUG_PRINT("info", ("%s existed in NDB _and_ on disk ", file_name));
6256
      file_on_disk= true;
6257 6258
    }
    
6259
    // Check for .ndb file with this name
6260
    build_table_filename(name, sizeof(name), db, file_name, ha_ndb_ext, 0);
6261
    DBUG_PRINT("info", ("Check access for %s", name));
6262
    if (my_access(name, F_OK))
6263 6264 6265
    {
      DBUG_PRINT("info", ("%s did not exist on disk", name));     
      // .ndb file did not exist on disk, another table type
6266
      if (file_on_disk)
6267 6268 6269 6270 6271
      {
	// Ignore this ndb table
	gptr record=  hash_search(&ndb_tables, file_name, strlen(file_name));
	DBUG_ASSERT(record);
	hash_delete(&ndb_tables, record);
6272 6273 6274 6275
	push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
			    ER_TABLE_EXISTS_ERROR,
			    "Local table %s.%s shadows ndb table",
			    db, file_name);
6276
      }
6277 6278 6279 6280
      continue;
    }
    if (file_on_disk) 
    {
6281
      // File existed in NDB and as frm file, put in ok_tables list
6282
      my_hash_insert(&ok_tables, (byte*)file_name);
6283
      continue;
6284
    }
6285 6286 6287
    DBUG_PRINT("info", ("%s existed on disk", name));     
    // The .ndb file exists on disk, but it's not in list of tables in ndb
    // Verify that handler agrees table is gone.
6288
    if (ndbcluster_table_exists_in_engine(thd, db, file_name) == 0)    
6289 6290 6291 6292 6293 6294 6295
    {
      DBUG_PRINT("info", ("NDB says %s does not exists", file_name));     
      it.remove();
      // Put in list of tables to remove from disk
      delete_list.push_back(thd->strdup(file_name));
    }
  }
6296

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6297 6298 6299
#ifdef HAVE_NDB_BINLOG
  /* setup logging to binlog for all discovered tables */
  {
6300
    char *end, *end1= name +
6301
      build_table_filename(name, sizeof(name), db, "", "", 0);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6302 6303 6304
    for (i= 0; i < ok_tables.records; i++)
    {
      file_name= (char*)hash_element(&ok_tables, i);
6305 6306
      end= end1 +
        tablename_to_filename(file_name, end1, sizeof(name) - (end1 - name));
6307 6308 6309 6310
      pthread_mutex_lock(&LOCK_open);
      ndbcluster_create_binlog_setup(ndb, name, end-name,
                                     db, file_name, TRUE);
      pthread_mutex_unlock(&LOCK_open);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6311 6312 6313 6314
    }
  }
#endif

6315 6316 6317 6318
  // Check for new files to discover
  DBUG_PRINT("info", ("Checking for new files to discover"));       
  List<char> create_list;
  for (i= 0 ; i < ndb_tables.records ; i++)
6319
  {
6320 6321
    file_name= hash_element(&ndb_tables, i);
    if (!hash_search(&ok_tables, file_name, strlen(file_name)))
6322
    {
6323
      build_table_filename(name, sizeof(name), db, file_name, reg_ext, 0);
6324
      if (my_access(name, F_OK))
6325 6326 6327 6328 6329 6330
      {
        DBUG_PRINT("info", ("%s must be discovered", file_name));
        // File is in list of ndb tables and not in ok_tables
        // This table need to be created
        create_list.push_back(thd->strdup(file_name));
      }
6331 6332
    }
  }
6333

6334 6335
  // Lock mutex before deleting and creating frm files
  pthread_mutex_lock(&LOCK_open);
6336

6337 6338 6339 6340 6341
  if (!global_read_lock)
  {
    // Delete old files
    List_iterator_fast<char> it3(delete_list);
    while ((file_name=it3++))
6342 6343
    {
      DBUG_PRINT("info", ("Remove table %s/%s", db, file_name));
6344 6345 6346 6347
      // Delete the table and all related files
      TABLE_LIST table_list;
      bzero((char*) &table_list,sizeof(table_list));
      table_list.db= (char*) db;
6348
      table_list.alias= table_list.table_name= (char*)file_name;
6349
      (void)mysql_rm_table_part2(thd, &table_list,
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6350 6351 6352 6353
                                                                 /* if_exists */ FALSE,
                                                                 /* drop_temporary */ FALSE,
                                                                 /* drop_view */ FALSE,
                                                                 /* dont_log_query*/ TRUE);
6354 6355
      /* Clear error message that is returned when table is deleted */
      thd->clear_error();
6356 6357 6358
    }
  }

6359 6360 6361 6362
  // Create new files
  List_iterator_fast<char> it2(create_list);
  while ((file_name=it2++))
  {  
6363
    DBUG_PRINT("info", ("Table %s need discovery", file_name));
6364
    if (ndb_create_table_from_engine(thd, db, file_name) == 0)
6365
      files->push_back(thd->strdup(file_name)); 
6366 6367
  }

6368
  pthread_mutex_unlock(&LOCK_open);
6369 6370
  
  hash_free(&ok_tables);
6371
  hash_free(&ndb_tables);
6372
  } // extra bracket to avoid gcc 2.95.3 warning
6373
  DBUG_RETURN(0);    
6374 6375 6376 6377 6378 6379 6380 6381
}


/*
  Initialise all gloal variables before creating 
  a NDB Cluster table handler
 */

6382 6383 6384 6385
/* Call back after cluster connect */
static int connect_callback()
{
  update_status_variables(g_ndb_cluster_connection);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6386 6387 6388 6389 6390 6391 6392

  uint node_id, i= 0;
  Ndb_cluster_connection_node_iter node_iter;
  memset((void *)g_node_id_map, 0xFFFF, sizeof(g_node_id_map));
  while ((node_id= g_ndb_cluster_connection->get_next_node(node_iter)))
    g_node_id_map[node_id]= i++;

6393
  pthread_cond_signal(&COND_ndb_util_thread);
6394 6395 6396
  return 0;
}

6397
extern int ndb_dictionary_is_mysqld;
6398

6399
static int ndbcluster_init(void *p)
6400
{
6401
  int res;
6402
  DBUG_ENTER("ndbcluster_init");
6403

6404
  ndb_dictionary_is_mysqld= 1;
6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421
  ndbcluster_hton= (handlerton *)p;

  {
    handlerton *h= ndbcluster_hton;
    h->state=            have_ndbcluster;
    h->db_type=          DB_TYPE_NDBCLUSTER;
    h->close_connection= ndbcluster_close_connection;
    h->commit=           ndbcluster_commit;
    h->rollback=         ndbcluster_rollback;
    h->create=           ndbcluster_create_handler; /* Create a new handler */
    h->drop_database=    ndbcluster_drop_database;  /* Drop a database */
    h->panic=            ndbcluster_end;            /* Panic call */
    h->show_status=      ndbcluster_show_status;    /* Show status */
    h->alter_tablespace= ndbcluster_alter_tablespace;    /* Show status */
    h->partition_flags=  ndbcluster_partition_flags; /* Partition flags */
    h->alter_table_flags=ndbcluster_alter_table_flags; /* Alter table flags */
    h->fill_files_table= ndbcluster_fill_files_table;
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6422 6423 6424
#ifdef HAVE_NDB_BINLOG
    ndbcluster_binlog_init_handlerton();
#endif
6425 6426 6427 6428
    h->flags=            HTON_CAN_RECREATE | HTON_TEMPORARY_NOT_SUPPORTED;
    h->discover=         ndbcluster_discover;
    h->find_files= ndbcluster_find_files;
    h->table_exists_in_engine= ndbcluster_table_exists_in_engine;
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6429 6430
  }

6431 6432 6433
  if (have_ndbcluster != SHOW_OPTION_YES)
    DBUG_RETURN(0); // nothing else to do

6434 6435 6436
  // Initialize ndb interface
  ndb_init_internal();

6437
  // Set connectstring if specified
6438 6439
  if (opt_ndbcluster_connectstring != 0)
    DBUG_PRINT("connectstring", ("%s", opt_ndbcluster_connectstring));     
6440
  if ((g_ndb_cluster_connection=
6441
       new Ndb_cluster_connection(opt_ndbcluster_connectstring)) == 0)
6442
  {
6443
    DBUG_PRINT("error",("Ndb_cluster_connection(%s)",
6444
                        opt_ndbcluster_connectstring));
6445
    goto ndbcluster_init_error;
6446
  }
tomas@poseidon.ndb.mysql.com's avatar
ndb:  
tomas@poseidon.ndb.mysql.com committed
6447 6448 6449 6450 6451
  {
    char buf[128];
    my_snprintf(buf, sizeof(buf), "mysqld --server-id=%d", server_id);
    g_ndb_cluster_connection->set_name(buf);
  }
6452 6453 6454
  g_ndb_cluster_connection->set_optimized_node_selection
    (opt_ndb_optimized_node_selection);

6455
  // Create a Ndb object to open the connection  to NDB
6456 6457 6458 6459 6460
  if ( (g_ndb= new Ndb(g_ndb_cluster_connection, "sys")) == 0 )
  {
    DBUG_PRINT("error", ("failed to create global ndb object"));
    goto ndbcluster_init_error;
  }
6461 6462 6463
  if (g_ndb->init() != 0)
  {
    ERR_PRINT (g_ndb->getNdbError());
6464
    goto ndbcluster_init_error;
6465
  }
6466

6467
  if ((res= g_ndb_cluster_connection->connect(0,0,0)) == 0)
6468
  {
6469
    connect_callback();
6470
    DBUG_PRINT("info",("NDBCLUSTER storage engine at %s on port %d",
6471 6472
                       g_ndb_cluster_connection->get_connected_host(),
                       g_ndb_cluster_connection->get_connected_port()));
6473
    g_ndb_cluster_connection->wait_until_ready(10,3);
6474
  } 
6475
  else if (res == 1)
6476
  {
6477
    if (g_ndb_cluster_connection->start_connect_thread(connect_callback)) 
6478
    {
6479
      DBUG_PRINT("error", ("g_ndb_cluster_connection->start_connect_thread()"));
6480 6481
      goto ndbcluster_init_error;
    }
6482
#ifndef DBUG_OFF
6483 6484
    {
      char buf[1024];
6485
      DBUG_PRINT("info",
6486 6487 6488 6489
                 ("NDBCLUSTER storage engine not started, "
                  "will connect using %s",
                  g_ndb_cluster_connection->
                  get_connectstring(buf,sizeof(buf))));
6490
    }
6491
#endif
6492
  }
6493
  else
6494 6495 6496
  {
    DBUG_ASSERT(res == -1);
    DBUG_PRINT("error", ("permanent error"));
6497
    goto ndbcluster_init_error;
6498
  }
6499
  
6500 6501 6502
  (void) hash_init(&ndbcluster_open_tables,system_charset_info,32,0,0,
                   (hash_get_key) ndbcluster_get_key,0,0);
  pthread_mutex_init(&ndbcluster_mutex,MY_MUTEX_INIT_FAST);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6503 6504
#ifdef HAVE_NDB_BINLOG
  /* start the ndb injector thread */
6505 6506
  if (ndbcluster_binlog_start())
    goto ndbcluster_init_error;
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6507
#endif /* HAVE_NDB_BINLOG */
6508

mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6509 6510
  pthread_mutex_init(&LOCK_ndb_util_thread, MY_MUTEX_INIT_FAST);
  pthread_cond_init(&COND_ndb_util_thread, NULL);
6511

mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6512

jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
6513
  ndb_cache_check_time = opt_ndb_cache_check_time;
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6514 6515 6516 6517 6518
  // Create utility thread
  pthread_t tmp;
  if (pthread_create(&tmp, &connection_attrib, ndb_util_thread_func, 0))
  {
    DBUG_PRINT("error", ("Could not create ndb utility thread"));
6519 6520 6521 6522
    hash_free(&ndbcluster_open_tables);
    pthread_mutex_destroy(&ndbcluster_mutex);
    pthread_mutex_destroy(&LOCK_ndb_util_thread);
    pthread_cond_destroy(&COND_ndb_util_thread);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6523 6524
    goto ndbcluster_init_error;
  }
6525

6526
  ndbcluster_inited= 1;
6527
  DBUG_RETURN(FALSE);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6528

6529
ndbcluster_init_error:
6530
  if (g_ndb)
6531 6532 6533 6534 6535
    delete g_ndb;
  g_ndb= NULL;
  if (g_ndb_cluster_connection)
    delete g_ndb_cluster_connection;
  g_ndb_cluster_connection= NULL;
6536
  have_ndbcluster= SHOW_OPTION_DISABLED;	// If we couldn't use handler
6537 6538
  ndbcluster_hton->state= SHOW_OPTION_DISABLED;               // If we couldn't use handler

6539
  DBUG_RETURN(TRUE);
6540 6541
}

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6542
static int ndbcluster_end(ha_panic_function type)
6543 6544
{
  DBUG_ENTER("ndbcluster_end");
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6545

6546 6547 6548
  if (!ndbcluster_inited)
    DBUG_RETURN(0);

6549 6550 6551
#ifdef HAVE_NDB_BINLOG
  {
    pthread_mutex_lock(&ndbcluster_mutex);
6552
    while (ndbcluster_open_tables.records)
6553 6554
    {
      NDB_SHARE *share=
6555
        (NDB_SHARE*) hash_element(&ndbcluster_open_tables, 0);
6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566
#ifndef DBUG_OFF
      fprintf(stderr, "NDB: table share %s with use_count %d not freed\n",
              share->key, share->use_count);
#endif
      real_free_share(&share);
    }
    pthread_mutex_unlock(&ndbcluster_mutex);
  }
#endif
  hash_free(&ndbcluster_open_tables);

6567
  if (g_ndb)
6568 6569
  {
#ifndef DBUG_OFF
6570 6571
    Ndb::Free_list_usage tmp;
    tmp.m_name= 0;
6572 6573 6574 6575 6576 6577 6578 6579 6580 6581
    while (g_ndb->get_free_list_usage(&tmp))
    {
      uint leaked= (uint) tmp.m_created - tmp.m_free;
      if (leaked)
        fprintf(stderr, "NDB: Found %u %s%s that %s not been released\n",
                leaked, tmp.m_name,
                (leaked == 1)?"":"'s",
                (leaked == 1)?"has":"have");
    }
#endif
6582
    delete g_ndb;
6583
    g_ndb= NULL;
6584
  }
6585
  delete g_ndb_cluster_connection;
6586
  g_ndb_cluster_connection= NULL;
6587

6588 6589 6590
  // cleanup ndb interface
  ndb_end_internal();

6591
  pthread_mutex_destroy(&ndbcluster_mutex);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6592 6593
  pthread_mutex_destroy(&LOCK_ndb_util_thread);
  pthread_cond_destroy(&COND_ndb_util_thread);
6594 6595 6596 6597
  ndbcluster_inited= 0;
  DBUG_RETURN(0);
}

6598 6599 6600 6601 6602 6603
void ha_ndbcluster::print_error(int error, myf errflag)
{
  DBUG_ENTER("ha_ndbcluster::print_error");
  DBUG_PRINT("enter", ("error = %d", error));

  if (error == HA_ERR_NO_PARTITION_FOUND)
6604
    m_part_info->print_no_partition_found(table);
6605 6606 6607 6608 6609 6610
  else
    handler::print_error(error, errflag);
  DBUG_VOID_RETURN;
}


6611 6612 6613 6614 6615
/*
  Static error print function called from
  static handler method ndbcluster_commit
  and ndbcluster_rollback
*/
6616 6617

void ndbcluster_print_error(int error, const NdbOperation *error_op)
6618
{
6619
  DBUG_ENTER("ndbcluster_print_error");
6620
  TABLE_SHARE share;
6621
  const char *tab_name= (error_op) ? error_op->getTableName() : "";
6622 6623 6624 6625 6626
  share.db.str= (char*) "";
  share.db.length= 0;
  share.table_name.str= (char *) tab_name;
  share.table_name.length= strlen(tab_name);
  ha_ndbcluster error_handler(&share);
6627
  error_handler.print_error(error, MYF(0));
ndbdev@ndbmaster.mysql.com's avatar
ndbdev@ndbmaster.mysql.com committed
6628
  DBUG_VOID_RETURN;
6629
}
6630

6631 6632 6633
/**
 * Set a given location from full pathname to database name
 *
6634
 */
6635
void ha_ndbcluster::set_dbname(const char *path_name, char *dbname)
6636
{
6637 6638 6639 6640
  char *end, *ptr, *tmp_name;
  char tmp_buff[FN_REFLEN];
 
  tmp_name= tmp_buff;
6641
  /* Scan name from the end */
6642 6643 6644 6645 6646 6647
  ptr= strend(path_name)-1;
  while (ptr >= path_name && *ptr != '\\' && *ptr != '/') {
    ptr--;
  }
  ptr--;
  end= ptr;
6648 6649 6650 6651
  while (ptr >= path_name && *ptr != '\\' && *ptr != '/') {
    ptr--;
  }
  uint name_len= end - ptr;
6652 6653
  memcpy(tmp_name, ptr + 1, name_len);
  tmp_name[name_len]= '\0';
6654 6655
#ifdef __WIN__
  /* Put to lower case */
6656
  
6657
  ptr= tmp_name;
6658 6659
  
  while (*ptr != '\0') {
6660
    *ptr= tolower(*ptr);
6661 6662 6663
    ptr++;
  }
#endif
6664
  filename_to_tablename(tmp_name, dbname, FN_REFLEN);
6665 6666
}

6667 6668 6669 6670 6671 6672 6673 6674 6675
/*
  Set m_dbname from full pathname to table file
 */

void ha_ndbcluster::set_dbname(const char *path_name)
{
  set_dbname(path_name, m_dbname);
}

6676 6677 6678 6679 6680 6681 6682
/**
 * Set a given location from full pathname to table file
 *
 */
void
ha_ndbcluster::set_tabname(const char *path_name, char * tabname)
{
6683 6684 6685 6686
  char *end, *ptr, *tmp_name;
  char tmp_buff[FN_REFLEN];

  tmp_name= tmp_buff;
6687
  /* Scan name from the end */
6688 6689
  end= strend(path_name)-1;
  ptr= end;
6690 6691 6692
  while (ptr >= path_name && *ptr != '\\' && *ptr != '/') {
    ptr--;
  }
6693
  uint name_len= end - ptr;
6694 6695
  memcpy(tmp_name, ptr + 1, end - ptr);
  tmp_name[name_len]= '\0';
6696 6697
#ifdef __WIN__
  /* Put to lower case */
6698
  ptr= tmp_name;
6699 6700 6701 6702 6703 6704
  
  while (*ptr != '\0') {
    *ptr= tolower(*ptr);
    ptr++;
  }
#endif
6705
  filename_to_tablename(tmp_name, tabname, FN_REFLEN);
6706 6707 6708
}

/*
6709
  Set m_tabname from full pathname to table file 
6710 6711
 */

6712
void ha_ndbcluster::set_tabname(const char *path_name)
6713
{
6714
  set_tabname(path_name, m_tabname);
6715 6716 6717 6718
}


ha_rows 
6719 6720 6721 6722
ha_ndbcluster::records_in_range(uint inx, key_range *min_key,
                                key_range *max_key)
{
  KEY *key_info= table->key_info + inx;
6723
  uint key_length= key_info->key_length;
6724
  NDB_INDEX_TYPE idx_type= get_index_type(inx);  
6725 6726

  DBUG_ENTER("records_in_range");
6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739
  // Prevent partial read of hash indexes by returning HA_POS_ERROR
  if ((idx_type == UNIQUE_INDEX || idx_type == PRIMARY_KEY_INDEX) &&
      ((min_key && min_key->length < key_length) ||
       (max_key && max_key->length < key_length)))
    DBUG_RETURN(HA_POS_ERROR);
  
  // Read from hash index with full key
  // This is a "const" table which returns only one record!      
  if ((idx_type != ORDERED_INDEX) &&
      ((min_key && min_key->length == key_length) || 
       (max_key && max_key->length == key_length)))
    DBUG_RETURN(1);
  
6740 6741 6742 6743 6744 6745
  if ((idx_type == PRIMARY_KEY_ORDERED_INDEX ||
       idx_type == UNIQUE_ORDERED_INDEX ||
       idx_type == ORDERED_INDEX) &&
    m_index[inx].index_stat != NULL)
  {
    NDB_INDEX_DATA& d=m_index[inx];
6746
    const NDBINDEX* index= d.index;
6747 6748 6749 6750 6751 6752 6753 6754 6755 6756
    Ndb* ndb=get_ndb();
    NdbTransaction* trans=NULL;
    NdbIndexScanOperation* op=NULL;
    int res=0;
    Uint64 rows;

    do
    {
      // We must provide approx table rows
      Uint64 table_rows=0;
6757
      Ndb_local_table_statistics *info= m_table_info;
6758 6759 6760 6761 6762 6763 6764 6765
      if (info->records != ~(ha_rows)0 && info->records != 0)
      {
        table_rows = info->records;
        DBUG_PRINT("info", ("use info->records: %llu", table_rows));
      }
      else
      {
        Ndb_statistics stat;
6766
        if ((res=ndb_get_table_statistics(ndb, m_table, &stat)) != 0)
6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779
          break;
        table_rows=stat.row_count;
        DBUG_PRINT("info", ("use db row_count: %llu", table_rows));
        if (table_rows == 0) {
          // Problem if autocommit=0
#ifdef ndb_get_table_statistics_uses_active_trans
          rows=0;
          break;
#endif
        }
      }

      // Define scan op for the range
6780 6781
      if ((trans=m_active_trans) == NULL || 
	  trans->commitStatus() != NdbTransaction::Started)
6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817
      {
        DBUG_PRINT("info", ("no active trans"));
        if (! (trans=ndb->startTransaction()))
          ERR_BREAK(ndb->getNdbError(), res);
      }
      if (! (op=trans->getNdbIndexScanOperation(index, (NDBTAB*)m_table)))
        ERR_BREAK(trans->getNdbError(), res);
      if ((op->readTuples(NdbOperation::LM_CommittedRead)) == -1)
        ERR_BREAK(op->getNdbError(), res);
      const key_range *keys[2]={ min_key, max_key };
      if ((res=set_bounds(op, inx, true, keys)) != 0)
        break;

      // Decide if db should be contacted
      int flags=0;
      if (d.index_stat_query_count < d.index_stat_cache_entries ||
          (d.index_stat_update_freq != 0 &&
           d.index_stat_query_count % d.index_stat_update_freq == 0))
      {
        DBUG_PRINT("info", ("force stat from db"));
        flags|=NdbIndexStat::RR_UseDb;
      }
      if (d.index_stat->records_in_range(index, op, table_rows, &rows, flags) == -1)
        ERR_BREAK(d.index_stat->getNdbError(), res);
      d.index_stat_query_count++;
    } while (0);

    if (trans != m_active_trans && rows == 0)
      rows = 1;
    if (trans != m_active_trans && trans != NULL)
      ndb->closeTransaction(trans);
    if (res != 0)
      DBUG_RETURN(HA_POS_ERROR);
    DBUG_RETURN(rows);
  }

6818
  DBUG_RETURN(10); /* Good guess when you don't know anything */
6819 6820
}

6821
ulonglong ha_ndbcluster::table_flags(void) const
6822 6823
{
  if (m_ha_not_exact_count)
6824 6825
    return m_table_flags & ~HA_STATS_RECORDS_IS_EXACT;
  return m_table_flags;
6826 6827 6828
}
const char * ha_ndbcluster::table_type() const 
{
6829
  return("NDBCLUSTER");
6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846
}
uint ha_ndbcluster::max_supported_record_length() const
{ 
  return NDB_MAX_TUPLE_SIZE;
}
uint ha_ndbcluster::max_supported_keys() const
{
  return MAX_KEY;
}
uint ha_ndbcluster::max_supported_key_parts() const 
{
  return NDB_MAX_NO_OF_ATTRIBUTES_IN_KEY;
}
uint ha_ndbcluster::max_supported_key_length() const
{
  return NDB_MAX_KEY_SIZE;
}
pekka@mysql.com's avatar
pekka@mysql.com committed
6847 6848 6849 6850
uint ha_ndbcluster::max_supported_key_part_length() const
{
  return NDB_MAX_KEY_SIZE;
}
6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871
bool ha_ndbcluster::low_byte_first() const
{ 
#ifdef WORDS_BIGENDIAN
  return FALSE;
#else
  return TRUE;
#endif
}
const char* ha_ndbcluster::index_type(uint key_number)
{
  switch (get_index_type(key_number)) {
  case ORDERED_INDEX:
  case UNIQUE_ORDERED_INDEX:
  case PRIMARY_KEY_ORDERED_INDEX:
    return "BTREE";
  case UNIQUE_INDEX:
  case PRIMARY_KEY_INDEX:
  default:
    return "HASH";
  }
}
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6872

6873 6874
uint8 ha_ndbcluster::table_cache_type()
{
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6875 6876 6877 6878 6879 6880
  DBUG_ENTER("ha_ndbcluster::table_cache_type=HA_CACHE_TBL_ASKTRANSACT");
  DBUG_RETURN(HA_CACHE_TBL_ASKTRANSACT);
}


uint ndb_get_commitcount(THD *thd, char *dbname, char *tabname,
6881
                         Uint64 *commit_count)
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6882
{
6883 6884
  char name[FN_REFLEN];
  NDB_SHARE *share;
6885 6886
  DBUG_ENTER("ndb_get_commitcount");

6887
  build_table_filename(name, sizeof(name), dbname, tabname, "", 0);
6888 6889 6890 6891 6892 6893 6894
  DBUG_PRINT("enter", ("name: %s", name));
  pthread_mutex_lock(&ndbcluster_mutex);
  if (!(share=(NDB_SHARE*) hash_search(&ndbcluster_open_tables,
                                       (byte*) name,
                                       strlen(name))))
  {
    pthread_mutex_unlock(&ndbcluster_mutex);
6895
    DBUG_PRINT("info", ("Table %s not found in ndbcluster_open_tables", name));
6896 6897 6898 6899 6900 6901
    DBUG_RETURN(1);
  }
  share->use_count++;
  pthread_mutex_unlock(&ndbcluster_mutex);

  pthread_mutex_lock(&share->mutex);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6902 6903
  if (ndb_cache_check_time > 0)
  {
6904
    if (share->commit_count != 0)
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6905
    {
6906
      *commit_count= share->commit_count;
6907 6908 6909
      char buff[22];
      DBUG_PRINT("info", ("Getting commit_count: %s from share",
                          llstr(share->commit_count, buff)));
6910
      pthread_mutex_unlock(&share->mutex);
6911
      free_share(&share);
6912
      DBUG_RETURN(0);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6913 6914
    }
  }
6915
  DBUG_PRINT("info", ("Get commit_count from NDB"));
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6916 6917 6918 6919
  Ndb *ndb;
  if (!(ndb= check_ndb_in_thd(thd)))
    DBUG_RETURN(1);
  ndb->setDatabaseName(dbname);
6920 6921
  uint lock= share->commit_count_lock;
  pthread_mutex_unlock(&share->mutex);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6922 6923

  struct Ndb_statistics stat;
6924
  {
6925 6926 6927 6928 6929 6930 6931
    Ndb_table_guard ndbtab_g(ndb->getDictionary(), tabname);
    if (ndbtab_g.get_table() == 0
        || ndb_get_table_statistics(ndb, ndbtab_g.get_table(), &stat))
    {
      free_share(&share);
      DBUG_RETURN(1);
    }
6932 6933 6934
  }

  pthread_mutex_lock(&share->mutex);
6935
  if (share->commit_count_lock == lock)
6936
  {
6937 6938 6939
    char buff[22];
    DBUG_PRINT("info", ("Setting commit_count to %s",
                        llstr(stat.commit_count, buff)));
6940 6941 6942 6943 6944 6945 6946 6947 6948
    share->commit_count= stat.commit_count;
    *commit_count= stat.commit_count;
  }
  else
  {
    DBUG_PRINT("info", ("Discarding commit_count, comit_count_lock changed"));
    *commit_count= 0;
  }
  pthread_mutex_unlock(&share->mutex);
6949
  free_share(&share);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985
  DBUG_RETURN(0);
}


/*
  Check if a cached query can be used.
  This is done by comparing the supplied engine_data to commit_count of
  the table.
  The commit_count is either retrieved from the share for the table, where
  it has been cached by the util thread. If the util thread is not started,
  NDB has to be contacetd to retrieve the commit_count, this will introduce
  a small delay while waiting for NDB to answer.


  SYNOPSIS
  ndbcluster_cache_retrieval_allowed
    thd            thread handle
    full_name      concatenation of database name,
                   the null character '\0', and the table
                   name
    full_name_len  length of the full name,
                   i.e. len(dbname) + len(tablename) + 1

    engine_data    parameter retrieved when query was first inserted into
                   the cache. If the value of engine_data is changed,
                   all queries for this table should be invalidated.

  RETURN VALUE
    TRUE  Yes, use the query from cache
    FALSE No, don't use the cached query, and if engine_data
          has changed, all queries for this table should be invalidated

*/

static my_bool
ndbcluster_cache_retrieval_allowed(THD *thd,
6986 6987
                                   char *full_name, uint full_name_len,
                                   ulonglong *engine_data)
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6988 6989 6990 6991 6992
{
  Uint64 commit_count;
  bool is_autocommit= !(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN));
  char *dbname= full_name;
  char *tabname= dbname+strlen(dbname)+1;
6993 6994
  char buff[22], buff2[22];
  DBUG_ENTER("ndbcluster_cache_retrieval_allowed");
6995 6996
  DBUG_PRINT("enter", ("dbname: %s, tabname: %s, is_autocommit: %d",
                       dbname, tabname, is_autocommit));
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6997 6998

  if (!is_autocommit)
6999 7000
  {
    DBUG_PRINT("exit", ("No, don't use cache in transaction"));
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
7001
    DBUG_RETURN(FALSE);
7002
  }
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
7003 7004 7005

  if (ndb_get_commitcount(thd, dbname, tabname, &commit_count))
  {
7006 7007
    *engine_data= 0; /* invalidate */
    DBUG_PRINT("exit", ("No, could not retrieve commit_count"));
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
7008 7009
    DBUG_RETURN(FALSE);
  }
7010 7011
  DBUG_PRINT("info", ("*engine_data: %s, commit_count: %s",
                      llstr(*engine_data, buff), llstr(commit_count, buff2)));
7012
  if (commit_count == 0)
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
7013
  {
7014 7015
    *engine_data= 0; /* invalidate */
    DBUG_PRINT("exit", ("No, local commit has been performed"));
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
7016 7017
    DBUG_RETURN(FALSE);
  }
7018 7019 7020 7021 7022 7023
  else if (*engine_data != commit_count)
  {
    *engine_data= commit_count; /* invalidate */
     DBUG_PRINT("exit", ("No, commit_count has changed"));
     DBUG_RETURN(FALSE);
   }
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
7024

7025 7026
  DBUG_PRINT("exit", ("OK to use cache, engine_data: %s",
                      llstr(*engine_data, buff)));
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054
  DBUG_RETURN(TRUE);
}


/**
   Register a table for use in the query cache. Fetch the commit_count
   for the table and return it in engine_data, this will later be used
   to check if the table has changed, before the cached query is reused.

   SYNOPSIS
   ha_ndbcluster::can_query_cache_table
    thd            thread handle
    full_name      concatenation of database name,
                   the null character '\0', and the table
                   name
    full_name_len  length of the full name,
                   i.e. len(dbname) + len(tablename) + 1
    qc_engine_callback  function to be called before using cache on this table
    engine_data    out, commit_count for this table

  RETURN VALUE
    TRUE  Yes, it's ok to cahce this query
    FALSE No, don't cach the query

*/

my_bool
ha_ndbcluster::register_query_cache_table(THD *thd,
7055 7056 7057
                                          char *full_name, uint full_name_len,
                                          qc_engine_callback *engine_callback,
                                          ulonglong *engine_data)
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
7058
{
7059 7060
  Uint64 commit_count;
  char buff[22];
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
7061
  bool is_autocommit= !(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN));
7062
  DBUG_ENTER("ha_ndbcluster::register_query_cache_table");
7063 7064 7065
  DBUG_PRINT("enter",("dbname: %s, tabname: %s, is_autocommit: %d",
		      m_dbname, m_tabname, is_autocommit));

mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
7066
  if (!is_autocommit)
7067
  {
serg@serg.mylan's avatar
serg@serg.mylan committed
7068
    DBUG_PRINT("exit", ("Can't register table during transaction"));
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
7069
    DBUG_RETURN(FALSE);
7070
  }
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
7071 7072 7073 7074

  if (ndb_get_commitcount(thd, m_dbname, m_tabname, &commit_count))
  {
    *engine_data= 0;
serg@serg.mylan's avatar
serg@serg.mylan committed
7075
    DBUG_PRINT("exit", ("Error, could not get commitcount"));
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
7076 7077 7078 7079
    DBUG_RETURN(FALSE);
  }
  *engine_data= commit_count;
  *engine_callback= ndbcluster_cache_retrieval_allowed;
7080
  DBUG_PRINT("exit", ("commit_count: %s", llstr(commit_count, buff)));
7081
  DBUG_RETURN(commit_count > 0);
7082
}
7083

mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
7084

7085
/*
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
7086
  Handling the shared NDB_SHARE structure that is needed to
7087 7088 7089 7090 7091 7092
  provide table locking.
  It's also used for sharing data with other NDB handlers
  in the same MySQL Server. There is currently not much
  data we want to or can share.
 */

7093
static byte *ndbcluster_get_key(NDB_SHARE *share,uint *length,
7094
                                my_bool not_used __attribute__((unused)))
7095
{
7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113
  *length= share->key_length;
  return (byte*) share->key;
}

#ifndef DBUG_OFF
static void dbug_print_open_tables()
{
  DBUG_ENTER("dbug_print_open_tables");
  for (uint i= 0; i < ndbcluster_open_tables.records; i++)
  {
    NDB_SHARE *share= (NDB_SHARE*) hash_element(&ndbcluster_open_tables, i);
    DBUG_PRINT("share",
               ("[%d] 0x%lx key: %s  key_length: %d",
                i, share, share->key, share->key_length));
    DBUG_PRINT("share",
               ("db.tablename: %s.%s  use_count: %d  commit_count: %d",
                share->db, share->table_name,
                share->use_count, share->commit_count));
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
7114 7115 7116 7117 7118 7119
#ifdef HAVE_NDB_BINLOG
    if (share->table)
      DBUG_PRINT("share",
                 ("table->s->db.table_name: %s.%s",
                  share->table->s->db.str, share->table->s->table_name.str));
#endif
7120 7121
  }
  DBUG_VOID_RETURN;
7122
}
7123 7124 7125
#else
#define dbug_print_open_tables()
#endif
7126

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139
#ifdef HAVE_NDB_BINLOG
/*
  For some reason a share is still around, try to salvage the situation
  by closing all cached tables. If the share still exists, there is an
  error somewhere but only report this to the error log.  Keep this
  "trailing share" but rename it since there are still references to it
  to avoid segmentation faults.  There is a risk that the memory for
  this trailing share leaks.
  
  Must be called with previous pthread_mutex_lock(&ndbcluster_mutex)
*/
int handle_trailing_share(NDB_SHARE *share)
{
7140
  THD *thd= current_thd;
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
7141 7142 7143 7144 7145 7146
  static ulong trailing_share_id= 0;
  DBUG_ENTER("handle_trailing_share");

  ++share->use_count;
  pthread_mutex_unlock(&ndbcluster_mutex);

7147 7148 7149 7150
  TABLE_LIST table_list;
  bzero((char*) &table_list,sizeof(table_list));
  table_list.db= share->db;
  table_list.alias= table_list.table_name= share->table_name;
7151
  close_cached_tables(thd, 0, &table_list, TRUE);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181

  pthread_mutex_lock(&ndbcluster_mutex);
  if (!--share->use_count)
  {
    DBUG_PRINT("info", ("NDB_SHARE: close_cashed_tables %s freed share.",
               share->key)); 
    real_free_share(&share);
    DBUG_RETURN(0);
  }

  /*
    share still exists, if share has not been dropped by server
    release that share
  */
  if (share->state != NSS_DROPPED && !--share->use_count)
  {
    DBUG_PRINT("info", ("NDB_SHARE: %s already exists, "
                        "use_count=%d  state != NSS_DROPPED.",
                        share->key, share->use_count)); 
    real_free_share(&share);
    DBUG_RETURN(0);
  }
  DBUG_PRINT("error", ("NDB_SHARE: %s already exists  use_count=%d.",
                       share->key, share->use_count));

  sql_print_error("NDB_SHARE: %s already exists  use_count=%d."
                  " Moving away for safety, but possible memleak.",
                  share->key, share->use_count);
  dbug_print_open_tables();

7182 7183 7184 7185 7186
  /*
    Ndb share has not been released as it should
  */
  DBUG_ASSERT(FALSE);

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275
  /*
    This is probably an error.  We can however save the situation
    at the cost of a possible mem leak, by "renaming" the share
    - First remove from hash
  */
  hash_delete(&ndbcluster_open_tables, (byte*) share);

  /*
    now give it a new name, just a running number
    if space is not enough allocate some more
  */
  {
    const uint min_key_length= 10;
    if (share->key_length < min_key_length)
    {
      share->key= alloc_root(&share->mem_root, min_key_length + 1);
      share->key_length= min_key_length;
    }
    share->key_length=
      my_snprintf(share->key, min_key_length + 1, "#leak%d",
                  trailing_share_id++);
  }
  /* Keep it for possible the future trailing free */
  my_hash_insert(&ndbcluster_open_tables, (byte*) share);

  DBUG_RETURN(0);
}

/*
  Rename share is used during rename table.
*/
static int rename_share(NDB_SHARE *share, const char *new_key)
{
  NDB_SHARE *tmp;
  pthread_mutex_lock(&ndbcluster_mutex);
  uint new_length= (uint) strlen(new_key);
  DBUG_PRINT("rename_share", ("old_key: %s  old__length: %d",
                              share->key, share->key_length));
  if ((tmp= (NDB_SHARE*) hash_search(&ndbcluster_open_tables,
                                     (byte*) new_key, new_length)))
    handle_trailing_share(tmp);

  /* remove the share from hash */
  hash_delete(&ndbcluster_open_tables, (byte*) share);
  dbug_print_open_tables();

  /* save old stuff if insert should fail */
  uint old_length= share->key_length;
  char *old_key= share->key;

  /*
    now allocate and set the new key, db etc
    enough space for key, db, and table_name
  */
  share->key= alloc_root(&share->mem_root, 2 * (new_length + 1));
  strmov(share->key, new_key);
  share->key_length= new_length;

  if (my_hash_insert(&ndbcluster_open_tables, (byte*) share))
  {
    // ToDo free the allocated stuff above?
    DBUG_PRINT("error", ("rename_share: my_hash_insert %s failed",
                         share->key));
    share->key= old_key;
    share->key_length= old_length;
    if (my_hash_insert(&ndbcluster_open_tables, (byte*) share))
    {
      sql_print_error("rename_share: failed to recover %s", share->key);
      DBUG_PRINT("error", ("rename_share: my_hash_insert %s failed",
                           share->key));
    }
    dbug_print_open_tables();
    pthread_mutex_unlock(&ndbcluster_mutex);
    return -1;
  }
  dbug_print_open_tables();

  share->db= share->key + new_length + 1;
  ha_ndbcluster::set_dbname(new_key, share->db);
  share->table_name= share->db + strlen(share->db) + 1;
  ha_ndbcluster::set_tabname(new_key, share->table_name);

  DBUG_PRINT("rename_share",
             ("0x%lx key: %s  key_length: %d",
              share, share->key, share->key_length));
  DBUG_PRINT("rename_share",
             ("db.tablename: %s.%s  use_count: %d  commit_count: %d",
              share->db, share->table_name,
              share->use_count, share->commit_count));
7276
  if (share->table)
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
7277
  {
7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288
    DBUG_PRINT("rename_share",
               ("table->s->db.table_name: %s.%s",
                share->table->s->db.str, share->table->s->table_name.str));

    if (share->op == 0)
    {
      share->table->s->db.str= share->db;
      share->table->s->db.length= strlen(share->db);
      share->table->s->table_name.str= share->table_name;
      share->table->s->table_name.length= strlen(share->table_name);
    }
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
7289 7290 7291 7292 7293 7294 7295 7296 7297 7298
  }
  /* else rename will be handled when the ALTER event comes */
  share->old_names= old_key;
  // ToDo free old_names after ALTER EVENT

  pthread_mutex_unlock(&ndbcluster_mutex);
  return 0;
}
#endif

7299 7300 7301 7302
/*
  Increase refcount on existing share.
  Always returns share and cannot fail.
*/
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
7303
NDB_SHARE *ndbcluster_get_share(NDB_SHARE *share)
7304 7305
{
  pthread_mutex_lock(&ndbcluster_mutex);
7306 7307 7308 7309 7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320
  share->use_count++;

  dbug_print_open_tables();

  DBUG_PRINT("get_share",
             ("0x%lx key: %s  key_length: %d",
              share, share->key, share->key_length));
  DBUG_PRINT("get_share",
             ("db.tablename: %s.%s  use_count: %d  commit_count: %d",
              share->db, share->table_name,
              share->use_count, share->commit_count));
  pthread_mutex_unlock(&ndbcluster_mutex);
  return share;
}

monty@mysql.com's avatar
monty@mysql.com committed
7321

7322 7323 7324 7325 7326 7327 7328 7329 7330 7331 7332 7333 7334 7335
/*
  Get a share object for key

  Returns share for key, and increases the refcount on the share.

  create_if_not_exists == TRUE:
    creates share if it does not alreade exist
    returns 0 only due to out of memory, and then sets my_error

  create_if_not_exists == FALSE:
    returns 0 if share does not exist

  have_lock == TRUE, pthread_mutex_lock(&ndbcluster_mutex) already taken
*/
monty@mysql.com's avatar
monty@mysql.com committed
7336

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
7337 7338 7339
NDB_SHARE *ndbcluster_get_share(const char *key, TABLE *table,
                                bool create_if_not_exists,
                                bool have_lock)
7340
{
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
7341
  THD *thd= current_thd;
7342
  NDB_SHARE *share;
monty@mysql.com's avatar
monty@mysql.com committed
7343 7344 7345 7346
  uint length= (uint) strlen(key);
  DBUG_ENTER("ndbcluster_get_share");
  DBUG_PRINT("enter", ("key: '%s'", key));

7347 7348 7349 7350 7351
  if (!have_lock)
    pthread_mutex_lock(&ndbcluster_mutex);
  if (!(share= (NDB_SHARE*) hash_search(&ndbcluster_open_tables,
                                        (byte*) key,
                                        length)))
7352
  {
7353 7354 7355 7356 7357
    if (!create_if_not_exists)
    {
      DBUG_PRINT("error", ("get_share: %s does not exist", key));
      if (!have_lock)
        pthread_mutex_unlock(&ndbcluster_mutex);
7358
      DBUG_RETURN(0);
7359 7360
    }
    if ((share= (NDB_SHARE*) my_malloc(sizeof(*share),
7361 7362
                                       MYF(MY_WME | MY_ZEROFILL))))
    {
7363 7364 7365 7366 7367
      MEM_ROOT **root_ptr=
        my_pthread_getspecific_ptr(MEM_ROOT**, THR_MALLOC);
      MEM_ROOT *old_root= *root_ptr;
      init_sql_alloc(&share->mem_root, 1024, 0);
      *root_ptr= &share->mem_root; // remember to reset before return
7368
      share->state= NSS_INITIAL;
7369 7370 7371 7372
      /* enough space for key, db, and table_name */
      share->key= alloc_root(*root_ptr, 2 * (length + 1));
      share->key_length= length;
      strmov(share->key, key);
7373 7374
      if (my_hash_insert(&ndbcluster_open_tables, (byte*) share))
      {
7375 7376 7377 7378 7379
        free_root(&share->mem_root, MYF(0));
        my_free((gptr) share, 0);
        *root_ptr= old_root;
        if (!have_lock)
          pthread_mutex_unlock(&ndbcluster_mutex);
7380
        DBUG_RETURN(0);
7381 7382
      }
      thr_lock_init(&share->lock);
7383
      pthread_mutex_init(&share->mutex, MY_MUTEX_INIT_FAST);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
7384
      share->commit_count= 0;
7385
      share->commit_count_lock= 0;
7386 7387 7388 7389
      share->db= share->key + length + 1;
      ha_ndbcluster::set_dbname(key, share->db);
      share->table_name= share->db + strlen(share->db) + 1;
      ha_ndbcluster::set_tabname(key, share->table_name);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
7390 7391 7392
#ifdef HAVE_NDB_BINLOG
      ndbcluster_binlog_init_share(share, table);
#endif
7393
      *root_ptr= old_root;
7394 7395 7396
    }
    else
    {
7397 7398 7399 7400
      DBUG_PRINT("error", ("get_share: failed to alloc share"));
      if (!have_lock)
        pthread_mutex_unlock(&ndbcluster_mutex);
      my_error(ER_OUTOFMEMORY, MYF(0), sizeof(*share));
7401
      DBUG_RETURN(0);
7402 7403 7404
    }
  }
  share->use_count++;
7405

7406 7407
  dbug_print_open_tables();

monty@mysql.com's avatar
monty@mysql.com committed
7408
  DBUG_PRINT("info",
7409 7410
             ("0x%lx key: %s  key_length: %d  key: %s",
              share, share->key, share->key_length, key));
monty@mysql.com's avatar
monty@mysql.com committed
7411
  DBUG_PRINT("info",
7412 7413 7414 7415 7416
             ("db.tablename: %s.%s  use_count: %d  commit_count: %d",
              share->db, share->table_name,
              share->use_count, share->commit_count));
  if (!have_lock)
    pthread_mutex_unlock(&ndbcluster_mutex);
7417
  DBUG_RETURN(share);
7418 7419
}

monty@mysql.com's avatar
monty@mysql.com committed
7420

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
7421
void ndbcluster_real_free_share(NDB_SHARE **share)
7422
{
monty@mysql.com's avatar
monty@mysql.com committed
7423
  DBUG_ENTER("ndbcluster_real_free_share");
7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435
  DBUG_PRINT("real_free_share",
             ("0x%lx key: %s  key_length: %d",
              (*share), (*share)->key, (*share)->key_length));
  DBUG_PRINT("real_free_share",
             ("db.tablename: %s.%s  use_count: %d  commit_count: %d",
              (*share)->db, (*share)->table_name,
              (*share)->use_count, (*share)->commit_count));

  hash_delete(&ndbcluster_open_tables, (byte*) *share);
  thr_lock_delete(&(*share)->lock);
  pthread_mutex_destroy(&(*share)->mutex);

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
7436 7437 7438
#ifdef HAVE_NDB_BINLOG
  if ((*share)->table)
  {
7439
    // (*share)->table->mem_root is freed by closefrm
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
7440
    closefrm((*share)->table, 0);
7441 7442
    // (*share)->table_share->mem_root is freed by free_table_share
    free_table_share((*share)->table_share);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
7443 7444 7445 7446 7447 7448 7449 7450
#ifndef DBUG_OFF
    bzero((gptr)(*share)->table_share, sizeof(*(*share)->table_share));
    bzero((gptr)(*share)->table, sizeof(*(*share)->table));
    (*share)->table_share= 0;
    (*share)->table= 0;
#endif
  }
#endif
monty@mysql.com's avatar
monty@mysql.com committed
7451
  free_root(&(*share)->mem_root, MYF(0));
7452 7453 7454 7455
  my_free((gptr) *share, MYF(0));
  *share= 0;

  dbug_print_open_tables();
monty@mysql.com's avatar
monty@mysql.com committed
7456
  DBUG_VOID_RETURN;
7457 7458 7459 7460 7461
}

/*
  decrease refcount of share
  calls real_free_share when refcount reaches 0
7462

7463 7464
  have_lock == TRUE, pthread_mutex_lock(&ndbcluster_mutex) already taken
*/
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
7465
void ndbcluster_free_share(NDB_SHARE **share, bool have_lock)
7466
{
7467 7468 7469 7470 7471
  if (!have_lock)
    pthread_mutex_lock(&ndbcluster_mutex);
  if ((*share)->util_lock == current_thd)
    (*share)->util_lock= 0;
  if (!--(*share)->use_count)
7472
  {
7473
    real_free_share(share);
7474
  }
7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487
  else
  {
    dbug_print_open_tables();
    DBUG_PRINT("free_share",
               ("0x%lx key: %s  key_length: %d",
                *share, (*share)->key, (*share)->key_length));
    DBUG_PRINT("free_share",
               ("db.tablename: %s.%s  use_count: %d  commit_count: %d",
                (*share)->db, (*share)->table_name,
                (*share)->use_count, (*share)->commit_count));
  }
  if (!have_lock)
    pthread_mutex_unlock(&ndbcluster_mutex);
7488 7489 7490
}


7491 7492
static 
int
7493
ndb_get_table_statistics(Ndb* ndb, const NDBTAB *ndbtab,
7494
                         struct Ndb_statistics * ndbstat)
7495
{
7496
  NdbTransaction* pTrans;
7497
  NdbError error;
7498 7499
  int retries= 10;
  int retry_sleep= 30 * 1000; /* 30 milliseconds */
7500 7501
  char buff[22], buff2[22], buff3[22], buff4[22];
  DBUG_ENTER("ndb_get_table_statistics");
kostja@bodhi.local's avatar
kostja@bodhi.local committed
7502
  DBUG_PRINT("enter", ("table: %s", ndbtab->getName()));
7503

7504 7505
  DBUG_ASSERT(ndbtab != 0);

7506
  do
7507
  {
7508
    Uint64 rows, commits, fixed_mem, var_mem;
7509
    Uint32 size;
7510
    Uint32 count= 0;
7511 7512
    Uint64 sum_rows= 0;
    Uint64 sum_commits= 0;
7513 7514
    Uint64 sum_row_size= 0;
    Uint64 sum_mem= 0;
7515 7516 7517 7518 7519
    NdbScanOperation*pOp;
    NdbResultSet *rs;
    int check;

    if ((pTrans= ndb->startTransaction()) == NULL)
7520
    {
7521 7522 7523
      error= ndb->getNdbError();
      goto retry;
    }
7524
      
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
7525
    if ((pOp= pTrans->getNdbScanOperation(ndbtab)) == NULL)
7526 7527 7528
    {
      error= pTrans->getNdbError();
      goto retry;
7529
    }
7530
    
7531
    if (pOp->readTuples(NdbOperation::LM_CommittedRead))
7532 7533 7534 7535
    {
      error= pOp->getNdbError();
      goto retry;
    }
7536
    
7537 7538 7539 7540 7541
    if (pOp->interpret_exit_last_row() == -1)
    {
      error= pOp->getNdbError();
      goto retry;
    }
7542 7543 7544
    
    pOp->getValue(NdbDictionary::Column::ROW_COUNT, (char*)&rows);
    pOp->getValue(NdbDictionary::Column::COMMIT_COUNT, (char*)&commits);
7545
    pOp->getValue(NdbDictionary::Column::ROW_SIZE, (char*)&size);
7546 7547 7548 7549
    pOp->getValue(NdbDictionary::Column::FRAGMENT_FIXED_MEMORY, 
		  (char*)&fixed_mem);
    pOp->getValue(NdbDictionary::Column::FRAGMENT_VARSIZED_MEMORY, 
		  (char*)&var_mem);
7550
    
7551 7552 7553
    if (pTrans->execute(NdbTransaction::NoCommit,
                        NdbTransaction::AbortOnError,
                        TRUE) == -1)
7554
    {
7555 7556
      error= pTrans->getNdbError();
      goto retry;
7557
    }
7558
    
monty@mishka.local's avatar
monty@mishka.local committed
7559
    while ((check= pOp->nextResult(TRUE, TRUE)) == 0)
7560 7561 7562
    {
      sum_rows+= rows;
      sum_commits+= commits;
7563
      if (sum_row_size < size)
7564
        sum_row_size= size;
7565
      sum_mem+= fixed_mem + var_mem;
7566
      count++;
7567 7568 7569
    }
    
    if (check == -1)
7570 7571 7572 7573
    {
      error= pOp->getNdbError();
      goto retry;
    }
7574

7575
    pOp->close(TRUE);
7576

7577
    ndb->closeTransaction(pTrans);
7578 7579 7580 7581 7582 7583

    ndbstat->row_count= sum_rows;
    ndbstat->commit_count= sum_commits;
    ndbstat->row_size= sum_row_size;
    ndbstat->fragment_memory= sum_mem;

7584 7585 7586 7587 7588 7589 7590
    DBUG_PRINT("exit", ("records: %s  commits: %s "
                        "row_size: %s  mem: %s count: %u",
			llstr(sum_rows, buff),
                        llstr(sum_commits, buff2),
                        llstr(sum_row_size, buff3),
                        llstr(sum_mem, buff4),
                        count));
7591

7592
    DBUG_RETURN(0);
7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604
retry:
    if (pTrans)
    {
      ndb->closeTransaction(pTrans);
      pTrans= NULL;
    }
    if (error.status == NdbError::TemporaryError && retries--)
    {
      my_sleep(retry_sleep);
      continue;
    }
    break;
7605
  } while(1);
7606 7607
  DBUG_PRINT("exit", ("failed, error %u(%s)", error.code, error.message));
  ERR_RETURN(error);
7608 7609
}

7610 7611 7612 7613 7614
/*
  Create a .ndb file to serve as a placeholder indicating 
  that the table with this name is a ndb table
*/

7615
int ha_ndbcluster::write_ndb_file(const char *name)
7616 7617 7618 7619 7620 7621
{
  File file;
  bool error=1;
  char path[FN_REFLEN];
  
  DBUG_ENTER("write_ndb_file");
7622
  DBUG_PRINT("enter", ("name: %s", name));
7623

7624
  (void)strxnmov(path, FN_REFLEN-1, 
7625
                 mysql_data_home,"/",name,ha_ndb_ext,NullS);
7626 7627 7628 7629 7630 7631 7632 7633 7634 7635

  if ((file=my_create(path, CREATE_MODE,O_RDWR | O_TRUNC,MYF(MY_WME))) >= 0)
  {
    // It's an empty file
    error=0;
    my_close(file,MYF(0));
  }
  DBUG_RETURN(error);
}

7636
void 
7637 7638
ha_ndbcluster::release_completed_operations(NdbTransaction *trans,
					    bool force_release)
7639 7640 7641 7642 7643 7644 7645 7646
{
  if (trans->hasBlobOperation())
  {
    /* We are reading/writing BLOB fields, 
       releasing operation records is unsafe
    */
    return;
  }
7647 7648 7649 7650 7651 7652 7653 7654 7655 7656
  if (!force_release)
  {
    if (get_thd_ndb(current_thd)->query_state & NDB_QUERY_MULTI_READ_RANGE)
    {
      /* We are batching reads and have not consumed all fetched
	 rows yet, releasing operation records is unsafe 
      */
      return;
    }
  }
7657
  trans->releaseCompletedOperations();
7658 7659
}

7660
int
7661
ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
7662 7663 7664 7665
                                      KEY_MULTI_RANGE *ranges, 
                                      uint range_count,
                                      bool sorted, 
                                      HANDLER_BUFFER *buffer)
7666 7667
{
  DBUG_ENTER("ha_ndbcluster::read_multi_range_first");
7668
  m_write_op= FALSE;
7669
  
7670 7671
  int res;
  KEY* key_info= table->key_info + active_index;
7672
  NDB_INDEX_TYPE index_type= get_index_type(active_index);
7673
  ulong reclength= table_share->reclength;
7674
  NdbOperation* op;
7675
  Thd_ndb *thd_ndb= get_thd_ndb(current_thd);
7676

7677
  if (uses_blob_value())
7678 7679 7680 7681
  {
    /**
     * blobs can't be batched currently
     */
7682
    m_disable_multi_read= TRUE;
7683
    DBUG_RETURN(handler::read_multi_range_first(found_range_p, 
7684 7685 7686 7687
                                                ranges, 
                                                range_count,
                                                sorted, 
                                                buffer));
7688
  }
7689
  thd_ndb->query_state|= NDB_QUERY_MULTI_READ_RANGE;
7690
  m_disable_multi_read= FALSE;
7691 7692 7693 7694

  /**
   * Copy arguments into member variables
   */
7695 7696 7697
  m_multi_ranges= ranges;
  multi_range_curr= ranges;
  multi_range_end= ranges+range_count;
7698 7699 7700
  multi_range_sorted= sorted;
  multi_range_buffer= buffer;

7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711
  /**
   * read multi range will read ranges as follows (if not ordered)
   *
   * input    read order
   * ======   ==========
   * pk-op 1  pk-op 1
   * pk-op 2  pk-op 2
   * range 3  range (3,5) NOTE result rows will be intermixed
   * pk-op 4  pk-op 4
   * range 5
   * pk-op 6  pk-ok 6
7712 7713
   */   

mskold@mysql.com's avatar
mskold@mysql.com committed
7714
  /**
7715 7716
   * Variables for loop
   */
7717 7718
  byte *curr= (byte*)buffer->buffer;
  byte *end_of_buffer= (byte*)buffer->buffer_end;
7719 7720
  NdbOperation::LockMode lm= 
    (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
mskold@mysql.com's avatar
mskold@mysql.com committed
7721
  bool need_pk = (lm == NdbOperation::LM_Read);
7722 7723 7724
  const NDBTAB *tab= m_table;
  const NDBINDEX *unique_idx= m_index[active_index].unique_index;
  const NDBINDEX *idx= m_index[active_index].index; 
7725 7726
  const NdbOperation* lastOp= m_active_trans->getLastDefinedOperation();
  NdbIndexScanOperation* scanOp= 0;
7727 7728
  for (; multi_range_curr<multi_range_end && curr+reclength <= end_of_buffer; 
       multi_range_curr++)
7729
  {
7730 7731 7732 7733 7734 7735
    part_id_range part_spec;
    if (m_use_partition_function)
    {
      get_partition_set(table, curr, active_index,
                        &multi_range_curr->start_key,
                        &part_spec);
7736 7737 7738 7739 7740 7741
      DBUG_PRINT("info", ("part_spec.start_part = %u, part_spec.end_part = %u",
                          part_spec.start_part, part_spec.end_part));
      /*
        If partition pruning has found no partition in set
        we can skip this scan
      */
7742 7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753
      if (part_spec.start_part > part_spec.end_part)
      {
        /*
          We can skip this partition since the key won't fit into any
          partition
        */
        curr += reclength;
        multi_range_curr->range_flag |= SKIP_RANGE;
        continue;
      }
    }
    switch(index_type){
7754 7755
    case PRIMARY_KEY_ORDERED_INDEX:
      if (!(multi_range_curr->start_key.length == key_info->key_length &&
7756 7757 7758
          multi_range_curr->start_key.flag == HA_READ_KEY_EXACT))
        goto range;
      // else fall through
7759
    case PRIMARY_KEY_INDEX:
7760
    {
7761
      multi_range_curr->range_flag |= UNIQUE_RANGE;
7762
      if ((op= m_active_trans->getNdbOperation(tab)) && 
7763 7764 7765
          !op->readTuple(lm) && 
          !set_primary_key(op, multi_range_curr->start_key.key) &&
          !define_read_attrs(curr, op) &&
7766 7767 7768
          (op->setAbortOption(AO_IgnoreError), TRUE) &&
          (!m_use_partition_function ||
           (op->setPartitionId(part_spec.start_part), true)))
7769
        curr += reclength;
7770
      else
7771
        ERR_RETURN(op ? op->getNdbError() : m_active_trans->getNdbError());
7772
      break;
7773 7774
    }
    break;
7775 7776
    case UNIQUE_ORDERED_INDEX:
      if (!(multi_range_curr->start_key.length == key_info->key_length &&
7777 7778 7779 7780 7781
          multi_range_curr->start_key.flag == HA_READ_KEY_EXACT &&
          !check_null_in_key(key_info, multi_range_curr->start_key.key,
                             multi_range_curr->start_key.length)))
        goto range;
      // else fall through
7782
    case UNIQUE_INDEX:
7783
    {
7784
      multi_range_curr->range_flag |= UNIQUE_RANGE;
7785
      if ((op= m_active_trans->getNdbIndexOperation(unique_idx, tab)) && 
7786 7787 7788
          !op->readTuple(lm) && 
          !set_index_key(op, key_info, multi_range_curr->start_key.key) &&
          !define_read_attrs(curr, op) &&
7789
          (op->setAbortOption(AO_IgnoreError), TRUE))
7790
        curr += reclength;
7791
      else
7792
        ERR_RETURN(op ? op->getNdbError() : m_active_trans->getNdbError());
7793 7794
      break;
    }
7795
    case ORDERED_INDEX: {
7796
  range:
7797
      multi_range_curr->range_flag &= ~(uint)UNIQUE_RANGE;
7798 7799
      if (scanOp == 0)
      {
7800 7801 7802 7803 7804 7805
        if (m_multi_cursor)
        {
          scanOp= m_multi_cursor;
          DBUG_ASSERT(scanOp->getSorted() == sorted);
          DBUG_ASSERT(scanOp->getLockMode() == 
                      (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type));
7806
          if (scanOp->reset_bounds(m_force_send))
7807 7808 7809 7810 7811
            DBUG_RETURN(ndb_err(m_active_trans));
          
          end_of_buffer -= reclength;
        }
        else if ((scanOp= m_active_trans->getNdbIndexScanOperation(idx, tab)) 
mskold@mysql.com's avatar
mskold@mysql.com committed
7812 7813
                 &&!scanOp->readTuples(lm, 0, parallelism, sorted, 
				       FALSE, TRUE, need_pk)
7814 7815 7816 7817 7818 7819 7820 7821 7822 7823 7824
                 &&!generate_scan_filter(m_cond_stack, scanOp)
                 &&!define_read_attrs(end_of_buffer-reclength, scanOp))
        {
          m_multi_cursor= scanOp;
          m_multi_range_cursor_result_ptr= end_of_buffer-reclength;
        }
        else
        {
          ERR_RETURN(scanOp ? scanOp->getNdbError() : 
                     m_active_trans->getNdbError());
        }
7825
      }
7826

7827
      const key_range *keys[2]= { &multi_range_curr->start_key, 
7828
                                  &multi_range_curr->end_key };
7829 7830
      if ((res= set_bounds(scanOp, active_index, false, keys,
                           multi_range_curr-ranges)))
7831
        DBUG_RETURN(res);
7832
      break;
7833
    }
7834
    case UNDEFINED_INDEX:
mskold@mysql.com's avatar
mskold@mysql.com committed
7835 7836 7837 7838
      DBUG_ASSERT(FALSE);
      DBUG_RETURN(1);
      break;
    }
7839 7840
  }
  
7841
  if (multi_range_curr != multi_range_end)
7842
  {
7843 7844 7845 7846 7847 7848
    /**
     * Mark that we're using entire buffer (even if might not) as
     *   we haven't read all ranges for some reason
     * This as we don't want mysqld to reuse the buffer when we read
     *   the remaining ranges
     */
7849
    buffer->end_of_used_area= (byte*)buffer->buffer_end;
7850 7851 7852 7853 7854 7855 7856 7857 7858 7859 7860
  }
  else
  {
    buffer->end_of_used_area= curr;
  }
  
  /**
   * Set first operation in multi range
   */
  m_current_multi_operation= 
    lastOp ? lastOp->next() : m_active_trans->getFirstDefinedOperation();
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
7861
  if (!(res= execute_no_commit_ie(this, m_active_trans,true)))
7862
  {
7863 7864
    m_multi_range_defined= multi_range_curr;
    multi_range_curr= ranges;
7865 7866
    m_multi_range_result_ptr= (byte*)buffer->buffer;
    DBUG_RETURN(read_multi_range_next(found_range_p));
7867 7868 7869 7870
  }
  ERR_RETURN(m_active_trans->getNdbError());
}

7871
#if 0
7872
#define DBUG_MULTI_RANGE(x) DBUG_PRINT("info", ("read_multi_range_next: case %d\n", x));
7873 7874 7875 7876
#else
#define DBUG_MULTI_RANGE(x)
#endif

7877
int
7878
ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p)
7879 7880
{
  DBUG_ENTER("ha_ndbcluster::read_multi_range_next");
7881
  if (m_disable_multi_read)
7882
  {
7883
    DBUG_MULTI_RANGE(11);
7884
    DBUG_RETURN(handler::read_multi_range_next(multi_range_found_p));
7885
  }
7886
  
7887
  int res;
7888
  int range_no;
7889
  ulong reclength= table_share->reclength;
7890
  const NdbOperation* op= m_current_multi_operation;
7891
  for (;multi_range_curr < m_multi_range_defined; multi_range_curr++)
7892
  {
7893 7894 7895
    DBUG_MULTI_RANGE(12);
    if (multi_range_curr->range_flag & SKIP_RANGE)
      continue;
7896
    if (multi_range_curr->range_flag & UNIQUE_RANGE)
7897
    {
7898
      if (op->getNdbError().code == 0)
7899 7900
      {
        DBUG_MULTI_RANGE(13);
7901
        goto found_next;
7902
      }
7903 7904 7905
      
      op= m_active_trans->getNextCompletedOperation(op);
      m_multi_range_result_ptr += reclength;
7906
      continue;
7907
    } 
7908
    else if (m_multi_cursor && !multi_range_sorted)
7909
    {
7910 7911
      DBUG_MULTI_RANGE(1);
      if ((res= fetch_next(m_multi_cursor)) == 0)
7912
      {
7913 7914 7915
        DBUG_MULTI_RANGE(2);
        range_no= m_multi_cursor->get_range_no();
        goto found;
7916 7917 7918
      } 
      else
      {
7919
        DBUG_MULTI_RANGE(14);
7920
        goto close_scan;
7921 7922
      }
    }
7923
    else if (m_multi_cursor && multi_range_sorted)
7924
    {
7925 7926
      if (m_active_cursor && (res= fetch_next(m_multi_cursor)))
      {
7927 7928
        DBUG_MULTI_RANGE(3);
        goto close_scan;
7929
      }
7930
      
7931
      range_no= m_multi_cursor->get_range_no();
7932
      uint current_range_no= multi_range_curr - m_multi_ranges;
mskold@mysql.com's avatar
mskold@mysql.com committed
7933
      if ((uint) range_no == current_range_no)
7934
      {
7935
        DBUG_MULTI_RANGE(4);
7936
        // return current row
7937
        goto found;
7938
      }
7939
      else if (range_no > (int)current_range_no)
7940
      {
7941 7942 7943 7944
        DBUG_MULTI_RANGE(5);
        // wait with current row
        m_active_cursor= 0;
        continue;
7945 7946 7947
      }
      else 
      {
7948 7949 7950
        DBUG_MULTI_RANGE(6);
        // First fetch from cursor
        DBUG_ASSERT(range_no == -1);
7951
        if ((res= m_multi_cursor->nextResult(true)))
7952
        {
7953
          DBUG_MULTI_RANGE(15);
7954 7955 7956 7957
          goto close_scan;
        }
        multi_range_curr--; // Will be increased in for-loop
        continue;
7958
      }
7959
    }
7960
    else /** m_multi_cursor == 0 */
7961
    {
7962
      DBUG_MULTI_RANGE(7);
7963 7964 7965 7966
      /**
       * Corresponds to range 5 in example in read_multi_range_first
       */
      (void)1;
7967
      continue;
7968
    }
7969
    
7970
    DBUG_ASSERT(FALSE); // Should only get here via goto's
7971 7972 7973
close_scan:
    if (res == 1)
    {
7974
      m_multi_cursor->close(FALSE, TRUE);
7975
      m_active_cursor= m_multi_cursor= 0;
7976
      DBUG_MULTI_RANGE(8);
7977 7978 7979 7980
      continue;
    } 
    else 
    {
7981
      DBUG_MULTI_RANGE(9);
7982 7983 7984
      DBUG_RETURN(ndb_err(m_active_trans));
    }
  }
7985
  
7986
  if (multi_range_curr == multi_range_end)
7987 7988
  {
    DBUG_MULTI_RANGE(16);
7989
    DBUG_RETURN(HA_ERR_END_OF_FILE);
7990
  }
7991
  
7992 7993 7994 7995
  /**
   * Read remaining ranges
   */
  DBUG_RETURN(read_multi_range_first(multi_range_found_p, 
7996 7997 7998 7999
                                     multi_range_curr,
                                     multi_range_end - multi_range_curr, 
                                     multi_range_sorted,
                                     multi_range_buffer));
8000 8001
  
found:
8002 8003 8004
  /**
   * Found a record belonging to a scan
   */
8005
  m_active_cursor= m_multi_cursor;
8006
  * multi_range_found_p= m_multi_ranges + range_no;
8007 8008
  memcpy(table->record[0], m_multi_range_cursor_result_ptr, reclength);
  setup_recattr(m_active_cursor->getFirstRecAttr());
8009 8010 8011
  unpack_record(table->record[0]);
  table->status= 0;     
  DBUG_RETURN(0);
8012
  
8013
found_next:
8014 8015 8016 8017
  /**
   * Found a record belonging to a pk/index op,
   *   copy result and move to next to prepare for next call
   */
8018
  * multi_range_found_p= multi_range_curr;
8019
  memcpy(table->record[0], m_multi_range_result_ptr, reclength);
8020
  setup_recattr(op->getFirstRecAttr());
8021
  unpack_record(table->record[0]);
8022 8023
  table->status= 0;
  
8024
  multi_range_curr++;
8025
  m_current_multi_operation= m_active_trans->getNextCompletedOperation(op);
8026 8027
  m_multi_range_result_ptr += reclength;
  DBUG_RETURN(0);
8028 8029
}

8030 8031 8032 8033 8034 8035 8036 8037
int
ha_ndbcluster::setup_recattr(const NdbRecAttr* curr)
{
  DBUG_ENTER("setup_recattr");

  Field **field, **end;
  NdbValue *value= m_value;
  
8038
  end= table->field + table_share->fields;
8039 8040 8041 8042 8043 8044
  
  for (field= table->field; field < end; field++, value++)
  {
    if ((* value).ptr)
    {
      DBUG_ASSERT(curr != 0);
8045 8046 8047
      NdbValue* val= m_value + curr->getColumn()->getColumnNo();
      DBUG_ASSERT(val->ptr);
      val->rec= curr;
8048
      curr= curr->next();
8049 8050 8051
    }
  }
  
8052
  DBUG_RETURN(0);
8053 8054
}

mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8055 8056
char*
ha_ndbcluster::update_table_comment(
8057 8058
                                /* out: table comment + additional */
        const char*     comment)/* in:  table comment defined by user */
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8059 8060
{
  uint length= strlen(comment);
8061
  if (length > 64000 - 3)
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8062 8063 8064 8065 8066 8067 8068 8069 8070 8071 8072 8073
  {
    return((char*)comment); /* string too long */
  }

  Ndb* ndb;
  if (!(ndb= get_ndb()))
  {
    return((char*)comment);
  }

  ndb->setDatabaseName(m_dbname);
  NDBDICT* dict= ndb->getDictionary();
8074 8075
  const NDBTAB* tab= m_table;
  DBUG_ASSERT(tab != NULL);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8076 8077 8078 8079 8080 8081 8082 8083 8084

  char *str;
  const char *fmt="%s%snumber_of_replicas: %d";
  const unsigned fmt_len_plus_extra= length + strlen(fmt);
  if ((str= my_malloc(fmt_len_plus_extra, MYF(0))) == NULL)
  {
    return (char*)comment;
  }

8085 8086 8087
  my_snprintf(str,fmt_len_plus_extra,fmt,comment,
              length > 0 ? " ":"",
              tab->getReplicaCount());
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8088 8089 8090 8091 8092
  return str;
}


// Utility thread main loop
8093
pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused)))
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8094 8095
{
  THD *thd; /* needs to be first for thread_stack */
8096
  Ndb* ndb;
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8097
  struct timespec abstime;
8098
  List<NDB_SHARE> util_open_tables;
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8099 8100 8101 8102 8103 8104 8105

  my_thread_init();
  DBUG_ENTER("ndb_util_thread");
  DBUG_PRINT("enter", ("ndb_cache_check_time: %d", ndb_cache_check_time));

  thd= new THD; /* note that contructor of THD uses DBUG_ */
  THD_CHECK_SENTRY(thd);
8106
  ndb= new Ndb(g_ndb_cluster_connection, "");
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8107 8108 8109 8110 8111

  pthread_detach_this_thread();
  ndb_util_thread= pthread_self();

  thd->thread_stack= (char*)&thd; /* remember where our stack is */
8112
  if (thd->store_globals() || (ndb->init() != 0))
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8113 8114 8115
  {
    thd->cleanup();
    delete thd;
8116
    delete ndb;
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8117 8118
    DBUG_RETURN(NULL);
  }
8119 8120 8121 8122 8123 8124 8125 8126
  thd->init_for_queries();
  thd->version=refresh_version;
  thd->set_time();
  thd->main_security_ctx.host_or_ip= "";
  thd->client_capabilities = 0;
  my_net_init(&thd->net, 0);
  thd->main_security_ctx.master_access= ~0;
  thd->main_security_ctx.priv_user = 0;
8127
  thd->current_stmt_binlog_row_based= TRUE;     // If in mixed mode
8128 8129 8130 8131 8132 8133 8134 8135 8136

  /*
    wait for mysql server to start
  */
  pthread_mutex_lock(&LOCK_server_started);
  while (!mysqld_server_started)
    pthread_cond_wait(&COND_server_started, &LOCK_server_started);
  pthread_mutex_unlock(&LOCK_server_started);

8137 8138
  ndbcluster_util_inited= 1;

8139 8140 8141 8142
  /*
    Wait for cluster to start
  */
  pthread_mutex_lock(&LOCK_ndb_util_thread);
8143
  while (!ndb_cluster_node_id && (ndbcluster_hton->slot != ~(uint)0))
8144 8145 8146 8147 8148 8149 8150 8151 8152 8153 8154 8155 8156 8157
  {
    /* ndb not connected yet */
    set_timespec(abstime, 1);
    pthread_cond_timedwait(&COND_ndb_util_thread,
                           &LOCK_ndb_util_thread,
                           &abstime);
    if (abort_loop)
    {
      pthread_mutex_unlock(&LOCK_ndb_util_thread);
      goto ndb_util_thread_end;
    }
  }
  pthread_mutex_unlock(&LOCK_ndb_util_thread);

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
8158 8159 8160 8161 8162 8163 8164 8165 8166 8167 8168 8169
  {
    Thd_ndb *thd_ndb;
    if (!(thd_ndb= ha_ndbcluster::seize_thd_ndb()))
    {
      sql_print_error("Could not allocate Thd_ndb object");
      goto ndb_util_thread_end;
    }
    set_thd_ndb(thd, thd_ndb);
    thd_ndb->options|= TNO_NO_LOG_SCHEMA_OP;
  }

#ifdef HAVE_NDB_BINLOG
8170 8171
  if (ndb_extra_logging && ndb_binlog_running)
    sql_print_information("NDB Binlog: Ndb tables initially read only.");
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
8172 8173 8174
  /* create tables needed by the replication */
  ndbcluster_setup_binlog_table_shares(thd);
#else
8175 8176 8177 8178
  /*
    Get all table definitions from the storage node
  */
  ndbcluster_find_all_files(thd);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
8179
#endif
8180

8181
  set_timespec(abstime, 0);
8182
  for (;!abort_loop;)
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8183 8184
  {
    pthread_mutex_lock(&LOCK_ndb_util_thread);
monty@mysql.com's avatar
monty@mysql.com committed
8185 8186 8187
    pthread_cond_timedwait(&COND_ndb_util_thread,
                           &LOCK_ndb_util_thread,
                           &abstime);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8188
    pthread_mutex_unlock(&LOCK_ndb_util_thread);
8189
#ifdef NDB_EXTRA_DEBUG_UTIL_THREAD
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8190 8191
    DBUG_PRINT("ndb_util_thread", ("Started, ndb_cache_check_time: %d",
                                   ndb_cache_check_time));
8192
#endif
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8193 8194 8195
    if (abort_loop)
      break; /* Shutting down server */

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
8196 8197 8198 8199 8200
#ifdef HAVE_NDB_BINLOG
    /*
      Check that the apply_status_share and schema_share has been created.
      If not try to create it
    */
8201
    if (!ndb_binlog_tables_inited)
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
8202 8203 8204
      ndbcluster_setup_binlog_table_shares(thd);
#endif

mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8205 8206
    if (ndb_cache_check_time == 0)
    {
8207 8208
      /* Wake up in 1 second to check if value has changed */
      set_timespec(abstime, 1);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8209 8210 8211 8212 8213 8214 8215 8216 8217
      continue;
    }

    /* Lock mutex and fill list with pointers to all open tables */
    NDB_SHARE *share;
    pthread_mutex_lock(&ndbcluster_mutex);
    for (uint i= 0; i < ndbcluster_open_tables.records; i++)
    {
      share= (NDB_SHARE *)hash_element(&ndbcluster_open_tables, i);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
8218 8219 8220 8221 8222 8223
#ifdef HAVE_NDB_BINLOG
      if ((share->use_count - (int) (share->op != 0) - (int) (share->op != 0))
          <= 0)
        continue; // injector thread is the only user, skip statistics
      share->util_lock= current_thd; // Mark that util thread has lock
#endif /* HAVE_NDB_BINLOG */
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8224 8225 8226 8227 8228 8229 8230 8231 8232 8233 8234 8235
      share->use_count++; /* Make sure the table can't be closed */
      DBUG_PRINT("ndb_util_thread",
                 ("Found open table[%d]: %s, use_count: %d",
                  i, share->table_name, share->use_count));

      /* Store pointer to table */
      util_open_tables.push_back(share);
    }
    pthread_mutex_unlock(&ndbcluster_mutex);

    /* Iterate through the  open files list */
    List_iterator_fast<NDB_SHARE> it(util_open_tables);
8236
    while ((share= it++))
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8237
    {
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
8238 8239 8240 8241 8242 8243 8244 8245 8246 8247 8248
#ifdef HAVE_NDB_BINLOG
      if ((share->use_count - (int) (share->op != 0) - (int) (share->op != 0))
          <= 1)
      {
        /*
          Util thread and injector thread is the only user, skip statistics
	*/
        free_share(&share);
        continue;
      }
#endif /* HAVE_NDB_BINLOG */
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8249
      DBUG_PRINT("ndb_util_thread",
8250
                 ("Fetching commit count for: %s",
8251
                  share->key));
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8252 8253

      /* Contact NDB to get commit count for table */
8254
      ndb->setDatabaseName(share->db);
8255 8256 8257 8258 8259 8260 8261
      struct Ndb_statistics stat;

      uint lock;
      pthread_mutex_lock(&share->mutex);
      lock= share->commit_count_lock;
      pthread_mutex_unlock(&share->mutex);

mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8262
      {
8263 8264 8265 8266
        Ndb_table_guard ndbtab_g(ndb->getDictionary(), share->table_name);
        if (ndbtab_g.get_table() &&
            ndb_get_table_statistics(ndb, ndbtab_g.get_table(), &stat) == 0)
        {
8267
          char buff[22], buff2[22];
8268 8269
          DBUG_PRINT("ndb_util_thread",
                     ("Table: %s, commit_count: %llu, rows: %llu",
8270 8271
                      share->key,
                      llstr(stat.commit_count, buff),
kostja@bodhi.local's avatar
kostja@bodhi.local committed
8272
                      llstr(stat.row_count, buff2)));
8273 8274 8275 8276 8277 8278 8279 8280
        }
        else
        {
          DBUG_PRINT("ndb_util_thread",
                     ("Error: Could not get commit count for table %s",
                      share->key));
          stat.commit_count= 0;
        }
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8281
      }
8282 8283 8284 8285 8286 8287

      pthread_mutex_lock(&share->mutex);
      if (share->commit_count_lock == lock)
        share->commit_count= stat.commit_count;
      pthread_mutex_unlock(&share->mutex);

mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8288
      /* Decrease the use count and possibly free share */
8289
      free_share(&share);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8290 8291 8292 8293 8294
    }

    /* Clear the list of open tables */
    util_open_tables.empty();

8295 8296 8297 8298 8299 8300 8301 8302 8303
    /* Calculate new time to wake up */
    int secs= 0;
    int msecs= ndb_cache_check_time;

    struct timeval tick_time;
    gettimeofday(&tick_time, 0);
    abstime.tv_sec=  tick_time.tv_sec;
    abstime.tv_nsec= tick_time.tv_usec * 1000;

8304
    if (msecs >= 1000){
8305 8306 8307 8308 8309 8310 8311 8312 8313 8314
      secs=  msecs / 1000;
      msecs= msecs % 1000;
    }

    abstime.tv_sec+=  secs;
    abstime.tv_nsec+= msecs * 1000000;
    if (abstime.tv_nsec >= 1000000000) {
      abstime.tv_sec+=  1;
      abstime.tv_nsec-= 1000000000;
    }
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8315
  }
8316
ndb_util_thread_end:
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
8317
  sql_print_information("Stopping Cluster Utility thread");
8318
  net_end(&thd->net);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8319 8320
  thd->cleanup();
  delete thd;
8321
  delete ndb;
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8322 8323 8324 8325 8326 8327
  DBUG_PRINT("exit", ("ndb_util_thread"));
  my_thread_end();
  pthread_exit(0);
  DBUG_RETURN(NULL);
}

8328 8329 8330
/*
  Condition pushdown
*/
8331 8332 8333 8334 8335 8336 8337 8338 8339 8340 8341 8342 8343 8344 8345 8346 8347
/*
  Push a condition to ndbcluster storage engine for evaluation 
  during table   and index scans. The conditions will be stored on a stack
  for possibly storing several conditions. The stack can be popped
  by calling cond_pop, handler::extra(HA_EXTRA_RESET) (handler::reset())
  will clear the stack.
  The current implementation supports arbitrary AND/OR nested conditions
  with comparisons between columns and constants (including constant
  expressions and function calls) and the following comparison operators:
  =, !=, >, >=, <, <=, "is null", and "is not null".
  
  RETURN
    NULL The condition was supported and will be evaluated for each 
    row found during the scan
    cond The condition was not supported and all rows will be returned from
         the scan for evaluation (and thus not saved on stack)
*/
8348 8349 8350 8351 8352
const 
COND* 
ha_ndbcluster::cond_push(const COND *cond) 
{ 
  DBUG_ENTER("cond_push");
8353 8354 8355
  Ndb_cond_stack *ndb_cond = new Ndb_cond_stack();
  DBUG_EXECUTE("where",print_where((COND *)cond, m_tabname););
  if (m_cond_stack)
mskold@mysql.com's avatar
mskold@mysql.com committed
8356
    ndb_cond->next= m_cond_stack;
8357 8358 8359 8360 8361 8362
  else
    ndb_cond->next= NULL;
  m_cond_stack= ndb_cond;
  
  if (serialize_cond(cond, ndb_cond))
  {
mskold@mysql.com's avatar
mskold@mysql.com committed
8363
    DBUG_RETURN(NULL);
8364 8365 8366 8367
  }
  else
  {
    cond_pop();
mskold@mysql.com's avatar
mskold@mysql.com committed
8368
  }
8369 8370 8371
  DBUG_RETURN(cond); 
}

8372 8373 8374
/*
  Pop the top condition from the condition stack of the handler instance.
*/
8375 8376 8377 8378 8379 8380 8381 8382 8383
void 
ha_ndbcluster::cond_pop() 
{ 
  Ndb_cond_stack *ndb_cond_stack= m_cond_stack;  
  if (ndb_cond_stack)
  {
    m_cond_stack= ndb_cond_stack->next;
    delete ndb_cond_stack;
  }
mskold@mysql.com's avatar
mskold@mysql.com committed
8384
}
8385

8386 8387 8388
/*
  Clear the condition stack
*/
8389 8390 8391 8392 8393 8394 8395 8396 8397 8398
void
ha_ndbcluster::cond_clear()
{
  DBUG_ENTER("cond_clear");
  while (m_cond_stack)
    cond_pop();

  DBUG_VOID_RETURN;
}

8399 8400 8401 8402 8403 8404
/*
  Serialize the item tree into a linked list represented by Ndb_cond
  for fast generation of NbdScanFilter. Adds information such as
  position of fields that is not directly available in the Item tree.
  Also checks if condition is supported.
*/
8405 8406 8407 8408 8409
void ndb_serialize_cond(const Item *item, void *arg)
{
  Ndb_cond_traverse_context *context= (Ndb_cond_traverse_context *) arg;
  DBUG_ENTER("ndb_serialize_cond");  

mskold@mysql.com's avatar
mskold@mysql.com committed
8410 8411 8412 8413 8414
  // Check if we are skipping arguments to a function to be evaluated
  if (context->skip)
  {
    DBUG_PRINT("info", ("Skiping argument %d", context->skip));
    context->skip--;
8415 8416 8417
    switch (item->type()) {
    case Item::FUNC_ITEM:
    {
mskold@mysql.com's avatar
mskold@mysql.com committed
8418 8419 8420 8421
      Item_func *func_item= (Item_func *) item;
      context->skip+= func_item->argument_count();
      break;
    }
8422 8423 8424 8425 8426
    case Item::INT_ITEM:
    case Item::REAL_ITEM:
    case Item::STRING_ITEM:
    case Item::VARBIN_ITEM:
    case Item::DECIMAL_ITEM:
mskold@mysql.com's avatar
mskold@mysql.com committed
8427 8428
      break;
    default:
8429
      context->supported= FALSE;
mskold@mysql.com's avatar
mskold@mysql.com committed
8430 8431
      break;
    }
8432
    
mskold@mysql.com's avatar
mskold@mysql.com committed
8433 8434 8435
    DBUG_VOID_RETURN;
  }
  
8436
  if (context->supported)
8437
  {
8438 8439 8440 8441 8442 8443
    Ndb_rewrite_context *rewrite_context= context->rewrite_stack;
    const Item_func *func_item;
    // Check if we are rewriting some unsupported function call
    if (rewrite_context &&
        (func_item= rewrite_context->func_item) &&
        rewrite_context->count++ == 0)
mskold@mysql.com's avatar
mskold@mysql.com committed
8444
    {
8445 8446
      switch (func_item->functype()) {
      case Item_func::BETWEEN:
8447
        /*
8448 8449 8450 8451 8452 8453 8454
          Rewrite 
          <field>|<const> BETWEEN <const1>|<field1> AND <const2>|<field2>
          to <field>|<const> > <const1>|<field1> AND 
          <field>|<const> < <const2>|<field2>
          or actually in prefix format
          BEGIN(AND) GT(<field>|<const>, <const1>|<field1>), 
          LT(<field>|<const>, <const2>|<field2>), END()
8455
        */
8456 8457
      case Item_func::IN_FUNC:
      {
8458 8459 8460 8461 8462 8463 8464 8465 8466 8467 8468 8469 8470 8471 8472 8473
        /*
          Rewrite <field>|<const> IN(<const1>|<field1>, <const2>|<field2>,..)
          to <field>|<const> = <const1>|<field1> OR 
          <field> = <const2>|<field2> ...
          or actually in prefix format
          BEGIN(OR) EQ(<field>|<const>, <const1><field1>), 
          EQ(<field>|<const>, <const2>|<field2>), ... END()
          Each part of the disjunction is added for each call
          to ndb_serialize_cond and end of rewrite statement 
          is wrapped in end of ndb_serialize_cond
        */
        if (context->expecting(item->type()))
        {
          // This is the <field>|<const> item, save it in the rewrite context
          rewrite_context->left_hand_item= item;
          if (item->type() == Item::FUNC_ITEM)
8474
          {
8475 8476 8477
            Item_func *func_item= (Item_func *) item;
            if (func_item->functype() == Item_func::UNKNOWN_FUNC &&
                func_item->const_item())
8478
            {
8479 8480 8481
              // Skip any arguments since we will evaluate function instead
              DBUG_PRINT("info", ("Skip until end of arguments marker"));
              context->skip= func_item->argument_count();
8482 8483 8484
            }
            else
            {
8485 8486 8487 8488
              DBUG_PRINT("info", ("Found unsupported functional expression in BETWEEN|IN"));
              context->supported= FALSE;
              DBUG_VOID_RETURN;
              
8489 8490 8491
            }
          }
        }
8492 8493
        else
        {
8494 8495 8496
          // Non-supported BETWEEN|IN expression
          DBUG_PRINT("info", ("Found unexpected item of type %u in BETWEEN|IN",
                              item->type()));
8497
          context->supported= FALSE;
8498
          DBUG_VOID_RETURN;
8499
        }
8500 8501 8502 8503 8504 8505 8506 8507 8508 8509 8510 8511 8512 8513 8514 8515 8516 8517 8518 8519 8520 8521
        break;
      }
      default:
        context->supported= FALSE;
        break;
      }
      DBUG_VOID_RETURN;
    }
    else
    {
      Ndb_cond_stack *ndb_stack= context->stack_ptr;
      Ndb_cond *prev_cond= context->cond_ptr;
      Ndb_cond *curr_cond= context->cond_ptr= new Ndb_cond();
      if (!ndb_stack->ndb_cond)
        ndb_stack->ndb_cond= curr_cond;
      curr_cond->prev= prev_cond;
      if (prev_cond) prev_cond->next= curr_cond;
    // Check if we are rewriting some unsupported function call
      if (context->rewrite_stack)
      {
        Ndb_rewrite_context *rewrite_context= context->rewrite_stack;
        const Item_func *func_item= rewrite_context->func_item;
8522 8523 8524 8525 8526 8527 8528 8529 8530 8531 8532 8533
        switch (func_item->functype()) {
        case Item_func::BETWEEN:
        {
          /*
            Rewrite 
            <field>|<const> BETWEEN <const1>|<field1> AND <const2>|<field2>
            to <field>|<const> > <const1>|<field1> AND 
            <field>|<const> < <const2>|<field2>
            or actually in prefix format
            BEGIN(AND) GT(<field>|<const>, <const1>|<field1>), 
            LT(<field>|<const>, <const2>|<field2>), END()
          */
8534 8535 8536 8537 8538 8539 8540 8541 8542 8543 8544 8545 8546 8547 8548 8549 8550 8551 8552
          if (rewrite_context->count == 2)
          {
            // Lower limit of BETWEEN
            DBUG_PRINT("info", ("GE_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(Item_func::GE_FUNC, 2);
          }
          else if (rewrite_context->count == 3)
          {
            // Upper limit of BETWEEN
            DBUG_PRINT("info", ("LE_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(Item_func::LE_FUNC, 2);
          }
          else
          {
            // Illegal BETWEEN expression
            DBUG_PRINT("info", ("Illegal BETWEEN expression"));
            context->supported= FALSE;
            DBUG_VOID_RETURN;
          }
8553 8554
          break;
        }
8555 8556
        case Item_func::IN_FUNC:
        {
8557 8558 8559 8560 8561 8562 8563 8564 8565 8566 8567 8568 8569
          /*
            Rewrite <field>|<const> IN(<const1>|<field1>, <const2>|<field2>,..)
            to <field>|<const> = <const1>|<field1> OR 
            <field> = <const2>|<field2> ...
            or actually in prefix format
            BEGIN(OR) EQ(<field>|<const>, <const1><field1>), 
            EQ(<field>|<const>, <const2>|<field2>), ... END()
            Each part of the disjunction is added for each call
            to ndb_serialize_cond and end of rewrite statement 
            is wrapped in end of ndb_serialize_cond
          */
          DBUG_PRINT("info", ("EQ_FUNC"));      
          curr_cond->ndb_item= new Ndb_item(Item_func::EQ_FUNC, 2);
8570 8571
          break;
        }
8572 8573
        default:
          context->supported= FALSE;
8574
        }
8575 8576 8577 8578 8579 8580 8581 8582 8583 8584 8585 8586 8587 8588 8589 8590 8591 8592 8593 8594 8595 8596 8597 8598 8599 8600 8601 8602 8603 8604
        // Handle left hand <field>|<const>
        context->rewrite_stack= NULL; // Disable rewrite mode
        context->expect_only(Item::FIELD_ITEM);
        context->expect_field_result(STRING_RESULT);
        context->expect_field_result(REAL_RESULT);
        context->expect_field_result(INT_RESULT);
        context->expect_field_result(DECIMAL_RESULT);
        context->expect(Item::INT_ITEM);
        context->expect(Item::STRING_ITEM);
        context->expect(Item::VARBIN_ITEM);
        context->expect(Item::FUNC_ITEM);
        ndb_serialize_cond(rewrite_context->left_hand_item, arg);
        context->skip= 0; // Any FUNC_ITEM expression has already been parsed
        context->rewrite_stack= rewrite_context; // Enable rewrite mode
        if (!context->supported)
          DBUG_VOID_RETURN;

        prev_cond= context->cond_ptr;
        curr_cond= context->cond_ptr= new Ndb_cond();
        prev_cond->next= curr_cond;
      }
      
      // Check for end of AND/OR expression
      if (!item)
      {
        // End marker for condition group
        DBUG_PRINT("info", ("End of condition group"));
        curr_cond->ndb_item= new Ndb_item(NDB_END_COND);
      }
      else
8605 8606 8607 8608
      {
        switch (item->type()) {
        case Item::FIELD_ITEM:
        {
8609 8610 8611 8612 8613 8614 8615 8616 8617 8618 8619 8620 8621
          Item_field *field_item= (Item_field *) item;
          Field *field= field_item->field;
          enum_field_types type= field->type();
          /*
            Check that the field is part of the table of the handler
            instance and that we expect a field with of this result type.
          */
          if (context->table == field->table)
          {       
            const NDBTAB *tab= (const NDBTAB *) context->ndb_table;
            DBUG_PRINT("info", ("FIELD_ITEM"));
            DBUG_PRINT("info", ("table %s", tab->getName()));
            DBUG_PRINT("info", ("column %s", field->field_name));
8622
            DBUG_PRINT("info", ("type %d", field->type()));
8623 8624 8625 8626 8627
            DBUG_PRINT("info", ("result type %d", field->result_type()));
            
            // Check that we are expecting a field and with the correct
            // result type
            if (context->expecting(Item::FIELD_ITEM) &&
8628
                context->expecting_field_type(field->type()) &&
8629
                (context->expecting_field_result(field->result_type()) ||
mskold@mysql.com's avatar
mskold@mysql.com committed
8630
                 // Date and year can be written as string or int
8631 8632 8633 8634
                 ((type == MYSQL_TYPE_TIME ||
                   type == MYSQL_TYPE_DATE || 
                   type == MYSQL_TYPE_YEAR ||
                   type == MYSQL_TYPE_DATETIME)
mskold@mysql.com's avatar
mskold@mysql.com committed
8635 8636 8637
                  ? (context->expecting_field_result(STRING_RESULT) ||
                     context->expecting_field_result(INT_RESULT))
                  : true)) &&
8638
                // Bit fields no yet supported in scan filter
8639 8640 8641
                type != MYSQL_TYPE_BIT &&
                // No BLOB support in scan filter
                type != MYSQL_TYPE_TINY_BLOB &&
8642 8643
                type != MYSQL_TYPE_MEDIUM_BLOB &&
                type != MYSQL_TYPE_LONG_BLOB &&
8644
                type != MYSQL_TYPE_BLOB)
8645 8646 8647 8648 8649 8650
            {
              const NDBCOL *col= tab->getColumn(field->field_name);
              DBUG_ASSERT(col);
              curr_cond->ndb_item= new Ndb_item(field, col->getColumnNo());
              context->dont_expect(Item::FIELD_ITEM);
              context->expect_no_field_result();
8651
              if (! context->expecting_nothing())
8652
              {
8653 8654 8655 8656 8657 8658 8659 8660 8661 8662
                // We have not seen second argument yet
                if (type == MYSQL_TYPE_TIME ||
                    type == MYSQL_TYPE_DATE || 
                    type == MYSQL_TYPE_YEAR ||
                    type == MYSQL_TYPE_DATETIME)
                {
                  context->expect_only(Item::STRING_ITEM);
                  context->expect(Item::INT_ITEM);
                }
                else
8663 8664
                  switch (field->result_type()) {
                  case STRING_RESULT:
8665 8666 8667 8668 8669
                    // Expect char string or binary string
                    context->expect_only(Item::STRING_ITEM);
                    context->expect(Item::VARBIN_ITEM);
                    context->expect_collation(field_item->collation.collation);
                    break;
8670
                  case REAL_RESULT:
8671 8672
                    context->expect_only(Item::REAL_ITEM);
                    context->expect(Item::DECIMAL_ITEM);
8673
                    context->expect(Item::INT_ITEM);
8674
                    break;
8675
                  case INT_RESULT:
8676 8677 8678
                    context->expect_only(Item::INT_ITEM);
                    context->expect(Item::VARBIN_ITEM);
                    break;
8679
                  case DECIMAL_RESULT:
8680 8681
                    context->expect_only(Item::DECIMAL_ITEM);
                    context->expect(Item::REAL_ITEM);
8682
                    context->expect(Item::INT_ITEM);
8683 8684 8685 8686
                    break;
                  default:
                    break;
                  }    
8687 8688
              }
              else
8689 8690 8691 8692
              {
                // Expect another logical expression
                context->expect_only(Item::FUNC_ITEM);
                context->expect(Item::COND_ITEM);
8693 8694 8695 8696 8697 8698 8699
                // Check that field and string constant collations are the same
                if ((field->result_type() == STRING_RESULT) &&
                    !context->expecting_collation(item->collation.collation)
                    && type != MYSQL_TYPE_TIME
                    && type != MYSQL_TYPE_DATE
                    && type != MYSQL_TYPE_YEAR
                    && type != MYSQL_TYPE_DATETIME)
8700
                {
mskold@mysql.com's avatar
mskold@mysql.com committed
8701
                  DBUG_PRINT("info", ("Found non-matching collation %s",  
8702 8703
                                      item->collation.collation->name)); 
                  context->supported= FALSE;                
8704 8705
                }
              }
8706 8707
              break;
            }
8708 8709
            else
            {
mskold@mysql.com's avatar
mskold@mysql.com committed
8710 8711
              DBUG_PRINT("info", ("Was not expecting field of type %u(%u)",
                                  field->result_type(), type));
8712
              context->supported= FALSE;
8713
            }
8714
          }
8715
          else
8716
          {
8717 8718 8719
            DBUG_PRINT("info", ("Was not expecting field from table %s (%s)",
                                context->table->s->table_name.str, 
                                field->table->s->table_name.str));
8720
            context->supported= FALSE;
8721
          }
8722 8723
          break;
        }
8724 8725
        case Item::FUNC_ITEM:
        {
8726 8727 8728 8729 8730 8731
          Item_func *func_item= (Item_func *) item;
          // Check that we expect a function or functional expression here
          if (context->expecting(Item::FUNC_ITEM) || 
              func_item->functype() == Item_func::UNKNOWN_FUNC)
            context->expect_nothing();
          else
8732
          {
8733 8734 8735
            // Did not expect function here
            context->supported= FALSE;
            break;
8736
          }
8737
          
8738 8739 8740
          switch (func_item->functype()) {
          case Item_func::EQ_FUNC:
          {
8741 8742 8743 8744 8745 8746 8747 8748 8749 8750 8751 8752 8753 8754
            DBUG_PRINT("info", ("EQ_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(), 
                                              func_item);      
            context->expect(Item::STRING_ITEM);
            context->expect(Item::INT_ITEM);
            context->expect(Item::REAL_ITEM);
            context->expect(Item::DECIMAL_ITEM);
            context->expect(Item::VARBIN_ITEM);
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect_field_result(REAL_RESULT);
            context->expect_field_result(INT_RESULT);
            context->expect_field_result(DECIMAL_RESULT);
            break;
8755
          }
8756 8757
          case Item_func::NE_FUNC:
          {
8758 8759 8760 8761 8762 8763 8764 8765 8766 8767 8768 8769 8770 8771
            DBUG_PRINT("info", ("NE_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);      
            context->expect(Item::STRING_ITEM);
            context->expect(Item::INT_ITEM);
            context->expect(Item::REAL_ITEM);
            context->expect(Item::DECIMAL_ITEM);
            context->expect(Item::VARBIN_ITEM);
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect_field_result(REAL_RESULT);
            context->expect_field_result(INT_RESULT);
            context->expect_field_result(DECIMAL_RESULT);
            break;
8772
          }
8773 8774
          case Item_func::LT_FUNC:
          {
8775 8776 8777 8778 8779 8780 8781 8782 8783 8784 8785 8786 8787 8788 8789
            DBUG_PRINT("info", ("LT_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);      
            context->expect(Item::STRING_ITEM);
            context->expect(Item::INT_ITEM);
            context->expect(Item::REAL_ITEM);
            context->expect(Item::DECIMAL_ITEM);
            context->expect(Item::VARBIN_ITEM);
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect_field_result(REAL_RESULT);
            context->expect_field_result(INT_RESULT);
            context->expect_field_result(DECIMAL_RESULT);
            break;
          }
8790 8791
          case Item_func::LE_FUNC:
          {
8792 8793 8794 8795 8796 8797 8798 8799 8800 8801 8802 8803 8804 8805 8806
            DBUG_PRINT("info", ("LE_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);      
            context->expect(Item::STRING_ITEM);
            context->expect(Item::INT_ITEM);
            context->expect(Item::REAL_ITEM);
            context->expect(Item::DECIMAL_ITEM);
            context->expect(Item::VARBIN_ITEM);
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect_field_result(REAL_RESULT);
            context->expect_field_result(INT_RESULT);
            context->expect_field_result(DECIMAL_RESULT);
            break;
          }
8807 8808
          case Item_func::GE_FUNC:
          {
8809 8810 8811 8812 8813 8814 8815 8816 8817 8818 8819 8820 8821 8822 8823
            DBUG_PRINT("info", ("GE_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);      
            context->expect(Item::STRING_ITEM);
            context->expect(Item::INT_ITEM);
            context->expect(Item::REAL_ITEM);
            context->expect(Item::DECIMAL_ITEM);
            context->expect(Item::VARBIN_ITEM);
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect_field_result(REAL_RESULT);
            context->expect_field_result(INT_RESULT);
            context->expect_field_result(DECIMAL_RESULT);
            break;
          }
8824 8825
          case Item_func::GT_FUNC:
          {
8826 8827 8828 8829 8830 8831 8832 8833 8834 8835 8836 8837 8838 8839 8840
            DBUG_PRINT("info", ("GT_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);      
            context->expect(Item::STRING_ITEM);
            context->expect(Item::REAL_ITEM);
            context->expect(Item::DECIMAL_ITEM);
            context->expect(Item::INT_ITEM);
            context->expect(Item::VARBIN_ITEM);
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect_field_result(REAL_RESULT);
            context->expect_field_result(INT_RESULT);
            context->expect_field_result(DECIMAL_RESULT);
            break;
          }
8841 8842
          case Item_func::LIKE_FUNC:
          {
8843 8844 8845 8846 8847
            DBUG_PRINT("info", ("LIKE_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);      
            context->expect(Item::STRING_ITEM);
            context->expect(Item::FIELD_ITEM);
8848 8849 8850
            context->expect_only_field_type(MYSQL_TYPE_STRING);
            context->expect_field_type(MYSQL_TYPE_VAR_STRING);
            context->expect_field_type(MYSQL_TYPE_VARCHAR);
8851 8852 8853 8854
            context->expect_field_result(STRING_RESULT);
            context->expect(Item::FUNC_ITEM);
            break;
          }
8855 8856
          case Item_func::ISNULL_FUNC:
          {
8857 8858 8859 8860 8861 8862 8863 8864 8865 8866
            DBUG_PRINT("info", ("ISNULL_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);      
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect_field_result(REAL_RESULT);
            context->expect_field_result(INT_RESULT);
            context->expect_field_result(DECIMAL_RESULT);
            break;
          }
8867 8868
          case Item_func::ISNOTNULL_FUNC:
          {
8869 8870 8871 8872 8873 8874 8875 8876 8877 8878
            DBUG_PRINT("info", ("ISNOTNULL_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);     
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect_field_result(REAL_RESULT);
            context->expect_field_result(INT_RESULT);
            context->expect_field_result(DECIMAL_RESULT);
            break;
          }
8879 8880
          case Item_func::NOT_FUNC:
          {
8881 8882 8883 8884
            DBUG_PRINT("info", ("NOT_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);     
            context->expect(Item::FUNC_ITEM);
8885
            context->expect(Item::COND_ITEM);
8886
            break;
8887
          }
8888 8889
          case Item_func::BETWEEN:
          {
8890
            DBUG_PRINT("info", ("BETWEEN, rewriting using AND"));
8891
            Item_func_between *between_func= (Item_func_between *) func_item;
8892 8893 8894 8895
            Ndb_rewrite_context *rewrite_context= 
              new Ndb_rewrite_context(func_item);
            rewrite_context->next= context->rewrite_stack;
            context->rewrite_stack= rewrite_context;
8896 8897 8898 8899 8900 8901 8902 8903 8904
            if (between_func->negated)
            {
              DBUG_PRINT("info", ("NOT_FUNC"));
              curr_cond->ndb_item= new Ndb_item(Item_func::NOT_FUNC, 1);
              prev_cond= curr_cond;
              curr_cond= context->cond_ptr= new Ndb_cond();
              curr_cond->prev= prev_cond;
              prev_cond->next= curr_cond;
            }
8905
            DBUG_PRINT("info", ("COND_AND_FUNC"));
8906 8907 8908
            curr_cond->ndb_item= 
              new Ndb_item(Item_func::COND_AND_FUNC, 
                           func_item->argument_count() - 1);
8909
            context->expect_only(Item::FIELD_ITEM);
8910 8911 8912 8913 8914
            context->expect(Item::INT_ITEM);
            context->expect(Item::STRING_ITEM);
            context->expect(Item::VARBIN_ITEM);
            context->expect(Item::FUNC_ITEM);
            break;
8915
          }
8916 8917
          case Item_func::IN_FUNC:
          {
8918
            DBUG_PRINT("info", ("IN_FUNC, rewriting using OR"));
8919
            Item_func_in *in_func= (Item_func_in *) func_item;
8920 8921 8922 8923
            Ndb_rewrite_context *rewrite_context= 
              new Ndb_rewrite_context(func_item);
            rewrite_context->next= context->rewrite_stack;
            context->rewrite_stack= rewrite_context;
8924 8925 8926 8927 8928 8929 8930 8931 8932
            if (in_func->negated)
            {
              DBUG_PRINT("info", ("NOT_FUNC"));
              curr_cond->ndb_item= new Ndb_item(Item_func::NOT_FUNC, 1);
              prev_cond= curr_cond;
              curr_cond= context->cond_ptr= new Ndb_cond();
              curr_cond->prev= prev_cond;
              prev_cond->next= curr_cond;
            }
8933 8934 8935 8936 8937 8938 8939 8940 8941
            DBUG_PRINT("info", ("COND_OR_FUNC"));
            curr_cond->ndb_item= new Ndb_item(Item_func::COND_OR_FUNC, 
                                              func_item->argument_count() - 1);
            context->expect_only(Item::FIELD_ITEM);
            context->expect(Item::INT_ITEM);
            context->expect(Item::STRING_ITEM);
            context->expect(Item::VARBIN_ITEM);
            context->expect(Item::FUNC_ITEM);
            break;
8942
          }
8943 8944
          case Item_func::UNKNOWN_FUNC:
          {
8945 8946 8947 8948
            DBUG_PRINT("info", ("UNKNOWN_FUNC %s", 
                                func_item->const_item()?"const":""));  
            DBUG_PRINT("info", ("result type %d", func_item->result_type()));
            if (func_item->const_item())
8949 8950 8951 8952
            {
              switch (func_item->result_type()) {
              case STRING_RESULT:
              {
8953 8954 8955
                NDB_ITEM_QUALIFICATION q;
                q.value_type= Item::STRING_ITEM;
                curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item); 
8956
                if (! context->expecting_no_field_result())
8957 8958 8959 8960 8961 8962 8963 8964 8965 8966 8967 8968 8969 8970 8971 8972 8973 8974 8975 8976 8977 8978 8979 8980
                {
                  // We have not seen the field argument yet
                  context->expect_only(Item::FIELD_ITEM);
                  context->expect_only_field_result(STRING_RESULT);
                  context->expect_collation(func_item->collation.collation);
                }
                else
                {
                  // Expect another logical expression
                  context->expect_only(Item::FUNC_ITEM);
                  context->expect(Item::COND_ITEM);
                  // Check that string result have correct collation
                  if (!context->expecting_collation(item->collation.collation))
                  {
                    DBUG_PRINT("info", ("Found non-matching collation %s",  
                                        item->collation.collation->name));
                    context->supported= FALSE;
                  }
                }
                // Skip any arguments since we will evaluate function instead
                DBUG_PRINT("info", ("Skip until end of arguments marker"));
                context->skip= func_item->argument_count();
                break;
              }
8981 8982
              case REAL_RESULT:
              {
8983 8984 8985
                NDB_ITEM_QUALIFICATION q;
                q.value_type= Item::REAL_ITEM;
                curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item);
8986
                if (! context->expecting_no_field_result()) 
8987 8988 8989 8990 8991 8992 8993 8994 8995 8996 8997 8998 8999 9000 9001 9002 9003
                {
                  // We have not seen the field argument yet
                  context->expect_only(Item::FIELD_ITEM);
                  context->expect_only_field_result(REAL_RESULT);
                }
                else
                {
                  // Expect another logical expression
                  context->expect_only(Item::FUNC_ITEM);
                  context->expect(Item::COND_ITEM);
                }
                
                // Skip any arguments since we will evaluate function instead
                DBUG_PRINT("info", ("Skip until end of arguments marker"));
                context->skip= func_item->argument_count();
                break;
              }
9004 9005
              case INT_RESULT:
              {
9006 9007 9008
                NDB_ITEM_QUALIFICATION q;
                q.value_type= Item::INT_ITEM;
                curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item);
9009
                if (! context->expecting_no_field_result()) 
9010 9011 9012 9013 9014 9015 9016 9017 9018 9019 9020 9021 9022 9023 9024 9025 9026
                {
                  // We have not seen the field argument yet
                  context->expect_only(Item::FIELD_ITEM);
                  context->expect_only_field_result(INT_RESULT);
                }
                else
                {
                  // Expect another logical expression
                  context->expect_only(Item::FUNC_ITEM);
                  context->expect(Item::COND_ITEM);
                }
                
                // Skip any arguments since we will evaluate function instead
                DBUG_PRINT("info", ("Skip until end of arguments marker"));
                context->skip= func_item->argument_count();
                break;
              }
9027 9028
              case DECIMAL_RESULT:
              {
9029 9030 9031
                NDB_ITEM_QUALIFICATION q;
                q.value_type= Item::DECIMAL_ITEM;
                curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item);
9032
                if (! context->expecting_no_field_result()) 
9033 9034 9035 9036 9037 9038 9039 9040 9041 9042 9043 9044 9045 9046 9047 9048 9049 9050 9051
                {
                  // We have not seen the field argument yet
                  context->expect_only(Item::FIELD_ITEM);
                  context->expect_only_field_result(DECIMAL_RESULT);
                }
                else
                {
                  // Expect another logical expression
                  context->expect_only(Item::FUNC_ITEM);
                  context->expect(Item::COND_ITEM);
                }
                // Skip any arguments since we will evaluate function instead
                DBUG_PRINT("info", ("Skip until end of arguments marker"));
                context->skip= func_item->argument_count();
                break;
              }
              default:
                break;
              }
9052
            }
9053 9054 9055 9056 9057
            else
              // Function does not return constant expression
              context->supported= FALSE;
            break;
          }
9058 9059
          default:
          {
9060 9061 9062
            DBUG_PRINT("info", ("Found func_item of type %d", 
                                func_item->functype()));
            context->supported= FALSE;
9063
          }
9064 9065
          }
          break;
9066
        }
9067
        case Item::STRING_ITEM:
9068 9069 9070
          DBUG_PRINT("info", ("STRING_ITEM")); 
          if (context->expecting(Item::STRING_ITEM)) 
          {
9071
#ifndef DBUG_OFF
9072 9073 9074 9075 9076 9077
            char buff[256];
            String str(buff,(uint32) sizeof(buff), system_charset_info);
            str.length(0);
            Item_string *string_item= (Item_string *) item;
            DBUG_PRINT("info", ("value \"%s\"", 
                                string_item->val_str(&str)->ptr()));
9078
#endif
9079 9080 9081
            NDB_ITEM_QUALIFICATION q;
            q.value_type= Item::STRING_ITEM;
            curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item);      
9082
            if (! context->expecting_no_field_result())
9083 9084 9085 9086 9087 9088 9089 9090 9091 9092 9093 9094 9095 9096 9097 9098 9099 9100 9101 9102 9103 9104 9105
            {
              // We have not seen the field argument yet
              context->expect_only(Item::FIELD_ITEM);
              context->expect_only_field_result(STRING_RESULT);
              context->expect_collation(item->collation.collation);
            }
            else 
            {
              // Expect another logical expression
              context->expect_only(Item::FUNC_ITEM);
              context->expect(Item::COND_ITEM);
              // Check that we are comparing with a field with same collation
              if (!context->expecting_collation(item->collation.collation))
              {
                DBUG_PRINT("info", ("Found non-matching collation %s",  
                                    item->collation.collation->name));
                context->supported= FALSE;
              }
            }
          }
          else
            context->supported= FALSE;
          break;
9106
        case Item::INT_ITEM:
9107 9108
          DBUG_PRINT("info", ("INT_ITEM"));
          if (context->expecting(Item::INT_ITEM)) 
9109
          {
9110 9111 9112 9113 9114
            Item_int *int_item= (Item_int *) item;      
            DBUG_PRINT("info", ("value %d", int_item->value));
            NDB_ITEM_QUALIFICATION q;
            q.value_type= Item::INT_ITEM;
            curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item);
9115
            if (! context->expecting_no_field_result()) 
9116 9117 9118 9119
            {
              // We have not seen the field argument yet
              context->expect_only(Item::FIELD_ITEM);
              context->expect_only_field_result(INT_RESULT);
9120 9121
              context->expect_field_result(REAL_RESULT);
              context->expect_field_result(DECIMAL_RESULT);
9122 9123 9124 9125 9126 9127 9128
            }
            else
            {
              // Expect another logical expression
              context->expect_only(Item::FUNC_ITEM);
              context->expect(Item::COND_ITEM);
            }
9129 9130
          }
          else
9131 9132
            context->supported= FALSE;
          break;
9133
        case Item::REAL_ITEM:
9134 9135
          DBUG_PRINT("info", ("REAL_ITEM %s"));
          if (context->expecting(Item::REAL_ITEM)) 
9136
          {
9137 9138 9139 9140 9141
            Item_float *float_item= (Item_float *) item;      
            DBUG_PRINT("info", ("value %f", float_item->value));
            NDB_ITEM_QUALIFICATION q;
            q.value_type= Item::REAL_ITEM;
            curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item);
9142
            if (! context->expecting_no_field_result()) 
9143 9144 9145 9146 9147 9148 9149 9150 9151 9152 9153
            {
              // We have not seen the field argument yet
              context->expect_only(Item::FIELD_ITEM);
              context->expect_only_field_result(REAL_RESULT);
            }
            else
            {
              // Expect another logical expression
              context->expect_only(Item::FUNC_ITEM);
              context->expect(Item::COND_ITEM);
            }
9154
          }
9155 9156 9157
          else
            context->supported= FALSE;
          break;
9158
        case Item::VARBIN_ITEM:
9159 9160
          DBUG_PRINT("info", ("VARBIN_ITEM"));
          if (context->expecting(Item::VARBIN_ITEM)) 
9161
          {
9162 9163 9164
            NDB_ITEM_QUALIFICATION q;
            q.value_type= Item::VARBIN_ITEM;
            curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item);      
9165
            if (! context->expecting_no_field_result())
9166 9167 9168 9169 9170 9171 9172 9173 9174 9175 9176
            {
              // We have not seen the field argument yet
              context->expect_only(Item::FIELD_ITEM);
              context->expect_only_field_result(STRING_RESULT);
            }
            else
            {
              // Expect another logical expression
              context->expect_only(Item::FUNC_ITEM);
              context->expect(Item::COND_ITEM);
            }
9177 9178
          }
          else
9179 9180
            context->supported= FALSE;
          break;
9181
        case Item::DECIMAL_ITEM:
9182 9183
          DBUG_PRINT("info", ("DECIMAL_ITEM %s"));
          if (context->expecting(Item::DECIMAL_ITEM)) 
9184
          {
9185 9186 9187 9188 9189
            Item_decimal *decimal_item= (Item_decimal *) item;      
            DBUG_PRINT("info", ("value %f", decimal_item->val_real()));
            NDB_ITEM_QUALIFICATION q;
            q.value_type= Item::DECIMAL_ITEM;
            curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item);
9190
            if (! context->expecting_no_field_result()) 
9191 9192 9193 9194 9195 9196 9197 9198 9199 9200 9201 9202
            {
              // We have not seen the field argument yet
              context->expect_only(Item::FIELD_ITEM);
              context->expect_only_field_result(REAL_RESULT);
              context->expect_field_result(DECIMAL_RESULT);
            }
            else
            {
              // Expect another logical expression
              context->expect_only(Item::FUNC_ITEM);
              context->expect(Item::COND_ITEM);
            }
9203
          }
9204 9205 9206
          else
            context->supported= FALSE;
          break;
9207 9208
        case Item::COND_ITEM:
        {
9209 9210 9211
          Item_cond *cond_item= (Item_cond *) item;
          
          if (context->expecting(Item::COND_ITEM))
9212 9213 9214
          {
            switch (cond_item->functype()) {
            case Item_func::COND_AND_FUNC:
9215 9216 9217 9218
              DBUG_PRINT("info", ("COND_AND_FUNC"));
              curr_cond->ndb_item= new Ndb_item(cond_item->functype(),
                                                cond_item);      
              break;
9219
            case Item_func::COND_OR_FUNC:
9220 9221 9222 9223 9224 9225 9226 9227 9228
              DBUG_PRINT("info", ("COND_OR_FUNC"));
              curr_cond->ndb_item= new Ndb_item(cond_item->functype(),
                                                cond_item);      
              break;
            default:
              DBUG_PRINT("info", ("COND_ITEM %d", cond_item->functype()));
              context->supported= FALSE;
              break;
            }
9229
          }
9230
          else
9231 9232
          {
            /* Did not expect condition */
9233
            context->supported= FALSE;          
9234
          }
9235
          break;
9236
        }
9237 9238
        default:
        {
9239
          DBUG_PRINT("info", ("Found item of type %d", item->type()));
9240
          context->supported= FALSE;
9241 9242
        }
        }
9243
      }
9244 9245 9246 9247 9248 9249 9250 9251 9252 9253
      if (context->supported && context->rewrite_stack)
      {
        Ndb_rewrite_context *rewrite_context= context->rewrite_stack;
        if (rewrite_context->count == 
            rewrite_context->func_item->argument_count())
        {
          // Rewrite is done, wrap an END() at the en
          DBUG_PRINT("info", ("End of condition group"));
          prev_cond= curr_cond;
          curr_cond= context->cond_ptr= new Ndb_cond();
9254
          curr_cond->prev= prev_cond;
9255 9256 9257
          prev_cond->next= curr_cond;
          curr_cond->ndb_item= new Ndb_item(NDB_END_COND);
          // Pop rewrite stack
9258 9259 9260
          context->rewrite_stack=  rewrite_context->next;
          rewrite_context->next= NULL;
          delete(rewrite_context);
9261
        }
9262
      }
9263
    }
9264
  }
9265
 
9266 9267 9268 9269 9270 9271 9272 9273
  DBUG_VOID_RETURN;
}

bool
ha_ndbcluster::serialize_cond(const COND *cond, Ndb_cond_stack *ndb_cond)
{
  DBUG_ENTER("serialize_cond");
  Item *item= (Item *) cond;
9274
  Ndb_cond_traverse_context context(table, (void *)m_table, ndb_cond);
9275 9276 9277
  // Expect a logical expression
  context.expect(Item::FUNC_ITEM);
  context.expect(Item::COND_ITEM);
9278
  item->traverse_cond(&ndb_serialize_cond, (void *) &context, Item::PREFIX);
9279
  DBUG_PRINT("info", ("The pushed condition is %ssupported", (context.supported)?"":"not "));
9280

9281
  DBUG_RETURN(context.supported);
9282 9283
}

9284 9285
int
ha_ndbcluster::build_scan_filter_predicate(Ndb_cond * &cond, 
9286 9287
                                           NdbScanFilter *filter,
                                           bool negated)
9288 9289
{
  DBUG_ENTER("build_scan_filter_predicate");  
9290 9291 9292
  switch (cond->ndb_item->type) {
  case NDB_FUNCTION:
  {
9293 9294 9295
    if (!cond->next)
      break;
    Ndb_item *a= cond->next->ndb_item;
9296
    Ndb_item *b, *field, *value= NULL;
9297 9298
    LINT_INIT(field);

9299 9300
    switch (cond->ndb_item->argument_count()) {
    case 1:
9301 9302 9303
      field= 
        (a->type == NDB_FIELD)? a : NULL;
      break;
9304
    case 2:
9305
      if (!cond->next->next)
9306
        break;
9307 9308
      b= cond->next->next->ndb_item;
      value= 
9309 9310 9311
        (a->type == NDB_VALUE)? a
        : (b->type == NDB_VALUE)? b
        : NULL;
9312
      field= 
9313 9314 9315
        (a->type == NDB_FIELD)? a
        : (b->type == NDB_FIELD)? b
        : NULL;
9316
      break;
9317
    default:
9318 9319
      field= NULL; //Keep compiler happy
      DBUG_ASSERT(0);
9320 9321
      break;
    }
9322 9323 9324
    switch ((negated) ? 
            Ndb_item::negate(cond->ndb_item->qualification.function_type)
            : cond->ndb_item->qualification.function_type) {
9325
    case NDB_EQ_FUNC:
9326
    {
9327
      if (!value || !field) break;
mskold@mysql.com's avatar
mskold@mysql.com committed
9328 9329
      // Save value in right format for the field type
      value->save_in_field(field);
9330
      DBUG_PRINT("info", ("Generating EQ filter"));
9331
      if (filter->cmp(NdbScanFilter::COND_EQ, 
9332 9333 9334 9335
                      field->get_field_no(),
                      field->get_val(),
                      field->pack_length()) == -1)
        DBUG_RETURN(1);
9336 9337
      cond= cond->next->next->next;
      DBUG_RETURN(0);
9338
    }
9339
    case NDB_NE_FUNC:
9340
    {
9341
      if (!value || !field) break;
mskold@mysql.com's avatar
mskold@mysql.com committed
9342 9343
      // Save value in right format for the field type
      value->save_in_field(field);
9344
      DBUG_PRINT("info", ("Generating NE filter"));
9345
      if (filter->cmp(NdbScanFilter::COND_NE, 
9346 9347 9348 9349
                      field->get_field_no(),
                      field->get_val(),
                      field->pack_length()) == -1)
        DBUG_RETURN(1);
9350 9351
      cond= cond->next->next->next;
      DBUG_RETURN(0);
9352
    }
9353
    case NDB_LT_FUNC:
9354
    {
9355
      if (!value || !field) break;
mskold@mysql.com's avatar
mskold@mysql.com committed
9356 9357
      // Save value in right format for the field type
      value->save_in_field(field);
9358
      if (a == field)
9359
      {
9360 9361 9362 9363 9364 9365
        DBUG_PRINT("info", ("Generating LT filter")); 
        if (filter->cmp(NdbScanFilter::COND_LT, 
                        field->get_field_no(),
                        field->get_val(),
                        field->pack_length()) == -1)
          DBUG_RETURN(1);
9366
      }
9367
      else
9368
      {
9369 9370 9371 9372 9373 9374
        DBUG_PRINT("info", ("Generating GT filter")); 
        if (filter->cmp(NdbScanFilter::COND_GT, 
                        field->get_field_no(),
                        field->get_val(),
                        field->pack_length()) == -1)
          DBUG_RETURN(1);
9375
      }
9376 9377
      cond= cond->next->next->next;
      DBUG_RETURN(0);
9378
    }
9379
    case NDB_LE_FUNC:
9380
    {
9381
      if (!value || !field) break;
mskold@mysql.com's avatar
mskold@mysql.com committed
9382 9383
      // Save value in right format for the field type
      value->save_in_field(field);
9384
      if (a == field)
9385
      {
9386 9387 9388 9389 9390 9391
        DBUG_PRINT("info", ("Generating LE filter")); 
        if (filter->cmp(NdbScanFilter::COND_LE, 
                        field->get_field_no(),
                        field->get_val(),
                        field->pack_length()) == -1)
          DBUG_RETURN(1);       
9392
      }
9393
      else
9394
      {
9395 9396 9397 9398 9399 9400
        DBUG_PRINT("info", ("Generating GE filter")); 
        if (filter->cmp(NdbScanFilter::COND_GE, 
                        field->get_field_no(),
                        field->get_val(),
                        field->pack_length()) == -1)
          DBUG_RETURN(1);
9401
      }
9402 9403
      cond= cond->next->next->next;
      DBUG_RETURN(0);
9404
    }
9405
    case NDB_GE_FUNC:
9406
    {
9407
      if (!value || !field) break;
mskold@mysql.com's avatar
mskold@mysql.com committed
9408 9409
      // Save value in right format for the field type
      value->save_in_field(field);
9410
      if (a == field)
9411
      {
9412 9413 9414 9415 9416 9417
        DBUG_PRINT("info", ("Generating GE filter")); 
        if (filter->cmp(NdbScanFilter::COND_GE, 
                        field->get_field_no(),
                        field->get_val(),
                        field->pack_length()) == -1)
          DBUG_RETURN(1);
9418
      }
9419
      else
9420
      {
9421 9422 9423 9424 9425 9426
        DBUG_PRINT("info", ("Generating LE filter")); 
        if (filter->cmp(NdbScanFilter::COND_LE, 
                        field->get_field_no(),
                        field->get_val(),
                        field->pack_length()) == -1)
          DBUG_RETURN(1);
9427
      }
9428 9429
      cond= cond->next->next->next;
      DBUG_RETURN(0);
9430
    }
9431
    case NDB_GT_FUNC:
9432
    {
9433
      if (!value || !field) break;
mskold@mysql.com's avatar
mskold@mysql.com committed
9434 9435
      // Save value in right format for the field type
      value->save_in_field(field);
9436
      if (a == field)
9437
      {
9438 9439 9440 9441 9442 9443
        DBUG_PRINT("info", ("Generating GT filter"));
        if (filter->cmp(NdbScanFilter::COND_GT, 
                        field->get_field_no(),
                        field->get_val(),
                        field->pack_length()) == -1)
          DBUG_RETURN(1);
9444
      }
9445
      else
9446
      {
9447 9448 9449 9450 9451 9452
        DBUG_PRINT("info", ("Generating LT filter"));
        if (filter->cmp(NdbScanFilter::COND_LT, 
                        field->get_field_no(),
                        field->get_val(),
                        field->pack_length()) == -1)
          DBUG_RETURN(1);
9453
      }
9454 9455
      cond= cond->next->next->next;
      DBUG_RETURN(0);
9456
    }
9457
    case NDB_LIKE_FUNC:
9458
    {
9459
      if (!value || !field) break;
9460 9461 9462
      if ((value->qualification.value_type != Item::STRING_ITEM) &&
          (value->qualification.value_type != Item::VARBIN_ITEM))
          break;
mskold@mysql.com's avatar
mskold@mysql.com committed
9463 9464 9465
      // Save value in right format for the field type
      value->save_in_field(field);
      DBUG_PRINT("info", ("Generating LIKE filter: like(%d,%s,%d)", 
9466 9467 9468 9469
                          field->get_field_no(), value->get_val(), 
                          value->pack_length()));
      if (filter->cmp(NdbScanFilter::COND_LIKE, 
                      field->get_field_no(),
9470 9471
                      value->get_val(),
                      value->pack_length()) == -1)
9472
        DBUG_RETURN(1);
9473 9474
      cond= cond->next->next->next;
      DBUG_RETURN(0);
9475
    }
9476
    case NDB_NOTLIKE_FUNC:
9477
    {
9478
      if (!value || !field) break;
9479 9480
      if ((value->qualification.value_type != Item::STRING_ITEM) &&
          (value->qualification.value_type != Item::VARBIN_ITEM))
9481
          break;
mskold@mysql.com's avatar
mskold@mysql.com committed
9482 9483 9484
      // Save value in right format for the field type
      value->save_in_field(field);
      DBUG_PRINT("info", ("Generating NOTLIKE filter: notlike(%d,%s,%d)", 
9485 9486 9487 9488
                          field->get_field_no(), value->get_val(), 
                          value->pack_length()));
      if (filter->cmp(NdbScanFilter::COND_NOT_LIKE, 
                      field->get_field_no(),
9489 9490
                      value->get_val(),
                      value->pack_length()) == -1)
9491
        DBUG_RETURN(1);
9492 9493
      cond= cond->next->next->next;
      DBUG_RETURN(0);
9494
    }
9495
    case NDB_ISNULL_FUNC:
9496 9497 9498 9499 9500
      if (!field)
        break;
      DBUG_PRINT("info", ("Generating ISNULL filter"));
      if (filter->isnull(field->get_field_no()) == -1)
        DBUG_RETURN(1);
9501 9502
      cond= cond->next->next;
      DBUG_RETURN(0);
9503
    case NDB_ISNOTNULL_FUNC:
9504
    {
9505 9506 9507 9508 9509
      if (!field)
        break;
      DBUG_PRINT("info", ("Generating ISNOTNULL filter"));
      if (filter->isnotnull(field->get_field_no()) == -1)
        DBUG_RETURN(1);         
9510 9511
      cond= cond->next->next;
      DBUG_RETURN(0);
9512 9513 9514 9515 9516 9517 9518 9519 9520 9521
    }
    default:
      break;
    }
    break;
  }
  default:
    break;
  }
  DBUG_PRINT("info", ("Found illegal condition"));
9522
  DBUG_RETURN(1);
9523 9524
}

9525
int
9526
ha_ndbcluster::build_scan_filter_group(Ndb_cond* &cond, NdbScanFilter *filter)
9527
{
9528
  uint level=0;
9529
  bool negated= FALSE;
9530
  DBUG_ENTER("build_scan_filter_group");
9531

9532 9533
  do
  {
9534 9535 9536 9537 9538 9539
    if (!cond)
      DBUG_RETURN(1);
    switch (cond->ndb_item->type) {
    case NDB_FUNCTION:
    {
      switch (cond->ndb_item->qualification.function_type) {
9540
      case NDB_COND_AND_FUNC:
9541
      {
9542 9543 9544 9545 9546
        level++;
        DBUG_PRINT("info", ("Generating %s group %u", (negated)?"NAND":"AND",
                            level));
        if ((negated) ? filter->begin(NdbScanFilter::NAND)
            : filter->begin(NdbScanFilter::AND) == -1)
9547
          DBUG_RETURN(1);
9548
        negated= FALSE;
9549 9550 9551
        cond= cond->next;
        break;
      }
9552
      case NDB_COND_OR_FUNC:
9553
      {
9554 9555 9556 9557 9558 9559
        level++;
        DBUG_PRINT("info", ("Generating %s group %u", (negated)?"NOR":"OR",
                            level));
        if ((negated) ? filter->begin(NdbScanFilter::NOR)
            : filter->begin(NdbScanFilter::OR) == -1)
          DBUG_RETURN(1);
9560
        negated= FALSE;
9561 9562 9563
        cond= cond->next;
        break;
      }
9564
      case NDB_NOT_FUNC:
9565
      {
9566
        DBUG_PRINT("info", ("Generating negated query"));
9567
        cond= cond->next;
9568
        negated= TRUE;
9569 9570 9571 9572
        break;
      }
      default:
        if (build_scan_filter_predicate(cond, filter, negated))
9573
          DBUG_RETURN(1);
9574
        negated= FALSE;
9575 9576 9577
        break;
      }
      break;
9578 9579
    }
    case NDB_END_COND:
9580 9581
      DBUG_PRINT("info", ("End of group %u", level));
      level--;
9582 9583
      if (cond) cond= cond->next;
      if (filter->end() == -1)
9584
        DBUG_RETURN(1);
9585 9586 9587
      if (!negated)
        break;
      // else fall through (NOT END is an illegal condition)
9588 9589
    default:
    {
9590
      DBUG_PRINT("info", ("Illegal scan filter"));
9591
    }
9592
    }
9593
  }  while (level > 0 || negated);
9594
  
9595
  DBUG_RETURN(0);
9596 9597
}

9598 9599
int
ha_ndbcluster::build_scan_filter(Ndb_cond * &cond, NdbScanFilter *filter)
9600 9601 9602 9603
{
  bool simple_cond= TRUE;
  DBUG_ENTER("build_scan_filter");  

9604 9605 9606
    switch (cond->ndb_item->type) {
    case NDB_FUNCTION:
      switch (cond->ndb_item->qualification.function_type) {
9607 9608
      case NDB_COND_AND_FUNC:
      case NDB_COND_OR_FUNC:
9609 9610 9611 9612 9613 9614 9615 9616 9617
        simple_cond= FALSE;
        break;
      default:
        break;
      }
      break;
    default:
      break;
    }
9618 9619 9620 9621 9622 9623
  if (simple_cond && filter->begin() == -1)
    DBUG_RETURN(1);
  if (build_scan_filter_group(cond, filter))
    DBUG_RETURN(1);
  if (simple_cond && filter->end() == -1)
    DBUG_RETURN(1);
9624

9625
  DBUG_RETURN(0);
9626 9627
}

9628
int
9629
ha_ndbcluster::generate_scan_filter(Ndb_cond_stack *ndb_cond_stack,
9630
                                    NdbScanOperation *op)
9631 9632 9633 9634
{
  DBUG_ENTER("generate_scan_filter");
  if (ndb_cond_stack)
  {
9635
    DBUG_PRINT("info", ("Generating scan filter"));
9636 9637 9638 9639 9640
    NdbScanFilter filter(op);
    bool multiple_cond= FALSE;
    // Wrap an AND group around multiple conditions
    if (ndb_cond_stack->next) {
      multiple_cond= TRUE;
9641
      if (filter.begin() == -1)
9642
        DBUG_RETURN(1); 
9643 9644
    }
    for (Ndb_cond_stack *stack= ndb_cond_stack; 
9645 9646
         (stack); 
         stack= stack->next)
9647
      {
9648
        Ndb_cond *cond= stack->ndb_cond;
9649

9650 9651 9652 9653 9654
        if (build_scan_filter(cond, &filter))
        {
          DBUG_PRINT("info", ("build_scan_filter failed"));
          DBUG_RETURN(1);
        }
9655
      }
9656 9657
    if (multiple_cond && filter.end() == -1)
      DBUG_RETURN(1);
9658 9659 9660 9661 9662 9663
  }
  else
  {  
    DBUG_PRINT("info", ("Empty stack"));
  }

9664
  DBUG_RETURN(0);
9665 9666
}

9667 9668 9669
/*
  get table space info for SHOW CREATE TABLE
*/
9670
char* ha_ndbcluster::get_tablespace_name(THD *thd)
9671
{
9672
  Ndb *ndb= check_ndb_in_thd(thd);
9673
  NDBDICT *ndbdict= ndb->getDictionary();
9674 9675
  NdbError ndberr;
  Uint32 id;
9676
  ndb->setDatabaseName(m_dbname);
9677 9678
  const NDBTAB *ndbtab= m_table;
  DBUG_ASSERT(ndbtab != NULL);
9679 9680
  if (!ndbtab->getTablespace(&id))
  {
9681
    return 0;
9682 9683 9684 9685
  }
  {
    NdbDictionary::Tablespace ts= ndbdict->getTablespace(id);
    ndberr= ndbdict->getNdbError();
9686
    if(ndberr.classification != NdbError::NoError)
9687 9688 9689 9690 9691
      goto err;
    return (my_strdup(ts.getName(), MYF(0)));
  }
err:
  if (ndberr.status == NdbError::TemporaryError)
9692
    push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
9693 9694 9695
			ER_GET_TEMPORARY_ERRMSG, ER(ER_GET_TEMPORARY_ERRMSG),
			ndberr.code, ndberr.message, "NDB");
  else
9696
    push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
9697 9698
			ER_GET_ERRMSG, ER(ER_GET_ERRMSG),
			ndberr.code, ndberr.message, "NDB");
9699 9700 9701
  return 0;
}

9702 9703 9704
/*
  Implements the SHOW NDB STATUS command.
*/
9705 9706 9707
bool
ndbcluster_show_status(THD* thd, stat_print_fn *stat_print,
                       enum ha_stat_type stat_type)
9708
{
9709
  char buf[IO_SIZE];
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
9710
  uint buflen;
9711 9712 9713 9714
  DBUG_ENTER("ndbcluster_show_status");
  
  if (have_ndbcluster != SHOW_OPTION_YES) 
  {
9715 9716 9717 9718 9719
    DBUG_RETURN(FALSE);
  }
  if (stat_type != HA_ENGINE_STATUS)
  {
    DBUG_RETURN(FALSE);
9720
  }
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
9721 9722 9723 9724 9725 9726 9727

  update_status_variables(g_ndb_cluster_connection);
  buflen=
    my_snprintf(buf, sizeof(buf),
                "cluster_node_id=%u, "
                "connected_host=%s, "
                "connected_port=%u, "
9728 9729 9730
                "number_of_storage_nodes=%u, "
                "number_of_ready_storage_nodes=%u, "
                "connect_count=%u",
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
9731 9732 9733
                ndb_cluster_node_id,
                ndb_connected_host,
                ndb_connected_port,
9734 9735 9736
                ndb_number_of_storage_nodes,
                ndb_number_of_ready_storage_nodes,
                ndb_connect_count);
9737 9738
  if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length,
                 STRING_WITH_LEN("connection"), buf, buflen))
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
9739 9740
    DBUG_RETURN(TRUE);

9741
  if (get_thd_ndb(thd) && get_thd_ndb(thd)->ndb)
9742
  {
9743
    Ndb* ndb= (get_thd_ndb(thd))->ndb;
9744 9745
    Ndb::Free_list_usage tmp;
    tmp.m_name= 0;
9746 9747
    while (ndb->get_free_list_usage(&tmp))
    {
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
9748
      buflen=
9749
        my_snprintf(buf, sizeof(buf),
9750 9751
                  "created=%u, free=%u, sizeof=%u",
                  tmp.m_created, tmp.m_free, tmp.m_sizeof);
9752
      if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length,
9753
                     tmp.m_name, strlen(tmp.m_name), buf, buflen))
9754
        DBUG_RETURN(TRUE);
9755 9756
    }
  }
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
9757 9758 9759 9760
#ifdef HAVE_NDB_BINLOG
  ndbcluster_show_status_binlog(thd, stat_print, stat_type);
#endif

9761 9762
  DBUG_RETURN(FALSE);
}
9763

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
9764

9765 9766 9767
/*
  Create a table in NDB Cluster
 */
9768
static uint get_no_fragments(ulonglong max_rows)
9769 9770 9771 9772 9773 9774 9775 9776 9777 9778 9779 9780 9781 9782 9783 9784 9785 9786 9787 9788 9789 9790 9791 9792 9793 9794 9795 9796 9797 9798 9799 9800 9801 9802 9803 9804 9805
{
#if MYSQL_VERSION_ID >= 50000
  uint acc_row_size= 25 + /*safety margin*/ 2;
#else
  uint acc_row_size= pk_length*4;
  /* add acc overhead */
  if (pk_length <= 8)  /* main page will set the limit */
    acc_row_size+= 25 + /*safety margin*/ 2;
  else                /* overflow page will set the limit */
    acc_row_size+= 4 + /*safety margin*/ 4;
#endif
  ulonglong acc_fragment_size= 512*1024*1024;
#if MYSQL_VERSION_ID >= 50100
  return (max_rows*acc_row_size)/acc_fragment_size+1;
#else
  return ((max_rows*acc_row_size)/acc_fragment_size+1
	  +1/*correct rounding*/)/2;
#endif
}


/*
  Routine to adjust default number of partitions to always be a multiple
  of number of nodes and never more than 4 times the number of nodes.

*/
static bool adjusted_frag_count(uint no_fragments, uint no_nodes,
                                uint &reported_frags)
{
  uint i= 0;
  reported_frags= no_nodes;
  while (reported_frags < no_fragments && ++i < 4 &&
         (reported_frags + no_nodes) < MAX_PARTITIONS) 
    reported_frags+= no_nodes;
  return (reported_frags < no_fragments);
}

9806
int ha_ndbcluster::get_default_no_partitions(HA_CREATE_INFO *info)
9807
{
9808 9809 9810 9811 9812 9813 9814 9815 9816 9817 9818
  ha_rows max_rows, min_rows;
  if (info)
  {
    max_rows= info->max_rows;
    min_rows= info->min_rows;
  }
  else
  {
    max_rows= table_share->max_rows;
    min_rows= table_share->min_rows;
  }
9819
  uint reported_frags;
9820 9821
  uint no_fragments=
    get_no_fragments(max_rows >= min_rows ? max_rows : min_rows);
9822
  uint no_nodes= g_ndb_cluster_connection->no_db_nodes();
9823 9824 9825 9826 9827 9828
  if (adjusted_frag_count(no_fragments, no_nodes, reported_frags))
  {
    push_warning(current_thd,
                 MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
    "Ndb might have problems storing the max amount of rows specified");
  }
9829 9830 9831 9832
  return (int)reported_frags;
}


9833 9834 9835 9836 9837 9838 9839 9840 9841 9842 9843 9844 9845 9846 9847 9848 9849 9850 9851 9852 9853 9854 9855 9856 9857 9858 9859 9860 9861 9862 9863 9864 9865 9866 9867 9868 9869 9870 9871 9872 9873 9874
/*
  Set-up auto-partitioning for NDB Cluster

  SYNOPSIS
    set_auto_partitions()
    part_info                  Partition info struct to set-up
 
  RETURN VALUE
    NONE

  DESCRIPTION
    Set-up auto partitioning scheme for tables that didn't define any
    partitioning. We'll use PARTITION BY KEY() in this case which
    translates into partition by primary key if a primary key exists
    and partition by hidden key otherwise.
*/

void ha_ndbcluster::set_auto_partitions(partition_info *part_info)
{
  DBUG_ENTER("ha_ndbcluster::set_auto_partitions");
  part_info->list_of_part_fields= TRUE;
  part_info->part_type= HASH_PARTITION;
  switch (opt_ndb_distribution_id)
  {
  case ND_KEYHASH:
    part_info->linear_hash_ind= FALSE;
    break;
  case ND_LINHASH:
    part_info->linear_hash_ind= TRUE;
    break;
  }
  DBUG_VOID_RETURN;
}


int ha_ndbcluster::set_range_data(void *tab_ref, partition_info *part_info)
{
  NDBTAB *tab= (NDBTAB*)tab_ref;
  int32 *range_data= (int32*)my_malloc(part_info->no_parts*sizeof(int32),
                                       MYF(0));
  uint i;
  int error= 0;
9875
  bool unsigned_flag= part_info->part_expr->unsigned_flag;
9876 9877 9878 9879 9880 9881 9882 9883 9884 9885
  DBUG_ENTER("set_range_data");

  if (!range_data)
  {
    mem_alloc_error(part_info->no_parts*sizeof(int32));
    DBUG_RETURN(1);
  }
  for (i= 0; i < part_info->no_parts; i++)
  {
    longlong range_val= part_info->range_int_array[i];
9886 9887
    if (unsigned_flag)
      range_val-= 0x8000000000000000ULL;
9888
    if (range_val < INT_MIN32 || range_val >= INT_MAX32)
9889
    {
9890 9891 9892 9893 9894 9895 9896 9897
      if ((i != part_info->no_parts - 1) ||
          (range_val != LONGLONG_MAX))
      {
        my_error(ER_LIMITED_PART_RANGE, MYF(0), "NDB");
        error= 1;
        goto error;
      }
      range_val= INT_MAX32;
9898 9899 9900 9901 9902 9903 9904 9905 9906 9907 9908 9909 9910 9911 9912 9913
    }
    range_data[i]= (int32)range_val;
  }
  tab->setRangeListData(range_data, sizeof(int32)*part_info->no_parts);
error:
  my_free((char*)range_data, MYF(0));
  DBUG_RETURN(error);
}

int ha_ndbcluster::set_list_data(void *tab_ref, partition_info *part_info)
{
  NDBTAB *tab= (NDBTAB*)tab_ref;
  int32 *list_data= (int32*)my_malloc(part_info->no_list_values * 2
                                      * sizeof(int32), MYF(0));
  uint32 *part_id, i;
  int error= 0;
9914
  bool unsigned_flag= part_info->part_expr->unsigned_flag;
9915 9916 9917 9918 9919 9920 9921 9922 9923 9924 9925
  DBUG_ENTER("set_list_data");

  if (!list_data)
  {
    mem_alloc_error(part_info->no_list_values*2*sizeof(int32));
    DBUG_RETURN(1);
  }
  for (i= 0; i < part_info->no_list_values; i++)
  {
    LIST_PART_ENTRY *list_entry= &part_info->list_array[i];
    longlong list_val= list_entry->list_value;
9926 9927
    if (unsigned_flag)
      list_val-= 0x8000000000000000ULL;
9928 9929 9930 9931 9932 9933 9934 9935 9936 9937 9938 9939 9940 9941 9942 9943
    if (list_val < INT_MIN32 || list_val > INT_MAX32)
    {
      my_error(ER_LIMITED_PART_RANGE, MYF(0), "NDB");
      error= 1;
      goto error;
    }
    list_data[2*i]= (int32)list_val;
    part_id= (uint32*)&list_data[2*i+1];
    *part_id= list_entry->partition_id;
  }
  tab->setRangeListData(list_data, 2*sizeof(int32)*part_info->no_list_values);
error:
  my_free((char*)list_data, MYF(0));
  DBUG_RETURN(error);
}

9944 9945 9946 9947 9948 9949 9950 9951 9952 9953 9954 9955 9956 9957 9958 9959 9960
/*
  User defined partitioning set-up. We need to check how many fragments the
  user wants defined and which node groups to put those into. Later we also
  want to attach those partitions to a tablespace.

  All the functionality of the partition function, partition limits and so
  forth are entirely handled by the MySQL Server. There is one exception to
  this rule for PARTITION BY KEY where NDB handles the hash function and
  this type can thus be handled transparently also by NDB API program.
  For RANGE, HASH and LIST and subpartitioning the NDB API programs must
  implement the function to map to a partition.
*/

uint ha_ndbcluster::set_up_partition_info(partition_info *part_info,
                                          TABLE *table,
                                          void *tab_par)
{
9961 9962 9963
  uint16 frag_data[MAX_PARTITIONS];
  char *ts_names[MAX_PARTITIONS];
  ulong ts_index= 0, fd_index= 0, i, j;
9964 9965 9966
  NDBTAB *tab= (NDBTAB*)tab_par;
  NDBTAB::FragmentType ftype= NDBTAB::UserDefined;
  partition_element *part_elem;
9967 9968 9969 9970 9971 9972
  bool first= TRUE;
  uint ts_id, ts_version, part_count= 0, tot_ts_name_len;
  List_iterator<partition_element> part_it(part_info->partitions);
  int error;
  char *name_ptr;
  DBUG_ENTER("ha_ndbcluster::set_up_partition_info");
9973 9974 9975 9976 9977 9978 9979 9980 9981 9982 9983 9984 9985

  if (part_info->part_type == HASH_PARTITION &&
      part_info->list_of_part_fields == TRUE)
  {
    Field **fields= part_info->part_field_array;

    if (part_info->linear_hash_ind)
      ftype= NDBTAB::DistrKeyLin;
    else
      ftype= NDBTAB::DistrKeyHash;

    for (i= 0; i < part_info->part_field_list.elements; i++)
    {
9986
      NDBCOL *col= tab->getColumn(fields[i]->field_index);
9987 9988 9989 9990
      DBUG_PRINT("info",("setting dist key on %s", col->getName()));
      col->setPartitionKey(TRUE);
    }
  }
9991
  else 
9992
  {
9993 9994 9995 9996 9997 9998 9999 10000
    if (!current_thd->variables.new_mode)
    {
      push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
                          ER_ILLEGAL_HA_CREATE_OPTION,
                          ER(ER_ILLEGAL_HA_CREATE_OPTION),
                          ndbcluster_hton_name,
                          "LIST, RANGE and HASH partition disabled by default,"
                          " use --new option to enable");
10001
      DBUG_RETURN(HA_ERR_UNSUPPORTED);
10002 10003
    }
   /*
10004 10005 10006 10007 10008 10009 10010 10011 10012 10013 10014 10015 10016 10017 10018 10019
      Create a shadow field for those tables that have user defined
      partitioning. This field stores the value of the partition
      function such that NDB can handle reorganisations of the data
      even when the MySQL Server isn't available to assist with
      calculation of the partition function value.
    */
    NDBCOL col;
    DBUG_PRINT("info", ("Generating partition func value field"));
    col.setName("$PART_FUNC_VALUE");
    col.setType(NdbDictionary::Column::Int);
    col.setLength(1);
    col.setNullable(FALSE);
    col.setPrimaryKey(FALSE);
    col.setAutoIncrement(FALSE);
    tab->addColumn(col);
    if (part_info->part_type == RANGE_PARTITION)
10020
    {
10021 10022 10023 10024
      if ((error= set_range_data((void*)tab, part_info)))
      {
        DBUG_RETURN(error);
      }
10025
    }
10026
    else if (part_info->part_type == LIST_PARTITION)
10027
    {
10028 10029 10030 10031
      if ((error= set_list_data((void*)tab, part_info)))
      {
        DBUG_RETURN(error);
      }
10032 10033 10034
    }
  }
  tab->setFragmentType(ftype);
10035 10036 10037
  i= 0;
  tot_ts_name_len= 0;
  do
10038
  {
10039 10040
    uint ng;
    part_elem= part_it++;
10041
    if (!part_info->is_sub_partitioned())
10042
    {
10043 10044 10045 10046 10047
      ng= part_elem->nodegroup_id;
      if (first && ng == UNDEF_NODEGROUP)
        ng= 0;
      ts_names[fd_index]= part_elem->tablespace_name;
      frag_data[fd_index++]= ng;
10048
    }
10049 10050 10051 10052 10053 10054 10055 10056 10057 10058 10059 10060 10061 10062 10063 10064 10065
    else
    {
      List_iterator<partition_element> sub_it(part_elem->subpartitions);
      j= 0;
      do
      {
        part_elem= sub_it++;
        ng= part_elem->nodegroup_id;
        if (first && ng == UNDEF_NODEGROUP)
          ng= 0;
        ts_names[fd_index]= part_elem->tablespace_name;
        frag_data[fd_index++]= ng;
      } while (++j < part_info->no_subparts);
    }
    first= FALSE;
  } while (++i < part_info->no_parts);
  tab->setDefaultNoPartitionsFlag(part_info->use_default_no_partitions);
10066
  tab->setLinearFlag(part_info->linear_hash_ind);
10067
  {
10068 10069
    ha_rows max_rows= table_share->max_rows;
    ha_rows min_rows= table_share->min_rows;
10070 10071 10072 10073 10074
    if (max_rows < min_rows)
      max_rows= min_rows;
    if (max_rows != (ha_rows)0) /* default setting, don't set fragmentation */
    {
      tab->setMaxRows(max_rows);
10075
      tab->setMinRows(min_rows);
10076 10077
    }
  }
10078 10079 10080 10081
  tab->setTablespaceNames(ts_names, fd_index*sizeof(char*));
  tab->setFragmentCount(fd_index);
  tab->setFragmentData(&frag_data, fd_index*2);
  DBUG_RETURN(0);
10082
}
10083

10084

10085 10086 10087
bool ha_ndbcluster::check_if_incompatible_data(HA_CREATE_INFO *info,
					       uint table_changes)
{
10088 10089 10090
  DBUG_ENTER("ha_ndbcluster::check_if_incompatible_data");
  uint i;
  const NDBTAB *tab= (const NDBTAB *) m_table;
marty@linux.site's avatar
marty@linux.site committed
10091

10092 10093 10094 10095 10096 10097
  if (current_thd->variables.ndb_use_copying_alter_table)
  {
    DBUG_PRINT("info", ("On-line alter table disabled"));
    DBUG_RETURN(COMPATIBLE_DATA_NO);
  }

10098 10099 10100
  for (i= 0; i < table->s->fields; i++) 
  {
    Field *field= table->field[i];
10101 10102
    const NDBCOL *col= tab->getColumn(i);
    if (field->flags & FIELD_IS_RENAMED)
10103 10104 10105 10106
    {
      DBUG_PRINT("info", ("Field has been renamed, copy table"));
      DBUG_RETURN(COMPATIBLE_DATA_NO);
    }
10107
    if ((field->flags & FIELD_IN_ADD_INDEX) &&
10108 10109 10110 10111 10112 10113
        col->getStorageType() == NdbDictionary::Column::StorageTypeDisk)
    {
      DBUG_PRINT("info", ("add/drop index not supported for disk stored column"));
      DBUG_RETURN(COMPATIBLE_DATA_NO);
    }
  }
10114
  if (table_changes != IS_EQUAL_YES)
10115
    DBUG_RETURN(COMPATIBLE_DATA_NO);
10116 10117 10118 10119
  
  /* Check that auto_increment value was not changed */
  if ((info->used_fields & HA_CREATE_USED_AUTO) &&
      info->auto_increment_value != 0)
10120
    DBUG_RETURN(COMPATIBLE_DATA_NO);
10121 10122 10123 10124
  
  /* Check that row format didn't change */
  if ((info->used_fields & HA_CREATE_USED_AUTO) &&
      get_row_type() != info->row_type)
10125
    DBUG_RETURN(COMPATIBLE_DATA_NO);
10126

10127
  DBUG_RETURN(COMPATIBLE_DATA_YES);
10128 10129
}

10130 10131 10132 10133 10134 10135 10136 10137 10138 10139 10140 10141 10142 10143 10144 10145 10146 10147 10148 10149 10150 10151 10152 10153 10154 10155 10156 10157 10158 10159 10160 10161 10162 10163 10164 10165 10166 10167 10168 10169
bool set_up_tablespace(st_alter_tablespace *info,
                       NdbDictionary::Tablespace *ndb_ts)
{
  ndb_ts->setName(info->tablespace_name);
  ndb_ts->setExtentSize(info->extent_size);
  ndb_ts->setDefaultLogfileGroup(info->logfile_group_name);
  return false;
}

bool set_up_datafile(st_alter_tablespace *info,
                     NdbDictionary::Datafile *ndb_df)
{
  if (info->max_size > 0)
  {
    my_error(ER_TABLESPACE_AUTO_EXTEND_ERROR, MYF(0));
    return true;
  }
  ndb_df->setPath(info->data_file_name);
  ndb_df->setSize(info->initial_size);
  ndb_df->setTablespace(info->tablespace_name);
  return false;
}

bool set_up_logfile_group(st_alter_tablespace *info,
                          NdbDictionary::LogfileGroup *ndb_lg)
{
  ndb_lg->setName(info->logfile_group_name);
  ndb_lg->setUndoBufferSize(info->undo_buffer_size);
  return false;
}

bool set_up_undofile(st_alter_tablespace *info,
                     NdbDictionary::Undofile *ndb_uf)
{
  ndb_uf->setPath(info->undo_file_name);
  ndb_uf->setSize(info->initial_size);
  ndb_uf->setLogfileGroup(info->logfile_group_name);
  return false;
}

10170
int ndbcluster_alter_tablespace(THD* thd, st_alter_tablespace *info)
10171 10172
{
  DBUG_ENTER("ha_ndbcluster::alter_tablespace");
10173

10174
  int is_tablespace= 0;
10175 10176
  Ndb *ndb= check_ndb_in_thd(thd);
  if (ndb == NULL)
10177
  {
10178
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
10179
  }
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
10180 10181

  NdbError err;
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
10182
  NDBDICT *dict= ndb->getDictionary();
10183 10184
  int error;
  const char * errmsg;
10185
  LINT_INIT(errmsg);
10186

10187 10188 10189
  switch (info->ts_cmd_type){
  case (CREATE_TABLESPACE):
  {
10190
    error= ER_CREATE_FILEGROUP_FAILED;
10191
    
10192 10193
    NdbDictionary::Tablespace ndb_ts;
    NdbDictionary::Datafile ndb_df;
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
10194
    NdbDictionary::ObjectId objid;
10195 10196 10197 10198 10199 10200 10201 10202
    if (set_up_tablespace(info, &ndb_ts))
    {
      DBUG_RETURN(1);
    }
    if (set_up_datafile(info, &ndb_df))
    {
      DBUG_RETURN(1);
    }
10203
    errmsg= "TABLESPACE";
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
10204
    if (dict->createTablespace(ndb_ts, &objid))
10205 10206
    {
      DBUG_PRINT("error", ("createTablespace returned %d", error));
10207
      goto ndberror;
10208 10209
    }
    DBUG_PRINT("info", ("Successfully created Tablespace"));
10210 10211
    errmsg= "DATAFILE";
    if (dict->createDatafile(ndb_df))
10212
    {
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
10213 10214 10215 10216 10217 10218 10219 10220 10221
      err= dict->getNdbError();
      NdbDictionary::Tablespace tmp= dict->getTablespace(ndb_ts.getName());
      if (dict->getNdbError().code == 0 &&
	  tmp.getObjectId() == objid.getObjectId() &&
	  tmp.getObjectVersion() == objid.getObjectVersion())
      {
	dict->dropTablespace(tmp);
      }
      
10222
      DBUG_PRINT("error", ("createDatafile returned %d", error));
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
10223
      goto ndberror2;
10224
    }
10225
    is_tablespace= 1;
10226 10227 10228 10229
    break;
  }
  case (ALTER_TABLESPACE):
  {
10230
    error= ER_ALTER_FILEGROUP_FAILED;
10231 10232 10233 10234 10235 10236 10237
    if (info->ts_alter_tablespace_type == ALTER_TABLESPACE_ADD_FILE)
    {
      NdbDictionary::Datafile ndb_df;
      if (set_up_datafile(info, &ndb_df))
      {
	DBUG_RETURN(1);
      }
10238 10239
      errmsg= " CREATE DATAFILE";
      if (dict->createDatafile(ndb_df))
10240
      {
10241
	goto ndberror;
10242 10243 10244 10245
      }
    }
    else if(info->ts_alter_tablespace_type == ALTER_TABLESPACE_DROP_FILE)
    {
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
10246 10247 10248 10249 10250 10251
      NdbDictionary::Tablespace ts= dict->getTablespace(info->tablespace_name);
      NdbDictionary::Datafile df= dict->getDatafile(0, info->data_file_name);
      NdbDictionary::ObjectId objid;
      df.getTablespaceId(&objid);
      if (ts.getObjectId() == objid.getObjectId() && 
	  strcmp(df.getPath(), info->data_file_name) == 0)
10252
      {
10253 10254
	errmsg= " DROP DATAFILE";
	if (dict->dropDatafile(df))
10255
	{
10256
	  goto ndberror;
10257 10258 10259 10260 10261
	}
      }
      else
      {
	DBUG_PRINT("error", ("No such datafile"));
10262
	my_error(ER_ALTER_FILEGROUP_FAILED, MYF(0), " NO SUCH FILE");
10263 10264 10265 10266 10267 10268 10269 10270 10271
	DBUG_RETURN(1);
      }
    }
    else
    {
      DBUG_PRINT("error", ("Unsupported alter tablespace: %d", 
			   info->ts_alter_tablespace_type));
      DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
    }
10272
    is_tablespace= 1;
10273 10274 10275 10276
    break;
  }
  case (CREATE_LOGFILE_GROUP):
  {
10277
    error= ER_CREATE_FILEGROUP_FAILED;
10278 10279
    NdbDictionary::LogfileGroup ndb_lg;
    NdbDictionary::Undofile ndb_uf;
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
10280
    NdbDictionary::ObjectId objid;
10281 10282 10283 10284 10285 10286 10287 10288 10289 10290 10291
    if (info->undo_file_name == NULL)
    {
      /*
	REDO files in LOGFILE GROUP not supported yet
      */
      DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
    }
    if (set_up_logfile_group(info, &ndb_lg))
    {
      DBUG_RETURN(1);
    }
10292
    errmsg= "LOGFILE GROUP";
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
10293
    if (dict->createLogfileGroup(ndb_lg, &objid))
10294
    {
10295
      goto ndberror;
10296 10297 10298 10299 10300 10301
    }
    DBUG_PRINT("info", ("Successfully created Logfile Group"));
    if (set_up_undofile(info, &ndb_uf))
    {
      DBUG_RETURN(1);
    }
10302 10303
    errmsg= "UNDOFILE";
    if (dict->createUndofile(ndb_uf))
10304
    {
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
10305 10306 10307 10308 10309 10310 10311 10312 10313
      err= dict->getNdbError();
      NdbDictionary::LogfileGroup tmp= dict->getLogfileGroup(ndb_lg.getName());
      if (dict->getNdbError().code == 0 &&
	  tmp.getObjectId() == objid.getObjectId() &&
	  tmp.getObjectVersion() == objid.getObjectVersion())
      {
	dict->dropLogfileGroup(tmp);
      }
      goto ndberror2;
10314 10315 10316 10317 10318
    }
    break;
  }
  case (ALTER_LOGFILE_GROUP):
  {
10319
    error= ER_ALTER_FILEGROUP_FAILED;
10320 10321 10322 10323 10324 10325 10326 10327 10328 10329 10330 10331
    if (info->undo_file_name == NULL)
    {
      /*
	REDO files in LOGFILE GROUP not supported yet
      */
      DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
    }
    NdbDictionary::Undofile ndb_uf;
    if (set_up_undofile(info, &ndb_uf))
    {
      DBUG_RETURN(1);
    }
10332 10333
    errmsg= "CREATE UNDOFILE";
    if (dict->createUndofile(ndb_uf))
10334
    {
10335
      goto ndberror;
10336 10337 10338 10339 10340
    }
    break;
  }
  case (DROP_TABLESPACE):
  {
10341
    error= ER_DROP_FILEGROUP_FAILED;
10342 10343
    errmsg= "TABLESPACE";
    if (dict->dropTablespace(dict->getTablespace(info->tablespace_name)))
10344
    {
10345
      goto ndberror;
10346
    }
10347
    is_tablespace= 1;
10348 10349 10350 10351
    break;
  }
  case (DROP_LOGFILE_GROUP):
  {
10352
    error= ER_DROP_FILEGROUP_FAILED;
10353 10354
    errmsg= "LOGFILE GROUP";
    if (dict->dropLogfileGroup(dict->getLogfileGroup(info->logfile_group_name)))
10355
    {
10356
      goto ndberror;
10357 10358 10359 10360 10361 10362 10363 10364 10365 10366 10367 10368 10369 10370 10371 10372
    }
    break;
  }
  case (CHANGE_FILE_TABLESPACE):
  {
    DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
  }
  case (ALTER_ACCESS_MODE_TABLESPACE):
  {
    DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
  }
  default:
  {
    DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
  }
  }
10373
#ifdef HAVE_NDB_BINLOG
10374 10375 10376 10377 10378
  if (is_tablespace)
    ndbcluster_log_schema_op(thd, 0,
                             thd->query, thd->query_length,
                             "", info->tablespace_name,
                             0, 0,
10379
                             SOT_TABLESPACE, 0, 0, 0);
10380 10381 10382 10383 10384
  else
    ndbcluster_log_schema_op(thd, 0,
                             thd->query, thd->query_length,
                             "", info->logfile_group_name,
                             0, 0,
10385
                             SOT_LOGFILE_GROUP, 0, 0, 0);
10386
#endif
10387
  DBUG_RETURN(FALSE);
10388 10389

ndberror:
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
10390 10391
  err= dict->getNdbError();
ndberror2:
10392 10393 10394 10395 10396
  ERR_PRINT(err);
  ndb_to_mysql_error(&err);
  
  my_error(error, MYF(0), errmsg);
  DBUG_RETURN(1);
10397 10398
}

10399 10400 10401 10402 10403 10404 10405 10406

bool ha_ndbcluster::get_no_parts(const char *name, uint *no_parts)
{
  Ndb *ndb;
  NDBDICT *dict;
  const NDBTAB *tab;
  int err;
  DBUG_ENTER("ha_ndbcluster::get_no_parts");
10407
  LINT_INIT(err);
10408 10409 10410

  set_dbname(name);
  set_tabname(name);
10411
  for (;;)
10412 10413 10414 10415 10416 10417 10418
  {
    if (check_ndb_connection())
    {
      err= HA_ERR_NO_CONNECTION;
      break;
    }
    ndb= get_ndb();
10419
    ndb->setDatabaseName(m_dbname);
10420 10421
    Ndb_table_guard ndbtab_g(dict= ndb->getDictionary(), m_tabname);
    if (!ndbtab_g.get_table())
10422
      ERR_BREAK(dict->getNdbError(), err);
10423
    *no_parts= ndbtab_g.get_table()->getFragmentCount();
10424
    DBUG_RETURN(FALSE);
10425
  }
10426 10427 10428 10429 10430

  print_error(err, MYF(0));
  DBUG_RETURN(TRUE);
}

10431 10432
static int ndbcluster_fill_files_table(THD *thd, TABLE_LIST *tables,
                                       COND *cond)
10433 10434
{
  TABLE* table= tables->table;
10435
  Ndb *ndb= check_ndb_in_thd(thd);
10436 10437
  NdbDictionary::Dictionary* dict= ndb->getDictionary();
  NdbDictionary::Dictionary::List dflist;
10438
  NdbError ndberr;
10439
  uint i;
10440
  DBUG_ENTER("ndbcluster_fill_files_table");
10441

10442 10443
  dict->listObjects(dflist, NdbDictionary::Object::Datafile);
  ndberr= dict->getNdbError();
10444 10445
  if (ndberr.classification != NdbError::NoError)
    ERR_RETURN(ndberr);
10446

10447
  for (i= 0; i < dflist.count; i++)
10448 10449 10450
  {
    NdbDictionary::Dictionary::List::Element& elt = dflist.elements[i];
    Ndb_cluster_connection_node_iter iter;
10451 10452
    uint id;
    
10453 10454
    g_ndb_cluster_connection->init_get_next_node(iter);

10455
    while ((id= g_ndb_cluster_connection->get_next_node(iter)))
10456
    {
10457
      uint c= 0;
10458
      NdbDictionary::Datafile df= dict->getDatafile(id, elt.name);
10459
      ndberr= dict->getNdbError();
10460 10461 10462 10463 10464 10465
      if(ndberr.classification != NdbError::NoError)
      {
        if (ndberr.classification == NdbError::SchemaError)
          continue;
        ERR_RETURN(ndberr);
      }
10466 10467
      NdbDictionary::Tablespace ts= dict->getTablespace(df.getTablespace());
      ndberr= dict->getNdbError();
10468 10469 10470 10471 10472 10473
      if (ndberr.classification != NdbError::NoError)
      {
        if (ndberr.classification == NdbError::SchemaError)
          continue;
        ERR_RETURN(ndberr);
      }
10474

10475
      table->field[c++]->set_null(); // FILE_ID
10476
      table->field[c]->set_notnull();
10477 10478
      table->field[c++]->store(elt.name, strlen(elt.name),
                               system_charset_info);
10479
      table->field[c]->set_notnull();
10480
      table->field[c++]->store("DATAFILE",8,system_charset_info);
10481
      table->field[c]->set_notnull();
10482
      table->field[c++]->store(df.getTablespace(), strlen(df.getTablespace()),
10483 10484 10485 10486 10487 10488
                               system_charset_info);
      table->field[c++]->set_null(); // TABLE_CATALOG
      table->field[c++]->set_null(); // TABLE_SCHEMA
      table->field[c++]->set_null(); // TABLE_NAME

      // LOGFILE_GROUP_NAME
10489
      table->field[c]->set_notnull();
10490 10491 10492 10493
      table->field[c++]->store(ts.getDefaultLogfileGroup(),
                               strlen(ts.getDefaultLogfileGroup()),
                               system_charset_info);
      table->field[c++]->set_null(); // LOGFILE_GROUP_NUMBER
10494
      table->field[c]->set_notnull();
10495 10496
      table->field[c++]->store(ndbcluster_hton_name,
                               ndbcluster_hton_name_length,
10497 10498 10499 10500 10501
                               system_charset_info); // ENGINE

      table->field[c++]->set_null(); // FULLTEXT_KEYS
      table->field[c++]->set_null(); // DELETED_ROWS
      table->field[c++]->set_null(); // UPDATE_COUNT
10502
      table->field[c]->set_notnull();
10503
      table->field[c++]->store(df.getFree() / ts.getExtentSize()); // FREE_EXTENTS
10504
      table->field[c]->set_notnull();
10505
      table->field[c++]->store(df.getSize() / ts.getExtentSize()); // TOTAL_EXTENTS
10506
      table->field[c]->set_notnull();
10507 10508
      table->field[c++]->store(ts.getExtentSize()); // EXTENT_SIZE

10509
      table->field[c]->set_notnull();
10510
      table->field[c++]->store(df.getSize()); // INITIAL_SIZE
10511
      table->field[c]->set_notnull();
10512 10513 10514 10515 10516 10517 10518 10519 10520
      table->field[c++]->store(df.getSize()); // MAXIMUM_SIZE
      table->field[c++]->set_null(); // AUTOEXTEND_SIZE

      table->field[c++]->set_null(); // CREATION_TIME
      table->field[c++]->set_null(); // LAST_UPDATE_TIME
      table->field[c++]->set_null(); // LAST_ACCESS_TIME
      table->field[c++]->set_null(); // RECOVER_TIME
      table->field[c++]->set_null(); // TRANSACTION_COUNTER

10521
      table->field[c]->set_notnull();
10522 10523
      table->field[c++]->store(df.getObjectVersion()); // VERSION

10524
      table->field[c]->set_notnull();
10525
      table->field[c++]->store("FIXED", 5, system_charset_info); // ROW_FORMAT
10526 10527 10528 10529 10530 10531 10532 10533 10534 10535 10536 10537

      table->field[c++]->set_null(); // TABLE_ROWS
      table->field[c++]->set_null(); // AVG_ROW_LENGTH
      table->field[c++]->set_null(); // DATA_LENGTH
      table->field[c++]->set_null(); // MAX_DATA_LENGTH
      table->field[c++]->set_null(); // INDEX_LENGTH
      table->field[c++]->set_null(); // DATA_FREE
      table->field[c++]->set_null(); // CREATE_TIME
      table->field[c++]->set_null(); // UPDATE_TIME
      table->field[c++]->set_null(); // CHECK_TIME
      table->field[c++]->set_null(); // CHECKSUM

10538
      table->field[c]->set_notnull();
10539
      table->field[c++]->store("NORMAL", 6, system_charset_info);
10540 10541

      char extra[30];
10542 10543
      int len= my_snprintf(extra, sizeof(extra), "CLUSTER_NODE=%u", id);
      table->field[c]->store(extra, len, system_charset_info);
10544
      table->field[c]->set_notnull();
10545 10546 10547 10548
      schema_table_store_record(thd, table);
    }
  }

jonas@perch.ndb.mysql.com's avatar
ndb -  
jonas@perch.ndb.mysql.com committed
10549 10550
  NdbDictionary::Dictionary::List uflist;
  dict->listObjects(uflist, NdbDictionary::Object::Undofile);
10551
  ndberr= dict->getNdbError();
10552 10553
  if (ndberr.classification != NdbError::NoError)
    ERR_RETURN(ndberr);
10554

jonas@perch.ndb.mysql.com's avatar
ndb -  
jonas@perch.ndb.mysql.com committed
10555
  for (i= 0; i < uflist.count; i++)
10556
  {
jonas@perch.ndb.mysql.com's avatar
ndb -  
jonas@perch.ndb.mysql.com committed
10557
    NdbDictionary::Dictionary::List::Element& elt= uflist.elements[i];
10558 10559 10560 10561 10562
    Ndb_cluster_connection_node_iter iter;
    unsigned id;

    g_ndb_cluster_connection->init_get_next_node(iter);

10563
    while ((id= g_ndb_cluster_connection->get_next_node(iter)))
10564 10565
    {
      NdbDictionary::Undofile uf= dict->getUndofile(id, elt.name);
10566
      ndberr= dict->getNdbError();
10567 10568 10569 10570 10571 10572
      if (ndberr.classification != NdbError::NoError)
      {
        if (ndberr.classification == NdbError::SchemaError)
          continue;
        ERR_RETURN(ndberr);
      }
10573 10574 10575
      NdbDictionary::LogfileGroup lfg=
        dict->getLogfileGroup(uf.getLogfileGroup());
      ndberr= dict->getNdbError();
10576 10577 10578 10579 10580 10581
      if (ndberr.classification != NdbError::NoError)
      {
        if (ndberr.classification == NdbError::SchemaError)
          continue;
        ERR_RETURN(ndberr);
      }
10582 10583

      int c= 0;
10584
      table->field[c++]->set_null(); // FILE_ID
10585
      table->field[c]->set_notnull();
10586 10587
      table->field[c++]->store(elt.name, strlen(elt.name),
                               system_charset_info);
10588
      table->field[c]->set_notnull();
10589
      table->field[c++]->store("UNDO LOG", 8, system_charset_info);
10590 10591 10592 10593 10594 10595
      table->field[c++]->set_null(); // TABLESPACE NAME
      table->field[c++]->set_null(); // TABLE_CATALOG
      table->field[c++]->set_null(); // TABLE_SCHEMA
      table->field[c++]->set_null(); // TABLE_NAME

      // LOGFILE_GROUP_NAME
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
10596 10597
      NdbDictionary::ObjectId objid;
      uf.getLogfileGroupId(&objid);
10598
      table->field[c]->set_notnull();
10599 10600 10601
      table->field[c++]->store(uf.getLogfileGroup(),
                               strlen(uf.getLogfileGroup()),
                               system_charset_info);
10602
      table->field[c]->set_notnull();
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
10603
      table->field[c++]->store(objid.getObjectId()); // LOGFILE_GROUP_NUMBER
10604
      table->field[c]->set_notnull();
10605 10606
      table->field[c++]->store(ndbcluster_hton_name,
                               ndbcluster_hton_name_length,
10607 10608 10609 10610 10611
                               system_charset_info); // ENGINE

      table->field[c++]->set_null(); // FULLTEXT_KEYS
      table->field[c++]->set_null(); // DELETED_ROWS
      table->field[c++]->set_null(); // UPDATE_COUNT
10612 10613
      table->field[c++]->set_null(); // FREE_EXTENTS
      table->field[c]->set_notnull();
10614
      table->field[c++]->store(uf.getSize()/4); // TOTAL_EXTENTS
10615
      table->field[c]->set_notnull();
10616 10617
      table->field[c++]->store(4); // EXTENT_SIZE

10618
      table->field[c]->set_notnull();
10619
      table->field[c++]->store(uf.getSize()); // INITIAL_SIZE
10620
      table->field[c]->set_notnull();
10621 10622 10623 10624 10625 10626 10627 10628 10629
      table->field[c++]->store(uf.getSize()); // MAXIMUM_SIZE
      table->field[c++]->set_null(); // AUTOEXTEND_SIZE

      table->field[c++]->set_null(); // CREATION_TIME
      table->field[c++]->set_null(); // LAST_UPDATE_TIME
      table->field[c++]->set_null(); // LAST_ACCESS_TIME
      table->field[c++]->set_null(); // RECOVER_TIME
      table->field[c++]->set_null(); // TRANSACTION_COUNTER

10630
      table->field[c]->set_notnull();
10631 10632 10633 10634 10635 10636 10637 10638 10639 10640 10641 10642 10643 10644 10645
      table->field[c++]->store(uf.getObjectVersion()); // VERSION

      table->field[c++]->set_null(); // ROW FORMAT

      table->field[c++]->set_null(); // TABLE_ROWS
      table->field[c++]->set_null(); // AVG_ROW_LENGTH
      table->field[c++]->set_null(); // DATA_LENGTH
      table->field[c++]->set_null(); // MAX_DATA_LENGTH
      table->field[c++]->set_null(); // INDEX_LENGTH
      table->field[c++]->set_null(); // DATA_FREE
      table->field[c++]->set_null(); // CREATE_TIME
      table->field[c++]->set_null(); // UPDATE_TIME
      table->field[c++]->set_null(); // CHECK_TIME
      table->field[c++]->set_null(); // CHECKSUM

10646
      table->field[c]->set_notnull();
10647
      table->field[c++]->store("NORMAL", 6, system_charset_info);
10648

10649 10650
      char extra[100];
      int len= my_snprintf(extra,sizeof(extra),"CLUSTER_NODE=%u;UNDO_BUFFER_SIZE=%lu",id,lfg.getUndoBufferSize());
10651
      table->field[c]->set_notnull();
10652
      table->field[c]->store(extra, len, system_charset_info);
10653 10654 10655
      schema_table_store_record(thd, table);
    }
  }
10656 10657 10658 10659 10660 10661 10662 10663 10664 10665 10666 10667 10668 10669 10670 10671 10672 10673 10674 10675 10676 10677 10678 10679 10680 10681 10682 10683 10684 10685 10686 10687 10688 10689 10690 10691 10692 10693 10694 10695 10696 10697 10698 10699 10700 10701 10702 10703 10704 10705 10706 10707 10708 10709 10710 10711 10712 10713 10714 10715 10716 10717 10718 10719 10720 10721 10722 10723 10724 10725 10726 10727 10728 10729 10730 10731 10732 10733 10734 10735 10736 10737 10738 10739 10740 10741 10742

  // now for LFGs
  NdbDictionary::Dictionary::List lfglist;
  dict->listObjects(lfglist, NdbDictionary::Object::LogfileGroup);
  ndberr= dict->getNdbError();
  if (ndberr.classification != NdbError::NoError)
    ERR_RETURN(ndberr);

  for (i= 0; i < lfglist.count; i++)
  {
    NdbDictionary::Dictionary::List::Element& elt= lfglist.elements[i];
    unsigned id;

    NdbDictionary::LogfileGroup lfg= dict->getLogfileGroup(elt.name);
    ndberr= dict->getNdbError();
    if (ndberr.classification != NdbError::NoError)
    {
      if (ndberr.classification == NdbError::SchemaError)
        continue;
      ERR_RETURN(ndberr);
    }

    int c= 0;
    table->field[c++]->set_null(); // FILE_ID
    table->field[c++]->set_null(); // name
    table->field[c]->set_notnull();
    table->field[c++]->store("UNDO LOG", 8, system_charset_info);
    table->field[c++]->set_null(); // TABLESPACE NAME
    table->field[c++]->set_null(); // TABLE_CATALOG
    table->field[c++]->set_null(); // TABLE_SCHEMA
    table->field[c++]->set_null(); // TABLE_NAME

    // LOGFILE_GROUP_NAME
    table->field[c]->set_notnull();
    table->field[c++]->store(elt.name, strlen(elt.name),
                             system_charset_info);
    table->field[c]->set_notnull();
    table->field[c++]->store(lfg.getObjectId()); // LOGFILE_GROUP_NUMBER
    table->field[c]->set_notnull();
    table->field[c++]->store(ndbcluster_hton_name,
                             ndbcluster_hton_name_length,
                             system_charset_info); // ENGINE

    table->field[c++]->set_null(); // FULLTEXT_KEYS
    table->field[c++]->set_null(); // DELETED_ROWS
    table->field[c++]->set_null(); // UPDATE_COUNT
    table->field[c]->set_notnull();
    table->field[c++]->store(lfg.getUndoFreeWords()); // FREE_EXTENTS
    table->field[c++]->set_null(); //store(uf.getSize()/4); // TOTAL_EXTENTS
    table->field[c]->set_notnull();
    table->field[c++]->store(4); // EXTENT_SIZE

    table->field[c++]->set_null();//store(uf.getSize()); // INITIAL_SIZE
    table->field[c++]->set_null(); //store(uf.getSize()); // MAXIMUM_SIZE
    table->field[c++]->set_null(); // AUTOEXTEND_SIZE

    table->field[c++]->set_null(); // CREATION_TIME
    table->field[c++]->set_null(); // LAST_UPDATE_TIME
    table->field[c++]->set_null(); // LAST_ACCESS_TIME
    table->field[c++]->set_null(); // RECOVER_TIME
    table->field[c++]->set_null(); // TRANSACTION_COUNTER

    table->field[c]->set_notnull();
    table->field[c++]->store(lfg.getObjectVersion()); // VERSION

    table->field[c++]->set_null(); // ROW FORMAT

    table->field[c++]->set_null(); // TABLE_ROWS
    table->field[c++]->set_null(); // AVG_ROW_LENGTH
    table->field[c++]->set_null(); // DATA_LENGTH
    table->field[c++]->set_null(); // MAX_DATA_LENGTH
    table->field[c++]->set_null(); // INDEX_LENGTH
    table->field[c++]->set_null(); // DATA_FREE
    table->field[c++]->set_null(); // CREATE_TIME
    table->field[c++]->set_null(); // UPDATE_TIME
    table->field[c++]->set_null(); // CHECK_TIME
    table->field[c++]->set_null(); // CHECKSUM

    table->field[c]->set_notnull();
    table->field[c++]->store("NORMAL", 6, system_charset_info);

    char extra[100];
    int len= my_snprintf(extra,sizeof(extra),"UNDO_BUFFER_SIZE=%lu",id,lfg.getUndoBufferSize());
    table->field[c]->set_notnull();
    table->field[c]->store(extra, len, system_charset_info);
    schema_table_store_record(thd, table);
  }
10743
  DBUG_RETURN(0);
10744
}
acurtis@xiphis.org's avatar
acurtis@xiphis.org committed
10745

brian@zim.(none)'s avatar
brian@zim.(none) committed
10746 10747 10748 10749 10750
SHOW_VAR ndb_status_variables_export[]= {
  {"Ndb",                      (char*) &ndb_status_variables,   SHOW_ARRAY},
  {NullS, NullS, SHOW_LONG}
};

10751
struct st_mysql_storage_engine ndbcluster_storage_engine=
10752
{ MYSQL_HANDLERTON_INTERFACE_VERSION, ndbcluster_hton };
acurtis@xiphis.org's avatar
acurtis@xiphis.org committed
10753 10754 10755 10756

mysql_declare_plugin(ndbcluster)
{
  MYSQL_STORAGE_ENGINE_PLUGIN,
10757
  &ndbcluster_storage_engine,
10758
  ndbcluster_hton_name,
acurtis@xiphis.org's avatar
acurtis@xiphis.org committed
10759
  "MySQL AB",
10760 10761
  "Clustered, fault-tolerant tables",
  ndbcluster_init, /* Plugin Init */
acurtis@xiphis.org's avatar
acurtis@xiphis.org committed
10762 10763
  NULL, /* Plugin Deinit */
  0x0100 /* 1.0 */,
10764 10765 10766
  ndb_status_variables_export,/* status variables                */
  NULL,                       /* system variables                */
  NULL                        /* config options                  */
acurtis@xiphis.org's avatar
acurtis@xiphis.org committed
10767 10768 10769 10770
}
mysql_declare_plugin_end;

#endif