ha_ndbcluster.cc 293 KB
Newer Older
1
/* Copyright (C) 2000-2003 MySQL AB
2 3 4

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
5
  the Free Software Foundation; version 2 of the License.
6 7 8 9 10 11 12 13

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
14
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
15 16 17 18 19 20 21
*/

/*
  This file defines the NDB Cluster handler: the interface between MySQL and
  NDB Cluster
*/

22
#ifdef USE_PRAGMA_IMPLEMENTATION
23
#pragma implementation				// gcc: Class implementation
24 25 26
#endif

#include "mysql_priv.h"
27
#include "rpl_mi.h"
28 29

#include <my_dir.h>
acurtis@xiphis.org's avatar
acurtis@xiphis.org committed
30
#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
31 32
#include "ha_ndbcluster.h"
#include <ndbapi/NdbApi.hpp>
33
#include "ha_ndbcluster_cond.h"
34
#include <../util/Bitmask.hpp>
35
#include <ndbapi/NdbIndexStat.hpp>
36

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
37
#include "ha_ndbcluster_binlog.h"
38
#include "ha_ndbcluster_tables.h"
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
39

acurtis@xiphis.org's avatar
acurtis@xiphis.org committed
40 41
#include <mysql/plugin.h>

42 43 44 45 46
#ifdef ndb_dynamite
#undef assert
#define assert(x) do { if(x) break; ::printf("%s %d: assert failed: %s\n", __FILE__, __LINE__, #x); ::fflush(stdout); ::signal(SIGABRT,SIG_DFL); ::abort(); ::kill(::getpid(),6); ::kill(::getpid(),9); } while (0)
#endif

47 48 49
// options from from mysqld.cc
extern my_bool opt_ndb_optimized_node_selection;
extern const char *opt_ndbcluster_connectstring;
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
50
extern ulong opt_ndb_cache_check_time;
51

52 53 54 55 56 57 58 59 60 61
// ndb interface initialization/cleanup
#ifdef  __cplusplus
extern "C" {
#endif
extern void ndb_init_internal();
extern void ndb_end_internal();
#ifdef  __cplusplus
}
#endif

62 63 64 65 66 67
const char *ndb_distribution_names[]= {"KEYHASH", "LINHASH", NullS};
TYPELIB ndb_distribution_typelib= { array_elements(ndb_distribution_names)-1,
                                    "", ndb_distribution_names, NULL };
const char *opt_ndb_distribution= ndb_distribution_names[ND_KEYHASH];
enum ndb_distribution opt_ndb_distribution_id= ND_KEYHASH;

68
// Default value for parallelism
69
static const int parallelism= 0;
70

71 72
// Default value for max number of transactions
// createable against NDB from this handler
tulin@dl145b.mysql.com's avatar
tulin@dl145b.mysql.com committed
73
static const int max_transactions= 3; // should really be 2 but there is a transaction to much allocated when loch table is used
74

75 76
static uint ndbcluster_partition_flags();
static uint ndbcluster_alter_table_flags(uint flags);
77
static int ndbcluster_init(void *);
78 79 80 81 82 83 84 85 86 87 88
static int ndbcluster_end(handlerton *hton, ha_panic_function flag);
static bool ndbcluster_show_status(handlerton *hton, THD*,
                                   stat_print_fn *,
                                   enum ha_stat_type);
static int ndbcluster_alter_tablespace(handlerton *hton,
                                       THD* thd, 
                                       st_alter_tablespace *info);
static int ndbcluster_fill_files_table(handlerton *hton,
                                       THD *thd, 
                                       TABLE_LIST *tables, 
                                       COND *cond);
89

90
handlerton *ndbcluster_hton;
91

92 93
/*
  Handlerton factory: allocate a new ha_ndbcluster handler instance
  on the supplied mem_root.
*/
static handler *ndbcluster_create_handler(handlerton *hton,
                                          TABLE_SHARE *table,
                                          MEM_ROOT *mem_root)
{
  return new (mem_root) ha_ndbcluster(hton, table);
}

99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
/* Partitioning capabilities reported by the NDB engine. */
static uint ndbcluster_partition_flags()
{
  return (HA_CAN_PARTITION | HA_CAN_UPDATE_PARTITION_KEY |
          HA_CAN_PARTITION_UNIQUE | HA_USE_AUTO_PARTITION);
}

/*
  Online ALTER TABLE capabilities for NDB.  Dropping a partition cannot
  be performed online, so no online flags are reported in that case.
*/
static uint ndbcluster_alter_table_flags(uint flags)
{
  if (flags & ALTER_DROP_PARTITION)
    return 0;
  return (HA_ONLINE_ADD_INDEX | HA_ONLINE_DROP_INDEX |
          HA_ONLINE_ADD_UNIQUE_INDEX | HA_ONLINE_DROP_UNIQUE_INDEX |
          HA_PARTITION_FUNCTION_SUPPORTED);
}

116
#define NDB_AUTO_INCREMENT_RETRIES 10
117 118

#define ERR_PRINT(err) \
119
  DBUG_PRINT("error", ("%d  message: %s", err.code, err.message))
120

121 122
#define ERR_RETURN(err)                  \
{                                        \
123
  const NdbError& tmp= err;              \
124
  set_ndb_err(current_thd, tmp);         \
125
  DBUG_RETURN(ndb_to_mysql_error(&tmp)); \
126 127
}

128 129 130
#define ERR_BREAK(err, code)             \
{                                        \
  const NdbError& tmp= err;              \
131
  set_ndb_err(current_thd, tmp);         \
132 133 134 135
  code= ndb_to_mysql_error(&tmp);        \
  break;                                 \
}

136
static int ndbcluster_inited= 0;
137
int ndbcluster_terminating= 0;
138

139
static Ndb* g_ndb= NULL;
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
140
Ndb_cluster_connection* g_ndb_cluster_connection= NULL;
monty@mysql.com's avatar
monty@mysql.com committed
141
uchar g_node_id_map[max_ndb_nodes];
142

143 144 145 146
// Handler synchronization
pthread_mutex_t ndbcluster_mutex;

// Table lock handling
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
147
HASH ndbcluster_open_tables;
148 149 150

static byte *ndbcluster_get_key(NDB_SHARE *share,uint *length,
                                my_bool not_used __attribute__((unused)));
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
151 152 153
#ifdef HAVE_NDB_BINLOG
static int rename_share(NDB_SHARE *share, const char *new_key);
#endif
154
static int ndb_get_table_statistics(ha_ndbcluster*, bool, Ndb*, const NDBTAB *, 
155
                                    struct Ndb_statistics *);
156

157

mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
158
// Util thread variables
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
159
pthread_t ndb_util_thread;
160
int ndb_util_thread_running= 0;
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
161 162
pthread_mutex_t LOCK_ndb_util_thread;
pthread_cond_t COND_ndb_util_thread;
163
pthread_cond_t COND_ndb_util_ready;
164
pthread_handler_t ndb_util_thread_func(void *arg);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
165
ulong ndb_cache_check_time;
166

167 168 169 170
/*
  Dummy buffer to read zero pack_length fields
  which are mapped to 1 char
*/
171
static uint32 dummy_buf;
172

173 174 175 176 177 178 179 180 181 182 183
/*
  Stats that can be retrieved from ndb
*/

struct Ndb_statistics {
  Uint64 row_count;
  Uint64 commit_count;
  Uint64 row_size;
  Uint64 fragment_memory;
};

184 185 186 187 188 189
/* Status variables shown with 'show status like 'Ndb%' */

static long ndb_cluster_node_id= 0;
static const char * ndb_connected_host= 0;
static long ndb_connected_port= 0;
static long ndb_number_of_replicas= 0;
190 191
long ndb_number_of_data_nodes= 0;
long ndb_number_of_ready_data_nodes= 0;
192
long ndb_connect_count= 0;
193 194 195 196 197 198 199

/*
  Refresh the SHOW STATUS 'Ndb%' counters from the cluster connection.
  Always returns 0.
*/
static int update_status_variables(Ndb_cluster_connection *c)
{
  ndb_cluster_node_id=         c->node_id();
  ndb_connected_port=          c->get_connected_port();
  ndb_connected_host=          c->get_connected_host();
  ndb_number_of_replicas=      0;  // not reported (see commented-out status var)
  ndb_number_of_ready_data_nodes= c->get_no_ready();
  ndb_number_of_data_nodes=     c->no_db_nodes();
  ndb_connect_count= c->get_connect_count();
  return 0;
}

serg@serg.mylan's avatar
serg@serg.mylan committed
206
SHOW_VAR ndb_status_variables[]= {
207
  {"cluster_node_id",        (char*) &ndb_cluster_node_id,         SHOW_LONG},
208 209
  {"config_from_host",         (char*) &ndb_connected_host,      SHOW_CHAR_PTR},
  {"config_from_port",         (char*) &ndb_connected_port,          SHOW_LONG},
210
//  {"number_of_replicas",     (char*) &ndb_number_of_replicas,      SHOW_LONG},
211
  {"number_of_data_nodes",(char*) &ndb_number_of_data_nodes, SHOW_LONG},
212 213 214
  {NullS, NullS, SHOW_LONG}
};

215 216 217 218
/*
  Error handling functions
*/

219
/* Note for merge: old mapping table, moved to storage/ndb/ndberror.c */
220

221
static int ndb_to_mysql_error(const NdbError *ndberr)
222
{
223 224
  /* read the mysql mapped error code */
  int error= ndberr->mysql_code;
225

226 227 228 229 230 231 232 233 234 235 236 237 238
  switch (error)
  {
    /* errors for which we do not add warnings, just return mapped error code
    */
  case HA_ERR_NO_SUCH_TABLE:
  case HA_ERR_KEY_NOT_FOUND:
  case HA_ERR_FOUND_DUPP_KEY:
    return error;

    /* Mapping missing, go with the ndb error code*/
  case -1:
    error= ndberr->code;
    break;
239

240 241 242 243
    /* Mapping exists, go with the mapped code */
  default:
    break;
  }
244

245 246 247 248 249 250
  /*
    Push the NDB error message as warning
    - Used to be able to use SHOW WARNINGS toget more info on what the error is
    - Used by replication to see if the error was temporary
  */
  if (ndberr->status == NdbError::TemporaryError)
251
    push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
252 253 254 255 256 257 258
			ER_GET_TEMPORARY_ERRMSG, ER(ER_GET_TEMPORARY_ERRMSG),
			ndberr->code, ndberr->message, "NDB");
  else
    push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
			ER_GET_ERRMSG, ER(ER_GET_ERRMSG),
			ndberr->code, ndberr->message, "NDB");
  return error;
259 260
}

261 262
/*
  Execute the pending operations without committing, treating row-level
  errors as ignorable.  Constraint violations and missing rows are
  swallowed (returns 0); any other error classification returns -1.
*/
int execute_no_commit_ignore_no_key(ha_ndbcluster *h, NdbTransaction *trans)
{
  const int exec_res=
    trans->execute(NdbTransaction::NoCommit,
                   NdbOperation::AO_IgnoreError,
                   h->m_force_send);
  if (exec_res == -1)
    return -1;

  const NdbError &err= trans->getNdbError();
  switch (err.classification) {
  case NdbError::NoError:
  case NdbError::ConstraintViolation:
  case NdbError::NoDataFound:
    /* These outcomes are expected/ignorable in this execution mode. */
    return 0;
  default:
    return -1;
  }
}
276 277

inline
278
int execute_no_commit(ha_ndbcluster *h, NdbTransaction *trans,
279
		      bool force_release)
280
{
281
  h->release_completed_operations(trans, force_release);
282 283 284
  return h->m_ignore_no_key ?
    execute_no_commit_ignore_no_key(h,trans) :
    trans->execute(NdbTransaction::NoCommit,
285
		   NdbOperation::AbortOnError,
286
		   h->m_force_send);
287 288 289
}

/*
  Commit the transaction, aborting on any error.  Force-send setting is
  taken from the handler instance.
*/
inline
int execute_commit(ha_ndbcluster *h, NdbTransaction *trans)
{
  return trans->execute(NdbTransaction::Commit,
                        NdbOperation::AbortOnError,
                        h->m_force_send);
}

/*
  Commit the transaction, aborting on any error.  Overload used where no
  handler instance is at hand; force-send comes from the session variable.
*/
inline
int execute_commit(THD *thd, NdbTransaction *trans)
{
  return trans->execute(NdbTransaction::Commit,
                        NdbOperation::AbortOnError,
                        thd->variables.ndb_force_send);
}

inline
306
int execute_no_commit_ie(ha_ndbcluster *h, NdbTransaction *trans,
307
			 bool force_release)
308
{
309
  h->release_completed_operations(trans, force_release);
310
  return trans->execute(NdbTransaction::NoCommit,
311
                        NdbOperation::AO_IgnoreError,
312
                        h->m_force_send);
313 314
}

315 316 317
/*
  Place holder for ha_ndbcluster thread specific data
*/
318 319 320 321 322
/*
  Hash callback: return a pointer to (and the length of) the key used to
  look up a THD_NDB_SHARE in the per-thread open_tables hash.
*/
static
byte *thd_ndb_share_get_key(THD_NDB_SHARE *thd_ndb_share, uint *length,
                            my_bool not_used __attribute__((unused)))
{
  *length= sizeof(thd_ndb_share->key);
  return (byte*) &thd_ndb_share->key;
}

326 327
/*
  Per-THD NDB context: owns an Ndb object connected via the global
  cluster connection plus transaction bookkeeping state and a hash of
  per-table statistics entries.
*/
Thd_ndb::Thd_ndb()
{
  ndb= new Ndb(g_ndb_cluster_connection, "");
  lock_count= 0;
  count= 0;
  all= NULL;
  stmt= NULL;
  m_error= FALSE;
  /*
    Fix: the original code did 'query_state&= NDB_QUERY_NORMAL', AND-ing
    into the still-uninitialized member (reading an indeterminate value).
    Plain assignment initializes the state deterministically.
  */
  query_state= NDB_QUERY_NORMAL;
  options= 0;
  (void) hash_init(&open_tables, &my_charset_bin, 5, 0, 0,
                   (hash_get_key)thd_ndb_share_get_key, 0, 0);
}

/*
  Tear down the per-THD NDB context: report leaked NdbApi objects in
  debug builds, delete the Ndb object, and free table bookkeeping.
*/
Thd_ndb::~Thd_ndb()
{
  if (ndb)
  {
#ifndef DBUG_OFF
    /* Debug builds: report NdbApi free-list objects never released. */
    Ndb::Free_list_usage tmp;
    tmp.m_name= 0;
    while (ndb->get_free_list_usage(&tmp))
    {
      uint leaked= (uint) tmp.m_created - tmp.m_free;
      if (leaked)
        fprintf(stderr, "NDB: Found %u %s%s that %s not been released\n",
                leaked, tmp.m_name,
                (leaked == 1)?"":"'s",
                (leaked == 1)?"has":"have");
    }
#endif
    delete ndb;
    ndb= NULL;
  }
  changed_tables.empty();
  hash_free(&open_tables);
}

/*
  Reset the per-statement table-statistics bookkeeping before a new set
  of tables is opened.
*/
void
Thd_ndb::init_open_tables()
{
  count= 0;
  m_error= FALSE;
  my_hash_reset(&open_tables);
}

/*
  Look up (or lazily create) the THD_NDB_SHARE entry for 'key' in this
  thread's open_tables hash.  New entries are allocated on the
  transaction mem_root, so they live until the transaction ends.
  Stale entries (from an earlier statement) have their stats reset.
*/
THD_NDB_SHARE *
Thd_ndb::get_open_table(THD *thd, const void *key)
{
  DBUG_ENTER("Thd_ndb::get_open_table");
  HASH_SEARCH_STATE state;
  THD_NDB_SHARE *thd_ndb_share=
    (THD_NDB_SHARE*)hash_first(&open_tables, (byte *)&key, sizeof(key), &state);
  /* The hash may hold colliding entries; scan until the key matches. */
  while (thd_ndb_share && thd_ndb_share->key != key)
    thd_ndb_share= (THD_NDB_SHARE*)hash_next(&open_tables, (byte *)&key, sizeof(key), &state);
  if (thd_ndb_share == 0)
  {
    /* NOTE(review): alloc_root result is not checked for NULL here —
       presumably OOM aborts elsewhere; confirm. */
    thd_ndb_share= (THD_NDB_SHARE *) alloc_root(&thd->transaction.mem_root,
                                                sizeof(THD_NDB_SHARE));
    thd_ndb_share->key= key;
    thd_ndb_share->stat.last_count= count;
    thd_ndb_share->stat.no_uncommitted_rows_count= 0;
    thd_ndb_share->stat.records= ~(ha_rows)0;
    my_hash_insert(&open_tables, (byte *)thd_ndb_share);
  }
  else if (thd_ndb_share->stat.last_count != count)
  {
    /* Entry belongs to an earlier statement: reset its statistics. */
    thd_ndb_share->stat.last_count= count;
    thd_ndb_share->stat.no_uncommitted_rows_count= 0;
    thd_ndb_share->stat.records= ~(ha_rows)0;
  }
  DBUG_PRINT("exit", ("thd_ndb_share: 0x%lx  key: 0x%lx",
                      (long) thd_ndb_share, (long) key));
  DBUG_RETURN(thd_ndb_share);
}

402 403 404
/* Return the Ndb object belonging to the current THD's Thd_ndb context. */
inline
Ndb *ha_ndbcluster::get_ndb()
{
  return get_thd_ndb(current_thd)->ndb;
}

/*
 * manage uncommitted insert/deletes during transactio to get records correct
 */

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
412 413 414
void ha_ndbcluster::set_rec_per_key()
{
  DBUG_ENTER("ha_ndbcluster::get_status_const");
415
  for (uint i=0 ; i < table_share->keys ; i++)
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
416 417 418 419 420 421
  {
    table->key_info[i].rec_per_key[table->key_info[i].key_parts-1]= 1;
  }
  DBUG_VOID_RETURN;
}

422 423 424 425
/*
  Return the (approximate) number of rows in the table: the row count
  fetched from NDB plus this transaction's uncommitted insert/delete
  delta.  Returns HA_POS_ERROR if statistics could not be fetched.
*/
ha_rows ha_ndbcluster::records()
{
  ha_rows retval;
  DBUG_ENTER("ha_ndbcluster::records");
  /* NOTE(review): assumes m_table_info is non-NULL here (set when the
     table is used in the current transaction) — confirm against callers. */
  struct Ndb_local_table_statistics *local_info= m_table_info;
  DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
                      ((const NDBTAB *)m_table)->getTableId(),
                      local_info->no_uncommitted_rows_count));

  Ndb *ndb= get_ndb();
  ndb->setDatabaseName(m_dbname);
  struct Ndb_statistics stat;
  if (ndb_get_table_statistics(this, TRUE, ndb, m_table, &stat) == 0)
  {
    retval= stat.row_count;
  }
  else
  {
    DBUG_RETURN(HA_POS_ERROR);
  }

  THD *thd= current_thd;
  /* After a failed execute the uncommitted-row delta is meaningless. */
  if (get_thd_ndb(thd)->m_error)
    local_info->no_uncommitted_rows_count= 0;

  DBUG_RETURN(retval + local_info->no_uncommitted_rows_count);
}

450
int ha_ndbcluster::records_update()
451
{
452
  if (m_ha_not_exact_count)
453
    return 0;
454
  DBUG_ENTER("ha_ndbcluster::records_update");
455 456
  int result= 0;

457
  struct Ndb_local_table_statistics *local_info= m_table_info;
458
  DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
459
                      ((const NDBTAB *)m_table)->getTableId(),
460
                      local_info->no_uncommitted_rows_count));
461
  {
462
    Ndb *ndb= get_ndb();
463
    struct Ndb_statistics stat;
464 465 466 467
    if (ndb->setDatabaseName(m_dbname))
    {
      return my_errno= HA_ERR_OUT_OF_MEM;
    }
468
    result= ndb_get_table_statistics(this, TRUE, ndb, m_table, &stat);
stewart@willster.(none)'s avatar
stewart@willster.(none) committed
469
    if (result == 0)
470 471 472
    {
      stats.mean_rec_length= stat.row_size;
      stats.data_file_length= stat.fragment_memory;
473
      local_info->records= stat.row_count;
474 475
    }
  }
476 477
  {
    THD *thd= current_thd;
478
    if (get_thd_ndb(thd)->m_error)
479
      local_info->no_uncommitted_rows_count= 0;
480
  }
481 482
  if (result == 0)
    stats.records= local_info->records+ local_info->no_uncommitted_rows_count;
483
  DBUG_RETURN(result);
484 485
}

486 487
/*
  Record that an execute failed in this transaction so that uncommitted
  row counts are known to be unreliable.  No-op in not-exact-count mode.
*/
void ha_ndbcluster::no_uncommitted_rows_execute_failure()
{
  if (m_ha_not_exact_count)
    return;
  DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_execute_failure");
  get_thd_ndb(current_thd)->m_error= TRUE;
  DBUG_VOID_RETURN;
}

495 496
/*
  Adjust this table's uncommitted-row delta by 'c' (positive for
  inserts, negative for deletes).  No-op in not-exact-count mode.
*/
void ha_ndbcluster::no_uncommitted_rows_update(int c)
{
  if (m_ha_not_exact_count)
    return;
  DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_update");
  struct Ndb_local_table_statistics *local_info= m_table_info;
  local_info->no_uncommitted_rows_count+= c;
  DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
                      ((const NDBTAB *)m_table)->getTableId(),
                      local_info->no_uncommitted_rows_count));
  DBUG_VOID_RETURN;
}

/*
  Start a new statement: bump the per-THD statement counter (which
  invalidates stale per-table stats) and clear the error flag.
  No-op in not-exact-count mode.
*/
void ha_ndbcluster::no_uncommitted_rows_reset(THD *thd)
{
  if (m_ha_not_exact_count)
    return;
  DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_reset");
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  thd_ndb->count++;
  thd_ndb->m_error= FALSE;
  DBUG_VOID_RETURN;
}

519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554
/*
  Sets the latest ndb error code on the thd_ndb object such that it
  can be retrieved later to know which ndb error caused the handler
  error.
*/
/*
  Record the latest ndb error code on the Thd_ndb object so that it can
  be retrieved later to know which ndb error caused the handler error.
  Silently returns if the THD has no Thd_ndb context.
*/
static void set_ndb_err(THD *thd, const NdbError &err)
{
  DBUG_ENTER("set_ndb_err");
  ERR_PRINT(err);

  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  if (thd_ndb == NULL)
    DBUG_VOID_RETURN;
#ifdef NOT_YET
  /*
    Check if error code is overwritten, in this case the original
    failure cause will be lost.  E.g. if 4350 error is given. So
    push a warning so that it can be detected which is the root
    error cause.
  */
  if (thd_ndb->m_query_id == thd->query_id &&
      thd_ndb->m_error_code != 0 &&
      thd_ndb->m_error_code != err.code)
  {
    char buf[FN_REFLEN];
    ndb_error_string(thd_ndb->m_error_code, buf, sizeof(buf));
    push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
			ER_GET_ERRMSG, ER(ER_GET_ERRMSG),
			thd_ndb->m_error_code, buf, "NDB");
  }
#endif
  thd_ndb->m_query_id= thd->query_id;
  thd_ndb->m_error_code= err.code;
  DBUG_VOID_RETURN;
}

555
/*
  Map an NDB transaction error to a MySQL handler error code.
  Side effects: records the ndb error on the Thd_ndb object; on schema
  errors invalidates the cached table definition and closes unused
  cached handlers; on duplicate-key errors stores the offending key
  index in m_dupkey.
*/
int ha_ndbcluster::ndb_err(NdbTransaction *trans)
{
  THD *thd= current_thd;
  int res;
  NdbError err= trans->getNdbError();
  DBUG_ENTER("ndb_err");
  
  set_ndb_err(thd, err);

  switch (err.classification) {
  case NdbError::SchemaError:
  {
    // TODO perhaps we need to do more here, invalidate also in the cache
    m_table->setStatusInvalid();
    /* Close other open handlers not used by any thread */
    TABLE_LIST table_list;
    bzero((char*) &table_list,sizeof(table_list));
    table_list.db= m_dbname;
    table_list.alias= table_list.table_name= m_tabname;
    close_cached_tables(thd, 0, &table_list);
    break;
  }
  default:
    break;
  }
  res= ndb_to_mysql_error(&err);
  DBUG_PRINT("info", ("transformed ndbcluster error %d to mysql error %d", 
                      err.code, res));
  if (res == HA_ERR_FOUND_DUPP_KEY)
  {
    if (m_rows_to_insert == 1)
    {
      /*
	We can only distinguish between primary and non-primary
	violations here, so we need to return MAX_KEY for non-primary
	to signal that key is unknown
      */
      /* NDB error 630 == primary key violation. */
      m_dupkey= err.code == 630 ? table_share->primary_key : MAX_KEY; 
    }
    else
    {
      /* We are batching inserts, offending key is not available */
      m_dupkey= (uint) -1;
    }
  }
  DBUG_RETURN(res);
}


604
/*
605
  Override the default get_error_message in order to add the 
606 607 608
  error message of NDB 
 */

609
bool ha_ndbcluster::get_error_message(int error, 
610
                                      String *buf)
611
{
612
  DBUG_ENTER("ha_ndbcluster::get_error_message");
613
  DBUG_PRINT("enter", ("error: %d", error));
614

615
  Ndb *ndb= get_ndb();
616
  if (!ndb)
617
    DBUG_RETURN(FALSE);
618

619
  const NdbError err= ndb->getNdbError(error);
620 621 622 623
  bool temporary= err.status==NdbError::TemporaryError;
  buf->set(err.message, strlen(err.message), &my_charset_bin);
  DBUG_PRINT("exit", ("message: %s, temporary: %d", buf->ptr(), temporary));
  DBUG_RETURN(temporary);
624 625 626
}


tulin@dl145c.mysql.com's avatar
tulin@dl145c.mysql.com committed
627
#ifndef DBUG_OFF
/*
  Check if type is supported by NDB.
  Debug-build-only helper used in DBUG_ASSERTs.
*/

static bool ndb_supported_type(enum_field_types type)
{
  switch (type) {
  case MYSQL_TYPE_TINY:
  case MYSQL_TYPE_SHORT:
  case MYSQL_TYPE_LONG:
  case MYSQL_TYPE_INT24:
  case MYSQL_TYPE_LONGLONG:
  case MYSQL_TYPE_FLOAT:
  case MYSQL_TYPE_DOUBLE:
  case MYSQL_TYPE_DECIMAL:
  case MYSQL_TYPE_NEWDECIMAL:
  case MYSQL_TYPE_TIMESTAMP:
  case MYSQL_TYPE_DATETIME:
  case MYSQL_TYPE_DATE:
  case MYSQL_TYPE_NEWDATE:
  case MYSQL_TYPE_TIME:
  case MYSQL_TYPE_YEAR:
  case MYSQL_TYPE_STRING:
  case MYSQL_TYPE_VAR_STRING:
  case MYSQL_TYPE_VARCHAR:
  case MYSQL_TYPE_TINY_BLOB:
  case MYSQL_TYPE_BLOB:
  case MYSQL_TYPE_MEDIUM_BLOB:
  case MYSQL_TYPE_LONG_BLOB:
  case MYSQL_TYPE_ENUM:
  case MYSQL_TYPE_SET:
  case MYSQL_TYPE_BIT:
  case MYSQL_TYPE_GEOMETRY:
    return TRUE;
  case MYSQL_TYPE_NULL:
    break;
  }
  /* MYSQL_TYPE_NULL (and anything unlisted) is unsupported. */
  return FALSE;
}
#endif /* !DBUG_OFF */
pekka@mysql.com's avatar
pekka@mysql.com committed
668 669


670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689
/*
  Check if MySQL field type forces var part in ndb storage.
  Only the variable-length string types do; blobs/geometry are stored
  out-of-line and every other type is fixed-size.
*/
static bool field_type_forces_var_part(enum_field_types type)
{
  return (type == MYSQL_TYPE_VAR_STRING ||
          type == MYSQL_TYPE_VARCHAR);
}

690 691 692 693 694
/*
  Instruct NDB to set the value of the hidden primary key
*/

/*
  Instruct NDB to set the value of the hidden primary key.
  The hidden key has no Field object; the raw bytes are passed through.
  Returns TRUE (non-zero) on failure.
*/
bool ha_ndbcluster::set_hidden_key(NdbOperation *ndb_op,
                                   uint fieldnr, const byte *field_ptr)
{
  DBUG_ENTER("set_hidden_key");
  DBUG_RETURN(ndb_op->equal(fieldnr, (char*)field_ptr) != 0);
}


/*
  Instruct NDB to set the value of one primary key attribute
*/

/*
  Instruct NDB to set the value of one primary key attribute.
  Blob fields cannot be key parts (asserted).  Returns non-zero on
  failure.
*/
int ha_ndbcluster::set_ndb_key(NdbOperation *ndb_op, Field *field,
                               uint fieldnr, const byte *field_ptr)
{
  uint32 pack_len= field->pack_length();
  DBUG_ENTER("set_ndb_key");
  DBUG_PRINT("enter", ("%d: %s, ndb_type: %u, len=%d", 
                       fieldnr, field->field_name, field->type(),
                       pack_len));
  DBUG_DUMP("key", (char*)field_ptr, pack_len);
  
  DBUG_ASSERT(ndb_supported_type(field->type()));
  DBUG_ASSERT(! (field->flags & BLOB_FLAG));
  // Common implementation for most field types
  DBUG_RETURN(ndb_op->equal(fieldnr, (char*) field_ptr, pack_len) != 0);
}


/*
 Instruct NDB to set the value of one attribute
*/

/*
  Instruct NDB to set the value of one attribute on 'ndb_op'.

  The field value is read at field->ptr + row_offset.  Handles four
  cases: zero-pack-length fields (sent as a 1/0 null marker word),
  ordinary fields, BIT fields (word-aligned, with endian fixup), and
  blobs (via an NdbBlob handle; *set_blob_value is set TRUE when a blob
  value was written).  Returns non-zero on failure.
*/
int ha_ndbcluster::set_ndb_value(NdbOperation *ndb_op, Field *field, 
                                 uint fieldnr, int row_offset,
                                 bool *set_blob_value)
{
  const byte* field_ptr= field->ptr + row_offset;
  uint32 pack_len= field->pack_length();
  DBUG_ENTER("set_ndb_value");
  DBUG_PRINT("enter", ("%d: %s  type: %u  len=%d  is_null=%s", 
                       fieldnr, field->field_name, field->type(), 
                       pack_len, field->is_null(row_offset) ? "Y" : "N"));
  DBUG_DUMP("value", (char*) field_ptr, pack_len);

  DBUG_ASSERT(ndb_supported_type(field->type()));
  {
    // ndb currently does not support size 0
    uint32 empty_field;
    if (pack_len == 0)
    {
      /* Map the zero-length field to a one-word null/not-null marker. */
      pack_len= sizeof(empty_field);
      field_ptr= (byte *)&empty_field;
      if (field->is_null(row_offset))
        empty_field= 0;
      else
        empty_field= 1;
    }
    if (! (field->flags & BLOB_FLAG))
    {
      if (field->type() != MYSQL_TYPE_BIT)
      {
        if (field->is_null(row_offset))
        {
          DBUG_PRINT("info", ("field is NULL"));
          // Set value to NULL
          DBUG_RETURN((ndb_op->setValue(fieldnr, (char*)NULL) != 0));
	}
        // Common implementation for most field types
        DBUG_RETURN(ndb_op->setValue(fieldnr, (char*)field_ptr) != 0);
      }
      else // if (field->type() == MYSQL_TYPE_BIT)
      {
        longlong bits= field->val_int();
 
        // Round up bit field length to nearest word boundry
        pack_len= ((pack_len + 3) >> 2) << 2;
        DBUG_ASSERT(pack_len <= 8);
        if (field->is_null(row_offset))
          // Set value to NULL
          DBUG_RETURN((ndb_op->setValue(fieldnr, (char*)NULL) != 0));
        DBUG_PRINT("info", ("bit field"));
        DBUG_DUMP("value", (char*)&bits, pack_len);
#ifdef WORDS_BIGENDIAN
        /* store lsw first */
        bits = ((bits >> 32) & 0x00000000FFFFFFFFLL)
          |    ((bits << 32) & 0xFFFFFFFF00000000LL);
#endif
        DBUG_RETURN(ndb_op->setValue(fieldnr, (char*)&bits) != 0);
      }
    }
    // Blob type
    NdbBlob *ndb_blob= ndb_op->getBlobHandle(fieldnr);
    if (ndb_blob != NULL)
    {
      if (field->is_null(row_offset))
        DBUG_RETURN(ndb_blob->setNull() != 0);

      Field_blob *field_blob= (Field_blob*)field;

      // Get length and pointer to data
      uint32 blob_len= field_blob->get_length(field_ptr);
      char* blob_ptr= NULL;
      field_blob->get_ptr(&blob_ptr);

      // Looks like NULL ptr signals length 0 blob
      if (blob_ptr == NULL) {
        DBUG_ASSERT(blob_len == 0);
        blob_ptr= (char*)"";
      }

      DBUG_PRINT("value", ("set blob ptr: 0x%lx  len: %u",
                           (long) blob_ptr, blob_len));
      DBUG_DUMP("value", (char*)blob_ptr, min(blob_len, 26));

      if (set_blob_value)
        *set_blob_value= TRUE;
      // No callback needed to write value
      DBUG_RETURN(ndb_blob->setValue(blob_ptr, blob_len) != 0);
    }
    /* getBlobHandle failed. */
    DBUG_RETURN(1);
  }
}


/*
  Callback to read all blob values.
  - not done in unpack_record because unpack_record is valid
    after execute(Commit) but reading blobs is not
  - may only generate read operations; they have to be executed
    somewhere before the data is available
  - due to single buffer for all blobs, we let the last blob
    process all blobs (last so that all are active)
  - null bit is still set in unpack_record
  - TODO allocate blob part aligned buffers
*/

831
NdbBlob::ActiveHook g_get_ndb_blobs_value;
pekka@mysql.com's avatar
pekka@mysql.com committed
832

833
/*
  NdbBlob ActiveHook callback (arg is the ha_ndbcluster instance).
  Only the last blob in the chain triggers the actual read, so that all
  blob handles are active when the shared buffer is sized and filled.
*/
int g_get_ndb_blobs_value(NdbBlob *ndb_blob, void *arg)
{
  DBUG_ENTER("g_get_ndb_blobs_value");
  if (ndb_blob->blobsNextBlob() != NULL)
    DBUG_RETURN(0);
  ha_ndbcluster *ha= (ha_ndbcluster *)arg;
  int ret= get_ndb_blobs_value(ha->table, ha->m_value,
                               ha->m_blobs_buffer, ha->m_blobs_buffer_size,
                               ha->m_blobs_offset);
  DBUG_RETURN(ret);
}

845 846 847 848 849 850 851 852
/*
  Read all blob values of a row into one shared buffer.

  This routine is shared by the injector, hence buffer and size are
  passed by reference and a record pointer diff may be supplied.

  Pass 0 only sums up the (Uint64-aligned) space needed and grows the
  buffer if it is too small; pass 1 reads the data and points each
  Field_blob into the buffer.
  - not done in unpack_record because unpack_record is valid
    after execute(Commit) but reading blobs is not
  - null bit is still set in unpack_record
  - TODO allocate blob part aligned buffers
*/
int get_ndb_blobs_value(TABLE* table, NdbValue* value_array,
                        byte*& buffer, uint& buffer_size,
                        my_ptrdiff_t ptrdiff)
{
  DBUG_ENTER("get_ndb_blobs_value");

  // Fields have no field number here, so TABLE::blob_field cannot be used
  for (int pass= 0; pass <= 1; pass++)
  {
    uint32 buf_offset= 0;
    for (uint no= 0; no < table->s->fields; no++)
    {
      Field *field= table->field[no];
      NdbValue value= value_array[no];
      if (! (field->flags & BLOB_FLAG))
        continue;
      if (value.blob == NULL)
      {
        DBUG_PRINT("info",("[%u] skipped", no));
        continue;
      }
      Field_blob *field_blob= (Field_blob *)field;
      NdbBlob *ndb_blob= value.blob;
      int isNull;
      if (ndb_blob->getNull(isNull) != 0)
        ERR_RETURN(ndb_blob->getNdbError());
      if (isNull == 0)
      {
        Uint64 len64= 0;
        if (ndb_blob->getLength(len64) != 0)
          ERR_RETURN(ndb_blob->getNdbError());
        // Round the space used up to a multiple of 8 (Uint64 alignment)
        uint32 chunk= len64;
        if (chunk % 8 != 0)
          chunk+= 8 - chunk % 8;
        if (pass == 1)
        {
          char *buf= buffer + buf_offset;
          uint32 len= 0xffffffff;  // Max uint32
          if (ndb_blob->readData(buf, len) != 0)
            ERR_RETURN(ndb_blob->getNdbError());
          DBUG_PRINT("info", ("[%u] offset: %u  buf: 0x%lx  len=%u  [ptrdiff=%d]",
                              no, buf_offset, (long) buf, len, (int)ptrdiff));
          DBUG_ASSERT(len == len64);
          // Ugly hack assumes only ptr needs to be changed
          field_blob->set_ptr_offset(ptrdiff, len, buf);
        }
        buf_offset+= chunk;
      }
      else if (pass == 1) // undefined or null
      {
        // The length must be set even for NULL blobs
        char *buf= buffer + buf_offset; // or maybe NULL
        uint32 len= 0;
        field_blob->set_ptr_offset(ptrdiff, len, buf);
        DBUG_PRINT("info", ("[%u] isNull=%d", no, isNull));
      }
    }
    if (pass == 0 && buf_offset > buffer_size)
    {
      // Existing buffer too small: replace it with one large enough
      my_free(buffer, MYF(MY_ALLOW_ZERO_PTR));
      buffer_size= 0;
      DBUG_PRINT("info", ("allocate blobs buffer size %u", buf_offset));
      buffer= my_malloc(buf_offset, MYF(MY_WME));
      if (buffer == NULL)
      {
        sql_print_error("ha_ndbcluster::get_ndb_blobs_value: "
                        "my_malloc(%u) failed", buf_offset);
        DBUG_RETURN(-1);
      }
      buffer_size= buf_offset;
    }
  }
  DBUG_RETURN(0);
}


/*
  Instruct NDB to fetch one field
pekka@mysql.com's avatar
pekka@mysql.com committed
929 930
  - data is read directly into buffer provided by field
    if field is NULL, data is read into memory provided by NDBAPI
931 932
*/

pekka@mysql.com's avatar
pekka@mysql.com committed
933
int ha_ndbcluster::get_ndb_value(NdbOperation *ndb_op, Field *field,
934
                                 uint fieldnr, byte* buf)
935 936
{
  DBUG_ENTER("get_ndb_value");
pekka@mysql.com's avatar
pekka@mysql.com committed
937 938 939 940 941
  DBUG_PRINT("enter", ("fieldnr: %d flags: %o", fieldnr,
                       (int)(field != NULL ? field->flags : 0)));

  if (field != NULL)
  {
tulin@dl145c.mysql.com's avatar
tulin@dl145c.mysql.com committed
942 943
      DBUG_ASSERT(buf);
      DBUG_ASSERT(ndb_supported_type(field->type()));
pekka@mysql.com's avatar
pekka@mysql.com committed
944 945
      DBUG_ASSERT(field->ptr != NULL);
      if (! (field->flags & BLOB_FLAG))
946
      { 
947 948
        if (field->type() != MYSQL_TYPE_BIT)
        {
949 950 951 952 953 954 955 956
          byte *field_buf;
          if (field->pack_length() != 0)
            field_buf= buf + (field->ptr - table->record[0]);
          else
            field_buf= (byte *)&dummy_buf;
          m_value[fieldnr].rec= ndb_op->getValue(fieldnr, 
                                                 field_buf);
        }
957 958 959 960
        else // if (field->type() == MYSQL_TYPE_BIT)
        {
          m_value[fieldnr].rec= ndb_op->getValue(fieldnr);
        }
pekka@mysql.com's avatar
pekka@mysql.com committed
961 962 963 964 965 966 967 968 969
        DBUG_RETURN(m_value[fieldnr].rec == NULL);
      }

      // Blob type
      NdbBlob *ndb_blob= ndb_op->getBlobHandle(fieldnr);
      m_value[fieldnr].blob= ndb_blob;
      if (ndb_blob != NULL)
      {
        // Set callback
970
	m_blobs_offset= buf - (byte*) table->record[0];
pekka@mysql.com's avatar
pekka@mysql.com committed
971
        void *arg= (void *)this;
972
        DBUG_RETURN(ndb_blob->setActiveHook(g_get_ndb_blobs_value, arg) != 0);
pekka@mysql.com's avatar
pekka@mysql.com committed
973 974 975 976 977
      }
      DBUG_RETURN(1);
  }

  // Used for hidden key only
978
  m_value[fieldnr].rec= ndb_op->getValue(fieldnr, m_ref);
pekka@mysql.com's avatar
pekka@mysql.com committed
979 980 981
  DBUG_RETURN(m_value[fieldnr].rec == NULL);
}

982 983 984 985 986 987
/*
  Instruct NDB to also fetch the partition id (fragment id) of the row;
  the value is delivered into m_part_id.  Returns non-zero on failure.
*/
int ha_ndbcluster::get_ndb_partition_id(NdbOperation *ndb_op)
{
  DBUG_ENTER("get_ndb_partition_id");
  const NdbRecAttr *rec=
    ndb_op->getValue(NdbDictionary::Column::FRAGMENT, (char *)&m_part_id);
  DBUG_RETURN(rec == NULL);
}
pekka@mysql.com's avatar
pekka@mysql.com committed
991 992 993 994

/*
  Check if any set or get of blob value in current query.
*/
995

996
bool ha_ndbcluster::uses_blob_value()
pekka@mysql.com's avatar
pekka@mysql.com committed
997
{
998 999
  MY_BITMAP *bitmap;
  uint *blob_index, *blob_index_end;
1000
  if (table_share->blob_fields == 0)
1001
    return FALSE;
1002 1003 1004 1005 1006

  bitmap= m_write_op ? table->write_set : table->read_set;
  blob_index=     table_share->blob_field;
  blob_index_end= blob_index + table_share->blob_fields;
  do
pekka@mysql.com's avatar
pekka@mysql.com committed
1007
  {
1008
    if (bitmap_is_set(bitmap, table->field[*blob_index]->field_index))
1009 1010
      return TRUE;
  } while (++blob_index != blob_index_end);
1011
  return FALSE;
1012 1013 1014 1015 1016 1017 1018 1019 1020
}


/*
  Get metadata for this table from NDB 

  IMPLEMENTATION
    - check that frm-file on disk is equal to frm-file
      of table accessed in NDB
1021 1022 1023 1024

  RETURN
    0    ok
    HA_ERR_TABLE_DEF_CHANGED   Meta data has changed; Re-read data and try again
1025 1026
*/

1027 1028
int cmp_frm(const NDBTAB *ndbtab, const void *pack_data,
            uint pack_length)
1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039
{
  DBUG_ENTER("cmp_frm");
  /*
    Compare FrmData in NDB with frm file from disk.
  */
  if ((pack_length != ndbtab->getFrmLength()) || 
      (memcmp(pack_data, ndbtab->getFrmData(), pack_length)))
    DBUG_RETURN(1);
  DBUG_RETURN(0);
}

1040 1041
int ha_ndbcluster::get_metadata(const char *path)
{
1042 1043
  Ndb *ndb= get_ndb();
  NDBDICT *dict= ndb->getDictionary();
1044 1045 1046 1047 1048
  const NDBTAB *tab;
  int error;
  DBUG_ENTER("get_metadata");
  DBUG_PRINT("enter", ("m_tabname: %s, path: %s", m_tabname, path));

1049 1050
  DBUG_ASSERT(m_table == NULL);
  DBUG_ASSERT(m_table_info == NULL);
1051

1052
  const void *data= NULL, *pack_data= NULL;
1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065
  uint length, pack_length;

  /*
    Compare FrmData in NDB with frm file from disk.
  */
  error= 0;
  if (readfrm(path, &data, &length) ||
      packfrm(data, length, &pack_data, &pack_length))
  {
    my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
    my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR));
    DBUG_RETURN(1);
  }
1066
    
1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083
  Ndb_table_guard ndbtab_g(dict, m_tabname);
  if (!(tab= ndbtab_g.get_table()))
    ERR_RETURN(dict->getNdbError());

  if (get_ndb_share_state(m_share) != NSS_ALTERED 
      && cmp_frm(tab, pack_data, pack_length))
  {
    DBUG_PRINT("error", 
               ("metadata, pack_length: %d  getFrmLength: %d  memcmp: %d",
                pack_length, tab->getFrmLength(),
                memcmp(pack_data, tab->getFrmData(), pack_length)));
    DBUG_DUMP("pack_data", (char*)pack_data, pack_length);
    DBUG_DUMP("frm", (char*)tab->getFrmData(), tab->getFrmLength());
    error= HA_ERR_TABLE_DEF_CHANGED;
  }
  my_free((char*)data, MYF(0));
  my_free((char*)pack_data, MYF(0));
1084

1085
  if (error)
1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098
    goto err;

  DBUG_PRINT("info", ("fetched table %s", tab->getName()));
  m_table= tab;
  if ((error= open_indexes(ndb, table, FALSE)) == 0)
  {
    ndbtab_g.release();
    DBUG_RETURN(0);
  }
err:
  ndbtab_g.invalidate();
  m_table= NULL;
  DBUG_RETURN(error);
1099
}
1100

1101
static int fix_unique_index_attr_order(NDB_INDEX_DATA &data,
1102 1103
                                       const NDBINDEX *index,
                                       KEY *key_info)
1104 1105 1106 1107 1108 1109
{
  DBUG_ENTER("fix_unique_index_attr_order");
  unsigned sz= index->getNoOfIndexColumns();

  if (data.unique_index_attrid_map)
    my_free((char*)data.unique_index_attrid_map, MYF(0));
monty@mysql.com's avatar
monty@mysql.com committed
1110
  data.unique_index_attrid_map= (uchar*)my_malloc(sz,MYF(MY_WME));
1111 1112 1113 1114 1115 1116
  if (data.unique_index_attrid_map == 0)
  {
    sql_print_error("fix_unique_index_attr_order: my_malloc(%u) failure",
                    (unsigned int)sz);
    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
  }
1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128

  KEY_PART_INFO* key_part= key_info->key_part;
  KEY_PART_INFO* end= key_part+key_info->key_parts;
  DBUG_ASSERT(key_info->key_parts == sz);
  for (unsigned i= 0; key_part != end; key_part++, i++) 
  {
    const char *field_name= key_part->field->field_name;
#ifndef DBUG_OFF
   data.unique_index_attrid_map[i]= 255;
#endif
    for (unsigned j= 0; j < sz; j++)
    {
1129
      const NDBCOL *c= index->getColumn(j);
msvensson@neptunus.(none)'s avatar
msvensson@neptunus.(none) committed
1130
      if (strcmp(field_name, c->getName()) == 0)
1131
      {
1132 1133
        data.unique_index_attrid_map[i]= j;
        break;
1134 1135 1136 1137 1138 1139
      }
    }
    DBUG_ASSERT(data.unique_index_attrid_map[i] != 255);
  }
  DBUG_RETURN(0);
}
1140

1141 1142 1143 1144 1145 1146
/*
  Create all the indexes for a table.
  If creation of any index fails, the error is returned immediately.
*/
int ha_ndbcluster::create_indexes(Ndb *ndb, TABLE *tab)
{
  int error= 0;
  KEY* key_info= tab->key_info;
  const char **key_name= tab->s->keynames.type_names;
  DBUG_ENTER("ha_ndbcluster::create_indexes");

  for (uint i= 0; i < tab->s->keys; i++, key_info++, key_name++)
  {
    const char *index_name= *key_name;
    NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
    if ((error= create_index(index_name, key_info, idx_type, i)))
    {
      DBUG_PRINT("error", ("Failed to create index %u", i));
      break;
    }
  }

  DBUG_RETURN(error);
}

tomas@poseidon.ndb.mysql.com's avatar
ndb:  
tomas@poseidon.ndb.mysql.com committed
1170
static void ndb_init_index(NDB_INDEX_DATA &data)
1171
{
tomas@poseidon.ndb.mysql.com's avatar
ndb:  
tomas@poseidon.ndb.mysql.com committed
1172 1173 1174 1175 1176 1177 1178 1179 1180
  data.type= UNDEFINED_INDEX;
  data.status= UNDEFINED;
  data.unique_index= NULL;
  data.index= NULL;
  data.unique_index_attrid_map= NULL;
  data.index_stat=NULL;
  data.index_stat_cache_entries=0;
  data.index_stat_update_freq=0;
  data.index_stat_query_count=0;
1181 1182
}

tomas@poseidon.ndb.mysql.com's avatar
ndb:  
tomas@poseidon.ndb.mysql.com committed
1183
static void ndb_clear_index(NDB_INDEX_DATA &data)
1184
{
tomas@poseidon.ndb.mysql.com's avatar
ndb:  
tomas@poseidon.ndb.mysql.com committed
1185 1186 1187 1188 1189 1190 1191 1192 1193
  if (data.unique_index_attrid_map)
  {
    my_free((char*)data.unique_index_attrid_map, MYF(0));
  }
  if (data.index_stat)
  {
    delete data.index_stat;
  }
  ndb_init_index(data);
1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205
}

/*
  Associate a direct reference to an index handle
  with an index (for faster access).

  Opens the global dictionary handle(s) for the ordered and/or unique
  variant of index 'index_no', sets up optional index statistics, and
  marks the slot ACTIVE on success.  Returns 0 or an NDB/handler error.
*/
int ha_ndbcluster::add_index_handle(THD *thd, NDBDICT *dict, KEY *key_info,
                                    const char *index_name, uint index_no)
{
  int error= 0;
  NDB_INDEX_TYPE idx_type= get_index_type_from_table(index_no);
  m_index[index_no].type= idx_type;
  DBUG_ENTER("ha_ndbcluster::add_index_handle");
  DBUG_PRINT("enter", ("table %s", m_tabname));

  if (idx_type != PRIMARY_KEY_INDEX && idx_type != UNIQUE_INDEX)
  {
    DBUG_PRINT("info", ("Get handle to index %s", index_name));
    const NDBINDEX *index;
    do
    {
      index= dict->getIndexGlobal(index_name, *m_table);
      if (!index)
        ERR_RETURN(dict->getNdbError());
      DBUG_PRINT("info", ("index: 0x%lx  id: %d  version: %d.%d  status: %d",
                          (long) index,
                          index->getObjectId(),
                          index->getObjectVersion() & 0xFFFFFF,
                          index->getObjectVersion() >> 24,
                          index->getObjectStatus()));
      DBUG_ASSERT(index->getObjectStatus() ==
                  NdbDictionary::Object::Retrieved);
      break;
    } while (1);
    m_index[index_no].index= index;
    // ordered index - add stats
    NDB_INDEX_DATA& d=m_index[index_no];
    delete d.index_stat;
    d.index_stat=NULL;
    if (thd->variables.ndb_index_stat_enable)
    {
      d.index_stat=new NdbIndexStat(index);
      d.index_stat_cache_entries=thd->variables.ndb_index_stat_cache_entries;
      d.index_stat_update_freq=thd->variables.ndb_index_stat_update_freq;
      d.index_stat_query_count=0;
      d.index_stat->alloc_cache(d.index_stat_cache_entries);
      DBUG_PRINT("info", ("index %s stat=on cache_entries=%u update_freq=%u",
                          index->getName(),
                          d.index_stat_cache_entries,
                          d.index_stat_update_freq));
    } else
    {
      DBUG_PRINT("info", ("index %s stat=off", index->getName()));
    }
  }
  if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX)
  {
    char unique_index_name[FN_LEN];
    static const char* unique_suffix= "$unique";
    m_has_unique_index= TRUE;
    /*
      Bug fix: strxnmov() may write one byte beyond the given length
      for the terminating NUL, so the limit must be sizeof(buf) - 1,
      not FN_LEN (potential one-byte stack overflow).
    */
    strxnmov(unique_index_name, FN_LEN - 1, index_name, unique_suffix, NullS);
    DBUG_PRINT("info", ("Get handle to unique_index %s", unique_index_name));
    const NDBINDEX *index;
    do
    {
      index= dict->getIndexGlobal(unique_index_name, *m_table);
      if (!index)
        ERR_RETURN(dict->getNdbError());
      DBUG_PRINT("info", ("index: 0x%lx  id: %d  version: %d.%d  status: %d",
                          (long) index,
                          index->getObjectId(),
                          index->getObjectVersion() & 0xFFFFFF,
                          index->getObjectVersion() >> 24,
                          index->getObjectStatus()));
      DBUG_ASSERT(index->getObjectStatus() ==
                  NdbDictionary::Object::Retrieved);
      break;
    } while (1);
    m_index[index_no].unique_index= index;
    error= fix_unique_index_attr_order(m_index[index_no], index, key_info);
  }
  if (!error)
    m_index[index_no].status= ACTIVE;
  
  DBUG_RETURN(error);
}

/*
  Associate index handles for each index of a table
*/
1284
int ha_ndbcluster::open_indexes(Ndb *ndb, TABLE *tab, bool ignore_error)
1285 1286 1287 1288 1289 1290 1291 1292
{
  uint i;
  int error= 0;
  THD *thd=current_thd;
  NDBDICT *dict= ndb->getDictionary();
  KEY* key_info= tab->key_info;
  const char **key_name= tab->s->keynames.type_names;
  DBUG_ENTER("ha_ndbcluster::open_indexes");
1293
  m_has_unique_index= FALSE;
1294 1295 1296
  for (i= 0; i < tab->s->keys; i++, key_info++, key_name++)
  {
    if ((error= add_index_handle(thd, dict, key_info, *key_name, i)))
1297 1298 1299 1300
      if (ignore_error)
        m_index[i].index= m_index[i].unique_index= NULL;
      else
        break;
1301
    m_index[i].null_in_unique_index= FALSE;
1302
    if (check_index_fields_not_null(key_info))
1303
      m_index[i].null_in_unique_index= TRUE;
1304
  }
1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325

  if (error && !ignore_error)
  {
    while (i > 0)
    {
      i--;
      if (m_index[i].index)
      {
         dict->removeIndexGlobal(*m_index[i].index, 1);
         m_index[i].index= NULL;
      }
      if (m_index[i].unique_index)
      {
         dict->removeIndexGlobal(*m_index[i].unique_index, 1);
         m_index[i].unique_index= NULL;
      }
    }
  }

  DBUG_ASSERT(error == 0 || error == 4243);

1326 1327 1328 1329 1330 1331 1332
  DBUG_RETURN(error);
}

/*
  Renumber indexes in index list by shifting out
  indexes that are to be dropped
 */
1333
void ha_ndbcluster::renumber_indexes(Ndb *ndb, TABLE *tab)
1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353
{
  uint i;
  const char *index_name;
  KEY* key_info= tab->key_info;
  const char **key_name= tab->s->keynames.type_names;
  DBUG_ENTER("ha_ndbcluster::renumber_indexes");
  
  for (i= 0; i < tab->s->keys; i++, key_info++, key_name++)
  {
    index_name= *key_name;
    NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
    m_index[i].type= idx_type;
    if (m_index[i].status == TO_BE_DROPPED) 
    {
      DBUG_PRINT("info", ("Shifting index %s(%i) out of the list", 
                          index_name, i));
      NDB_INDEX_DATA tmp;
      uint j= i + 1;
      // Shift index out of list
      while(j != MAX_KEY && m_index[j].status != UNDEFINED)
1354
      {
1355 1356 1357 1358
        tmp=  m_index[j - 1];
        m_index[j - 1]= m_index[j];
        m_index[j]= tmp;
        j++;
1359 1360
      }
    }
1361 1362
  }

1363
  DBUG_VOID_RETURN;
1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377
}

/*
  Drop all indexes that are marked for deletion.
  Stops and returns the error on the first failed drop.
*/
int ha_ndbcluster::drop_indexes(Ndb *ndb, TABLE *tab)
{
  uint i;
  int error= 0;
  const char *index_name;
  KEY* key_info= tab->key_info;
  NDBDICT *dict= ndb->getDictionary();
  DBUG_ENTER("ha_ndbcluster::drop_indexes");

  for (i= 0; i < tab->s->keys; i++, key_info++)
  {
    NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
    m_index[i].type= idx_type;
    if (m_index[i].status != TO_BE_DROPPED)
      continue;

    const NdbDictionary::Index *index= m_index[i].index;
    const NdbDictionary::Index *unique_index= m_index[i].unique_index;

    if (index)
    {
      index_name= index->getName();
      DBUG_PRINT("info", ("Dropping index %u: %s", i, index_name));
      // Drop ordered index from ndb
      error= dict->dropIndexGlobal(*index);
      if (!error)
      {
        dict->removeIndexGlobal(*index, 1);
        m_index[i].index= NULL;
      }
    }
    if (!error && unique_index)
    {
      index_name= unique_index->getName();
      DBUG_PRINT("info", ("Dropping unique index %u: %s", i, index_name));
      // Drop unique index from ndb
      error= dict->dropIndexGlobal(*unique_index);
      if (!error)
      {
        dict->removeIndexGlobal(*unique_index, 1);
        m_index[i].unique_index= NULL;
      }
    }
    if (error)
      DBUG_RETURN(error);
    ndb_clear_index(m_index[i]);
  }

  DBUG_RETURN(error);
}

1421 1422 1423 1424
/*
  Decode the type of an index from information 
  provided in table object
*/
1425
NDB_INDEX_TYPE ha_ndbcluster::get_index_type_from_table(uint inx) const
1426
{
1427 1428
  return get_index_type_from_key(inx, table_share->key_info,
                                 inx == table_share->primary_key);
1429 1430 1431
}

/*
  Map a MySQL key definition to the NDB index type:
  hash vs. ordered, primary vs. unique vs. plain ordered.
*/
NDB_INDEX_TYPE ha_ndbcluster::get_index_type_from_key(uint inx,
                                                      KEY *key_info,
                                                      bool primary) const
{
  const bool is_hash_index= (key_info[inx].algorithm == HA_KEY_ALG_HASH);
  if (primary)
    return is_hash_index ? PRIMARY_KEY_INDEX : PRIMARY_KEY_ORDERED_INDEX;
  if (key_info[inx].flags & HA_NOSAME)
    return is_hash_index ? UNIQUE_INDEX : UNIQUE_ORDERED_INDEX;
  return ORDERED_INDEX;
} 
1444

1445
/*
  Return TRUE as soon as any key part of the index allows NULL values,
  FALSE if every key part is NOT NULL.
*/
bool ha_ndbcluster::check_index_fields_not_null(KEY* key_info)
{
  KEY_PART_INFO* key_part= key_info->key_part;
  KEY_PART_INFO* end= key_part + key_info->key_parts;
  DBUG_ENTER("ha_ndbcluster::check_index_fields_not_null");

  while (key_part != end)
  {
    if (key_part->field->maybe_null())
      DBUG_RETURN(TRUE);
    key_part++;
  }

  DBUG_RETURN(FALSE);
}
1460

1461
/*
  Release the cached NDB table object and all index handles held by
  this handler instance.  FLUSH TABLES (or an invalid table object)
  also forces the global dictionary cache entries to be invalidated.
*/
void ha_ndbcluster::release_metadata(THD *thd, Ndb *ndb)
{
  uint i;

  DBUG_ENTER("release_metadata");
  DBUG_PRINT("enter", ("m_tabname: %s", m_tabname));

  NDBDICT *dict= ndb->getDictionary();
  int invalidate_indexes= 0;
  if (thd && thd->lex && thd->lex->sql_command == SQLCOM_FLUSH)
  {
    invalidate_indexes = 1;
  }
  if (m_table != NULL)
  {
    if (m_table->getObjectStatus() == NdbDictionary::Object::Invalid)
      invalidate_indexes= 1;
    dict->removeTableGlobal(*m_table, invalidate_indexes);
  }
  // TODO investigate
  DBUG_ASSERT(m_table_info == NULL);
  m_table_info= NULL;

  // Release index list 
  for (i= 0; i < MAX_KEY; i++)
  {
    if (m_index[i].unique_index)
    {
      DBUG_ASSERT(m_table != NULL);
      dict->removeIndexGlobal(*m_index[i].unique_index, invalidate_indexes);
    }
    if (m_index[i].index)
    {
      DBUG_ASSERT(m_table != NULL);
      dict->removeIndexGlobal(*m_index[i].index, invalidate_indexes);
    }
    ndb_clear_index(m_index[i]);
  }

  m_table= NULL;
  DBUG_VOID_RETURN;
}

pekka@mysql.com's avatar
pekka@mysql.com committed
1504
int ha_ndbcluster::get_ndb_lock_type(enum thr_lock_type type)
1505
{
1506
  if (type >= TL_WRITE_ALLOW_WRITE)
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
1507
    return NdbOperation::LM_Exclusive;
mskold@mysql.com's avatar
mskold@mysql.com committed
1508 1509
  if (type ==  TL_READ_WITH_SHARED_LOCKS ||
      uses_blob_value())
1510
    return NdbOperation::LM_Read;
1511
  return NdbOperation::LM_CommittedRead;
1512 1513
}

1514 1515 1516 1517 1518 1519
/*
  Handler capability flags for each NDB_INDEX_TYPE,
  indexed by the enum value.
*/
static const ulong index_type_flags[]=
{
  /* UNDEFINED_INDEX */
  0,

  /* PRIMARY_KEY_INDEX */
  HA_ONLY_WHOLE_INDEX,

  /* PRIMARY_KEY_ORDERED_INDEX */
  /*
     Enable HA_KEYREAD_ONLY when "sorted" indexes are supported,
     thus ORDER BY clauses can be optimized by reading directly
     through the index.
  */
  // HA_KEYREAD_ONLY |
  HA_READ_NEXT |
  HA_READ_PREV |
  HA_READ_RANGE |
  HA_READ_ORDER,

  /* UNIQUE_INDEX */
  HA_ONLY_WHOLE_INDEX,

  /* UNIQUE_ORDERED_INDEX */
  HA_READ_NEXT |
  HA_READ_PREV |
  HA_READ_RANGE |
  HA_READ_ORDER,

  /* ORDERED_INDEX */
  HA_READ_NEXT |
  HA_READ_PREV |
  HA_READ_RANGE |
  HA_READ_ORDER
};

static const int index_flags_size= sizeof(index_type_flags)/sizeof(ulong);

/* Return the cached NDB index type of index slot idx_no. */
inline NDB_INDEX_TYPE ha_ndbcluster::get_index_type(uint idx_no) const
{
  DBUG_ASSERT(idx_no < MAX_KEY);
  return m_index[idx_no].type;
}

1558 1559 1560 1561 1562 1563
/* TRUE if unique index idx_no contains a nullable key part. */
inline bool ha_ndbcluster::has_null_in_unique_index(uint idx_no) const
{
  DBUG_ASSERT(idx_no < MAX_KEY);
  return m_index[idx_no].null_in_unique_index;
}

1564 1565 1566 1567 1568 1569 1570 1571

/*
  Get the flags for an index

  RETURN
    flags depending on the type of the index.
*/

1572 1573
inline ulong ha_ndbcluster::index_flags(uint idx_no, uint part,
                                        bool all_parts) const 
1574
{ 
1575
  DBUG_ENTER("ha_ndbcluster::index_flags");
1576
  DBUG_PRINT("enter", ("idx_no: %u", idx_no));
1577
  DBUG_ASSERT(get_index_type_from_table(idx_no) < index_flags_size);
1578 1579
  DBUG_RETURN(index_type_flags[get_index_type_from_table(idx_no)] | 
              HA_KEY_SCAN_NOT_ROR);
1580 1581
}

pekka@mysql.com's avatar
pekka@mysql.com committed
1582 1583
/*
  For a VARCHAR field with a 1-byte length prefix, repack the
  2-byte-prefixed key image at 'ptr' into 'buf' using a 1-byte prefix
  and redirect 'ptr' to it.  All other fields are left untouched.
*/
static void shrink_varchar(Field* field, const byte* & ptr, char* buf)
{
  if (field->type() != MYSQL_TYPE_VARCHAR || ptr == NULL)
    return;
  Field_varstring* f= (Field_varstring*)field;
  if (f->length_bytes != 1)
    return;

  uint pack_len= field->pack_length();
  DBUG_ASSERT(1 <= pack_len && pack_len <= 256);
  if (ptr[1] == 0)
    buf[0]= ptr[0];
  else
  {
    // Length > 255 cannot occur with a 1-byte prefix
    DBUG_ASSERT(FALSE);
    buf[0]= 255;
  }
  memmove(buf + 1, ptr + 2, pack_len - 1);
  ptr= buf;
}
1600 1601 1602

/*
  Set all primary key attributes of the operation from a packed key
  image.  Returns 0 on success, the propagated error otherwise.
*/
int ha_ndbcluster::set_primary_key(NdbOperation *op, const byte *key)
{
  KEY* key_info= table->key_info + table_share->primary_key;
  KEY_PART_INFO* key_part= key_info->key_part;
  KEY_PART_INFO* end= key_part + key_info->key_parts;
  DBUG_ENTER("set_primary_key");

  while (key_part != end)
  {
    Field* field= key_part->field;
    const byte* ptr= key;
    char buf[256];
    shrink_varchar(field, ptr, buf);
    if (set_ndb_key(op, field, key_part->fieldnr-1, ptr))
      ERR_RETURN(op->getNdbError());
    key+= key_part->store_length;
    key_part++;
  }
  DBUG_RETURN(0);
}


1623
/*
  Set all primary key attributes of the operation from a full record
  image (MySQL row format).
*/
int ha_ndbcluster::set_primary_key_from_record(NdbOperation *op, const byte *record)
{
  KEY* key_info= table->key_info + table_share->primary_key;
  KEY_PART_INFO* key_part= key_info->key_part;
  KEY_PART_INFO* end= key_part + key_info->key_parts;
  DBUG_ENTER("set_primary_key_from_record");

  while (key_part != end)
  {
    if (set_ndb_key(op, key_part->field,
		    key_part->fieldnr-1, record + key_part->offset))
      ERR_RETURN(op->getNdbError());
    key_part++;
  }
  DBUG_RETURN(0);
}

1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658
/*
  Set all key attributes of unique index 'keyno' from a full record
  image, mapping each key part to its NDB column number.
*/
int ha_ndbcluster::set_index_key_from_record(NdbOperation *op, 
                                             const byte *record, uint keyno)
{
  KEY* key_info= table->key_info + keyno;
  KEY_PART_INFO* key_part= key_info->key_part;
  KEY_PART_INFO* end= key_part + key_info->key_parts;
  DBUG_ENTER("set_index_key_from_record");

  for (uint part_no= 0; key_part != end; key_part++, part_no++)
  {
    if (set_ndb_key(op, key_part->field,
                    m_index[keyno].unique_index_attrid_map[part_no],
                    record + key_part->offset))
      ERR_RETURN(m_active_trans->getNdbError());
  }
  DBUG_RETURN(0);
}

1659 1660
/*
  Set all key attributes of the active unique index from a packed key
  image, mapping key parts to NDB column numbers.
*/
int 
ha_ndbcluster::set_index_key(NdbOperation *op, 
                             const KEY *key_info, 
                             const byte * key_ptr)
{
  DBUG_ENTER("ha_ndbcluster::set_index_key");
  KEY_PART_INFO* key_part= key_info->key_part;
  KEY_PART_INFO* end= key_part + key_info->key_parts;

  for (uint part_no= 0; key_part != end; key_part++, part_no++)
  {
    Field* field= key_part->field;
    // Step over the null indicator byte, if present
    const byte* ptr= key_part->null_bit ? key_ptr + 1 : key_ptr;
    char buf[256];
    shrink_varchar(field, ptr, buf);
    if (set_ndb_key(op, field,
                    m_index[active_index].unique_index_attrid_map[part_no],
                    ptr))
      ERR_RETURN(m_active_trans->getNdbError());
    key_ptr+= key_part->store_length;
  }
  DBUG_RETURN(0);
}
1681

1682 1683 1684 1685 1686 1687 1688
/*
  Define which attributes the operation should read: every field in the
  read set plus all primary key fields.  For tables without a primary
  key the hidden NDB key column is read as well.
*/
inline 
int ha_ndbcluster::define_read_attrs(byte* buf, NdbOperation* op)
{
  DBUG_ENTER("define_read_attrs");  

  // Define attributes to read
  for (uint i= 0; i < table_share->fields; i++) 
  {
    Field *field= table->field[i];
    if (bitmap_is_set(table->read_set, i) ||
        (field->flags & PRI_KEY_FLAG))
    {      
      if (get_ndb_value(op, field, i, buf))
        ERR_RETURN(op->getNdbError());
    } 
    else
    {
      m_value[i].ptr= NULL;
    }
  }
    
  if (table_share->primary_key == MAX_KEY) 
  {
    DBUG_PRINT("info", ("Getting hidden key"));
    // Scanning table with no primary key
    int hidden_no= table_share->fields;      
#ifndef DBUG_OFF
    const NDBTAB *tab= (const NDBTAB *) m_table;    
    if (!tab->getColumn(hidden_no))
      DBUG_RETURN(1);
#endif
    if (get_ndb_value(op, NULL, hidden_no, NULL))
      ERR_RETURN(op->getNdbError());
  }
  DBUG_RETURN(0);
} 

tomas@poseidon.ndb.mysql.com's avatar
Merge  
tomas@poseidon.ndb.mysql.com committed
1720

1721 1722 1723 1724
/*
  Read one record from NDB using primary key
*/

1725 1726
int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf,
                           uint32 part_id)
1727
{
1728
  uint no_fields= table_share->fields;
1729 1730
  NdbConnection *trans= m_active_trans;
  NdbOperation *op;
1731

1732 1733 1734 1735
  int res;
  DBUG_ENTER("pk_read");
  DBUG_PRINT("enter", ("key_len: %u", key_len));
  DBUG_DUMP("key", (char*)key, key_len);
1736
  m_write_op= FALSE;
1737

1738 1739
  NdbOperation::LockMode lm=
    (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
joreland@mysql.com's avatar
joreland@mysql.com committed
1740
  if (!(op= trans->getNdbOperation((const NDBTAB *) m_table)) || 
1741
      op->readTuple(lm) != 0)
1742
    ERR_RETURN(trans->getNdbError());
1743
  
1744
  if (table_share->primary_key == MAX_KEY) 
1745 1746 1747 1748 1749
  {
    // This table has no primary key, use "hidden" primary key
    DBUG_PRINT("info", ("Using hidden key"));
    DBUG_DUMP("key", (char*)key, 8);    
    if (set_hidden_key(op, no_fields, key))
1750
      ERR_RETURN(trans->getNdbError());
1751
    
1752
    // Read key at the same time, for future reference
1753
    if (get_ndb_value(op, NULL, no_fields, NULL))
1754
      ERR_RETURN(trans->getNdbError());
1755 1756 1757 1758 1759 1760 1761
  } 
  else 
  {
    if ((res= set_primary_key(op, key)))
      return res;
  }
  
1762
  if ((res= define_read_attrs(buf, op)))
1763
    DBUG_RETURN(res);
1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775

  if (m_use_partition_function)
  {
    op->setPartitionId(part_id);
    // If table has user defined partitioning
    // and no indexes, we need to read the partition id
    // to support ORDER BY queries
    if (table_share->primary_key == MAX_KEY &&
        get_ndb_partition_id(op))
      ERR_RETURN(trans->getNdbError());
  }

1776
  if ((res = execute_no_commit_ie(this,trans,FALSE)) != 0 ||
1777
      op->getNdbError().code) 
1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788
  {
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(ndb_err(trans));
  }

  // The value have now been fetched from NDB  
  unpack_record(buf);
  table->status= 0;     
  DBUG_RETURN(0);
}

1789 1790
/*
  Read one complementing record from NDB using primary key from old_data
  or hidden key.

  Used before partial updates/deletes: fetches the columns that were NOT
  already retrieved (neither key columns, nor in read_set, nor about to
  be overwritten per write_set) so that new_data holds a complete row.
  If read_set already covers every column there is nothing to do.

  old_data     row image holding the primary key to look up
  new_data     buffer the complementing column values are unpacked into
  old_part_id  partition of the existing row (user defined partitioning)

  Returns 0 on success, a handler error code otherwise.
*/

int ha_ndbcluster::complemented_read(const byte *old_data, byte *new_data,
                                     uint32 old_part_id)
{
  uint no_fields= table_share->fields, i;
  NdbTransaction *trans= m_active_trans;
  NdbOperation *op;
  DBUG_ENTER("complemented_read");
  m_write_op= FALSE;

  if (bitmap_is_set_all(table->read_set))
  {
    // We have already retrieved all fields, nothing to complement
    DBUG_RETURN(0);
  }

  NdbOperation::LockMode lm=
    (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
  if (!(op= trans->getNdbOperation((const NDBTAB *) m_table)) || 
      op->readTuple(lm) != 0)
    ERR_RETURN(trans->getNdbError());
  if (table_share->primary_key != MAX_KEY) 
  {
    if (set_primary_key_from_record(op, old_data))
      ERR_RETURN(trans->getNdbError());
  } 
  else 
  {
    // This table has no primary key, use "hidden" primary key
    // (m_ref holds the hidden key of the current row)
    if (set_hidden_key(op, table->s->fields, m_ref))
      ERR_RETURN(op->getNdbError());
  }

  if (m_use_partition_function)
    op->setPartitionId(old_part_id);
  
  // Read all unreferenced non-key field(s)
  for (i= 0; i < no_fields; i++) 
  {
    Field *field= table->field[i];
    if (!((field->flags & PRI_KEY_FLAG) ||
          bitmap_is_set(table->read_set, i)) &&
        !bitmap_is_set(table->write_set, i))
    {
      if (get_ndb_value(op, field, i, new_data))
        ERR_RETURN(trans->getNdbError());
    }
  }
  
  if (execute_no_commit(this,trans,FALSE) != 0) 
  {
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(ndb_err(trans));
  }

  // The values have now been fetched from NDB  
  unpack_record(new_data);
  table->status= 0;     

  /**
   * Restore m_value: clear the NdbRecAttr pointers that were set up by
   * the complementing reads above so later unpacks don't reuse them.
   */
  for (i= 0; i < no_fields; i++) 
  {
    Field *field= table->field[i];
    if (!((field->flags & PRI_KEY_FLAG) ||
          bitmap_is_set(table->read_set, i)))
    {
      m_value[i].ptr= NULL;
    }
  }
  
  DBUG_RETURN(0);
}

1868
/*
 * Check that all operations between first and last all
 * have gotten the errcode
 * If checking for HA_ERR_KEY_NOT_FOUND then update m_dupkey
 * for all succeeding operations
 *
 * trans    transaction the operations were executed in
 * first    first operation to inspect (inclusive)
 * last     last operation to inspect (inclusive)
 * errcode  the handler error code every operation is expected to have
 *
 * Returns TRUE when every inspected operation failed with errcode,
 * FALSE as soon as one operation has a different error or succeeded
 * (a succeeding operation means a duplicate row was found; in that
 * case m_dupkey is set to the key number of the colliding index).
 */
bool ha_ndbcluster::check_all_operations_for_error(NdbTransaction *trans,
                                                   const NdbOperation *first,
                                                   const NdbOperation *last,
                                                   uint errcode)
{
  const NdbOperation *op= first;
  DBUG_ENTER("ha_ndbcluster::check_all_operations_for_error");

  while(op)
  {
    NdbError err= op->getNdbError();
    if (err.status != NdbError::Success)
    {
      // Operation failed: acceptable only if it failed with errcode
      if (ndb_to_mysql_error(&err) != (int) errcode)
        DBUG_RETURN(FALSE);
      if (op == last) break;
      op= trans->getNextCompletedOperation(op);
    }
    else
    {
      // We found a duplicate
      if (op->getType() == NdbOperation::UniqueIndexAccess)
      {
        if (errcode == HA_ERR_KEY_NOT_FOUND)
        {
          NdbIndexOperation *iop= (NdbIndexOperation *) op;
          const NDBINDEX *index= iop->getIndex();
          // Find the key_no of the index
          for(uint i= 0; i<table->s->keys; i++)
          {
            if (m_index[i].unique_index == index)
            {
              m_dupkey= i;
              break;
            }
          }
        }
      }
      else
      {
        // Must have been primary key access
        DBUG_ASSERT(op->getType() == NdbOperation::PrimaryKeyAccess);
        if (errcode == HA_ERR_KEY_NOT_FOUND)
          m_dupkey= table->s->primary_key;
      }
      DBUG_RETURN(FALSE);      
    }
  }
  DBUG_RETURN(TRUE);
}


1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952
/**
 * Check if record contains any null valued columns that are part of a key
 */
static
int
check_null_in_record(const KEY* key_info, const byte *record)
{
  KEY_PART_INFO *curr_part, *end_part;
  curr_part= key_info->key_part;
  end_part= curr_part + key_info->key_parts;

  while (curr_part != end_part)
  {
    if (curr_part->null_bit &&
        (record[curr_part->null_offset] & curr_part->null_bit))
      return 1;
    curr_part++;
  }
  return 0;
  /*
    We could instead pre-compute a bitmask in table_share with one bit for
    every null-bit in the key, and so check this just by OR'ing the bitmask
    with the null bitmap in the record.
    But not sure it's worth it.
  */
}

1953 1954 1955
/*
 * Peek to check if any rows already exist with conflicting
 * primary key or unique index values.
 *
 * Defines one read operation per candidate key (primary key when
 * check_pk is set, plus every unique index whose key parts are all
 * non-NULL in record) and executes them in a single batch; the batched
 * results are then inspected with check_all_operations_for_error().
 *
 * record    row image whose key values are checked for collisions
 * check_pk  if TRUE also peek on the primary key
 *
 * Returns 0 when a conflicting row exists (m_dupkey identifies the
 * colliding key), HA_ERR_KEY_NOT_FOUND when no row conflicts, or
 * another handler error code on failure.
 */

int ha_ndbcluster::peek_indexed_rows(const byte *record,
				     bool check_pk)
{
  NdbTransaction *trans= m_active_trans;
  NdbOperation *op;
  const NdbOperation *first, *last;
  uint i;
  int res;
  DBUG_ENTER("peek_indexed_rows");

  NdbOperation::LockMode lm=
      (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
  first= NULL;
  if (check_pk && table->s->primary_key != MAX_KEY)
  {
    /*
     * Fetch any row with colliding primary key
     */
    if (!(op= trans->getNdbOperation((const NDBTAB *) m_table)) ||
        op->readTuple(lm) != 0)
      ERR_RETURN(trans->getNdbError());
    
    first= op;
    if ((res= set_primary_key_from_record(op, record)))
      ERR_RETURN(trans->getNdbError());

    if (m_use_partition_function)
    {
      // Compute the partition the conflicting row would live in;
      // all columns must be readable while evaluating the partition func
      uint32 part_id;
      int error;
      longlong func_value;
      my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
      error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value);
      dbug_tmp_restore_column_map(table->read_set, old_map);
      if (error)
      {
        m_part_info->err_value= func_value;
        DBUG_RETURN(error);
      }
      op->setPartitionId(part_id);
    }
  }
  /*
   * Fetch any rows with colliding unique indexes
   */
  KEY* key_info;
  KEY_PART_INFO *key_part, *end;
  for (i= 0, key_info= table->key_info; i < table->s->keys; i++, key_info++)
  {
    if (i != table->s->primary_key &&
        key_info->flags & HA_NOSAME)
    {
      /*
        A unique index is defined on table.
        We cannot look up a NULL field value in a unique index. But since
        keys with NULLs are not indexed, such rows cannot conflict anyway, so
        we just skip the index in this case.
      */
      if (check_null_in_record(key_info, record))
      {
        DBUG_PRINT("info", ("skipping check for key with NULL"));
        continue;
      } 
      NdbIndexOperation *iop;
      const NDBINDEX *unique_index = m_index[i].unique_index;
      key_part= key_info->key_part;
      end= key_part + key_info->key_parts;
      if (!(iop= trans->getNdbIndexOperation(unique_index, m_table)) ||
          iop->readTuple(lm) != 0)
        ERR_RETURN(trans->getNdbError());

      if (!first)
        first= iop;
      if ((res= set_index_key_from_record(iop, record, i)))
        ERR_RETURN(trans->getNdbError());
    }
  }
  last= trans->getLastDefinedOperation();
  if (first)
    res= execute_no_commit_ie(this,trans,FALSE);
  else
  {
    // Table has no keys
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
  }
  // TRUE here means all peeks got "key not found", i.e. no conflict;
  // ndb_err() then maps that to the proper handler error code
  if (check_all_operations_for_error(trans, first, last, 
                                     HA_ERR_KEY_NOT_FOUND))
  {
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(ndb_err(trans));
  } 
  else
  {
    DBUG_PRINT("info", ("m_dupkey %d", m_dupkey));
  }
  DBUG_RETURN(0);
}
2056

2057

2058 2059 2060 2061 2062
/*
  Read one record from NDB using unique secondary index.

  key      index key value to look up
  key_len  length of key
  buf      record buffer to unpack the found row into

  Uses the unique index registered for active_index in m_index[].
  Returns 0 on success, a handler error code otherwise; table->status
  is set to STATUS_NOT_FOUND when no row matches.
*/

int ha_ndbcluster::unique_index_read(const byte *key,
                                     uint key_len, byte *buf)
{
  int res;
  NdbTransaction *trans= m_active_trans;
  NdbIndexOperation *op;
  DBUG_ENTER("ha_ndbcluster::unique_index_read");
  DBUG_PRINT("enter", ("key_len: %u, index: %u", key_len, active_index));
  DBUG_DUMP("key", (char*)key, key_len);
  
  NdbOperation::LockMode lm=
    (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
  if (!(op= trans->getNdbIndexOperation(m_index[active_index].unique_index, 
                                        m_table)) ||
      op->readTuple(lm) != 0)
    ERR_RETURN(trans->getNdbError());
  
  // Set secondary index key(s)
  if ((res= set_index_key(op, table->key_info + active_index, key)))
    DBUG_RETURN(res);
  
  if ((res= define_read_attrs(buf, op)))
    DBUG_RETURN(res);

  if (execute_no_commit_ie(this,trans,FALSE) != 0 ||
      op->getNdbError().code) 
  {
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(ndb_err(trans));
  }
  // The value has now been fetched from NDB
  unpack_record(buf);
  table->status= 0;
  DBUG_RETURN(0);
}

2098
/*
  Fetch the next row from an open scan cursor.

  If the previous row was scanned under SELECT FOR UPDATE / LOCK IN
  SHARE MODE and was not explicitly unlocked, a lockCurrentTuple()
  operation is first queued to keep the row lock.  Pending update/delete
  (and blob) operations are flushed to NDB before the cursor is asked
  for more rows, so their locks are not released prematurely.

  cursor  the active NdbScanOperation to advance

  Returns 0 when a row was fetched, 1 at end of scan, -1 on error.
*/
inline int ha_ndbcluster::fetch_next(NdbScanOperation* cursor)
{
  DBUG_ENTER("fetch_next");
  int local_check;
  NdbTransaction *trans= m_active_trans;

  if (m_lock_tuple)
  {
    /*
      Lock level m_lock.type either TL_WRITE_ALLOW_WRITE
      (SELECT FOR UPDATE) or TL_READ_WITH_SHARED_LOCKS (SELECT
      LOCK WITH SHARE MODE) and row was not explicitly unlocked
      with unlock_row() call
    */
    NdbOperation *op;
    // Lock row
    DBUG_PRINT("info", ("Keeping lock on scanned row"));

    if (!(op= m_active_cursor->lockCurrentTuple()))
    {
      /* purecov: begin inspected */
      m_lock_tuple= FALSE;
      ERR_RETURN(trans->getNdbError());
      /* purecov: end */
    }
    m_ops_pending++;
  }
  m_lock_tuple= FALSE;

  // Only contact NDB for more rows when no row locks need to be held
  bool contact_ndb= m_lock.type < TL_WRITE_ALLOW_WRITE &&
                    m_lock.type != TL_READ_WITH_SHARED_LOCKS;
  do {
    DBUG_PRINT("info", ("Call nextResult, contact_ndb: %d", contact_ndb));
    /*
      We can only handle one tuple with blobs at a time.
    */
    if (m_ops_pending && m_blobs_pending)
    {
      if (execute_no_commit(this,trans,FALSE) != 0)
        DBUG_RETURN(ndb_err(trans));
      m_ops_pending= 0;
      m_blobs_pending= FALSE;
    }

    if ((local_check= cursor->nextResult(contact_ndb, m_force_send)) == 0)
    {
      /*
        Explicitly lock tuple if "select for update" or
        "select lock in share mode"
      */
      m_lock_tuple= (m_lock.type == TL_WRITE_ALLOW_WRITE
                     ||
                     m_lock.type == TL_READ_WITH_SHARED_LOCKS);
      DBUG_RETURN(0);
    }
    else if (local_check == 1 || local_check == 2)
    {
      // 1: No more records
      // 2: No more cached records

      /*
        Before fetching more rows and releasing lock(s),
        all pending update or delete operations should
        be sent to NDB
      */
      DBUG_PRINT("info", ("ops_pending: %ld", (long) m_ops_pending));
      if (m_ops_pending)
      {
        if (m_transaction_on)
        {
          if (execute_no_commit(this,trans,FALSE) != 0)
            DBUG_RETURN(-1);
        }
        else
        {
          // Autocommit-style mode: commit and restart the transaction
          if  (execute_commit(this,trans) != 0)
            DBUG_RETURN(-1);
          if (trans->restart() != 0)
          {
            DBUG_ASSERT(0);
            DBUG_RETURN(-1);
          }
        }
        m_ops_pending= 0;
      }
      contact_ndb= (local_check == 2);
    }
    else
    {
      DBUG_RETURN(-1);
    }
  } while (local_check == 2);

  DBUG_RETURN(1);
}

/*
  Get the next record of a started scan. Try to fetch
  it locally from NdbApi cached records if possible,
  otherwise ask NDB for more.

  NOTE
  If this is a update/delete make sure to not contact
  NDB before any pending ops have been sent to NDB.
*/

inline int ha_ndbcluster::next_result(byte *buf)
{
  DBUG_ENTER("next_result");

  if (!m_active_cursor)
    DBUG_RETURN(HA_ERR_END_OF_FILE);

  const int fetch_res= fetch_next(m_active_cursor);
  switch (fetch_res)
  {
  case 0:
    // A row was fetched; unpack it into the record buffer
    DBUG_PRINT("info", ("One more record found"));
    unpack_record(buf);
    table->status= 0;
    DBUG_RETURN(0);
  case 1:
    // Scan exhausted, no more records
    table->status= STATUS_NOT_FOUND;
    DBUG_PRINT("info", ("No more records"));
    DBUG_RETURN(HA_ERR_END_OF_FILE);
  default:
    // Error while fetching
    DBUG_RETURN(ndb_err(m_active_trans));
  }
}

2236
/*
  Set bounds for ordered index scan.

  Walks the key parts of index 'inx' and translates the MySQL start/end
  key ranges (keys[0] = start, keys[1] = end, either may be NULL) into
  NDB bound types (BoundEQ/LE/LT/GE/GT), calling op->setBound() per part
  and finishing the range with op->end_of_bound(range_no).

  op        index scan operation to define bounds on
  inx       index number within table->key_info
  rir       TRUE when called from records_in_range (HA_READ_KEY_EXACT is
            then treated as LE instead of EQ)
  keys      start and end key ranges
  range_no  range number for multi-range scans

  Returns the result of op->end_of_bound(), or an NDB error.
*/

int ha_ndbcluster::set_bounds(NdbIndexScanOperation *op,
                              uint inx,
                              bool rir,
                              const key_range *keys[2],
                              uint range_no)
{
  const KEY *const key_info= table->key_info + inx;
  const uint key_parts= key_info->key_parts;
  uint key_tot_len[2];
  uint tot_len;
  uint i, j;

  DBUG_ENTER("set_bounds");
  DBUG_PRINT("info", ("key_parts=%d", key_parts));

  // Record the total stored length of each of the two keys (0 if absent)
  for (j= 0; j <= 1; j++)
  {
    const key_range *key= keys[j];
    if (key != NULL)
    {
      // for key->flag see ha_rkey_function
      DBUG_PRINT("info", ("key %d length=%d flag=%d",
                          j, key->length, key->flag));
      key_tot_len[j]= key->length;
    }
    else
    {
      DBUG_PRINT("info", ("key %d not present", j));
      key_tot_len[j]= 0;
    }
  }
  tot_len= 0;

  for (i= 0; i < key_parts; i++)
  {
    KEY_PART_INFO *key_part= &key_info->key_part[i];
    Field *field= key_part->field;
#ifndef DBUG_OFF
    uint part_len= key_part->length;
#endif
    uint part_store_len= key_part->store_length;
    // Info about each key part
    struct part_st {
      bool part_last;          // is this the last part covered by the key?
      const key_range *key;    // which key (start/end) this entry refers to
      const byte *part_ptr;    // raw part data (incl. null byte if nullable)
      bool part_null;          // part value is SQL NULL
      int bound_type;          // NDB bound type, -1 = no bound
      const char* bound_ptr;   // data passed to setBound (skips null byte)
    };
    struct part_st part[2];

    for (j= 0; j <= 1; j++)
    {
      struct part_st &p= part[j];
      p.key= NULL;
      p.bound_type= -1;
      if (tot_len < key_tot_len[j])
      {
        p.part_last= (tot_len + part_store_len >= key_tot_len[j]);
        p.key= keys[j];
        p.part_ptr= &p.key->key[tot_len];
        p.part_null= key_part->null_bit && *p.part_ptr;
        // NOTE(review): the cast applies to p.part_null before the ?:
        // (cast binds tighter than the conditional) — works because a
        // non-null pointer-cast of 'true' is truthy, but fragile; verify
        // intent before touching this expression.
        p.bound_ptr= (const char *)
          p.part_null ? 0 : key_part->null_bit ? p.part_ptr + 1 : p.part_ptr;

        if (j == 0)
        {
          switch (p.key->flag)
          {
            case HA_READ_KEY_EXACT:
              if (! rir)
                p.bound_type= NdbIndexScanOperation::BoundEQ;
              else // differs for records_in_range
                p.bound_type= NdbIndexScanOperation::BoundLE;
              break;
            // ascending
            case HA_READ_KEY_OR_NEXT:
              p.bound_type= NdbIndexScanOperation::BoundLE;
              break;
            case HA_READ_AFTER_KEY:
              if (! p.part_last)
                p.bound_type= NdbIndexScanOperation::BoundLE;
              else
                p.bound_type= NdbIndexScanOperation::BoundLT;
              break;
            // descending
            case HA_READ_PREFIX_LAST:           // weird
              p.bound_type= NdbIndexScanOperation::BoundEQ;
              break;
            case HA_READ_PREFIX_LAST_OR_PREV:   // weird
              p.bound_type= NdbIndexScanOperation::BoundGE;
              break;
            case HA_READ_BEFORE_KEY:
              if (! p.part_last)
                p.bound_type= NdbIndexScanOperation::BoundGE;
              else
                p.bound_type= NdbIndexScanOperation::BoundGT;
              break;
            default:
              break;
          }
        }
        if (j == 1) {
          switch (p.key->flag)
          {
            // ascending
            case HA_READ_BEFORE_KEY:
              if (! p.part_last)
                p.bound_type= NdbIndexScanOperation::BoundGE;
              else
                p.bound_type= NdbIndexScanOperation::BoundGT;
              break;
            case HA_READ_AFTER_KEY:     // weird
              p.bound_type= NdbIndexScanOperation::BoundGE;
              break;
            default:
              break;
            // descending strangely sets no end key
          }
        }

        if (p.bound_type == -1)
        {
          DBUG_PRINT("error", ("key %d unknown flag %d", j, p.key->flag));
          DBUG_ASSERT(FALSE);
          // Stop setting bounds but continue with what we have
          DBUG_RETURN(op->end_of_bound(range_no));
        }
      }
    }

    // Seen with e.g. b = 1 and c > 1
    if (part[0].bound_type == NdbIndexScanOperation::BoundLE &&
        part[1].bound_type == NdbIndexScanOperation::BoundGE &&
        memcmp(part[0].part_ptr, part[1].part_ptr, part_store_len) == 0)
    {
      DBUG_PRINT("info", ("replace LE/GE pair by EQ"));
      part[0].bound_type= NdbIndexScanOperation::BoundEQ;
      part[1].bound_type= -1;
    }
    // Not seen but was in previous version
    if (part[0].bound_type == NdbIndexScanOperation::BoundEQ &&
        part[1].bound_type == NdbIndexScanOperation::BoundGE &&
        memcmp(part[0].part_ptr, part[1].part_ptr, part_store_len) == 0)
    {
      DBUG_PRINT("info", ("remove GE from EQ/GE pair"));
      part[1].bound_type= -1;
    }

    for (j= 0; j <= 1; j++)
    {
      struct part_st &p= part[j];
      // Set bound if not done with this key
      if (p.key != NULL)
      {
        DBUG_PRINT("info", ("key %d:%d  offset: %d  length: %d  last: %d  bound: %d",
                            j, i, tot_len, part_len, p.part_last, p.bound_type));
        DBUG_DUMP("info", (const char*)p.part_ptr, part_store_len);

        // Set bound if not cancelled via type -1
        if (p.bound_type != -1)
        {
          const char* ptr= p.bound_ptr;
          char buf[256];
          shrink_varchar(field, ptr, buf);
          if (op->setBound(i, p.bound_type, ptr))
            ERR_RETURN(op->getNdbError());
        }
      }
    }

    tot_len+= part_store_len;
  }
  DBUG_RETURN(op->end_of_bound(range_no));
}

2417
/*
  Start ordered index scan in NDB.

  Either creates a new NdbIndexScanOperation for active_index, or — when
  m_active_cursor is already set — restarts the existing cursor with new
  bounds (reset_bounds).  Scan filter, read attributes and (for tables
  with user defined partitioning but no primary key) the partition id
  are only defined on a fresh scan, not on a restart.

  start_key/end_key  range bounds (either may be NULL)
  sorted             TRUE for an ordered (sorted) result
  descending         TRUE to scan in descending order
  buf                record buffer for the first row
  part_spec          pruned partition range; if it narrows to a single
                     partition the scan is restricted to it

  Returns 0 with the first row in buf, or a handler error code.
*/

int ha_ndbcluster::ordered_index_scan(const key_range *start_key,
                                      const key_range *end_key,
                                      bool sorted, bool descending,
                                      byte* buf, part_id_range *part_spec)
{  
  int res;
  bool restart;
  NdbTransaction *trans= m_active_trans;
  NdbIndexScanOperation *op;

  DBUG_ENTER("ha_ndbcluster::ordered_index_scan");
  DBUG_PRINT("enter", ("index: %u, sorted: %d, descending: %d",
             active_index, sorted, descending));  
  DBUG_PRINT("enter", ("Starting new ordered scan on %s", m_tabname));
  m_write_op= FALSE;

  // Check that sorted seems to be initialised
  DBUG_ASSERT(sorted == 0 || sorted == 1);
  
  if (m_active_cursor == 0)
  {
    restart= FALSE;
    NdbOperation::LockMode lm=
      (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
    // Plain reads need KeyInfo so rows can be identified later
    bool need_pk = (lm == NdbOperation::LM_Read);
    if (!(op= trans->getNdbIndexScanOperation(m_index[active_index].index, 
                                              m_table)) ||
        op->readTuples(lm, 0, parallelism, sorted, descending, FALSE, need_pk))
      ERR_RETURN(trans->getNdbError());
    if (m_use_partition_function && part_spec != NULL &&
        part_spec->start_part == part_spec->end_part)
      op->setPartitionId(part_spec->start_part);
    m_active_cursor= op;
  } else {
    // Reuse the existing cursor, only the bounds change
    restart= TRUE;
    op= (NdbIndexScanOperation*)m_active_cursor;
    
    if (m_use_partition_function && part_spec != NULL &&
        part_spec->start_part == part_spec->end_part)
      op->setPartitionId(part_spec->start_part);
    DBUG_ASSERT(op->getSorted() == sorted);
    DBUG_ASSERT(op->getLockMode() == 
                (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type));
    if (op->reset_bounds(m_force_send))
      DBUG_RETURN(ndb_err(m_active_trans));
  }
  
  {
    const key_range *keys[2]= { start_key, end_key };
    res= set_bounds(op, active_index, FALSE, keys);
    if (res)
      DBUG_RETURN(res);
  }

  if (!restart)
  {
    // Push down any condition (engine condition pushdown)
    if (m_cond && m_cond->generate_scan_filter(op))
      DBUG_RETURN(ndb_err(trans));

    if ((res= define_read_attrs(buf, op)))
    {
      DBUG_RETURN(res);
    }
    
    // If table has user defined partitioning
    // and no primary key, we need to read the partition id
    // to support ORDER BY queries
    if (m_use_partition_function &&
        (table_share->primary_key == MAX_KEY) && 
        (get_ndb_partition_id(op)))
      ERR_RETURN(trans->getNdbError());
  }

  if (execute_no_commit(this,trans,FALSE) != 0)
    DBUG_RETURN(ndb_err(trans));
  
  DBUG_RETURN(next_result(buf));
}
2499

jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523
/*
  Derive NdbScanOperation flags for a table scan.

  SF_KeyInfo is requested for plain reads; SF_DiskScan when the scan
  touches disk-stored columns (or, heuristically, for an exclusive scan
  that reads no memory-resident column).
*/
static
int
guess_scan_flags(NdbOperation::LockMode lm, 
		 const NDBTAB* tab, const MY_BITMAP* readset)
{
  // KeyInfo is needed when scanning with a plain read lock
  int scan_flags= (lm == NdbOperation::LM_Read)
    ? NdbScanOperation::SF_KeyInfo : 0;

  // checkColumns(0,0): does the table have disk columns at all?
  // (bit 2 = disk column present — presumably; see NdbDictionary)
  if (tab->checkColumns(0, 0) & 2)
  {
    const int col_check= tab->checkColumns(readset->bitmap,
                                           no_bytes_in_map(readset));
    if (col_check & 2)
    {
      // Read set includes disk columns: use disk scan
      scan_flags|= NdbScanOperation::SF_DiskScan;
    }
    else if ((col_check & 4) == 0 && lm == NdbOperation::LM_Exclusive)
    {
      // No memory column is read and lock is exclusive: guess disk scan
      scan_flags|= NdbScanOperation::SF_DiskScan;
    }
  }
  return scan_flags;
}

2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582

/*
  Unique index scan in NDB (full table scan with scan filter).

  Used when a unique index cannot be accessed directly (e.g. partial key
  with NULLs): performs a full table scan and pushes the key condition
  down as an NDB scan filter instead.

  key_info  index whose key parts form the filter
  key       key value to filter on
  key_len   length of key
  buf       record buffer for the first matching row

  Returns 0 with the first row in buf, HA_ERR_END_OF_FILE when partition
  pruning leaves no partition, or another handler error code.
*/

int ha_ndbcluster::unique_index_scan(const KEY* key_info, 
				     const byte *key, 
				     uint key_len,
				     byte *buf)
{
  int res;
  NdbScanOperation *op;
  NdbTransaction *trans= m_active_trans;
  part_id_range part_spec;

  DBUG_ENTER("unique_index_scan");  
  DBUG_PRINT("enter", ("Starting new scan on %s", m_tabname));

  NdbOperation::LockMode lm=
    (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
  int flags= guess_scan_flags(lm, m_table, table->read_set);
  if (!(op=trans->getNdbScanOperation((const NDBTAB *) m_table)) ||
      op->readTuples(lm, flags, parallelism))
    ERR_RETURN(trans->getNdbError());
  m_active_cursor= op;

  if (m_use_partition_function)
  {
    part_spec.start_part= 0;
    part_spec.end_part= m_part_info->get_tot_partitions() - 1;
    prune_partition_set(table, &part_spec);
    DBUG_PRINT("info", ("part_spec.start_part = %u, part_spec.end_part = %u",
                        part_spec.start_part, part_spec.end_part));
    /*
      If partition pruning has found no partition in set
      we can return HA_ERR_END_OF_FILE
      If partition pruning has found exactly one partition in set
      we can optimize scan to run towards that partition only.
    */
    if (part_spec.start_part > part_spec.end_part)
    {
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    else if (part_spec.start_part == part_spec.end_part)
    {
      /*
        Only one partition is required to scan, if sorted is required we
        don't need it any more since output from one ordered partitioned
        index is always sorted.
      */
      m_active_cursor->setPartitionId(part_spec.start_part);
    }
    // If table has user defined partitioning
    // and no primary key, we need to read the partition id
    // to support ORDER BY queries
    if ((table_share->primary_key == MAX_KEY) && 
        (get_ndb_partition_id(op)))
      ERR_RETURN(trans->getNdbError());
  }
  // Lazily create the condition pushdown helper if not present
  if (!m_cond)
    m_cond= new ha_ndbcluster_cond;
  if (!m_cond)
  {
    my_errno= HA_ERR_OUT_OF_MEM;
    DBUG_RETURN(my_errno);
  }       
  if (m_cond->generate_scan_filter_from_key(op, key_info, key, key_len, buf))
    DBUG_RETURN(ndb_err(trans));
  if ((res= define_read_attrs(buf, op)))
    DBUG_RETURN(res);

  if (execute_no_commit(this,trans,FALSE) != 0)
    DBUG_RETURN(ndb_err(trans));
  DBUG_PRINT("exit", ("Scan started successfully"));
  DBUG_RETURN(next_result(buf));
}


2602
/*
  Start full table scan in NDB

  SYNOPSIS
    full_table_scan()
    buf                 Buffer to store the first fetched row

  RETURN
    0                   ok, first row is in buf
    HA_ERR_END_OF_FILE  partition pruning removed all partitions
    other               NDB error mapped via ndb_err()/ERR_RETURN
 */

int ha_ndbcluster::full_table_scan(byte *buf)
{
  int res;
  NdbScanOperation *op;
  NdbTransaction *trans= m_active_trans;
  part_id_range part_spec;

  DBUG_ENTER("full_table_scan");  
  DBUG_PRINT("enter", ("Starting new scan on %s", m_tabname));
  m_write_op= FALSE;

  NdbOperation::LockMode lm=
    (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
  int flags= guess_scan_flags(lm, m_table, table->read_set);
  // Define the scan operation; readTuples() must be called before
  // any attribute/filter definition on the operation
  if (!(op=trans->getNdbScanOperation(m_table)) ||
      op->readTuples(lm, flags, parallelism))
    ERR_RETURN(trans->getNdbError());
  m_active_cursor= op;

  if (m_use_partition_function)
  {
    part_spec.start_part= 0;
    part_spec.end_part= m_part_info->get_tot_partitions() - 1;
    prune_partition_set(table, &part_spec);
    DBUG_PRINT("info", ("part_spec.start_part: %u  part_spec.end_part: %u",
                        part_spec.start_part, part_spec.end_part));
    /*
      If partition pruning has found no partition in set
      we can return HA_ERR_END_OF_FILE
      If partition pruning has found exactly one partition in set
      we can optimize scan to run towards that partition only.
    */
    if (part_spec.start_part > part_spec.end_part)
    {
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    else if (part_spec.start_part == part_spec.end_part)
    {
      /*
        Only one partition is required to scan, if sorted is required we
        don't need it any more since output from one ordered partitioned
        index is always sorted.
      */
      m_active_cursor->setPartitionId(part_spec.start_part);
    }
    // If table has user defined partitioning
    // and no primary key, we need to read the partition id
    // to support ORDER BY queries
    if ((table_share->primary_key == MAX_KEY) && 
        (get_ndb_partition_id(op)))
      ERR_RETURN(trans->getNdbError());
  }

  // Push any condition (engine condition pushdown) as an NDB scan filter
  if (m_cond && m_cond->generate_scan_filter(op))
    DBUG_RETURN(ndb_err(trans));
  if ((res= define_read_attrs(buf, op)))
    DBUG_RETURN(res);

  // Send the scan definition to the data nodes (no commit yet)
  if (execute_no_commit(this,trans,FALSE) != 0)
    DBUG_RETURN(ndb_err(trans));
  DBUG_PRINT("exit", ("Scan started successfully"));
  DBUG_RETURN(next_result(buf));
}

2670 2671 2672 2673 2674
/*
  Insert one record into NDB

  SYNOPSIS
    write_row()
    record              Row image to insert (normally table->record[0])

  RETURN
    0                        ok
    HA_ERR_FOUND_DUPP_KEY    duplicate found when IGNORE is in effect
    other                    error from auto_increment handling, partition
                             mapping or NDB (mapped via ndb_err()/ERR_RETURN)

  NOTE
    Rows are batched: the operation is only sent to NDB when the bulk
    insert row count is reached, a blob is involved, this is a primary
    key update, or we are inserting a single row.

  FIX: removed a stray semicolon after the retry-condition `if` in the
  hidden-key auto increment loop; the empty statement made the
  sleep+continue block unconditional, so permanent errors were retried
  forever instead of being returned.
*/
int ha_ndbcluster::write_row(byte *record)
{
  bool has_auto_increment;
  uint i;
  NdbTransaction *trans= m_active_trans;
  NdbOperation *op;
  int res;
  THD *thd= table->in_use;
  longlong func_value= 0;
  DBUG_ENTER("ha_ndbcluster::write_row");

  m_write_op= TRUE;
  has_auto_increment= (table->next_number_field && record == table->record[0]);
  if (table_share->primary_key != MAX_KEY)
  {
    /*
     * Increase any auto_incremented primary key
     */
    if (has_auto_increment) 
    {
      int error;

      m_skip_auto_increment= FALSE;
      if ((error= update_auto_increment()))
        DBUG_RETURN(error);
      m_skip_auto_increment= (insert_id_for_cur_row == 0);
    }
  }

  /*
   * If IGNORE the ignore constraint violations on primary and unique keys
   */
  if (!m_use_write && m_ignore_dup_key)
  {
    /*
      compare if expression with that in start_bulk_insert()
      start_bulk_insert will set parameters to ensure that each
      write_row is committed individually
    */
    int peek_res= peek_indexed_rows(record, TRUE);
    
    if (!peek_res) 
    {
      DBUG_RETURN(HA_ERR_FOUND_DUPP_KEY);
    }
    if (peek_res != HA_ERR_KEY_NOT_FOUND)
      DBUG_RETURN(peek_res);
  }

  ha_statistic_increment(&SSV::ha_write_count);
  if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
    table->timestamp_field->set_time();

  if (!(op= trans->getNdbOperation(m_table)))
    ERR_RETURN(trans->getNdbError());

  // REPLACE semantics use writeTuple (upsert), plain INSERT uses insertTuple
  res= (m_use_write) ? op->writeTuple() :op->insertTuple(); 
  if (res != 0)
    ERR_RETURN(trans->getNdbError());  
 
  if (m_use_partition_function)
  {
    uint32 part_id;
    int error;
    my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
    error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value);
    dbug_tmp_restore_column_map(table->read_set, old_map);
    if (error)
    {
      m_part_info->err_value= func_value;
      DBUG_RETURN(error);
    }
    op->setPartitionId(part_id);
  }

  if (table_share->primary_key == MAX_KEY) 
  {
    // Table has hidden primary key; fetch a unique value from NDB,
    // retrying on temporary errors
    Ndb *ndb= get_ndb();
    Uint64 auto_value;
    uint retries= NDB_AUTO_INCREMENT_RETRIES;
    int retry_sleep= 30; /* 30 milliseconds, transaction */
    for (;;)
    {
      Ndb_tuple_id_range_guard g(m_share);
      if (ndb->getAutoIncrementValue(m_table, g.range, auto_value, 1) == -1)
      {
        /* Only retry while attempts remain AND the error is temporary */
        if (--retries &&
            ndb->getNdbError().status == NdbError::TemporaryError)
        {
          my_sleep(retry_sleep);
          continue;
        }
        ERR_RETURN(ndb->getNdbError());
      }
      break;
    }
    if (set_hidden_key(op, table->s->fields, (const byte*)&auto_value))
      ERR_RETURN(op->getNdbError());
  } 
  else 
  {
    int error;
    if ((error= set_primary_key_from_record(op, record)))
      DBUG_RETURN(error);
  }

  // Set non-key attribute(s)
  bool set_blob_value= FALSE;
  my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
  for (i= 0; i < table_share->fields; i++) 
  {
    Field *field= table->field[i];
    // For REPLACE (m_use_write) only columns in the write set are sent;
    // a plain INSERT always sends every non-key column
    if (!(field->flags & PRI_KEY_FLAG) &&
        (bitmap_is_set(table->write_set, i) || !m_use_write) &&
        set_ndb_value(op, field, i, record-table->record[0], &set_blob_value))
    {
      m_skip_auto_increment= TRUE;
      dbug_tmp_restore_column_map(table->read_set, old_map);
      ERR_RETURN(op->getNdbError());
    }
  }
  dbug_tmp_restore_column_map(table->read_set, old_map);

  if (m_use_partition_function)
  {
    /*
      We need to set the value of the partition function value in
      NDB since the NDB kernel doesn't have easy access to the function
      to calculate the value.
    */
    if (func_value >= INT_MAX32)
      func_value= INT_MAX32;
    uint32 part_func_value= (uint32)func_value;
    uint no_fields= table_share->fields;
    if (table_share->primary_key == MAX_KEY)
      no_fields++;
    op->setValue(no_fields, part_func_value);
  }

  if (unlikely(m_slow_path))
  {
    // Tag the operation so the binlog injector can skip/attribute it
    if (!(thd->options & OPTION_BIN_LOG))
      op->setAnyValue(NDB_ANYVALUE_FOR_NOLOGGING);
    else if (thd->slave_thread)
      op->setAnyValue(thd->server_id);
  }
  m_rows_changed++;

  /*
    Execute write operation
    NOTE When doing inserts with many values in 
    each INSERT statement it should not be necessary
    to NoCommit the transaction between each row.
    Find out how this is detected!
  */
  m_rows_inserted++;
  no_uncommitted_rows_update(1);
  m_bulk_insert_not_flushed= TRUE;
  if ((m_rows_to_insert == (ha_rows) 1) || 
      ((m_rows_inserted % m_bulk_insert_rows) == 0) ||
      m_primary_key_update ||
      set_blob_value)
  {
    // Send rows to NDB
    DBUG_PRINT("info", ("Sending inserts to NDB, "\
                        "rows_inserted: %d  bulk_insert_rows: %d", 
                        (int)m_rows_inserted, (int)m_bulk_insert_rows));

    m_bulk_insert_not_flushed= FALSE;
    if (m_transaction_on)
    {
      if (execute_no_commit(this,trans,FALSE) != 0)
      {
        m_skip_auto_increment= TRUE;
        no_uncommitted_rows_execute_failure();
        DBUG_RETURN(ndb_err(trans));
      }
    }
    else
    {
      // Autocommit-style path: commit and restart the transaction
      if (execute_commit(this,trans) != 0)
      {
        m_skip_auto_increment= TRUE;
        no_uncommitted_rows_execute_failure();
        DBUG_RETURN(ndb_err(trans));
      }
      if (trans->restart() != 0)
      {
        DBUG_ASSERT(0);
        DBUG_RETURN(-1);
      }
    }
  }
  if ((has_auto_increment) && (m_skip_auto_increment))
  {
    // An explicit value was inserted; bump NDB's auto increment counter
    // past it so subsequent generated values do not collide
    Ndb *ndb= get_ndb();
    Uint64 next_val= (Uint64) table->next_number_field->val_int() + 1;
#ifndef DBUG_OFF
    char buff[22];
    DBUG_PRINT("info", 
               ("Trying to set next auto increment value to %s",
                llstr(next_val, buff)));
#endif
    Ndb_tuple_id_range_guard g(m_share);
    if (ndb->setAutoIncrementValue(m_table, g.range, next_val, TRUE)
        == -1)
      ERR_RETURN(ndb->getNdbError());
  }
  m_skip_auto_increment= TRUE;

  DBUG_PRINT("exit",("ok"));
  DBUG_RETURN(0);
}


/*
  Compare the parts of key 'keynr' in two row images and report
  whether the key value differs between them.

  RETURN
    1   key differs (NULL-ness or value)
    0   key is identical in both rows
*/

int ha_ndbcluster::key_cmp(uint keynr, const byte * old_row,
                           const byte * new_row)
{
  const KEY *key= table->key_info + keynr;
  KEY_PART_INFO *part= key->key_part;
  KEY_PART_INFO *part_end= part + key->key_parts;

  while (part != part_end)
  {
    /* A change in the NULL indicator alone makes the keys differ */
    if (part->null_bit &&
        ((old_row[part->null_offset] & part->null_bit) !=
         (new_row[part->null_offset] & part->null_bit)))
      return 1;

    if (part->key_part_flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
    {
      /* Variable length / blob parts need the field's own comparator */
      if (part->field->cmp_binary((char*) (old_row + part->offset),
                                  (char*) (new_row + part->offset),
                                  (ulong) part->length))
        return 1;
    }
    else if (memcmp(old_row + part->offset, new_row + part->offset,
                    part->length))
    {
      return 1;
    }
    part++;
  }
  return 0;
}

/*
  Update one record in NDB using primary key

  SYNOPSIS
    update_row()
    old_data            Row image before the update
    new_data            Row image after the update

  RETURN
    0                       ok
    HA_ERR_FOUND_DUPP_KEY   duplicate found under IGNORE
    other                   error from partition mapping, the
                            read+delete+insert path, or NDB

  NOTE
    A change to the primary key or to the partition a row belongs to
    cannot be expressed as an in-place NDB update; it is handled as
    read + delete + insert (with best-effort undo of the delete if
    the insert fails).
*/

int ha_ndbcluster::update_row(const byte *old_data, byte *new_data)
{
  THD *thd= table->in_use;
  NdbTransaction *trans= m_active_trans;
  NdbScanOperation* cursor= m_active_cursor;
  NdbOperation *op;
  uint i;
  uint32 old_part_id= 0, new_part_id= 0;
  int error;
  longlong func_value;
  bool pk_update= (table_share->primary_key != MAX_KEY &&
		   key_cmp(table_share->primary_key, old_data, new_data));
  DBUG_ENTER("update_row");
  m_write_op= TRUE;
  
  /*
   * If IGNORE the ignore constraint violations on primary and unique keys,
   * but check that it is not part of INSERT ... ON DUPLICATE KEY UPDATE
   */
  if (m_ignore_dup_key && (thd->lex->sql_command == SQLCOM_UPDATE ||
                           thd->lex->sql_command == SQLCOM_UPDATE_MULTI))
  {
    int peek_res= peek_indexed_rows(new_data, pk_update);
    
    if (!peek_res) 
    {
      DBUG_RETURN(HA_ERR_FOUND_DUPP_KEY);
    }
    if (peek_res != HA_ERR_KEY_NOT_FOUND)
      DBUG_RETURN(peek_res);
  }

  ha_statistic_increment(&SSV::ha_update_count);
  if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
  {
    table->timestamp_field->set_time();
    // Ensure the auto-set timestamp column is actually written below
    bitmap_set_bit(table->write_set, table->timestamp_field->field_index);
  }

  if (m_use_partition_function &&
      (error= get_parts_for_update(old_data, new_data, table->record[0],
                                   m_part_info, &old_part_id, &new_part_id,
                                   &func_value)))
  {
    m_part_info->err_value= func_value;
    DBUG_RETURN(error);
  }

  /*
   * Check for update of primary key or partition change
   * for special handling
   */  
  if (pk_update || old_part_id != new_part_id)
  {
    int read_res, insert_res, delete_res, undo_res;

    DBUG_PRINT("info", ("primary key update or partition change, "
                        "doing read+delete+insert"));
    // Get all old fields, since we optimize away fields not in query
    read_res= complemented_read(old_data, new_data, old_part_id);
    if (read_res)
    {
      DBUG_PRINT("info", ("read failed"));
      DBUG_RETURN(read_res);
    }
    // Delete old row
    // m_primary_key_update makes delete_row/write_row flush immediately
    m_primary_key_update= TRUE;
    delete_res= delete_row(old_data);
    m_primary_key_update= FALSE;
    if (delete_res)
    {
      DBUG_PRINT("info", ("delete failed"));
      DBUG_RETURN(delete_res);
    }     
    // Insert new row
    DBUG_PRINT("info", ("delete succeded"));
    m_primary_key_update= TRUE;
    insert_res= write_row(new_data);
    m_primary_key_update= FALSE;
    if (insert_res)
    {
      DBUG_PRINT("info", ("insert failed"));
      if (trans->commitStatus() == NdbConnection::Started)
      {
        // Undo delete_row(old_data)
        m_primary_key_update= TRUE;
        undo_res= write_row((byte *)old_data);
        if (undo_res)
          push_warning(current_thd, 
                       MYSQL_ERROR::WARN_LEVEL_WARN, 
                       undo_res, 
                       "NDB failed undoing delete at primary key update");
        m_primary_key_update= FALSE;
      }
      DBUG_RETURN(insert_res);
    }
    DBUG_PRINT("info", ("delete+insert succeeded"));
    DBUG_RETURN(0);
  }

  if (cursor)
  {
    /*
      We are scanning records and want to update the record
      that was just found, call updateTuple on the cursor 
      to take over the lock to a new update operation
      And thus setting the primary key of the record from 
      the active record in cursor
    */
    DBUG_PRINT("info", ("Calling updateTuple on cursor"));
    if (!(op= cursor->updateCurrentTuple()))
      ERR_RETURN(trans->getNdbError());
    m_lock_tuple= FALSE;
    m_ops_pending++;
    if (uses_blob_value())
      m_blobs_pending= TRUE;
    if (m_use_partition_function)
      cursor->setPartitionId(new_part_id);
  }
  else
  {  
    if (!(op= trans->getNdbOperation(m_table)) ||
        op->updateTuple() != 0)
      ERR_RETURN(trans->getNdbError());  
    
    if (m_use_partition_function)
      op->setPartitionId(new_part_id);
    if (table_share->primary_key == MAX_KEY) 
    {
      // This table has no primary key, use "hidden" primary key
      DBUG_PRINT("info", ("Using hidden key"));
      
      // Require that the PK for this record has previously been 
      // read into m_ref
      DBUG_DUMP("key", m_ref, NDB_HIDDEN_PRIMARY_KEY_LENGTH);
      
      if (set_hidden_key(op, table->s->fields, m_ref))
        ERR_RETURN(op->getNdbError());
    } 
    else 
    {
      int res;
      if ((res= set_primary_key_from_record(op, old_data)))
        DBUG_RETURN(res);
    }
  }

  m_rows_changed++;

  // Set non-key attribute(s)
  my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
  for (i= 0; i < table_share->fields; i++) 
  {
    Field *field= table->field[i];
    if (bitmap_is_set(table->write_set, i) &&
        (!(field->flags & PRI_KEY_FLAG)) &&
        set_ndb_value(op, field, i, new_data - table->record[0]))
    {
      dbug_tmp_restore_column_map(table->read_set, old_map);
      ERR_RETURN(op->getNdbError());
    }
  }
  dbug_tmp_restore_column_map(table->read_set, old_map);

  // Store the (clamped) partition function value; the NDB kernel
  // cannot evaluate the partition function itself
  if (m_use_partition_function)
  {
    if (func_value >= INT_MAX32)
      func_value= INT_MAX32;
    uint32 part_func_value= (uint32)func_value;
    uint no_fields= table_share->fields;
    if (table_share->primary_key == MAX_KEY)
      no_fields++;
    op->setValue(no_fields, part_func_value);
  }

  if (unlikely(m_slow_path))
  {
    // Tag operation for the binlog injector (skip / slave attribution)
    if (!(thd->options & OPTION_BIN_LOG))
      op->setAnyValue(NDB_ANYVALUE_FOR_NOLOGGING);
    else if (thd->slave_thread)
      op->setAnyValue(thd->server_id);
  }
  /*
    Execute update operation if we are not doing a scan for update
    and there exist UPDATE AFTER triggers
  */

  if ((!cursor || m_update_cannot_batch) && 
      execute_no_commit(this,trans,false) != 0) {
    no_uncommitted_rows_execute_failure();
    DBUG_RETURN(ndb_err(trans));
  }
  
  DBUG_RETURN(0);
}


/*
  Delete one record from NDB, using primary key 

  SYNOPSIS
    delete_row()
    record              Row image identifying the row to delete

  RETURN
    0        ok (possibly only queued if deleting via an open cursor)
    other    error from partition mapping or NDB

  NOTE
    When called while a scan cursor is open, the delete takes over the
    cursor's row lock and execution is normally deferred to
    next_result(); otherwise the delete is executed immediately.
*/

int ha_ndbcluster::delete_row(const byte *record)
{
  THD *thd= table->in_use;
  NdbTransaction *trans= m_active_trans;
  NdbScanOperation* cursor= m_active_cursor;
  NdbOperation *op;
  uint32 part_id;
  int error;
  DBUG_ENTER("delete_row");
  m_write_op= TRUE;

  ha_statistic_increment(&SSV::ha_delete_count);
  m_rows_changed++;

  if (m_use_partition_function &&
      (error= get_part_for_delete(record, table->record[0], m_part_info,
                                  &part_id)))
  {
    DBUG_RETURN(error);
  }

  if (cursor)
  {
    /*
      We are scanning records and want to delete the record
      that was just found, call deleteTuple on the cursor 
      to take over the lock to a new delete operation
      And thus setting the primary key of the record from 
      the active record in cursor
    */
    DBUG_PRINT("info", ("Calling deleteTuple on cursor"));
    if (cursor->deleteCurrentTuple() != 0)
      ERR_RETURN(trans->getNdbError());     
    m_lock_tuple= FALSE;
    m_ops_pending++;

    if (m_use_partition_function)
      cursor->setPartitionId(part_id);

    no_uncommitted_rows_update(-1);

    if (unlikely(m_slow_path))
    {
      // deleteCurrentTuple() has no operation handle; tag the last
      // defined operation for the binlog injector instead
      if (!(thd->options & OPTION_BIN_LOG))
        ((NdbOperation *)trans->getLastDefinedOperation())->
          setAnyValue(NDB_ANYVALUE_FOR_NOLOGGING);
      else if (thd->slave_thread)
        ((NdbOperation *)trans->getLastDefinedOperation())->
          setAnyValue(thd->server_id);
    }
    if (!(m_primary_key_update || m_delete_cannot_batch))
      // If deleting from cursor, NoCommit will be handled in next_result
      DBUG_RETURN(0);
  }
  else
  {
    
    if (!(op=trans->getNdbOperation(m_table)) || 
        op->deleteTuple() != 0)
      ERR_RETURN(trans->getNdbError());
    
    if (m_use_partition_function)
      op->setPartitionId(part_id);

    no_uncommitted_rows_update(-1);
    
    if (table_share->primary_key == MAX_KEY) 
    {
      // This table has no primary key, use "hidden" primary key
      DBUG_PRINT("info", ("Using hidden key"));
      
      if (set_hidden_key(op, table->s->fields, m_ref))
        ERR_RETURN(op->getNdbError());
    } 
    else 
    {
      if ((error= set_primary_key_from_record(op, record)))
        DBUG_RETURN(error);
    }

    if (unlikely(m_slow_path))
    {
      // Tag operation for the binlog injector (skip / slave attribution)
      if (!(thd->options & OPTION_BIN_LOG))
        op->setAnyValue(NDB_ANYVALUE_FOR_NOLOGGING);
      else if (thd->slave_thread)
        op->setAnyValue(thd->server_id);
    }
  }

  // Execute delete operation
  if (execute_no_commit(this,trans,FALSE) != 0) {
    no_uncommitted_rows_execute_failure();
    DBUG_RETURN(ndb_err(trans));
  }
  DBUG_RETURN(0);
}
3224
  
3225 3226 3227 3228 3229
/*
  Unpack a record read from NDB 

  SYNOPSIS
    ndb_unpack_record()
    table               Table whose fields are being unpacked
    value               Per-field array of fetched NdbValue handles
    defined             Bitmap to clear for columns NDB left undefined
                        (may be ignored when no bit is cleared)
    buf                 Buffer to store read row

  NOTE
    The data for each row is read directly into the
    destination buffer. This function is primarily 
    called in order to check if any fields should be 
    set to null.
*/

void ndb_unpack_record(TABLE *table, NdbValue *value,
                       MY_BITMAP *defined, byte *buf)
{
  Field **p_field= table->field, *field= *p_field;
  // Fields are unpacked relative to record[0]; row_offset rebases them to buf
  my_ptrdiff_t row_offset= (my_ptrdiff_t) (buf - table->record[0]);
  my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->write_set);
  DBUG_ENTER("ndb_unpack_record");

  /*
    Set the filler bits of the null byte, since they are
    not touched in the code below.
    
    The filler bits are the MSBs in the last null byte
  */ 
  if (table->s->null_bytes > 0)
       buf[table->s->null_bytes - 1]|= 256U - (1U <<
					       table->s->last_null_bit_pos);
  /*
    Set null flag(s)
  */
  for ( ; field;
       p_field++, value++, field= *p_field)
  {
    field->set_notnull(row_offset);       
    if ((*value).ptr)
    {
      if (!(field->flags & BLOB_FLAG))
      {
        // isNULL(): >0 = NULL, 0 = has value, <0 = not defined by NDB
        int is_null= (*value).rec->isNULL();
        if (is_null)
        {
          if (is_null > 0)
          {
	    DBUG_PRINT("info",("[%u] NULL",
                               (*value).rec->getColumn()->getColumnNo()));
            field->set_null(row_offset);
          }
          else
          {
            DBUG_PRINT("info",("[%u] UNDEFINED",
                               (*value).rec->getColumn()->getColumnNo()));
            bitmap_clear_bit(defined,
                             (*value).rec->getColumn()->getColumnNo());
          }
        }
        else if (field->type() == MYSQL_TYPE_BIT)
        {
          Field_bit *field_bit= static_cast<Field_bit*>(field);

          /*
            Move internal field pointer to point to 'buf'.  Calling
            the correct member function directly since we know the
            type of the object.
           */
          field_bit->Field_bit::move_field_offset(row_offset);
          if (field->pack_length() < 5)
          {
            DBUG_PRINT("info", ("bit field H'%.8X", 
                                (*value).rec->u_32_value()));
            field_bit->Field_bit::store((longlong) (*value).rec->u_32_value(),
                                        FALSE);
          }
          else
          {
            DBUG_PRINT("info", ("bit field H'%.8X%.8X",
                                *(Uint32 *)(*value).rec->aRef(),
                                *((Uint32 *)(*value).rec->aRef()+1)));
#ifdef WORDS_BIGENDIAN
            /* lsw is stored first */
            Uint32 *buf= (Uint32 *)(*value).rec->aRef();
            field_bit->Field_bit::store((((longlong)*buf)
                                         & 0x000000000FFFFFFFFLL)
                                        |
                                        ((((longlong)*(buf+1)) << 32)
                                         & 0xFFFFFFFF00000000LL),
                                        TRUE);
#else
            field_bit->Field_bit::store((longlong)
                                        (*value).rec->u_64_value(), TRUE);
#endif
          }
          /*
            Move back internal field pointer to point to original
            value (usually record[0]).
           */
          field_bit->Field_bit::move_field_offset(-row_offset);
          DBUG_PRINT("info",("[%u] SET",
                             (*value).rec->getColumn()->getColumnNo()));
          DBUG_DUMP("info", (const char*) field->ptr, field->pack_length());
        }
        else
        {
          DBUG_PRINT("info",("[%u] SET",
                             (*value).rec->getColumn()->getColumnNo()));
          DBUG_DUMP("info", (const char*) field->ptr, field->pack_length());
        }
      }
      else
      {
        // Blob column: NULL-ness is reported via getDefined()
        NdbBlob *ndb_blob= (*value).blob;
        uint col_no = ndb_blob->getColumn()->getColumnNo();
        int isNull;
        ndb_blob->getDefined(isNull);
        if (isNull == 1)
        {
          DBUG_PRINT("info",("[%u] NULL", col_no));
          field->set_null(row_offset);
        }
        else if (isNull == -1)
        {
          DBUG_PRINT("info",("[%u] UNDEFINED", col_no));
          bitmap_clear_bit(defined, col_no);
        }
        else
        {
#ifndef DBUG_OFF
          // pointer was set in get_ndb_blobs_value
          Field_blob *field_blob= (Field_blob*)field;
          char* ptr;
          field_blob->get_ptr(&ptr, row_offset);
          uint32 len= field_blob->get_length(row_offset);
          DBUG_PRINT("info",("[%u] SET ptr: 0x%lx  len: %u",
                             col_no, (long) ptr, len));
#endif
        }
      }
    }
  }
  dbug_tmp_restore_column_map(table->write_set, old_map);
  DBUG_VOID_RETURN;
}

/*
  Handler-level wrapper: unpack the current fetched row into buf
  using the per-field NdbValue array m_value.
*/
void ha_ndbcluster::unpack_record(byte *buf)
{
  ndb_unpack_record(table, m_value, 0, buf);
#ifndef DBUG_OFF
  // Read and print all values that was fetched
  if (table_share->primary_key == MAX_KEY)
  {
    // Table with hidden primary key
    int hidden_no= table_share->fields;
    const NDBTAB *tab= m_table;
    char buff[22];
    const NDBCOL *hidden_col= tab->getColumn(hidden_no);
    const NdbRecAttr* rec= m_value[hidden_no].rec;
    DBUG_ASSERT(rec);
    DBUG_PRINT("hidden", ("%d: %s \"%s\"", hidden_no,
			  hidden_col->getName(),
                          llstr(rec->u_64_value(), buff)));
  }
  //DBUG_EXECUTE("value", print_results(););
#endif
}

/*
  Utility function to print/dump the fetched field
  to avoid unnecessary work, wrap in DBUG_EXECUTE as in:

    DBUG_EXECUTE("value", print_results(););

  NOTE
    Entire body is compiled out in non-debug builds.
 */

void ha_ndbcluster::print_results()
{
  DBUG_ENTER("print_results");

#ifndef DBUG_OFF

  char buf_type[MAX_FIELD_WIDTH], buf_val[MAX_FIELD_WIDTH];
  String type(buf_type, sizeof(buf_type), &my_charset_bin);
  String val(buf_val, sizeof(buf_val), &my_charset_bin);
  for (uint f= 0; f < table_share->fields; f++)
  {
    /* Use DBUG_PRINT since DBUG_FILE cannot be filtered out */
    char buf[2000];
    Field *field;
    void* ptr;
    NdbValue value;

    buf[0]= 0;
    field= table->field[f];
    // A field with no NdbValue handle was never requested from NDB
    if (!(value= m_value[f]).ptr)
    {
      strmov(buf, "not read");
      goto print_value;
    }

    ptr= field->ptr;

    if (! (field->flags & BLOB_FLAG))
    {
      if (value.rec->isNULL())
      {
        strmov(buf, "NULL");
        goto print_value;
      }
      type.length(0);
      val.length(0);
      field->sql_type(type);
      field->val_str(&val);
      my_snprintf(buf, sizeof(buf), "%s %s", type.c_ptr(), val.c_ptr());
    }
    else
    {
      // Blobs: only NULL-ness is printed, not the content
      NdbBlob *ndb_blob= value.blob;
      bool isNull= TRUE;
      ndb_blob->getNull(isNull);
      if (isNull)
        strmov(buf, "NULL");
    }

print_value:
    DBUG_PRINT("value", ("%u,%s: %s", f, field->field_name, buf));
  }
#endif
  DBUG_VOID_RETURN;
}


3457
/*
  Prepare for index access: remember which index to use and whether
  the caller requires sorted output.  Always succeeds.
*/
int ha_ndbcluster::index_init(uint index, bool sorted)
{
  DBUG_ENTER("ha_ndbcluster::index_init");
  DBUG_PRINT("enter", ("index: %u  sorted: %d", index, sorted));
  active_index= index;
  m_sorted= sorted;
  /*
    Locks are explicitly released in scan
    unless m_lock.type == TL_READ_HIGH_PRIORITY
    and no sub-sequent call to unlock_row()
  */
  m_lock_tuple= FALSE;
  DBUG_RETURN(0);
}


/*
  End index access: close any open scan cursor and return its status.
*/
int ha_ndbcluster::index_end()
{
  DBUG_ENTER("ha_ndbcluster::index_end");
  DBUG_RETURN(close_scan());
}

3479 3480 3481 3482 3483 3484 3485 3486
/**
 * Check if key contains null
 */
static
int
check_null_in_key(const KEY* key_info, const byte *key, uint key_len)
{
  KEY_PART_INFO *curr_part, *end_part;
3487
  const byte* end_ptr= key + key_len;
3488 3489 3490 3491 3492
  curr_part= key_info->key_part;
  end_part= curr_part + key_info->key_parts;

  for (; curr_part != end_part && key < end_ptr; curr_part++)
  {
3493
    if (curr_part->null_bit && *key)
3494 3495 3496 3497 3498 3499
      return 1;

    key += curr_part->store_length;
  }
  return 0;
}
3500 3501

int ha_ndbcluster::index_read(byte *buf,
3502 3503
                              const byte *key, uint key_len, 
                              enum ha_rkey_function find_flag)
3504
{
3505 3506
  key_range start_key;
  bool descending= FALSE;
3507
  DBUG_ENTER("ha_ndbcluster::index_read");
3508 3509 3510
  DBUG_PRINT("enter", ("active_index: %u, key_len: %u, find_flag: %d", 
                       active_index, key_len, find_flag));

3511 3512 3513
  start_key.key= key;
  start_key.length= key_len;
  start_key.flag= find_flag;
3514
  descending= FALSE;
3515 3516 3517 3518 3519 3520 3521 3522 3523 3524
  switch (find_flag) {
  case HA_READ_KEY_OR_PREV:
  case HA_READ_BEFORE_KEY:
  case HA_READ_PREFIX_LAST:
  case HA_READ_PREFIX_LAST_OR_PREV:
    descending= TRUE;
    break;
  default:
    break;
  }
3525 3526
  DBUG_RETURN(read_range_first_to_buf(&start_key, 0, descending,
                                      m_sorted, buf));
3527 3528 3529 3530 3531
}


/* Fetch the next row from the ongoing index scan. */
int ha_ndbcluster::index_next(byte *buf)
{
  DBUG_ENTER("ha_ndbcluster::index_next");
  ha_statistic_increment(&SSV::ha_read_next_count);
  DBUG_RETURN(next_result(buf));
}


/* Fetch the previous row; relies on the scan having been started descending. */
int ha_ndbcluster::index_prev(byte *buf)
{
  DBUG_ENTER("ha_ndbcluster::index_prev");
  ha_statistic_increment(&SSV::ha_read_prev_count);
  DBUG_RETURN(next_result(buf));
}


/*
  Position on the first row of the active index.
  Starts an ascending ordered index scan and returns its first row;
  only HA_READ_ORDER indexes get called by index_first.
*/
int ha_ndbcluster::index_first(byte *buf)
{
  DBUG_ENTER("ha_ndbcluster::index_first");
  ha_statistic_increment(&SSV::ha_read_first_count);
  DBUG_RETURN(ordered_index_scan(0, 0, TRUE, FALSE, buf, NULL));
}


/*
  Position on the last row of the active index by starting a
  descending ordered index scan and returning its first row.
*/
int ha_ndbcluster::index_last(byte *buf)
{
  DBUG_ENTER("ha_ndbcluster::index_last");
  ha_statistic_increment(&SSV::ha_read_last_count);
  DBUG_RETURN(ordered_index_scan(0, 0, TRUE, TRUE, buf, NULL));
}

3564 3565 3566 3567 3568
/* Read the last row matching the given key prefix. */
int ha_ndbcluster::index_read_last(byte * buf, const byte * key, uint key_len)
{
  DBUG_ENTER("ha_ndbcluster::index_read_last");
  DBUG_RETURN(index_read(buf, key, key_len, HA_READ_PREFIX_LAST));
}
3569

3570
int ha_ndbcluster::read_range_first_to_buf(const key_range *start_key,
3571
                                           const key_range *end_key,
3572
                                           bool desc, bool sorted,
3573
                                           byte* buf)
3574
{
3575 3576 3577 3578
  part_id_range part_spec;
  ndb_index_type type= get_index_type(active_index);
  const KEY* key_info= table->key_info+active_index;
  int error; 
3579
  DBUG_ENTER("ha_ndbcluster::read_range_first_to_buf");
3580
  DBUG_PRINT("info", ("desc: %d, sorted: %d", desc, sorted));
3581

3582 3583 3584
  if (m_use_partition_function)
  {
    get_partition_set(table, buf, active_index, start_key, &part_spec);
3585
    DBUG_PRINT("info", ("part_spec.start_part: %u  part_spec.end_part: %u",
3586 3587 3588 3589 3590 3591 3592
                        part_spec.start_part, part_spec.end_part));
    /*
      If partition pruning has found no partition in set
      we can return HA_ERR_END_OF_FILE
      If partition pruning has found exactly one partition in set
      we can optimize scan to run towards that partition only.
    */
3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606
    if (part_spec.start_part > part_spec.end_part)
    {
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    else if (part_spec.start_part == part_spec.end_part)
    {
      /*
        Only one partition is required to scan, if sorted is required we
        don't need it any more since output from one ordered partitioned
        index is always sorted.
      */
      sorted= FALSE;
    }
  }
3607

3608 3609
  m_write_op= FALSE;
  switch (type){
3610
  case PRIMARY_KEY_ORDERED_INDEX:
3611
  case PRIMARY_KEY_INDEX:
3612
    if (start_key && 
3613 3614
        start_key->length == key_info->key_length &&
        start_key->flag == HA_READ_KEY_EXACT)
3615
    {
3616
      if (m_active_cursor && (error= close_scan()))
3617
        DBUG_RETURN(error);
3618 3619 3620
      error= pk_read(start_key->key, start_key->length, buf,
		     part_spec.start_part);
      DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error);
3621
    }
3622
    break;
3623
  case UNIQUE_ORDERED_INDEX:
3624
  case UNIQUE_INDEX:
3625
    if (start_key && start_key->length == key_info->key_length &&
3626 3627
        start_key->flag == HA_READ_KEY_EXACT && 
        !check_null_in_key(key_info, start_key->key, start_key->length))
3628
    {
3629
      if (m_active_cursor && (error= close_scan()))
3630
        DBUG_RETURN(error);
3631 3632 3633

      error= unique_index_read(start_key->key, start_key->length, buf);
      DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error);
3634
    }
3635 3636 3637 3638 3639
    else if (type == UNIQUE_INDEX)
      DBUG_RETURN(unique_index_scan(key_info, 
				    start_key->key, 
				    start_key->length, 
				    buf));
3640 3641 3642 3643
    break;
  default:
    break;
  }
3644
  // Start the ordered index scan and fetch the first row
3645 3646
  DBUG_RETURN(ordered_index_scan(start_key, end_key, sorted, desc, buf,
                                 &part_spec));
3647 3648
}

joreland@mysql.com's avatar
joreland@mysql.com committed
3649
int ha_ndbcluster::read_range_first(const key_range *start_key,
3650 3651
                                    const key_range *end_key,
                                    bool eq_r, bool sorted)
joreland@mysql.com's avatar
joreland@mysql.com committed
3652 3653 3654
{
  byte* buf= table->record[0];
  DBUG_ENTER("ha_ndbcluster::read_range_first");
3655 3656
  DBUG_RETURN(read_range_first_to_buf(start_key, end_key, FALSE,
                                      sorted, buf));
joreland@mysql.com's avatar
joreland@mysql.com committed
3657 3658
}

3659
int ha_ndbcluster::read_range_next()
3660 3661 3662 3663 3664 3665
{
  DBUG_ENTER("ha_ndbcluster::read_range_next");
  DBUG_RETURN(next_result(table->record[0]));
}


3666 3667
int ha_ndbcluster::rnd_init(bool scan)
{
3668
  NdbScanOperation *cursor= m_active_cursor;
3669 3670
  DBUG_ENTER("rnd_init");
  DBUG_PRINT("enter", ("scan: %d", scan));
3671
  // Check if scan is to be restarted
mskold@mysql.com's avatar
mskold@mysql.com committed
3672 3673 3674 3675
  if (cursor)
  {
    if (!scan)
      DBUG_RETURN(1);
3676
    if (cursor->restart(m_force_send) != 0)
3677 3678 3679 3680
    {
      DBUG_ASSERT(0);
      DBUG_RETURN(-1);
    }
mskold@mysql.com's avatar
mskold@mysql.com committed
3681
  }
3682
  index_init(table_share->primary_key, 0);
3683 3684 3685
  DBUG_RETURN(0);
}

3686 3687
int ha_ndbcluster::close_scan()
{
3688
  NdbTransaction *trans= m_active_trans;
3689 3690
  DBUG_ENTER("close_scan");

3691 3692
  m_multi_cursor= 0;
  if (!m_active_cursor && !m_multi_cursor)
3693
    DBUG_RETURN(0);
3694

3695
  NdbScanOperation *cursor= m_active_cursor ? m_active_cursor : m_multi_cursor;
3696

3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708
  if (m_lock_tuple)
  {
    /*
      Lock level m_lock.type either TL_WRITE_ALLOW_WRITE
      (SELECT FOR UPDATE) or TL_READ_WITH_SHARED_LOCKS (SELECT
      LOCK WITH SHARE MODE) and row was not explictly unlocked 
      with unlock_row() call
    */
      NdbOperation *op;
      // Lock row
      DBUG_PRINT("info", ("Keeping lock on scanned row"));
      
3709
      if (!(op= cursor->lockCurrentTuple()))
3710
      {
3711
	m_lock_tuple= FALSE;
3712 3713 3714 3715
	ERR_RETURN(trans->getNdbError());
      }
      m_ops_pending++;      
  }
3716
  m_lock_tuple= FALSE;
3717
  if (m_ops_pending)
3718 3719 3720 3721 3722
  {
    /*
      Take over any pending transactions to the 
      deleteing/updating transaction before closing the scan    
    */
3723
    DBUG_PRINT("info", ("ops_pending: %ld", (long) m_ops_pending));    
3724
    if (execute_no_commit(this,trans,FALSE) != 0) {
3725
      no_uncommitted_rows_execute_failure();
3726
      DBUG_RETURN(ndb_err(trans));
3727
    }
3728
    m_ops_pending= 0;
3729 3730
  }
  
3731
  cursor->close(m_force_send, TRUE);
3732
  m_active_cursor= m_multi_cursor= NULL;
mskold@mysql.com's avatar
mskold@mysql.com committed
3733
  DBUG_RETURN(0);
3734
}
3735 3736 3737 3738

int ha_ndbcluster::rnd_end()
{
  DBUG_ENTER("rnd_end");
3739
  DBUG_RETURN(close_scan());
3740 3741 3742 3743 3744 3745
}


/*
  Fetch the next row of the table scan; lazily starts the full table
  scan on the first call (no cursor open yet).
*/
int ha_ndbcluster::rnd_next(byte *buf)
{
  DBUG_ENTER("rnd_next");
  ha_statistic_increment(&SSV::ha_read_rnd_next_count);
  if (m_active_cursor)
    DBUG_RETURN(next_result(buf));
  DBUG_RETURN(full_table_scan(buf));
}


/*
  An "interesting" record has been found and its pk retrieved by
  calling position(). Now it's time to read the record from db once
  again, using the saved reference in pos.
*/

int ha_ndbcluster::rnd_pos(byte *buf, byte *pos)
{
  DBUG_ENTER("rnd_pos");
  ha_statistic_increment(&SSV::ha_read_rnd_count);
  // pos holds the primary key (or hidden key) saved by position();
  // perform a pk_read using that key
  {
    part_id_range part_spec;
    uint key_length= ref_length;
    /*
      NOTE(review): when m_use_partition_function is false,
      part_spec.start_part is passed to pk_read() uninitialized —
      presumably pk_read ignores it in that case; confirm.
    */
    if (m_use_partition_function)
    {
      if (table_share->primary_key == MAX_KEY)
      {
        /*
          The partition id has been fetched from ndb
          and has been stored directly after the hidden key
        */
        DBUG_DUMP("key+part", (char *)pos, key_length);
        key_length= ref_length - sizeof(m_part_id);
        part_spec.start_part= part_spec.end_part= *(uint32 *)(pos + key_length);
      }
      else
      {
        // Derive the partition id from the saved primary key value
        key_range key_spec;
        KEY *key_info= table->key_info + table_share->primary_key;
        key_spec.key= pos;
        key_spec.length= key_length;
        key_spec.flag= HA_READ_KEY_EXACT;
        get_full_part_id_from_key(table, buf, key_info, 
                                  &key_spec, &part_spec);
        DBUG_ASSERT(part_spec.start_part == part_spec.end_part);
      }
      DBUG_PRINT("info", ("partition id %u", part_spec.start_part));
    }
    DBUG_DUMP("key", (char *)pos, key_length);
    DBUG_RETURN(pk_read(pos, key_length, buf, part_spec.start_part));
  }
}


/*
  Store the primary key of this record in ref 
  variable, so that the row can be retrieved again later
  using "reference" in rnd_pos
*/

void ha_ndbcluster::position(const byte *record)
{
  KEY *key_info;
  KEY_PART_INFO *key_part;
  KEY_PART_INFO *end;
  byte *buff;
3813 3814
  uint key_length;

3815 3816
  DBUG_ENTER("position");

3817
  if (table_share->primary_key != MAX_KEY) 
3818
  {
3819
    key_length= ref_length;
3820
    key_info= table->key_info + table_share->primary_key;
3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835
    key_part= key_info->key_part;
    end= key_part + key_info->key_parts;
    buff= ref;
    
    for (; key_part != end; key_part++) 
    {
      if (key_part->null_bit) {
        /* Store 0 if the key part is a NULL part */      
        if (record[key_part->null_offset]
            & key_part->null_bit) {
          *buff++= 1;
          continue;
        }      
        *buff++= 0;
      }
3836 3837 3838 3839

      size_t len = key_part->length;
      const byte * ptr = record + key_part->offset;
      Field *field = key_part->field;
3840
      if (field->type() ==  MYSQL_TYPE_VARCHAR)
3841
      {
3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855
        if (((Field_varstring*)field)->length_bytes == 1)
        {
          /**
           * Keys always use 2 bytes length
           */
          buff[0] = ptr[0];
          buff[1] = 0;
          memcpy(buff+2, ptr + 1, len);
        }
        else
        {
          memcpy(buff, ptr, len + 2);
        }
        len += 2;
3856 3857 3858
      }
      else
      {
3859
        memcpy(buff, ptr, len);
3860 3861
      }
      buff += len;
3862 3863 3864 3865 3866 3867
    }
  } 
  else 
  {
    // No primary key, get hidden key
    DBUG_PRINT("info", ("Getting hidden key"));
3868 3869 3870
    // If table has user defined partition save the partition id as well
    if(m_use_partition_function)
    {
3871
      DBUG_PRINT("info", ("Saving partition id %u", m_part_id));
3872 3873 3874
      key_length= ref_length - sizeof(m_part_id);
      memcpy(ref+key_length, (void *)&m_part_id, sizeof(m_part_id));
    }
3875 3876
    else
      key_length= ref_length;
3877
#ifndef DBUG_OFF
3878
    int hidden_no= table->s->fields;
3879
    const NDBTAB *tab= m_table;  
3880 3881 3882
    const NDBCOL *hidden_col= tab->getColumn(hidden_no);
    DBUG_ASSERT(hidden_col->getPrimaryKey() && 
                hidden_col->getAutoIncrement() &&
3883
                key_length == NDB_HIDDEN_PRIMARY_KEY_LENGTH);
3884
#endif
3885
    memcpy(ref, m_ref, key_length);
3886
  }
3887 3888 3889 3890
#ifndef DBUG_OFF
  if (table_share->primary_key == MAX_KEY && m_use_partition_function) 
    DBUG_DUMP("key+part", (char*)ref, key_length+sizeof(m_part_id));
#endif
3891
  DBUG_DUMP("ref", (char*)ref, key_length);
3892 3893 3894 3895
  DBUG_VOID_RETURN;
}


3896
/*
  Return statistics and status information to the MySQL server.

  flag is a bitmask of HA_STATUS_* requests. Fills in the stats
  member (records, row size, auto_increment) and errkey as requested.
  Returns 0 on success, an error code otherwise; a -1 from the
  statistics fetch is mapped to HA_ERR_NO_CONNECTION.

  Fix: the HA_STATUS_VARIABLE path called ndb->setDatabaseName(m_dbname)
  twice — once with the return value ignored and then again with a
  check. The redundant unchecked call is removed; only the checked
  call remains.
*/
int ha_ndbcluster::info(uint flag)
{
  int result= 0;
  DBUG_ENTER("info");
  DBUG_PRINT("enter", ("flag: %d", flag));
  
  if (flag & HA_STATUS_POS)
    DBUG_PRINT("info", ("HA_STATUS_POS"));
  if (flag & HA_STATUS_NO_LOCK)
    DBUG_PRINT("info", ("HA_STATUS_NO_LOCK"));
  if (flag & HA_STATUS_TIME)
    DBUG_PRINT("info", ("HA_STATUS_TIME"));
  if (flag & HA_STATUS_VARIABLE)
  {
    DBUG_PRINT("info", ("HA_STATUS_VARIABLE"));
    if (m_table_info)
    {
      if (m_ha_not_exact_count)
        stats.records= 100;             /* exact count not requested */
      else
        result= records_update();
    }
    else
    {
      if ((my_errno= check_ndb_connection()))
        DBUG_RETURN(my_errno);
      Ndb *ndb= get_ndb();
      struct Ndb_statistics stat;
      /* setDatabaseName can fail on out-of-memory */
      if (ndb->setDatabaseName(m_dbname))
      {
        DBUG_RETURN(my_errno= HA_ERR_OUT_OF_MEM);
      }
      if (current_thd->variables.ndb_use_exact_count &&
          (result= ndb_get_table_statistics(this, TRUE, ndb, m_table, &stat))
          == 0)
      {
        stats.mean_rec_length= stat.row_size;
        stats.data_file_length= stat.fragment_memory;
        stats.records= stat.row_count;
      }
      else
      {
        /* fall back to a guess so the optimizer has something */
        stats.mean_rec_length= 0;
        stats.records= 100;
      }
    }
  }
  if (flag & HA_STATUS_CONST)
  {
    DBUG_PRINT("info", ("HA_STATUS_CONST"));
    set_rec_per_key();
  }
  if (flag & HA_STATUS_ERRKEY)
  {
    DBUG_PRINT("info", ("HA_STATUS_ERRKEY"));
    errkey= m_dupkey;
  }
  if (flag & HA_STATUS_AUTO)
  {
    DBUG_PRINT("info", ("HA_STATUS_AUTO"));
    if (m_table && table->found_next_number_field)
    {
      Ndb *ndb= get_ndb();
      Ndb_tuple_id_range_guard g(m_share);
      
      Uint64 auto_increment_value64;
      if (ndb->readAutoIncrementValue(m_table, g.range,
                                      auto_increment_value64) == -1)
      {
        const NdbError err= ndb->getNdbError();
        sql_print_error("Error %lu in readAutoIncrementValue(): %s",
                        (ulong) err.code, err.message);
        stats.auto_increment_value= ~(ulonglong)0;
      }
      else
        stats.auto_increment_value= (ulonglong)auto_increment_value64;
    }
  }

  if(result == -1)
    result= HA_ERR_NO_CONNECTION;

  DBUG_RETURN(result);
}

3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995

/*
  Per-partition statistics are not implemented for NDB yet; the
  output structure is simply zeroed. Suggested fix: implement an ndb
  function which retrieves statistics about ndb partitions.
*/
void ha_ndbcluster::get_dynamic_partition_info(PARTITION_INFO *stat_info,
                                               uint part_id)
{
  bzero((char*) stat_info, sizeof(PARTITION_INFO));
}


3996 3997 3998 3999 4000 4001
int ha_ndbcluster::extra(enum ha_extra_function operation)
{
  DBUG_ENTER("extra");
  switch (operation) {
  case HA_EXTRA_IGNORE_DUP_KEY:       /* Dup keys don't rollback everything*/
    DBUG_PRINT("info", ("HA_EXTRA_IGNORE_DUP_KEY"));
4002 4003
    DBUG_PRINT("info", ("Ignoring duplicate key"));
    m_ignore_dup_key= TRUE;
4004 4005 4006
    break;
  case HA_EXTRA_NO_IGNORE_DUP_KEY:
    DBUG_PRINT("info", ("HA_EXTRA_NO_IGNORE_DUP_KEY"));
4007
    m_ignore_dup_key= FALSE;
4008
    break;
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4009 4010 4011 4012 4013 4014 4015 4016 4017 4018
  case HA_EXTRA_IGNORE_NO_KEY:
    DBUG_PRINT("info", ("HA_EXTRA_IGNORE_NO_KEY"));
    DBUG_PRINT("info", ("Turning on AO_IgnoreError at Commit/NoCommit"));
    m_ignore_no_key= TRUE;
    break;
  case HA_EXTRA_NO_IGNORE_NO_KEY:
    DBUG_PRINT("info", ("HA_EXTRA_NO_IGNORE_NO_KEY"));
    DBUG_PRINT("info", ("Turning on AO_IgnoreError at Commit/NoCommit"));
    m_ignore_no_key= FALSE;
    break;
4019 4020
  case HA_EXTRA_WRITE_CAN_REPLACE:
    DBUG_PRINT("info", ("HA_EXTRA_WRITE_CAN_REPLACE"));
4021 4022
    if (!m_has_unique_index ||
        current_thd->slave_thread) /* always set if slave, quick fix for bug 27378 */
4023 4024 4025 4026 4027 4028 4029 4030 4031 4032
    {
      DBUG_PRINT("info", ("Turning ON use of write instead of insert"));
      m_use_write= TRUE;
    }
    break;
  case HA_EXTRA_WRITE_CANNOT_REPLACE:
    DBUG_PRINT("info", ("HA_EXTRA_WRITE_CANNOT_REPLACE"));
    DBUG_PRINT("info", ("Turning OFF use of write instead of insert"));
    m_use_write= FALSE;
    break;
4033 4034 4035 4036 4037 4038 4039
  case HA_EXTRA_DELETE_CANNOT_BATCH:
    DBUG_PRINT("info", ("HA_EXTRA_DELETE_CANNOT_BATCH"));
    m_delete_cannot_batch= TRUE;
    break;
  case HA_EXTRA_UPDATE_CANNOT_BATCH:
    DBUG_PRINT("info", ("HA_EXTRA_UPDATE_CANNOT_BATCH"));
    m_update_cannot_batch= TRUE;
4040
    break;
4041 4042
  default:
    break;
4043 4044 4045 4046 4047
  }
  
  DBUG_RETURN(0);
}

4048 4049 4050 4051

int ha_ndbcluster::reset()
{
  DBUG_ENTER("ha_ndbcluster::reset");
4052 4053 4054 4055 4056
  if (m_cond)
  {
    m_cond->cond_clear();
  }

4057 4058 4059 4060 4061 4062 4063
  /*
    Regular partition pruning will set the bitmap appropriately.
    Some queries like ALTER TABLE doesn't use partition pruning and
    thus the 'used_partitions' bitmap needs to be initialized
  */
  if (m_part_info)
    bitmap_set_all(&m_part_info->used_partitions);
4064 4065 4066 4067

  /* reset flags set by extra calls */
  m_ignore_dup_key= FALSE;
  m_use_write= FALSE;
4068
  m_ignore_no_key= FALSE;
4069 4070
  m_delete_cannot_batch= FALSE;
  m_update_cannot_batch= FALSE;
4071

4072 4073 4074 4075
  DBUG_RETURN(0);
}


4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088
/* 
   Start of an insert, remember number of rows to be inserted, it will
   be used in write_row and get_autoincrement to send an optimal number
   of rows in each roundtrip to the server

   SYNOPSIS
   rows     number of rows to insert, 0 if unknown

*/

void ha_ndbcluster::start_bulk_insert(ha_rows rows)
{
  const NDBTAB *tab= m_table;    

  DBUG_ENTER("start_bulk_insert");
  DBUG_PRINT("enter", ("rows: %d", (int)rows));
  
  m_rows_inserted= (ha_rows) 0;
  if (!m_use_write && m_ignore_dup_key)
  {
    /*
      compare if expression with that in write_row
      we have a situation where peek_indexed_rows() will be called
      so we cannot batch
    */
    DBUG_PRINT("info", ("Batching turned off as duplicate key is "
                        "ignored by using peek_row"));
    m_rows_to_insert= 1;
    m_bulk_insert_rows= 1;
    DBUG_VOID_RETURN;
  }

  if (rows == (ha_rows) 0)
    /* We don't know how many will be inserted, guess */
    m_rows_to_insert= m_autoincrement_prefetch;
  else
    m_rows_to_insert= rows; 

  /* 
    Calculate how many rows that should be inserted
    per roundtrip to NDB. This is done in order to minimize the 
    number of roundtrips as much as possible. However performance will 
    degrade if too many bytes are inserted, thus it's limited by this 
    calculation.   
  */
  const int bytesperbatch= 8192;
  int bytes= 12 + tab->getRowSizeInBytes() + 4 * tab->getNoOfColumns();
  int batch= bytesperbatch/bytes;
  if (batch == 0)
    batch= 1;
  DBUG_PRINT("info", ("batch: %d, bytes: %d", batch, bytes));
  m_bulk_insert_rows= batch;

  DBUG_VOID_RETURN;
}

/*
  End of an insert
 */
int ha_ndbcluster::end_bulk_insert()
{
4138 4139
  int error= 0;

4140
  DBUG_ENTER("end_bulk_insert");
4141
  // Check if last inserts need to be flushed
4142
  if (m_bulk_insert_not_flushed)
4143
  {
4144
    NdbTransaction *trans= m_active_trans;
4145 4146
    // Send rows to NDB
    DBUG_PRINT("info", ("Sending inserts to NDB, "\
4147
                        "rows_inserted: %d  bulk_insert_rows: %d", 
4148
                        (int) m_rows_inserted, (int) m_bulk_insert_rows)); 
4149
    m_bulk_insert_not_flushed= FALSE;
4150 4151
    if (m_transaction_on)
    {
4152
      if (execute_no_commit(this, trans,FALSE) != 0)
4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166
      {
        no_uncommitted_rows_execute_failure();
        my_errno= error= ndb_err(trans);
      }
    }
    else
    {
      if (execute_commit(this, trans) != 0)
      {
        no_uncommitted_rows_execute_failure();
        my_errno= error= ndb_err(trans);
      }
      else
      {
4167
        IF_DBUG(int res=) trans->restart();
4168 4169
        DBUG_ASSERT(res == 0);
      }
4170
    }
4171 4172
  }

4173 4174
  m_rows_inserted= (ha_rows) 0;
  m_rows_to_insert= (ha_rows) 1;
4175
  DBUG_RETURN(error);
4176 4177
}

4178 4179 4180 4181

int ha_ndbcluster::extra_opt(enum ha_extra_function operation, ulong cache_size)
{
  DBUG_ENTER("extra_opt");
pekka@mysql.com's avatar
pekka@mysql.com committed
4182
  DBUG_PRINT("enter", ("cache_size: %lu", cache_size));
4183 4184 4185
  DBUG_RETURN(extra(operation));
}

4186 4187 4188 4189
static const char *ha_ndbcluster_exts[] = {
 ha_ndb_ext,
 NullS
};
4190

4191
const char** ha_ndbcluster::bas_ext() const
4192 4193 4194
{
  return ha_ndbcluster_exts;
}
4195 4196 4197 4198 4199 4200 4201 4202 4203

/*
  How many seeks it will take to read through the table
  This is to be comparable to the number returned by records_in_range so
  that we can decide if we should scan the table or use keys.
*/

double ha_ndbcluster::scan_time()
{
4204
  DBUG_ENTER("ha_ndbcluster::scan_time()");
4205
  double res= rows2double(stats.records*1000);
4206
  DBUG_PRINT("exit", ("table: %s value: %f", 
4207
                      m_tabname, res));
4208
  DBUG_RETURN(res);
4209 4210
}

4211 4212 4213 4214 4215 4216 4217
/*
  Convert MySQL table locks into locks supported by Ndb Cluster.
  Note that MySQL Cluster does currently not support distributed
  table locks, so to be safe one should set cluster in Single
  User Mode, before relying on table locks when updating tables
  from several MySQL servers
*/
4218 4219 4220 4221 4222 4223 4224 4225

THR_LOCK_DATA **ha_ndbcluster::store_lock(THD *thd,
                                          THR_LOCK_DATA **to,
                                          enum thr_lock_type lock_type)
{
  DBUG_ENTER("store_lock");
  if (lock_type != TL_IGNORE && m_lock.type == TL_UNLOCK) 
  {
4226

4227 4228 4229
    /* If we are not doing a LOCK TABLE, then allow multiple
       writers */
    
4230 4231 4232
    /* Since NDB does not currently have table locks
       this is treated as a ordinary lock */

4233
    if ((lock_type >= TL_WRITE_CONCURRENT_INSERT &&
4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248
         lock_type <= TL_WRITE) && !thd->in_lock_tables)      
      lock_type= TL_WRITE_ALLOW_WRITE;
    
    /* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
       MySQL would use the lock TL_READ_NO_INSERT on t2, and that
       would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
       to t2. Convert the lock to a normal read lock to allow
       concurrent inserts to t2. */
    
    if (lock_type == TL_READ_NO_INSERT && !thd->in_lock_tables)
      lock_type= TL_READ;
    
    m_lock.type=lock_type;
  }
  *to++= &m_lock;
4249 4250

  DBUG_PRINT("exit", ("lock_type: %d", lock_type));
4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272
  
  DBUG_RETURN(to);
}

#ifndef DBUG_OFF
/* Debug helper: dump interesting thd->options flags to the DBUG trace. */
#define PRINT_OPTION_FLAGS(t) { \
      if (t->options & OPTION_NOT_AUTOCOMMIT) \
        DBUG_PRINT("thd->options", ("OPTION_NOT_AUTOCOMMIT")); \
      if (t->options & OPTION_BEGIN) \
        DBUG_PRINT("thd->options", ("OPTION_BEGIN")); \
      if (t->options & OPTION_TABLE_LOCK) \
        DBUG_PRINT("thd->options", ("OPTION_TABLE_LOCK")); \
}
#else
#define PRINT_OPTION_FLAGS(t)
#endif


/*
  As MySQL will execute an external lock for every new table it uses
  we can use this to start the transactions.
  If we are in auto_commit mode we just need to start a transaction
4273
  for the statement, this will be stored in thd_ndb.stmt.
4274
  If not, we have to start a master transaction if there doesn't exist
4275
  one from before, this will be stored in thd_ndb.all
4276 4277 4278
 
  When a table lock is held one transaction will be started which holds
  the table lock and for each statement a hupp transaction will be started  
4279
  If we are locking the table then:
4280
  - save the NdbDictionary::Table for easy access
4281 4282
  - save reference to table statistics
  - refresh list of the indexes for the table if needed (if altered)
4283 4284
 */

4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309
#ifdef HAVE_NDB_BINLOG
extern MASTER_INFO *active_mi;
/*
  Build an operation that writes/updates this server's row in the
  ndb_apply_status table with the slave's current replication
  coordinates. Returns 0 on success, -1 if the table can't be opened.
*/
static int ndbcluster_update_apply_status(THD *thd, int do_update)
{
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;
  NDBDICT *dict= ndb->getDictionary();
  NdbTransaction *trans= thd_ndb->all ? thd_ndb->all : thd_ndb->stmt;

  ndb->setDatabaseName(NDB_REP_DB);
  Ndb_table_guard ndbtab_g(dict, NDB_APPLY_TABLE);
  const NDBTAB *ndbtab= ndbtab_g.get_table();
  if (ndbtab == NULL)
  {
    return -1;
  }

  /*
    NOTE(review): errors below are only accumulated into r and checked
    via DBUG_ASSERT — in release builds a NULL op or a failed setValue
    is silently ignored; preserved as-is.
  */
  int r= 0;
  NdbOperation *op= 0;
  r|= (op= trans->getNdbOperation(ndbtab)) == 0;
  DBUG_ASSERT(r == 0);
  r|= do_update ? op->updateTuple() : op->writeTuple();
  DBUG_ASSERT(r == 0);
  // server_id
  r|= op->equal(0u, (Uint32)thd->server_id);
  DBUG_ASSERT(r == 0);
  if (!do_update)
  {
    // epoch
    r|= op->setValue(1u, (Uint64)0);
    DBUG_ASSERT(r == 0);
  }
  // log_name
  char tmp_buf[FN_REFLEN];
  ndb_pack_varchar(ndbtab->getColumn(2u), tmp_buf,
                   active_mi->rli.group_master_log_name,
                   strlen(active_mi->rli.group_master_log_name));
  r|= op->setValue(2u, tmp_buf);
  DBUG_ASSERT(r == 0);
  // start_pos
  r|= op->setValue(3u, (Uint64)active_mi->rli.group_master_log_pos);
  DBUG_ASSERT(r == 0);
  // end_pos
  r|= op->setValue(4u, (Uint64)active_mi->rli.group_master_log_pos + 
                   ((Uint64)active_mi->rli.future_event_relay_log_pos -
                    (Uint64)active_mi->rli.group_relay_log_pos));
  DBUG_ASSERT(r == 0);
  return 0;
}
#endif /* HAVE_NDB_BINLOG */

4337 4338 4339
int ha_ndbcluster::external_lock(THD *thd, int lock_type)
{
  int error=0;
4340
  NdbTransaction* trans= NULL;
4341
  DBUG_ENTER("external_lock");
4342

4343 4344 4345 4346
  /*
    Check that this handler instance has a connection
    set up to the Ndb object of thd
   */
4347
  if (check_ndb_connection(thd))
4348
    DBUG_RETURN(1);
4349

4350
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
4351
  Ndb *ndb= thd_ndb->ndb;
4352

4353
  DBUG_PRINT("enter", ("this: 0x%lx  thd: 0x%lx  thd_ndb: %lx  "
4354
                       "thd_ndb->lock_count: %d",
4355 4356
                       (long) this, (long) thd, (long) thd_ndb,
                       thd_ndb->lock_count));
4357

4358 4359
  if (lock_type != F_UNLCK)
  {
4360
    DBUG_PRINT("info", ("lock_type != F_UNLCK"));
4361 4362 4363 4364
    if (thd->lex->sql_command == SQLCOM_LOAD)
    {
      m_transaction_on= FALSE;
      /* Would be simpler if has_transactions() didn't always say "yes" */
4365
      thd->no_trans_update.all= thd->no_trans_update.stmt= TRUE;
4366 4367
    }
    else if (!thd->transaction.on)
4368 4369 4370
      m_transaction_on= FALSE;
    else
      m_transaction_on= thd->variables.ndb_use_transactions;
4371
    if (!thd_ndb->lock_count++)
4372 4373
    {
      PRINT_OPTION_FLAGS(thd);
4374
      if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) 
4375 4376
      {
        // Autocommit transaction
4377
        DBUG_ASSERT(!thd_ndb->stmt);
4378 4379
        DBUG_PRINT("trans",("Starting transaction stmt"));      

4380
        trans= ndb->startTransaction();
4381
        if (trans == NULL)
4382
          ERR_RETURN(ndb->getNdbError());
4383
        thd_ndb->init_open_tables();
4384
        thd_ndb->stmt= trans;
4385
	thd_ndb->query_state&= NDB_QUERY_NORMAL;
4386
        thd_ndb->trans_options= 0;
4387 4388 4389 4390
        thd_ndb->m_slow_path= FALSE;
        if (thd->slave_thread ||
            !(thd->options & OPTION_BIN_LOG))
          thd_ndb->m_slow_path= TRUE;
4391
        trans_register_ha(thd, FALSE, ndbcluster_hton);
4392 4393 4394
      } 
      else 
      { 
4395
        if (!thd_ndb->all)
4396
        {
4397 4398 4399 4400
          // Not autocommit transaction
          // A "master" transaction ha not been started yet
          DBUG_PRINT("trans",("starting transaction, all"));
          
4401
          trans= ndb->startTransaction();
4402
          if (trans == NULL)
4403
            ERR_RETURN(ndb->getNdbError());
4404
          thd_ndb->init_open_tables();
4405
          thd_ndb->all= trans; 
4406
	  thd_ndb->query_state&= NDB_QUERY_NORMAL;
4407
          thd_ndb->trans_options= 0;
4408 4409 4410 4411
          thd_ndb->m_slow_path= FALSE;
          if (thd->slave_thread ||
              !(thd->options & OPTION_BIN_LOG))
            thd_ndb->m_slow_path= TRUE;
4412
          trans_register_ha(thd, TRUE, ndbcluster_hton);
4413 4414 4415 4416 4417 4418 4419 4420

          /*
            If this is the start of a LOCK TABLE, a table look 
            should be taken on the table in NDB
           
            Check if it should be read or write lock
           */
          if (thd->options & (OPTION_TABLE_LOCK))
4421
          {
4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440
            //lockThisTable();
            DBUG_PRINT("info", ("Locking the table..." ));
          }

        }
      }
    }
    /*
      This is the place to make sure this handler instance
      has a started transaction.
     
      The transaction is started by the first handler on which 
      MySQL Server calls external lock
     
      Other handlers in the same stmt or transaction should use 
      the same NDB transaction. This is done by setting up the m_active_trans
      pointer to point to the NDB transaction. 
     */

4441 4442 4443
    // store thread specific data first to set the right context
    m_force_send=          thd->variables.ndb_force_send;
    m_ha_not_exact_count= !thd->variables.ndb_use_exact_count;
4444 4445
    m_autoincrement_prefetch= 
      (ha_rows) thd->variables.ndb_autoincrement_prefetch_sz;
4446

4447
    m_active_trans= thd_ndb->all ? thd_ndb->all : thd_ndb->stmt;
4448
    DBUG_ASSERT(m_active_trans);
4449
    // Start of transaction
4450 4451
    m_rows_changed= 0;
    m_ops_pending= 0;
4452
    m_slow_path= thd_ndb->m_slow_path;
4453
#ifdef HAVE_NDB_BINLOG
4454 4455 4456 4457 4458
    if (unlikely(m_slow_path))
    {
      if (m_share == ndb_apply_status_share && thd->slave_thread)
        thd_ndb->trans_options|= TNTO_INJECTED_APPLY_STATUS;
    }
4459
#endif
4460
    // TODO remove double pointers...
4461 4462
    m_thd_ndb_share= thd_ndb->get_open_table(thd, m_table);
    m_table_info= &m_thd_ndb_share->stat;
4463 4464
  }
  else
4465
  {
4466
    DBUG_PRINT("info", ("lock_type == F_UNLCK"));
4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484

    if (ndb_cache_check_time && m_rows_changed)
    {
      DBUG_PRINT("info", ("Rows has changed and util thread is running"));
      if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
      {
        DBUG_PRINT("info", ("Add share to list of tables to be invalidated"));
        /* NOTE push_back allocates memory using transactions mem_root! */
        thd_ndb->changed_tables.push_back(m_share, &thd->transaction.mem_root);
      }

      pthread_mutex_lock(&m_share->mutex);
      DBUG_PRINT("info", ("Invalidating commit_count"));
      m_share->commit_count= 0;
      m_share->commit_count_lock++;
      pthread_mutex_unlock(&m_share->mutex);
    }

4485
    if (!--thd_ndb->lock_count)
4486 4487 4488 4489
    {
      DBUG_PRINT("trans", ("Last external_lock"));
      PRINT_OPTION_FLAGS(thd);

4490
      if (thd_ndb->stmt)
4491 4492 4493 4494 4495 4496 4497
      {
        /*
          Unlock is done without a transaction commit / rollback.
          This happens if the thread didn't update any rows
          We must in this case close the transaction to release resources
        */
        DBUG_PRINT("trans",("ending non-updating transaction"));
4498
        ndb->closeTransaction(m_active_trans);
4499
        thd_ndb->stmt= NULL;
4500 4501
      }
    }
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4502
    m_table_info= NULL;
4503

4504 4505 4506 4507 4508 4509 4510 4511 4512
    /*
      This is the place to make sure this handler instance
      no longer are connected to the active transaction.

      And since the handler is no longer part of the transaction 
      it can't have open cursors, ops or blobs pending.
    */
    m_active_trans= NULL;    

4513 4514
    if (m_active_cursor)
      DBUG_PRINT("warning", ("m_active_cursor != NULL"));
4515 4516
    m_active_cursor= NULL;

4517 4518 4519 4520
    if (m_multi_cursor)
      DBUG_PRINT("warning", ("m_multi_cursor != NULL"));
    m_multi_cursor= NULL;
    
4521
    if (m_blobs_pending)
4522
      DBUG_PRINT("warning", ("blobs_pending != 0"));
4523
    m_blobs_pending= 0;
4524
    
4525
    if (m_ops_pending)
4526
      DBUG_PRINT("warning", ("ops_pending != 0L"));
4527
    m_ops_pending= 0;
4528
  }
4529
  thd->set_current_stmt_binlog_row_based_if_mixed();
4530 4531 4532
  DBUG_RETURN(error);
}

mskold@mysql.com's avatar
mskold@mysql.com committed
4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544
/*
  Unlock the last row read in an open scan.
  Rows are unlocked by default in ndb, but
  for SELECT FOR UPDATE and SELECT LOCK WITH SHARE MODE
  locks are kept if unlock_row() is not called.
*/

void ha_ndbcluster::unlock_row() 
{
  DBUG_ENTER("unlock_row");

  DBUG_PRINT("info", ("Unlocking row"));
  /*
    Clearing this flag drops the pending lock request for the current
    scan row.  NOTE(review): assumes the scan fetch path checks
    m_lock_tuple before adding a lock operation -- confirm in the
    nextResult()/scan handling code.
  */
  m_lock_tuple= FALSE;
  DBUG_VOID_RETURN;
}

4549
/*
  Start a transaction for running a statement if one is not
  already running in a transaction. This will be the case in
  a BEGIN; COMMIT; block
  When using LOCK TABLE's external_lock will start a transaction
  since ndb does not currently support table locking
*/

int ha_ndbcluster::start_stmt(THD *thd, thr_lock_type lock_type)
{
  int error=0;
  DBUG_ENTER("start_stmt");
  PRINT_OPTION_FLAGS(thd);

  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  /*
    Reuse an already running NDB transaction: prefer the statement-level
    one, fall back to the session-level ("all") one.
  */
  NdbTransaction *trans= (thd_ndb->stmt)?thd_ndb->stmt:thd_ndb->all;
  if (!trans){
    Ndb *ndb= thd_ndb->ndb;
    DBUG_PRINT("trans",("Starting transaction stmt"));  
    trans= ndb->startTransaction();
    if (trans == NULL)
      ERR_RETURN(ndb->getNdbError());
    /* Fresh transaction: reset the uncommitted-rows statistics for it */
    no_uncommitted_rows_reset(thd);
    thd_ndb->stmt= trans;
    thd_ndb->query_state&= NDB_QUERY_NORMAL;
    /* FALSE: register with the server as a statement-level transaction */
    trans_register_ha(thd, FALSE, ndbcluster_hton);
  }
  /* Point this handler instance at the (possibly shared) transaction */
  m_active_trans= trans;
  // Start of statement
  m_ops_pending= 0;    
  thd->set_current_stmt_binlog_row_based_if_mixed();

  DBUG_RETURN(error);
}


/*
  Commit a transaction started in NDB.

  Commits either the statement-level or the session-level ("all")
  transaction, closes it, and invalidates the cached commit counts of
  all tables the transaction changed (used by the query-cache check).
  Returns 0 on success or a mysql error code.
 */

static int ndbcluster_commit(handlerton *hton, THD *thd, bool all)
{
  int res= 0;
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;
  NdbTransaction *trans= all ? thd_ndb->all : thd_ndb->stmt;

  DBUG_ENTER("ndbcluster_commit");
  DBUG_PRINT("transaction",("%s",
                            trans == thd_ndb->stmt ?
                            "stmt" : "all"));
  DBUG_ASSERT(ndb);
  /* Nothing started in NDB for this scope -- nothing to commit */
  if (trans == NULL)
    DBUG_RETURN(0);

#ifdef HAVE_NDB_BINLOG
  /*
    Slow path: a replication slave records its apply position in the
    apply_status table as part of this commit.
  */
  if (unlikely(thd_ndb->m_slow_path))
  {
    if (thd->slave_thread)
      ndbcluster_update_apply_status
        (thd, thd_ndb->trans_options & TNTO_INJECTED_APPLY_STATUS);
  }
#endif /* HAVE_NDB_BINLOG */

  if (execute_commit(thd,trans) != 0)
  {
    const NdbError err= trans->getNdbError();
    const NdbOperation *error_op= trans->getNdbErrorOperation();
    set_ndb_err(thd, err);
    res= ndb_to_mysql_error(&err);
    /* -1 means "error already reported"; anything else is printed here */
    if (res != -1)
      ndbcluster_print_error(res, error_op);
  }
  /* Transaction must be closed even when the commit failed */
  ndb->closeTransaction(trans);

  if (all)
    thd_ndb->all= NULL;
  else
    thd_ndb->stmt= NULL;

  /* Clear commit_count for tables changed by transaction */
  NDB_SHARE* share;
  List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
  while ((share= it++))
  {
    pthread_mutex_lock(&share->mutex);
    DBUG_PRINT("info", ("Invalidate commit_count for %s, share->commit_count: %lu",
                        share->table_name, (ulong) share->commit_count));
    /* 0 forces a re-read of the commit count next time it is needed */
    share->commit_count= 0;
    share->commit_count_lock++;
    pthread_mutex_unlock(&share->mutex);
  }
  thd_ndb->changed_tables.empty();

  DBUG_RETURN(res);
}


/*
  Rollback a transaction started in NDB.

  Rolls back either the statement-level or the session-level ("all")
  transaction, closes it, and drops the list of changed tables (their
  commit counts are only invalidated on commit, not on rollback).
  Returns 0 on success or a mysql error code.
 */

static int ndbcluster_rollback(handlerton *hton, THD *thd, bool all)
{
  int res= 0;
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;
  NdbTransaction *trans= all ? thd_ndb->all : thd_ndb->stmt;

  DBUG_ENTER("ndbcluster_rollback");
  DBUG_PRINT("transaction",("%s",
                            trans == thd_ndb->stmt ? 
                            "stmt" : "all"));
  /*
    NOTE(review): unlike ndbcluster_commit(), which tolerates a NULL
    trans, rollback asserts one exists -- confirm callers guarantee this.
  */
  DBUG_ASSERT(ndb && trans);

  if (trans->execute(NdbTransaction::Rollback) != 0)
  {
    const NdbError err= trans->getNdbError();
    const NdbOperation *error_op= trans->getNdbErrorOperation();
    set_ndb_err(thd, err);
    res= ndb_to_mysql_error(&err);
    /* -1 means "error already reported"; anything else is printed here */
    if (res != -1) 
      ndbcluster_print_error(res, error_op);
  }
  /* Transaction must be closed even when the rollback failed */
  ndb->closeTransaction(trans);

  if (all)
    thd_ndb->all= NULL;
  else
    thd_ndb->stmt= NULL;

  /* Clear list of tables changed by transaction */
  thd_ndb->changed_tables.empty();

  DBUG_RETURN(res);
}


/*
  Define NDB column based on Field.
  Returns 0 or mysql error code.
  Not member of ha_ndbcluster because NDBCOL cannot be declared.

  MySQL text types with character set "binary" are mapped to true
  NDB binary types without a character set.  This may change.

  NOTE: NDBCOL::setLength() takes the array length (number of
  elements), not a byte size -- hence setLength(1) for scalar types.
 */

static int create_ndb_column(NDBCOL &col,
                             Field *field,
                             HA_CREATE_INFO *info)
{
  // Set name
  if (col.setName(field->field_name))
  {
    return (my_errno= errno);
  }
  // Get char set
  CHARSET_INFO *cs= field->charset();
  // Set type and sizes
  const enum enum_field_types mysql_type= field->real_type();
  switch (mysql_type) {
  // Numeric types
  case MYSQL_TYPE_TINY:        
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Tinyunsigned);
    else
      col.setType(NDBCOL::Tinyint);
    col.setLength(1);
    break;
  case MYSQL_TYPE_SHORT:
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Smallunsigned);
    else
      col.setType(NDBCOL::Smallint);
    col.setLength(1);
    break;
  case MYSQL_TYPE_LONG:
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Unsigned);
    else
      col.setType(NDBCOL::Int);
    col.setLength(1);
    break;
  case MYSQL_TYPE_INT24:       
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Mediumunsigned);
    else
      col.setType(NDBCOL::Mediumint);
    col.setLength(1);
    break;
  case MYSQL_TYPE_LONGLONG:
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Bigunsigned);
    else
      col.setType(NDBCOL::Bigint);
    col.setLength(1);
    break;
  case MYSQL_TYPE_FLOAT:
    col.setType(NDBCOL::Float);
    col.setLength(1);
    break;
  case MYSQL_TYPE_DOUBLE:
    col.setType(NDBCOL::Double);
    col.setLength(1);
    break;
  case MYSQL_TYPE_DECIMAL:    
    {
      // Pre-5.0 decimal: stored as a string, precision derived from
      // the pack length minus sign/decimal-point characters.
      Field_decimal *f= (Field_decimal*)field;
      uint precision= f->pack_length();
      uint scale= f->decimals();
      if (field->flags & UNSIGNED_FLAG)
      {
        col.setType(NDBCOL::Olddecimalunsigned);
        precision-= (scale > 0);           // drop the '.' if present
      }
      else
      {
        col.setType(NDBCOL::Olddecimal);
        precision-= 1 + (scale > 0);       // drop the sign and the '.'
      }
      col.setPrecision(precision);
      col.setScale(scale);
      col.setLength(1);
    }
    break;
  case MYSQL_TYPE_NEWDECIMAL:    
    {
      Field_new_decimal *f= (Field_new_decimal*)field;
      uint precision= f->precision;
      uint scale= f->decimals();
      if (field->flags & UNSIGNED_FLAG)
      {
        col.setType(NDBCOL::Decimalunsigned);
      }
      else
      {
        col.setType(NDBCOL::Decimal);
      }
      col.setPrecision(precision);
      col.setScale(scale);
      col.setLength(1);
    }
    break;
  // Date types
  case MYSQL_TYPE_DATETIME:    
    col.setType(NDBCOL::Datetime);
    col.setLength(1);
    break;
  case MYSQL_TYPE_DATE: // ?
    col.setType(NDBCOL::Char);
    col.setLength(field->pack_length());
    break;
  case MYSQL_TYPE_NEWDATE:
    col.setType(NDBCOL::Date);
    col.setLength(1);
    break;
  case MYSQL_TYPE_TIME:        
    col.setType(NDBCOL::Time);
    col.setLength(1);
    break;
  case MYSQL_TYPE_YEAR:
    col.setType(NDBCOL::Year);
    col.setLength(1);
    break;
  case MYSQL_TYPE_TIMESTAMP:
    col.setType(NDBCOL::Timestamp);
    col.setLength(1);
    break;
  // Char types
  case MYSQL_TYPE_STRING:      
    // pack_length 0 means a zero-length string field; map it to a
    // 1-bit NDB Bit column since NDB has no zero-length type.
    if (field->pack_length() == 0)
    {
      col.setType(NDBCOL::Bit);
      col.setLength(1);
    }
    else if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
    {
      col.setType(NDBCOL::Binary);
      col.setLength(field->pack_length());
    }
    else
    {
      col.setType(NDBCOL::Char);
      col.setCharset(cs);
      col.setLength(field->pack_length());
    }
    break;
  case MYSQL_TYPE_VAR_STRING: // ?
  case MYSQL_TYPE_VARCHAR:
    {
      // length_bytes selects the NDB variant: 1 byte -> Varchar
      // (max 255), 2 bytes -> Longvarchar (max 65535).
      Field_varstring* f= (Field_varstring*)field;
      if (f->length_bytes == 1)
      {
        if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
          col.setType(NDBCOL::Varbinary);
        else {
          col.setType(NDBCOL::Varchar);
          col.setCharset(cs);
        }
      }
      else if (f->length_bytes == 2)
      {
        if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
          col.setType(NDBCOL::Longvarbinary);
        else {
          col.setType(NDBCOL::Longvarchar);
          col.setCharset(cs);
        }
      }
      else
      {
        return HA_ERR_UNSUPPORTED;
      }
      col.setLength(field->field_length);
    }
    break;
  // Blob types (all come in as MYSQL_TYPE_BLOB)
  mysql_type_tiny_blob:
  case MYSQL_TYPE_TINY_BLOB:
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
      col.setType(NDBCOL::Blob);
    else {
      col.setType(NDBCOL::Text);
      col.setCharset(cs);
    }
    // Tinyblob (max 255 bytes) always fits in the 256-byte inline part
    col.setInlineSize(256);
    // No parts
    col.setPartSize(0);
    col.setStripeSize(0);
    break;
  //mysql_type_blob:
  case MYSQL_TYPE_GEOMETRY:
  case MYSQL_TYPE_BLOB:    
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
      col.setType(NDBCOL::Blob);
    else {
      col.setType(NDBCOL::Text);
      col.setCharset(cs);
    }
    {
      Field_blob *field_blob= (Field_blob *)field;
      /*
       * max_data_length is 2^8-1, 2^16-1, 2^24-1 for tiny, blob, medium.
       * Tinyblob gets no blob parts.  The other cases are just a crude
       * way to control part size and striping.
       *
       * In mysql blob(256) is promoted to blob(65535) so it does not
       * in fact fit "inline" in NDB.
       */
      if (field_blob->max_data_length() < (1 << 8))
        goto mysql_type_tiny_blob;
      else if (field_blob->max_data_length() < (1 << 16))
      {
        col.setInlineSize(256);
        col.setPartSize(2000);
        col.setStripeSize(16);
      }
      else if (field_blob->max_data_length() < (1 << 24))
        goto mysql_type_medium_blob;
      else
        goto mysql_type_long_blob;
    }
    break;
  mysql_type_medium_blob:
  case MYSQL_TYPE_MEDIUM_BLOB:   
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
      col.setType(NDBCOL::Blob);
    else {
      col.setType(NDBCOL::Text);
      col.setCharset(cs);
    }
    col.setInlineSize(256);
    col.setPartSize(4000);
    col.setStripeSize(8);
    break;
  mysql_type_long_blob:
  case MYSQL_TYPE_LONG_BLOB:  
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
      col.setType(NDBCOL::Blob);
    else {
      col.setType(NDBCOL::Text);
      col.setCharset(cs);
    }
    col.setInlineSize(256);
    col.setPartSize(8000);
    col.setStripeSize(4);
    break;
  // Other types
  case MYSQL_TYPE_ENUM:
    // Stored in packed form; mapped to fixed Char of the pack length
    col.setType(NDBCOL::Char);
    col.setLength(field->pack_length());
    break;
  case MYSQL_TYPE_SET:         
    col.setType(NDBCOL::Char);
    col.setLength(field->pack_length());
    break;
  case MYSQL_TYPE_BIT:
  {
    // For Bit columns setLength() is the number of bits; BIT(0) is
    // mapped to a single bit.
    int no_of_bits= field->field_length;
    col.setType(NDBCOL::Bit);
    if (!no_of_bits)
      col.setLength(1);
    else
      col.setLength(no_of_bits);
    break;
  }
  case MYSQL_TYPE_NULL:        
    goto mysql_type_unsupported;
  mysql_type_unsupported:
  default:
    return HA_ERR_UNSUPPORTED;
  }
  // Set nullable and pk
  col.setNullable(field->maybe_null());
  col.setPrimaryKey(field->flags & PRI_KEY_FLAG);
  // Set autoincrement
  if (field->flags & AUTO_INCREMENT_FLAG) 
  {
#ifndef DBUG_OFF
    char buff[22];
#endif
    col.setAutoIncrement(TRUE);
    // Use the user-specified AUTO_INCREMENT start value, default 1
    ulonglong value= info->auto_increment_value ?
      info->auto_increment_value : (ulonglong) 1;
    DBUG_PRINT("info", ("Autoincrement key, initial: %s", llstr(value, buff)));
    col.setAutoIncrementInitialValue(value);
  }
  else
    col.setAutoIncrement(FALSE);
  return 0;
}

4981 4982 4983 4984
/*
  Create a table in NDB Cluster
*/

4985
int ha_ndbcluster::create(const char *name, 
4986
                          TABLE *form, 
4987
                          HA_CREATE_INFO *create_info)
4988
{
4989
  THD *thd= current_thd;
4990 4991
  NDBTAB tab;
  NDBCOL col;
joreland@mysql.com's avatar
joreland@mysql.com committed
4992
  uint pack_length, length, i, pk_length= 0;
4993
  const void *data= NULL, *pack_data= NULL;
4994
  bool create_from_engine= (create_info->table_options & HA_OPTION_CREATE_FROM_ENGINE);
4995
  bool is_truncate= (thd->lex->sql_command == SQLCOM_TRUNCATE);
4996
  char tablespace[FN_LEN];
4997
  NdbDictionary::Table::SingleUserMode single_user_mode= NdbDictionary::Table::SingleUserModeLocked;
4998

pekka@mysql.com's avatar
pekka@mysql.com committed
4999
  DBUG_ENTER("ha_ndbcluster::create");
5000
  DBUG_PRINT("enter", ("name: %s", name));
5001

5002 5003 5004
  DBUG_ASSERT(*fn_rext((char*)name) == 0);
  set_dbname(name);
  set_tabname(name);
5005

5006 5007 5008 5009 5010 5011
  if ((my_errno= check_ndb_connection()))
    DBUG_RETURN(my_errno);
  
  Ndb *ndb= get_ndb();
  NDBDICT *dict= ndb->getDictionary();

mskold@mysql.com's avatar
mskold@mysql.com committed
5012 5013
  if (is_truncate)
  {
5014 5015 5016 5017 5018
    {
      Ndb_table_guard ndbtab_g(dict, m_tabname);
      if (!(m_table= ndbtab_g.get_table()))
	ERR_RETURN(dict->getNdbError());
      if ((get_tablespace_name(thd, tablespace, FN_LEN)))
5019
	create_info->tablespace= tablespace;    
5020 5021
      m_table= NULL;
    }
mskold@mysql.com's avatar
mskold@mysql.com committed
5022 5023 5024 5025
    DBUG_PRINT("info", ("Dropping and re-creating table for TRUNCATE"));
    if ((my_errno= delete_table(name)))
      DBUG_RETURN(my_errno);
  }
5026
  table= form;
5027 5028 5029
  if (create_from_engine)
  {
    /*
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5030
      Table already exists in NDB and frm file has been created by 
5031 5032 5033
      caller.
      Do Ndb specific stuff, such as create a .ndb file
    */
5034
    if ((my_errno= write_ndb_file(name)))
5035
      DBUG_RETURN(my_errno);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5036
#ifdef HAVE_NDB_BINLOG
5037
    ndbcluster_create_binlog_setup(get_ndb(), name, strlen(name),
5038
                                   m_dbname, m_tabname, FALSE);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5039
#endif /* HAVE_NDB_BINLOG */
5040 5041
    DBUG_RETURN(my_errno);
  }
5042

5043 5044 5045 5046 5047 5048
#ifdef HAVE_NDB_BINLOG
  /*
    Don't allow table creation unless
    schema distribution table is setup
    ( unless it is a creation of the schema dist table itself )
  */
5049
  if (!ndb_schema_share)
5050
  {
5051 5052 5053 5054 5055 5056 5057
    if (!(strcmp(m_dbname, NDB_REP_DB) == 0 &&
          strcmp(m_tabname, NDB_SCHEMA_TABLE) == 0))
    {
      DBUG_PRINT("info", ("Schema distribution table not setup"));
      DBUG_RETURN(HA_ERR_NO_CONNECTION);
    }
    single_user_mode = NdbDictionary::Table::SingleUserModeReadWrite;
5058 5059 5060
  }
#endif /* HAVE_NDB_BINLOG */

5061
  DBUG_PRINT("table", ("name: %s", m_tabname));  
5062
  if (tab.setName(m_tabname))
5063 5064 5065
  {
    DBUG_RETURN(my_errno= errno);
  }
5066
  tab.setLogging(!(create_info->options & HA_LEX_CREATE_TMP_TABLE));    
5067 5068
  tab.setSingleUserMode(single_user_mode);

5069 5070 5071 5072
  // Save frm data for this table
  if (readfrm(name, &data, &length))
    DBUG_RETURN(1);
  if (packfrm(data, length, &pack_data, &pack_length))
5073 5074
  {
    my_free((char*)data, MYF(0));
5075
    DBUG_RETURN(2);
5076
  }
5077
  DBUG_PRINT("info", ("setFrm data: 0x%lx  len: %d", (long) pack_data, pack_length));
5078 5079 5080 5081
  tab.setFrm(pack_data, pack_length);      
  my_free((char*)data, MYF(0));
  my_free((char*)pack_data, MYF(0));
  
5082 5083 5084
  /*
    Check for disk options
  */
5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100
  if (create_info->storage_media == HA_SM_DISK)
  { 
    if (create_info->tablespace)
      tab.setTablespaceName(create_info->tablespace);
    else
      tab.setTablespaceName("DEFAULT-TS");
  }
  else if (create_info->tablespace)
  {
    if (create_info->storage_media == HA_SM_MEMORY)
    {
      push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
			  ER_ILLEGAL_HA_CREATE_OPTION,
			  ER(ER_ILLEGAL_HA_CREATE_OPTION),
			  ndbcluster_hton_name,
			  "TABLESPACE currently only supported for "
5101
			  "STORAGE DISK");
5102 5103 5104 5105 5106 5107
      DBUG_RETURN(HA_ERR_UNSUPPORTED);
    }
    tab.setTablespaceName(create_info->tablespace);
    create_info->storage_media = HA_SM_DISK;  //if use tablespace, that also means store on disk
  }

5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129
  /*
    Handle table row type

    Default is to let table rows have var part reference so that online 
    add column can be performed in the future.  Explicitly setting row 
    type to fixed will omit var part reference, which will save data 
    memory in ndb, but at the cost of not being able to online add 
    column to this table
  */
  switch (create_info->row_type) {
  case ROW_TYPE_FIXED:
    tab.setForceVarPart(FALSE);
    break;
  case ROW_TYPE_DYNAMIC:
    /* fall through, treat as default */
  default:
    /* fall through, treat as default */
  case ROW_TYPE_DEFAULT:
    tab.setForceVarPart(TRUE);
    break;
  }

5130 5131 5132
  /*
    Setup columns
  */
5133
  for (i= 0; i < form->s->fields; i++) 
5134 5135
  {
    Field *field= form->field[i];
5136
    DBUG_PRINT("info", ("name: %s  type: %u  pack_length: %d", 
5137
                        field->field_name, field->real_type(),
5138
                        field->pack_length()));
5139
    if ((my_errno= create_ndb_column(col, field, create_info)))
pekka@mysql.com's avatar
pekka@mysql.com committed
5140
      DBUG_RETURN(my_errno);
5141
 
5142
    if (create_info->storage_media == HA_SM_DISK)
5143 5144 5145 5146
      col.setStorageType(NdbDictionary::Column::StorageTypeDisk);
    else
      col.setStorageType(NdbDictionary::Column::StorageTypeMemory);

5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167
    switch (create_info->row_type) {
    case ROW_TYPE_FIXED:
      if (field_type_forces_var_part(field->type()))
      {
        push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
                            ER_ILLEGAL_HA_CREATE_OPTION,
                            ER(ER_ILLEGAL_HA_CREATE_OPTION),
                            ndbcluster_hton_name,
                            "Row format FIXED incompatible with "
                            "variable sized attribute");
        DBUG_RETURN(HA_ERR_UNSUPPORTED);
      }
      break;
    case ROW_TYPE_DYNAMIC:
      /*
        Future: make columns dynamic in this case
      */
      break;
    default:
      break;
    }
5168 5169 5170 5171
    if (tab.addColumn(col))
    {
      DBUG_RETURN(my_errno= errno);
    }
5172
    if (col.getPrimaryKey())
joreland@mysql.com's avatar
joreland@mysql.com committed
5173
      pk_length += (field->pack_length() + 3) / 4;
5174
  }
5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185

  KEY* key_info;
  for (i= 0, key_info= form->key_info; i < form->s->keys; i++, key_info++)
  {
    KEY_PART_INFO *key_part= key_info->key_part;
    KEY_PART_INFO *end= key_part + key_info->key_parts;
    for (; key_part != end; key_part++)
      tab.getColumn(key_part->fieldnr-1)->setStorageType(
                             NdbDictionary::Column::StorageTypeMemory);
  }

5186
  // No primary key, create shadow key as 64 bit, auto increment  
5187
  if (form->s->primary_key == MAX_KEY) 
5188 5189
  {
    DBUG_PRINT("info", ("Generating shadow key"));
5190 5191 5192 5193
    if (col.setName("$PK"))
    {
      DBUG_RETURN(my_errno= errno);
    }
5194 5195
    col.setType(NdbDictionary::Column::Bigunsigned);
    col.setLength(1);
5196
    col.setNullable(FALSE);
5197 5198
    col.setPrimaryKey(TRUE);
    col.setAutoIncrement(TRUE);
5199 5200 5201 5202
    if (tab.addColumn(col))
    {
      DBUG_RETURN(my_errno= errno);
    }
joreland@mysql.com's avatar
joreland@mysql.com committed
5203 5204
    pk_length += 2;
  }
5205
 
joreland@mysql.com's avatar
joreland@mysql.com committed
5206
  // Make sure that blob tables don't have to big part size
5207
  for (i= 0; i < form->s->fields; i++) 
joreland@mysql.com's avatar
joreland@mysql.com committed
5208 5209 5210 5211 5212 5213 5214
  {
    /**
     * The extra +7 concists
     * 2 - words from pk in blob table
     * 5 - from extra words added by tup/dict??
     */
    switch (form->field[i]->real_type()) {
5215
    case MYSQL_TYPE_GEOMETRY:
joreland@mysql.com's avatar
joreland@mysql.com committed
5216 5217 5218 5219
    case MYSQL_TYPE_BLOB:    
    case MYSQL_TYPE_MEDIUM_BLOB:   
    case MYSQL_TYPE_LONG_BLOB: 
    {
5220 5221
      NdbDictionary::Column * column= tab.getColumn(i);
      int size= pk_length + (column->getPartSize()+3)/4 + 7;
5222
      if (size > NDB_MAX_TUPLE_SIZE_IN_WORDS && 
5223
         (pk_length+7) < NDB_MAX_TUPLE_SIZE_IN_WORDS)
joreland@mysql.com's avatar
joreland@mysql.com committed
5224
      {
5225
        size= NDB_MAX_TUPLE_SIZE_IN_WORDS - pk_length - 7;
5226
        column->setPartSize(4*size);
joreland@mysql.com's avatar
joreland@mysql.com committed
5227 5228 5229 5230 5231 5232 5233 5234 5235 5236
      }
      /**
       * If size > NDB_MAX and pk_length+7 >= NDB_MAX
       *   then the table can't be created anyway, so skip
       *   changing part size, and have error later
       */ 
    }
    default:
      break;
    }
5237
  }
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
5238

5239
  // Check partition info
5240
  partition_info *part_info= form->part_info;
5241
  if ((my_errno= set_up_partition_info(part_info, form, (void*)&tab)))
5242
  {
5243
    DBUG_RETURN(my_errno);
5244 5245
  }

5246
  // Create the table in NDB     
5247
  if (dict->createTable(tab) != 0) 
5248 5249
  {
    const NdbError err= dict->getNdbError();
5250
    set_ndb_err(thd, err);
5251 5252 5253
    my_errno= ndb_to_mysql_error(&err);
    DBUG_RETURN(my_errno);
  }
5254 5255 5256 5257 5258 5259

  Ndb_table_guard ndbtab_g(dict, m_tabname);
  // temporary set m_table during create
  // reset at return
  m_table= ndbtab_g.get_table();
  // TODO check also that we have the same frm...
5260 5261 5262 5263
  if (!m_table)
  {
    /* purecov: begin deadcode */
    const NdbError err= dict->getNdbError();
5264
    set_ndb_err(thd, err);
5265 5266 5267 5268
    my_errno= ndb_to_mysql_error(&err);
    DBUG_RETURN(my_errno);
    /* purecov: end */
  }
5269

5270 5271
  DBUG_PRINT("info", ("Table %s/%s created successfully", 
                      m_dbname, m_tabname));
5272

5273
  // Create secondary indexes
5274
  my_errno= create_indexes(ndb, form);
5275

5276
  if (!my_errno)
5277
    my_errno= write_ndb_file(name);
5278 5279 5280 5281 5282 5283
  else
  {
    /*
      Failed to create an index,
      drop the table (and all it's indexes)
    */
5284
    while (dict->dropTableGlobal(*m_table))
5285
    {
5286 5287 5288 5289 5290 5291 5292 5293 5294
      switch (dict->getNdbError().status)
      {
        case NdbError::TemporaryError:
          if (!thd->killed) 
            continue; // retry indefinitly
          break;
        default:
          break;
      }
5295
      break;
5296
    }
5297 5298
    m_table = 0;
    DBUG_RETURN(my_errno);
5299
  }
5300

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5301 5302 5303 5304 5305 5306 5307 5308 5309
#ifdef HAVE_NDB_BINLOG
  if (!my_errno)
  {
    NDB_SHARE *share= 0;
    pthread_mutex_lock(&ndbcluster_mutex);
    /*
      First make sure we get a "fresh" share here, not an old trailing one...
    */
    {
5310
      uint length= (uint) strlen(name);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5311
      if ((share= (NDB_SHARE*) hash_search(&ndbcluster_open_tables,
5312
                                           (byte*) name, length)))
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5313 5314 5315 5316 5317
        handle_trailing_share(share);
    }
    /*
      get a new share
    */
5318

5319
    /* ndb_share reference create */
5320
    if (!(share= get_share(name, form, TRUE, TRUE)))
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5321
    {
5322
      sql_print_error("NDB: allocating table share for %s failed", name);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5323 5324
      /* my_errno is set */
    }
5325 5326 5327 5328 5329
    else
    {
      DBUG_PRINT("NDB_SHARE", ("%s binlog create  use_count: %u",
                               share->key, share->use_count));
    }
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5330 5331 5332 5333 5334 5335
    pthread_mutex_unlock(&ndbcluster_mutex);

    while (!IS_TMP_PREFIX(m_tabname))
    {
      String event_name(INJECTOR_EVENT_LEN);
      ndb_rep_event_name(&event_name,m_dbname,m_tabname);
5336 5337
      int do_event_op= ndb_binlog_running;

5338
      if (!ndb_schema_share &&
5339 5340 5341
          strcmp(share->db, NDB_REP_DB) == 0 &&
          strcmp(share->table_name, NDB_SCHEMA_TABLE) == 0)
        do_event_op= 1;
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5342 5343 5344 5345 5346

      /*
        Always create an event for the table, as other mysql servers
        expect it to be there.
      */
5347
      if (!ndbcluster_create_event(ndb, m_table, event_name.c_ptr(), share,
5348
                                   share && do_event_op ? 2 : 1/* push warning */))
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5349
      {
5350 5351 5352
        if (ndb_extra_logging)
          sql_print_information("NDB Binlog: CREATE TABLE Event: %s",
                                event_name.c_ptr());
5353
        if (share && 
5354
            ndbcluster_create_event_ops(share, m_table, event_name.c_ptr()))
5355 5356 5357 5358 5359
        {
          sql_print_error("NDB Binlog: FAILED CREATE TABLE event operations."
                          " Event: %s", name);
          /* a warning has been issued to the client */
        }
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5360
      }
5361 5362 5363 5364
      /*
        warning has been issued if ndbcluster_create_event failed
        and (share && do_event_op)
      */
5365
      if (share && !do_event_op)
5366
        share->flags|= NSF_NO_BINLOG;
5367 5368
      ndbcluster_log_schema_op(thd, share,
                               thd->query, thd->query_length,
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5369
                               share->db, share->table_name,
5370 5371
                               m_table->getObjectId(),
                               m_table->getObjectVersion(),
mskold@mysql.com's avatar
mskold@mysql.com committed
5372 5373 5374
                               (is_truncate) ?
			       SOT_TRUNCATE_TABLE : SOT_CREATE_TABLE, 
			       0, 0, 1);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5375 5376 5377 5378 5379
      break;
    }
  }
#endif /* HAVE_NDB_BINLOG */

5380
  m_table= 0;
5381 5382 5383
  DBUG_RETURN(my_errno);
}

5384 5385
int ha_ndbcluster::create_handler_files(const char *file,
                                        const char *old_name,
5386
                                        int action_flag,
5387
                                        HA_CREATE_INFO *create_info)
5388 5389 5390
{ 
  Ndb* ndb;
  const NDBTAB *tab;
5391
  const void *data= NULL, *pack_data= NULL;
5392 5393 5394 5395 5396
  uint length, pack_length;
  int error= 0;

  DBUG_ENTER("create_handler_files");

5397
  if (action_flag != CHF_INDEX_FLAG)
5398 5399 5400
  {
    DBUG_RETURN(FALSE);
  }
5401
  DBUG_PRINT("enter", ("file: %s", file));
5402 5403 5404 5405
  if (!(ndb= get_ndb()))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);

  NDBDICT *dict= ndb->getDictionary();
5406
  if (!create_info->frm_only)
5407
    DBUG_RETURN(0); // Must be a create, ignore since frm is saved in create
5408 5409 5410 5411

  // TODO handle this
  DBUG_ASSERT(m_table != 0);

5412 5413
  set_dbname(file);
  set_tabname(file);
5414
  Ndb_table_guard ndbtab_g(dict, m_tabname);
5415
  DBUG_PRINT("info", ("m_dbname: %s, m_tabname: %s", m_dbname, m_tabname));
5416
  if (!(tab= ndbtab_g.get_table()))
5417 5418
    DBUG_RETURN(0); // Unkown table, must be temporary table

5419
  DBUG_ASSERT(get_ndb_share_state(m_share) == NSS_ALTERED);
5420
  if (readfrm(file, &data, &length) ||
5421 5422 5423 5424 5425
      packfrm(data, length, &pack_data, &pack_length))
  {
    DBUG_PRINT("info", ("Missing frm for %s", m_tabname));
    my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
    my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR));
5426
    error= 1;
5427
  }
5428 5429
  else
  {
5430 5431
    DBUG_PRINT("info", ("Table %s has changed, altering frm in ndb", 
                        m_tabname));
5432 5433 5434 5435
    NdbDictionary::Table new_tab= *tab;
    new_tab.setFrm(pack_data, pack_length);
    if (dict->alterTableGlobal(*tab, new_tab))
    {
5436
      set_ndb_err(current_thd, dict->getNdbError());
5437 5438
      error= ndb_to_mysql_error(&dict->getNdbError());
    }
5439 5440
    my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
    my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR));
5441
  }
5442
  
5443
  set_ndb_share_state(m_share, NSS_INITIAL);
5444 5445 5446
  /* ndb_share reference schema(?) free */
  DBUG_PRINT("NDB_SHARE", ("%s binlog schema(?) free  use_count: %u",
                           m_share->key, m_share->use_count));
5447
  free_share(&m_share); // Decrease ref_count
5448 5449 5450 5451

  DBUG_RETURN(error);
}

5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479
int ha_ndbcluster::create_index(const char *name, KEY *key_info, 
                                NDB_INDEX_TYPE idx_type, uint idx_no)
{
  int error= 0;
  char unique_name[FN_LEN];
  static const char* unique_suffix= "$unique";
  DBUG_ENTER("ha_ndbcluster::create_ordered_index");
  DBUG_PRINT("info", ("Creating index %u: %s", idx_no, name));  

  if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX)
  {
    strxnmov(unique_name, FN_LEN, name, unique_suffix, NullS);
    DBUG_PRINT("info", ("Created unique index name \'%s\' for index %d",
                        unique_name, idx_no));
  }
    
  switch (idx_type){
  case PRIMARY_KEY_INDEX:
    // Do nothing, already created
    break;
  case PRIMARY_KEY_ORDERED_INDEX:
    error= create_ordered_index(name, key_info);
    break;
  case UNIQUE_ORDERED_INDEX:
    if (!(error= create_ordered_index(name, key_info)))
      error= create_unique_index(unique_name, key_info);
    break;
  case UNIQUE_INDEX:
5480 5481 5482 5483 5484 5485 5486
    if (check_index_fields_not_null(key_info))
    {
      push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
			  ER_NULL_COLUMN_IN_INDEX,
			  "Ndb does not support unique index on NULL valued attributes, index access with NULL value will become full table scan");
    }
    error= create_unique_index(unique_name, key_info);
5487 5488
    break;
  case ORDERED_INDEX:
5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499
    if (key_info->algorithm == HA_KEY_ALG_HASH)
    {
      push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
			  ER_ILLEGAL_HA_CREATE_OPTION,
			  ER(ER_ILLEGAL_HA_CREATE_OPTION),
			  ndbcluster_hton_name,
			  "Ndb does not support non-unique "
			  "hash based indexes");
      error= HA_ERR_UNSUPPORTED;
      break;
    }
5500 5501 5502 5503 5504 5505 5506 5507 5508
    error= create_ordered_index(name, key_info);
    break;
  default:
    DBUG_ASSERT(FALSE);
    break;
  }
  
  DBUG_RETURN(error);
}
5509

5510
int ha_ndbcluster::create_ordered_index(const char *name, 
5511
                                        KEY *key_info)
5512
{
5513
  DBUG_ENTER("ha_ndbcluster::create_ordered_index");
5514
  DBUG_RETURN(create_ndb_index(name, key_info, FALSE));
5515 5516 5517
}

int ha_ndbcluster::create_unique_index(const char *name, 
5518
                                       KEY *key_info)
5519 5520
{

5521
  DBUG_ENTER("ha_ndbcluster::create_unique_index");
5522
  DBUG_RETURN(create_ndb_index(name, key_info, TRUE));
5523 5524 5525
}


5526 5527 5528 5529
/*
  Create an index in NDB Cluster
 */

5530 5531 5532
int ha_ndbcluster::create_ndb_index(const char *name, 
                                     KEY *key_info,
                                     bool unique)
5533
{
5534 5535
  Ndb *ndb= get_ndb();
  NdbDictionary::Dictionary *dict= ndb->getDictionary();
5536 5537 5538
  KEY_PART_INFO *key_part= key_info->key_part;
  KEY_PART_INFO *end= key_part + key_info->key_parts;
  
5539
  DBUG_ENTER("ha_ndbcluster::create_index");
5540
  DBUG_PRINT("enter", ("name: %s ", name));
5541

5542
  NdbDictionary::Index ndb_index(name);
5543
  if (unique)
5544 5545 5546 5547 5548
    ndb_index.setType(NdbDictionary::Index::UniqueHashIndex);
  else 
  {
    ndb_index.setType(NdbDictionary::Index::OrderedIndex);
    // TODO Only temporary ordered indexes supported
5549
    ndb_index.setLogging(FALSE); 
5550
  }
5551 5552 5553 5554
  if (ndb_index.setTable(m_tabname))
  {
    DBUG_RETURN(my_errno= errno);
  }
5555 5556 5557 5558 5559

  for (; key_part != end; key_part++) 
  {
    Field *field= key_part->field;
    DBUG_PRINT("info", ("attr: %s", field->field_name));
5560 5561 5562 5563
    if (ndb_index.addColumnName(field->field_name))
    {
      DBUG_RETURN(my_errno= errno);
    }
5564 5565
  }
  
5566
  if (dict->createIndex(ndb_index, *m_table))
5567 5568 5569 5570 5571 5572 5573
    ERR_RETURN(dict->getNdbError());

  // Success
  DBUG_PRINT("info", ("Created index %s", name));
  DBUG_RETURN(0);  
}

5574 5575 5576 5577 5578
/*
 Prepare for an on-line alter table
*/ 
void ha_ndbcluster::prepare_for_alter()
{
5579
  /* ndb_share reference schema */
5580
  ndbcluster_get_share(m_share); // Increase ref_count
5581 5582
  DBUG_PRINT("NDB_SHARE", ("%s binlog schema  use_count: %u",
                           m_share->key, m_share->use_count));
5583 5584 5585
  set_ndb_share_state(m_share, NSS_ALTERED);
}

5586 5587 5588 5589 5590 5591 5592 5593
/*
  Add an index on-line to a table
*/
int ha_ndbcluster::add_index(TABLE *table_arg, 
                             KEY *key_info, uint num_of_keys)
{
  int error= 0;
  uint idx;
  DBUG_ENTER("ha_ndbcluster::add_index");
  DBUG_PRINT("enter", ("table %s", table_arg->s->table_name.str));
  DBUG_ASSERT(m_share->state == NSS_ALTERED);

  for (idx= 0; idx < num_of_keys; idx++)
  {
    KEY *key= key_info + idx;
    KEY_PART_INFO *key_part= key->key_part;
    KEY_PART_INFO *end= key_part + key->key_parts;
    NDB_INDEX_TYPE idx_type= get_index_type_from_key(idx, key_info, false);
    DBUG_PRINT("info", ("Adding index: '%s'", key_info[idx].name));
    // Resolve the Field pointer of each key part before creating
    for (; key_part != end; key_part++)
      key_part->field= table->field[key_part->fieldnr];
    // Create index in ndb; stop at the first failure
    if((error= create_index(key_info[idx].name, key, idx_type, idx)))
      break;
  }
  if (error)
  {
    // Creation failed: restore share state and release the reference
    // taken by prepare_for_alter()
    set_ndb_share_state(m_share, NSS_INITIAL);
    /* ndb_share reference schema free */
    DBUG_PRINT("NDB_SHARE", ("%s binlog schema free  use_count: %u",
                             m_share->key, m_share->use_count));
    free_share(&m_share); // Decrease ref_count
  }
  DBUG_RETURN(error);  
}

/*
  Mark one or several indexes for deletion. and
  renumber the remaining indexes
*/
int ha_ndbcluster::prepare_drop_index(TABLE *table_arg, 
                                      uint *key_num, uint num_of_keys)
{
  DBUG_ENTER("ha_ndbcluster::prepare_drop_index");
  DBUG_ASSERT(m_share->state == NSS_ALTERED);
  // key_num points at an array of num_of_keys index numbers;
  // mark each listed index as TO_BE_DROPPED
  uint idx;
  for (idx= 0; idx < num_of_keys; idx++)
  {
    DBUG_PRINT("info", ("ha_ndbcluster::prepare_drop_index %u", *key_num));
    m_index[*key_num++].status= TO_BE_DROPPED;
  }
  // Renumber indexes
  THD *thd= current_thd;
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;
  renumber_indexes(ndb, table_arg);
  DBUG_RETURN(0);
}
 
/*
  Really drop all indexes marked for deletion
*/
int ha_ndbcluster::final_drop_index(TABLE *table_arg)
{
  int error;
  DBUG_ENTER("ha_ndbcluster::final_drop_index");
  DBUG_PRINT("info", ("ha_ndbcluster::final_drop_index"));
  // Really drop indexes
  THD *thd= current_thd;
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;
  if((error= drop_indexes(ndb, table_arg)))
  {
    // Drop failed: reset share state and release the reference taken
    // by prepare_for_alter()
    m_share->state= NSS_INITIAL;
    /* ndb_share reference schema free */
    DBUG_PRINT("NDB_SHARE", ("%s binlog schema free  use_count: %u",
                             m_share->key, m_share->use_count));
    free_share(&m_share); // Decrease ref_count
  }
  DBUG_RETURN(error);
}

5671 5672 5673 5674 5675 5676
/*
  Rename a table in NDB Cluster
*/

int ha_ndbcluster::rename_table(const char *from, const char *to)
{
5677
  NDBDICT *dict;
5678
  char old_dbname[FN_HEADLEN];
5679
  char new_dbname[FN_HEADLEN];
5680
  char new_tabname[FN_HEADLEN];
5681 5682
  const NDBTAB *orig_tab;
  int result;
5683 5684
  bool recreate_indexes= FALSE;
  NDBDICT::List index_list;
5685 5686

  DBUG_ENTER("ha_ndbcluster::rename_table");
5687
  DBUG_PRINT("info", ("Renaming %s to %s", from, to));
5688
  set_dbname(from, old_dbname);
5689
  set_dbname(to, new_dbname);
5690 5691 5692
  set_tabname(from);
  set_tabname(to, new_tabname);

5693 5694 5695
  if (check_ndb_connection())
    DBUG_RETURN(my_errno= HA_ERR_NO_CONNECTION);

mskold@mysql.com's avatar
mskold@mysql.com committed
5696
  Ndb *ndb= get_ndb();
5697
  ndb->setDatabaseName(old_dbname);
mskold@mysql.com's avatar
mskold@mysql.com committed
5698
  dict= ndb->getDictionary();
5699 5700
  Ndb_table_guard ndbtab_g(dict, m_tabname);
  if (!(orig_tab= ndbtab_g.get_table()))
5701
    ERR_RETURN(dict->getNdbError());
5702

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5703
#ifdef HAVE_NDB_BINLOG
5704 5705 5706
  int ndb_table_id= orig_tab->getObjectId();
  int ndb_table_version= orig_tab->getObjectVersion();

5707
  /* ndb_share reference temporary */
5708
  NDB_SHARE *share= get_share(from, 0, FALSE);
5709
  if (share)
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5710
  {
5711 5712
    DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
                             share->key, share->use_count));
5713
    IF_DBUG(int r=) rename_share(share, to);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5714 5715 5716
    DBUG_ASSERT(r == 0);
  }
#endif
5717 5718 5719 5720 5721
  if (my_strcasecmp(system_charset_info, new_dbname, old_dbname))
  {
    dict->listIndexes(index_list, *orig_tab);    
    recreate_indexes= TRUE;
  }
5722 5723
  // Change current database to that of target table
  set_dbname(to);
5724 5725 5726 5727
  if (ndb->setDatabaseName(m_dbname))
  {
    ERR_RETURN(ndb->getNdbError());
  }
5728

5729 5730 5731
  NdbDictionary::Table new_tab= *orig_tab;
  new_tab.setName(new_tabname);
  if (dict->alterTableGlobal(*orig_tab, new_tab) != 0)
5732
  {
5733
    NdbError ndb_error= dict->getNdbError();
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5734 5735 5736
#ifdef HAVE_NDB_BINLOG
    if (share)
    {
5737 5738
      IF_DBUG(int ret=) rename_share(share, from);
      DBUG_ASSERT(ret == 0);
5739 5740 5741
      /* ndb_share reference temporary free */
      DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
                               share->key, share->use_count));
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5742 5743 5744
      free_share(&share);
    }
#endif
5745
    ERR_RETURN(ndb_error);
5746 5747 5748 5749
  }
  
  // Rename .ndb file
  if ((result= handler::rename_table(from, to)))
5750
  {
5751
    // ToDo in 4.1 should rollback alter table...
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5752 5753
#ifdef HAVE_NDB_BINLOG
    if (share)
5754 5755 5756 5757
    {
      /* ndb_share reference temporary free */
      DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
                               share->key, share->use_count));
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5758
      free_share(&share);
5759
    }
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5760
#endif
5761
    DBUG_RETURN(result);
5762
  }
5763

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774
#ifdef HAVE_NDB_BINLOG
  int is_old_table_tmpfile= 1;
  if (share && share->op)
    dict->forceGCPWait();

  /* handle old table */
  if (!IS_TMP_PREFIX(m_tabname))
  {
    is_old_table_tmpfile= 0;
    String event_name(INJECTOR_EVENT_LEN);
    ndb_rep_event_name(&event_name, from + sizeof(share_prefix) - 1, 0);
5775 5776
    ndbcluster_handle_drop_table(ndb, event_name.c_ptr(), share,
                                 "rename table");
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5777 5778 5779 5780 5781 5782 5783
  }

  if (!result && !IS_TMP_PREFIX(new_tabname))
  {
    /* always create an event for the table */
    String event_name(INJECTOR_EVENT_LEN);
    ndb_rep_event_name(&event_name, to + sizeof(share_prefix) - 1, 0);
5784 5785
    Ndb_table_guard ndbtab_g2(dict, new_tabname);
    const NDBTAB *ndbtab= ndbtab_g2.get_table();
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5786

5787
    if (!ndbcluster_create_event(ndb, ndbtab, event_name.c_ptr(), share,
5788
                                 share && ndb_binlog_running ? 2 : 1/* push warning */))
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5789 5790 5791 5792
    {
      if (ndb_extra_logging)
        sql_print_information("NDB Binlog: RENAME Event: %s",
                              event_name.c_ptr());
5793
      if (share &&
5794
          ndbcluster_create_event_ops(share, ndbtab, event_name.c_ptr()))
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5795
      {
5796 5797 5798
        sql_print_error("NDB Binlog: FAILED create event operations "
                        "during RENAME. Event %s", event_name.c_ptr());
        /* a warning has been issued to the client */
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5799 5800
      }
    }
5801 5802 5803 5804
    /*
      warning has been issued if ndbcluster_create_event failed
      and (share && ndb_binlog_running)
    */
5805
    if (!is_old_table_tmpfile)
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5806 5807
      ndbcluster_log_schema_op(current_thd, share,
                               current_thd->query, current_thd->query_length,
5808 5809
                               old_dbname, m_tabname,
                               ndb_table_id, ndb_table_version,
5810
                               SOT_RENAME_TABLE,
5811
                               m_dbname, new_tabname, 1);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5812
  }
5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837

  // If we are moving tables between databases, we need to recreate
  // indexes
  if (recreate_indexes)
  {
    for (unsigned i = 0; i < index_list.count; i++) 
    {
        NDBDICT::List::Element& index_el = index_list.elements[i];
	// Recreate any indexes not stored in the system database
	if (my_strcasecmp(system_charset_info, 
			  index_el.database, NDB_SYSTEM_DATABASE))
	{
	  set_dbname(from);
	  ndb->setDatabaseName(m_dbname);
	  const NDBINDEX * index= dict->getIndexGlobal(index_el.name,  new_tab);
	  DBUG_PRINT("info", ("Creating index %s/%s",
			      index_el.database, index->getName()));
	  dict->createIndex(*index, new_tab);
	  DBUG_PRINT("info", ("Dropping index %s/%s",
			      index_el.database, index->getName()));
	  set_dbname(from);
	  ndb->setDatabaseName(m_dbname);
	  dict->dropIndexGlobal(*index);
	}
    }
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5838 5839
  }
  if (share)
5840 5841 5842 5843
  {
    /* ndb_share reference temporary free */
    DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
                             share->key, share->use_count));
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5844
    free_share(&share);
5845
  }
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5846 5847
#endif

5848 5849 5850 5851 5852
  DBUG_RETURN(result);
}


/*
5853 5854
  Delete table from NDB Cluster

5855 5856
 */

5857 5858 5859 5860 5861 5862 5863 5864
/* static version which does not need a handler */

int
ha_ndbcluster::delete_table(ha_ndbcluster *h, Ndb *ndb,
                            const char *path,
                            const char *db,
                            const char *table_name)
{
5865
  THD *thd= current_thd;
5866 5867
  DBUG_ENTER("ha_ndbcluster::ndbcluster_delete_table");
  NDBDICT *dict= ndb->getDictionary();
5868 5869
  int ndb_table_id= 0;
  int ndb_table_version= 0;
5870
#ifdef HAVE_NDB_BINLOG
5871 5872 5873 5874
  /*
    Don't allow drop table unless
    schema distribution table is setup
  */
5875
  if (!ndb_schema_share)
5876 5877 5878 5879
  {
    DBUG_PRINT("info", ("Schema distribution table not setup"));
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  }
5880
  /* ndb_share reference temporary */
5881
  NDB_SHARE *share= get_share(path, 0, FALSE);
5882 5883 5884 5885 5886
  if (share)
  {
    DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
                             share->key, share->use_count));
  }
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5887
#endif
5888 5889 5890

  /* Drop the table from NDB */
  
5891
  int res= 0;
5892
  if (h && h->m_table)
5893
  {
5894 5895
retry_temporary_error1:
    if (dict->dropTableGlobal(*h->m_table) == 0)
5896 5897 5898
    {
      ndb_table_id= h->m_table->getObjectId();
      ndb_table_version= h->m_table->getObjectVersion();
5899
      DBUG_PRINT("info", ("success 1"));
5900
    }
5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911
    else
    {
      switch (dict->getNdbError().status)
      {
        case NdbError::TemporaryError:
          if (!thd->killed) 
            goto retry_temporary_error1; // retry indefinitly
          break;
        default:
          break;
      }
5912
      set_ndb_err(thd, dict->getNdbError());
5913
      res= ndb_to_mysql_error(&dict->getNdbError());
5914
      DBUG_PRINT("info", ("error(1) %u", res));
5915
    }
5916
    h->release_metadata(thd, ndb);
5917 5918 5919 5920
  }
  else
  {
    ndb->setDatabaseName(db);
5921 5922 5923 5924 5925
    while (1)
    {
      Ndb_table_guard ndbtab_g(dict, table_name);
      if (ndbtab_g.get_table())
      {
5926
    retry_temporary_error2:
5927 5928 5929 5930
        if (dict->dropTableGlobal(*ndbtab_g.get_table()) == 0)
        {
          ndb_table_id= ndbtab_g.get_table()->getObjectId();
          ndb_table_version= ndbtab_g.get_table()->getObjectVersion();
5931 5932
          DBUG_PRINT("info", ("success 2"));
          break;
5933
        }
5934
        else
5935
        {
5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949
          switch (dict->getNdbError().status)
          {
            case NdbError::TemporaryError:
              if (!thd->killed) 
                goto retry_temporary_error2; // retry indefinitly
              break;
            default:
              if (dict->getNdbError().code == NDB_INVALID_SCHEMA_OBJECT)
              {
                ndbtab_g.invalidate();
                continue;
              }
              break;
          }
5950 5951
        }
      }
5952
      set_ndb_err(thd, dict->getNdbError());
5953 5954
      res= ndb_to_mysql_error(&dict->getNdbError());
      DBUG_PRINT("info", ("error(2) %u", res));
5955 5956
      break;
    }
5957 5958 5959 5960
  }

  if (res)
  {
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971
#ifdef HAVE_NDB_BINLOG
    /* the drop table failed for some reason, drop the share anyways */
    if (share)
    {
      pthread_mutex_lock(&ndbcluster_mutex);
      if (share->state != NSS_DROPPED)
      {
        /*
          The share kept by the server has not been freed, free it
        */
        share->state= NSS_DROPPED;
5972 5973 5974
        /* ndb_share reference create free */
        DBUG_PRINT("NDB_SHARE", ("%s create free  use_count: %u",
                                 share->key, share->use_count));
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5975 5976
        free_share(&share, TRUE);
      }
5977 5978 5979
      /* ndb_share reference temporary free */
      DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
                               share->key, share->use_count));
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5980 5981 5982 5983
      free_share(&share, TRUE);
      pthread_mutex_unlock(&ndbcluster_mutex);
    }
#endif
5984 5985 5986
    DBUG_RETURN(res);
  }

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997
#ifdef HAVE_NDB_BINLOG
  /* stop the logging of the dropped table, and cleanup */

  /*
    drop table is successful even if table does not exist in ndb
    and in case table was actually not dropped, there is no need
    to force a gcp, and setting the event_name to null will indicate
    that there is no event to be dropped
  */
  int table_dropped= dict->getNdbError().code != 709;

mskold@mysql.com's avatar
mskold@mysql.com committed
5998 5999
  if (!IS_TMP_PREFIX(table_name) && share &&
      current_thd->lex->sql_command != SQLCOM_TRUNCATE)
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6000
  {
6001 6002
    ndbcluster_log_schema_op(thd, share,
                             thd->query, thd->query_length,
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6003
                             share->db, share->table_name,
6004
                             ndb_table_id, ndb_table_version,
6005
                             SOT_DROP_TABLE, 0, 0, 1);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016
  }
  else if (table_dropped && share && share->op) /* ndbcluster_log_schema_op
                                                   will do a force GCP */
    dict->forceGCPWait();

  if (!IS_TMP_PREFIX(table_name))
  {
    String event_name(INJECTOR_EVENT_LEN);
    ndb_rep_event_name(&event_name, path + sizeof(share_prefix) - 1, 0);
    ndbcluster_handle_drop_table(ndb,
                                 table_dropped ? event_name.c_ptr() : 0,
6017
                                 share, "delete table");
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028
  }

  if (share)
  {
    pthread_mutex_lock(&ndbcluster_mutex);
    if (share->state != NSS_DROPPED)
    {
      /*
        The share kept by the server has not been freed, free it
      */
      share->state= NSS_DROPPED;
6029 6030 6031
      /* ndb_share reference create free */
      DBUG_PRINT("NDB_SHARE", ("%s create free  use_count: %u",
                               share->key, share->use_count));
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6032 6033
      free_share(&share, TRUE);
    }
6034 6035 6036
    /* ndb_share reference temporary free */
    DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
                             share->key, share->use_count));
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6037 6038 6039 6040
    free_share(&share, TRUE);
    pthread_mutex_unlock(&ndbcluster_mutex);
  }
#endif
6041 6042 6043
  DBUG_RETURN(0);
}

6044 6045
int ha_ndbcluster::delete_table(const char *name)
{
6046
  DBUG_ENTER("ha_ndbcluster::delete_table");
6047 6048 6049
  DBUG_PRINT("enter", ("name: %s", name));
  set_dbname(name);
  set_tabname(name);
6050

6051 6052 6053 6054 6055
#ifdef HAVE_NDB_BINLOG
  /*
    Don't allow drop table unless
    schema distribution table is setup
  */
6056
  if (!ndb_schema_share)
6057 6058 6059 6060 6061 6062
  {
    DBUG_PRINT("info", ("Schema distribution table not setup"));
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  }
#endif

6063 6064
  if (check_ndb_connection())
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
6065 6066

  /* Call ancestor function to delete .ndb file */
6067
  handler::delete_table(name);
6068 6069

  DBUG_RETURN(delete_table(this, get_ndb(),name, m_dbname, m_tabname));
6070 6071 6072
}


6073 6074 6075 6076
/*
  Reserve/read the next auto-increment value from NDB.

  On success *first_value receives the value and *nb_reserved_values
  is 1 (NDB hands out one row's value at a time to MySQL).  On
  persistent failure *first_value is set to ~0.

  Fixes:
  - A stray ';' terminated the retry 'if', so the sleep-and-retry
    block ran unconditionally, retrying forever on permanent errors.
  - my_sleep() takes microseconds; it was passed 30 despite the
    "30 milliseconds" comment, sleeping only 30us between retries.
*/
void ha_ndbcluster::get_auto_increment(ulonglong offset, ulonglong increment,
                                       ulonglong nb_desired_values,
                                       ulonglong *first_value,
                                       ulonglong *nb_reserved_values)
{
  int cache_size;
  Uint64 auto_value;
  DBUG_ENTER("get_auto_increment");
  DBUG_PRINT("enter", ("m_tabname: %s", m_tabname));
  Ndb *ndb= get_ndb();
   
  if (m_rows_inserted > m_rows_to_insert)
  {
    /* We guessed too low */
    m_rows_to_insert+= m_autoincrement_prefetch;
  }
  // Prefetch enough values to cover the remaining expected inserts
  cache_size= 
    (int) ((m_rows_to_insert - m_rows_inserted < m_autoincrement_prefetch) ?
           m_rows_to_insert - m_rows_inserted :
           ((m_rows_to_insert > m_autoincrement_prefetch) ?
            m_rows_to_insert : m_autoincrement_prefetch));
  uint retries= NDB_AUTO_INCREMENT_RETRIES;
  int retry_sleep= 30; /* 30 milliseconds, transaction */
  for (;;)
  {
    Ndb_tuple_id_range_guard g(m_share);
    /*
      NOTE(review): due to short-circuiting, when m_skip_auto_increment
      is set and readAutoIncrementValue() succeeds (returns 0), the ||
      still invokes getAutoIncrementValue() - confirm this is intended.
      Parentheses added for clarity; they match the original &&/||
      precedence, so behavior is unchanged.
    */
    if ((m_skip_auto_increment &&
         ndb->readAutoIncrementValue(m_table, g.range, auto_value)) ||
        ndb->getAutoIncrementValue(m_table, g.range, auto_value, cache_size, increment, offset))
    {
      // Retry only on temporary errors while retries remain
      if (--retries &&
          ndb->getNdbError().status == NdbError::TemporaryError)
      {
        my_sleep(retry_sleep * 1000L); /* my_sleep() takes microseconds */
        continue;
      }
      const NdbError err= ndb->getNdbError();
      sql_print_error("Error %lu in ::get_auto_increment(): %s",
                      (ulong) err.code, err.message);
      *first_value= ~(ulonglong) 0;
      DBUG_VOID_RETURN;
    }
    break;
  }
  *first_value= (longlong)auto_value;
  /* From the point of view of MySQL, NDB reserves one row at a time */
  *nb_reserved_values= 1;
  DBUG_VOID_RETURN;
}


/*
  Constructor for the NDB Cluster table handler 
 */

6128 6129 6130 6131 6132 6133 6134
/*
  Capability flags reported by this handler to the MySQL server.
  Note HA_HAS_OWN_BINLOGGING: row changes are injected into the binlog
  by the NDB binlog thread, not by the normal server-side logging.
*/
#define HA_NDBCLUSTER_TABLE_FLAGS \
                HA_REC_NOT_IN_SEQ | \
                HA_NULL_IN_KEY | \
                HA_AUTO_PART_KEY | \
                HA_NO_PREFIX_CHAR_KEYS | \
                HA_NEED_READ_RANGE_BUFFER | \
                HA_CAN_GEOMETRY | \
                HA_CAN_BIT_FIELD | \
                HA_PRIMARY_KEY_REQUIRED_FOR_POSITION | \
                HA_PRIMARY_KEY_REQUIRED_FOR_DELETE | \
                HA_PARTIAL_COLUMN_READ | \
                HA_HAS_OWN_BINLOGGING | \
                HA_HAS_RECORDS
6141

6142 6143
ha_ndbcluster::ha_ndbcluster(handlerton *hton, TABLE_SHARE *table_arg):
  handler(hton, table_arg),
  m_active_trans(NULL),
  m_active_cursor(NULL),
  m_table(NULL),
  m_table_info(NULL),
  m_table_flags(HA_NDBCLUSTER_TABLE_FLAGS),
  m_share(0),
  m_part_info(NULL),
  m_use_partition_function(FALSE),
  m_sorted(FALSE),
  m_use_write(FALSE),
  m_ignore_dup_key(FALSE),
  m_has_unique_index(FALSE),
  m_primary_key_update(FALSE),
  m_ignore_no_key(FALSE),
  m_rows_to_insert((ha_rows) 1),
  m_rows_inserted((ha_rows) 0),
  m_bulk_insert_rows((ha_rows) 1024),
  m_rows_changed((ha_rows) 0),
  m_bulk_insert_not_flushed(FALSE),
  m_delete_cannot_batch(FALSE),
  m_update_cannot_batch(FALSE),
  m_ops_pending(0),
  m_skip_auto_increment(TRUE),
  m_blobs_pending(0),
  m_blobs_offset(0),
  m_blobs_buffer(0),
  m_blobs_buffer_size(0),
  m_dupkey((uint) -1),
  m_ha_not_exact_count(FALSE),
  m_force_send(TRUE),
  m_autoincrement_prefetch((ha_rows) 32),
  m_transaction_on(TRUE),
  m_cond(NULL),
  m_multi_cursor(NULL)
{
  int i;

  DBUG_ENTER("ha_ndbcluster");

  /* Table/database names are filled in later by set_dbname()/set_tabname() */
  m_tabname[0]= '\0';
  m_dbname[0]= '\0';

  stats.records= ~(ha_rows)0; // uninitialized
  stats.block_size= 1024;

  /* Reset per-key index metadata; populated when metadata is fetched */
  for (i= 0; i < MAX_KEY; i++)
    ndb_init_index(m_index[i]);

  DBUG_VOID_RETURN;
}


6196 6197 6198 6199 6200 6201 6202 6203 6204 6205
/*
  Ensure the current thread has an Ndb object attached.
  Returns FALSE (success) when check_ndb_in_thd() yields an Ndb object,
  TRUE otherwise.
*/
int ha_ndbcluster::ha_initialise()
{
  DBUG_ENTER("ha_ndbcluster::ha_initialise");
  if (!check_ndb_in_thd(current_thd))
    DBUG_RETURN(TRUE);          // could not seize an Ndb object
  DBUG_RETURN(FALSE);
}

6206 6207 6208 6209 6210 6211
/*
  Destructor for NDB Cluster table handler
 */

ha_ndbcluster::~ha_ndbcluster() 
{
  THD *thd= current_thd;
  /* Fall back to the global Ndb object when no THD is available */
  Ndb *ndb= thd ? check_ndb_in_thd(thd) : g_ndb;
  DBUG_ENTER("~ha_ndbcluster");

  if (m_share)
  {
    /* ndb_share reference handler free */
    DBUG_PRINT("NDB_SHARE", ("%s handler free  use_count: %u",
                             m_share->key, m_share->use_count));
    free_share(&m_share);
  }
  release_metadata(thd, ndb);
  my_free(m_blobs_buffer, MYF(MY_ALLOW_ZERO_PTR));
  m_blobs_buffer= 0;

  // Check for open cursor/transaction; both must be closed before destruction
  if (m_active_cursor) {
  }
  DBUG_ASSERT(m_active_cursor == NULL);
  if (m_active_trans) {
  }
  DBUG_ASSERT(m_active_trans == NULL);

  // Discard any generated (pushed-down) condition
  DBUG_PRINT("info", ("Deleting generated condition"));
  if (m_cond)
  {
    delete m_cond;
    m_cond= NULL;
  }

  DBUG_VOID_RETURN;
}


mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6247

6248 6249 6250 6251
/*
  Open a table for further use
  - fetch metadata for this table from NDB
  - check that table exists
6252 6253 6254 6255

  RETURN
    0    ok
    < 0  Table has changed
6256 6257 6258 6259
*/

int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked)
{
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6260
  int res;
6261
  KEY *key;
6262 6263 6264
  DBUG_ENTER("ha_ndbcluster::open");
  DBUG_PRINT("enter", ("name: %s  mode: %d  test_if_locked: %d",
                       name, mode, test_if_locked));
6265
  
6266 6267 6268 6269
  /*
    Setup ref_length to make room for the whole 
    primary key to be written in the ref variable
  */
6270
  
6271
  if (table_share->primary_key != MAX_KEY) 
6272
  {
6273
    key= table->key_info+table_share->primary_key;
6274 6275
    ref_length= key->key_length;
  }
6276 6277 6278 6279 6280 6281 6282 6283 6284 6285
  else // (table_share->primary_key == MAX_KEY) 
  {
    if (m_use_partition_function)
    {
      ref_length+= sizeof(m_part_id);
    }
  }

  DBUG_PRINT("info", ("ref_length: %d", ref_length));

6286
  // Init table lock structure 
6287
  /* ndb_share reference handler */
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6288
  if (!(m_share=get_share(name, table)))
6289
    DBUG_RETURN(1);
6290 6291
  DBUG_PRINT("NDB_SHARE", ("%s handler  use_count: %u",
                           m_share->key, m_share->use_count));
6292 6293 6294 6295 6296
  thr_lock_data_init(&m_share->lock,&m_lock,(void*) 0);
  
  set_dbname(name);
  set_tabname(name);
  
6297
  if ((res= check_ndb_connection()) ||
6298
      (res= get_metadata(name)))
6299 6300 6301 6302
  {
    /* ndb_share reference handler free */
    DBUG_PRINT("NDB_SHARE", ("%s handler free  use_count: %u",
                             m_share->key, m_share->use_count));
6303 6304
    free_share(&m_share);
    m_share= 0;
6305
    DBUG_RETURN(res);
6306
  }
6307
  while (1)
6308 6309
  {
    Ndb *ndb= get_ndb();
6310 6311
    if (ndb->setDatabaseName(m_dbname))
    {
6312
      set_ndb_err(current_thd, ndb->getNdbError());
6313 6314
      res= ndb_to_mysql_error(&ndb->getNdbError());
      break;
6315
    }
stewart@willster.(none)'s avatar
stewart@willster.(none) committed
6316
    struct Ndb_statistics stat;
6317
    res= ndb_get_table_statistics(NULL, FALSE, ndb, m_table, &stat);
6318 6319 6320
    stats.mean_rec_length= stat.row_size;
    stats.data_file_length= stat.fragment_memory;
    stats.records= stat.row_count;
6321 6322
    if(!res)
      res= info(HA_STATUS_CONST);
6323
    break;
6324
  }
6325 6326
  if (res)
  {
6327
    free_share(&m_share);
6328
    m_share= 0;
6329
    release_metadata(current_thd, get_ndb());
6330
    DBUG_RETURN(res);
6331
  }
6332 6333 6334 6335
#ifdef HAVE_NDB_BINLOG
  if (!ndb_binlog_tables_inited && ndb_binlog_running)
    table->db_stat|= HA_READ_ONLY;
#endif
6336
  DBUG_RETURN(0);
6337 6338
}

6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352
/*
  Set partition info

  SYNOPSIS
    set_part_info()
    part_info

  RETURN VALUE
    NONE

  DESCRIPTION
    Set up partition info when handler object created
*/

6353 6354 6355 6356 6357
void ha_ndbcluster::set_part_info(partition_info *part_info)
{
  m_part_info= part_info;
  /*
    Native NDB partitioning (hash on partition fields, no subpartitions)
    needs no MySQL-side partition function; anything else does.
  */
  if (!(m_part_info->part_type == HASH_PARTITION &&
        m_part_info->list_of_part_fields &&
        !m_part_info->is_sub_partitioned()))
    m_use_partition_function= TRUE;
}
6361 6362 6363 6364 6365 6366 6367 6368

/*
  Close the table
  - release resources setup by open()
 */

int ha_ndbcluster::close(void)
{
6369
  DBUG_ENTER("close");
6370
  THD *thd= table->in_use;
6371
  Ndb *ndb= thd ? check_ndb_in_thd(thd) : g_ndb;
6372 6373 6374
  /* ndb_share reference handler free */
  DBUG_PRINT("NDB_SHARE", ("%s handler free  use_count: %u",
                           m_share->key, m_share->use_count));
6375 6376
  free_share(&m_share);
  m_share= 0;
6377
  release_metadata(thd, ndb);
6378 6379 6380 6381
  DBUG_RETURN(0);
}


6382
/*
  Allocate and initialise a Thd_ndb object for a connection thread.
  Returns NULL on allocation or init failure (my_errno set on OOM).
*/
Thd_ndb* ha_ndbcluster::seize_thd_ndb()
{
  Thd_ndb *thd_ndb;
  DBUG_ENTER("seize_thd_ndb");

  thd_ndb= new Thd_ndb();
  if (thd_ndb == NULL)
  {
    my_errno= HA_ERR_OUT_OF_MEM;
    /* Use DBUG_RETURN so the DBUG trace stack stays balanced */
    DBUG_RETURN(NULL);
  }
  if (thd_ndb->ndb->init(max_transactions) != 0)
  {
    ERR_PRINT(thd_ndb->ndb->getNdbError());
    /*
      TODO 
      Alt.1 If init fails because to many allocated Ndb 
      wait on condition for a Ndb object to be released.
      Alt.2 Seize/release from pool, wait until next release 
    */
    delete thd_ndb;
    thd_ndb= NULL;
  }
  DBUG_RETURN(thd_ndb);
}


6409
/* Free a Thd_ndb object previously obtained from seize_thd_ndb() */
void ha_ndbcluster::release_thd_ndb(Thd_ndb* thd_ndb)
{
  DBUG_ENTER("release_thd_ndb");
  delete thd_ndb;
  DBUG_VOID_RETURN;
}


/*
magnus@neptunus.(none)'s avatar
magnus@neptunus.(none) committed
6418
  If this thread already has a Thd_ndb object allocated
6419
  in current THD, reuse it. Otherwise
magnus@neptunus.(none)'s avatar
magnus@neptunus.(none) committed
6420
  seize a Thd_ndb object, assign it to current THD and use it.
6421 6422 6423
 
*/

6424
/*
  If this thread already has a Thd_ndb object allocated in the current
  THD, reuse it.  Otherwise seize a new Thd_ndb object, attach it to the
  THD and use it.  Returns NULL if a Thd_ndb could not be allocated.
*/
Ndb* check_ndb_in_thd(THD* thd)
{
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  if (!thd_ndb)
  {
    if (!(thd_ndb= ha_ndbcluster::seize_thd_ndb()))
      return NULL;
    set_thd_ndb(thd, thd_ndb);
  }
  return thd_ndb->ndb;
}

magnus@neptunus.(none)'s avatar
magnus@neptunus.(none) committed
6436

6437

6438
/*
  Verify this thread has a usable Ndb object and select the handler's
  database on it.  Returns 0 on success, HA_ERR_NO_CONNECTION or a
  mapped NDB error otherwise.
*/
int ha_ndbcluster::check_ndb_connection(THD* thd)
{
  Ndb *ndb;
  DBUG_ENTER("check_ndb_connection");

  if (!(ndb= check_ndb_in_thd(thd)))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  if (ndb->setDatabaseName(m_dbname))
  {
    ERR_RETURN(ndb->getNdbError());
  }
  DBUG_RETURN(0);
}

magnus@neptunus.(none)'s avatar
magnus@neptunus.(none) committed
6452

6453
/* handlerton close_connection hook: free the per-thread Thd_ndb object */
static int ndbcluster_close_connection(handlerton *hton, THD *thd)
{
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  DBUG_ENTER("ndbcluster_close_connection");
  if (thd_ndb)
  {
    ha_ndbcluster::release_thd_ndb(thd_ndb);
    set_thd_ndb(thd, NULL); // not strictly required but does not hurt either
  }
  DBUG_RETURN(0);
}


/*
  Try to discover one table from NDB
 */

6470 6471 6472 6473
/*
  Try to discover one table from NDB: fetch (or read from disk, when the
  share is in NSS_ALTERED state) the packed frm for db.name and return it
  in *frmblob/*frmlen.  Caller owns the returned buffer.
  Returns 0 on success, 1 on frm read/unpack failure, -1 when the table
  does not exist in NDB, or a mapped NDB error.
*/
int ndbcluster_discover(handlerton *hton, THD* thd, const char *db, 
                        const char *name,
                        const void** frmblob, 
                        uint* frmlen)
{
  int error= 0;
  NdbError ndb_error;
  uint len;
  const void* data= NULL;
  Ndb* ndb;
  char key[FN_REFLEN];
  DBUG_ENTER("ndbcluster_discover");
  DBUG_PRINT("enter", ("db: %s, name: %s", db, name)); 

  if (!(ndb= check_ndb_in_thd(thd)))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);  
  if (ndb->setDatabaseName(db))
  {
    ERR_RETURN(ndb->getNdbError());
  }
  NDBDICT* dict= ndb->getDictionary();
  build_table_filename(key, sizeof(key), db, name, "", 0);
  /* ndb_share reference temporary */
  NDB_SHARE *share= get_share(key, 0, FALSE);
  if (share)
  {
    DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
                             share->key, share->use_count));
  }
  if (share && get_ndb_share_state(share) == NSS_ALTERED)
  {
    // Frm has been altered on disk, but not yet written to ndb
    if (readfrm(key, &data, &len))
    {
      DBUG_PRINT("error", ("Could not read frm"));
      error= 1;
      goto err;
    }
  }
  else
  {
    Ndb_table_guard ndbtab_g(dict, name);
    const NDBTAB *tab= ndbtab_g.get_table();
    if (!tab)
    {
      const NdbError err= dict->getNdbError();
      /* 709/723 = "no such table": report -1 without raising an error */
      if (err.code == 709 || err.code == 723)
      {
        error= -1;
        DBUG_PRINT("info", ("ndb_error.code: %u", ndb_error.code));
      }
      else
      {
        error= -1;
        ndb_error= err;
        DBUG_PRINT("info", ("ndb_error.code: %u", ndb_error.code));
      }
      goto err;
    }
    DBUG_PRINT("info", ("Found table %s", tab->getName()));

    len= tab->getFrmLength();  
    if (len == 0 || tab->getFrmData() == NULL)
    {
      DBUG_PRINT("error", ("No frm data found."));
      error= 1;
      goto err;
    }

    if (unpackfrm(&data, &len, tab->getFrmData()))
    {
      DBUG_PRINT("error", ("Could not unpack table"));
      error= 1;
      goto err;
    }
  }

  *frmlen= len;
  *frmblob= data;

  if (share)
  {
    /* ndb_share reference temporary free */
    DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
                             share->key, share->use_count));
    free_share(&share);
  }

  DBUG_RETURN(0);
err:
  my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
  if (share)
  {
    /* ndb_share reference temporary free */
    DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
                             share->key, share->use_count));
    free_share(&share);
  }
  if (ndb_error.code)
  {
    ERR_RETURN(ndb_error);
  }
  DBUG_RETURN(error);
}

/*
6576
  Check if a table exists in NDB
6577

6578
 */
6579

6580 6581
/*
  Check if a table exists in NDB by scanning the dictionary's list of
  user tables.  Returns HA_ERR_TABLE_EXIST when found,
  HA_ERR_NO_SUCH_TABLE when not, or an error.
*/
int ndbcluster_table_exists_in_engine(handlerton *hton, THD* thd, 
                                      const char *db,
                                      const char *name)
{
  Ndb* ndb;
  DBUG_ENTER("ndbcluster_table_exists_in_engine");
  DBUG_PRINT("enter", ("db: %s  name: %s", db, name));

  if (!(ndb= check_ndb_in_thd(thd)))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  NDBDICT* dict= ndb->getDictionary();
  NdbDictionary::Dictionary::List list;
  if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0)
    ERR_RETURN(dict->getNdbError());
  for (uint i= 0 ; i < list.count ; i++)
  {
    NdbDictionary::Dictionary::List::Element& elmt= list.elements[i];
    if (my_strcasecmp(system_charset_info, elmt.database, db))
      continue;
    if (my_strcasecmp(system_charset_info, elmt.name, name))
      continue;
    DBUG_PRINT("info", ("Found table"));
    DBUG_RETURN(HA_ERR_TABLE_EXIST);
  }
  DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
}

6607 6608


6609
/* HASH key extraction callback: the entry itself is the key (a C string) */
extern "C" byte* tables_get_key(const char *entry, uint *length,
                                my_bool not_used __attribute__((unused)))
{
  *length= strlen(entry);
  return (byte*) entry;
}


6617 6618
/*
  Drop a database in NDB Cluster
6619 6620
  NOTE add a dummy void function, since stupid handlerton is returning void instead of int...
*/
6621

6622
int ndbcluster_drop_database_impl(const char *path)
6623 6624 6625 6626 6627 6628 6629 6630 6631
{
  DBUG_ENTER("ndbcluster_drop_database");
  THD *thd= current_thd;
  char dbname[FN_HEADLEN];
  Ndb* ndb;
  NdbDictionary::Dictionary::List list;
  uint i;
  char *tabname;
  List<char> drop_list;
6632
  int ret= 0;
6633 6634 6635 6636
  ha_ndbcluster::set_dbname(path, (char *)&dbname);
  DBUG_PRINT("enter", ("db: %s", dbname));
  
  if (!(ndb= check_ndb_in_thd(thd)))
6637
    DBUG_RETURN(-1);
6638 6639 6640 6641 6642
  
  // List tables in NDB
  NDBDICT *dict= ndb->getDictionary();
  if (dict->listObjects(list, 
                        NdbDictionary::Object::UserTable) != 0)
6643
    DBUG_RETURN(-1);
6644 6645
  for (i= 0 ; i < list.count ; i++)
  {
6646 6647
    NdbDictionary::Dictionary::List::Element& elmt= list.elements[i];
    DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name));     
6648 6649
    
    // Add only tables that belongs to db
6650
    if (my_strcasecmp(system_charset_info, elmt.database, dbname))
6651
      continue;
6652 6653
    DBUG_PRINT("info", ("%s must be dropped", elmt.name));     
    drop_list.push_back(thd->strdup(elmt.name));
6654 6655
  }
  // Drop any tables belonging to database
6656
  char full_path[FN_REFLEN];
6657
  char *tmp= full_path +
6658
    build_table_filename(full_path, sizeof(full_path), dbname, "", "", 0);
6659 6660 6661 6662
  if (ndb->setDatabaseName(dbname))
  {
    ERR_RETURN(ndb->getNdbError());
  }
6663 6664
  List_iterator_fast<char> it(drop_list);
  while ((tabname=it++))
6665
  {
6666
    tablename_to_filename(tabname, tmp, FN_REFLEN - (tmp - full_path)-1);
6667
    VOID(pthread_mutex_lock(&LOCK_open));
6668
    if (ha_ndbcluster::delete_table(0, ndb, full_path, dbname, tabname))
6669 6670
    {
      const NdbError err= dict->getNdbError();
6671
      if (err.code != 709 && err.code != 723)
6672
      {
6673
        set_ndb_err(thd, err);
6674
        ret= ndb_to_mysql_error(&err);
6675
      }
6676
    }
6677
    VOID(pthread_mutex_unlock(&LOCK_open));
6678 6679
  }
  DBUG_RETURN(ret);      
6680 6681
}

6682
/* handlerton drop_database hook: drop NDB tables, then log the schema op */
static void ndbcluster_drop_database(handlerton *hton, char *path)
{
  DBUG_ENTER("ndbcluster_drop_database");
#ifdef HAVE_NDB_BINLOG
  /*
    Don't allow drop database unless
    schema distribution table is setup
  */
  if (!ndb_schema_share)
  {
    DBUG_PRINT("info", ("Schema distribution table not setup"));
    DBUG_VOID_RETURN;
    //DBUG_RETURN(HA_ERR_NO_CONNECTION);
  }
#endif
  ndbcluster_drop_database_impl(path);
#ifdef HAVE_NDB_BINLOG
  char db[FN_REFLEN];
  THD *thd= current_thd;
  ha_ndbcluster::set_dbname(path, db);
  /* Distribute the DROP DATABASE to other mysqld's via the schema table */
  ndbcluster_log_schema_op(thd, 0,
                           thd->query, thd->query_length,
                           db, "", 0, 0, SOT_DROP_DB, 0, 0, 0);
#endif
  DBUG_VOID_RETURN;
}
6708

6709 6710 6711 6712 6713 6714
/*
  Discover one table from NDB and install it locally.  A fresh LEX is
  swapped in around ha_create_table_from_engine() so discovery does not
  disturb the statement currently being parsed/executed.
*/
int ndb_create_table_from_engine(THD *thd, const char *db,
                                 const char *table_name)
{
  LEX *old_lex= thd->lex, newlex;
  thd->lex= &newlex;
  newlex.current_select= NULL;
  lex_start(thd);
  int res= ha_create_table_from_engine(thd, db, table_name);
  thd->lex= old_lex;
  return res;
}

6721 6722 6723
/*
  find all tables in ndb and discover those needed
*/
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
6724
/*
  Find all user tables in NDB and discover those whose local frm is
  missing or out of date.  Retries up to 5 times for tables whose
  dictionary entry could not yet be fetched.
  Returns 0 when everything was handled, otherwise the negated count of
  skipped + unhandled tables.
*/
int ndbcluster_find_all_files(THD *thd)
{
  Ndb* ndb;
  char key[FN_REFLEN];
  NDBDICT *dict;
  int unhandled, retries= 5, skipped;
  DBUG_ENTER("ndbcluster_find_all_files");

  if (!(ndb= check_ndb_in_thd(thd)))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);

  dict= ndb->getDictionary();

  LINT_INIT(unhandled);
  LINT_INIT(skipped);
  do
  {
    NdbDictionary::Dictionary::List list;
    if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0)
      ERR_RETURN(dict->getNdbError());
    unhandled= 0;
    skipped= 0;
    retries--;
    for (uint i= 0 ; i < list.count ; i++)
    {
      NDBDICT::List::Element& elmt= list.elements[i];
      /* Skip internal temporary and blob-part tables */
      if (IS_TMP_PREFIX(elmt.name) || IS_NDB_BLOB_PREFIX(elmt.name))
      {
        DBUG_PRINT("info", ("Skipping %s.%s in NDB", elmt.database, elmt.name));
        continue;
      }
      DBUG_PRINT("info", ("Found %s.%s in NDB", elmt.database, elmt.name));
      if (elmt.state != NDBOBJ::StateOnline &&
          elmt.state != NDBOBJ::StateBackup &&
          elmt.state != NDBOBJ::StateBuilding)
      {
        sql_print_information("NDB: skipping setup table %s.%s, in state %d",
                              elmt.database, elmt.name, elmt.state);
        skipped++;
        continue;
      }

      ndb->setDatabaseName(elmt.database);
      Ndb_table_guard ndbtab_g(dict, elmt.name);
      const NDBTAB *ndbtab= ndbtab_g.get_table();
      if (!ndbtab)
      {
        /* Only log once the retry budget is exhausted */
        if (retries == 0)
          sql_print_error("NDB: failed to setup table %s.%s, error: %d, %s",
                          elmt.database, elmt.name,
                          dict->getNdbError().code,
                          dict->getNdbError().message);
        unhandled++;
        continue;
      }

      if (ndbtab->getFrmLength() == 0)
        continue;

      /* check if database exists */
      char *end= key +
        build_table_filename(key, sizeof(key), elmt.database, "", "", 0);
      if (my_access(key, F_OK))
      {
        /* no such database defined, skip table */
        continue;
      }
      /* finalize construction of path */
      end+= tablename_to_filename(elmt.name, end,
                                  sizeof(key)-(end-key));
      const void *data= 0, *pack_data= 0;
      uint length, pack_length;
      int discover= 0;
      if (readfrm(key, &data, &length) ||
          packfrm(data, length, &pack_data, &pack_length))
      {
        discover= 1;
        sql_print_information("NDB: missing frm for %s.%s, discovering...",
                              elmt.database, elmt.name);
      }
      else if (cmp_frm(ndbtab, pack_data, pack_length))
      {
        /* ndb_share reference temporary */
        NDB_SHARE *share= get_share(key, 0, FALSE);
        if (share)
        {
          DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
                                   share->key, share->use_count));
        }
        if (!share || get_ndb_share_state(share) != NSS_ALTERED)
        {
          discover= 1;
          sql_print_information("NDB: mismatch in frm for %s.%s, discovering...",
                                elmt.database, elmt.name);
        }
        if (share)
        {
          /* ndb_share reference temporary free */
          DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
                                   share->key, share->use_count));
          free_share(&share);
        }
      }
      my_free((char*) data, MYF(MY_ALLOW_ZERO_PTR));
      my_free((char*) pack_data, MYF(MY_ALLOW_ZERO_PTR));

      pthread_mutex_lock(&LOCK_open);
      if (discover)
      {
        /* ToDo 4.1 database needs to be created if missing */
        if (ndb_create_table_from_engine(thd, elmt.database, elmt.name))
        {
          /* ToDo 4.1 handle error */
        }
      }
#ifdef HAVE_NDB_BINLOG
      else
      {
        /* set up replication for this table */
        ndbcluster_create_binlog_setup(ndb, key, end-key,
                                       elmt.database, elmt.name,
                                       TRUE);
      }
#endif
      pthread_mutex_unlock(&LOCK_open);
    }
  }
  while (unhandled && retries);

  DBUG_RETURN(-(skipped + unhandled));
}
6855

6856 6857 6858
/*
  handlerton find_files hook: reconcile the list of on-disk frm files for
  database 'db' with the tables that exist in NDB.
  - tables in NDB without a local frm are discovered (frm created);
  - local .ndb files whose table no longer exists in NDB are removed;
  - local non-NDB tables shadowing an NDB table produce a warning;
  - in the replication database the schema table is hidden from users.
*/
int ndbcluster_find_files(handlerton *hton, THD *thd,
                          const char *db,
                          const char *path,
                          const char *wild, bool dir, List<char> *files)
{
  DBUG_ENTER("ndbcluster_find_files");
  DBUG_PRINT("enter", ("db: %s", db));
  { // extra bracket to avoid gcc 2.95.3 warning
  uint i;
  Ndb* ndb;
  char name[FN_REFLEN];
  HASH ndb_tables, ok_tables;
  NDBDICT::List list;

  if (!(ndb= check_ndb_in_thd(thd)))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);

  if (dir)
    DBUG_RETURN(0); // Discover of databases not yet supported

  // List tables in NDB
  NDBDICT *dict= ndb->getDictionary();
  if (dict->listObjects(list, 
                        NdbDictionary::Object::UserTable) != 0)
    ERR_RETURN(dict->getNdbError());

  if (hash_init(&ndb_tables, system_charset_info,list.count,0,0,
                (hash_get_key)tables_get_key,0,0))
  {
    DBUG_PRINT("error", ("Failed to init HASH ndb_tables"));
    DBUG_RETURN(-1);
  }

  if (hash_init(&ok_tables, system_charset_info,32,0,0,
                (hash_get_key)tables_get_key,0,0))
  {
    DBUG_PRINT("error", ("Failed to init HASH ok_tables"));
    hash_free(&ndb_tables);
    DBUG_RETURN(-1);
  }  

  for (i= 0 ; i < list.count ; i++)
  {
    NDBDICT::List::Element& elmt= list.elements[i];
    /* Skip internal temporary and blob-part tables */
    if (IS_TMP_PREFIX(elmt.name) || IS_NDB_BLOB_PREFIX(elmt.name))
    {
      DBUG_PRINT("info", ("Skipping %s.%s in NDB", elmt.database, elmt.name));
      continue;
    }
    DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name));

    // Add only tables that belongs to db
    if (my_strcasecmp(system_charset_info, elmt.database, db))
      continue;

    // Apply wildcard to list of tables in NDB
    if (wild)
    {
      if (lower_case_table_names)
      {
        if (wild_case_compare(files_charset_info, elmt.name, wild))
          continue;
      }
      else if (wild_compare(elmt.name,wild,0))
        continue;
    }
    DBUG_PRINT("info", ("Inserting %s into ndb_tables hash", elmt.name));     
    my_hash_insert(&ndb_tables, (byte*)thd->strdup(elmt.name));
  }

  char *file_name;
  List_iterator<char> it(*files);
  List<char> delete_list;
  while ((file_name=it++))
  {
    bool file_on_disk= FALSE;
    DBUG_PRINT("info", ("%s", file_name));     
    if (hash_search(&ndb_tables, file_name, strlen(file_name)))
    {
      DBUG_PRINT("info", ("%s existed in NDB _and_ on disk ", file_name));
      file_on_disk= TRUE;
    }

    // Check for .ndb file with this name
    build_table_filename(name, sizeof(name), db, file_name, ha_ndb_ext, 0);
    DBUG_PRINT("info", ("Check access for %s", name));
    if (my_access(name, F_OK))
    {
      DBUG_PRINT("info", ("%s did not exist on disk", name));     
      // .ndb file did not exist on disk, another table type
      if (file_on_disk)
      {
        // Ignore this ndb table
        gptr record=  hash_search(&ndb_tables, file_name, strlen(file_name));
        DBUG_ASSERT(record);
        hash_delete(&ndb_tables, record);
        push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                            ER_TABLE_EXISTS_ERROR,
                            "Local table %s.%s shadows ndb table",
                            db, file_name);
      }
      continue;
    }
    if (file_on_disk) 
    {
      // File existed in NDB and as frm file, put in ok_tables list
      my_hash_insert(&ok_tables, (byte*)file_name);
      continue;
    }
    DBUG_PRINT("info", ("%s existed on disk", name));     
    // The .ndb file exists on disk, but it's not in list of tables in ndb
    // Verify that handler agrees table is gone.
    if (ndbcluster_table_exists_in_engine(hton, thd, db, file_name) == HA_ERR_NO_SUCH_TABLE)    
    {
      DBUG_PRINT("info", ("NDB says %s does not exists", file_name));     
      it.remove();
      // Put in list of tables to remove from disk
      delete_list.push_back(thd->strdup(file_name));
    }
  }

#ifdef HAVE_NDB_BINLOG
  /* setup logging to binlog for all discovered tables */
  {
    char *end, *end1= name +
      build_table_filename(name, sizeof(name), db, "", "", 0);
    for (i= 0; i < ok_tables.records; i++)
    {
      file_name= (char*)hash_element(&ok_tables, i);
      end= end1 +
        tablename_to_filename(file_name, end1, sizeof(name) - (end1 - name));
      pthread_mutex_lock(&LOCK_open);
      ndbcluster_create_binlog_setup(ndb, name, end-name,
                                     db, file_name, TRUE);
      pthread_mutex_unlock(&LOCK_open);
    }
  }
#endif

  // Check for new files to discover
  DBUG_PRINT("info", ("Checking for new files to discover"));       
  List<char> create_list;
  for (i= 0 ; i < ndb_tables.records ; i++)
  {
    file_name= hash_element(&ndb_tables, i);
    if (!hash_search(&ok_tables, file_name, strlen(file_name)))
    {
      build_table_filename(name, sizeof(name), db, file_name, reg_ext, 0);
      if (my_access(name, F_OK))
      {
        DBUG_PRINT("info", ("%s must be discovered", file_name));
        // File is in list of ndb tables and not in ok_tables
        // This table need to be created
        create_list.push_back(thd->strdup(file_name));
      }
    }
  }

  // Lock mutex before deleting and creating frm files
  pthread_mutex_lock(&LOCK_open);

  if (!global_read_lock)
  {
    // Delete old files
    List_iterator_fast<char> it3(delete_list);
    while ((file_name=it3++))
    {
      DBUG_PRINT("info", ("Remove table %s/%s", db, file_name));
      // Delete the table and all related files
      TABLE_LIST table_list;
      bzero((char*) &table_list,sizeof(table_list));
      table_list.db= (char*) db;
      table_list.alias= table_list.table_name= (char*)file_name;
      (void)mysql_rm_table_part2(thd, &table_list,
                                 /* if_exists */ FALSE,
                                 /* drop_temporary */ FALSE,
                                 /* drop_view */ FALSE,
                                 /* dont_log_query*/ TRUE);
      /* Clear error message that is returned when table is deleted */
      thd->clear_error();
    }
  }

  // Create new files
  List_iterator_fast<char> it2(create_list);
  while ((file_name=it2++))
  {  
    DBUG_PRINT("info", ("Table %s need discovery", file_name));
    if (ndb_create_table_from_engine(thd, db, file_name) == 0)
      files->push_back(thd->strdup(file_name)); 
  }

  pthread_mutex_unlock(&LOCK_open);

  hash_free(&ok_tables);
  hash_free(&ndb_tables);

  // Delete schema file from files
  if (!strcmp(db, NDB_REP_DB))
  {
    uint count = 0;
    while (count++ < files->elements)
    {
      file_name = (char *)files->pop();
      if (!strcmp(file_name, NDB_SCHEMA_TABLE))
      {
        DBUG_PRINT("info", ("skip %s.%s table, it should be hidden to user",
                   NDB_REP_DB, NDB_SCHEMA_TABLE));
        continue;
      }
      files->push_back(file_name); 
    }
  }
  } // extra bracket to avoid gcc 2.95.3 warning
  DBUG_RETURN(0);    
}


/*
  Initialise all gloal variables before creating 
  a NDB Cluster table handler
 */

7079 7080 7081
/* Call back after cluster connect */
static int connect_callback()
{
7082
  pthread_mutex_lock(&LOCK_ndb_util_thread);
7083
  update_status_variables(g_ndb_cluster_connection);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
7084 7085 7086 7087 7088 7089 7090

  uint node_id, i= 0;
  Ndb_cluster_connection_node_iter node_iter;
  memset((void *)g_node_id_map, 0xFFFF, sizeof(g_node_id_map));
  while ((node_id= g_ndb_cluster_connection->get_next_node(node_iter)))
    g_node_id_map[node_id]= i++;

7091
  pthread_cond_signal(&COND_ndb_util_thread);
7092
  pthread_mutex_unlock(&LOCK_ndb_util_thread);
7093 7094 7095
  return 0;
}

7096
extern int ndb_dictionary_is_mysqld;
7097
extern pthread_mutex_t LOCK_plugin;
7098

7099
/*
  Initialize the NDB Cluster storage engine plugin.

  Fills in the handlerton callbacks, creates the global cluster
  connection and global Ndb object, connects to the cluster (or starts
  the background connect thread), and spawns the utility thread.

  p        in: the handlerton allocated by the plugin framework
  RETURN   FALSE on success, TRUE on failure (handlerton is then
           marked SHOW_OPTION_DISABLED)
*/
static int ndbcluster_init(void *p)
{
  int res;
  DBUG_ENTER("ndbcluster_init");

  if (ndbcluster_inited)
    DBUG_RETURN(FALSE);

  /*
    Below we create new THD's. They'll need LOCK_plugin, but it's taken now by
    plugin initialization code. Release it to avoid deadlocks.  It's safe, as
    there're no threads that may concurrently access plugin control structures.
  */
  pthread_mutex_unlock(&LOCK_plugin);

  /* Synchronization primitives shared with the utility thread */
  pthread_mutex_init(&ndbcluster_mutex,MY_MUTEX_INIT_FAST);
  pthread_mutex_init(&LOCK_ndb_util_thread, MY_MUTEX_INIT_FAST);
  pthread_cond_init(&COND_ndb_util_thread, NULL);
  pthread_cond_init(&COND_ndb_util_ready, NULL);
  ndb_util_thread_running= -1;   /* -1 = not yet started, see wait loop below */
  ndbcluster_terminating= 0;
  ndb_dictionary_is_mysqld= 1;
  ndbcluster_hton= (handlerton *)p;

  {
    handlerton *h= ndbcluster_hton;
    h->state=            SHOW_OPTION_YES;
    h->db_type=          DB_TYPE_NDBCLUSTER;
    h->close_connection= ndbcluster_close_connection;
    h->commit=           ndbcluster_commit;
    h->rollback=         ndbcluster_rollback;
    h->create=           ndbcluster_create_handler; /* Create a new handler */
    h->drop_database=    ndbcluster_drop_database;  /* Drop a database */
    h->panic=            ndbcluster_end;            /* Panic call */
    h->show_status=      ndbcluster_show_status;    /* Show status */
    h->alter_tablespace= ndbcluster_alter_tablespace;    /* Show status */
    h->partition_flags=  ndbcluster_partition_flags; /* Partition flags */
    h->alter_table_flags=ndbcluster_alter_table_flags; /* Alter table flags */
    h->fill_files_table= ndbcluster_fill_files_table;
#ifdef HAVE_NDB_BINLOG
    ndbcluster_binlog_init_handlerton();
#endif
    h->flags=            HTON_CAN_RECREATE | HTON_TEMPORARY_NOT_SUPPORTED;
    h->discover=         ndbcluster_discover;
    h->find_files= ndbcluster_find_files;
    h->table_exists_in_engine= ndbcluster_table_exists_in_engine;
  }

  // Initialize ndb interface
  ndb_init_internal();

  // Set connectstring if specified
  if (opt_ndbcluster_connectstring != 0)
    DBUG_PRINT("connectstring", ("%s", opt_ndbcluster_connectstring));     
  if ((g_ndb_cluster_connection=
       new Ndb_cluster_connection(opt_ndbcluster_connectstring)) == 0)
  {
    DBUG_PRINT("error",("Ndb_cluster_connection(%s)",
                        opt_ndbcluster_connectstring));
    my_errno= HA_ERR_OUT_OF_MEM;
    goto ndbcluster_init_error;
  }
  {
    /* Name the API node so it can be identified in cluster management */
    char buf[128];
    my_snprintf(buf, sizeof(buf), "mysqld --server-id=%lu", server_id);
    g_ndb_cluster_connection->set_name(buf);
  }
  g_ndb_cluster_connection->set_optimized_node_selection
    (opt_ndb_optimized_node_selection);

  // Create a Ndb object to open the connection  to NDB
  if ( (g_ndb= new Ndb(g_ndb_cluster_connection, "sys")) == 0 )
  {
    DBUG_PRINT("error", ("failed to create global ndb object"));
    my_errno= HA_ERR_OUT_OF_MEM;
    goto ndbcluster_init_error;
  }
  if (g_ndb->init() != 0)
  {
    ERR_PRINT (g_ndb->getNdbError());
    goto ndbcluster_init_error;
  }

  /*
    connect() returns 0 = connected now, 1 = retry possible (start the
    background connect thread), -1 = permanent failure.
  */
  if ((res= g_ndb_cluster_connection->connect(0,0,0)) == 0)
  {
    connect_callback();
    DBUG_PRINT("info",("NDBCLUSTER storage engine at %s on port %d",
                       g_ndb_cluster_connection->get_connected_host(),
                       g_ndb_cluster_connection->get_connected_port()));
    g_ndb_cluster_connection->wait_until_ready(10,3);
  } 
  else if (res == 1)
  {
    if (g_ndb_cluster_connection->start_connect_thread(connect_callback)) 
    {
      DBUG_PRINT("error", ("g_ndb_cluster_connection->start_connect_thread()"));
      goto ndbcluster_init_error;
    }
#ifndef DBUG_OFF
    {
      char buf[1024];
      DBUG_PRINT("info",
                 ("NDBCLUSTER storage engine not started, "
                  "will connect using %s",
                  g_ndb_cluster_connection->
                  get_connectstring(buf,sizeof(buf))));
    }
#endif
  }
  else
  {
    DBUG_ASSERT(res == -1);
    DBUG_PRINT("error", ("permanent error"));
    goto ndbcluster_init_error;
  }
  
  (void) hash_init(&ndbcluster_open_tables,system_charset_info,32,0,0,
                   (hash_get_key) ndbcluster_get_key,0,0);
#ifdef HAVE_NDB_BINLOG
  /* start the ndb injector thread */
  if (ndbcluster_binlog_start())
    goto ndbcluster_init_error;
#endif /* HAVE_NDB_BINLOG */

  ndb_cache_check_time = opt_ndb_cache_check_time;
  // Create utility thread
  pthread_t tmp;
  if (pthread_create(&tmp, &connection_attrib, ndb_util_thread_func, 0))
  {
    DBUG_PRINT("error", ("Could not create ndb utility thread"));
    hash_free(&ndbcluster_open_tables);
    pthread_mutex_destroy(&ndbcluster_mutex);
    pthread_mutex_destroy(&LOCK_ndb_util_thread);
    pthread_cond_destroy(&COND_ndb_util_thread);
    pthread_cond_destroy(&COND_ndb_util_ready);
    goto ndbcluster_init_error;
  }

  /* Wait for the util thread to start */
  pthread_mutex_lock(&LOCK_ndb_util_thread);
  while (ndb_util_thread_running < 0)
    pthread_cond_wait(&COND_ndb_util_ready, &LOCK_ndb_util_thread);
  pthread_mutex_unlock(&LOCK_ndb_util_thread);
  
  /* ndb_util_thread_running == 0 means the thread started but exited */
  if (!ndb_util_thread_running)
  {
    DBUG_PRINT("error", ("ndb utility thread exited prematurely"));
    hash_free(&ndbcluster_open_tables);
    pthread_mutex_destroy(&ndbcluster_mutex);
    pthread_mutex_destroy(&LOCK_ndb_util_thread);
    pthread_cond_destroy(&COND_ndb_util_thread);
    pthread_cond_destroy(&COND_ndb_util_ready);
    goto ndbcluster_init_error;
  }

  /* Re-acquire LOCK_plugin: the plugin framework expects it held on return */
  pthread_mutex_lock(&LOCK_plugin);

  ndbcluster_inited= 1;
  DBUG_RETURN(FALSE);

ndbcluster_init_error:
  if (g_ndb)
    delete g_ndb;
  g_ndb= NULL;
  if (g_ndb_cluster_connection)
    delete g_ndb_cluster_connection;
  g_ndb_cluster_connection= NULL;
  ndbcluster_hton->state= SHOW_OPTION_DISABLED;               // If we couldn't use handler

  /* Re-acquire LOCK_plugin on the error path as well (caller expects it) */
  pthread_mutex_lock(&LOCK_plugin);

  DBUG_RETURN(TRUE);
}

7273
/*
  Shut down the NDB Cluster storage engine.

  Stops the utility thread, frees any table shares still open (binlog
  builds only, with a debug diagnostic for leaked shares), deletes the
  global Ndb object and cluster connection, and destroys the mutexes
  and condition variables created in ndbcluster_init().
  Always returns 0; is a no-op if the engine was never initialized.
*/
static int ndbcluster_end(handlerton *hton, ha_panic_function type)
{
  DBUG_ENTER("ndbcluster_end");

  if (!ndbcluster_inited)
    DBUG_RETURN(0);
  ndbcluster_inited= 0;

  /* wait for util thread to finish */
  sql_print_information("Stopping Cluster Utility thread");
  pthread_mutex_lock(&LOCK_ndb_util_thread);
  ndbcluster_terminating= 1;
  pthread_cond_signal(&COND_ndb_util_thread);
  while (ndb_util_thread_running > 0)
    pthread_cond_wait(&COND_ndb_util_ready, &LOCK_ndb_util_thread);
  pthread_mutex_unlock(&LOCK_ndb_util_thread);


#ifdef HAVE_NDB_BINLOG
  {
    /* Force-free any shares that still have references; in debug builds
       report them on stderr since they indicate a reference leak */
    pthread_mutex_lock(&ndbcluster_mutex);
    while (ndbcluster_open_tables.records)
    {
      NDB_SHARE *share=
        (NDB_SHARE*) hash_element(&ndbcluster_open_tables, 0);
#ifndef DBUG_OFF
      fprintf(stderr, "NDB: table share %s with use_count %d not freed\n",
              share->key, share->use_count);
#endif
      ndbcluster_real_free_share(&share);
    }
    pthread_mutex_unlock(&ndbcluster_mutex);
  }
#endif
  hash_free(&ndbcluster_open_tables);

  if (g_ndb)
  {
#ifndef DBUG_OFF
    /* Report NDB API objects that were created but never released */
    Ndb::Free_list_usage tmp;
    tmp.m_name= 0;
    while (g_ndb->get_free_list_usage(&tmp))
    {
      uint leaked= (uint) tmp.m_created - tmp.m_free;
      if (leaked)
        fprintf(stderr, "NDB: Found %u %s%s that %s not been released\n",
                leaked, tmp.m_name,
                (leaked == 1)?"":"'s",
                (leaked == 1)?"has":"have");
    }
#endif
    delete g_ndb;
    g_ndb= NULL;
  }
  delete g_ndb_cluster_connection;
  g_ndb_cluster_connection= NULL;

  // cleanup ndb interface
  ndb_end_internal();

  pthread_mutex_destroy(&ndbcluster_mutex);
  pthread_mutex_destroy(&LOCK_ndb_util_thread);
  pthread_cond_destroy(&COND_ndb_util_thread);
  pthread_cond_destroy(&COND_ndb_util_ready);
  DBUG_RETURN(0);
}

7340 7341 7342
void ha_ndbcluster::print_error(int error, myf errflag)
{
  DBUG_ENTER("ha_ndbcluster::print_error");
7343
  DBUG_PRINT("enter", ("error: %d", error));
7344 7345

  if (error == HA_ERR_NO_PARTITION_FOUND)
7346
    m_part_info->print_no_partition_found(table);
7347 7348 7349 7350 7351 7352
  else
    handler::print_error(error, errflag);
  DBUG_VOID_RETURN;
}


7353 7354 7355 7356 7357
/*
  Static error print function called from
  static handler method ndbcluster_commit
  and ndbcluster_rollback
*/
7358 7359

void ndbcluster_print_error(int error, const NdbOperation *error_op)
7360
{
7361
  DBUG_ENTER("ndbcluster_print_error");
7362
  TABLE_SHARE share;
7363
  const char *tab_name= (error_op) ? error_op->getTableName() : "";
7364 7365 7366 7367
  share.db.str= (char*) "";
  share.db.length= 0;
  share.table_name.str= (char *) tab_name;
  share.table_name.length= strlen(tab_name);
7368
  ha_ndbcluster error_handler(ndbcluster_hton, &share);
7369
  error_handler.print_error(error, MYF(0));
ndbdev@ndbmaster.mysql.com's avatar
ndbdev@ndbmaster.mysql.com committed
7370
  DBUG_VOID_RETURN;
7371
}
7372

7373 7374 7375
/**
 * Set a given location from full pathname to database name
 *
7376
 */
7377
void ha_ndbcluster::set_dbname(const char *path_name, char *dbname)
7378
{
7379 7380 7381 7382
  char *end, *ptr, *tmp_name;
  char tmp_buff[FN_REFLEN];
 
  tmp_name= tmp_buff;
7383
  /* Scan name from the end */
7384 7385 7386 7387 7388 7389
  ptr= strend(path_name)-1;
  while (ptr >= path_name && *ptr != '\\' && *ptr != '/') {
    ptr--;
  }
  ptr--;
  end= ptr;
7390 7391 7392 7393
  while (ptr >= path_name && *ptr != '\\' && *ptr != '/') {
    ptr--;
  }
  uint name_len= end - ptr;
7394 7395
  memcpy(tmp_name, ptr + 1, name_len);
  tmp_name[name_len]= '\0';
7396 7397
#ifdef __WIN__
  /* Put to lower case */
7398
  
7399
  ptr= tmp_name;
7400 7401
  
  while (*ptr != '\0') {
7402
    *ptr= tolower(*ptr);
7403 7404 7405
    ptr++;
  }
#endif
7406
  filename_to_tablename(tmp_name, dbname, FN_REFLEN);
7407 7408
}

7409 7410 7411 7412 7413 7414 7415 7416 7417
/*
  Set m_dbname from full pathname to table file.
  Convenience wrapper storing the result in this handler's m_dbname.
 */

void ha_ndbcluster::set_dbname(const char *path_name)
{
  set_dbname(path_name, m_dbname);
}

7418 7419 7420 7421 7422 7423 7424
/**
 * Set a given location from full pathname to table file
 *
 */
void
ha_ndbcluster::set_tabname(const char *path_name, char * tabname)
{
7425 7426 7427 7428
  char *end, *ptr, *tmp_name;
  char tmp_buff[FN_REFLEN];

  tmp_name= tmp_buff;
7429
  /* Scan name from the end */
7430 7431
  end= strend(path_name)-1;
  ptr= end;
7432 7433 7434
  while (ptr >= path_name && *ptr != '\\' && *ptr != '/') {
    ptr--;
  }
7435
  uint name_len= end - ptr;
7436 7437
  memcpy(tmp_name, ptr + 1, end - ptr);
  tmp_name[name_len]= '\0';
7438 7439
#ifdef __WIN__
  /* Put to lower case */
7440
  ptr= tmp_name;
7441 7442 7443 7444 7445 7446
  
  while (*ptr != '\0') {
    *ptr= tolower(*ptr);
    ptr++;
  }
#endif
7447
  filename_to_tablename(tmp_name, tabname, FN_REFLEN);
7448 7449 7450
}

/*
  Set m_tabname from full pathname to table file.
  Convenience wrapper storing the result in this handler's m_tabname.
 */

void ha_ndbcluster::set_tabname(const char *path_name)
{
  set_tabname(path_name, m_tabname);
}


/*
  Estimate the number of rows in the key range [min_key, max_key] for
  index inx.

  - For hash (unique/primary key) indexes a partial key cannot be read:
    return HA_POS_ERROR so the optimizer will not choose a range scan.
  - A full-key lookup on a hash index matches at most one row: return 1.
  - For ordered indexes with index statistics available, ask
    NdbIndexStat::records_in_range (possibly contacting the data nodes).
  - Otherwise fall back to the constant guess 10.

  Fix: `rows` is examined after the do-block (the `rows == 0` test)
  even on error paths where no break assigned it, which read an
  uninitialized variable; it is now initialized to 0.
*/
ha_rows 
ha_ndbcluster::records_in_range(uint inx, key_range *min_key,
                                key_range *max_key)
{
  KEY *key_info= table->key_info + inx;
  uint key_length= key_info->key_length;
  NDB_INDEX_TYPE idx_type= get_index_type(inx);  

  DBUG_ENTER("records_in_range");
  // Prevent partial read of hash indexes by returning HA_POS_ERROR
  if ((idx_type == UNIQUE_INDEX || idx_type == PRIMARY_KEY_INDEX) &&
      ((min_key && min_key->length < key_length) ||
       (max_key && max_key->length < key_length)))
    DBUG_RETURN(HA_POS_ERROR);
  
  // Read from hash index with full key
  // This is a "const" table which returns only one record!      
  if ((idx_type != ORDERED_INDEX) &&
      ((min_key && min_key->length == key_length) || 
       (max_key && max_key->length == key_length)))
    DBUG_RETURN(1);
  
  if ((idx_type == PRIMARY_KEY_ORDERED_INDEX ||
       idx_type == UNIQUE_ORDERED_INDEX ||
       idx_type == ORDERED_INDEX) &&
    m_index[inx].index_stat != NULL)
  {
    NDB_INDEX_DATA& d=m_index[inx];
    const NDBINDEX* index= d.index;
    Ndb* ndb=get_ndb();
    NdbTransaction* trans=NULL;
    NdbIndexScanOperation* op=NULL;
    int res=0;
    /* Initialized: read after the loop even when a break skipped the
       assignment (previously an uninitialized read) */
    Uint64 rows= 0;

    do
    {
      // We must provide approx table rows
      Uint64 table_rows=0;
      Ndb_local_table_statistics *ndb_info= m_table_info;
      if (ndb_info->records != ~(ha_rows)0 && ndb_info->records != 0)
      {
        table_rows = ndb_info->records;
        DBUG_PRINT("info", ("use info->records: %lu", (ulong) table_rows));
      }
      else
      {
        Ndb_statistics stat;
        if ((res=ndb_get_table_statistics(this, TRUE, ndb, m_table, &stat)))
          break;
        table_rows=stat.row_count;
        DBUG_PRINT("info", ("use db row_count: %lu", (ulong) table_rows));
        if (table_rows == 0) {
          // Problem if autocommit=0
#ifdef ndb_get_table_statistics_uses_active_trans
          rows=0;
          break;
#endif
        }
      }

      // Define scan op for the range
      if ((trans=m_active_trans) == NULL || 
	  trans->commitStatus() != NdbTransaction::Started)
      {
        DBUG_PRINT("info", ("no active trans"));
        if (! (trans=ndb->startTransaction()))
          ERR_BREAK(ndb->getNdbError(), res);
      }
      if (! (op=trans->getNdbIndexScanOperation(index, (NDBTAB*)m_table)))
        ERR_BREAK(trans->getNdbError(), res);
      if ((op->readTuples(NdbOperation::LM_CommittedRead)) == -1)
        ERR_BREAK(op->getNdbError(), res);
      const key_range *keys[2]={ min_key, max_key };
      if ((res=set_bounds(op, inx, TRUE, keys)) != 0)
        break;

      // Decide if db should be contacted
      int flags=0;
      if (d.index_stat_query_count < d.index_stat_cache_entries ||
          (d.index_stat_update_freq != 0 &&
           d.index_stat_query_count % d.index_stat_update_freq == 0))
      {
        DBUG_PRINT("info", ("force stat from db"));
        flags|=NdbIndexStat::RR_UseDb;
      }
      if (d.index_stat->records_in_range(index, op, table_rows, &rows, flags) == -1)
        ERR_BREAK(d.index_stat->getNdbError(), res);
      d.index_stat_query_count++;
    } while (0);

    /* Never report 0 rows from a temporary (committed-read) transaction */
    if (trans != m_active_trans && rows == 0)
      rows = 1;
    if (trans != m_active_trans && trans != NULL)
      ndb->closeTransaction(trans);
    if (res != 0)
      DBUG_RETURN(HA_POS_ERROR);
    DBUG_RETURN(rows);
  }

  DBUG_RETURN(10); /* Good guess when you don't know anything */
}

7563
/*
  Return the handler table flags; when exact record counts are not
  maintained, mask out HA_STATS_RECORDS_IS_EXACT.
*/
ulonglong ha_ndbcluster::table_flags(void) const
{
  ulonglong flags= m_table_flags;
  if (m_ha_not_exact_count)
    flags&= ~HA_STATS_RECORDS_IS_EXACT;
  return flags;
}
/* Storage engine name as reported to the server layer */
const char * ha_ndbcluster::table_type() const 
{
  return("NDBCLUSTER");
}
/* Maximum row length supported by NDB */
uint ha_ndbcluster::max_supported_record_length() const
{ 
  return NDB_MAX_TUPLE_SIZE;
}
/* Maximum number of indexes per table (server-side limit) */
uint ha_ndbcluster::max_supported_keys() const
{
  return MAX_KEY;
}
/* Maximum number of columns in one key (NDB limit) */
uint ha_ndbcluster::max_supported_key_parts() const 
{
  return NDB_MAX_NO_OF_ATTRIBUTES_IN_KEY;
}
/* Maximum total key length (NDB limit) */
uint ha_ndbcluster::max_supported_key_length() const
{
  return NDB_MAX_KEY_SIZE;
}
pekka@mysql.com's avatar
pekka@mysql.com committed
7589 7590 7591 7592
/* Maximum length of a single key part (same NDB limit as the full key) */
uint ha_ndbcluster::max_supported_key_part_length() const
{
  return NDB_MAX_KEY_SIZE;
}
7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604 7605 7606 7607 7608 7609 7610 7611 7612 7613
/* TRUE on little-endian builds, FALSE when compiled big-endian */
bool ha_ndbcluster::low_byte_first() const
{ 
#ifdef WORDS_BIGENDIAN
  return FALSE;
#else
  return TRUE;
#endif
}
/*
  Index type name shown to the user: ordered index variants report
  "BTREE", hash-only (unique/primary key) indexes report "HASH".
*/
const char* ha_ndbcluster::index_type(uint key_number)
{
  NDB_INDEX_TYPE t= get_index_type(key_number);
  if (t == ORDERED_INDEX ||
      t == UNIQUE_ORDERED_INDEX ||
      t == PRIMARY_KEY_ORDERED_INDEX)
    return "BTREE";
  /* UNIQUE_INDEX, PRIMARY_KEY_INDEX and anything else */
  return "HASH";
}
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
7614

7615 7616
/*
  Query cache behavior: HA_CACHE_TBL_ASKTRANSACT makes the query cache
  consult the engine (register_query_cache_table /
  ndbcluster_cache_retrieval_allowed) before caching or reusing results.
*/
uint8 ha_ndbcluster::table_cache_type()
{
  DBUG_ENTER("ha_ndbcluster::table_cache_type=HA_CACHE_TBL_ASKTRANSACT");
  DBUG_RETURN(HA_CACHE_TBL_ASKTRANSACT);
}


/*
  Retrieve the commit count for a table, either from the cached value
  in its NDB_SHARE (when the util thread keeps it fresh, i.e.
  ndb_cache_check_time > 0) or by fetching table statistics from NDB.

  thd           in:  thread handle
  dbname        in:  database name
  tabname       in:  table name
  commit_count  out: the table's commit count

  RETURN  0 on success, non-zero on failure

  Fix: the check_ndb_in_thd and setDatabaseName failure paths
  previously returned while still holding share->mutex and without
  releasing the temporary share reference taken above, leaking the
  reference and leaving the mutex locked.  Both paths now unlock and
  free the share before returning.
*/
uint ndb_get_commitcount(THD *thd, char *dbname, char *tabname,
                         Uint64 *commit_count)
{
  char name[FN_REFLEN];
  NDB_SHARE *share;
  DBUG_ENTER("ndb_get_commitcount");

  build_table_filename(name, sizeof(name), dbname, tabname, "", 0);
  DBUG_PRINT("enter", ("name: %s", name));
  pthread_mutex_lock(&ndbcluster_mutex);
  if (!(share=(NDB_SHARE*) hash_search(&ndbcluster_open_tables,
                                       (byte*) name,
                                       strlen(name))))
  {
    pthread_mutex_unlock(&ndbcluster_mutex);
    DBUG_PRINT("info", ("Table %s not found in ndbcluster_open_tables", name));
    DBUG_RETURN(1);
  }
  /* ndb_share reference temporary, free below */
  share->use_count++;
  DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
                           share->key, share->use_count));
  pthread_mutex_unlock(&ndbcluster_mutex);

  pthread_mutex_lock(&share->mutex);
  if (ndb_cache_check_time > 0)
  {
    /* The util thread keeps share->commit_count up to date; use it */
    if (share->commit_count != 0)
    {
      *commit_count= share->commit_count;
#ifndef DBUG_OFF
      char buff[22];
#endif
      DBUG_PRINT("info", ("Getting commit_count: %s from share",
                          llstr(share->commit_count, buff)));
      pthread_mutex_unlock(&share->mutex);
      /* ndb_share reference temporary free */
      DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
                               share->key, share->use_count));
      free_share(&share);
      DBUG_RETURN(0);
    }
  }
  DBUG_PRINT("info", ("Get commit_count from NDB"));
  Ndb *ndb;
  if (!(ndb= check_ndb_in_thd(thd)))
  {
    /* release mutex and temporary share reference before failing */
    pthread_mutex_unlock(&share->mutex);
    free_share(&share);
    DBUG_RETURN(1);
  }
  if (ndb->setDatabaseName(dbname))
  {
    /* release mutex and temporary share reference before failing */
    pthread_mutex_unlock(&share->mutex);
    free_share(&share);
    ERR_RETURN(ndb->getNdbError());
  }
  /* Remember the lock generation so a concurrent local commit can be
     detected after we have fetched the statistics */
  uint lock= share->commit_count_lock;
  pthread_mutex_unlock(&share->mutex);

  struct Ndb_statistics stat;
  {
    Ndb_table_guard ndbtab_g(ndb->getDictionary(), tabname);
    if (ndbtab_g.get_table() == 0
        || ndb_get_table_statistics(NULL, FALSE, ndb, ndbtab_g.get_table(), &stat))
    {
      /* ndb_share reference temporary free */
      DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
                               share->key, share->use_count));
      free_share(&share);
      DBUG_RETURN(1);
    }
  }

  pthread_mutex_lock(&share->mutex);
  if (share->commit_count_lock == lock)
  {
#ifndef DBUG_OFF
    char buff[22];
#endif
    DBUG_PRINT("info", ("Setting commit_count to %s",
                        llstr(stat.commit_count, buff)));
    share->commit_count= stat.commit_count;
    *commit_count= stat.commit_count;
  }
  else
  {
    /* A local commit happened meanwhile; report 0 (unknown) */
    DBUG_PRINT("info", ("Discarding commit_count, comit_count_lock changed"));
    *commit_count= 0;
  }
  pthread_mutex_unlock(&share->mutex);
  /* ndb_share reference temporary free */
  DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
                           share->key, share->use_count));
  free_share(&share);
  DBUG_RETURN(0);
}


/*
  Check if a cached query can be used.
  This is done by comparing the supplied engine_data to commit_count of
  the table.
  The commit_count is either retrieved from the share for the table, where
  it has been cached by the util thread. If the util thread is not started,
  NDB has to be contacetd to retrieve the commit_count, this will introduce
  a small delay while waiting for NDB to answer.


  SYNOPSIS
  ndbcluster_cache_retrieval_allowed
    thd            thread handle
    full_name      concatenation of database name,
                   the null character '\0', and the table
                   name
    full_name_len  length of the full name,
                   i.e. len(dbname) + len(tablename) + 1

    engine_data    parameter retrieved when query was first inserted into
                   the cache. If the value of engine_data is changed,
                   all queries for this table should be invalidated.

  RETURN VALUE
    TRUE  Yes, use the query from cache
    FALSE No, don't use the cached query, and if engine_data
          has changed, all queries for this table should be invalidated

*/

static my_bool
ndbcluster_cache_retrieval_allowed(THD *thd,
                                   char *full_name, uint full_name_len,
                                   ulonglong *engine_data)
{
  Uint64 commit_count;
  bool is_autocommit= !(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN));
  /* full_name is "dbname\0tabname\0" — split it on the embedded NUL */
  char *dbname= full_name;
  char *tabname= dbname+strlen(dbname)+1;
#ifndef DBUG_OFF
  char buff[22], buff2[22];
#endif
  DBUG_ENTER("ndbcluster_cache_retrieval_allowed");
  DBUG_PRINT("enter", ("dbname: %s, tabname: %s, is_autocommit: %d",
                       dbname, tabname, is_autocommit));

  /* Never serve cached results inside an explicit transaction */
  if (!is_autocommit)
  {
    DBUG_PRINT("exit", ("No, don't use cache in transaction"));
    DBUG_RETURN(FALSE);
  }

  if (ndb_get_commitcount(thd, dbname, tabname, &commit_count))
  {
    *engine_data= 0; /* invalidate */
    DBUG_PRINT("exit", ("No, could not retrieve commit_count"));
    DBUG_RETURN(FALSE);
  }
  DBUG_PRINT("info", ("*engine_data: %s, commit_count: %s",
                      llstr(*engine_data, buff), llstr(commit_count, buff2)));
  if (commit_count == 0)
  {
    *engine_data= 0; /* invalidate */
    DBUG_PRINT("exit", ("No, local commit has been performed"))LL;
    DBUG_RETURN(FALSE);
  }
  else if (*engine_data != commit_count)
  {
    /* Table changed since the query was cached */
    *engine_data= commit_count; /* invalidate */
     DBUG_PRINT("exit", ("No, commit_count has changed"));
     DBUG_RETURN(FALSE);
   }

  DBUG_PRINT("exit", ("OK to use cache, engine_data: %s",
                      llstr(*engine_data, buff)));
  DBUG_RETURN(TRUE);
}


/**
   Register a table for use in the query cache. Fetch the commit_count
   for the table and return it in engine_data, this will later be used
   to check if the table has changed, before the cached query is reused.

   SYNOPSIS
   ha_ndbcluster::can_query_cache_table
    thd            thread handle
    full_name      concatenation of database name,
                   the null character '\0', and the table
                   name
    full_name_len  length of the full name,
                   i.e. len(dbname) + len(tablename) + 1
    qc_engine_callback  function to be called before using cache on this table
    engine_data    out, commit_count for this table

  RETURN VALUE
    TRUE  Yes, it's ok to cahce this query
    FALSE No, don't cach the query

*/

my_bool
ha_ndbcluster::register_query_cache_table(THD *thd,
                                          char *full_name, uint full_name_len,
                                          qc_engine_callback *engine_callback,
                                          ulonglong *engine_data)
{
  Uint64 commit_count;
#ifndef DBUG_OFF
  char buff[22];
#endif
  bool is_autocommit= !(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN));
  DBUG_ENTER("ha_ndbcluster::register_query_cache_table");
  DBUG_PRINT("enter",("dbname: %s, tabname: %s, is_autocommit: %d",
		      m_dbname, m_tabname, is_autocommit));

  /* Results produced inside an explicit transaction are not cacheable */
  if (!is_autocommit)
  {
    DBUG_PRINT("exit", ("Can't register table during transaction"));
    DBUG_RETURN(FALSE);
  }

  if (ndb_get_commitcount(thd, m_dbname, m_tabname, &commit_count))
  {
    *engine_data= 0;
    DBUG_PRINT("exit", ("Error, could not get commitcount"));
    DBUG_RETURN(FALSE);
  }
  /* Store commit count for later comparison and install the check callback */
  *engine_data= commit_count;
  *engine_callback= ndbcluster_cache_retrieval_allowed;
  DBUG_PRINT("exit", ("commit_count: %s", llstr(commit_count, buff)));
  /* commit_count == 0 means "unknown"; don't cache in that case */
  DBUG_RETURN(commit_count > 0);
}
7848

mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
7849

7850
/*
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
7851
  Handling the shared NDB_SHARE structure that is needed to
7852 7853 7854 7855 7856 7857
  provide table locking.
  It's also used for sharing data with other NDB handlers
  in the same MySQL Server. There is currently not much
  data we want to or can share.
 */

7858
/* Hash callback: returns the key (and its length) used to look up an
   NDB_SHARE in the ndbcluster_open_tables hash */
static byte *ndbcluster_get_key(NDB_SHARE *share,uint *length,
                                my_bool not_used __attribute__((unused)))
{
  *length= share->key_length;
  return (byte*) share->key;
}

7865

7866
#ifndef DBUG_OFF
7867 7868

/* Debug helper: dump one NDB_SHARE (tagged with `where`) to DBUG_FILE */
static void print_share(const char* where, NDB_SHARE* share)
{
  fprintf(DBUG_FILE,
          "%s %s.%s: use_count: %u, commit_count: %lu\n",
          where, share->db, share->table_name, share->use_count,
          (ulong) share->commit_count);
  fprintf(DBUG_FILE,
          "  - key: %s, key_length: %d\n",
          share->key, share->key_length);

#ifdef HAVE_NDB_BINLOG
  if (share->table)
    fprintf(DBUG_FILE,
            "  - share->table: %p %s.%s\n",
            share->table, share->table->s->db.str,
            share->table->s->table_name.str);
#endif
}
7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898


/* Debug helper: dump every share in ndbcluster_open_tables to DBUG_FILE */
static void print_ndbcluster_open_tables()
{
  DBUG_LOCK_FILE;
  fprintf(DBUG_FILE, ">ndbcluster_open_tables\n");
  for (uint i= 0; i < ndbcluster_open_tables.records; i++)
    print_share("",
                (NDB_SHARE*)hash_element(&ndbcluster_open_tables, i));
  fprintf(DBUG_FILE, "<ndbcluster_open_tables\n");
  DBUG_UNLOCK_FILE;
}

7899 7900
#endif

7901 7902 7903 7904 7905 7906 7907 7908 7909 7910 7911

/* Dump all open shares in debug trace builds; no-op otherwise */
#define dbug_print_open_tables()                \
  DBUG_EXECUTE("info",                          \
               print_ndbcluster_open_tables(););

/* Dump a single share, tag t, under the DBUG file lock */
#define dbug_print_share(t, s)                  \
  DBUG_LOCK_FILE;                               \
  DBUG_EXECUTE("info",                          \
               print_share((t), (s)););         \
  DBUG_UNLOCK_FILE;
7912

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
7913 7914 7915 7916 7917 7918 7919 7920 7921 7922 7923 7924 7925
#ifdef HAVE_NDB_BINLOG
/*
  For some reason a share is still around, try to salvage the situation
  by closing all cached tables. If the share still exists, there is an
  error somewhere but only report this to the error log.  Keep this
  "trailing share" but rename it since there are still references to it
  to avoid segmentation faults.  There is a risk that the memory for
  this trailing share leaks.
  
  Must be called with previous pthread_mutex_lock(&ndbcluster_mutex)

  Returns 0 in all paths: either the share was freed, or it was
  re-inserted into the hash under a "#leak<N>" key.
*/
int handle_trailing_share(NDB_SHARE *share)
{
  THD *thd= current_thd;
  static ulong trailing_share_id= 0;
  DBUG_ENTER("handle_trailing_share");

  /* ndb_share reference temporary, free below */
  ++share->use_count;
  DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
                           share->key, share->use_count));
  /*
    ndbcluster_mutex is dropped across close_cached_tables(); the extra
    use_count taken above keeps the share alive during that window.
  */
  pthread_mutex_unlock(&ndbcluster_mutex);

  TABLE_LIST table_list;
  bzero((char*) &table_list,sizeof(table_list));
  table_list.db= share->db;
  table_list.alias= table_list.table_name= share->table_name;
  safe_mutex_assert_owner(&LOCK_open);
  close_cached_tables(thd, 0, &table_list, TRUE);

  pthread_mutex_lock(&ndbcluster_mutex);
  /* ndb_share reference temporary free */
  DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
                           share->key, share->use_count));
  if (!--share->use_count)
  {
    /* Closing the cached tables dropped the last reference: free it. */
    if (ndb_extra_logging)
      sql_print_information("NDB_SHARE: trailing share "
                            "%s(connect_count: %u) "
                            "released by close_cached_tables at "
                            "connect_count: %u",
                            share->key,
                            share->connect_count,
                            g_ndb_cluster_connection->get_connect_count());
    ndbcluster_real_free_share(&share);
    DBUG_RETURN(0);
  }

  /*
    share still exists, if share has not been dropped by server
    release that share
  */
  if (share->state != NSS_DROPPED)
  {
    share->state= NSS_DROPPED;
    /* ndb_share reference create free */
    DBUG_PRINT("NDB_SHARE", ("%s create free  use_count: %u",
                             share->key, share->use_count));
    --share->use_count;

    if (share->use_count == 0)
    {
      if (ndb_extra_logging)
        sql_print_information("NDB_SHARE: trailing share "
                              "%s(connect_count: %u) "
                              "released after NSS_DROPPED check "
                              "at connect_count: %u",
                              share->key,
                              share->connect_count,
                              g_ndb_cluster_connection->get_connect_count());
      ndbcluster_real_free_share(&share);
      DBUG_RETURN(0);
    }
  }

  sql_print_error("NDB_SHARE: %s already exists  use_count=%d."
                  " Moving away for safety, but possible memleak.",
                  share->key, share->use_count);
  dbug_print_open_tables();

  /*
    Ndb share has not been released as it should
  */
#ifdef NOT_YET
  DBUG_ASSERT(FALSE);
#endif

  /*
    This is probably an error.  We can however save the situation
    at the cost of a possible mem leak, by "renaming" the share
    - First remove from hash
  */
  hash_delete(&ndbcluster_open_tables, (byte*) share);

  /*
    now give it a new name, just a running number
    if space is not enough allocate some more
  */
  {
    const uint min_key_length= 10;
    if (share->key_length < min_key_length)
    {
      share->key= alloc_root(&share->mem_root, min_key_length + 1);
      share->key_length= min_key_length;
    }
    share->key_length=
      my_snprintf(share->key, min_key_length + 1, "#leak%lu",
                  trailing_share_id++);
  }
  /* Keep it for possible the future trailing free */
  my_hash_insert(&ndbcluster_open_tables, (byte*) share);

  DBUG_RETURN(0);
}

/*
  Rename share is used during rename table.

  Re-keys an open NDB_SHARE: a lingering share under the new name is
  salvaged via handle_trailing_share(), then this share is removed from
  ndbcluster_open_tables, given a new key/db/table_name triple allocated
  on its mem_root, and re-inserted.  Returns 0 on success, -1 if the
  insert under the new key fails (the old key is then restored).
*/
static int rename_share(NDB_SHARE *share, const char *new_key)
{
  NDB_SHARE *tmp;
  pthread_mutex_lock(&ndbcluster_mutex);
  uint new_length= (uint) strlen(new_key);
  DBUG_PRINT("rename_share", ("old_key: %s  old__length: %d",
                              share->key, share->key_length));
  /* A share may already exist under the new name; move it out of the way */
  if ((tmp= (NDB_SHARE*) hash_search(&ndbcluster_open_tables,
                                     (byte*) new_key, new_length)))
    handle_trailing_share(tmp);

  /* remove the share from hash */
  hash_delete(&ndbcluster_open_tables, (byte*) share);
  dbug_print_open_tables();

  /* save old stuff if insert should fail */
  uint old_length= share->key_length;
  char *old_key= share->key;

  /*
    now allocate and set the new key, db etc
    enough space for key, db, and table_name
  */
  share->key= alloc_root(&share->mem_root, 2 * (new_length + 1));
  strmov(share->key, new_key);
  share->key_length= new_length;

  if (my_hash_insert(&ndbcluster_open_tables, (byte*) share))
  {
    // ToDo free the allocated stuff above?
    DBUG_PRINT("error", ("rename_share: my_hash_insert %s failed",
                         share->key));
    /* Fall back to the old key so the share stays reachable */
    share->key= old_key;
    share->key_length= old_length;
    if (my_hash_insert(&ndbcluster_open_tables, (byte*) share))
    {
      sql_print_error("rename_share: failed to recover %s", share->key);
      DBUG_PRINT("error", ("rename_share: my_hash_insert %s failed",
                           share->key));
    }
    dbug_print_open_tables();
    pthread_mutex_unlock(&ndbcluster_mutex);
    return -1;
  }
  dbug_print_open_tables();

  /* db and table_name live in the same buffer, right after the key */
  share->db= share->key + new_length + 1;
  ha_ndbcluster::set_dbname(new_key, share->db);
  share->table_name= share->db + strlen(share->db) + 1;
  ha_ndbcluster::set_tabname(new_key, share->table_name);

  dbug_print_share("rename_share:", share);
  if (share->table)
  {
    /* No binlog event operation: update the cached TABLE_SHARE name now */
    if (share->op == 0)
    {
      share->table->s->db.str= share->db;
      share->table->s->db.length= strlen(share->db);
      share->table->s->table_name.str= share->table_name;
      share->table->s->table_name.length= strlen(share->table_name);
    }
  }
  /* else rename will be handled when the ALTER event comes */
  share->old_names= old_key;
  // ToDo free old_names after ALTER EVENT

  pthread_mutex_unlock(&ndbcluster_mutex);
  return 0;
}
#endif

8102 8103 8104 8105
/*
  Increase refcount on existing share.
  Always returns share and cannot fail.
*/
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
8106
NDB_SHARE *ndbcluster_get_share(NDB_SHARE *share)
8107 8108
{
  pthread_mutex_lock(&ndbcluster_mutex);
8109 8110 8111
  share->use_count++;

  dbug_print_open_tables();
8112
  dbug_print_share("ndbcluster_get_share:", share);
8113 8114 8115 8116
  pthread_mutex_unlock(&ndbcluster_mutex);
  return share;
}

monty@mysql.com's avatar
monty@mysql.com committed
8117

8118 8119 8120 8121 8122 8123 8124 8125 8126 8127 8128 8129 8130 8131
/*
  Get a share object for key

  Returns share for key, and increases the refcount on the share.

  create_if_not_exists == TRUE:
    creates share if it does not alreade exist
    returns 0 only due to out of memory, and then sets my_error

  create_if_not_exists == FALSE:
    returns 0 if share does not exist

  have_lock == TRUE, pthread_mutex_lock(&ndbcluster_mutex) already taken
*/

NDB_SHARE *ndbcluster_get_share(const char *key, TABLE *table,
                                bool create_if_not_exists,
                                bool have_lock)
{
  NDB_SHARE *share;
  uint length= (uint) strlen(key);
  DBUG_ENTER("ndbcluster_get_share");
  DBUG_PRINT("enter", ("key: '%s'", key));

  if (!have_lock)
    pthread_mutex_lock(&ndbcluster_mutex);
  if (!(share= (NDB_SHARE*) hash_search(&ndbcluster_open_tables,
                                        (byte*) key,
                                        length)))
  {
    if (!create_if_not_exists)
    {
      DBUG_PRINT("error", ("get_share: %s does not exist", key));
      if (!have_lock)
        pthread_mutex_unlock(&ndbcluster_mutex);
      DBUG_RETURN(0);
    }
    if ((share= (NDB_SHARE*) my_malloc(sizeof(*share),
                                       MYF(MY_WME | MY_ZEROFILL))))
    {
      /*
        Temporarily redirect the thread's MEM_ROOT to the share's own
        mem_root so helpers that allocate via THR_MALLOC charge the share.
        Restored to old_root before every return from this branch.
      */
      MEM_ROOT **root_ptr=
        my_pthread_getspecific_ptr(MEM_ROOT**, THR_MALLOC);
      MEM_ROOT *old_root= *root_ptr;
      init_sql_alloc(&share->mem_root, 1024, 0);
      *root_ptr= &share->mem_root; // remember to reset before return
      share->state= NSS_INITIAL;
      /* enough space for key, db, and table_name */
      share->key= alloc_root(*root_ptr, 2 * (length + 1));
      share->key_length= length;
      strmov(share->key, key);
      if (my_hash_insert(&ndbcluster_open_tables, (byte*) share))
      {
        /* Insert failed: undo the allocation and restore the MEM_ROOT */
        free_root(&share->mem_root, MYF(0));
        my_free((gptr) share, 0);
        *root_ptr= old_root;
        if (!have_lock)
          pthread_mutex_unlock(&ndbcluster_mutex);
        DBUG_RETURN(0);
      }
      thr_lock_init(&share->lock);
      pthread_mutex_init(&share->mutex, MY_MUTEX_INIT_FAST);
      share->commit_count= 0;
      share->commit_count_lock= 0;
      /* db and table_name are carved out of the key buffer allocated above */
      share->db= share->key + length + 1;
      ha_ndbcluster::set_dbname(key, share->db);
      share->table_name= share->db + strlen(share->db) + 1;
      ha_ndbcluster::set_tabname(key, share->table_name);
#ifdef HAVE_NDB_BINLOG
      ndbcluster_binlog_init_share(share, table);
#endif
      *root_ptr= old_root;
    }
    else
    {
      DBUG_PRINT("error", ("get_share: failed to alloc share"));
      if (!have_lock)
        pthread_mutex_unlock(&ndbcluster_mutex);
      my_error(ER_OUTOFMEMORY, MYF(0), sizeof(*share));
      DBUG_RETURN(0);
    }
  }
  share->use_count++;

  dbug_print_open_tables();
  dbug_print_share("ndbcluster_get_share:", share);
  if (!have_lock)
    pthread_mutex_unlock(&ndbcluster_mutex);
  DBUG_RETURN(share);
}

monty@mysql.com's avatar
monty@mysql.com committed
8208

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
8209
/*
  Really free a share: remove it from ndbcluster_open_tables, destroy
  its lock and mutex, close the binlog injector TABLE (if any) and
  release all memory owned by the share.  Sets *share to 0.

  Callers (ndbcluster_free_share, handle_trailing_share) hold
  ndbcluster_mutex when invoking this.
*/
void ndbcluster_real_free_share(NDB_SHARE **share)
{
  DBUG_ENTER("ndbcluster_real_free_share");
  dbug_print_share("ndbcluster_real_free_share:", *share);

  hash_delete(&ndbcluster_open_tables, (byte*) *share);
  thr_lock_delete(&(*share)->lock);
  pthread_mutex_destroy(&(*share)->mutex);

#ifdef HAVE_NDB_BINLOG
  if ((*share)->table)
  {
    // (*share)->table->mem_root is freed by closefrm
    closefrm((*share)->table, 0);
    // (*share)->table_share->mem_root is freed by free_table_share
    free_table_share((*share)->table_share);
#ifndef DBUG_OFF
    /* Poison the freed structures so stale use is caught in debug builds */
    bzero((gptr)(*share)->table_share, sizeof(*(*share)->table_share));
    bzero((gptr)(*share)->table, sizeof(*(*share)->table));
    (*share)->table_share= 0;
    (*share)->table= 0;
#endif
  }
#endif
  free_root(&(*share)->mem_root, MYF(0));
  my_free((gptr) *share, MYF(0));
  *share= 0;

  dbug_print_open_tables();
  DBUG_VOID_RETURN;
}

8241

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
8242
/*
  Drop one reference to the share; really free it when the refcount
  reaches zero.  have_lock == TRUE means the caller already holds
  ndbcluster_mutex.
*/
void ndbcluster_free_share(NDB_SHARE **share, bool have_lock)
{
  if (!have_lock)
    pthread_mutex_lock(&ndbcluster_mutex);

  NDB_SHARE *s= *share;
  /* Release the util-thread marker if this thread owns it */
  if (s->util_lock == current_thd)
    s->util_lock= 0;

  s->use_count--;
  if (s->use_count == 0)
    ndbcluster_real_free_share(share);
  else
  {
    dbug_print_open_tables();
    dbug_print_share("ndbcluster_free_share:", *share);
  }

  if (!have_lock)
    pthread_mutex_unlock(&ndbcluster_mutex);
}


8262 8263
/*
  Collect table statistics (row count, commit count, max row size and
  fragment memory usage) by scanning the table's pseudo columns and
  summing the per-fragment values.

  Temporary NDB errors are retried up to 10 times.  Returns 0 on
  success and fills *ndbstat; on failure returns a handler error code
  (report_error == TRUE) or the raw NDB error code.
*/
static 
int
ndb_get_table_statistics(ha_ndbcluster* file, bool report_error, Ndb* ndb, const NDBTAB *ndbtab,
                         struct Ndb_statistics * ndbstat)
{
  NdbTransaction* pTrans;
  NdbError error;
  int retries= 10;
  int reterr= 0;
  /*
    NOTE(review): my_sleep() takes microseconds, so this sleeps 30 us,
    not the 30 ms the original comment claims — confirm intended value.
  */
  int retry_sleep= 30; /* 30 milliseconds, transaction */
#ifndef DBUG_OFF
  char buff[22], buff2[22], buff3[22], buff4[22];
#endif
  DBUG_ENTER("ndb_get_table_statistics");
  DBUG_PRINT("enter", ("table: %s", ndbtab->getName()));

  DBUG_ASSERT(ndbtab != 0);

  do
  {
    Uint64 rows, commits, fixed_mem, var_mem;
    Uint32 size;
    Uint32 count= 0;
    Uint64 sum_rows= 0;
    Uint64 sum_commits= 0;
    Uint64 sum_row_size= 0;
    Uint64 sum_mem= 0;
    NdbScanOperation*pOp;
    int check;

    if ((pTrans= ndb->startTransaction()) == NULL)
    {
      error= ndb->getNdbError();
      goto retry;
    }
      
    if ((pOp= pTrans->getNdbScanOperation(ndbtab)) == NULL)
    {
      error= pTrans->getNdbError();
      goto retry;
    }
    
    if (pOp->readTuples(NdbOperation::LM_CommittedRead))
    {
      error= pOp->getNdbError();
      goto retry;
    }
    
    /* Ask for only the last row of each fragment (one row per fragment) */
    if (pOp->interpret_exit_last_row() == -1)
    {
      error= pOp->getNdbError();
      goto retry;
    }
    
    /* Pseudo columns delivered into the locals read in the result loop */
    pOp->getValue(NdbDictionary::Column::ROW_COUNT, (char*)&rows);
    pOp->getValue(NdbDictionary::Column::COMMIT_COUNT, (char*)&commits);
    pOp->getValue(NdbDictionary::Column::ROW_SIZE, (char*)&size);
    pOp->getValue(NdbDictionary::Column::FRAGMENT_FIXED_MEMORY, 
		  (char*)&fixed_mem);
    pOp->getValue(NdbDictionary::Column::FRAGMENT_VARSIZED_MEMORY, 
		  (char*)&var_mem);
    
    if (pTrans->execute(NdbTransaction::NoCommit,
                        NdbOperation::AbortOnError,
                        TRUE) == -1)
    {
      error= pTrans->getNdbError();
      goto retry;
    }
    
    /* Sum the per-fragment values; row_size keeps the maximum seen */
    while ((check= pOp->nextResult(TRUE, TRUE)) == 0)
    {
      sum_rows+= rows;
      sum_commits+= commits;
      if (sum_row_size < size)
        sum_row_size= size;
      sum_mem+= fixed_mem + var_mem;
      count++;
    }
    
    if (check == -1)
    {
      error= pOp->getNdbError();
      goto retry;
    }

    pOp->close(TRUE);

    ndb->closeTransaction(pTrans);

    ndbstat->row_count= sum_rows;
    ndbstat->commit_count= sum_commits;
    ndbstat->row_size= sum_row_size;
    ndbstat->fragment_memory= sum_mem;

    DBUG_PRINT("exit", ("records: %s  commits: %s "
                        "row_size: %s  mem: %s count: %u",
			llstr(sum_rows, buff),
                        llstr(sum_commits, buff2),
                        llstr(sum_row_size, buff3),
                        llstr(sum_mem, buff4),
                        count));

    DBUG_RETURN(0);
retry:
    if(report_error)
    {
      if (file && pTrans)
      {
        reterr= file->ndb_err(pTrans);
      }
      else
      {
        const NdbError& tmp= error;
        ERR_PRINT(tmp);
        reterr= ndb_to_mysql_error(&tmp);
      }
    }
    else
      reterr= error.code;

    if (pTrans)
    {
      ndb->closeTransaction(pTrans);
      pTrans= NULL;
    }
    /* Retry only temporary errors; permanent ones fall through to break */
    if (error.status == NdbError::TemporaryError && retries--)
    {
      my_sleep(retry_sleep);
      continue;
    }
    set_ndb_err(current_thd, error);
    break;
  } while(1);
  DBUG_PRINT("exit", ("failed, reterr: %u, NdbError %u(%s)", reterr,
                      error.code, error.message));
  DBUG_RETURN(reterr);
}

8401 8402 8403 8404 8405
/*
  Create a .ndb file to serve as a placeholder indicating 
  that the table with this name is a ndb table
*/

8406
int ha_ndbcluster::write_ndb_file(const char *name)
8407 8408 8409 8410 8411 8412
{
  File file;
  bool error=1;
  char path[FN_REFLEN];
  
  DBUG_ENTER("write_ndb_file");
8413
  DBUG_PRINT("enter", ("name: %s", name));
8414

8415
  (void)strxnmov(path, FN_REFLEN-1, 
8416
                 mysql_data_home,"/",name,ha_ndb_ext,NullS);
8417 8418 8419 8420 8421 8422 8423 8424 8425 8426

  if ((file=my_create(path, CREATE_MODE,O_RDWR | O_TRUNC,MYF(MY_WME))) >= 0)
  {
    // It's an empty file
    error=0;
    my_close(file,MYF(0));
  }
  DBUG_RETURN(error);
}

8427
void 
8428 8429
ha_ndbcluster::release_completed_operations(NdbTransaction *trans,
					    bool force_release)
8430 8431 8432 8433 8434 8435 8436 8437
{
  if (trans->hasBlobOperation())
  {
    /* We are reading/writing BLOB fields, 
       releasing operation records is unsafe
    */
    return;
  }
8438 8439 8440 8441 8442 8443 8444 8445 8446 8447
  if (!force_release)
  {
    if (get_thd_ndb(current_thd)->query_state & NDB_QUERY_MULTI_READ_RANGE)
    {
      /* We are batching reads and have not consumed all fetched
	 rows yet, releasing operation records is unsafe 
      */
      return;
    }
  }
8448
  trans->releaseCompletedOperations();
8449 8450
}

8451 8452 8453 8454 8455 8456 8457 8458 8459 8460 8461 8462 8463 8464 8465 8466 8467 8468
/*
  Scan as many of the given ranges as fit in the handler buffer and
  return TRUE as soon as a start key containing a NULL value is found,
  FALSE otherwise.  Used to detect when a unique-index batch must fall
  back to the default multi-range implementation.
*/
bool 
ha_ndbcluster::null_value_index_search(KEY_MULTI_RANGE *ranges,
				       KEY_MULTI_RANGE *end_range,
				       HANDLER_BUFFER *buffer)
{
  DBUG_ENTER("null_value_index_search");
  KEY* key_info= table->key_info + active_index;
  ulong reclength= table->s->reclength;
  byte *curr= (byte*)buffer->buffer;
  byte *end_of_buffer= (byte*)buffer->buffer_end;

  KEY_MULTI_RANGE *range= ranges;
  while (range < end_range && curr + reclength <= end_of_buffer)
  {
    if (check_null_in_key(key_info, range->start_key.key,
                          range->start_key.length))
      DBUG_RETURN(TRUE);
    curr+= reclength;
    range++;
  }
  DBUG_RETURN(FALSE);
}

8475
/*
  Start a batched multi-range read.

  Ranges with a full exact key on a primary/unique index are issued as
  individual read operations whose rows land in 'buffer' in range
  order; all other ranges share one multi-range ordered index scan.
  Falls back to handler::read_multi_range_first when BLOB columns are
  read or a unique hash index key contains NULL.
*/
int
ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
                                      KEY_MULTI_RANGE *ranges, 
                                      uint range_count,
                                      bool sorted, 
                                      HANDLER_BUFFER *buffer)
{
  m_write_op= FALSE;
  int res;
  KEY* key_info= table->key_info + active_index;
  NDB_INDEX_TYPE cur_index_type= get_index_type(active_index);
  ulong reclength= table_share->reclength;
  NdbOperation* op;
  Thd_ndb *thd_ndb= get_thd_ndb(current_thd);
  DBUG_ENTER("ha_ndbcluster::read_multi_range_first");

  /**
   * blobs and unique hash index with NULL can't be batched currently
   */
  if (uses_blob_value() ||
      (cur_index_type ==  UNIQUE_INDEX &&
       has_null_in_unique_index(active_index) &&
       null_value_index_search(ranges, ranges+range_count, buffer)))
  {
    m_disable_multi_read= TRUE;
    DBUG_RETURN(handler::read_multi_range_first(found_range_p, 
                                                ranges, 
                                                range_count,
                                                sorted, 
                                                buffer));
  }
  thd_ndb->query_state|= NDB_QUERY_MULTI_READ_RANGE;
  m_disable_multi_read= FALSE;

  /**
   * Copy arguments into member variables
   */
  m_multi_ranges= ranges;
  multi_range_curr= ranges;
  multi_range_end= ranges+range_count;
  multi_range_sorted= sorted;
  multi_range_buffer= buffer;

  /**
   * read multi range will read ranges as follows (if not ordered)
   *
   * input    read order
   * ======   ==========
   * pk-op 1  pk-op 1
   * pk-op 2  pk-op 2
   * range 3  range (3,5) NOTE result rows will be intermixed
   * pk-op 4  pk-op 4
   * range 5
   * pk-op 6  pk-ok 6
   */   

  /**
   * Variables for loop
   */
  byte *curr= (byte*)buffer->buffer;
  byte *end_of_buffer= (byte*)buffer->buffer_end;
  NdbOperation::LockMode lm= 
    (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
  bool need_pk = (lm == NdbOperation::LM_Read);
  const NDBTAB *tab= m_table;
  const NDBINDEX *unique_idx= m_index[active_index].unique_index;
  const NDBINDEX *idx= m_index[active_index].index; 
  const NdbOperation* lastOp= m_active_trans->getLastDefinedOperation();
  NdbIndexScanOperation* scanOp= 0;
  for (; multi_range_curr<multi_range_end && curr+reclength <= end_of_buffer; 
       multi_range_curr++)
  {
    part_id_range part_spec;
    if (m_use_partition_function)
    {
      get_partition_set(table, curr, active_index,
                        &multi_range_curr->start_key,
                        &part_spec);
      DBUG_PRINT("info", ("part_spec.start_part: %u  part_spec.end_part: %u",
                          part_spec.start_part, part_spec.end_part));
      /*
        If partition pruning has found no partition in set
        we can skip this scan
      */
      if (part_spec.start_part > part_spec.end_part)
      {
        /*
          We can skip this partition since the key won't fit into any
          partition
        */
        curr += reclength;
        multi_range_curr->range_flag |= SKIP_RANGE;
        continue;
      }
    }
    switch (cur_index_type) {
    case PRIMARY_KEY_ORDERED_INDEX:
      if (!(multi_range_curr->start_key.length == key_info->key_length &&
          multi_range_curr->start_key.flag == HA_READ_KEY_EXACT))
        goto range;
      // else fall through
    case PRIMARY_KEY_INDEX:
    {
      /* Exact PK lookup: one batched read op, result written at 'curr' */
      multi_range_curr->range_flag |= UNIQUE_RANGE;
      if ((op= m_active_trans->getNdbOperation(tab)) && 
          !op->readTuple(lm) && 
          !set_primary_key(op, multi_range_curr->start_key.key) &&
          !define_read_attrs(curr, op) &&
          (!m_use_partition_function ||
           (op->setPartitionId(part_spec.start_part), TRUE)))
        curr += reclength;
      else
        ERR_RETURN(op ? op->getNdbError() : m_active_trans->getNdbError());
      break;
    }
    break;
    case UNIQUE_ORDERED_INDEX:
      if (!(multi_range_curr->start_key.length == key_info->key_length &&
          multi_range_curr->start_key.flag == HA_READ_KEY_EXACT &&
          !check_null_in_key(key_info, multi_range_curr->start_key.key,
                             multi_range_curr->start_key.length)))
        goto range;
      // else fall through
    case UNIQUE_INDEX:
    {
      /* Exact unique-index lookup: one batched index read op */
      multi_range_curr->range_flag |= UNIQUE_RANGE;
      if ((op= m_active_trans->getNdbIndexOperation(unique_idx, tab)) && 
          !op->readTuple(lm) && 
          !set_index_key(op, key_info, multi_range_curr->start_key.key) &&
          !define_read_attrs(curr, op))
        curr += reclength;
      else
        ERR_RETURN(op ? op->getNdbError() : m_active_trans->getNdbError());
      break;
    }
    case ORDERED_INDEX: {
  range:
      /* Non-unique range: add bounds to the single shared index scan */
      multi_range_curr->range_flag &= ~(uint)UNIQUE_RANGE;
      if (scanOp == 0)
      {
        if (m_multi_cursor)
        {
          /* Reuse the scan cursor from a previous invocation */
          scanOp= m_multi_cursor;
          DBUG_ASSERT(scanOp->getSorted() == sorted);
          DBUG_ASSERT(scanOp->getLockMode() == 
                      (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type));
          if (scanOp->reset_bounds(m_force_send))
            DBUG_RETURN(ndb_err(m_active_trans));
          
          end_of_buffer -= reclength;
        }
        else if ((scanOp= m_active_trans->getNdbIndexScanOperation(idx, tab)) 
                 &&!scanOp->readTuples(lm, 0, parallelism, sorted, 
				       FALSE, TRUE, need_pk, TRUE)
                 &&!(m_cond && m_cond->generate_scan_filter(scanOp))
                 &&!define_read_attrs(end_of_buffer-reclength, scanOp))
        {
          /* Scan rows are delivered into the last record slot of the buffer */
          m_multi_cursor= scanOp;
          m_multi_range_cursor_result_ptr= end_of_buffer-reclength;
        }
        else
        {
          ERR_RETURN(scanOp ? scanOp->getNdbError() : 
                     m_active_trans->getNdbError());
        }
      }

      const key_range *keys[2]= { &multi_range_curr->start_key, 
                                  &multi_range_curr->end_key };
      if ((res= set_bounds(scanOp, active_index, FALSE, keys,
                           multi_range_curr-ranges)))
        DBUG_RETURN(res);
      break;
    }
    case UNDEFINED_INDEX:
      DBUG_ASSERT(FALSE);
      DBUG_RETURN(1);
      break;
    }
  }
  
  if (multi_range_curr != multi_range_end)
  {
    /**
     * Mark that we're using entire buffer (even if might not) as
     *   we haven't read all ranges for some reason
     * This as we don't want mysqld to reuse the buffer when we read
     *   the remaining ranges
     */
    buffer->end_of_used_area= (byte*)buffer->buffer_end;
  }
  else
  {
    buffer->end_of_used_area= curr;
  }
  
  /**
   * Set first operation in multi range
   */
  m_current_multi_operation= 
    lastOp ? lastOp->next() : m_active_trans->getFirstDefinedOperation();
  if (!(res= execute_no_commit_ie(this, m_active_trans,true)))
  {
    m_multi_range_defined= multi_range_curr;
    multi_range_curr= ranges;
    m_multi_range_result_ptr= (byte*)buffer->buffer;
    DBUG_RETURN(read_multi_range_next(found_range_p));
  }
  ERR_RETURN(m_active_trans->getNdbError());
}

/*
  Per-case trace macro for read_multi_range_next().
  Flip the "#if 0" to "#if 1" to get a DBUG_PRINT for every branch taken
  while stepping through a multi-range read.
*/
#if 0
#define DBUG_MULTI_RANGE(x) DBUG_PRINT("info", ("read_multi_range_next: case %d\n", x));
#else
#define DBUG_MULTI_RANGE(x)
#endif

8692
int
8693
ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p)
8694 8695
{
  DBUG_ENTER("ha_ndbcluster::read_multi_range_next");
8696
  if (m_disable_multi_read)
8697
  {
8698
    DBUG_MULTI_RANGE(11);
8699
    DBUG_RETURN(handler::read_multi_range_next(multi_range_found_p));
8700
  }
8701
  
8702
  int res;
8703
  int range_no;
8704
  ulong reclength= table_share->reclength;
8705
  const NdbOperation* op= m_current_multi_operation;
8706
  for (;multi_range_curr < m_multi_range_defined; multi_range_curr++)
8707
  {
8708 8709 8710
    DBUG_MULTI_RANGE(12);
    if (multi_range_curr->range_flag & SKIP_RANGE)
      continue;
8711
    if (multi_range_curr->range_flag & UNIQUE_RANGE)
8712
    {
8713
      if (op->getNdbError().code == 0)
8714 8715
      {
        DBUG_MULTI_RANGE(13);
8716
        goto found_next;
8717
      }
8718 8719 8720
      
      op= m_active_trans->getNextCompletedOperation(op);
      m_multi_range_result_ptr += reclength;
8721
      continue;
8722
    } 
8723
    else if (m_multi_cursor && !multi_range_sorted)
8724
    {
8725 8726
      DBUG_MULTI_RANGE(1);
      if ((res= fetch_next(m_multi_cursor)) == 0)
8727
      {
8728 8729 8730
        DBUG_MULTI_RANGE(2);
        range_no= m_multi_cursor->get_range_no();
        goto found;
8731 8732 8733
      } 
      else
      {
8734
        DBUG_MULTI_RANGE(14);
8735
        goto close_scan;
8736 8737
      }
    }
8738
    else if (m_multi_cursor && multi_range_sorted)
8739
    {
8740 8741
      if (m_active_cursor && (res= fetch_next(m_multi_cursor)))
      {
8742 8743
        DBUG_MULTI_RANGE(3);
        goto close_scan;
8744
      }
8745
      
8746
      range_no= m_multi_cursor->get_range_no();
8747
      uint current_range_no= multi_range_curr - m_multi_ranges;
mskold@mysql.com's avatar
mskold@mysql.com committed
8748
      if ((uint) range_no == current_range_no)
8749
      {
8750
        DBUG_MULTI_RANGE(4);
8751
        // return current row
8752
        goto found;
8753
      }
8754
      else if (range_no > (int)current_range_no)
8755
      {
8756 8757 8758 8759
        DBUG_MULTI_RANGE(5);
        // wait with current row
        m_active_cursor= 0;
        continue;
8760 8761 8762
      }
      else 
      {
8763 8764 8765
        DBUG_MULTI_RANGE(6);
        // First fetch from cursor
        DBUG_ASSERT(range_no == -1);
8766
        if ((res= m_multi_cursor->nextResult(TRUE)))
8767
        {
8768
          DBUG_MULTI_RANGE(15);
8769 8770 8771 8772
          goto close_scan;
        }
        multi_range_curr--; // Will be increased in for-loop
        continue;
8773
      }
8774
    }
8775
    else /** m_multi_cursor == 0 */
8776
    {
8777
      DBUG_MULTI_RANGE(7);
8778 8779 8780 8781
      /**
       * Corresponds to range 5 in example in read_multi_range_first
       */
      (void)1;
8782
      continue;
8783
    }
8784
    
8785
    DBUG_ASSERT(FALSE); // Should only get here via goto's
8786 8787 8788
close_scan:
    if (res == 1)
    {
8789
      m_multi_cursor->close(FALSE, TRUE);
8790
      m_active_cursor= m_multi_cursor= 0;
8791
      DBUG_MULTI_RANGE(8);
8792 8793 8794 8795
      continue;
    } 
    else 
    {
8796
      DBUG_MULTI_RANGE(9);
8797 8798 8799
      DBUG_RETURN(ndb_err(m_active_trans));
    }
  }
8800
  
8801
  if (multi_range_curr == multi_range_end)
8802 8803
  {
    DBUG_MULTI_RANGE(16);
8804 8805
    Thd_ndb *thd_ndb= get_thd_ndb(current_thd);
    thd_ndb->query_state&= NDB_QUERY_NORMAL;
8806
    DBUG_RETURN(HA_ERR_END_OF_FILE);
8807
  }
8808
  
8809 8810 8811 8812
  /**
   * Read remaining ranges
   */
  DBUG_RETURN(read_multi_range_first(multi_range_found_p, 
8813 8814 8815 8816
                                     multi_range_curr,
                                     multi_range_end - multi_range_curr, 
                                     multi_range_sorted,
                                     multi_range_buffer));
8817 8818
  
found:
8819 8820 8821
  /**
   * Found a record belonging to a scan
   */
8822
  m_active_cursor= m_multi_cursor;
8823
  * multi_range_found_p= m_multi_ranges + range_no;
8824 8825
  memcpy(table->record[0], m_multi_range_cursor_result_ptr, reclength);
  setup_recattr(m_active_cursor->getFirstRecAttr());
8826 8827 8828
  unpack_record(table->record[0]);
  table->status= 0;     
  DBUG_RETURN(0);
8829
  
8830
found_next:
8831 8832 8833 8834
  /**
   * Found a record belonging to a pk/index op,
   *   copy result and move to next to prepare for next call
   */
8835
  * multi_range_found_p= multi_range_curr;
8836
  memcpy(table->record[0], m_multi_range_result_ptr, reclength);
8837
  setup_recattr(op->getFirstRecAttr());
8838
  unpack_record(table->record[0]);
8839 8840
  table->status= 0;
  
8841
  multi_range_curr++;
8842
  m_current_multi_operation= m_active_trans->getNextCompletedOperation(op);
8843 8844
  m_multi_range_result_ptr += reclength;
  DBUG_RETURN(0);
8845 8846
}

8847 8848 8849 8850 8851 8852 8853 8854
int
ha_ndbcluster::setup_recattr(const NdbRecAttr* curr)
{
  DBUG_ENTER("setup_recattr");

  Field **field, **end;
  NdbValue *value= m_value;
  
8855
  end= table->field + table_share->fields;
8856 8857 8858 8859 8860 8861
  
  for (field= table->field; field < end; field++, value++)
  {
    if ((* value).ptr)
    {
      DBUG_ASSERT(curr != 0);
8862 8863 8864
      NdbValue* val= m_value + curr->getColumn()->getColumnNo();
      DBUG_ASSERT(val->ptr);
      val->rec= curr;
8865
      curr= curr->next();
8866 8867 8868
    }
  }
  
8869
  DBUG_RETURN(0);
8870 8871
}

mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8872 8873
char*
ha_ndbcluster::update_table_comment(
8874 8875
                                /* out: table comment + additional */
        const char*     comment)/* in:  table comment defined by user */
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8876 8877
{
  uint length= strlen(comment);
8878
  if (length > 64000 - 3)
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8879 8880 8881 8882 8883 8884 8885 8886 8887 8888
  {
    return((char*)comment); /* string too long */
  }

  Ndb* ndb;
  if (!(ndb= get_ndb()))
  {
    return((char*)comment);
  }

8889 8890 8891 8892
  if (ndb->setDatabaseName(m_dbname))
  {
    return((char*)comment);
  }
8893 8894
  const NDBTAB* tab= m_table;
  DBUG_ASSERT(tab != NULL);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8895 8896 8897 8898 8899 8900

  char *str;
  const char *fmt="%s%snumber_of_replicas: %d";
  const unsigned fmt_len_plus_extra= length + strlen(fmt);
  if ((str= my_malloc(fmt_len_plus_extra, MYF(0))) == NULL)
  {
8901 8902
    sql_print_error("ha_ndbcluster::update_table_comment: "
                    "my_malloc(%u) failed", (unsigned int)fmt_len_plus_extra);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8903 8904 8905
    return (char*)comment;
  }

8906 8907 8908
  my_snprintf(str,fmt_len_plus_extra,fmt,comment,
              length > 0 ? " ":"",
              tab->getReplicaCount());
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8909 8910 8911 8912 8913
  return str;
}


// Utility thread main loop
8914
pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused)))
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8915 8916 8917
{
  THD *thd; /* needs to be first for thread_stack */
  struct timespec abstime;
8918
  Thd_ndb *thd_ndb;
8919 8920
  uint share_list_size= 0;
  NDB_SHARE **share_list= NULL;
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8921 8922 8923

  my_thread_init();
  DBUG_ENTER("ndb_util_thread");
8924
  DBUG_PRINT("enter", ("ndb_cache_check_time: %lu", ndb_cache_check_time));
8925 8926
 
   pthread_mutex_lock(&LOCK_ndb_util_thread);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8927 8928

  thd= new THD; /* note that contructor of THD uses DBUG_ */
8929 8930 8931 8932 8933
  if (thd == NULL)
  {
    my_errno= HA_ERR_OUT_OF_MEM;
    DBUG_RETURN(NULL);
  }
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
8934 8935 8936 8937 8938
  THD_CHECK_SENTRY(thd);
  pthread_detach_this_thread();
  ndb_util_thread= pthread_self();

  thd->thread_stack= (char*)&thd; /* remember where our stack is */
8939
  if (thd->store_globals())
8940
    goto ndb_util_thread_fail;
8941 8942 8943 8944 8945 8946 8947
  thd->init_for_queries();
  thd->version=refresh_version;
  thd->main_security_ctx.host_or_ip= "";
  thd->client_capabilities = 0;
  my_net_init(&thd->net, 0);
  thd->main_security_ctx.master_access= ~0;
  thd->main_security_ctx.priv_user = 0;
8948
  thd->current_stmt_binlog_row_based= TRUE;     // If in mixed mode
8949

8950 8951 8952 8953 8954 8955 8956 8957
  CHARSET_INFO *charset_connection;
  charset_connection= get_charset_by_csname("utf8",
                                            MY_CS_PRIMARY, MYF(MY_WME));
  thd->variables.character_set_client= charset_connection;
  thd->variables.character_set_results= charset_connection;
  thd->variables.collation_connection= charset_connection;
  thd->update_charset();

8958
  /* Signal successful initialization */
8959
  ndb_util_thread_running= 1;
8960 8961
  pthread_cond_signal(&COND_ndb_util_ready);
  pthread_mutex_unlock(&LOCK_ndb_util_thread);
8962

8963 8964 8965 8966 8967
  /*
    wait for mysql server to start
  */
  pthread_mutex_lock(&LOCK_server_started);
  while (!mysqld_server_started)
8968 8969 8970 8971 8972 8973 8974 8975 8976 8977 8978
  {
    set_timespec(abstime, 1);
    pthread_cond_timedwait(&COND_server_started, &LOCK_server_started,
	                       &abstime);
    if (ndbcluster_terminating)
    {
      pthread_mutex_unlock(&LOCK_server_started);
      pthread_mutex_lock(&LOCK_ndb_util_thread);
      goto ndb_util_thread_end;
    }
  }
8979 8980 8981 8982 8983 8984
  pthread_mutex_unlock(&LOCK_server_started);

  /*
    Wait for cluster to start
  */
  pthread_mutex_lock(&LOCK_ndb_util_thread);
8985
  while (!ndb_cluster_node_id && (ndbcluster_hton->slot != ~(uint)0))
8986 8987
  {
    /* ndb not connected yet */
8988 8989
    pthread_cond_wait(&COND_ndb_util_thread, &LOCK_ndb_util_thread);
    if (ndbcluster_terminating)
8990 8991 8992 8993
      goto ndb_util_thread_end;
  }
  pthread_mutex_unlock(&LOCK_ndb_util_thread);

8994 8995
  /* Get thd_ndb for this thread */
  if (!(thd_ndb= ha_ndbcluster::seize_thd_ndb()))
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
8996
  {
8997
    sql_print_error("Could not allocate Thd_ndb object");
8998
    pthread_mutex_lock(&LOCK_ndb_util_thread);
8999
    goto ndb_util_thread_end;
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
9000
  }
9001 9002
  set_thd_ndb(thd, thd_ndb);
  thd_ndb->options|= TNO_NO_LOG_SCHEMA_OP;
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
9003 9004

#ifdef HAVE_NDB_BINLOG
9005 9006
  if (ndb_extra_logging && ndb_binlog_running)
    sql_print_information("NDB Binlog: Ndb tables initially read only.");
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
9007 9008 9009
  /* create tables needed by the replication */
  ndbcluster_setup_binlog_table_shares(thd);
#else
9010 9011 9012 9013
  /*
    Get all table definitions from the storage node
  */
  ndbcluster_find_all_files(thd);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
9014
#endif
9015

9016
  set_timespec(abstime, 0);
9017
  for (;;)
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
9018 9019
  {
    pthread_mutex_lock(&LOCK_ndb_util_thread);
9020 9021 9022 9023 9024 9025
    if (!ndbcluster_terminating)
      pthread_cond_timedwait(&COND_ndb_util_thread,
                             &LOCK_ndb_util_thread,
                             &abstime);
    if (ndbcluster_terminating) /* Shutting down server */
      goto ndb_util_thread_end;
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
9026
    pthread_mutex_unlock(&LOCK_ndb_util_thread);
9027
#ifdef NDB_EXTRA_DEBUG_UTIL_THREAD
9028
    DBUG_PRINT("ndb_util_thread", ("Started, ndb_cache_check_time: %lu",
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
9029
                                   ndb_cache_check_time));
9030
#endif
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
9031

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
9032 9033
#ifdef HAVE_NDB_BINLOG
    /*
9034 9035
      Check that the ndb_apply_status_share and ndb_schema_share 
      have been created.
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
9036 9037
      If not try to create it
    */
9038
    if (!ndb_binlog_tables_inited)
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
9039 9040 9041
      ndbcluster_setup_binlog_table_shares(thd);
#endif

mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
9042 9043
    if (ndb_cache_check_time == 0)
    {
9044 9045
      /* Wake up in 1 second to check if value has changed */
      set_timespec(abstime, 1);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
9046 9047 9048 9049 9050 9051
      continue;
    }

    /* Lock mutex and fill list with pointers to all open tables */
    NDB_SHARE *share;
    pthread_mutex_lock(&ndbcluster_mutex);
9052
    uint i, open_count, record_count= ndbcluster_open_tables.records;
9053 9054 9055 9056 9057 9058 9059 9060 9061 9062 9063 9064 9065 9066
    if (share_list_size < record_count)
    {
      NDB_SHARE ** new_share_list= new NDB_SHARE * [record_count];
      if (!new_share_list)
      {
        sql_print_warning("ndb util thread: malloc failure, "
                          "query cache not maintained properly");
        pthread_mutex_unlock(&ndbcluster_mutex);
        goto next;                               // At least do not crash
      }
      delete [] share_list;
      share_list_size= record_count;
      share_list= new_share_list;
    }
9067
    for (i= 0, open_count= 0; i < record_count; i++)
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
9068 9069
    {
      share= (NDB_SHARE *)hash_element(&ndbcluster_open_tables, i);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
9070 9071 9072 9073 9074 9075
#ifdef HAVE_NDB_BINLOG
      if ((share->use_count - (int) (share->op != 0) - (int) (share->op != 0))
          <= 0)
        continue; // injector thread is the only user, skip statistics
      share->util_lock= current_thd; // Mark that util thread has lock
#endif /* HAVE_NDB_BINLOG */
9076
      /* ndb_share reference temporary, free below */
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
9077
      share->use_count++; /* Make sure the table can't be closed */
9078 9079
      DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
                               share->key, share->use_count));
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
9080 9081 9082 9083 9084
      DBUG_PRINT("ndb_util_thread",
                 ("Found open table[%d]: %s, use_count: %d",
                  i, share->table_name, share->use_count));

      /* Store pointer to table */
9085
      share_list[open_count++]= share;
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
9086 9087 9088
    }
    pthread_mutex_unlock(&ndbcluster_mutex);

9089
    /* Iterate through the open files list */
9090
    for (i= 0; i < open_count; i++)
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
9091
    {
9092
      share= share_list[i];
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
9093 9094 9095 9096 9097 9098 9099
#ifdef HAVE_NDB_BINLOG
      if ((share->use_count - (int) (share->op != 0) - (int) (share->op != 0))
          <= 1)
      {
        /*
          Util thread and injector thread is the only user, skip statistics
	*/
9100 9101 9102
        /* ndb_share reference temporary free */
        DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
                                 share->key, share->use_count));
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
9103 9104 9105 9106
        free_share(&share);
        continue;
      }
#endif /* HAVE_NDB_BINLOG */
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
9107
      DBUG_PRINT("ndb_util_thread",
9108
                 ("Fetching commit count for: %s", share->key));
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
9109

9110 9111 9112 9113 9114
      struct Ndb_statistics stat;
      uint lock;
      pthread_mutex_lock(&share->mutex);
      lock= share->commit_count_lock;
      pthread_mutex_unlock(&share->mutex);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
9115
      {
9116 9117
        /* Contact NDB to get commit count for table */
        Ndb* ndb= thd_ndb->ndb;
9118 9119 9120 9121
        if (ndb->setDatabaseName(share->db))
        {
          goto loop_next;
        }
9122 9123
        Ndb_table_guard ndbtab_g(ndb->getDictionary(), share->table_name);
        if (ndbtab_g.get_table() &&
9124
            ndb_get_table_statistics(NULL, FALSE, ndb,
9125
                                     ndbtab_g.get_table(), &stat) == 0)
9126
        {
9127
#ifndef DBUG_OFF
9128
          char buff[22], buff2[22];
9129
#endif
9130 9131
          DBUG_PRINT("info",
                     ("Table: %s  commit_count: %s  rows: %s",
9132 9133
                      share->key,
                      llstr(stat.commit_count, buff),
kostja@bodhi.local's avatar
kostja@bodhi.local committed
9134
                      llstr(stat.row_count, buff2)));
9135 9136 9137 9138 9139 9140 9141 9142
        }
        else
        {
          DBUG_PRINT("ndb_util_thread",
                     ("Error: Could not get commit count for table %s",
                      share->key));
          stat.commit_count= 0;
        }
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
9143
      }
9144
  loop_next:
9145 9146 9147 9148 9149
      pthread_mutex_lock(&share->mutex);
      if (share->commit_count_lock == lock)
        share->commit_count= stat.commit_count;
      pthread_mutex_unlock(&share->mutex);

9150 9151 9152
      /* ndb_share reference temporary free */
      DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
                               share->key, share->use_count));
9153
      free_share(&share);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
9154
    }
9155
next:
9156 9157 9158 9159 9160 9161 9162 9163 9164
    /* Calculate new time to wake up */
    int secs= 0;
    int msecs= ndb_cache_check_time;

    struct timeval tick_time;
    gettimeofday(&tick_time, 0);
    abstime.tv_sec=  tick_time.tv_sec;
    abstime.tv_nsec= tick_time.tv_usec * 1000;

9165
    if (msecs >= 1000){
9166 9167 9168 9169 9170 9171 9172 9173 9174 9175
      secs=  msecs / 1000;
      msecs= msecs % 1000;
    }

    abstime.tv_sec+=  secs;
    abstime.tv_nsec+= msecs * 1000000;
    if (abstime.tv_nsec >= 1000000000) {
      abstime.tv_sec+=  1;
      abstime.tv_nsec-= 1000000000;
    }
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
9176
  }
9177 9178 9179

  pthread_mutex_lock(&LOCK_ndb_util_thread);

9180 9181
ndb_util_thread_end:
  net_end(&thd->net);
9182
ndb_util_thread_fail:
9183 9184
  if (share_list)
    delete [] share_list;
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
9185 9186
  thd->cleanup();
  delete thd;
9187 9188
  
  /* signal termination */
9189
  ndb_util_thread_running= 0;
9190
  pthread_cond_signal(&COND_ndb_util_ready);
9191
  pthread_mutex_unlock(&LOCK_ndb_util_thread);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
9192 9193 9194 9195 9196 9197
  DBUG_PRINT("exit", ("ndb_util_thread"));
  my_thread_end();
  pthread_exit(0);
  DBUG_RETURN(NULL);
}

9198 9199 9200
/*
  Condition pushdown
*/
9201 9202 9203 9204 9205 9206 9207 9208 9209 9210 9211 9212 9213 9214 9215 9216 9217
/*
  Push a condition to ndbcluster storage engine for evaluation 
  during table   and index scans. The conditions will be stored on a stack
  for possibly storing several conditions. The stack can be popped
  by calling cond_pop, handler::extra(HA_EXTRA_RESET) (handler::reset())
  will clear the stack.
  The current implementation supports arbitrary AND/OR nested conditions
  with comparisons between columns and constants (including constant
  expressions and function calls) and the following comparison operators:
  =, !=, >, >=, <, <=, "is null", and "is not null".
  
  RETURN
    NULL The condition was supported and will be evaluated for each 
    row found during the scan
    cond The condition was not supported and all rows will be returned from
         the scan for evaluation (and thus not saved on stack)
*/
9218 9219 9220 9221 9222
const 
COND* 
ha_ndbcluster::cond_push(const COND *cond) 
{ 
  DBUG_ENTER("cond_push");
9223 9224 9225
  if (!m_cond) 
    m_cond= new ha_ndbcluster_cond;
  if (!m_cond)
9226 9227 9228 9229
  {
    my_errno= HA_ERR_OUT_OF_MEM;
    DBUG_RETURN(NULL);
  }
9230
  DBUG_EXECUTE("where",print_where((COND *)cond, m_tabname););
9231
  DBUG_RETURN(m_cond->cond_push(cond, table, (NDBTAB *)m_table));
9232 9233
}

9234 9235 9236
/*
  Pop the top condition from the condition stack of the handler instance.
*/
9237 9238 9239
void 
ha_ndbcluster::cond_pop() 
{ 
9240 9241
  if (m_cond)
    m_cond->cond_pop();
9242 9243 9244
}


9245 9246 9247
/*
  get table space info for SHOW CREATE TABLE
*/
9248
char* ha_ndbcluster::get_tablespace_name(THD *thd, char* name, uint name_len)
9249
{
9250
  Ndb *ndb= check_ndb_in_thd(thd);
9251
  NDBDICT *ndbdict= ndb->getDictionary();
9252 9253
  NdbError ndberr;
  Uint32 id;
9254
  ndb->setDatabaseName(m_dbname);
9255 9256
  const NDBTAB *ndbtab= m_table;
  DBUG_ASSERT(ndbtab != NULL);
9257 9258
  if (!ndbtab->getTablespace(&id))
  {
9259
    return 0;
9260 9261 9262 9263
  }
  {
    NdbDictionary::Tablespace ts= ndbdict->getTablespace(id);
    ndberr= ndbdict->getNdbError();
9264
    if(ndberr.classification != NdbError::NoError)
9265
      goto err;
9266
    DBUG_PRINT("info", ("Found tablespace '%s'", ts.getName()));
9267 9268
    if (name)
    {
9269
      strxnmov(name, name_len, ts.getName(), NullS);
9270 9271 9272 9273
      return name;
    }
    else
      return (my_strdup(ts.getName(), MYF(0)));
9274 9275 9276
  }
err:
  if (ndberr.status == NdbError::TemporaryError)
9277
    push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
9278 9279 9280
			ER_GET_TEMPORARY_ERRMSG, ER(ER_GET_TEMPORARY_ERRMSG),
			ndberr.code, ndberr.message, "NDB");
  else
9281
    push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
9282 9283
			ER_GET_ERRMSG, ER(ER_GET_ERRMSG),
			ndberr.code, ndberr.message, "NDB");
9284 9285 9286
  return 0;
}

9287 9288 9289
/*
  Implements the SHOW NDB STATUS command.
*/
9290
bool
9291
ndbcluster_show_status(handlerton *hton, THD* thd, stat_print_fn *stat_print,
9292
                       enum ha_stat_type stat_type)
9293
{
9294
  char buf[IO_SIZE];
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
9295
  uint buflen;
9296 9297
  DBUG_ENTER("ndbcluster_show_status");
  
9298 9299 9300
  if (stat_type != HA_ENGINE_STATUS)
  {
    DBUG_RETURN(FALSE);
9301
  }
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
9302 9303 9304 9305

  update_status_variables(g_ndb_cluster_connection);
  buflen=
    my_snprintf(buf, sizeof(buf),
9306
                "cluster_node_id=%ld, "
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
9307
                "connected_host=%s, "
9308 9309 9310 9311
                "connected_port=%ld, "
                "number_of_data_nodes=%ld, "
                "number_of_ready_data_nodes=%ld, "
                "connect_count=%ld",
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
9312 9313 9314
                ndb_cluster_node_id,
                ndb_connected_host,
                ndb_connected_port,
justin.he@qa3-104.qa.cn.tlan's avatar
justin.he@qa3-104.qa.cn.tlan committed
9315 9316
                ndb_number_of_data_nodes,
                ndb_number_of_ready_data_nodes,
9317
                ndb_connect_count);
9318 9319
  if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length,
                 STRING_WITH_LEN("connection"), buf, buflen))
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
9320 9321
    DBUG_RETURN(TRUE);

9322
  if (get_thd_ndb(thd) && get_thd_ndb(thd)->ndb)
9323
  {
9324
    Ndb* ndb= (get_thd_ndb(thd))->ndb;
9325 9326
    Ndb::Free_list_usage tmp;
    tmp.m_name= 0;
9327 9328
    while (ndb->get_free_list_usage(&tmp))
    {
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
9329
      buflen=
9330
        my_snprintf(buf, sizeof(buf),
9331 9332
                  "created=%u, free=%u, sizeof=%u",
                  tmp.m_created, tmp.m_free, tmp.m_sizeof);
9333
      if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length,
9334
                     tmp.m_name, strlen(tmp.m_name), buf, buflen))
9335
        DBUG_RETURN(TRUE);
9336 9337
    }
  }
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
9338 9339 9340 9341
#ifdef HAVE_NDB_BINLOG
  ndbcluster_show_status_binlog(thd, stat_print, stat_type);
#endif

9342 9343
  DBUG_RETURN(FALSE);
}
9344

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
9345

9346 9347 9348
/*
  Create a table in NDB Cluster
 */
9349
static uint get_no_fragments(ulonglong max_rows)
9350 9351 9352 9353 9354 9355 9356 9357 9358 9359 9360 9361 9362 9363 9364 9365 9366 9367 9368 9369 9370 9371 9372 9373 9374 9375 9376 9377 9378 9379 9380 9381 9382 9383 9384 9385 9386
{
#if MYSQL_VERSION_ID >= 50000
  uint acc_row_size= 25 + /*safety margin*/ 2;
#else
  uint acc_row_size= pk_length*4;
  /* add acc overhead */
  if (pk_length <= 8)  /* main page will set the limit */
    acc_row_size+= 25 + /*safety margin*/ 2;
  else                /* overflow page will set the limit */
    acc_row_size+= 4 + /*safety margin*/ 4;
#endif
  ulonglong acc_fragment_size= 512*1024*1024;
#if MYSQL_VERSION_ID >= 50100
  return (max_rows*acc_row_size)/acc_fragment_size+1;
#else
  return ((max_rows*acc_row_size)/acc_fragment_size+1
	  +1/*correct rounding*/)/2;
#endif
}


/*
  Round the default partition count to a multiple of the node count,
  capped at four times the number of nodes and below MAX_PARTITIONS.

  Returns TRUE when even the adjusted count is smaller than the number
  of fragments the row estimate asked for (caller emits a warning).
*/
static bool adjusted_frag_count(uint no_fragments, uint no_nodes,
                                uint &reported_frags)
{
  reported_frags= no_nodes;
  for (uint step= 1; step < 4; step++)
  {
    if (reported_frags >= no_fragments ||
        (reported_frags + no_nodes) >= MAX_PARTITIONS)
      break;
    reported_frags+= no_nodes;
  }
  return (reported_frags < no_fragments);
}

9387
int ha_ndbcluster::get_default_no_partitions(HA_CREATE_INFO *create_info)
9388
{
9389
  ha_rows max_rows, min_rows;
9390
  if (create_info)
9391
  {
9392 9393
    max_rows= create_info->max_rows;
    min_rows= create_info->min_rows;
9394 9395 9396 9397 9398 9399
  }
  else
  {
    max_rows= table_share->max_rows;
    min_rows= table_share->min_rows;
  }
9400
  uint reported_frags;
9401 9402
  uint no_fragments=
    get_no_fragments(max_rows >= min_rows ? max_rows : min_rows);
9403
  uint no_nodes= g_ndb_cluster_connection->no_db_nodes();
9404 9405 9406 9407 9408 9409
  if (adjusted_frag_count(no_fragments, no_nodes, reported_frags))
  {
    push_warning(current_thd,
                 MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
    "Ndb might have problems storing the max amount of rows specified");
  }
9410 9411 9412 9413
  return (int)reported_frags;
}


9414 9415 9416 9417 9418 9419 9420 9421 9422 9423 9424 9425 9426 9427 9428 9429 9430 9431 9432 9433 9434 9435 9436 9437 9438 9439 9440 9441 9442 9443 9444 9445 9446 9447 9448 9449 9450 9451 9452 9453 9454 9455
/*
  Set-up auto-partitioning for NDB Cluster

  SYNOPSIS
    set_auto_partitions()
    part_info                  Partition info struct to set-up
 
  RETURN VALUE
    NONE

  DESCRIPTION
    Set-up auto partitioning scheme for tables that didn't define any
    partitioning. We'll use PARTITION BY KEY() in this case which
    translates into partition by primary key if a primary key exists
    and partition by hidden key otherwise.
*/

void ha_ndbcluster::set_auto_partitions(partition_info *part_info)
{
  DBUG_ENTER("ha_ndbcluster::set_auto_partitions");
  part_info->list_of_part_fields= TRUE;
  part_info->part_type= HASH_PARTITION;
  switch (opt_ndb_distribution_id)
  {
  case ND_KEYHASH:
    part_info->linear_hash_ind= FALSE;
    break;
  case ND_LINHASH:
    part_info->linear_hash_ind= TRUE;
    break;
  }
  DBUG_VOID_RETURN;
}


int ha_ndbcluster::set_range_data(void *tab_ref, partition_info *part_info)
{
  NDBTAB *tab= (NDBTAB*)tab_ref;
  int32 *range_data= (int32*)my_malloc(part_info->no_parts*sizeof(int32),
                                       MYF(0));
  uint i;
  int error= 0;
9456
  bool unsigned_flag= part_info->part_expr->unsigned_flag;
9457 9458 9459 9460 9461 9462 9463 9464 9465 9466
  DBUG_ENTER("set_range_data");

  if (!range_data)
  {
    mem_alloc_error(part_info->no_parts*sizeof(int32));
    DBUG_RETURN(1);
  }
  for (i= 0; i < part_info->no_parts; i++)
  {
    longlong range_val= part_info->range_int_array[i];
9467 9468
    if (unsigned_flag)
      range_val-= 0x8000000000000000ULL;
9469
    if (range_val < INT_MIN32 || range_val >= INT_MAX32)
9470
    {
9471 9472 9473 9474 9475 9476 9477 9478
      if ((i != part_info->no_parts - 1) ||
          (range_val != LONGLONG_MAX))
      {
        my_error(ER_LIMITED_PART_RANGE, MYF(0), "NDB");
        error= 1;
        goto error;
      }
      range_val= INT_MAX32;
9479 9480 9481 9482 9483 9484 9485 9486 9487 9488 9489 9490 9491 9492 9493 9494
    }
    range_data[i]= (int32)range_val;
  }
  tab->setRangeListData(range_data, sizeof(int32)*part_info->no_parts);
error:
  my_free((char*)range_data, MYF(0));
  DBUG_RETURN(error);
}

int ha_ndbcluster::set_list_data(void *tab_ref, partition_info *part_info)
{
  NDBTAB *tab= (NDBTAB*)tab_ref;
  int32 *list_data= (int32*)my_malloc(part_info->no_list_values * 2
                                      * sizeof(int32), MYF(0));
  uint32 *part_id, i;
  int error= 0;
9495
  bool unsigned_flag= part_info->part_expr->unsigned_flag;
9496 9497 9498 9499 9500 9501 9502 9503 9504 9505 9506
  DBUG_ENTER("set_list_data");

  if (!list_data)
  {
    mem_alloc_error(part_info->no_list_values*2*sizeof(int32));
    DBUG_RETURN(1);
  }
  for (i= 0; i < part_info->no_list_values; i++)
  {
    LIST_PART_ENTRY *list_entry= &part_info->list_array[i];
    longlong list_val= list_entry->list_value;
9507 9508
    if (unsigned_flag)
      list_val-= 0x8000000000000000ULL;
9509 9510 9511 9512 9513 9514 9515 9516 9517 9518 9519 9520 9521 9522 9523 9524
    if (list_val < INT_MIN32 || list_val > INT_MAX32)
    {
      my_error(ER_LIMITED_PART_RANGE, MYF(0), "NDB");
      error= 1;
      goto error;
    }
    list_data[2*i]= (int32)list_val;
    part_id= (uint32*)&list_data[2*i+1];
    *part_id= list_entry->partition_id;
  }
  tab->setRangeListData(list_data, 2*sizeof(int32)*part_info->no_list_values);
error:
  my_free((char*)list_data, MYF(0));
  DBUG_RETURN(error);
}

/*
  User defined partitioning set-up. We need to check how many fragments the
  user wants defined and which node groups to put those into. Later we also
  want to attach those partitions to a tablespace.

  All the functionality of the partition function, partition limits and so
  forth are entirely handled by the MySQL Server. There is one exception to
  this rule for PARTITION BY KEY where NDB handles the hash function and
  this type can thus be handled transparently also by NDB API program.
  For RANGE, HASH and LIST and subpartitioning the NDB API programs must
  implement the function to map to a partition.
*/

/*
  Transfer the MySQL partitioning definition onto the NDB table object:
  fragment type, distribution key columns (for KEY partitioning),
  range/list data (for user-defined partitioning), per-fragment node
  groups and tablespace names, and min/max row hints.

  part_info  MySQL partition definition
  table      table being created (unused here, kept for interface)
  tab_par    NDB table object (NDBTAB*) being built

  RETURN  0 on success, mysql error code otherwise.
*/
uint ha_ndbcluster::set_up_partition_info(partition_info *part_info,
                                          TABLE *table,
                                          void *tab_par)
{
  uint16 frag_data[MAX_PARTITIONS];
  char *ts_names[MAX_PARTITIONS];
  ulong fd_index= 0, i, j;
  NDBTAB *tab= (NDBTAB*)tab_par;
  NDBTAB::FragmentType ftype= NDBTAB::UserDefined;
  partition_element *part_elem;
  bool first= TRUE;
  List_iterator<partition_element> part_it(part_info->partitions);
  int error;
  DBUG_ENTER("ha_ndbcluster::set_up_partition_info");

  if (part_info->part_type == HASH_PARTITION &&
      part_info->list_of_part_fields == TRUE)
  {
    /* PARTITION BY KEY: NDB computes the hash itself */
    Field **fields= part_info->part_field_array;

    if (part_info->linear_hash_ind)
      ftype= NDBTAB::DistrKeyLin;
    else
      ftype= NDBTAB::DistrKeyHash;

    for (i= 0; i < part_info->part_field_list.elements; i++)
    {
      NDBCOL *col= tab->getColumn(fields[i]->field_index);
      DBUG_PRINT("info",("setting dist key on %s", col->getName()));
      col->setPartitionKey(TRUE);
    }
  }
  else 
  {
    if (!current_thd->variables.new_mode)
    {
      push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
                          ER_ILLEGAL_HA_CREATE_OPTION,
                          ER(ER_ILLEGAL_HA_CREATE_OPTION),
                          ndbcluster_hton_name,
                          "LIST, RANGE and HASH partition disabled by default,"
                          " use --new option to enable");
      DBUG_RETURN(HA_ERR_UNSUPPORTED);
    }
   /*
      Create a shadow field for those tables that have user defined
      partitioning. This field stores the value of the partition
      function such that NDB can handle reorganisations of the data
      even when the MySQL Server isn't available to assist with
      calculation of the partition function value.
    */
    NDBCOL col;
    DBUG_PRINT("info", ("Generating partition func value field"));
    col.setName("$PART_FUNC_VALUE");
    col.setType(NdbDictionary::Column::Int);
    col.setLength(1);
    col.setNullable(FALSE);
    col.setPrimaryKey(FALSE);
    col.setAutoIncrement(FALSE);
    tab->addColumn(col);
    if (part_info->part_type == RANGE_PARTITION)
    {
      if ((error= set_range_data((void*)tab, part_info)))
      {
        DBUG_RETURN(error);
      }
    }
    else if (part_info->part_type == LIST_PARTITION)
    {
      if ((error= set_list_data((void*)tab, part_info)))
      {
        DBUG_RETURN(error);
      }
    }
  }
  tab->setFragmentType(ftype);
  /*
    Collect node group and tablespace name per fragment (per partition,
    or per subpartition when subpartitioned).
  */
  i= 0;
  do
  {
    uint ng;
    part_elem= part_it++;
    if (!part_info->is_sub_partitioned())
    {
      ng= part_elem->nodegroup_id;
      if (first && ng == UNDEF_NODEGROUP)
        ng= 0;
      ts_names[fd_index]= part_elem->tablespace_name;
      frag_data[fd_index++]= ng;
    }
    else
    {
      List_iterator<partition_element> sub_it(part_elem->subpartitions);
      j= 0;
      do
      {
        part_elem= sub_it++;
        ng= part_elem->nodegroup_id;
        if (first && ng == UNDEF_NODEGROUP)
          ng= 0;
        ts_names[fd_index]= part_elem->tablespace_name;
        frag_data[fd_index++]= ng;
      } while (++j < part_info->no_subparts);
    }
    first= FALSE;
  } while (++i < part_info->no_parts);
  tab->setDefaultNoPartitionsFlag(part_info->use_default_no_partitions);
  tab->setLinearFlag(part_info->linear_hash_ind);
  {
    ha_rows max_rows= table_share->max_rows;
    ha_rows min_rows= table_share->min_rows;
    if (max_rows < min_rows)
      max_rows= min_rows;
    if (max_rows != (ha_rows)0) /* default setting, don't set fragmentation */
    {
      tab->setMaxRows(max_rows);
      tab->setMinRows(min_rows);
    }
  }
  tab->setTablespaceNames(ts_names, fd_index*sizeof(char*));
  tab->setFragmentCount(fd_index);
  tab->setFragmentData(&frag_data, fd_index*2);
  DBUG_RETURN(0);
}
9663

9664

9665
bool ha_ndbcluster::check_if_incompatible_data(HA_CREATE_INFO *create_info,
9666 9667
					       uint table_changes)
{
9668 9669 9670
  DBUG_ENTER("ha_ndbcluster::check_if_incompatible_data");
  uint i;
  const NDBTAB *tab= (const NDBTAB *) m_table;
marty@linux.site's avatar
marty@linux.site committed
9671

9672 9673 9674 9675 9676 9677
  if (current_thd->variables.ndb_use_copying_alter_table)
  {
    DBUG_PRINT("info", ("On-line alter table disabled"));
    DBUG_RETURN(COMPATIBLE_DATA_NO);
  }

jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
9678 9679
  int pk= 0;
  int ai= 0;
9680 9681 9682 9683 9684 9685

  if (create_info->tablespace)
    create_info->storage_media = HA_SM_DISK;
  else
    create_info->storage_media = HA_SM_MEMORY;

9686 9687 9688
  for (i= 0; i < table->s->fields; i++) 
  {
    Field *field= table->field[i];
9689
    const NDBCOL *col= tab->getColumn(i);
9690 9691 9692 9693 9694 9695 9696
    if (col->getStorageType() == NDB_STORAGETYPE_MEMORY && create_info->storage_media != HA_SM_MEMORY ||
        col->getStorageType() == NDB_STORAGETYPE_DISK && create_info->storage_media != HA_SM_DISK)
    {
      DBUG_PRINT("info", ("Column storage media is changed"));
      DBUG_RETURN(COMPATIBLE_DATA_NO);
    }
    
9697
    if (field->flags & FIELD_IS_RENAMED)
9698 9699 9700 9701
    {
      DBUG_PRINT("info", ("Field has been renamed, copy table"));
      DBUG_RETURN(COMPATIBLE_DATA_NO);
    }
9702
    if ((field->flags & FIELD_IN_ADD_INDEX) &&
9703 9704 9705 9706 9707
        col->getStorageType() == NdbDictionary::Column::StorageTypeDisk)
    {
      DBUG_PRINT("info", ("add/drop index not supported for disk stored column"));
      DBUG_RETURN(COMPATIBLE_DATA_NO);
    }
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
9708 9709 9710 9711 9712
    
    if (field->flags & PRI_KEY_FLAG)
      pk=1;
    if (field->flags & FIELD_IN_ADD_INDEX)
      ai=1;
9713
  }
9714 9715 9716 9717 9718 9719 9720 9721 9722 9723 9724 9725 9726 9727 9728 9729 9730 9731 9732 9733 9734 9735 9736 9737 9738 9739 9740 9741 9742 9743

  char tablespace_name[FN_LEN]; 
  if (get_tablespace_name(current_thd, tablespace_name, FN_LEN))
  {
    if (create_info->tablespace) 
    {
      if (strcmp(create_info->tablespace, tablespace_name))
      {
        DBUG_PRINT("info", ("storage media is changed, old tablespace=%s, new tablespace=%s",
          tablespace_name, create_info->tablespace));
        DBUG_RETURN(COMPATIBLE_DATA_NO);
      }
    }
    else
    {
      DBUG_PRINT("info", ("storage media is changed, old is DISK and tablespace=%s, new is MEM",
        tablespace_name));
      DBUG_RETURN(COMPATIBLE_DATA_NO);
    }
  }
  else
  {
    if (create_info->storage_media != HA_SM_MEMORY)
    {
      DBUG_PRINT("info", ("storage media is changed, old is MEM, new is DISK and tablespace=%s",
        create_info->tablespace));
      DBUG_RETURN(COMPATIBLE_DATA_NO);
    }
  }

9744
  if (table_changes != IS_EQUAL_YES)
9745
    DBUG_RETURN(COMPATIBLE_DATA_NO);
9746
  
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
9747 9748 9749 9750 9751 9752 9753 9754 9755 9756 9757 9758 9759 9760 9761 9762 9763 9764
  /**
   * Changing from/to primary key
   *
   * This is _not_ correct, but check_if_incompatible_data-interface
   *   doesnt give more info, so I guess that we can't do any
   *   online add index if not using primary key
   *
   *   This as mysql will handle a unique not null index as primary 
   *     even wo/ user specifiying it... :-(
   *   
   */
  if ((table_share->primary_key == MAX_KEY && pk) ||
      (table_share->primary_key != MAX_KEY && !pk) ||
      (table_share->primary_key == MAX_KEY && !pk && ai))
  {
    DBUG_RETURN(COMPATIBLE_DATA_NO);
  }
  
9765
  /* Check that auto_increment value was not changed */
9766 9767
  if ((create_info->used_fields & HA_CREATE_USED_AUTO) &&
      create_info->auto_increment_value != 0)
9768
    DBUG_RETURN(COMPATIBLE_DATA_NO);
9769 9770
  
  /* Check that row format didn't change */
9771
  if ((create_info->used_fields & HA_CREATE_USED_AUTO) &&
9772
      get_row_type() != create_info->row_type)
9773
    DBUG_RETURN(COMPATIBLE_DATA_NO);
9774

9775
  DBUG_RETURN(COMPATIBLE_DATA_YES);
9776 9777
}

9778
bool set_up_tablespace(st_alter_tablespace *alter_info,
9779 9780
                       NdbDictionary::Tablespace *ndb_ts)
{
9781 9782 9783 9784
  ndb_ts->setName(alter_info->tablespace_name);
  ndb_ts->setExtentSize(alter_info->extent_size);
  ndb_ts->setDefaultLogfileGroup(alter_info->logfile_group_name);
  return FALSE;
9785 9786
}

9787
bool set_up_datafile(st_alter_tablespace *alter_info,
9788 9789
                     NdbDictionary::Datafile *ndb_df)
{
9790
  if (alter_info->max_size > 0)
9791 9792
  {
    my_error(ER_TABLESPACE_AUTO_EXTEND_ERROR, MYF(0));
9793
    return TRUE;
9794
  }
9795 9796 9797 9798
  ndb_df->setPath(alter_info->data_file_name);
  ndb_df->setSize(alter_info->initial_size);
  ndb_df->setTablespace(alter_info->tablespace_name);
  return FALSE;
9799 9800
}

9801
bool set_up_logfile_group(st_alter_tablespace *alter_info,
9802 9803
                          NdbDictionary::LogfileGroup *ndb_lg)
{
9804 9805 9806
  ndb_lg->setName(alter_info->logfile_group_name);
  ndb_lg->setUndoBufferSize(alter_info->undo_buffer_size);
  return FALSE;
9807 9808
}

9809
bool set_up_undofile(st_alter_tablespace *alter_info,
9810 9811
                     NdbDictionary::Undofile *ndb_uf)
{
9812 9813 9814 9815
  ndb_uf->setPath(alter_info->undo_file_name);
  ndb_uf->setSize(alter_info->initial_size);
  ndb_uf->setLogfileGroup(alter_info->logfile_group_name);
  return FALSE;
9816 9817
}

9818 9819
int ndbcluster_alter_tablespace(handlerton *hton,
                                THD* thd, st_alter_tablespace *alter_info)
9820
{
9821 9822 9823 9824 9825 9826
  int is_tablespace= 0;
  NdbError err;
  NDBDICT *dict;
  int error;
  const char *errmsg;
  Ndb *ndb;
9827
  DBUG_ENTER("ha_ndbcluster::alter_tablespace");
9828
  LINT_INIT(errmsg);
9829

9830
  ndb= check_ndb_in_thd(thd);
9831
  if (ndb == NULL)
9832
  {
9833
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
9834
  }
9835
  dict= ndb->getDictionary();
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
9836

9837
  switch (alter_info->ts_cmd_type){
9838 9839
  case (CREATE_TABLESPACE):
  {
9840
    error= ER_CREATE_FILEGROUP_FAILED;
9841
    
9842 9843
    NdbDictionary::Tablespace ndb_ts;
    NdbDictionary::Datafile ndb_df;
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
9844
    NdbDictionary::ObjectId objid;
9845
    if (set_up_tablespace(alter_info, &ndb_ts))
9846 9847 9848
    {
      DBUG_RETURN(1);
    }
9849
    if (set_up_datafile(alter_info, &ndb_df))
9850 9851 9852
    {
      DBUG_RETURN(1);
    }
9853
    errmsg= "TABLESPACE";
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
9854
    if (dict->createTablespace(ndb_ts, &objid))
9855 9856
    {
      DBUG_PRINT("error", ("createTablespace returned %d", error));
9857
      goto ndberror;
9858
    }
9859
    DBUG_PRINT("alter_info", ("Successfully created Tablespace"));
9860 9861
    errmsg= "DATAFILE";
    if (dict->createDatafile(ndb_df))
9862
    {
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
9863 9864 9865 9866 9867 9868 9869 9870 9871
      err= dict->getNdbError();
      NdbDictionary::Tablespace tmp= dict->getTablespace(ndb_ts.getName());
      if (dict->getNdbError().code == 0 &&
	  tmp.getObjectId() == objid.getObjectId() &&
	  tmp.getObjectVersion() == objid.getObjectVersion())
      {
	dict->dropTablespace(tmp);
      }
      
9872
      DBUG_PRINT("error", ("createDatafile returned %d", error));
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
9873
      goto ndberror2;
9874
    }
9875
    is_tablespace= 1;
9876 9877 9878 9879
    break;
  }
  case (ALTER_TABLESPACE):
  {
9880
    error= ER_ALTER_FILEGROUP_FAILED;
9881
    if (alter_info->ts_alter_tablespace_type == ALTER_TABLESPACE_ADD_FILE)
9882 9883
    {
      NdbDictionary::Datafile ndb_df;
9884
      if (set_up_datafile(alter_info, &ndb_df))
9885 9886 9887
      {
	DBUG_RETURN(1);
      }
9888 9889
      errmsg= " CREATE DATAFILE";
      if (dict->createDatafile(ndb_df))
9890
      {
9891
	goto ndberror;
9892 9893
      }
    }
9894
    else if(alter_info->ts_alter_tablespace_type == ALTER_TABLESPACE_DROP_FILE)
9895
    {
9896 9897
      NdbDictionary::Tablespace ts= dict->getTablespace(alter_info->tablespace_name);
      NdbDictionary::Datafile df= dict->getDatafile(0, alter_info->data_file_name);
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
9898 9899 9900
      NdbDictionary::ObjectId objid;
      df.getTablespaceId(&objid);
      if (ts.getObjectId() == objid.getObjectId() && 
9901
	  strcmp(df.getPath(), alter_info->data_file_name) == 0)
9902
      {
9903 9904
	errmsg= " DROP DATAFILE";
	if (dict->dropDatafile(df))
9905
	{
9906
	  goto ndberror;
9907 9908 9909 9910 9911
	}
      }
      else
      {
	DBUG_PRINT("error", ("No such datafile"));
9912
	my_error(ER_ALTER_FILEGROUP_FAILED, MYF(0), " NO SUCH FILE");
9913 9914 9915 9916 9917 9918
	DBUG_RETURN(1);
      }
    }
    else
    {
      DBUG_PRINT("error", ("Unsupported alter tablespace: %d", 
9919
			   alter_info->ts_alter_tablespace_type));
9920 9921
      DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
    }
9922
    is_tablespace= 1;
9923 9924 9925 9926
    break;
  }
  case (CREATE_LOGFILE_GROUP):
  {
9927
    error= ER_CREATE_FILEGROUP_FAILED;
9928 9929
    NdbDictionary::LogfileGroup ndb_lg;
    NdbDictionary::Undofile ndb_uf;
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
9930
    NdbDictionary::ObjectId objid;
9931
    if (alter_info->undo_file_name == NULL)
9932 9933 9934 9935 9936 9937
    {
      /*
	REDO files in LOGFILE GROUP not supported yet
      */
      DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
    }
9938
    if (set_up_logfile_group(alter_info, &ndb_lg))
9939 9940 9941
    {
      DBUG_RETURN(1);
    }
9942
    errmsg= "LOGFILE GROUP";
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
9943
    if (dict->createLogfileGroup(ndb_lg, &objid))
9944
    {
9945
      goto ndberror;
9946
    }
9947 9948
    DBUG_PRINT("alter_info", ("Successfully created Logfile Group"));
    if (set_up_undofile(alter_info, &ndb_uf))
9949 9950 9951
    {
      DBUG_RETURN(1);
    }
9952 9953
    errmsg= "UNDOFILE";
    if (dict->createUndofile(ndb_uf))
9954
    {
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
9955 9956 9957 9958 9959 9960 9961 9962 9963
      err= dict->getNdbError();
      NdbDictionary::LogfileGroup tmp= dict->getLogfileGroup(ndb_lg.getName());
      if (dict->getNdbError().code == 0 &&
	  tmp.getObjectId() == objid.getObjectId() &&
	  tmp.getObjectVersion() == objid.getObjectVersion())
      {
	dict->dropLogfileGroup(tmp);
      }
      goto ndberror2;
9964 9965 9966 9967 9968
    }
    break;
  }
  case (ALTER_LOGFILE_GROUP):
  {
9969
    error= ER_ALTER_FILEGROUP_FAILED;
9970
    if (alter_info->undo_file_name == NULL)
9971 9972 9973 9974 9975 9976 9977
    {
      /*
	REDO files in LOGFILE GROUP not supported yet
      */
      DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
    }
    NdbDictionary::Undofile ndb_uf;
9978
    if (set_up_undofile(alter_info, &ndb_uf))
9979 9980 9981
    {
      DBUG_RETURN(1);
    }
9982 9983
    errmsg= "CREATE UNDOFILE";
    if (dict->createUndofile(ndb_uf))
9984
    {
9985
      goto ndberror;
9986 9987 9988 9989 9990
    }
    break;
  }
  case (DROP_TABLESPACE):
  {
9991
    error= ER_DROP_FILEGROUP_FAILED;
9992
    errmsg= "TABLESPACE";
9993
    if (dict->dropTablespace(dict->getTablespace(alter_info->tablespace_name)))
9994
    {
9995
      goto ndberror;
9996
    }
9997
    is_tablespace= 1;
9998 9999 10000 10001
    break;
  }
  case (DROP_LOGFILE_GROUP):
  {
10002
    error= ER_DROP_FILEGROUP_FAILED;
10003
    errmsg= "LOGFILE GROUP";
10004
    if (dict->dropLogfileGroup(dict->getLogfileGroup(alter_info->logfile_group_name)))
10005
    {
10006
      goto ndberror;
10007 10008 10009 10010 10011 10012 10013 10014 10015 10016 10017 10018 10019 10020 10021 10022
    }
    break;
  }
  case (CHANGE_FILE_TABLESPACE):
  {
    DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
  }
  case (ALTER_ACCESS_MODE_TABLESPACE):
  {
    DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
  }
  default:
  {
    DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
  }
  }
10023
#ifdef HAVE_NDB_BINLOG
10024 10025 10026
  if (is_tablespace)
    ndbcluster_log_schema_op(thd, 0,
                             thd->query, thd->query_length,
10027
                             "", alter_info->tablespace_name,
10028
                             0, 0,
10029
                             SOT_TABLESPACE, 0, 0, 0);
10030 10031 10032
  else
    ndbcluster_log_schema_op(thd, 0,
                             thd->query, thd->query_length,
10033
                             "", alter_info->logfile_group_name,
10034
                             0, 0,
10035
                             SOT_LOGFILE_GROUP, 0, 0, 0);
10036
#endif
10037
  DBUG_RETURN(FALSE);
10038 10039

ndberror:
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
10040 10041
  err= dict->getNdbError();
ndberror2:
10042
  set_ndb_err(thd, err);
10043 10044 10045 10046
  ndb_to_mysql_error(&err);
  
  my_error(error, MYF(0), errmsg);
  DBUG_RETURN(1);
10047 10048
}

10049 10050 10051 10052 10053 10054 10055

bool ha_ndbcluster::get_no_parts(const char *name, uint *no_parts)
{
  Ndb *ndb;
  NDBDICT *dict;
  int err;
  DBUG_ENTER("ha_ndbcluster::get_no_parts");
10056
  LINT_INIT(err);
10057 10058 10059

  set_dbname(name);
  set_tabname(name);
10060
  for (;;)
10061 10062 10063 10064 10065 10066 10067
  {
    if (check_ndb_connection())
    {
      err= HA_ERR_NO_CONNECTION;
      break;
    }
    ndb= get_ndb();
10068
    ndb->setDatabaseName(m_dbname);
10069 10070
    Ndb_table_guard ndbtab_g(dict= ndb->getDictionary(), m_tabname);
    if (!ndbtab_g.get_table())
10071
      ERR_BREAK(dict->getNdbError(), err);
10072
    *no_parts= ndbtab_g.get_table()->getFragmentCount();
10073
    DBUG_RETURN(FALSE);
10074
  }
10075 10076 10077 10078 10079

  print_error(err, MYF(0));
  DBUG_RETURN(TRUE);
}

10080 10081 10082
static int ndbcluster_fill_files_table(handlerton *hton, 
                                       THD *thd, 
                                       TABLE_LIST *tables,
10083
                                       COND *cond)
10084 10085
{
  TABLE* table= tables->table;
10086
  Ndb *ndb= check_ndb_in_thd(thd);
10087 10088
  NdbDictionary::Dictionary* dict= ndb->getDictionary();
  NdbDictionary::Dictionary::List dflist;
10089
  NdbError ndberr;
10090
  uint i;
10091
  DBUG_ENTER("ndbcluster_fill_files_table");
10092

10093 10094
  dict->listObjects(dflist, NdbDictionary::Object::Datafile);
  ndberr= dict->getNdbError();
10095 10096
  if (ndberr.classification != NdbError::NoError)
    ERR_RETURN(ndberr);
10097

10098
  for (i= 0; i < dflist.count; i++)
10099 10100 10101
  {
    NdbDictionary::Dictionary::List::Element& elt = dflist.elements[i];
    Ndb_cluster_connection_node_iter iter;
10102 10103
    uint id;
    
10104 10105
    g_ndb_cluster_connection->init_get_next_node(iter);

10106
    while ((id= g_ndb_cluster_connection->get_next_node(iter)))
10107
    {
10108
      init_fill_schema_files_row(table);
10109
      NdbDictionary::Datafile df= dict->getDatafile(id, elt.name);
10110
      ndberr= dict->getNdbError();
10111 10112 10113 10114 10115 10116
      if(ndberr.classification != NdbError::NoError)
      {
        if (ndberr.classification == NdbError::SchemaError)
          continue;
        ERR_RETURN(ndberr);
      }
10117 10118
      NdbDictionary::Tablespace ts= dict->getTablespace(df.getTablespace());
      ndberr= dict->getNdbError();
10119 10120 10121 10122 10123 10124
      if (ndberr.classification != NdbError::NoError)
      {
        if (ndberr.classification == NdbError::SchemaError)
          continue;
        ERR_RETURN(ndberr);
      }
10125

10126 10127 10128 10129 10130 10131 10132 10133 10134 10135 10136 10137 10138 10139 10140 10141 10142 10143 10144 10145 10146 10147 10148 10149 10150 10151 10152 10153 10154 10155 10156 10157 10158 10159 10160 10161 10162
      table->field[IS_FILES_FILE_NAME]->set_notnull();
      table->field[IS_FILES_FILE_NAME]->store(elt.name, strlen(elt.name),
                                              system_charset_info);
      table->field[IS_FILES_FILE_TYPE]->set_notnull();
      table->field[IS_FILES_FILE_TYPE]->store("DATAFILE",8,
                                              system_charset_info);
      table->field[IS_FILES_TABLESPACE_NAME]->set_notnull();
      table->field[IS_FILES_TABLESPACE_NAME]->store(df.getTablespace(),
                                                    strlen(df.getTablespace()),
                                                    system_charset_info);
      table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
      table->field[IS_FILES_LOGFILE_GROUP_NAME]->
        store(ts.getDefaultLogfileGroup(),
              strlen(ts.getDefaultLogfileGroup()),
              system_charset_info);
      table->field[IS_FILES_ENGINE]->set_notnull();
      table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
                                           ndbcluster_hton_name_length,
                                           system_charset_info);

      table->field[IS_FILES_FREE_EXTENTS]->set_notnull();
      table->field[IS_FILES_FREE_EXTENTS]->store(df.getFree()
                                                 / ts.getExtentSize());
      table->field[IS_FILES_TOTAL_EXTENTS]->set_notnull();
      table->field[IS_FILES_TOTAL_EXTENTS]->store(df.getSize()
                                                  / ts.getExtentSize());
      table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
      table->field[IS_FILES_EXTENT_SIZE]->store(ts.getExtentSize());
      table->field[IS_FILES_INITIAL_SIZE]->set_notnull();
      table->field[IS_FILES_INITIAL_SIZE]->store(df.getSize());
      table->field[IS_FILES_MAXIMUM_SIZE]->set_notnull();
      table->field[IS_FILES_MAXIMUM_SIZE]->store(df.getSize());
      table->field[IS_FILES_VERSION]->set_notnull();
      table->field[IS_FILES_VERSION]->store(df.getObjectVersion());

      table->field[IS_FILES_ROW_FORMAT]->set_notnull();
      table->field[IS_FILES_ROW_FORMAT]->store("FIXED", 5, system_charset_info);
10163 10164

      char extra[30];
10165
      int len= my_snprintf(extra, sizeof(extra), "CLUSTER_NODE=%u", id);
10166 10167
      table->field[IS_FILES_EXTRA]->set_notnull();
      table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info);
10168 10169 10170 10171
      schema_table_store_record(thd, table);
    }
  }

jonas@perch.ndb.mysql.com's avatar
ndb -  
jonas@perch.ndb.mysql.com committed
10172 10173
  NdbDictionary::Dictionary::List uflist;
  dict->listObjects(uflist, NdbDictionary::Object::Undofile);
10174
  ndberr= dict->getNdbError();
10175 10176
  if (ndberr.classification != NdbError::NoError)
    ERR_RETURN(ndberr);
10177

jonas@perch.ndb.mysql.com's avatar
ndb -  
jonas@perch.ndb.mysql.com committed
10178
  for (i= 0; i < uflist.count; i++)
10179
  {
jonas@perch.ndb.mysql.com's avatar
ndb -  
jonas@perch.ndb.mysql.com committed
10180
    NdbDictionary::Dictionary::List::Element& elt= uflist.elements[i];
10181 10182 10183 10184 10185
    Ndb_cluster_connection_node_iter iter;
    unsigned id;

    g_ndb_cluster_connection->init_get_next_node(iter);

10186
    while ((id= g_ndb_cluster_connection->get_next_node(iter)))
10187 10188
    {
      NdbDictionary::Undofile uf= dict->getUndofile(id, elt.name);
10189
      ndberr= dict->getNdbError();
10190 10191 10192 10193 10194 10195
      if (ndberr.classification != NdbError::NoError)
      {
        if (ndberr.classification == NdbError::SchemaError)
          continue;
        ERR_RETURN(ndberr);
      }
10196 10197 10198
      NdbDictionary::LogfileGroup lfg=
        dict->getLogfileGroup(uf.getLogfileGroup());
      ndberr= dict->getNdbError();
10199 10200 10201 10202 10203 10204
      if (ndberr.classification != NdbError::NoError)
      {
        if (ndberr.classification == NdbError::SchemaError)
          continue;
        ERR_RETURN(ndberr);
      }
10205

10206 10207 10208 10209 10210 10211 10212
      init_fill_schema_files_row(table);
      table->field[IS_FILES_FILE_NAME]->set_notnull();
      table->field[IS_FILES_FILE_NAME]->store(elt.name, strlen(elt.name),
                                              system_charset_info);
      table->field[IS_FILES_FILE_TYPE]->set_notnull();
      table->field[IS_FILES_FILE_TYPE]->store("UNDO LOG", 8,
                                              system_charset_info);
jonas@perch.ndb.mysql.com's avatar
jonas@perch.ndb.mysql.com committed
10213 10214
      NdbDictionary::ObjectId objid;
      uf.getLogfileGroupId(&objid);
10215 10216 10217 10218 10219 10220 10221 10222 10223 10224 10225 10226 10227 10228 10229 10230 10231 10232 10233 10234 10235 10236 10237
      table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
      table->field[IS_FILES_LOGFILE_GROUP_NAME]->store(uf.getLogfileGroup(),
                                                  strlen(uf.getLogfileGroup()),
                                                       system_charset_info);
      table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->set_notnull();
      table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->store(objid.getObjectId());
      table->field[IS_FILES_ENGINE]->set_notnull();
      table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
                                           ndbcluster_hton_name_length,
                                           system_charset_info);

      table->field[IS_FILES_TOTAL_EXTENTS]->set_notnull();
      table->field[IS_FILES_TOTAL_EXTENTS]->store(uf.getSize()/4);
      table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
      table->field[IS_FILES_EXTENT_SIZE]->store(4);

      table->field[IS_FILES_INITIAL_SIZE]->set_notnull();
      table->field[IS_FILES_INITIAL_SIZE]->store(uf.getSize());
      table->field[IS_FILES_MAXIMUM_SIZE]->set_notnull();
      table->field[IS_FILES_MAXIMUM_SIZE]->store(uf.getSize());

      table->field[IS_FILES_VERSION]->set_notnull();
      table->field[IS_FILES_VERSION]->store(uf.getObjectVersion());
10238

10239
      char extra[100];
10240 10241
      int len= my_snprintf(extra,sizeof(extra),"CLUSTER_NODE=%u;UNDO_BUFFER_SIZE=%lu",
                           id, (ulong) lfg.getUndoBufferSize());
10242 10243
      table->field[IS_FILES_EXTRA]->set_notnull();
      table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info);
10244 10245 10246
      schema_table_store_record(thd, table);
    }
  }
10247 10248 10249 10250 10251 10252 10253 10254 10255 10256 10257 10258 10259 10260 10261 10262 10263 10264 10265 10266 10267

  // now for LFGs
  NdbDictionary::Dictionary::List lfglist;
  dict->listObjects(lfglist, NdbDictionary::Object::LogfileGroup);
  ndberr= dict->getNdbError();
  if (ndberr.classification != NdbError::NoError)
    ERR_RETURN(ndberr);

  for (i= 0; i < lfglist.count; i++)
  {
    NdbDictionary::Dictionary::List::Element& elt= lfglist.elements[i];

    NdbDictionary::LogfileGroup lfg= dict->getLogfileGroup(elt.name);
    ndberr= dict->getNdbError();
    if (ndberr.classification != NdbError::NoError)
    {
      if (ndberr.classification == NdbError::SchemaError)
        continue;
      ERR_RETURN(ndberr);
    }

10268 10269 10270 10271 10272 10273 10274 10275 10276 10277 10278 10279 10280 10281 10282 10283 10284 10285 10286 10287 10288 10289 10290
    init_fill_schema_files_row(table);
    table->field[IS_FILES_FILE_TYPE]->set_notnull();
    table->field[IS_FILES_FILE_TYPE]->store("UNDO LOG", 8,
                                            system_charset_info);

    table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
    table->field[IS_FILES_LOGFILE_GROUP_NAME]->store(elt.name,
                                                     strlen(elt.name),
                                                     system_charset_info);
    table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->set_notnull();
    table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->store(lfg.getObjectId());
    table->field[IS_FILES_ENGINE]->set_notnull();
    table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
                                         ndbcluster_hton_name_length,
                                         system_charset_info);

    table->field[IS_FILES_FREE_EXTENTS]->set_notnull();
    table->field[IS_FILES_FREE_EXTENTS]->store(lfg.getUndoFreeWords());
    table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
    table->field[IS_FILES_EXTENT_SIZE]->store(4);

    table->field[IS_FILES_VERSION]->set_notnull();
    table->field[IS_FILES_VERSION]->store(lfg.getObjectVersion());
10291 10292

    char extra[100];
10293 10294
    int len= my_snprintf(extra,sizeof(extra),
                         "UNDO_BUFFER_SIZE=%lu",
10295
                         (ulong) lfg.getUndoBufferSize());
10296 10297
    table->field[IS_FILES_EXTRA]->set_notnull();
    table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info);
10298 10299
    schema_table_store_record(thd, table);
  }
10300
  DBUG_RETURN(0);
10301
}
acurtis@xiphis.org's avatar
acurtis@xiphis.org committed
10302

brian@zim.(none)'s avatar
brian@zim.(none) committed
10303 10304 10305 10306 10307
SHOW_VAR ndb_status_variables_export[]= {
  {"Ndb",                      (char*) &ndb_status_variables,   SHOW_ARRAY},
  {NullS, NullS, SHOW_LONG}
};

10308
struct st_mysql_storage_engine ndbcluster_storage_engine=
10309
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
acurtis@xiphis.org's avatar
acurtis@xiphis.org committed
10310 10311 10312 10313

mysql_declare_plugin(ndbcluster)
{
  MYSQL_STORAGE_ENGINE_PLUGIN,
10314
  &ndbcluster_storage_engine,
10315
  ndbcluster_hton_name,
acurtis@xiphis.org's avatar
acurtis@xiphis.org committed
10316
  "MySQL AB",
10317
  "Clustered, fault-tolerant tables",
10318
  PLUGIN_LICENSE_GPL,
10319
  ndbcluster_init, /* Plugin Init */
acurtis@xiphis.org's avatar
acurtis@xiphis.org committed
10320 10321
  NULL, /* Plugin Deinit */
  0x0100 /* 1.0 */,
10322 10323 10324
  ndb_status_variables_export,/* status variables                */
  NULL,                       /* system variables                */
  NULL                        /* config options                  */
acurtis@xiphis.org's avatar
acurtis@xiphis.org committed
10325 10326 10327 10328
}
mysql_declare_plugin_end;

#endif